43 commits
96939dd
check functionality
Leomyh Apr 11, 2025
a52a2ad
pearson correlation metrics v1.0
Leomyh Apr 11, 2025
d2e02ae
add some tabular evaluation metrics v1.0
Leomyh Apr 16, 2025
f3ed0c8
check errors
Leomyh Apr 16, 2025
45be396
PearsonCorrelation and Matrix and Dendrogram distance metrics
Leomyh Apr 16, 2025
b0dfa99
Fixed distance matrix and dendrogram distance metrics issues
Leomyh Apr 16, 2025
ed9c85f
update TFTG similarity metric
Leomyh Apr 17, 2025
a2728d4
update
Leomyh Apr 17, 2025
4041679
update TGTGSimilarity metric and GeneExpressionDataloader
Leomyh Apr 19, 2025
c693127
update
Leomyh Apr 19, 2025
6f3e287
modify TFTGSimilarity and TGTGSimilarity metrics
Leomyh Apr 19, 2025
641cb4d
try to resolve incompatible problem with new pandas version
Leomyh Apr 24, 2025
b0e2096
add ReverseValidationMLP and AdversarialAccuracy metrics
Leomyh Apr 24, 2025
2c176aa
add matrix evaluation metrics test
Leomyh Apr 25, 2025
d577b8a
update
Leomyh Apr 25, 2025
304c027
add t-closeness metric and test
Leomyh May 2, 2025
fe8b2c8
update t-closeness metric
Leomyh May 5, 2025
337ff44
update t-closeness metric2.0
Leomyh May 5, 2025
e9b761c
remove redundant metric
Leomyh May 14, 2025
6058de5
Merge branch 'vanderschaarlab:main' into main
Leomyh Jun 5, 2025
c2a5e87
update
Leomyh Jun 5, 2025
550555f
Merge remote-tracking branch 'origin/main'
Leomyh Jun 5, 2025
a557024
update Pearson metrics
Leomyh Jul 18, 2025
003ae01
Revert "update Pearson metrics"
Leomyh Jul 18, 2025
7fdc6e6
update distance metric to matrix distance and dendrogram distance
Leomyh Jul 20, 2025
239630f
debug
Leomyh Jul 20, 2025
4bd692a
debug
Leomyh Jul 21, 2025
cac5062
merge PearsonCorrelation to distancematrix
Leomyh Jul 22, 2025
3963360
update
Leomyh Jul 22, 2025
f14a1ef
Revert "update"
Leomyh Jul 22, 2025
c529974
debug
Leomyh Jul 22, 2025
068f861
resolve conflict
Leomyh Aug 11, 2025
8d243c6
Add sequential synthesis model and methods
Leomyh Aug 11, 2025
c62c3dc
resolve conflict
Leomyh Aug 11, 2025
2a78b68
Merge remote-tracking branch 'origin/main'
Leomyh Aug 11, 2025
8e5ab7c
test to pre-commit
Leomyh Aug 11, 2025
687bcec
new_dist
Leomyh Aug 19, 2025
038542d
debug
Leomyh Aug 19, 2025
57fc253
debug2
Leomyh Aug 19, 2025
2100a32
debug3
Leomyh Aug 19, 2025
dde0e06
new Dendogram dist
Leomyh Aug 24, 2025
5c03870
debug
Leomyh Aug 24, 2025
8a3f5d4
debug2
Leomyh Aug 24, 2025
30 changes: 15 additions & 15 deletions src/synthcity/benchmark/__init__.py
@@ -360,41 +360,41 @@ def print(
@validate_arguments(config=dict(arbitrary_types_allowed=True))
def highlight(
results: Dict,
) -> None:
) -> Any:
pd.set_option("display.max_rows", None, "display.max_columns", None)
means = []
for plugin in results:
data = results[plugin]["mean"]
directions = results[plugin]["direction"].to_dict()
means.append(data)
directions = {}
for plugin, df in results.items():
means.append(df["mean"])
directions.update(df["direction"].to_dict())

out = pd.concat(means, axis=1)
out.columns = list(results.keys())

out = out.set_axis(list(results.keys()), axis=1, copy=False)

bad_highlight = "background-color: lightcoral;"
ok_highlight = "background-color: green;"
default = ""

def highlights(row: pd.Series) -> Any:
def _highlight_row(row: pd.Series) -> List[str]:
metric = row.name
vals = row.values
if directions[metric] == "minimize":
best_val = np.min(row.values)
worst_val = np.max(row)
best, worst = vals.min(), vals.max()
else:
best_val = np.max(row.values)
worst_val = np.min(row)
best, worst = vals.max(), vals.min()

styles = []
for val in row.values:
if val == best_val:
for v in vals:
if v == best:
styles.append(ok_highlight)
elif val == worst_val:
elif v == worst:
styles.append(bad_highlight)
else:
styles.append(default)

return styles

out.style.apply(highlights, axis=1)
out.style.apply(_highlight_row, axis=1)

return out
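
A minimal usage sketch of the refactored helper (illustrative, not part of the diff): it assumes highlight stays exposed on synthcity.benchmark.Benchmarks, as the hunk header suggests, and relies only on each per-plugin frame carrying "mean" and "direction" columns, which is all the function reads.

import pandas as pd
from synthcity.benchmark import Benchmarks

# Two hypothetical plugins scored on two generic metrics (names are illustrative).
metrics = ["metric_a", "metric_b"]
results = {
    "plugin_x": pd.DataFrame(
        {"mean": [0.10, 0.80], "direction": ["minimize", "maximize"]}, index=metrics
    ),
    "plugin_y": pd.DataFrame(
        {"mean": [0.20, 0.70], "direction": ["minimize", "maximize"]}, index=metrics
    ),
}

out = Benchmarks.highlight(results)  # now returns the concatenated frame
print(out)  # one column per plugin, one row per metric
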
14 changes: 14 additions & 0 deletions src/synthcity/metrics/eval.py
@@ -30,14 +30,17 @@
PerformanceEvaluatorXGB,
)
from .eval_privacy import (
AdversarialAccuracy,
DeltaPresence,
DomiasMIABNAF,
DomiasMIAKDE,
DomiasMIAPrior,
EpsilonIdentifiability,
IdentifiabilityScore,
kAnonymization,
kMap,
lDiversityDistinct,
tCloseness,
)
from .eval_sanity import (
CloseValuesProbability,
@@ -49,13 +52,17 @@
from .eval_statistical import (
AlphaPrecision,
ChiSquaredTest,
DendrogramDistance,
FrechetInceptionDistance,
InverseKLDivergence,
JensenShannonDistance,
KolmogorovSmirnovTest,
MatrixDistance,
MaximumMeanDiscrepancy,
PRDCScore,
SurvivalKMDistance,
TFTGSimilarity,
TGTGSimilarity,
WassersteinDistance,
)
from .scores import ScoreEvaluator
@@ -78,6 +85,10 @@
AlphaPrecision,
SurvivalKMDistance,
FrechetInceptionDistance,
MatrixDistance,
DendrogramDistance,
TFTGSimilarity,
TGTGSimilarity,
# performance tests
PerformanceEvaluatorLinear,
PerformanceEvaluatorMLP,
@@ -92,14 +103,17 @@
SyntheticDetectionGMM,
SyntheticDetectionLinear,
# privacy tests
AdversarialAccuracy,
DeltaPresence,
EpsilonIdentifiability,
kAnonymization,
kMap,
lDiversityDistinct,
IdentifiabilityScore,
DomiasMIABNAF, # TODO: This takes too long to include as default
DomiasMIAKDE,
DomiasMIAPrior,
tCloseness,
]


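With the new evaluators registered in standard_metrics, they become selectable by name through the metrics API. A hedged sketch of how that could look: it assumes Metrics.evaluate accepts a metrics filter keyed by category and that the privacy evaluators report under "privacy"; the metric names come from the name() methods in the file below, and the "marginal_distributions" plugin is used only as a cheap stand-in generator.

from sklearn.datasets import load_diabetes

from synthcity.metrics import Metrics
from synthcity.plugins import Plugins
from synthcity.plugins.core.dataloader import GenericDataLoader

X, y = load_diabetes(return_X_y=True, as_frame=True)
X["target"] = y
loader = GenericDataLoader(X, sensitive_features=["sex"])

# Cheap baseline generator, just to obtain a synthetic DataLoader to score.
syn = Plugins().get("marginal_distributions").fit(loader).generate(count=len(X))

report = Metrics.evaluate(
    loader,
    syn,
    metrics={"privacy": ["adversarial_accuracy", "t-closeness"]},
)
print(report[["mean", "direction"]])
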
243 changes: 243 additions & 0 deletions src/synthcity/metrics/eval_privacy.py
@@ -608,3 +608,246 @@ def evaluate_p_R(
.numpy()
)
return p_G_evaluated, p_R_evaluated


class AdversarialAccuracy(PrivacyEvaluator):
"""
Adversarial Accuracy (AA) from Yale et al. (2020).

Reference: https://pmc.ncbi.nlm.nih.gov/articles/PMC10311334/

Intuition
---------
AA tells us whether synthetic data are
(i) too close to the real data – privacy leakage, low utility – or
(ii) too far away – poor fidelity.
It is the accuracy of a 1-NN classifier that tries to distinguish real from synthetic samples using Euclidean distances.

Let
d_tt(i): nearest-neighbor distance from real sample i to all other real samples (excluding itself)
d_tg(i): NN distance from real sample i to the synthetic set
d_gg(j): NN distance from synthetic sample j to all other synthetic samples (excluding itself)
d_gt(j): NN distance from synthetic sample j to the real set

The metric is

AA = 0.5 * [ (1/n) Σ_i 1( d_tg(i) > d_tt(i) ) + (1/m) Σ_j 1( d_gt(j) > d_gg(j) ) ]

Range and interpretation
------------------------
• AA → 0 : generator over-fits (synthetic ≈ real, privacy ↓, utility ↓)
• AA → 1 : generator under-fits (synthetic easily separable, utility ↓)
• AA ≈ 0.5 : good trade-off between realism and privacy (ideal)

The evaluator returns {"aa": float}.
"""

def __init__(self, **kwargs: Any) -> None:
super().__init__(default_metric="aa", **kwargs)

@staticmethod
def name() -> str:
return "adversarial_accuracy"

@staticmethod
def direction() -> str:
# Best value is 0.5; downstream code can optimize |AA - 0.5|.
return "custom"

# ---------- main ------------------------------------------

@validate_arguments(config=dict(arbitrary_types_allowed=True))
def _evaluate(self, X_gt: DataLoader, X_syn: DataLoader) -> Dict:
if X_gt.type() == "images":
raise ValueError("AdversarialAccuracy is not defined for images.")

real = X_gt.numpy().astype(float)
syn = X_syn.numpy().astype(float)

# 1) d_tt: distance to the 2-nd nearest neighbour in the real set
nn_real = NearestNeighbors(n_neighbors=2).fit(real)
d_tt = nn_real.kneighbors(real, return_distance=True)[0][:, 1]

# 2) Cross-set distances
d_tg = self._pairwise_min_dist(real, syn)
d_gt = self._pairwise_min_dist(syn, real)

# 3) d_gg: distance to the 2-nd NN in the synthetic set
if len(syn) > 1:
nn_syn = NearestNeighbors(n_neighbors=2).fit(syn)
d_gg = nn_syn.kneighbors(syn, return_distance=True)[0][:, 1]
else: # edge case: only one synthetic sample
d_gg = np.full(len(syn), np.inf)

# 4) Compute AA
aa_left = (d_tg > d_tt).mean()
aa_right = (d_gt > d_gg).mean()
aa = 0.5 * (aa_left + aa_right)
aa = max(aa, 1e-8)  # clamp to a small positive value so strictly-positive score checks pass

return {"aa": float(aa)}

# ---------- helpers --------------------------------------------------

@staticmethod
def _pairwise_min_dist(a: np.ndarray, b: np.ndarray) -> np.ndarray:
"""For every point in `a` return its minimum Euclidean distance to set `b`."""
dists = np.linalg.norm(a[:, None, :] - b[None, :, :], axis=-1)
return dists.min(axis=1)
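
# --- Illustrative sketch (editorial example, not part of eval_privacy.py) -----
# A plain numpy/sklearn check of the AA formula from the docstring above: when
# the synthetic set is an exact copy of the real set, every cross-set
# nearest-neighbour distance is 0, both indicators are always False and AA
# collapses to 0 (over-fitting); a large shift pushes AA towards 1
# (under-fitting); an independent draw from the same distribution lands near
# the ideal 0.5.
import numpy as np
from sklearn.neighbors import NearestNeighbors


def toy_aa(real: np.ndarray, syn: np.ndarray) -> float:
    d_tt = NearestNeighbors(n_neighbors=2).fit(real).kneighbors(real)[0][:, 1]
    d_gg = NearestNeighbors(n_neighbors=2).fit(syn).kneighbors(syn)[0][:, 1]
    d_tg = NearestNeighbors(n_neighbors=1).fit(syn).kneighbors(real)[0][:, 0]
    d_gt = NearestNeighbors(n_neighbors=1).fit(real).kneighbors(syn)[0][:, 0]
    return float(0.5 * ((d_tg > d_tt).mean() + (d_gt > d_gg).mean()))


rng = np.random.default_rng(0)
real = rng.normal(size=(200, 4))
print(toy_aa(real, real.copy()))                 # ~0.0: synthetic == real
print(toy_aa(real, real + 10.0))                 # ~1.0: synthetic far from real
print(toy_aa(real, rng.normal(size=(200, 4))))   # ~0.5: same distribution
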


class EpsilonIdentifiability(PrivacyEvaluator):
"""
epsilon-Identifiability from Yoon et al., IEEE JBHI 2019, DOI 10.1109/JBHI.2018.2880147

Reference: <https://ieeexplore.ieee.org/document/9034117>

I(D, D̂) = (1/N) · Σ_i 1[ r̂_i < r_i ]

• r_i : minimum weighted-Euclidean distance from real x_i to any other real sample
• r̂_i : minimum weighted-Euclidean distance from real x_i to any synthetic sample
• weights w_j = 1 / H(X^(j)) (inverse discrete entropy of column j)

The evaluator returns 1 - I, so lower values indicate more privacy leakage.
"""

def __init__(self, epsilon: float = 0.5, **kwargs: Any) -> None:
super().__init__(default_metric="I", **kwargs)
self.epsilon = epsilon # user-set threshold

@staticmethod
def name() -> str:
return "epsilon_identifiability"

@staticmethod
def direction() -> str:
return "maximize" # higher value ⇒ safer data

# -------- main --------
@validate_arguments(config=dict(arbitrary_types_allowed=True))
def _evaluate(self, X_gt: DataLoader, X_syn: DataLoader) -> Dict:
if X_gt.type() == "images":
raise ValueError("Metric not defined for images.")

# 1) common preprocessing
real = X_gt.numpy() # shape (N,d)
synth = X_syn.numpy() # shape (M,d)
w = self._weight_vector(X_gt) # shape (d,)

# 2) compute r_i (min to *other* real rows)
# efficient: nearest-neighbor search in weighted space
Rw = real * w # weight each column
nn = NearestNeighbors(n_neighbors=2, metric="euclidean").fit(Rw)
dist_real, _ = nn.kneighbors(Rw)
r = dist_real[:, 1] # skip self-distance (0)

# 3) compute r^hat_i (min to any synthetic row)
Sw = synth * w
nn_syn = NearestNeighbors(n_neighbors=1, metric="euclidean").fit(Sw)
r_hat, _ = nn_syn.kneighbors(Rw)
r_hat = r_hat[:, 0]

# 4) indicator + score
I_val = (r_hat < r).mean()

return {
"I": 1.0 - I_val, # higher ⇒ safer
}

# -------- helper --------
@staticmethod
def _discrete_entropy(col: np.ndarray) -> float:
vals, counts = np.unique(col, return_counts=True)
p = counts / counts.sum()
return -(p * np.log2(p + 1e-12)).sum()

def _weight_vector(self, X: DataLoader) -> np.ndarray:
df = X.dataframe()
ent = np.array([self._discrete_entropy(df[c].values) for c in df.columns])
return 1.0 / (ent + 1e-12) # avoid /0

@staticmethod
def _pairwise_min_dist(x: np.ndarray, Y: np.ndarray, w: np.ndarray) -> float:
"""return min ||w·(x-y)||_2 over y ∈Y"""
diffs = (x - Y) * w
return np.linalg.norm(diffs, axis=1).min()

def evaluate_default(
self, X_gt: DataLoader, X_syn: DataLoader, *a: Any, **k: Any
) -> float:
"""Return ‘privacy-safe’ score = 1 – I."""
return self.evaluate(X_gt, X_syn)["I"]
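
# --- Illustrative sketch (editorial example, not part of eval_privacy.py) -----
# How the inverse-entropy weights in _weight_vector behave: a near-constant
# column has low discrete entropy and therefore a large weight, so any
# disagreement on it dominates the weighted Euclidean distances r_i and r̂_i.
import numpy as np


def discrete_entropy(col: np.ndarray) -> float:
    _, counts = np.unique(col, return_counts=True)
    p = counts / counts.sum()
    return float(-(p * np.log2(p + 1e-12)).sum())


rare = np.array([0, 0, 0, 0, 0, 0, 0, 1])      # almost constant -> low entropy
uniform = np.array([0, 1, 2, 3, 4, 5, 6, 7])   # spread out -> high entropy
print(1.0 / (discrete_entropy(rare) + 1e-12))     # large weight, ~1.84
print(1.0 / (discrete_entropy(uniform) + 1e-12))  # small weight, ~0.33
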


class tCloseness(PrivacyEvaluator):
"""
Returns the t-closeness score between the real and the synthetic data.
Measures how far the sensitive-attribute distribution within each equivalence class (here, a KMeans cluster over the quasi-identifiers) is from the global sensitive-attribute distribution of the real data, reporting the worst (maximum) total variation distance across clusters.

Reference:
Li, Ninghui, Tiancheng Li, and Suresh Venkatasubramanian.
"t-closeness: Privacy beyond k-anonymity and l-diversity." ICDE 2007.
"""

def __init__(
self, sensitive_column: str = "sensitive", n_clusters: int = 10, **kwargs: Any
) -> None:
super().__init__(default_metric="t", **kwargs)
self.sensitive_column = sensitive_column
self.n_clusters = n_clusters

@staticmethod
def name() -> str:
return "t-closeness"

@staticmethod
def direction() -> str:
return "minimize"

@validate_arguments(config=dict(arbitrary_types_allowed=True))
def _evaluate(self, X_gt: DataLoader, X_syn: DataLoader) -> Dict:
if X_gt.type() == "images":
raise ValueError("Metric not defined for images")

df_real = X_gt.dataframe()
df_synth = X_syn.dataframe()

loader_sens = getattr(X_gt, "sensitive_features", None) or []
if loader_sens:
sens_feats = list(loader_sens)
elif self.sensitive_column in df_real.columns:
sens_feats = [self.sensitive_column]
else:
sens_feats = [df_real.columns[-1]]
sensitive_col = sens_feats[0]

# Select a set of quasi-identifiers
qid_cols = _utils.get_features(X_gt, sens_feats)
if not qid_cols:
raise ValueError("No quasi‐identifier columns found")

# Compute global sensitive attribute distribution on real data
global_dist = df_real[sensitive_col].value_counts(normalize=True)

# One hot encoder and clustering
X_real_qid = pd.get_dummies(df_real[qid_cols], drop_first=True)
model = KMeans(n_clusters=self.n_clusters, random_state=0).fit(X_real_qid)

X_synth_qid = pd.get_dummies(df_synth[qid_cols], drop_first=True)
X_synth_qid = X_synth_qid.reindex(columns=X_real_qid.columns, fill_value=0)
df_synth["cluster"] = model.predict(X_synth_qid)

# Compute t-closeness per cluster on synthetic data
t_vals = []
for cluster_id, group in df_synth.groupby("cluster"):
local_dist = group[sensitive_col].value_counts(normalize=True)
# Align local distribution with global distribution
local_dist = local_dist.reindex(global_dist.index, fill_value=0.0)
tvd = 0.5 * np.abs(local_dist - global_dist).sum()
t_vals.append(tvd)

max_t = max(t_vals) if t_vals else 0.0

return {"t": float(max_t)}