Skip to content

Commit 59108bf

Browse files
Tianzhang Cai and bcebere authored
Update of data_encoder and tabular_encoder (#159)
* first commit for the addition of the TabDDPM plugin * Add DDPM test script and update DDPM plugin * add TabDDPM class and refactor * handle discrete cols and label generation * add hparam space and update tests of DDPM * debug and test DDPM * update TensorDataLoader and training loop * clear bugs * debug for regression tasks * debug for regression tasks; ALL TESTS PASSED * remove the official repo of TabDDPM * passed all pre-commit checks * convert assert to conditional AssertionErrors * added an auto annotation tool * update auto-anno and generate annotations * remove auto-anno and flake8 noqa * add python<3.9 compatible annotations * remove star import * replace builtin type annos to typing annos * resolve py38 compatibility issue * tests/plugins/generic/test_ddpm.py * change TabDDPM method signatures * remove Iterator subscription * update AssertionErrors, add EarlyStop callback, removed additional MLP, update logging * remove TensorDataLoader, update test_ddpm * update EarlyStopping * add TabDDPM tutorial, update TabDDPM plugin and encoders * add TabDDPM tutorial * major update of FeatureEncoder and TabularEncoder * add LogDistribution and LogIntDistribution * update DDPM to use TabularEncoder * update test_tabular_encoder and debug * debug and DDPM tutorial OK * debug LogDistribution and LogIntDistribution * change discrete encoding of BinEncoder to passthrough; passed all tests in test_tabular_encoder * add tabnet to plugins/core/models * add factory.py, let DDPM use TabNet, refactor * update docstrings and refactor * fix type annotation compatibility * make SkipConnection serializable * fix TabularEncoder.activation_layout * remove unnecessary code * fix minor bug and add more nn models in factory * update pandas and torch version requirement * update ddpm tutorial * restore setup.cfg * update setup.cfg * debug datetimeDistribution * clean * update setup.cfg and goggle test * move DDPM tutorial to tutorials/plugins * update tab_ddpm * update * try fixing 
goggle * add more activations * minor fix * update * update * update * Update tabular_encoder.py * Update test_goggle.py * Update tabular_encoder.py * update * update * default cat nonlin of goggle <- gumbel_softmax * get_nonlin('softmax') <- GumbelSoftmax() * remove debug logging * update --------- Co-authored-by: Bogdan Cebere <[email protected]>
1 parent da12d59 commit 59108bf

File tree

24 files changed

+3441
-715
lines changed

24 files changed

+3441
-715
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,4 +67,4 @@ lightning_logs
6767
generated
6868
MNIST
6969
cifar-10*
70-
src/test.py
70+
local_test*.py

setup.cfg

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ install_requires =
3535
scikit-learn>=1.0
3636
nflows>=0.14
3737
pandas>=1.3,<2.0
38-
torch>=1.10,<2.0
38+
torch>=1.10.0,<2.0
3939
numpy>=1.20
4040
lifelines>=0.27
4141
opacus>=1.3
@@ -59,7 +59,6 @@ install_requires =
5959
tsai; python_version>"3.7"
6060
importlib-metadata; python_version<"3.8"
6161

62-
6362
[options.packages.find]
6463
where = src
6564
exclude =

src/synthcity/plugins/core/dataloader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# synthcity absolute
1717
from synthcity.plugins.core.constraints import Constraints
1818
from synthcity.plugins.core.dataset import FlexibleDataset, TensorDataset
19-
from synthcity.plugins.core.models.data_encoder import DatetimeEncoder
19+
from synthcity.plugins.core.models.feature_encoder import DatetimeEncoder
2020
from synthcity.utils.compression import compress_dataset, decompress_dataset
2121
from synthcity.utils.serialization import dataframe_hash
2222

src/synthcity/plugins/core/distribution.py

Lines changed: 49 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ def sample(self, count: int = 1) -> Any:
157157
if msamples is not None:
158158
return msamples
159159

160-
return np.random.choice(self.choices, count).tolist()
160+
return np.random.choice(self.choices, count)
161161

162162
def has(self, val: Any) -> bool:
163163
return val in self.choices
@@ -209,8 +209,8 @@ class FloatDistribution(Distribution):
209209
:parts: 1
210210
"""
211211

212-
low: float = np.iinfo(np.int64).min
213-
high: float = np.iinfo(np.int64).max
212+
low: float = np.finfo(np.float64).min
213+
high: float = np.finfo(np.float64).max
214214

215215
@validator("low", always=True)
216216
def _validate_low_thresh(cls: Any, v: float, values: Dict) -> float:
@@ -260,7 +260,7 @@ def max(self) -> Any:
260260
return self.high
261261

262262
def __eq__(self, other: Any) -> bool:
263-
if not isinstance(other, FloatDistribution):
263+
if not isinstance(other, type(self)):
264264
return False
265265

266266
return (
@@ -273,6 +273,21 @@ def dtype(self) -> str:
273273
return "float"
274274

275275

276+
class LogDistribution(FloatDistribution):
277+
low: float = np.finfo(np.float64).tiny
278+
high: float = np.finfo(np.float64).max
279+
base: float = 2.0
280+
281+
def sample(self, count: int = 1) -> Any:
282+
np.random.seed(self.random_state)
283+
msamples = self.sample_marginal(count)
284+
if msamples is not None:
285+
return msamples
286+
lo = np.log2(self.low) / np.log2(self.base)
287+
hi = np.log2(self.high) / np.log2(self.base)
288+
return self.base ** np.random.uniform(lo, hi, count)
289+
290+
276291
class IntegerDistribution(Distribution):
277292
"""
278293
.. inheritance-diagram:: synthcity.plugins.core.distribution.IntegerDistribution
@@ -307,8 +322,9 @@ def sample(self, count: int = 1) -> Any:
307322
if msamples is not None:
308323
return msamples
309324

310-
choices = [val for val in range(self.low, self.high + 1, self.step)]
311-
return np.random.choice(choices, count).tolist()
325+
high = (self.high + 1 - self.low) // self.step
326+
s = np.random.choice(high, count)
327+
return s * self.step + self.low
312328

313329
def has(self, val: Any) -> bool:
314330
return self.low <= val and val <= self.high
@@ -345,7 +361,20 @@ def dtype(self) -> str:
345361
return "int"
346362

347363

348-
OFFSET = 120
364+
class LogIntDistribution(FloatDistribution):
365+
low: float = 1.0
366+
high: float = float(np.iinfo(np.int64).max)
367+
base: float = 2.0
368+
369+
def sample(self, count: int = 1) -> Any:
370+
np.random.seed(self.random_state)
371+
msamples = self.sample_marginal(count)
372+
if msamples is not None:
373+
return msamples
374+
lo = np.log2(self.low) / np.log2(self.base)
375+
hi = np.log2(self.high) / np.log2(self.base)
376+
s = self.base ** np.random.uniform(lo, hi, count)
377+
return s.astype(int)
349378

350379

351380
class DatetimeDistribution(Distribution):
@@ -354,24 +383,29 @@ class DatetimeDistribution(Distribution):
354383
:parts: 1
355384
"""
356385

386+
offset: int = 120
357387
low: datetime = datetime.utcfromtimestamp(0)
358388
high: datetime = datetime.now()
359389

390+
@validator("offset", always=True)
391+
def _validate_offset(cls: Any, v: int) -> int:
392+
if v < 0:
393+
raise ValueError("offset must be greater than 0")
394+
return v
395+
360396
@validator("low", always=True)
361397
def _validate_low_thresh(cls: Any, v: datetime, values: Dict) -> datetime:
362398
mkey = "marginal_distribution"
363399
if mkey in values and values[mkey] is not None:
364400
v = values[mkey].index.min()
365-
366-
return v - timedelta(seconds=OFFSET)
401+
return v - timedelta(seconds=values["offset"])
367402

368403
@validator("high", always=True)
369404
def _validate_high_thresh(cls: Any, v: datetime, values: Dict) -> datetime:
370405
mkey = "marginal_distribution"
371406
if mkey in values and values[mkey] is not None:
372407
v = values[mkey].index.max()
373-
374-
return v + timedelta(seconds=OFFSET)
408+
return v + timedelta(seconds=values["offset"])
375409

376410
def get(self) -> List[Any]:
377411
return [self.name, self.low, self.high]
@@ -382,23 +416,16 @@ def sample(self, count: int = 1) -> Any:
382416
if msamples is not None:
383417
return msamples
384418

385-
samples = np.random.uniform(
386-
datetime.timestamp(self.low), datetime.timestamp(self.high), count
387-
)
388-
389-
samples_dt = []
390-
for s in samples:
391-
samples_dt.append(datetime.fromtimestamp(s))
392-
393-
return samples_dt
419+
delta = self.high - self.low
420+
return self.low + delta * np.random.rand(count)
394421

395422
def has(self, val: datetime) -> bool:
396423
return self.low <= val and val <= self.high
397424

398425
def includes(self, other: "Distribution") -> bool:
399426
return self.min() - timedelta(
400-
seconds=OFFSET
401-
) <= other.min() and other.max() <= self.max() + timedelta(seconds=OFFSET)
427+
seconds=self.offset
428+
) <= other.min() and other.max() <= self.max() + timedelta(seconds=self.offset)
402429

403430
def as_constraint(self) -> Constraints:
404431
return Constraints(

src/synthcity/plugins/core/models/convnet.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ class ConvNet(nn.Module):
6969
@validate_arguments(config=dict(arbitrary_types_allowed=True))
7070
def __init__(
7171
self,
72-
task_type: str,
73-
model: nn.Module, # classification/regression
72+
task_type: str, # classification/regression
73+
model: nn.Module,
7474
lr: float = 1e-3,
7575
weight_decay: float = 1e-3,
7676
opt_betas: tuple = (0.9, 0.999),

src/synthcity/plugins/core/models/data_encoder.py

Lines changed: 0 additions & 110 deletions
This file was deleted.

0 commit comments

Comments (0)