
Commit 6c029a8

Author: pytorchbot
Message: 2025-12-04 nightly release (83288ce)
Parent: 449dd75

8 files changed: +432 -30 lines

.github/workflows/unittest_ci.yml (1 addition, 1 deletion)

@@ -32,7 +32,7 @@ on:
       - test
 
 jobs:
-  build_test:
+  unittest_ci_gpu:
     strategy:
       fail-fast: false
       matrix:

.github/workflows/unittest_ci_cpu.yml (2 additions, 2 deletions)

@@ -32,7 +32,7 @@ on:
       - test
 
 jobs:
-  build_test:
+  unittest_ci_cpu:
     strategy:
       fail-fast: false
       matrix:
@@ -65,7 +65,7 @@ jobs:
       contents: read
     with:
       runner: ${{ matrix.os }}
-      timeout: 15
+      timeout: 20
       script: |
         ldd --version
         conda create -y --name build_binary python=${{ matrix.python.version }}
New file (40 additions, 0 deletions)

# this is a very basic sparse data dist config
# runs on 2 ranks, showing traces with reasonable workloads
RunOptions:
  world_size: 2
  batch_size: 16384
  num_batches: 10
  num_benchmarks: 1
  num_profiles: 1
  sharding_type: table_wise
  profile_dir: "."
  name: "sparse_data_dist_base"
  # export_stacks: True # enable this to export stack traces
PipelineConfig:
  pipeline: "sparse"
ModelInputConfig:
  feature_pooling_avg: 30
  use_variable_batch: True
EmbeddingTablesConfig:
  num_unweighted_features: 90
  num_weighted_features: 80
  embedding_feature_dim: 256
  additional_tables:
    - - name: FP16_table
        embedding_dim: 512
        num_embeddings: 100_000
        feature_names: ["additional_0_0"]
        data_type: FP16
      - name: large_table
        embedding_dim: 2048
        num_embeddings: 1_000_000
        feature_names: ["additional_0_1"]
    - []
    - - name: skipped_table
        embedding_dim: 128
        num_embeddings: 100_000
        feature_names: ["additional_2_1"]
PlannerConfig:
  additional_constraints:
    large_table:
      sharding_types: [column_wise]
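The new benchmark config is plain YAML, so it can be sanity-checked outside the benchmark harness. Below is a minimal sketch that parses it with PyYAML and reads a few fields; the file name sparse_data_dist_base.yaml is an assumption (this diff view does not show it), and the torchrec benchmark runner that actually consumes the config is not part of this excerpt.

    # Minimal sketch: parse the new benchmark config with PyYAML and inspect it.
    # The file path below is hypothetical; the diff view does not show the name.
    import yaml

    with open("sparse_data_dist_base.yaml") as f:  # hypothetical path
        cfg = yaml.safe_load(f)

    # Top-level keys mirror the section names in the YAML above.
    print(cfg["RunOptions"]["world_size"])                # 2
    print(cfg["ModelInputConfig"]["use_variable_batch"])  # True

    # additional_tables parses as a list of groups of table specs:
    # the first group holds FP16_table and large_table, the second is empty.
    print(cfg["EmbeddingTablesConfig"]["additional_tables"][0][0]["name"])  # FP16_table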

torchrec/distributed/test_utils/input_config.py (26 additions, 2 deletions)

@@ -13,7 +13,7 @@
 import torch
 from torchrec.modules.embedding_configs import EmbeddingBagConfig
 
-from .model_input import ModelInput
+from .model_input import ModelInput, VariableBatchModelInput
 
 
 @dataclass
@@ -30,6 +30,7 @@ class ModelInputConfig:
     long_kjt_offsets: bool = True
     long_kjt_lengths: bool = True
     pin_memory: bool = True
+    use_variable_batch: bool = False
 
     def generate_batches(
         self,
@@ -47,6 +48,29 @@ def generate_batches(
         """
         device = torch.device(self.device) if self.device is not None else None
 
+        if self.use_variable_batch:
+            return [
+                VariableBatchModelInput.generate(
+                    batch_size=self.batch_size,
+                    num_float_features=self.num_float_features,
+                    tables=tables,
+                    weighted_tables=weighted_tables,
+                    use_offsets=self.use_offsets,
+                    indices_dtype=(
+                        torch.int64 if self.long_kjt_indices else torch.int32
+                    ),
+                    offsets_dtype=(
+                        torch.int64 if self.long_kjt_offsets else torch.int32
+                    ),
+                    lengths_dtype=(
+                        torch.int64 if self.long_kjt_lengths else torch.int32
+                    ),
+                    device=device,
+                    pin_memory=self.pin_memory,
+                )
+                for _ in range(self.num_batches)
+            ]
+
         return [
             ModelInput.generate(
                 batch_size=self.batch_size,
@@ -61,5 +85,5 @@ def generate_batches(
                 lengths_dtype=(torch.int64 if self.long_kjt_lengths else torch.int32),
                 pin_memory=self.pin_memory,
             )
-            for batch_size in range(self.num_batches)
+            for _ in range(self.num_batches)
         ]
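The effect of the new use_variable_batch flag follows from the call sites above: generate_batches now dispatches to VariableBatchModelInput.generate instead of ModelInput.generate, reusing the same dtype, device, and pin_memory options. A minimal usage sketch follows; it assumes ModelInputConfig exposes batch_size, num_batches, and num_float_features as constructor fields (only some fields appear in this excerpt) and that generate_batches accepts the table lists as arguments, as the calls above suggest.

    # Minimal sketch under the assumptions noted above: generate variable-batch
    # inputs for two unweighted tables.
    from torchrec.modules.embedding_configs import EmbeddingBagConfig
    from torchrec.distributed.test_utils.input_config import ModelInputConfig

    tables = [
        EmbeddingBagConfig(
            name=f"table_{i}",
            embedding_dim=256,
            num_embeddings=100_000,
            feature_names=[f"feature_{i}"],
        )
        for i in range(2)
    ]

    config = ModelInputConfig(
        batch_size=16384,
        num_batches=10,
        num_float_features=10,
        use_variable_batch=True,  # new flag added in this commit; defaults to False
    )

    # Returns num_batches VariableBatchModelInput objects when
    # use_variable_batch is True, otherwise ModelInput objects.
    batches = config.generate_batches(tables=tables, weighted_tables=[])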
