Skip to content

Commit 64dc922

Browse files
authored
Enable static type checking with Pyrefly (#2136)
Enables static type checking of torchtitan with [pyrefly](https://github.com/facebook/pyrefly). Type checking the code helps catch bugs earlier in the development cycle. * Adds pyrefly to CI, as part of the linting workflow. * Addresses ~100 type errors that can be fixed via local code changes and updates to type annotations, and silences the rest with `# pyrefly: ignore` suppression comments. Note that 325efd9 contains all of the non-comment changes.
1 parent 7a398ea commit 64dc922

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+516
-89
lines changed

.ci/docker/requirements-dev.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,6 @@ expecttest==0.1.6
22
pytest==7.3.2
33
pytest-cov
44
pre-commit
5+
pyrefly==0.45.1
56
tomli-w >= 1.1.0
67
transformers

.ci/docker/requirements-flux.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,2 @@
11
transformers>=4.51.1
2-
einops
32
sentencepiece
4-
pillow

.ci/docker/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,5 @@ tyro
99
tokenizers >= 0.15.0
1010
safetensors
1111
psutil
12+
einops
13+
pillow

.github/workflows/lint.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ jobs:
2828
run: python -m pip install --upgrade pip
2929
- name: Install lint utilities
3030
run: |
31-
python -m pip install pre-commit
31+
python -m pip install -r requirements.txt -r requirements-dev.txt
32+
python -m pip install --force-reinstall --pre --index-url https://download.pytorch.org/whl/nightly/cu126 torch
3233
pre-commit install-hooks
3334
- name: Get changed files
3435
id: changed-files

.pre-commit-config.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,11 @@ repos:
6161
types: [text]
6262
additional_dependencies:
6363
- tomli
64+
65+
- repo: https://github.com/facebook/pyrefly-pre-commit
66+
rev: 0.45.1
67+
hooks:
68+
- id: pyrefly-check
69+
name: Pyrefly (type checking)
70+
pass_filenames: false
71+
language: system

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ possible. Contributions should follow the [Contributing Guidelines](#contributin
44

55
### Setup
66
```
7-
pip install -r requirements-dev.txt
7+
pip install -r requirements.txt -r requirements-dev.txt
88
```
99

1010
### Pull Requests

pyproject.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ dependencies = [
2525
"tyro",
2626
"tensorboard",
2727
"psutil",
28+
"einops",
29+
"pillow",
2830
]
2931
dynamic = ["version"]
3032

@@ -62,3 +64,7 @@ include = ["torchtitan*"]
6264
[tool.pytest.ini_options]
6365
addopts = ["--showlocals"] # show local variables in tracebacks
6466
testpaths = ["tests"]
67+
68+
[tool.pyrefly]
69+
project-excludes = ["torchtitan/experiments", "**/tests/**"]
70+
ignore-missing-imports = ["torchao.*", "torchft"] # optional dependencies

scripts/checkpoint_conversion/convert_from_hf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,16 @@
1616

1717
@torch.inference_mode()
1818
def convert_from_hf(input_dir, output_dir, model_name, model_flavor):
19-
if model_name == "flux":
20-
import torchtitan.experiments.flux # noqa: F401
2119
# initialize model to allocate memory for state dict
2220
train_spec = train_spec_module.get_train_spec(model_name)
2321
model_args = train_spec.model_args[model_flavor]
2422

2523
with torch.device("cpu"):
2624
model = train_spec.model_cls(model_args)
25+
# pyrefly: ignore [bad-argument-type]
2726
model = ModelWrapper(model)
2827

28+
# pyrefly: ignore [not-callable]
2929
sd_adapter = train_spec.state_dict_adapter(model_args, None)
3030
assert (
3131
sd_adapter is not None

scripts/checkpoint_conversion/convert_to_hf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,10 @@ def convert_to_hf(
3030

3131
with torch.device("cpu"):
3232
model = train_spec.model_cls(model_args)
33+
# pyrefly: ignore [bad-argument-type]
3334
model = ModelWrapper(model)
3435

36+
# pyrefly: ignore [not-callable]
3537
sd_adapter = train_spec.state_dict_adapter(model_args, hf_assets_path)
3638
assert (
3739
sd_adapter is not None

scripts/checkpoint_conversion/numerical_tests_example.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def loss_fn(logits1, logits2):
2525
probs2 = F.softmax(logits2, dim=-1)
2626

2727
# Calculate KL Divergence
28-
kl_loss = F.kl_div(probs1, probs2, "mean")
28+
kl_loss = F.kl_div(probs1, probs2, reduction="mean")
2929
return kl_loss
3030

3131

@@ -75,10 +75,13 @@ def forward_tt(config_path, checkpoint_path, test_set):
7575

7676
# materialize model
7777
device = torch.device(device_type)
78+
# pyrefly: ignore [missing-attribute]
7879
model.to_empty(device=device)
7980
model.init_weights(buffer_device=device)
81+
# pyrefly: ignore [missing-attribute]
8082
model.eval()
8183

84+
# pyrefly: ignore [bad-argument-type]
8285
modelWrapper = ModelWrapper(model)
8386
state_dict = modelWrapper._get_state_dict()
8487

@@ -94,6 +97,7 @@ def forward_tt(config_path, checkpoint_path, test_set):
9497
input_ids = input_ids.unsqueeze(0)
9598

9699
# obtains the logits of only the last token in the predictions
100+
# pyrefly: ignore [not-callable]
97101
predictions = model(input_ids)[:, -1, :].unsqueeze(1)
98102
output_list.append(predictions)
99103

@@ -120,6 +124,7 @@ def forward_tt(config_path, checkpoint_path, test_set):
120124
config_manager = ConfigManager()
121125
config = config_manager.parse_args([f"--job.config_file={config_path}"])
122126
train_spec = get_train_spec(config.model.name)
127+
# pyrefly: ignore [not-callable]
123128
tokenizer = train_spec.build_tokenizer_fn(config)
124129

125130
# Build test set of randomly generated token ids
@@ -150,10 +155,11 @@ def forward_tt(config_path, checkpoint_path, test_set):
150155
avg_losses = {}
151156

152157
for test_name, (baseline_outputs, conversion_outputs) in test_configs.items():
153-
total_loss = 0
158+
total_loss: int | torch.Tensor = 0
154159
for baseline, outputs in zip(baseline_outputs, conversion_outputs):
155160
total_loss += loss_fn(baseline, outputs)
156161
avg_loss = total_loss / len(test_set)
162+
# pyrefly: ignore [missing-attribute]
157163
avg_losses[test_name] = avg_loss.item()
158164

159165
for test_name, avg_loss in avg_losses.items():

0 commit comments

Comments
 (0)