NVIDIA · ksivaman · Feb 26, 2024
diff --git a/qa/L0_pytorch_unittest/test.sh b/qa/L0_pytorch_unittest/test.sh
@@ -9,7 +9,7 @@ set -e
 pip install pytest==6.2.5 onnxruntime==1.13.1
 pytest -v -s $TE_PATH/tests/pytorch/test_sanity.py
 pytest -v -s $TE_PATH/tests/pytorch/test_deferred_init.py
-PYTORCH_JIT=0 NVTE_ALLOW_NONDETERMINISTIC_ALGO=0 pytest -v -s $TE_PATH/tests/pytorch/test_numerics.py
+NVTE_TORCH_COMPILE=0 PYTORCH_JIT=0 NVTE_ALLOW_NONDETERMINISTIC_ALGO=0 pytest -v -s $TE_PATH/tests/pytorch/test_numerics.py
 pytest -v -s $TE_PATH/tests/pytorch/test_jit.py
 pytest -v -s $TE_PATH/tests/pytorch/fused_attn/test_fused_attn.py
 pytest -v -s $TE_PATH/tests/pytorch/test_fused_rope.py

diff --git a/tests/pytorch/test_numerics.py b/tests/pytorch/test_numerics.py
@@ -558,19 +558,11 @@ def test_gpt_full_activation_recompute(dtype, bs, model, fp8, fp8_model_params,
 
     config = model_configs[model]
 
-    if not use_reentrant:
-        # Non-reentrant checkpoint becomes non-deterministic with bias+GELU fusion
-        os.environ["NVTE_BIAS_GELU_NVFUSION"] = "0"
-
     outputs, names = _test_e2e_full_recompute(bs, dtype, config, fp8, fp8_model_params,
                                               recompute=False, use_reentrant=use_reentrant)
     outputs_recompute, _ = _test_e2e_full_recompute(bs, dtype, config, fp8, fp8_model_params,
                                                     recompute=True, use_reentrant=use_reentrant)
 
-    if not use_reentrant:
-        # Reset bias+GELU fusion flag to avoid contaminating other tests
-        del os.environ["NVTE_BIAS_GELU_NVFUSION"]
-
     assert_all_equal(outputs, outputs_recompute, names=names)
 
 

diff --git a/transformer_engine/pytorch/module/layernorm_mlp.py b/transformer_engine/pytorch/module/layernorm_mlp.py
@@ -1416,11 +1416,6 @@ def forward(
                         is_first_microbatch
                 )
 
-            # Disable bias_gelu_nvfusion for determinism checkpointing in non-reentrant mode
-            if (self.bias_gelu_nvfusion
-                and not use_reentrant_activation_recompute()):
-                self.bias_gelu_nvfusion = False
-
             from ..cpu_offload import CPUOffloadEnabled
 
             if torch.is_grad_enabled():