arxlang · yuvimittal · Jan 19, 2026 · Nov 20, 2025 · Dec 16, 2025 · Jan 19, 2026
diff --git a/src/irx/builders/llvmliteir.py b/src/irx/builders/llvmliteir.py
@@ -165,6 +165,7 @@ def __init__(self) -> None:
         self.named_values: dict[str, Any] = {}
         self.function_protos: dict[str, astx.FunctionPrototype] = {}
         self.result_stack: list[ir.Value | ir.Function] = []
+        self._fast_math_enabled = False
 
         self.initialize()
 
@@ -420,7 +421,36 @@ def _emit_fma(
             return builder.fma(lhs, rhs, addend, name="vfma")
 
         fma_fn = self._get_fma_function(lhs.type)
-        return builder.call(fma_fn, [lhs, rhs, addend], name="vfma")
+        inst = builder.call(fma_fn, [lhs, rhs, addend], name="vfma")
+        self._apply_fast_math(inst)
+        return inst
+
+    def set_fast_math(self, enabled: bool) -> None:
+        """Turn fast-math flagging on or off for later FP instructions."""
+        self._fast_math_enabled = enabled
+
+    def _apply_fast_math(self, inst: ir.Instruction) -> None:
+        """Tag an FP (or FP-vector) instruction ``fast`` if fast-math is on."""
+        if not self._fast_math_enabled:
+            return
+
+        ty = inst.type
+        elem_ty = ty.element if isinstance(ty, ir.VectorType) else ty
+        if not is_fp_type(elem_ty):
+            return
+
+        # Call instructions (e.g. the FMA intrinsic call) keep their fast-math
+        # flags in ``fastmath``; anything appended to a call's ``flags`` list
+        # is not printed, so prefer ``fastmath`` whenever it is present.
+        fastmath = getattr(inst, "fastmath", None)
+        flags = getattr(inst, "flags", None)
+        try:
+            if fastmath is not None:
+                fastmath.add("fast")
+            elif flags is not None and "fast" not in flags:
+                flags.append("fast")
+        except (AttributeError, TypeError):
+            return
 
     @dispatch.abstract
     def visit(self, node: astx.AST) -> None:
@@ -616,6 +646,7 @@ def visit(self, node: astx.BinaryOp) -> None:
                         result = self._llvm.ir_builder.fadd(
                             llvm_lhs, llvm_rhs, name="vfaddtmp"
                         )
+                        self._apply_fast_math(result)
                     else:
                         result = self._llvm.ir_builder.add(
                             llvm_lhs, llvm_rhs, name="vaddtmp"
@@ -625,6 +656,7 @@ def visit(self, node: astx.BinaryOp) -> None:
                         result = self._llvm.ir_builder.fsub(
                             llvm_lhs, llvm_rhs, name="vfsubtmp"
                         )
+                        self._apply_fast_math(result)
                     else:
                         result = self._llvm.ir_builder.sub(
                             llvm_lhs, llvm_rhs, name="vsubtmp"
@@ -634,6 +666,7 @@ def visit(self, node: astx.BinaryOp) -> None:
                         result = self._llvm.ir_builder.fmul(
                             llvm_lhs, llvm_rhs, name="vfmultmp"
                         )
+                        self._apply_fast_math(result)
                     else:
                         result = self._llvm.ir_builder.mul(
                             llvm_lhs, llvm_rhs, name="vmultmp"
@@ -643,6 +676,7 @@ def visit(self, node: astx.BinaryOp) -> None:
                         result = self._llvm.ir_builder.fdiv(
                             llvm_lhs, llvm_rhs, name="vfdivtmp"
                         )
+                        self._apply_fast_math(result)
                     else:
                         unsigned = getattr(node, "unsigned", None)
                         if unsigned is None:
@@ -690,6 +724,7 @@ def visit(self, node: astx.BinaryOp) -> None:
                 result = self._llvm.ir_builder.fadd(
                     llvm_lhs, llvm_rhs, "addtmp"
                 )
+                self._apply_fast_math(result)
             else:
                 # there's more conditions to be handled
                 result = self._llvm.ir_builder.add(
@@ -703,6 +738,7 @@ def visit(self, node: astx.BinaryOp) -> None:
                 result = self._llvm.ir_builder.fsub(
                     llvm_lhs, llvm_rhs, "subtmp"
                 )
+                self._apply_fast_math(result)
             else:
                 # note: be careful you should handle this as  INT32
                 result = self._llvm.ir_builder.sub(
@@ -717,6 +753,7 @@ def visit(self, node: astx.BinaryOp) -> None:
                 result = self._llvm.ir_builder.fmul(
                     llvm_lhs, llvm_rhs, "multmp"
                 )
+                self._apply_fast_math(result)
             else:
                 # note: be careful you should handle this as INT32
                 result = self._llvm.ir_builder.mul(
@@ -782,6 +819,7 @@ def visit(self, node: astx.BinaryOp) -> None:
                 result = self._llvm.ir_builder.fdiv(
                     llvm_lhs, llvm_rhs, "divtmp"
                 )
+                self._apply_fast_math(result)
             else:
                 # Assuming the division is signed by default. Use `udiv` for
                 # unsigned division.

diff --git a/tests/test_llvmlite_helpers.py b/tests/test_llvmlite_helpers.py
@@ -95,3 +95,23 @@ def test_emit_int_div_signed_and_unsigned() -> None:
 
     assert getattr(signed, "opname", "") == "sdiv"
     assert getattr(unsigned, "opname", "") == "udiv"
+
+
+def test_set_fast_math_marks_float_ops() -> None:
+    """Toggling set_fast_math controls whether fadd gets the ``fast`` flag."""
+    visitor = LLVMLiteIRVisitor()
+    _prime_builder(visitor)
+
+    float_ty = visitor._llvm.FLOAT_TYPE
+    lhs = ir.Constant(float_ty, 1.0)
+    rhs = ir.Constant(float_ty, 2.0)
+
+    visitor.set_fast_math(True)
+    inst_fast = visitor._llvm.ir_builder.fadd(lhs, rhs)
+    visitor._apply_fast_math(inst_fast)  # enabled: flag is attached
+    assert "fast" in inst_fast.flags
+
+    visitor.set_fast_math(False)
+    inst_normal = visitor._llvm.ir_builder.fadd(lhs, rhs)
+    visitor._apply_fast_math(inst_normal)  # disabled: flag stays absent
+    assert "fast" not in inst_normal.flags