From 7880ec72098475360a2f45c3e0f2ea31f5de2ae3 Mon Sep 17 00:00:00 2001
From: omsherikar <omsherikar0229@gmail.com>
Date: Thu, 20 Nov 2025 19:34:27 +0530
Subject: [PATCH 1/3] fix: make fast-math toggles work

---
 src/irx/builders/llvmliteir.py | 30 +++++++++++++++++++++++++++++-
 tests/test_llvmlite_helpers.py | 20 ++++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/src/irx/builders/llvmliteir.py b/src/irx/builders/llvmliteir.py
index e978b70..b248b1f 100644
--- a/src/irx/builders/llvmliteir.py
+++ b/src/irx/builders/llvmliteir.py
@@ -165,6 +165,7 @@ def __init__(self) -> None:
         self.named_values: dict[str, Any] = {}
         self.function_protos: dict[str, astx.FunctionPrototype] = {}
         self.result_stack: list[ir.Value | ir.Function] = []
+        self._fast_math_enabled = False
 
         self.initialize()
 
@@ -420,7 +421,26 @@ def _emit_fma(
             return builder.fma(lhs, rhs, addend, name="vfma")
 
         fma_fn = self._get_fma_function(lhs.type)
-        return builder.call(fma_fn, [lhs, rhs, addend], name="vfma")
+        inst = builder.call(fma_fn, [lhs, rhs, addend], name="vfma")
+        self._apply_fast_math(inst)
+        return inst
+
+    def set_fast_math(self, enabled: bool) -> None:
+        """Enable/disable fast-math flags for subsequent FP instructions."""
+        self._fast_math_enabled = enabled
+
+    def _apply_fast_math(self, inst: ir.Instruction) -> None:
+        """Attach fast-math flags when enabled and applicable."""
+        if not self._fast_math_enabled:
+            return
+        ty = inst.type
+        if isinstance(ty, ir.VectorType):
+            if not is_fp_type(ty.element):
+                return
+        elif not is_fp_type(ty):
+            return
+        if "fast" not in inst.flags:
+            inst.flags.append("fast")
 
     @dispatch.abstract
     def visit(self, node: astx.AST) -> None:
@@ -616,6 +636,7 @@ def visit(self, node: astx.BinaryOp) -> None:
                         result = self._llvm.ir_builder.fadd(
                             llvm_lhs, llvm_rhs, name="vfaddtmp"
                         )
+                        self._apply_fast_math(result)
                     else:
                         result = self._llvm.ir_builder.add(
                             llvm_lhs, llvm_rhs, name="vaddtmp"
@@ -625,6 +646,7 @@ def visit(self, node: astx.BinaryOp) -> None:
                         result = self._llvm.ir_builder.fsub(
                             llvm_lhs, llvm_rhs, name="vfsubtmp"
                         )
+                        self._apply_fast_math(result)
                     else:
                         result = self._llvm.ir_builder.sub(
                             llvm_lhs, llvm_rhs, name="vsubtmp"
@@ -634,6 +656,7 @@ def visit(self, node: astx.BinaryOp) -> None:
                         result = self._llvm.ir_builder.fmul(
                             llvm_lhs, llvm_rhs, name="vfmultmp"
                         )
+                        self._apply_fast_math(result)
                     else:
                         result = self._llvm.ir_builder.mul(
                             llvm_lhs, llvm_rhs, name="vmultmp"
@@ -643,6 +666,7 @@ def visit(self, node: astx.BinaryOp) -> None:
                         result = self._llvm.ir_builder.fdiv(
                             llvm_lhs, llvm_rhs, name="vfdivtmp"
                         )
+                        self._apply_fast_math(result)
                     else:
                         unsigned = getattr(node, "unsigned", None)
                         if unsigned is None:
@@ -690,6 +714,7 @@ def visit(self, node: astx.BinaryOp) -> None:
                 result = self._llvm.ir_builder.fadd(
                     llvm_lhs, llvm_rhs, "addtmp"
                 )
+                self._apply_fast_math(result)
             else:
                 # there's more conditions to be handled
                 result = self._llvm.ir_builder.add(
@@ -703,6 +728,7 @@ def visit(self, node: astx.BinaryOp) -> None:
                 result = self._llvm.ir_builder.fsub(
                     llvm_lhs, llvm_rhs, "subtmp"
                 )
+                self._apply_fast_math(result)
             else:
                 # note: be careful you should handle this as  INT32
                 result = self._llvm.ir_builder.sub(
@@ -717,6 +743,7 @@ def visit(self, node: astx.BinaryOp) -> None:
                 result = self._llvm.ir_builder.fmul(
                     llvm_lhs, llvm_rhs, "multmp"
                 )
+                self._apply_fast_math(result)
             else:
                 # note: be careful you should handle this as INT32
                 result = self._llvm.ir_builder.mul(
@@ -782,6 +809,7 @@ def visit(self, node: astx.BinaryOp) -> None:
                 result = self._llvm.ir_builder.fdiv(
                     llvm_lhs, llvm_rhs, "divtmp"
                 )
+                self._apply_fast_math(result)
             else:
                 # Assuming the division is signed by default. Use `udiv` for
                 # unsigned division.
diff --git a/tests/test_llvmlite_helpers.py b/tests/test_llvmlite_helpers.py
index bb48cc7..ae63dfd 100644
--- a/tests/test_llvmlite_helpers.py
+++ b/tests/test_llvmlite_helpers.py
@@ -95,3 +95,23 @@ def test_emit_int_div_signed_and_unsigned() -> None:
 
     assert getattr(signed, "opname", "") == "sdiv"
     assert getattr(unsigned, "opname", "") == "udiv"
+
+
+def test_set_fast_math_marks_float_ops() -> None:
+    """set_fast_math should toggle the fast flag on floating-point ops."""
+    visitor = LLVMLiteIRVisitor()
+    _prime_builder(visitor)
+
+    float_ty = visitor._llvm.FLOAT_TYPE
+    lhs = ir.Constant(float_ty, 1.0)
+    rhs = ir.Constant(float_ty, 2.0)
+
+    visitor.set_fast_math(True)
+    inst_fast = visitor._llvm.ir_builder.fadd(lhs, rhs)
+    visitor._apply_fast_math(inst_fast)
+    assert "fast" in inst_fast.flags
+
+    visitor.set_fast_math(False)
+    inst_normal = visitor._llvm.ir_builder.fadd(lhs, rhs)
+    visitor._apply_fast_math(inst_normal)
+    assert "fast" not in inst_normal.flags

From 843fe254d5098db0fd0ce3b35d9d938818148184 Mon Sep 17 00:00:00 2001
From: omsherikar <omsherikar0229@gmail.com>
Date: Tue, 16 Dec 2025 21:17:51 +0530
Subject: [PATCH 2/3] fix: guard inst.flags access in _apply_fast_math and
 tolerate non-appendable flags

---
 src/irx/builders/llvmliteir.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/irx/builders/llvmliteir.py b/src/irx/builders/llvmliteir.py
index b248b1f..e2fe53b 100644
--- a/src/irx/builders/llvmliteir.py
+++ b/src/irx/builders/llvmliteir.py
@@ -439,8 +439,18 @@ def _apply_fast_math(self, inst: ir.Instruction) -> None:
                 return
         elif not is_fp_type(ty):
             return
-        if "fast" not in inst.flags:
-            inst.flags.append("fast")
+
+        flags = getattr(inst, "flags", None)
+        if flags is None:
+            return
+
+        if "fast" in flags:
+            return
+
+        try:
+            flags.append("fast")
+        except (AttributeError, TypeError):
+            return
 
     @dispatch.abstract
     def visit(self, node: astx.AST) -> None:

From c79767b8e173c813879be9bf2fff655b46c62e0c Mon Sep 17 00:00:00 2001
From: omsherikar <omsherikar0229@gmail.com>
Date: Mon, 19 Jan 2026 13:40:44 +0530
Subject: [PATCH 3/3] Trigger CI build