diff --git a/.noir-sync-commit b/.noir-sync-commit
index 9bbde85e56b..c14b7ffa9ff 100644
--- a/.noir-sync-commit
+++ b/.noir-sync-commit
@@ -1 +1 @@
-68c32b4ffd9b069fe4b119327dbf4018c17ab9d4
+dfc9ff7266d2b6694cae3da88418013664440daa
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/acir/mod.rs b/noir/noir-repo/compiler/noirc_evaluator/src/acir/mod.rs
index 46d0924b322..7274fe908d1 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/acir/mod.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/acir/mod.rs
@@ -2762,6 +2762,13 @@ impl<'a> Context<'a> {
             Intrinsic::FieldLessThan => {
                 unreachable!("FieldLessThan can only be called in unconstrained")
             }
+            Intrinsic::ArrayRefCount | Intrinsic::SliceRefCount => {
+                let zero = self.acir_context.add_constant(FieldElement::zero());
+                Ok(vec![AcirValue::Var(
+                    zero,
+                    AcirType::NumericType(NumericType::Unsigned { bit_size: 32 }),
+                )])
+            }
         }
     }
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/brillig/brillig_gen/brillig_block.rs b/noir/noir-repo/compiler/noirc_evaluator/src/brillig/brillig_gen/brillig_block.rs
index 36e1ee90e11..1fa4985295a 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/brillig/brillig_gen/brillig_block.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/brillig/brillig_gen/brillig_block.rs
@@ -402,210 +402,251 @@ impl<'block> BrilligBlock<'block> {
                 let result_ids = dfg.instruction_results(instruction_id);
                 self.convert_ssa_function_call(*func_id, arguments, dfg, result_ids);
             }
-            Value::Intrinsic(Intrinsic::BlackBox(bb_func)) => {
-                // Slices are represented as a tuple of (length, slice contents).
-                // We must check the inputs to determine if there are slices
-                // and make sure that we pass the correct inputs to the black box function call.
-                // The loop below only keeps the slice contents, so that
-                // setting up a black box function with slice inputs matches the expected
-                // number of arguments specified in the function signature.
-                let mut arguments_no_slice_len = Vec::new();
-                for (i, arg) in arguments.iter().enumerate() {
-                    if matches!(dfg.type_of_value(*arg), Type::Numeric(_)) {
-                        if i < arguments.len() - 1 {
-                            if !matches!(dfg.type_of_value(arguments[i + 1]), Type::Slice(_)) {
-                                arguments_no_slice_len.push(*arg);
-                            }
+            Value::Intrinsic(intrinsic) => {
+                // This match could be combined with the above but without it rust analyzer
+                // can't automatically insert any missing cases
+                match intrinsic {
+                    Intrinsic::ArrayLen => {
+                        let result_variable = self.variables.define_single_addr_variable(
+                            self.function_context,
+                            self.brillig_context,
+                            dfg.instruction_results(instruction_id)[0],
+                            dfg,
+                        );
+                        let param_id = arguments[0];
+                        // Slices are represented as a tuple in the form: (length, slice contents).
+                        // Thus, we can expect the first argument to be a field in the case of a slice,
+                        // or an array in the case of an array.
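// A hedged, self-contained sketch of the convention the comment above
// describes: slices travel through SSA as a (length, contents) pair, so a
// length query just returns the stored length instead of measuring the
// payload. `DemoSlice` and `demo_slice_len` are illustrative names only and
// are not part of this codebase.
struct DemoSlice<T> {
    length: u32,
    contents: Vec<T>,
}

fn demo_slice_len<T>(slice: &DemoSlice<T>) -> u32 {
    // Mirrors the Brillig lowering below: the length component is moved into
    // the result register; the contents are never inspected.
    slice.length
}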
+                        if let Type::Numeric(_) = dfg.type_of_value(param_id) {
+                            let len_variable = self.convert_ssa_value(arguments[0], dfg);
+                            let length = len_variable.extract_single_addr();
+                            self.brillig_context
+                                .mov_instruction(result_variable.address, length.address);
+                        } else {
-                                arguments_no_slice_len.push(*arg);
+                            self.convert_ssa_array_len(
+                                arguments[0],
+                                result_variable.address,
+                                dfg,
+                            );
+                        }
-                        } else {
-                            arguments_no_slice_len.push(*arg);
+                    }
-                    }
-                }
-
-                let function_arguments =
-                    vecmap(&arguments_no_slice_len, |arg| self.convert_ssa_value(*arg, dfg));
-                let function_results = dfg.instruction_results(instruction_id);
-                let function_results = vecmap(function_results, |result| {
-                    self.allocate_external_call_result(*result, dfg)
-                });
-                convert_black_box_call(
-                    self.brillig_context,
-                    bb_func,
-                    &function_arguments,
-                    &function_results,
-                );
-            }
-            Value::Intrinsic(Intrinsic::ArrayLen) => {
-                let result_variable = self.variables.define_single_addr_variable(
-                    self.function_context,
-                    self.brillig_context,
-                    dfg.instruction_results(instruction_id)[0],
-                    dfg,
-                );
-                let param_id = arguments[0];
-                // Slices are represented as a tuple in the form: (length, slice contents).
-                // Thus, we can expect the first argument to a field in the case of a slice
-                // or an array in the case of an array.
-                if let Type::Numeric(_) = dfg.type_of_value(param_id) {
-                    let len_variable = self.convert_ssa_value(arguments[0], dfg);
-                    let length = len_variable.extract_single_addr();
-                    self.brillig_context
-                        .mov_instruction(result_variable.address, length.address);
-                } else {
-                    self.convert_ssa_array_len(arguments[0], result_variable.address, dfg);
-                }
-            }
-            Value::Intrinsic(Intrinsic::AsSlice) => {
-                let source_variable = self.convert_ssa_value(arguments[0], dfg);
-                let result_ids = dfg.instruction_results(instruction_id);
-                let destination_len_variable = self.variables.define_single_addr_variable(
-                    self.function_context,
-                    self.brillig_context,
-                    result_ids[0],
-                    dfg,
-                );
-                let destination_variable = self.variables.define_variable(
-                    self.function_context,
-                    self.brillig_context,
-                    result_ids[1],
-                    dfg,
-                );
-                let destination_vector = destination_variable.extract_vector();
-                let source_array = source_variable.extract_array();
-                let element_size = dfg.type_of_value(arguments[0]).element_size();
-
-                let source_size_register = self
-                    .brillig_context
-                    .make_usize_constant_instruction(source_array.size.into());
-
-                // we need to explicitly set the destination_len_variable
-                self.brillig_context.codegen_usize_op(
-                    source_size_register.address,
-                    destination_len_variable.address,
-                    BrilligBinaryOp::UnsignedDiv,
-                    element_size,
-                );
-
-                self.brillig_context.codegen_initialize_vector(
-                    destination_vector,
-                    source_size_register,
-                    None,
-                );
-
-                // Items
-                let vector_items_pointer =
-                    self.brillig_context.codegen_make_vector_items_pointer(destination_vector);
-                let array_items_pointer =
-                    self.brillig_context.codegen_make_array_items_pointer(source_array);
-
-                self.brillig_context.codegen_mem_copy(
-                    array_items_pointer,
-                    vector_items_pointer,
-                    source_size_register,
-                );
-
-                self.brillig_context.deallocate_single_addr(source_size_register);
-                self.brillig_context.deallocate_register(vector_items_pointer);
-                self.brillig_context.deallocate_register(array_items_pointer);
-            }
-            Value::Intrinsic(
-                Intrinsic::SlicePushBack
-                | Intrinsic::SlicePopBack
-                | Intrinsic::SlicePushFront
-                | Intrinsic::SlicePopFront
-                | Intrinsic::SliceInsert
-                | Intrinsic::SliceRemove,
-            ) => {
-                self.convert_ssa_slice_intrinsic_call(
-                    dfg,
-                    &dfg[dfg.resolve(*func)],
-                    instruction_id,
-                    arguments,
-                );
-            }
-            Value::Intrinsic(Intrinsic::ToRadix(endianness)) => {
-                let results = dfg.instruction_results(instruction_id);
-
-                let source = self.convert_ssa_single_addr_value(arguments[0], dfg);
-                let radix = self.convert_ssa_single_addr_value(arguments[1], dfg);
-
-                let target_array = self
-                    .variables
-                    .define_variable(
-                        self.function_context,
-                        self.brillig_context,
-                        results[0],
-                        dfg,
-                    )
-                    .extract_array();
-
-                self.brillig_context.codegen_to_radix(
-                    source,
-                    target_array,
-                    radix,
-                    matches!(endianness, Endian::Little),
-                    false,
-                );
-            }
-            Value::Intrinsic(Intrinsic::ToBits(endianness)) => {
-                let results = dfg.instruction_results(instruction_id);
+                    Intrinsic::AsSlice => {
+                        let source_variable = self.convert_ssa_value(arguments[0], dfg);
+                        let result_ids = dfg.instruction_results(instruction_id);
+                        let destination_len_variable =
+                            self.variables.define_single_addr_variable(
+                                self.function_context,
+                                self.brillig_context,
+                                result_ids[0],
+                                dfg,
+                            );
+                        let destination_variable = self.variables.define_variable(
+                            self.function_context,
+                            self.brillig_context,
+                            result_ids[1],
+                            dfg,
+                        );
+                        let destination_vector = destination_variable.extract_vector();
+                        let source_array = source_variable.extract_array();
+                        let element_size = dfg.type_of_value(arguments[0]).element_size();
-                let source = self.convert_ssa_single_addr_value(arguments[0], dfg);
+                        let source_size_register = self
+                            .brillig_context
+                            .make_usize_constant_instruction(source_array.size.into());
+
+                        // we need to explicitly set the destination_len_variable
+                        self.brillig_context.codegen_usize_op(
+                            source_size_register.address,
+                            destination_len_variable.address,
+                            BrilligBinaryOp::UnsignedDiv,
+                            element_size,
+                        );
-                let target_array = self
-                    .variables
-                    .define_variable(
-                        self.function_context,
-                        self.brillig_context,
-                        results[0],
-                        dfg,
-                    )
-                    .extract_array();
+                        self.brillig_context.codegen_initialize_vector(
+                            destination_vector,
+                            source_size_register,
+                            None,
+                        );
-                let two = self.brillig_context.make_usize_constant_instruction(2_usize.into());
+                        // Items
+                        let vector_items_pointer = self
+                            .brillig_context
+                            .codegen_make_vector_items_pointer(destination_vector);
+                        let array_items_pointer =
+                            self.brillig_context.codegen_make_array_items_pointer(source_array);
+
+                        self.brillig_context.codegen_mem_copy(
+                            array_items_pointer,
+                            vector_items_pointer,
+                            source_size_register,
+                        );
-                self.brillig_context.codegen_to_radix(
-                    source,
-                    target_array,
-                    two,
-                    matches!(endianness, Endian::Little),
-                    true,
-                );
+                        self.brillig_context.deallocate_single_addr(source_size_register);
+                        self.brillig_context.deallocate_register(vector_items_pointer);
+                        self.brillig_context.deallocate_register(array_items_pointer);
+                    }
+                    Intrinsic::SlicePushBack
+                    | Intrinsic::SlicePopBack
+                    | Intrinsic::SlicePushFront
+                    | Intrinsic::SlicePopFront
+                    | Intrinsic::SliceInsert
+                    | Intrinsic::SliceRemove => {
+                        self.convert_ssa_slice_intrinsic_call(
+                            dfg,
+                            &dfg[dfg.resolve(*func)],
+                            instruction_id,
+                            arguments,
+                        );
+                    }
+                    Intrinsic::ToBits(endianness) => {
+                        let results = dfg.instruction_results(instruction_id);
+
+                        let source = self.convert_ssa_single_addr_value(arguments[0], dfg);
+
+                        let target_array = self
+                            .variables
+                            .define_variable(
+                                self.function_context,
+                                self.brillig_context,
+                                results[0],
+                                dfg,
+                            )
+                            .extract_array();
+
+                        let two = self
+                            .brillig_context
+                            .make_usize_constant_instruction(2_usize.into());
+
+                        self.brillig_context.codegen_to_radix(
+                            source,
+                            target_array,
+                            two,
+                            matches!(endianness, Endian::Little),
+                            true,
+                        );
-                self.brillig_context.deallocate_single_addr(two);
-            }
+                        self.brillig_context.deallocate_single_addr(two);
+                    }
-            // `Intrinsic::AsWitness` is used to provide hints to acir-gen on optimal expression splitting.
-            // It is then useless in the brillig runtime and so we can ignore it
-            Value::Intrinsic(Intrinsic::AsWitness) => (),
-            Value::Intrinsic(Intrinsic::FieldLessThan) => {
-                let lhs = self.convert_ssa_single_addr_value(arguments[0], dfg);
-                assert!(lhs.bit_size == FieldElement::max_num_bits());
-                let rhs = self.convert_ssa_single_addr_value(arguments[1], dfg);
-                assert!(rhs.bit_size == FieldElement::max_num_bits());
-
-                let results = dfg.instruction_results(instruction_id);
-                let destination = self
-                    .variables
-                    .define_variable(
-                        self.function_context,
-                        self.brillig_context,
-                        results[0],
-                        dfg,
-                    )
-                    .extract_single_addr();
-                assert!(destination.bit_size == 1);
+                    Intrinsic::ToRadix(endianness) => {
+                        let results = dfg.instruction_results(instruction_id);
+
+                        let source = self.convert_ssa_single_addr_value(arguments[0], dfg);
+                        let radix = self.convert_ssa_single_addr_value(arguments[1], dfg);
+
+                        let target_array = self
+                            .variables
+                            .define_variable(
+                                self.function_context,
+                                self.brillig_context,
+                                results[0],
+                                dfg,
+                            )
+                            .extract_array();
+
+                        self.brillig_context.codegen_to_radix(
+                            source,
+                            target_array,
+                            radix,
+                            matches!(endianness, Endian::Little),
+                            false,
+                        );
+                    }
+                    Intrinsic::BlackBox(bb_func) => {
+                        // Slices are represented as a tuple of (length, slice contents).
+                        // We must check the inputs to determine if there are slices
+                        // and make sure that we pass the correct inputs to the black box function call.
+                        // The loop below only keeps the slice contents, so that
+                        // setting up a black box function with slice inputs matches the expected
+                        // number of arguments specified in the function signature.
+                        let mut arguments_no_slice_len = Vec::new();
+                        for (i, arg) in arguments.iter().enumerate() {
+                            if matches!(dfg.type_of_value(*arg), Type::Numeric(_)) {
+                                if i < arguments.len() - 1 {
+                                    if !matches!(
+                                        dfg.type_of_value(arguments[i + 1]),
+                                        Type::Slice(_)
+                                    ) {
+                                        arguments_no_slice_len.push(*arg);
+                                    }
+                                } else {
+                                    arguments_no_slice_len.push(*arg);
+                                }
+                            } else {
+                                arguments_no_slice_len.push(*arg);
+                            }
+                        }
-                self.brillig_context.binary_instruction(
-                    lhs,
-                    rhs,
-                    destination,
-                    BrilligBinaryOp::LessThan,
-                );
+                        let function_arguments = vecmap(&arguments_no_slice_len, |arg| {
+                            self.convert_ssa_value(*arg, dfg)
+                        });
+                        let function_results = dfg.instruction_results(instruction_id);
+                        let function_results = vecmap(function_results, |result| {
+                            self.allocate_external_call_result(*result, dfg)
+                        });
+                        convert_black_box_call(
+                            self.brillig_context,
+                            bb_func,
+                            &function_arguments,
+                            &function_results,
+                        );
+                    }
+                    // `Intrinsic::AsWitness` is used to provide hints to acir-gen on optimal expression splitting.
+                    // It is then useless in the brillig runtime and so we can ignore it
+                    Intrinsic::AsWitness => (),
+                    Intrinsic::FieldLessThan => {
+                        let lhs = self.convert_ssa_single_addr_value(arguments[0], dfg);
+                        assert!(lhs.bit_size == FieldElement::max_num_bits());
+                        let rhs = self.convert_ssa_single_addr_value(arguments[1], dfg);
+                        assert!(rhs.bit_size == FieldElement::max_num_bits());
+
+                        let results = dfg.instruction_results(instruction_id);
+                        let destination = self
+                            .variables
+                            .define_variable(
+                                self.function_context,
+                                self.brillig_context,
+                                results[0],
+                                dfg,
+                            )
+                            .extract_single_addr();
+                        assert!(destination.bit_size == 1);
+
+                        self.brillig_context.binary_instruction(
+                            lhs,
+                            rhs,
+                            destination,
+                            BrilligBinaryOp::LessThan,
+                        );
+                    }
+                    Intrinsic::ArrayRefCount | Intrinsic::SliceRefCount => {
+                        let array = self.convert_ssa_value(arguments[0], dfg);
+                        let result = dfg.instruction_results(instruction_id)[0];
+
+                        let destination = self.variables.define_variable(
+                            self.function_context,
+                            self.brillig_context,
+                            result,
+                            dfg,
+                        );
+                        let destination = destination.extract_register();
+                        let array = array.extract_register();
+                        self.brillig_context.load_instruction(destination, array);
+                    }
+                    Intrinsic::FromField
+                    | Intrinsic::AsField
+                    | Intrinsic::IsUnconstrained
+                    | Intrinsic::DerivePedersenGenerators
+                    | Intrinsic::ApplyRangeConstraint
+                    | Intrinsic::StrAsBytes
+                    | Intrinsic::AssertConstant
+                    | Intrinsic::StaticAssert
+                    | Intrinsic::ArrayAsStrUnchecked => {
+                        unreachable!("unsupported function call type {:?}", dfg[*func])
+                    }
+                }
             }
-            _ => {
+            Value::Instruction { .. } | Value::Param { .. } | Value::NumericConstant { .. } => {
                 unreachable!("unsupported function call type {:?}", dfg[*func])
             }
         },
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa.rs
index 45d10323b06..97c1760d87c 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa.rs
@@ -141,6 +141,23 @@ pub(crate) fn optimize_into_acir(
         ssa.to_brillig(options.enable_brillig_logging)
     });
 
+    let ssa_gen_span = span!(Level::TRACE, "ssa_generation");
+    let ssa_gen_span_guard = ssa_gen_span.enter();
+
+    let ssa = SsaBuilder {
+        ssa,
+        print_ssa_passes: options.enable_ssa_logging,
+        print_codegen_timings: options.print_codegen_timings,
+    }
+    .run_pass(
+        |ssa| ssa.fold_constants_with_brillig(&brillig),
+        "After Constant Folding with Brillig:",
+    )
+    .run_pass(Ssa::dead_instruction_elimination, "After Dead Instruction Elimination:")
+    .finish();
+
+    drop(ssa_gen_span_guard);
+
     let artifacts = time("SSA to ACIR", options.print_codegen_timings, || {
         ssa.into_acir(&brillig, options.expression_width)
     })?;
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs
index cf884c98be9..7a4e336c33e 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs
@@ -205,16 +205,18 @@ impl Context {
                     | Intrinsic::IsUnconstrained => {}
                     Intrinsic::ArrayLen
                     | Intrinsic::ArrayAsStrUnchecked
+                    | Intrinsic::ArrayRefCount
                     | Intrinsic::AsField
                     | Intrinsic::AsSlice
                     | Intrinsic::BlackBox(..)
                    | Intrinsic::DerivePedersenGenerators
                    | Intrinsic::FromField
+                    | Intrinsic::SliceInsert
                    | Intrinsic::SlicePushBack
                    | Intrinsic::SlicePushFront
                    | Intrinsic::SlicePopBack
                    | Intrinsic::SlicePopFront
-                    | Intrinsic::SliceInsert
+                    | Intrinsic::SliceRefCount
                    | Intrinsic::SliceRemove
                    | Intrinsic::StaticAssert
                    | Intrinsic::StrAsBytes
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/instruction.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/instruction.rs
index ba854d6a3c1..b48c755dbe5 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/instruction.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/instruction.rs
@@ -11,7 +11,7 @@ use fxhash::FxHasher64;
 use iter_extended::vecmap;
 use noirc_frontend::hir_def::types::Type as HirType;
 
-use crate::ssa::opt::flatten_cfg::value_merger::ValueMerger;
+use crate::ssa::{ir::function::RuntimeType, opt::flatten_cfg::value_merger::ValueMerger};
 
 use super::{
     basic_block::BasicBlockId,
@@ -45,8 +45,7 @@ pub(crate) type InstructionId = Id<Instruction>;
 /// - Opcodes which the IR knows the target machine has
 /// special support for. (LowLevel)
 /// - Opcodes which have no function definition in the
-/// source code and must be processed by the IR. An example
-/// of this is println.
+/// source code and must be processed by the IR.
 #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
 pub(crate) enum Intrinsic {
     ArrayLen,
@@ -71,6 +70,8 @@ pub(crate) enum Intrinsic {
     IsUnconstrained,
     DerivePedersenGenerators,
     FieldLessThan,
+    ArrayRefCount,
+    SliceRefCount,
 }
 
 impl std::fmt::Display for Intrinsic {
@@ -100,6 +101,8 @@ impl std::fmt::Display for Intrinsic {
             Intrinsic::IsUnconstrained => write!(f, "is_unconstrained"),
             Intrinsic::DerivePedersenGenerators => write!(f, "derive_pedersen_generators"),
             Intrinsic::FieldLessThan => write!(f, "field_less_than"),
+            Intrinsic::ArrayRefCount => write!(f, "array_refcount"),
+            Intrinsic::SliceRefCount => write!(f, "slice_refcount"),
         }
     }
 }
@@ -108,11 +111,18 @@ impl Intrinsic {
     /// Returns whether the `Intrinsic` has side effects.
     ///
     /// If there are no side effects then the `Intrinsic` can be removed if the result is unused.
+    ///
+    /// An example of a side effect is increasing the reference count of an array, but functions
+    /// which can fail due to implicit constraints are also considered to have a side effect.
     pub(crate) fn has_side_effects(&self) -> bool {
         match self {
             Intrinsic::AssertConstant
             | Intrinsic::StaticAssert
             | Intrinsic::ApplyRangeConstraint
+            // Array & slice ref counts are treated as having side effects since they operate
+            // on hidden variables of otherwise identical array values.
+            | Intrinsic::ArrayRefCount
+            | Intrinsic::SliceRefCount
            | Intrinsic::AsWitness => true,
 
             // These apply a constraint that the input must fit into a specified number of limbs.
@@ -144,6 +154,39 @@ impl Intrinsic {
         }
     }
 
+    /// Intrinsics which only have a side effect due to the chance that
+    /// they can fail a constraint can be deduplicated.
+    pub(crate) fn can_be_deduplicated(&self, deduplicate_with_predicate: bool) -> bool {
+        match self {
+            // These apply a constraint in the form of ACIR opcodes, but they can be deduplicated
+            // if the inputs are the same. If they depend on a side effect variable (e.g. because
+            // they were in an if-then-else) then `handle_instruction_side_effects` in `flatten_cfg`
+            // will have attached the condition variable to their inputs directly, so they don't
+            // directly depend on the corresponding `enable_side_effect` instruction any more.
+            // However, to conform with the expectations of `Instruction::can_be_deduplicated` and
+            // `constant_folding` we only use this information if the caller shows interest in it.
+            Intrinsic::ToBits(_)
+            | Intrinsic::ToRadix(_)
+            | Intrinsic::BlackBox(
+                BlackBoxFunc::MultiScalarMul
+                | BlackBoxFunc::EmbeddedCurveAdd
+                | BlackBoxFunc::RecursiveAggregation,
+            ) => deduplicate_with_predicate,
+
+            // Operations that remove items from a slice don't modify the slice, they just assert it's non-empty.
+            Intrinsic::SlicePopBack | Intrinsic::SlicePopFront | Intrinsic::SliceRemove => {
+                deduplicate_with_predicate
+            }
+
+            Intrinsic::AssertConstant
+            | Intrinsic::StaticAssert
+            | Intrinsic::ApplyRangeConstraint
+            | Intrinsic::AsWitness => deduplicate_with_predicate,
+
+            _ => !self.has_side_effects(),
+        }
+    }
+
     /// Lookup an Intrinsic by name and return it if found.
     /// If there is no such intrinsic by that name, None is returned.
     pub(crate) fn lookup(name: &str) -> Option<Intrinsic> {
@@ -171,6 +214,8 @@ impl Intrinsic {
             "is_unconstrained" => Some(Intrinsic::IsUnconstrained),
             "derive_pedersen_generators" => Some(Intrinsic::DerivePedersenGenerators),
             "field_less_than" => Some(Intrinsic::FieldLessThan),
+            "array_refcount" => Some(Intrinsic::ArrayRefCount),
+            "slice_refcount" => Some(Intrinsic::SliceRefCount),
 
             other => BlackBoxFunc::lookup(other).map(Intrinsic::BlackBox),
         }
@@ -235,7 +280,7 @@ pub(crate) enum Instruction {
     /// - `code1` will have side effects iff `condition1` evaluates to `true`
     ///
     /// This instruction is only emitted after the cfg flattening pass, and is used to annotate
-    /// instruction regions with an condition that corresponds to their position in the CFG's
+    /// instruction regions with a condition that corresponds to their position in the CFG's
     /// if-branching structure.
     EnableSideEffectsIf { condition: ValueId },
 
@@ -270,9 +315,6 @@ pub(crate) enum Instruction {
     ///         else_value
     ///     }
     /// ```
-    ///
-    /// Where we save the result of !then_condition so that we have the same
-    /// ValueId for it each time.
     IfElse { then_condition: ValueId, then_value: ValueId, else_value: ValueId },
 
     /// Creates a new array or slice.
@@ -324,6 +366,11 @@ impl Instruction {
     /// If `deduplicate_with_predicate` is set, we assume we're deduplicating with the instruction
     /// and its predicate, rather than just the instruction. Setting this means instructions that
     /// rely on predicates can be deduplicated as well.
+    ///
+    /// Some instructions get the predicate attached to their inputs by `handle_instruction_side_effects` in `flatten_cfg`.
+    /// These can be deduplicated because they implicitly depend on the predicate, not only when the caller uses the
+    /// predicate variable as a key to cache results. However, to avoid tight coupling between passes, we make the deduplication
+    /// conditional on whether the caller wants the predicate to be taken into account or not.
     pub(crate) fn can_be_deduplicated(
         &self,
         dfg: &DataFlowGraph,
@@ -341,7 +388,9 @@ impl Instruction {
             | DecrementRc { .. } => false,
 
             Call { func, .. } => match dfg[*func] {
-                Value::Intrinsic(intrinsic) => !intrinsic.has_side_effects(),
+                Value::Intrinsic(intrinsic) => {
+                    intrinsic.can_be_deduplicated(deduplicate_with_predicate)
+                }
                 _ => false,
             },
 
@@ -391,8 +440,19 @@ impl Instruction {
             | ArraySet { .. }
             | MakeArray { .. } => true,
 
+            // Store instructions must be removed by DIE in acir code, any load
+            // instructions should already be unused by that point.
+            //
+            // Note that this check assumes that it is being performed after the flattening
+            // pass and after the last mem2reg pass. This is currently the case for the DIE
+            // pass where this check is done, but does mean that we cannot perform mem2reg
+            // after the DIE pass.
+            Store { .. } => {
+                matches!(function.runtime(), RuntimeType::Acir(_))
+                    && function.reachable_blocks().len() == 1
+            }
+
             Constrain(..)
-            | Store { .. }
             | EnableSideEffectsIf { .. }
             | IncrementRc { .. }
             | DecrementRc { .. }
@@ -403,6 +463,7 @@ impl Instruction {
                 // Explicitly allows removal of unused ec operations, even if they can fail
                 Value::Intrinsic(Intrinsic::BlackBox(BlackBoxFunc::MultiScalarMul))
                 | Value::Intrinsic(Intrinsic::BlackBox(BlackBoxFunc::EmbeddedCurveAdd)) => true,
+
                 Value::Intrinsic(intrinsic) => !intrinsic.has_side_effects(),
 
                 // All foreign functions are treated as having side effects.
@@ -418,7 +479,7 @@ impl Instruction {
         }
     }
 
-    /// If true the instruction will depends on enable_side_effects context during acir-gen
+    /// If true the instruction will depend on `enable_side_effects` context during acir-gen.
     pub(crate) fn requires_acir_gen_predicate(&self, dfg: &DataFlowGraph) -> bool {
         match self {
             Instruction::Binary(binary)
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs
index 7e41512fd8f..4be37b3c626 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs
@@ -368,6 +368,8 @@ pub(super) fn simplify_call(
                 SimplifyResult::None
             }
         }
+        Intrinsic::ArrayRefCount => SimplifyResult::None,
+        Intrinsic::SliceRefCount => SimplifyResult::None,
     }
 }
 
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/constant_folding.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/constant_folding.rs
index 9ee9a52b5ad..7b4d569c4d0 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/constant_folding.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/constant_folding.rs
@@ -6,7 +6,7 @@
 //!   by the [`DataFlowGraph`] automatically as new instructions are pushed.
 //! - Check whether any input values have been constrained to be equal to a value of a simpler form
 //!   by a [constrain instruction][Instruction::Constrain]. If so, replace the input value with the simpler form.
-//! - Check whether the instruction [can_be_replaced][Instruction::can_be_replaced()]
+//! - Check whether the instruction [can_be_deduplicated][Instruction::can_be_deduplicated()]
 //!   by duplicate instruction earlier in the same block.
 //!
 //! These operations are done in parallel so that they can each benefit from each other
@@ -19,33 +19,49 @@
 //!
 //! This is the only pass which removes duplicated pure [`Instruction`]s however and so is needed when
 //! different blocks are merged, i.e. after the [`flatten_cfg`][super::flatten_cfg] pass.
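// A minimal sketch (not the pass itself) of the deduplication idea the module
// doc above describes: cache each pure instruction's results and reuse them
// when the same instruction is seen again. The real pass additionally keys the
// cache on the side-effect predicate and checks block dominance; this toy
// version uses strings for instructions and a single flat map.
use std::collections::HashMap;

fn dedup_instructions(instructions: &[&str]) -> Vec<usize> {
    let mut cache: HashMap<String, usize> = HashMap::new();
    let mut result_ids = Vec::new();
    for (next_id, inst) in instructions.iter().enumerate() {
        // Reuse the earlier result id if this instruction was already seen;
        // otherwise record this occurrence as the canonical one.
        let id = *cache.entry(inst.to_string()).or_insert(next_id);
        result_ids.push(id);
    }
    result_ids
}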
-use std::collections::{HashSet, VecDeque};
+use std::collections::{BTreeMap, HashSet, VecDeque};
 
-use acvm::{acir::AcirField, FieldElement};
+use acvm::{
+    acir::AcirField,
+    brillig_vm::{MemoryValue, VMStatus, VM},
+    FieldElement,
+};
+use bn254_blackbox_solver::Bn254BlackBoxSolver;
+use im::Vector;
 use iter_extended::vecmap;
 
-use crate::ssa::{
-    ir::{
-        basic_block::BasicBlockId,
-        dfg::{DataFlowGraph, InsertInstructionResult},
-        dom::DominatorTree,
-        function::Function,
-        instruction::{Instruction, InstructionId},
-        types::Type,
-        value::{Value, ValueId},
+use crate::{
+    brillig::{
+        brillig_gen::gen_brillig_for,
+        brillig_ir::{artifact::BrilligParameter, brillig_variable::get_bit_size_from_ssa_type},
+        Brillig,
+    },
+    ssa::{
+        ir::{
+            basic_block::BasicBlockId,
+            dfg::{DataFlowGraph, InsertInstructionResult},
+            dom::DominatorTree,
+            function::{Function, FunctionId, RuntimeType},
+            instruction::{Instruction, InstructionId},
+            types::Type,
+            value::{Value, ValueId},
+        },
+        ssa_gen::Ssa,
     },
-    ssa_gen::Ssa,
 };
 use fxhash::FxHashMap as HashMap;
 
 impl Ssa {
     /// Performs constant folding on each instruction.
     ///
+    /// It will not look at constraints to inform simplifications
+    /// based on the stated equivalence of two instructions.
+    ///
     /// See [`constant_folding`][self] module for more information.
     #[tracing::instrument(level = "trace", skip(self))]
     pub(crate) fn fold_constants(mut self) -> Ssa {
         for function in self.functions.values_mut() {
-            function.constant_fold(false);
+            function.constant_fold(false, None);
         }
         self
     }
@@ -58,17 +74,82 @@ impl Ssa {
     #[tracing::instrument(level = "trace", skip(self))]
     pub(crate) fn fold_constants_using_constraints(mut self) -> Ssa {
         for function in self.functions.values_mut() {
-            function.constant_fold(true);
+            function.constant_fold(true, None);
         }
         self
     }
+
+    /// Performs constant folding on each instruction while also replacing calls to brillig functions
+    /// with all constant arguments by trying to evaluate those calls.
+    #[tracing::instrument(level = "trace", skip(self, brillig))]
+    pub(crate) fn fold_constants_with_brillig(mut self, brillig: &Brillig) -> Ssa {
+        // Collect all brillig functions so that later we can find them when processing a call instruction
+        let mut brillig_functions: BTreeMap<FunctionId, Function> = BTreeMap::new();
+        for (func_id, func) in &self.functions {
+            if let RuntimeType::Brillig(..) = func.runtime() {
+                let cloned_function = Function::clone_with_id(*func_id, func);
+                brillig_functions.insert(*func_id, cloned_function);
+            };
+        }
+
+        let brillig_info = Some(BrilligInfo { brillig, brillig_functions: &brillig_functions });
+
+        for function in self.functions.values_mut() {
+            function.constant_fold(false, brillig_info);
+        }
+
+        // It could happen that we inlined all calls to a given brillig function.
+        // In that case it's unused so we can remove it. This is what we check next.
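// A hedged sketch of the pruning step invoked below: collect every function id
// that is still referenced by some call, then drop the Brillig functions nobody
// references. `FuncId`, `prune_uncalled`, and the `String` stand-in for a
// function body are invented for illustration; the real code walks `dfg`
// instructions and also keeps entry points alive.
use std::collections::{BTreeMap, BTreeSet};

type FuncId = u32;

fn prune_uncalled(
    functions: &mut BTreeMap<FuncId, String>,
    called: &BTreeSet<FuncId>,
    main: FuncId,
) {
    // Retain main unconditionally; everything else must still be called.
    functions.retain(|id, _| *id == main || called.contains(id));
}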
+        self.remove_unused_brillig_functions(brillig_functions)
+    }
+
+    fn remove_unused_brillig_functions(
+        mut self,
+        mut brillig_functions: BTreeMap<FunctionId, Function>,
+    ) -> Ssa {
+        // Remove from the above map functions that are called
+        for function in self.functions.values() {
+            for block_id in function.reachable_blocks() {
+                for instruction_id in function.dfg[block_id].instructions() {
+                    let instruction = &function.dfg[*instruction_id];
+                    let Instruction::Call { func: func_id, arguments: _ } = instruction else {
+                        continue;
+                    };
+
+                    let func_value = &function.dfg[*func_id];
+                    let Value::Function(func_id) = func_value else { continue };
+
+                    brillig_functions.remove(func_id);
+                }
+            }
+        }
+
+        // The ones that remain are never called: let's remove them.
+        for func_id in brillig_functions.keys() {
+            // We never want to remove the main function (it could be `unconstrained` or it
+            // could have been turned into brillig if `--force-brillig` was given).
+            // We also don't want to remove entry points.
+            if self.main_id == *func_id || self.entry_point_to_generated_index.contains_key(func_id)
+            {
+                continue;
+            }
+
+            self.functions.remove(func_id);
+        }
+
+        self
+    }
 }
 
 impl Function {
     /// The structure of this pass is simple:
     /// Go through each block and re-insert all instructions.
-    pub(crate) fn constant_fold(&mut self, use_constraint_info: bool) {
-        let mut context = Context::new(self, use_constraint_info);
+    pub(crate) fn constant_fold(
+        &mut self,
+        use_constraint_info: bool,
+        brillig_info: Option<BrilligInfo>,
+    ) {
+        let mut context = Context::new(self, use_constraint_info, brillig_info);
         context.block_queue.push_back(self.entry_block());
 
         while let Some(block) = context.block_queue.pop_front() {
@@ -82,25 +163,21 @@ impl Function {
     }
 }
 
-struct Context {
+struct Context<'a> {
     use_constraint_info: bool,
+    brillig_info: Option<BrilligInfo<'a>>,
     /// Maps pre-folded ValueIds to the new ValueIds obtained by re-inserting the instruction.
     visited_blocks: HashSet<BasicBlockId>,
     block_queue: VecDeque<BasicBlockId>,
 
     /// Contains sets of values which are constrained to be equivalent to each other.
     ///
-    /// The mapping's structure is `side_effects_enabled_var => (constrained_value => [(block, simplified_value)])`.
+    /// The mapping's structure is `side_effects_enabled_var => (constrained_value => simplified_value)`.
     ///
     /// We partition the maps of constrained values according to the side-effects flag at the point
     /// at which the values are constrained. This prevents constraints which are only sometimes enforced
     /// being used to modify the rest of the program.
-    ///
-    /// We also keep track of how a value was simplified to other values per block. That is,
-    /// a same ValueId could have been simplified to one value in one block and to another value
-    /// in another block.
-    constraint_simplification_mappings:
-        HashMap<ValueId, HashMap<ValueId, Vec<(BasicBlockId, ValueId)>>>,
+    constraint_simplification_mappings: HashMap<ValueId, HashMap<ValueId, ValueId>>,
 
     // Cache of instructions without any side-effects along with their outputs.
     cached_instruction_results: InstructionResultCache,
@@ -108,9 +185,18 @@ struct Context {
     dom: DominatorTree,
 }
 
-/// HashMap from (Instruction, side_effects_enabled_var) to the results of the instruction.
+#[derive(Copy, Clone)]
+pub(crate) struct BrilligInfo<'a> {
+    brillig: &'a Brillig,
+    brillig_functions: &'a BTreeMap<FunctionId, Function>,
+}
+
+/// HashMap from `(Instruction, side_effects_enabled_var)` to the results of the instruction.
 /// Stored as a two-level map to avoid cloning Instructions during the `.get` call.
 ///
+/// The `side_effects_enabled_var` is optional because we only use them when `Instruction::requires_acir_gen_predicate`
+/// is true _and_ the constraint information is also taken into account.
+///
 /// In addition to each result, the original BasicBlockId is stored as well. This allows us
 /// to deduplicate instructions across blocks as long as the new block dominates the original.
 type InstructionResultCache = HashMap<Instruction, HashMap<Option<ValueId>, ResultCache>>;
 
 /// For more information see [`InstructionResultCache`].
 #[derive(Default)]
 struct ResultCache {
-    results: Vec<(BasicBlockId, Vec<ValueId>)>,
+    result: Option<(BasicBlockId, Vec<ValueId>)>,
 }
 
-impl Context {
-    fn new(function: &Function, use_constraint_info: bool) -> Self {
+impl<'brillig> Context<'brillig> {
+    fn new(
+        function: &Function,
+        use_constraint_info: bool,
+        brillig_info: Option<BrilligInfo<'brillig>>,
+    ) -> Self {
         Self {
             use_constraint_info,
+            brillig_info,
             visited_blocks: Default::default(),
             block_queue: Default::default(),
             constraint_simplification_mappings: Default::default(),
@@ -138,6 +229,7 @@ impl<'brillig> Context<'brillig> {
     fn fold_constants_in_block(&mut self, function: &mut Function, block: BasicBlockId) {
         let instructions = function.dfg[block].take_instructions();
 
+        // Default side effect condition variable with an enabled state.
         let mut side_effects_enabled_var =
             function.dfg.make_constant(FieldElement::one(), Type::bool());
 
@@ -155,31 +247,52 @@ impl<'brillig> Context<'brillig> {
     fn fold_constants_into_instruction(
         &mut self,
         dfg: &mut DataFlowGraph,
-        block: BasicBlockId,
+        mut block: BasicBlockId,
         id: InstructionId,
         side_effects_enabled_var: &mut ValueId,
     ) {
-        let constraint_simplification_mapping =
-            self.constraint_simplification_mappings.get(side_effects_enabled_var);
-        let instruction = Self::resolve_instruction(
-            id,
-            block,
-            dfg,
-            &mut self.dom,
-            constraint_simplification_mapping,
-        );
+        let constraint_simplification_mapping = self.get_constraint_map(*side_effects_enabled_var);
+        let instruction = Self::resolve_instruction(id, dfg, constraint_simplification_mapping);
+
         let old_results = dfg.instruction_results(id).to_vec();
 
         // If a copy of this instruction exists earlier in the block, then reuse the previous results.
-        if let Some(cached_results) =
+        if let Some(cache_result) =
             self.get_cached(dfg, &instruction, *side_effects_enabled_var, block)
         {
-            Self::replace_result_ids(dfg, &old_results, cached_results);
-            return;
+            match cache_result {
+                CacheResult::Cached(cached) => {
+                    Self::replace_result_ids(dfg, &old_results, cached);
+                    return;
+                }
+                CacheResult::NeedToHoistToCommonBlock(dominator, _cached) => {
+                    // Just change the block to insert in the common dominator instead.
+                    // This will only move the current instance of the instruction right now.
+                    // When constant folding is run a second time later on, it'll catch
+                    // that the previous instance can be deduplicated to this instance.
+                    block = dominator;
+                }
+            }
         }
 
-        // Otherwise, try inserting the instruction again to apply any optimizations using the newly resolved inputs.
-        let new_results = Self::push_instruction(id, instruction.clone(), &old_results, block, dfg);
+        let new_results =
+            // First try to inline a call to a brillig function with all constant arguments.
+            Self::try_inline_brillig_call_with_all_constants(
+                &instruction,
+                &old_results,
+                block,
+                dfg,
+                self.brillig_info,
+            )
+            .unwrap_or_else(|| {
+                // Otherwise, try inserting the instruction again to apply any optimizations using the newly resolved inputs.
+                Self::push_instruction(
+                    id,
+                    instruction.clone(),
+                    &old_results,
+                    block,
+                    dfg,
+                )
+            });
 
         Self::replace_result_ids(dfg, &old_results, &new_results);
 
@@ -272,12 +385,14 @@ impl<'brillig> Context<'brillig> {
     }
 
     fn cache_instruction(
         &mut self,
         instruction: Instruction,
         instruction_results: Vec<ValueId>,
         dfg: &DataFlowGraph,
         side_effects_enabled_var: ValueId,
+        block: BasicBlockId,
     ) {
         if self.use_constraint_info {
             // If the instruction was a constraint, then create a link between the two `ValueId`s
@@ -295,6 +410,7 @@ impl<'brillig> Context<'brillig> {
 
         // If the instruction doesn't have side-effects and if it won't interact with enable_side_effects during acir_gen,
         // we cache the results so we can reuse them if the same instruction appears again later in the block.
+        // Others have side effects representing failure, which are implicit in the ACIR code and can also be deduplicated.
         if instruction.can_be_deduplicated(dfg, self.use_constraint_info) {
             let use_predicate =
                 self.use_constraint_info && instruction.requires_acir_gen_predicate(dfg);
@@ -309,10 +425,12 @@ impl<'brillig> Context<'brillig> {
         }
     }
 
+    /// Get the simplification mapping from complex to simpler instructions,
+    /// which all depend on the same side effect condition variable.
     fn get_constraint_map(
         &mut self,
         side_effects_enabled_var: ValueId,
-    ) -> &mut HashMap<ValueId, Vec<(BasicBlockId, ValueId)>> {
+    ) -> &mut HashMap<ValueId, ValueId> {
         self.constraint_simplification_mappings.entry(side_effects_enabled_var).or_default()
     }
 
@@ -327,13 +445,13 @@ impl<'brillig> Context<'brillig> {
         }
     }
 
-    fn get_cached<'a>(
-        &'a mut self,
+    fn get_cached(
+        &mut self,
         dfg: &DataFlowGraph,
         instruction: &Instruction,
         side_effects_enabled_var: ValueId,
         block: BasicBlockId,
-    ) -> Option<&'a [ValueId]> {
+    ) -> Option<CacheResult> {
         let results_for_instruction = self.cached_instruction_results.get(instruction)?;
 
         let predicate = self.use_constraint_info && instruction.requires_acir_gen_predicate(dfg);
@@ -341,12 +459,174 @@ impl<'brillig> Context<'brillig> {
 
         results_for_instruction.get(&predicate)?.get(block, &mut self.dom)
     }
+
+    /// Checks if the given instruction is a call to a brillig function with all constant arguments.
+    /// If so, we can try to evaluate that function and replace the results with the evaluation results.
+    fn try_inline_brillig_call_with_all_constants(
+        instruction: &Instruction,
+        old_results: &[ValueId],
+        block: BasicBlockId,
+        dfg: &mut DataFlowGraph,
+        brillig_info: Option<BrilligInfo>,
+    ) -> Option<Vec<ValueId>> {
+        let evaluation_result = Self::evaluate_const_brillig_call(
+            instruction,
+            brillig_info?.brillig,
+            brillig_info?.brillig_functions,
+            dfg,
+        );
+
+        match evaluation_result {
+            EvaluationResult::NotABrilligCall | EvaluationResult::CannotEvaluate(_) => None,
+            EvaluationResult::Evaluated(memory_values) => {
+                let mut memory_index = 0;
+                let new_results = vecmap(old_results, |old_result| {
+                    let typ = dfg.type_of_value(*old_result);
+                    Self::new_value_for_type_and_memory_values(
+                        typ,
+                        block,
+                        &memory_values,
+                        &mut memory_index,
+                        dfg,
+                    )
+                });
+                Some(new_results)
+            }
+        }
+    }
+
+    /// Tries to evaluate an instruction if it's a call that points to a brillig function,
+    /// and all its arguments are constant.
+    /// We do this by directly executing the function with a brillig VM.
+    fn evaluate_const_brillig_call(
+        instruction: &Instruction,
+        brillig: &Brillig,
+        brillig_functions: &BTreeMap<FunctionId, Function>,
+        dfg: &mut DataFlowGraph,
+    ) -> EvaluationResult {
+        let Instruction::Call { func: func_id, arguments } = instruction else {
+            return EvaluationResult::NotABrilligCall;
+        };
+
+        let func_value = &dfg[*func_id];
+        let Value::Function(func_id) = func_value else {
+            return EvaluationResult::NotABrilligCall;
+        };
+
+        let Some(func) = brillig_functions.get(func_id) else {
+            return EvaluationResult::NotABrilligCall;
+        };
+
+        if !arguments.iter().all(|argument| dfg.is_constant(*argument)) {
+            return EvaluationResult::CannotEvaluate(*func_id);
+        }
+
+        let mut brillig_arguments = Vec::new();
+        for argument in arguments {
+            let typ = dfg.type_of_value(*argument);
+            let Some(parameter) = type_to_brillig_parameter(&typ) else {
+                return EvaluationResult::CannotEvaluate(*func_id);
+            };
+            brillig_arguments.push(parameter);
+        }
+
+        // Check that return value types are supported by brillig
+        for return_id in func.returns().iter() {
+            let typ = func.dfg.type_of_value(*return_id);
+            if type_to_brillig_parameter(&typ).is_none() {
+                return EvaluationResult::CannotEvaluate(*func_id);
+            }
+        }
+
+        let Ok(generated_brillig) = gen_brillig_for(func, brillig_arguments, brillig) else {
+            return EvaluationResult::CannotEvaluate(*func_id);
+        };
+
+        let mut calldata = Vec::new();
+        for argument in arguments {
+            value_id_to_calldata(*argument, dfg, &mut calldata);
+        }
+
+        let bytecode = &generated_brillig.byte_code;
+        let foreign_call_results = Vec::new();
+        let black_box_solver = Bn254BlackBoxSolver;
+        let profiling_active = false;
+        let mut vm =
+            VM::new(calldata, bytecode, foreign_call_results, &black_box_solver, profiling_active);
+        let vm_status: VMStatus<_> = vm.process_opcodes();
+        let VMStatus::Finished { return_data_offset, return_data_size } = vm_status else {
+            return EvaluationResult::CannotEvaluate(*func_id);
+        };
+
+        let memory =
+            vm.get_memory()[return_data_offset..(return_data_offset + return_data_size)].to_vec();
+
+        EvaluationResult::Evaluated(memory)
+    }
+
+    /// Creates a new value inside this function by reading it from `memory_values` starting at
+    /// `memory_index` depending on the given Type: if it's an array multiple values will be read
+    /// and a new `make_array` instruction will be created.
+    fn new_value_for_type_and_memory_values(
+        typ: Type,
+        block_id: BasicBlockId,
+        memory_values: &[MemoryValue<FieldElement>],
+        memory_index: &mut usize,
+        dfg: &mut DataFlowGraph,
+    ) -> ValueId {
+        match typ {
+            Type::Numeric(_) => {
+                let memory = memory_values[*memory_index];
+                *memory_index += 1;
+
+                let field_value = match memory {
+                    MemoryValue::Field(field_value) => field_value,
+                    MemoryValue::Integer(u128_value, _) => u128_value.into(),
+                };
+                dfg.make_constant(field_value, typ)
+            }
+            Type::Array(types, length) => {
+                let mut new_array_values = Vector::new();
+                for _ in 0..length {
+                    for typ in types.iter() {
+                        let new_value = Self::new_value_for_type_and_memory_values(
+                            typ.clone(),
+                            block_id,
+                            memory_values,
+                            memory_index,
+                            dfg,
+                        );
+                        new_array_values.push_back(new_value);
+                    }
+                }
+
+                let instruction = Instruction::MakeArray {
+                    elements: new_array_values,
+                    typ: Type::Array(types, length),
+                };
+                let instruction_id = dfg.make_instruction(instruction, None);
+                dfg[block_id].instructions_mut().push(instruction_id);
+                *dfg.instruction_results(instruction_id).first().unwrap()
+            }
+            Type::Reference(_) => {
+                panic!("Unexpected reference type in brillig function result")
+            }
+            Type::Slice(_) => {
+                panic!("Unexpected slice type in brillig function result")
+            }
+            Type::Function => {
+                panic!("Unexpected function type in brillig function result")
+            }
+        }
+    }
 }
 
 impl ResultCache {
     /// Records that an `Instruction` in block `block` produced the result values `results`.
     fn cache(&mut self, block: BasicBlockId, results: Vec<ValueId>) {
-        self.results.push((block, results));
+        if self.result.is_none() {
+            self.result = Some((block, results));
+        }
     }
 
     /// Returns a set of [`ValueId`]s produced from a copy of this [`Instruction`] which sits
@@ -355,33 +635,66 @@ impl ResultCache {
     /// We require that the cached instruction's block dominates `block` in order to avoid
     /// cycles causing issues (e.g. two instructions being replaced with the results of each other
     /// such that neither instruction exists anymore.)
-    fn get(&self, block: BasicBlockId, dom: &mut DominatorTree) -> Option<&[ValueId]> {
-        for (origin_block, results) in &self.results {
+    fn get(&self, block: BasicBlockId, dom: &mut DominatorTree) -> Option<CacheResult> {
+        self.result.as_ref().map(|(origin_block, results)| {
             if dom.dominates(*origin_block, block) {
-                return Some(results);
+                CacheResult::Cached(results)
+            } else {
+                // Insert a copy of this instruction in the common dominator
+                let dominator = dom.common_dominator(*origin_block, block);
+                CacheResult::NeedToHoistToCommonBlock(dominator, results)
             }
+        })
+    }
+}
+
+enum CacheResult<'a> {
+    Cached(&'a [ValueId]),
+    NeedToHoistToCommonBlock(BasicBlockId, &'a [ValueId]),
+}
+
+/// Result of trying to evaluate an instruction (any instruction) in this pass.
+enum EvaluationResult {
+    /// Nothing was done because the instruction wasn't a call to a brillig function,
+    /// or some arguments to it were not constants.
+    NotABrilligCall,
+    /// The instruction was a call to a brillig function, but we couldn't evaluate it.
+    /// This can occur in the situation where the brillig function reaches a "trap" or a foreign call opcode.
+    CannotEvaluate(FunctionId),
+    /// The instruction was a call to a brillig function and we were able to evaluate it,
+    /// returning evaluation memory values.
+    Evaluated(Vec<MemoryValue<FieldElement>>),
+}
+
+/// Similar to FunctionContext::ssa_type_to_parameter but never panics and disallows reference types.
+pub(crate) fn type_to_brillig_parameter(typ: &Type) -> Option<BrilligParameter> {
+    match typ {
+        Type::Numeric(_) => Some(BrilligParameter::SingleAddr(get_bit_size_from_ssa_type(typ))),
+        Type::Array(item_type, size) => {
+            let mut parameters = Vec::with_capacity(item_type.len());
+            for item_typ in item_type.iter() {
+                parameters.push(type_to_brillig_parameter(item_typ)?);
+            }
+            Some(BrilligParameter::Array(parameters, *size))
         }
-        None
+        _ => None,
     }
 }
 
-/// Check if one expression is simpler than the other.
-/// Returns `Some((complex, simple))` if a simplification was found, otherwise `None`.
-/// Expects the `ValueId`s to be fully resolved.
-fn simplify(dfg: &DataFlowGraph, lhs: ValueId, rhs: ValueId) -> Option<(ValueId, ValueId)> {
-    match (&dfg[lhs], &dfg[rhs]) {
-        // Ignore trivial constraints
-        (Value::NumericConstant { .. }, Value::NumericConstant { .. }) => None,
-
-        // Prefer replacing with constants where possible.
-        (Value::NumericConstant { .. }, _) => Some((rhs, lhs)),
-        (_, Value::NumericConstant { .. }) => Some((lhs, rhs)),
-        // Otherwise prefer block parameters over instruction results.
-        // This is as block parameters are more likely to be a single witness rather than a full expression.
-        (Value::Param { .. }, Value::Instruction { .. }) => Some((rhs, lhs)),
-        (Value::Instruction { .. }, Value::Param { .. }) => Some((lhs, rhs)),
-        (_, _) => None,
+fn value_id_to_calldata(value_id: ValueId, dfg: &DataFlowGraph, calldata: &mut Vec<FieldElement>) {
+    if let Some(value) = dfg.get_numeric_constant(value_id) {
+        calldata.push(value);
+        return;
     }
+
+    if let Some((values, _type)) = dfg.get_array_constant(value_id) {
+        for value in values {
+            value_id_to_calldata(value, dfg, calldata);
+        }
+        return;
+    }
+
+    panic!("Expected ValueId to be numeric constant or array constant");
 }
 
 #[cfg(test)]
 mod test {
@@ -620,22 +933,32 @@ mod test {
     // Regression for #4600
     #[test]
     fn array_get_regression() {
+        // fn main f0 {
+        //   b0(v0: u1, v1: u64):
+        //     enable_side_effects_if v0
+        //     v2 = make_array [Field 0, Field 1]
+        //     v3 = array_get v2, index v1
+        //     v4 = not v0
+        //     enable_side_effects_if v4
+        //     v5 = array_get v2, index v1
+        // }
+        //
         // We want to make sure after constant folding both array_gets remain since they are
         // under different enable_side_effects_if contexts and thus one may be disabled while
        // the other is not. If one is removed, it is possible e.g. v4 is replaced with v2 which
         // is disabled (only gets from index 0) and thus returns the wrong result.
         let src = "
-        acir(inline) fn main f0 {
-          b0(v0: u1, v1: u64):
-            enable_side_effects v0
-            v4 = make_array [Field 0, Field 1] : [Field; 2]
-            v5 = array_get v4, index v1 -> Field
-            v6 = not v0
-            enable_side_effects v6
-            v7 = array_get v4, index v1 -> Field
-            return
-        }
-        ";
+            acir(inline) fn main f0 {
+              b0(v0: u1, v1: u64):
+                enable_side_effects v0
+                v4 = make_array [Field 0, Field 1] : [Field; 2]
+                v5 = array_get v4, index v1 -> Field
+                v6 = not v0
+                enable_side_effects v6
+                v7 = array_get v4, index v1 -> Field
+                return
+            }
+            ";
         let ssa = Ssa::from_str(src).unwrap();
 
         // Expected output is unchanged
@@ -693,14 +1016,14 @@ mod test {
         assert_normalized_ssa_equals(ssa, expected);
     }
 
-    // This test currently fails. It being fixed will address the issue https://github.com/noir-lang/noir/issues/5756
     #[test]
-    #[should_panic]
     fn constant_array_deduplication() {
         // fn main f0 {
         //   b0(v0: u64):
-        //     v5 = call keccakf1600([v0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0])
-        //     v6 = call keccakf1600([v0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0])
+        //     v1 = make_array [v0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0]
+        //     v2 = make_array [v0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0]
+        //     v5 = call keccakf1600(v1)
+        //     v6 = call keccakf1600(v2)
         // }
         //
         // Here we're checking a situation where two identical arrays are being initialized twice and being assigned separate `ValueId`s.
         let array1 = builder.insert_make_array(array_contents.clone(), typ.clone());
         let array2 = builder.insert_make_array(array_contents, typ.clone());
-        assert_eq!(array1, array2, "arrays were assigned different value ids");
+        assert_ne!(array1, array2, "arrays were not assigned different value ids");
 
         let keccakf1600 =
             builder.import_intrinsic("keccakf1600").expect("keccakf1600 intrinsic should exist");
         let _v10 = builder.insert_call(keccakf1600, vec![array1], vec![typ.clone()]);
         let _v11 = builder.insert_call(keccakf1600, vec![array2], vec![typ.clone()]);
+        builder.terminate_with_return(Vec::new());
 
         let mut ssa = builder.finish();
         ssa.normalize_ids();
@@ -735,8 +1059,13 @@ mod test {
         let main = ssa.main();
         let instructions = main.dfg[main.entry_block()].instructions();
         let starting_instruction_count = instructions.len();
-        assert_eq!(starting_instruction_count, 2);
+        assert_eq!(starting_instruction_count, 4);
 
+        // fn main f0 {
+        //   b0(v0: u64):
+        //     v1 = make_array [v0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0, u64 0]
+        //     v5 = call keccakf1600(v1)
+        // }
         let ssa = ssa.fold_constants();
         println!("{ssa}");
 
@@ -744,7 +1073,7 @@ mod test {
         let main = ssa.main();
         let instructions = main.dfg[main.entry_block()].instructions();
         let ending_instruction_count = instructions.len();
-        assert_eq!(ending_instruction_count, 1);
+        assert_eq!(ending_instruction_count, 2);
     }
 
     #[test]
@@ -790,4 +1119,275 @@ mod test {
         assert_eq!(main.dfg[main.entry_block()].instructions().len(), 1);
         assert_eq!(main.dfg[b1].instructions().len(), 0);
     }
+
+    #[test]
+    fn deduplicate_across_non_dominated_blocks() {
+        let src = "
+            brillig(inline) fn main f0 {
+              b0(v0: u32):
+                v2 = lt u32 1000, v0
+                jmpif v2 then: b1, else: b2
+              b1():
+                v4 = add v0, u32 1
+                v5 = lt v0, v4
+                constrain v5 == u1 1
+                jmp b2()
+              b2():
+                v7 = lt u32 1000, v0
+                jmpif v7 then: b3, else: b4
+              b3():
+                v8 = add v0, u32 1
+                v9 = lt v0, v8
+                constrain v9 == u1 1
+                jmp b4()
+              b4():
+                return
+            }
+            ";
+        let ssa = Ssa::from_str(src).unwrap();
+
+        // v4 has been hoisted, although:
+        // - v5 has not yet been removed since it was encountered earlier in the program
+        // - v8 hasn't been recognized as a duplicate of v6 yet since they still reference v4 and
+        //   v5 respectively
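        // A rough illustration (not the pass itself) of the hoisting decision
        // this test exercises: when a cached instruction's block does not
        // dominate the current block, the instruction is re-inserted at the
        // common dominator so a later folding run can deduplicate both copies.
        // Block ids and the dominator callbacks below are invented for the sketch.
        fn hoist_target(
            cached_block: u32,
            current_block: u32,
            dominates: impl Fn(u32, u32) -> bool,
            common_dominator: impl Fn(u32, u32) -> u32,
        ) -> u32 {
            if dominates(cached_block, current_block) {
                cached_block // safe to reuse the cached results directly
            } else {
                common_dominator(cached_block, current_block) // hoist here instead
            }
        }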
+        let expected = "
+            brillig(inline) fn main f0 {
+              b0(v0: u32):
+                v2 = lt u32 1000, v0
+                v4 = add v0, u32 1
+                jmpif v2 then: b1, else: b2
+              b1():
+                v5 = add v0, u32 1
+                v6 = lt v0, v5
+                constrain v6 == u1 1
+                jmp b2()
+              b2():
+                jmpif v2 then: b3, else: b4
+              b3():
+                v8 = lt v0, v4
+                constrain v8 == u1 1
+                jmp b4()
+              b4():
+                return
+            }
+            ";
+
+        let ssa = ssa.fold_constants_using_constraints();
+        assert_normalized_ssa_equals(ssa, expected);
+    }
+
+    #[test]
+    fn inlines_brillig_call_without_arguments() {
+        let src = "
+            acir(inline) fn main f0 {
+              b0():
+                v0 = call f1() -> Field
+                return v0
+            }
+
+            brillig(inline) fn one f1 {
+              b0():
+                v0 = add Field 2, Field 3
+                return v0
+            }
+            ";
+        let ssa = Ssa::from_str(src).unwrap();
+        let brillig = ssa.to_brillig(false);
+
+        let expected = "
+            acir(inline) fn main f0 {
+              b0():
+                return Field 5
+            }
+            ";
+        let ssa = ssa.fold_constants_with_brillig(&brillig);
+        assert_normalized_ssa_equals(ssa, expected);
+    }
+
+    #[test]
+    fn inlines_brillig_call_with_two_field_arguments() {
+        let src = "
+            acir(inline) fn main f0 {
+              b0():
+                v0 = call f1(Field 2, Field 3) -> Field
+                return v0
+            }
+
+            brillig(inline) fn one f1 {
+              b0(v0: Field, v1: Field):
+                v2 = add v0, v1
+                return v2
+            }
+            ";
+        let ssa = Ssa::from_str(src).unwrap();
+        let brillig = ssa.to_brillig(false);
+
+        let expected = "
+            acir(inline) fn main f0 {
+              b0():
+                return Field 5
+            }
+            ";
+        let ssa = ssa.fold_constants_with_brillig(&brillig);
+        assert_normalized_ssa_equals(ssa, expected);
+    }
+
+    #[test]
+    fn inlines_brillig_call_with_two_i32_arguments() {
+        let src = "
+            acir(inline) fn main f0 {
+              b0():
+                v0 = call f1(i32 2, i32 3) -> i32
+                return v0
+            }
+
+            brillig(inline) fn one f1 {
+              b0(v0: i32, v1: i32):
+                v2 = add v0, v1
+                return v2
+            }
+            ";
+        let ssa = Ssa::from_str(src).unwrap();
+        let brillig = ssa.to_brillig(false);
+
+        let expected = "
+            acir(inline) fn main f0 {
+              b0():
+                return i32 5
+            }
+            ";
+        let ssa = ssa.fold_constants_with_brillig(&brillig);
+        assert_normalized_ssa_equals(ssa, expected);
+    }
+
+    #[test]
+    fn inlines_brillig_call_with_array_return() {
+        let src = "
+            acir(inline) fn main f0 {
+              b0():
+                v0 = call f1(Field 2, Field 3, Field 4) -> [Field; 3]
+                return v0
+            }
+
+            brillig(inline) fn one f1 {
+              b0(v0: Field, v1: Field, v2: Field):
+                v3 = make_array [v0, v1, v2] : [Field; 3]
+                return v3
+            }
+            ";
+        let ssa = Ssa::from_str(src).unwrap();
+        let brillig = ssa.to_brillig(false);
+
+        let expected = "
+            acir(inline) fn main f0 {
+              b0():
+                v3 = make_array [Field 2, Field 3, Field 4] : [Field; 3]
+                return v3
+            }
+            ";
+        let ssa = ssa.fold_constants_with_brillig(&brillig);
+        assert_normalized_ssa_equals(ssa, expected);
+    }
+
+    #[test]
+    fn inlines_brillig_call_with_composite_array_return() {
+        let src = "
+            acir(inline) fn main f0 {
+              b0():
+                v0 = call f1(Field 2, i32 3, Field 4, i32 5) -> [(Field, i32); 2]
+                return v0
+            }
+
+            brillig(inline) fn one f1 {
+              b0(v0: Field, v1: i32, v2: i32, v3: Field):
+                v4 = make_array [v0, v1, v2, v3] : [(Field, i32); 2]
+                return v4
+            }
+            ";
+        let ssa = Ssa::from_str(src).unwrap();
+        let brillig = ssa.to_brillig(false);
+
+        let expected = "
+            acir(inline) fn main f0 {
+              b0():
+                v4 = make_array [Field 2, i32 3, Field 4, i32 5] : [(Field, i32); 2]
+                return v4
+            }
+            ";
+        let ssa = ssa.fold_constants_with_brillig(&brillig);
+        assert_normalized_ssa_equals(ssa, expected);
+    }
+
+    #[test]
+    fn inlines_brillig_call_with_array_arguments() {
+        let src = "
+            acir(inline) fn main f0 {
+              b0():
+                v0 = make_array [Field 2, Field 3] : [Field; 2]
+                v1 = call f1(v0) -> Field
+ return v1 + } + + brillig(inline) fn one f1 { + b0(v0: [Field; 2]): + inc_rc v0 + v2 = array_get v0, index u32 0 -> Field + v4 = array_get v0, index u32 1 -> Field + v5 = add v2, v4 + dec_rc v0 + return v5 + } + "; + let ssa = Ssa::from_str(src).unwrap(); + let brillig = ssa.to_brillig(false); + + let expected = " + acir(inline) fn main f0 { + b0(): + v2 = make_array [Field 2, Field 3] : [Field; 2] + return Field 5 + } + "; + let ssa = ssa.fold_constants_with_brillig(&brillig); + assert_normalized_ssa_equals(ssa, expected); + } + + #[test] + fn deduplicates_side_effecting_intrinsics() { + let src = " + // After EnableSideEffectsIf removal: + acir(inline) fn main f0 { + b0(v0: Field, v1: Field, v2: u1): + v4 = call is_unconstrained() -> u1 + v7 = call to_be_radix(v0, u32 256) -> [u8; 1] // `a.to_be_radix(256)`; + inc_rc v7 + v8 = call to_be_radix(v0, u32 256) -> [u8; 1] // duplicate load of `a` + inc_rc v8 + v9 = cast v2 as Field // `if c { a.to_be_radix(256) }` + v10 = mul v0, v9 // attaching `c` to `a` + v11 = call to_be_radix(v10, u32 256) -> [u8; 1] // calling `to_radix(c * a)` + inc_rc v11 + enable_side_effects v2 // side effect var for `c` shifted down by removal + return + } + "; + let ssa = Ssa::from_str(src).unwrap(); + let expected = " + acir(inline) fn main f0 { + b0(v0: Field, v1: Field, v2: u1): + v4 = call is_unconstrained() -> u1 + v7 = call to_be_radix(v0, u32 256) -> [u8; 1] + inc_rc v7 + inc_rc v7 + v8 = cast v2 as Field + v9 = mul v0, v8 + v10 = call to_be_radix(v9, u32 256) -> [u8; 1] + inc_rc v10 + enable_side_effects v2 + return + } + "; + let ssa = ssa.fold_constants_using_constraints(); + assert_normalized_ssa_equals(ssa, expected); + } } diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/flatten_cfg.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/flatten_cfg.rs index 5d114672a55..0e8d9f7ee83 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/flatten_cfg.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/flatten_cfg.rs @@ -131,8 +131,7 @@ //! v11 = mul v4, Field 12 //! v12 = add v10, v11 //! store v12 at v5 (new store) -use fxhash::FxHashMap as HashMap; -use std::collections::{BTreeMap, HashSet}; +use fxhash::{FxHashMap as HashMap, FxHashSet as HashSet}; use acvm::{acir::AcirField, acir::BlackBoxFunc, FieldElement}; use iter_extended::vecmap; @@ -186,18 +185,6 @@ struct Context<'f> { /// Maps start of branch -> end of branch branch_ends: HashMap, - /// Maps an address to the old and new value of the element at that address - /// These only hold stores for one block at a time and is cleared - /// between inlining of branches. - store_values: HashMap, - - /// Stores all allocations local to the current branch. - /// Since these branches are local to the current branch (ie. only defined within one branch of - /// an if expression), they should not be merged with their previous value or stored value in - /// the other branch since there is no such value. The ValueId here is that which is returned - /// by the allocate instruction. - local_allocations: HashSet, - /// A stack of each jmpif condition that was taken to reach a particular point in the program. /// When two branches are merged back into one, this constitutes a join point, and is analogous /// to the rest of the program after an if statement. 
When such a join point / end block is
@@ -214,13 +201,15 @@ struct Context<'f> {
     /// When processing a block, we pop this stack to get its arguments
     /// and at the end we push the arguments for its successor
     arguments_stack: Vec<Vec<ValueId>>,
-}
-#[derive(Clone)]
-pub(crate) struct Store {
-    old_value: ValueId,
-    new_value: ValueId,
-    call_stack: CallStack,
+    /// Stores all allocations local to the current branch.
+    ///
+    /// Since these allocations are local to the current branch (i.e. only defined within one
+    /// branch of an if expression), they should not be merged with their previous value or
+    /// stored value in the other branch since there is no such value.
+    ///
+    /// The `ValueId` here is that which is returned by the allocate instruction.
+    local_allocations: HashSet<ValueId>,
 }
 
 #[derive(Clone)]
@@ -231,8 +220,6 @@ struct ConditionalBranch {
     old_condition: ValueId,
     // The condition of the branch
     condition: ValueId,
-    // The store values accumulated when processing the branch
-    store_values: HashMap<ValueId, Store>,
     // The allocations accumulated when processing the branch
     local_allocations: HashSet<ValueId>,
 }
@@ -263,12 +250,11 @@ fn flatten_function_cfg(function: &mut Function, no_predicates: &HashMap<FunctionId, bool> Context<'f> {
     // If this is not a separate variable, clippy gets confused and says the to_vec is
     // unnecessary, when removing it actually causes an aliasing/mutability error.
     let instructions = self.inserter.function.dfg[block].instructions().to_vec();
+
     for instruction in instructions.iter() {
         if self.is_no_predicate(no_predicates, instruction) {
             // disable side effect for no_predicate functions
@@ -429,14 +416,12 @@ impl<'f> Context<'f> {
         let old_condition = *condition;
         let then_condition = self.inserter.resolve(old_condition);
 
-        let old_stores = std::mem::take(&mut self.store_values);
         let old_allocations = std::mem::take(&mut self.local_allocations);
         let branch = ConditionalBranch {
             old_condition,
             condition: self.link_condition(then_condition),
-            store_values: old_stores,
-            local_allocations: old_allocations,
             last_block: *then_destination,
+            local_allocations: old_allocations,
         };
         let cond_context = ConditionalContext {
             condition: then_condition,
@@ -473,21 +458,13 @@ impl<'f> Context<'f> {
         );
         let else_condition = self.link_condition(else_condition);
 
-        // Make sure the else branch sees the previous values of each store
-        // rather than any values created in the 'then' branch.
- let old_stores = std::mem::take(&mut cond_context.then_branch.store_values); - cond_context.then_branch.store_values = std::mem::take(&mut self.store_values); - self.undo_stores_in_then_branch(&cond_context.then_branch.store_values); - let old_allocations = std::mem::take(&mut self.local_allocations); let else_branch = ConditionalBranch { old_condition: cond_context.then_branch.old_condition, condition: else_condition, - store_values: old_stores, - local_allocations: old_allocations, last_block: *block, + local_allocations: old_allocations, }; - cond_context.then_branch.local_allocations.clear(); cond_context.else_branch = Some(else_branch); self.condition_stack.push(cond_context); @@ -509,10 +486,8 @@ impl<'f> Context<'f> { } let mut else_branch = cond_context.else_branch.unwrap(); - let stores_in_branch = std::mem::replace(&mut self.store_values, else_branch.store_values); self.local_allocations = std::mem::take(&mut else_branch.local_allocations); else_branch.last_block = *block; - else_branch.store_values = stores_in_branch; cond_context.else_branch = Some(else_branch); // We must remember to reset whether side effects are enabled when both branches @@ -580,8 +555,6 @@ impl<'f> Context<'f> { .first() }); - let call_stack = cond_context.call_stack; - self.merge_stores(cond_context.then_branch, cond_context.else_branch, call_stack); self.arguments_stack.pop(); self.arguments_stack.pop(); self.arguments_stack.push(args); @@ -636,120 +609,37 @@ impl<'f> Context<'f> { self.insert_instruction_with_typevars(enable_side_effects, None, call_stack); } - /// Merge any store instructions found in each branch. - /// - /// This function relies on the 'then' branch being merged before the 'else' branch of a jmpif - /// instruction. If this ordering is changed, the ordering that store values are merged within - /// this function also needs to be changed to reflect that. 
-    fn merge_stores(
-        &mut self,
-        then_branch: ConditionalBranch,
-        else_branch: Option<ConditionalBranch>,
-        call_stack: CallStack,
-    ) {
-        // Address -> (then_value, else_value, value_before_the_if)
-        let mut new_map = BTreeMap::new();
-
-        for (address, store) in then_branch.store_values {
-            new_map.insert(address, (store.new_value, store.old_value, store.old_value));
-        }
-
-        if else_branch.is_some() {
-            for (address, store) in else_branch.clone().unwrap().store_values {
-                if let Some(entry) = new_map.get_mut(&address) {
-                    entry.1 = store.new_value;
-                } else {
-                    new_map.insert(address, (store.old_value, store.new_value, store.old_value));
-                }
-            }
-        }
-
-        let then_condition = then_branch.condition;
-        let block = self.inserter.function.entry_block();
-
-        // Merging must occur in a separate loop as we cannot borrow `self` as mutable while `value_merger` does
-        let mut new_values = HashMap::default();
-        for (address, (then_case, else_case, _)) in &new_map {
-            let instruction = Instruction::IfElse {
-                then_condition,
-                then_value: *then_case,
-                else_value: *else_case,
-            };
-            let dfg = &mut self.inserter.function.dfg;
-            let value = dfg
-                .insert_instruction_and_results(instruction, block, None, call_stack.clone())
-                .first();
-
-            new_values.insert(address, value);
-        }
-
-        // Replace stores with new merged values
-        for (address, (_, _, old_value)) in &new_map {
-            let value = new_values[address];
-            let address = *address;
-            self.insert_instruction_with_typevars(
-                Instruction::Store { address, value },
-                None,
-                call_stack.clone(),
-            );
-
-            if let Some(store) = self.store_values.get_mut(&address) {
-                store.new_value = value;
-            } else {
-                self.store_values.insert(
-                    address,
-                    Store {
-                        old_value: *old_value,
-                        new_value: value,
-                        call_stack: call_stack.clone(),
-                    },
-                );
-            }
-        }
-    }
-
-    fn remember_store(&mut self, address: ValueId, new_value: ValueId, call_stack: CallStack) {
-        if !self.local_allocations.contains(&address) {
-            if let Some(store_value) = self.store_values.get_mut(&address) {
-                store_value.new_value = new_value;
-            } else {
-                let load = Instruction::Load { address };
-
-                let load_type = Some(vec![self.inserter.function.dfg.type_of_value(new_value)]);
-                let old_value = self
-                    .insert_instruction_with_typevars(load.clone(), load_type, call_stack.clone())
-                    .first();
-
-                self.store_values.insert(address, Store { old_value, new_value, call_stack });
-            }
-        }
-    }
-
     /// Push the given instruction to the end of the entry block of the current function.
     ///
     /// Note that each ValueId of the instruction will be mapped via self.inserter.resolve.
     /// As a result, the instruction that will be pushed will actually be a new instruction
     /// with a different InstructionId from the original. The results of the given instruction
     /// will also be mapped to the results of the new instruction.
-    fn push_instruction(&mut self, id: InstructionId) -> Vec<ValueId> {
+    fn push_instruction(&mut self, id: InstructionId) {
         let (instruction, call_stack) = self.inserter.map_instruction(id);
         let instruction = self.handle_instruction_side_effects(instruction, call_stack.clone());
 
-        let is_allocate = matches!(instruction, Instruction::Allocate);
+        let instruction_is_allocate = matches!(&instruction, Instruction::Allocate);
         let entry = self.inserter.function.entry_block();
         let results = self.inserter.push_instruction_value(instruction, id, entry, call_stack);
 
         // Remember an allocate was created local to this branch so that we do not try to merge store
         // values across branches for it later.
-        if is_allocate {
+        if instruction_is_allocate {
             self.local_allocations.insert(results.first());
         }
-
-        results.results().into_owned()
     }
 
     /// If we are currently in a branch, we need to modify constrain instructions
     /// to multiply them by the branch's condition (see optimization #1 in the module comment).
     fn handle_instruction_side_effects(
         &mut self,
         instruction: Instruction,
@@ -782,8 +672,32 @@ impl<'f> Context<'f> {
                     Instruction::Constrain(lhs, rhs, message)
                 }
                 Instruction::Store { address, value } => {
-                    self.remember_store(address, value, call_stack);
-                    Instruction::Store { address, value }
+                    // If the address was allocated within the current branch there is no
+                    // previous value to load, so the store can be kept as-is with no merge.
+                    if self.local_allocations.contains(&address) {
+                        Instruction::Store { address, value }
+                    } else {
+                        // Instead of storing `value`, store `if condition { value } else { previous_value }`
+                        let typ = self.inserter.function.dfg.type_of_value(value);
+                        let load = Instruction::Load { address };
+                        let previous_value = self
+                            .insert_instruction_with_typevars(
+                                load,
+                                Some(vec![typ]),
+                                call_stack.clone(),
+                            )
+                            .first();
+
+                        let instruction = Instruction::IfElse {
+                            then_condition: condition,
+                            then_value: value,
+                            else_value: previous_value,
+                        };
+
+                        let updated_value = self.insert_instruction(instruction, call_stack);
+                        Instruction::Store { address, value: updated_value }
+                    }
                 }
                 Instruction::RangeCheck { value, max_bit_size, assert_message } => {
                     // Replace value with `value * predicate` to zero out value when predicate is inactive.
@@ -905,23 +819,11 @@ impl<'f> Context<'f> {
             call_stack,
         )
     }
-
-    fn undo_stores_in_then_branch(&mut self, store_values: &HashMap<ValueId, Store>) {
-        for (address, store) in store_values {
-            let address = *address;
-            let value = store.old_value;
-            let instruction = Instruction::Store { address, value };
-            // Considering the location of undoing a store to be the same as the original store.
- self.insert_instruction_with_typevars(instruction, None, store.call_stack.clone()); - } - } } #[cfg(test)] mod test { - use std::sync::Arc; - - use acvm::{acir::AcirField, FieldElement}; + use acvm::acir::{AcirField, FieldElement}; use crate::ssa::{ function_builder::FunctionBuilder, @@ -1023,15 +925,13 @@ mod test { b0(v0: u1, v1: &mut Field): enable_side_effects v0 v2 = load v1 -> Field - store Field 5 at v1 - v4 = not v0 - store v2 at v1 + v3 = cast v0 as Field + v5 = sub Field 5, v2 + v6 = mul v3, v5 + v7 = add v2, v6 + store v7 at v1 + v8 = not v0 enable_side_effects u1 1 - v6 = cast v0 as Field - v7 = sub Field 5, v2 - v8 = mul v6, v7 - v9 = add v2, v8 - store v9 at v1 return } "; @@ -1062,17 +962,20 @@ mod test { b0(v0: u1, v1: &mut Field): enable_side_effects v0 v2 = load v1 -> Field - store Field 5 at v1 - v4 = not v0 - store v2 at v1 - enable_side_effects v4 - v5 = load v1 -> Field - store Field 6 at v1 + v3 = cast v0 as Field + v5 = sub Field 5, v2 + v6 = mul v3, v5 + v7 = add v2, v6 + store v7 at v1 + v8 = not v0 + enable_side_effects v8 + v9 = load v1 -> Field + v10 = cast v8 as Field + v12 = sub Field 6, v9 + v13 = mul v10, v12 + v14 = add v9, v13 + store v14 at v1 enable_side_effects u1 1 - v8 = cast v0 as Field - v10 = mul v8, Field -1 - v11 = add Field 6, v10 - store v11 at v1 return } "; @@ -1176,22 +1079,36 @@ mod test { acir(inline) fn main f0 { b0(v0: u1, v1: u1): v2 = allocate -> &mut Field + store Field 0 at v2 + store Field 1 at v2 enable_side_effects v0 - v3 = mul v0, v1 - enable_side_effects v3 - v4 = not v1 - v5 = mul v0, v4 + v5 = cast v0 as Field + v6 = add Field 1, v5 + store v6 at v2 + v7 = mul v0, v1 + enable_side_effects v7 + v8 = cast v7 as Field + v10 = sub Field 5, v6 + v11 = mul v8, v10 + v12 = add v6, v11 + store v12 at v2 + v13 = not v1 + v14 = mul v0, v13 + enable_side_effects v14 + v15 = cast v14 as Field + v17 = sub Field 6, v12 + v18 = mul v15, v17 + v19 = add v12, v18 + store v19 at v2 enable_side_effects v0 - v6 = cast v3 as Field - v8 = mul v6, Field -1 - v10 = add Field 6, v8 - v11 = not v0 + v20 = not v0 + enable_side_effects v20 + v21 = cast v20 as Field + v23 = sub Field 3, v19 + v24 = mul v21, v23 + v25 = add v19, v24 enable_side_effects u1 1 - v13 = cast v0 as Field - v15 = sub v10, Field 3 - v16 = mul v13, v15 - v17 = add Field 3, v16 - return v17 + return v25 }"; let main = ssa.main(); @@ -1203,7 +1120,12 @@ mod test { let merged_values = get_all_constants_reachable_from_instruction(&main.dfg, ret); assert_eq!( merged_values, - vec![FieldElement::from(3u128), FieldElement::from(6u128), -FieldElement::from(1u128)] + vec![ + FieldElement::from(1u128), + FieldElement::from(3u128), + FieldElement::from(5u128), + FieldElement::from(6u128) + ] ); assert_normalized_ssa_equals(ssa, expected); @@ -1344,63 +1266,73 @@ mod test { fn should_not_merge_incorrectly_to_false() { // Regression test for #1792 // Tests that it does not simplify a true constraint an always-false constraint - // acir(inline) fn main f1 { - // b0(v0: [u8; 2]): - // v5 = array_get v0, index u8 0 - // v6 = cast v5 as u32 - // v8 = truncate v6 to 1 bits, max_bit_size: 32 - // v9 = cast v8 as u1 - // v10 = allocate - // store u8 0 at v10 - // jmpif v9 then: b2, else: b3 - // b2(): - // v12 = cast v5 as Field - // v13 = add v12, Field 1 - // store v13 at v10 - // jmp b4() - // b4(): - // constrain v9 == u1 1 - // return - // b3(): - // store u8 0 at v10 - // jmp b4() - // } - let main_id = Id::test_new(1); - let mut builder = FunctionBuilder::new("main".into(), main_id); - 
builder.insert_block(); // b0 - let b1 = builder.insert_block(); - let b2 = builder.insert_block(); - let b3 = builder.insert_block(); - let element_type = Arc::new(vec![Type::unsigned(8)]); - let array_type = Type::Array(element_type.clone(), 2); - let array = builder.add_parameter(array_type); - let zero = builder.numeric_constant(0_u128, Type::unsigned(8)); - let v5 = builder.insert_array_get(array, zero, Type::unsigned(8)); - let v6 = builder.insert_cast(v5, Type::unsigned(32)); - let i_two = builder.numeric_constant(2_u128, Type::unsigned(32)); - let v8 = builder.insert_binary(v6, BinaryOp::Mod, i_two); - let v9 = builder.insert_cast(v8, Type::bool()); - let v10 = builder.insert_allocate(Type::field()); - builder.insert_store(v10, zero); - builder.terminate_with_jmpif(v9, b1, b2); - builder.switch_to_block(b1); - let one = builder.field_constant(1_u128); - let v5b = builder.insert_cast(v5, Type::field()); - let v13: Id = builder.insert_binary(v5b, BinaryOp::Add, one); - let v14 = builder.insert_cast(v13, Type::unsigned(8)); - builder.insert_store(v10, v14); - builder.terminate_with_jmp(b3, vec![]); - builder.switch_to_block(b2); - builder.insert_store(v10, zero); - builder.terminate_with_jmp(b3, vec![]); - builder.switch_to_block(b3); - let v_true = builder.numeric_constant(true, Type::bool()); - let v12 = builder.insert_binary(v9, BinaryOp::Eq, v_true); - builder.insert_constrain(v12, v_true, None); - builder.terminate_with_return(vec![]); - let ssa = builder.finish(); + let src = " + acir(inline) fn main f0 { + b0(v0: [u8; 2]): + v2 = array_get v0, index u8 0 -> u8 + v3 = cast v2 as u32 + v4 = truncate v3 to 1 bits, max_bit_size: 32 + v5 = cast v4 as u1 + v6 = allocate -> &mut Field + store u8 0 at v6 + jmpif v5 then: b2, else: b1 + b2(): + v7 = cast v2 as Field + v9 = add v7, Field 1 + v10 = cast v9 as u8 + store v10 at v6 + jmp b3() + b3(): + constrain v5 == u1 1 + return + b1(): + store u8 0 at v6 + jmp b3() + } + "; + + let ssa = Ssa::from_str(src).unwrap(); + + let expected = " + acir(inline) fn main f0 { + b0(v0: [u8; 2]): + v2 = array_get v0, index u8 0 -> u8 + v3 = cast v2 as u32 + v4 = truncate v3 to 1 bits, max_bit_size: 32 + v5 = cast v4 as u1 + v6 = allocate -> &mut Field + store u8 0 at v6 + enable_side_effects v5 + v7 = cast v2 as Field + v9 = add v7, Field 1 + v10 = cast v9 as u8 + v11 = load v6 -> u8 + v12 = cast v4 as Field + v13 = cast v11 as Field + v14 = sub v9, v13 + v15 = mul v12, v14 + v16 = add v13, v15 + v17 = cast v16 as u8 + store v17 at v6 + v18 = not v5 + enable_side_effects v18 + v19 = load v6 -> u8 + v20 = cast v18 as Field + v21 = cast v19 as Field + v23 = sub Field 0, v21 + v24 = mul v20, v23 + v25 = add v21, v24 + v26 = cast v25 as u8 + store v26 at v6 + enable_side_effects u1 1 + constrain v5 == u1 1 + return + } + "; + let flattened_ssa = ssa.flatten_cfg(); let main = flattened_ssa.main(); + // Now assert that there is not an always-false constraint after flattening: let mut constrain_count = 0; for instruction in main.dfg[main.entry_block()].instructions() { @@ -1414,6 +1346,8 @@ mod test { } } assert_eq!(constrain_count, 1); + + assert_normalized_ssa_equals(flattened_ssa, expected); } #[test] diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg.rs index 0690dbbf204..53a31ae57c1 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg.rs @@ -18,6 +18,7 @@ //! 
- A reference with 0 aliases means we were unable to find which reference this reference
//!   refers to. If such a reference is stored to, we must conservatively invalidate every
//!   reference in the current block.
+//! - We also track the last load instruction to each address per block.
//!
//! From there, to figure out the value of each reference at the end of block, iterate each instruction:
//!   - On `Instruction::Allocate`:
@@ -28,6 +29,13 @@
//!     - Furthermore, if the result of the load is a reference, mark the result as an alias
//!       of the reference it dereferences to (if known).
//!       - If which reference it dereferences to is not known, this load result has no aliases.
+//!     - We also track the last instance of a load instruction to each address in a block.
+//!       If the last load instruction was from the same address as the current load instruction,
+//!       we replace the result of the current load with the result of the previous load.
+//!       This removal requires a couple of conditions:
+//!         - No store occurs to that address before the next load,
+//!         - The address is not used as an argument to a call.
+//!       This optimization helps us remove repeated loads for which no known value exists.
//!   - On `Instruction::Store { address, value }`:
//!     - If the address of the store is known:
//!       - If the address has exactly 1 alias:
@@ -40,11 +48,13 @@
//!     - Conservatively mark every alias in the block to `Unknown`.
//!     - Additionally, if there were no Loads to any alias of the address between this Store and
//!       the previous Store to the same address, the previous store can be removed.
+//!     - Remove the instance of the last load instruction to the address and its aliases.
//!   - On `Instruction::Call { arguments }`:
//!     - If any argument of the call is a reference, set the value of each alias of that
//!       reference to `Unknown`
//!     - Any builtin functions that may return aliases if their input also contains a
//!       reference should be tracked. Examples: `slice_push_back`, `slice_insert`, `slice_remove`, etc.
+//!     - Remove the instance of the last load instruction for any reference arguments and their aliases.
//!
//! On a terminator instruction:
//!   - If the terminator is a `Jmp`:
@@ -274,6 +284,9 @@ impl<'f> PerFunctionContext<'f> {
         if let Some(first_predecessor) = predecessors.next() {
             let mut first = self.blocks.get(&first_predecessor).cloned().unwrap_or_default();
             first.last_stores.clear();
+            // Last loads are tracked per block. During unification we are creating a new block from the current one,
+            // so we must clear the last loads of the current block before we return the new block.
+            first.last_loads.clear();
 
             // Note that we have to start folding with the first block as the accumulator.
             // If we started with an empty block, an empty block union'd with any other block
@@ -410,6 +423,28 @@ impl<'f> PerFunctionContext<'f> {
 
                     self.last_loads.insert(address, (instruction, block_id));
                 }
+
+                // Check whether the block has a repeat load from the same address (with no calls or stores in between the loads).
+                // If we do have a repeat load, we can remove the current load and map its result to the previous load's result.
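+                // (A store to the address, or a call taking the reference, clears this entry
+                // via `keep_last_load_for`, so a hit here means the previous result is still current.)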
+ if let Some(last_load) = references.last_loads.get(&address) { + let Instruction::Load { address: previous_address } = + &self.inserter.function.dfg[*last_load] + else { + panic!("Expected a Load instruction here"); + }; + let result = self.inserter.function.dfg.instruction_results(instruction)[0]; + let previous_result = + self.inserter.function.dfg.instruction_results(*last_load)[0]; + if *previous_address == address { + self.inserter.map_value(result, previous_result); + self.instructions_to_remove.insert(instruction); + } + } + // We want to set the load for every load even if the address has a known value + // and the previous load instruction was removed. + // We are safe to still remove a repeat load in this case as we are mapping from the current load's + // result to the previous load, which if it was removed should already have a mapping to the known value. + references.set_last_load(address, instruction); } Instruction::Store { address, value } => { let address = self.inserter.function.dfg.resolve(*address); @@ -435,6 +470,8 @@ impl<'f> PerFunctionContext<'f> { } references.set_known_value(address, value); + // If we see a store to an address, the last load to that address needs to remain. + references.keep_last_load_for(address, self.inserter.function); references.last_stores.insert(address, instruction); } Instruction::Allocate => { @@ -542,6 +579,9 @@ impl<'f> PerFunctionContext<'f> { let value = self.inserter.function.dfg.resolve(*value); references.set_unknown(value); references.mark_value_used(value, self.inserter.function); + + // If a reference is an argument to a call, the last load to that address and its aliases needs to remain. + references.keep_last_load_for(value, self.inserter.function); } } } @@ -572,6 +612,12 @@ impl<'f> PerFunctionContext<'f> { let destination_parameters = self.inserter.function.dfg[*destination].parameters(); assert_eq!(destination_parameters.len(), arguments.len()); + // If we have multiple parameters that alias that same argument value, + // then those parameters also alias each other. + // We save parameters with repeat arguments to later mark those + // parameters as aliasing one another. + let mut arg_set: HashMap> = HashMap::default(); + // Add an alias for each reference parameter for (parameter, argument) in destination_parameters.iter().zip(arguments) { if self.inserter.function.dfg.value_is_reference(*parameter) { @@ -581,10 +627,27 @@ impl<'f> PerFunctionContext<'f> { if let Some(aliases) = references.aliases.get_mut(expression) { // The argument reference is possibly aliased by this block parameter aliases.insert(*parameter); + + // Check if we have seen the same argument + let seen_parameters = arg_set.entry(argument).or_default(); + // Add the current parameter to the parameters we have seen for this argument. + // The previous parameters and the current one alias one another. + seen_parameters.insert(*parameter); } } } } + + // Set the aliases of the parameters + for (_, aliased_params) in arg_set { + for param in aliased_params.iter() { + self.set_aliases( + references, + *param, + AliasSet::known_multiple(aliased_params.clone()), + ); + } + } } TerminatorInstruction::Return { return_values, .. 
} => { // Removing all `last_stores` for each returned reference is more important here @@ -612,6 +675,8 @@ mod tests { map::Id, types::Type, }, + opt::assert_normalized_ssa_equals, + Ssa, }; #[test] @@ -822,88 +887,53 @@ mod tests { // is later stored in a successor block #[test] fn load_aliases_in_predecessor_block() { - // fn main { - // b0(): - // v0 = allocate - // store Field 0 at v0 - // v2 = allocate - // store v0 at v2 - // v3 = load v2 - // v4 = load v2 - // jmp b1() - // b1(): - // store Field 1 at v3 - // store Field 2 at v4 - // v7 = load v3 - // v8 = eq v7, Field 2 - // return - // } - let main_id = Id::test_new(0); - let mut builder = FunctionBuilder::new("main".into(), main_id); - - let v0 = builder.insert_allocate(Type::field()); - - let zero = builder.field_constant(0u128); - builder.insert_store(v0, zero); - - let v2 = builder.insert_allocate(Type::Reference(Arc::new(Type::field()))); - builder.insert_store(v2, v0); - - let v3 = builder.insert_load(v2, Type::field()); - let v4 = builder.insert_load(v2, Type::field()); - let b1 = builder.insert_block(); - builder.terminate_with_jmp(b1, vec![]); - - builder.switch_to_block(b1); - - let one = builder.field_constant(1u128); - builder.insert_store(v3, one); - - let two = builder.field_constant(2u128); - builder.insert_store(v4, two); - - let v8 = builder.insert_load(v3, Type::field()); - let _ = builder.insert_binary(v8, BinaryOp::Eq, two); - - builder.terminate_with_return(vec![]); - - let ssa = builder.finish(); - assert_eq!(ssa.main().reachable_blocks().len(), 2); + let src = " + acir(inline) fn main f0 { + b0(): + v0 = allocate -> &mut Field + store Field 0 at v0 + v2 = allocate -> &mut &mut Field + store v0 at v2 + v3 = load v2 -> &mut Field + v4 = load v2 -> &mut Field + jmp b1() + b1(): + store Field 1 at v3 + store Field 2 at v4 + v7 = load v3 -> Field + v8 = eq v7, Field 2 + return + } + "; - // Expected result: - // acir fn main f0 { - // b0(): - // v9 = allocate - // store Field 0 at v9 - // v10 = allocate - // jmp b1() - // b1(): - // return - // } - let ssa = ssa.mem2reg(); - println!("{}", ssa); + let mut ssa = Ssa::from_str(src).unwrap(); + let main = ssa.main_mut(); - let main = ssa.main(); - assert_eq!(main.reachable_blocks().len(), 2); + let instructions = main.dfg[main.entry_block()].instructions(); + assert_eq!(instructions.len(), 6); // The final return is not counted // All loads should be removed - assert_eq!(count_loads(main.entry_block(), &main.dfg), 0); - assert_eq!(count_loads(b1, &main.dfg), 0); - // The first store is not removed as it is used as a nested reference in another store. - // We would need to track whether the store where `v9` is the store value gets removed to know whether + // We would need to track whether the store where `v0` is the store value gets removed to know whether // to remove it. - assert_eq!(count_stores(main.entry_block(), &main.dfg), 1); // The first store in b1 is removed since there is another store to the same reference // in the same block, and the store is not needed before the later store. // The rest of the stores are also removed as no loads are done within any blocks // to the stored values. 
-        assert_eq!(count_stores(b1, &main.dfg), 0);
-
-        let b1_instructions = main.dfg[b1].instructions();
+        let expected = "
+            acir(inline) fn main f0 {
+              b0():
+                v0 = allocate -> &mut Field
+                store Field 0 at v0
+                v2 = allocate -> &mut &mut Field
+                jmp b1()
+              b1():
+                return
+            }
+            ";
 
-
-        // We expect the last eq to be optimized out
-        assert_eq!(b1_instructions.len(), 0);
+        let ssa = ssa.mem2reg();
+        assert_normalized_ssa_equals(ssa, expected);
     }
 
     #[test]
@@ -933,7 +963,7 @@
         //     v10 = eq v9, Field 2
         //     constrain v9 == Field 2
         //     v11 = load v2
-        //     v12 = load v10
+        //     v12 = load v11
         //     v13 = eq v12, Field 2
         //     constrain v11 == Field 2
         //     return
@@ -992,7 +1022,7 @@
         let main = ssa.main();
         assert_eq!(main.reachable_blocks().len(), 4);
 
-        // The store from the original SSA should remain
+        // The stores from the original SSA should remain
         assert_eq!(count_stores(main.entry_block(), &main.dfg), 2);
         assert_eq!(count_stores(b2, &main.dfg), 1);
 
@@ -1039,4 +1069,160 @@
         let main = ssa.main();
         assert_eq!(count_loads(main.entry_block(), &main.dfg), 1);
     }
+
+    #[test]
+    fn remove_repeat_loads() {
+        // This test starts with two loads from the same unknown address.
+        // Specifically, look for the two `load v2` instructions in `b3`.
+        // We should be able to remove the second, repeated load.
+        let src = "
+            acir(inline) fn main f0 {
+              b0():
+                v0 = allocate -> &mut Field
+                store Field 0 at v0
+                v2 = allocate -> &mut &mut Field
+                store v0 at v2
+                jmp b1(Field 0)
+              b1(v3: Field):
+                v4 = eq v3, Field 0
+                jmpif v4 then: b2, else: b3
+              b2():
+                v5 = load v2 -> &mut Field
+                store Field 2 at v5
+                v8 = add v3, Field 1
+                jmp b1(v8)
+              b3():
+                v9 = load v0 -> Field
+                v10 = eq v9, Field 2
+                constrain v9 == Field 2
+                v11 = load v2 -> &mut Field
+                v12 = load v2 -> &mut Field
+                v13 = load v12 -> Field
+                v14 = eq v13, Field 2
+                constrain v13 == Field 2
+                return
+            }
+            ";
+
+        let ssa = Ssa::from_str(src).unwrap();
+
+        // The repeated load from v2 (v3 in the normalized output) should be removed;
+        // the block holding the constrains now has three loads rather than four.
+        //
+        // All stores are expected to remain.
+        let expected = "
+            acir(inline) fn main f0 {
+              b0():
+                v1 = allocate -> &mut Field
+                store Field 0 at v1
+                v3 = allocate -> &mut &mut Field
+                store v1 at v3
+                jmp b1(Field 0)
+              b1(v0: Field):
+                v4 = eq v0, Field 0
+                jmpif v4 then: b3, else: b2
+              b3():
+                v11 = load v3 -> &mut Field
+                store Field 2 at v11
+                v13 = add v0, Field 1
+                jmp b1(v13)
+              b2():
+                v5 = load v1 -> Field
+                v7 = eq v5, Field 2
+                constrain v5 == Field 2
+                v8 = load v3 -> &mut Field
+                v9 = load v8 -> Field
+                v10 = eq v9, Field 2
+                constrain v9 == Field 2
+                return
+            }
+            ";
+
+        let ssa = ssa.mem2reg();
+        assert_normalized_ssa_equals(ssa, expected);
+    }
+
+    #[test]
+    fn keep_repeat_loads_passed_to_a_call() {
+        // The test is the exact same as `remove_repeat_loads` above except with the call
+        // to `f1` between the repeated loads.
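+        // Passing `v3` to `f1` clears its last-load entry, so the load after the call must be kept.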
+ let src = " + acir(inline) fn main f0 { + b0(): + v1 = allocate -> &mut Field + store Field 0 at v1 + v3 = allocate -> &mut &mut Field + store v1 at v3 + jmp b1(Field 0) + b1(v0: Field): + v4 = eq v0, Field 0 + jmpif v4 then: b3, else: b2 + b3(): + v13 = load v3 -> &mut Field + store Field 2 at v13 + v15 = add v0, Field 1 + jmp b1(v15) + b2(): + v5 = load v1 -> Field + v7 = eq v5, Field 2 + constrain v5 == Field 2 + v8 = load v3 -> &mut Field + call f1(v3) + v10 = load v3 -> &mut Field + v11 = load v10 -> Field + v12 = eq v11, Field 2 + constrain v11 == Field 2 + return + } + acir(inline) fn foo f1 { + b0(v0: &mut Field): + return + } + "; + + let ssa = Ssa::from_str(src).unwrap(); + + let ssa = ssa.mem2reg(); + // We expect the program to be unchanged + assert_normalized_ssa_equals(ssa, src); + } + + #[test] + fn keep_repeat_loads_with_alias_store() { + // v7, v8, and v9 alias one another. We want to make sure that a repeat load to v7 with a store + // to its aliases in between the repeat loads does not remove those loads. + let src = " + acir(inline) fn main f0 { + b0(v0: u1): + jmpif v0 then: b2, else: b1 + b2(): + v6 = allocate -> &mut Field + store Field 0 at v6 + jmp b3(v6, v6, v6) + b3(v1: &mut Field, v2: &mut Field, v3: &mut Field): + v8 = load v1 -> Field + store Field 2 at v2 + v10 = load v1 -> Field + store Field 1 at v3 + v11 = load v1 -> Field + store Field 3 at v3 + v13 = load v1 -> Field + constrain v8 == Field 0 + constrain v10 == Field 2 + constrain v11 == Field 1 + constrain v13 == Field 3 + return + b1(): + v4 = allocate -> &mut Field + store Field 1 at v4 + jmp b3(v4, v4, v4) + } + "; + + let ssa = Ssa::from_str(src).unwrap(); + + let ssa = ssa.mem2reg(); + // We expect the program to be unchanged + assert_normalized_ssa_equals(ssa, src); + } } diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg/alias_set.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg/alias_set.rs index 4d768caa36b..e32eaa70186 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg/alias_set.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg/alias_set.rs @@ -24,6 +24,10 @@ impl AliasSet { Self { aliases: Some(aliases) } } + pub(super) fn known_multiple(values: BTreeSet) -> AliasSet { + Self { aliases: Some(values) } + } + /// In rare cases, such as when creating an empty array of references, the set of aliases for a /// particular value will be known to be zero, which is distinct from being unknown and /// possibly referring to any alias. 
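An aside on the `known_multiple` helper just added: when a jmp passes one reference into several block parameters (e.g. `jmp b3(v4, v4, v4)` in the test above), each such parameter ends up with the full parameter set as its known aliases. A minimal, self-contained sketch of that shape, using toy stand-ins for the compiler's `ValueId` and `AliasSet` types (not the real definitions):

use std::collections::BTreeSet;

// Toy stand-in for the compiler's `ValueId`.
type ValueId = u32;

// Toy mirror of the `AliasSet` shape in this diff; `None` would mean "unknown".
struct AliasSet {
    aliases: Option<BTreeSet<ValueId>>,
}

impl AliasSet {
    fn known_multiple(values: BTreeSet<ValueId>) -> AliasSet {
        AliasSet { aliases: Some(values) }
    }
}

fn main() {
    // Three parameters bound to the same argument alias one another,
    // so all three share one known alias set.
    let params: BTreeSet<ValueId> = BTreeSet::from([1, 2, 3]);
    let aliases = AliasSet::known_multiple(params);
    assert_eq!(aliases.aliases.map(|s| s.len()), Some(3));
}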
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg/block.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg/block.rs
index 532785d2928..f4265b2466d 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg/block.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/mem2reg/block.rs
@@ -34,6 +34,9 @@ pub(super) struct Block {
 
     /// The last instance of a `Store` instruction to each address in this block
     pub(super) last_stores: im::OrdMap<ValueId, InstructionId>,
+
+    /// The last instance of a `Load` instruction to each address in this block
+    pub(super) last_loads: im::OrdMap<ValueId, InstructionId>,
 }
 
 /// An `Expression` here is used to represent a canonical key
@@ -237,4 +240,14 @@ impl Block {
 
         Cow::Owned(AliasSet::unknown())
     }
+
+    pub(super) fn set_last_load(&mut self, address: ValueId, instruction: InstructionId) {
+        self.last_loads.insert(address, instruction);
+    }
+
+    pub(super) fn keep_last_load_for(&mut self, address: ValueId, function: &Function) {
+        let address = function.dfg.resolve(address);
+        self.last_loads.remove(&address);
+        self.for_each_alias_of(address, |block, alias| block.last_loads.remove(&alias));
+    }
 }
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/remove_enable_side_effects.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/remove_enable_side_effects.rs
index 0517f9ef89f..f735d9300ce 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/remove_enable_side_effects.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/remove_enable_side_effects.rs
@@ -180,6 +180,8 @@ impl Context {
                 | Intrinsic::AsWitness
                 | Intrinsic::IsUnconstrained
                 | Intrinsic::DerivePedersenGenerators
+                | Intrinsic::ArrayRefCount
+                | Intrinsic::SliceRefCount
                 | Intrinsic::FieldLessThan => false,
             },
 
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/remove_if_else.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/remove_if_else.rs
index 8076bc3cc99..8e25c3f0a35 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/remove_if_else.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/remove_if_else.rs
@@ -232,6 +232,8 @@ fn slice_capacity_change(
         | Intrinsic::DerivePedersenGenerators
         | Intrinsic::ToBits(_)
         | Intrinsic::ToRadix(_)
+        | Intrinsic::ArrayRefCount
+        | Intrinsic::SliceRefCount
         | Intrinsic::FieldLessThan => SizeChange::None,
     }
 }
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
index 44e25f9d4a1..777c16dacd1 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
@@ -18,8 +18,6 @@
 //!
 //! When unrolling ACIR code, we remove reference count instructions because they are
 //! only used by Brillig bytecode.
-use std::collections::HashSet;
-
 use acvm::{acir::AcirField, FieldElement};
 
 use crate::{
@@ -39,7 +37,7 @@ use crate::{
         ssa_gen::Ssa,
     },
 };
-use fxhash::FxHashMap as HashMap;
+use fxhash::{FxHashMap as HashMap, FxHashSet as HashSet};
 
 impl Ssa {
     /// Loop unrolling can return errors, since ACIR functions need to be fully unrolled.
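To make the repeated-load bookkeeping above concrete, here is a small runnable model of what `Block::set_last_load` and `Block::keep_last_load_for` implement; a std `BTreeMap` stands in for `im::OrdMap` and alias traversal is elided, so this is a sketch of the idea rather than the compiler's code:

use std::collections::BTreeMap;

type ValueId = u32;
type InstructionId = u32;

#[derive(Default)]
struct Block {
    // Last `Load` instruction seen for each address in this block.
    last_loads: BTreeMap<ValueId, InstructionId>,
}

impl Block {
    fn set_last_load(&mut self, address: ValueId, instruction: InstructionId) {
        self.last_loads.insert(address, instruction);
    }

    // A store to (or a call taking) the address invalidates the entry.
    fn keep_last_load_for(&mut self, address: ValueId) {
        self.last_loads.remove(&address);
    }

    // A second load from `address` while the entry is still present can
    // reuse the previous load's result and be removed.
    fn repeat_load(&self, address: ValueId) -> Option<InstructionId> {
        self.last_loads.get(&address).copied()
    }
}

fn main() {
    let mut block = Block::default();
    block.set_last_load(7, 100); // v1 = load v7
    assert_eq!(block.repeat_load(7), Some(100)); // v2 = load v7 is removable
    block.keep_last_load_for(7); // store ... at v7
    assert_eq!(block.repeat_load(7), None); // the next load must stay
}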
@@ -163,9 +161,9 @@ impl Loops {
         loops.sort_by_key(|loop_| loop_.blocks.len());
 
         Self {
-            failed_to_unroll: HashSet::new(),
+            failed_to_unroll: HashSet::default(),
             yet_to_unroll: loops,
-            modified_blocks: HashSet::new(),
+            modified_blocks: HashSet::default(),
             cfg,
         }
     }
@@ -209,7 +207,7 @@ impl Loop {
         back_edge_start: BasicBlockId,
         cfg: &ControlFlowGraph,
     ) -> Self {
-        let mut blocks = HashSet::new();
+        let mut blocks = HashSet::default();
         blocks.insert(header);
 
         let mut insert = |block, stack: &mut Vec<BasicBlockId>| {
diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/context.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/context.rs
index 0c6041029da..ddc3365b551 100644
--- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/context.rs
+++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/context.rs
@@ -172,6 +172,7 @@ impl<'a> FunctionContext<'a> {
     /// Always returns a Value::Mutable wrapping the allocate instruction.
     pub(super) fn new_mutable_variable(&mut self, value_to_store: ValueId) -> Value {
         let element_type = self.builder.current_function.dfg.type_of_value(value_to_store);
+        self.builder.increment_array_reference_count(value_to_store);
         let alloc = self.builder.insert_allocate(element_type);
         self.builder.insert_store(alloc, value_to_store);
         let typ = self.builder.type_of_value(value_to_store);
@@ -735,7 +736,6 @@ impl<'a> FunctionContext<'a> {
             // Reference counting in brillig relies on us incrementing reference
             // counts when arrays/slices are constructed or indexed.
             // Thus, if we dereference an lvalue which happens to be array/slice we should increment its reference counter.
-            self.builder.increment_array_reference_count(reference);
             self.builder.insert_load(reference, element_type).into()
         })
     }
@@ -916,7 +916,10 @@ impl<'a> FunctionContext<'a> {
         let parameters = self.builder.current_function.dfg.block_parameters(entry).to_vec();
 
         for parameter in parameters {
-            self.builder.increment_array_reference_count(parameter);
+            // Avoid reference counts for immutable arrays that aren't behind references.
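+            // Only parameters that are themselves references (e.g. mutable bindings)
+            // can observe shared mutation, so only those need the increment.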
+ if self.builder.current_function.dfg.value_is_reference(parameter) { + self.builder.increment_array_reference_count(parameter); + } } entry @@ -933,7 +936,9 @@ impl<'a> FunctionContext<'a> { dropped_parameters.retain(|parameter| !terminator_args.contains(parameter)); for parameter in dropped_parameters { - self.builder.decrement_array_reference_count(parameter); + if self.builder.current_function.dfg.value_is_reference(parameter) { + self.builder.decrement_array_reference_count(parameter); + } } } diff --git a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs index c50f0a7f45c..d28236bd360 100644 --- a/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs +++ b/noir/noir-repo/compiler/noirc_evaluator/src/ssa/ssa_gen/mod.rs @@ -665,12 +665,11 @@ impl<'a> FunctionContext<'a> { values = values.map(|value| { let value = value.eval(self); - // Make sure to increment array reference counts on each let binding - self.builder.increment_array_reference_count(value); - Tree::Leaf(if let_expr.mutable { self.new_mutable_variable(value) } else { + // `new_mutable_variable` already increments rcs internally + self.builder.increment_array_reference_count(value); value::Value::Normal(value) }) }); diff --git a/noir/noir-repo/test_programs/execution_success/loop_invariant_regression/Nargo.toml b/noir/noir-repo/test_programs/execution_success/loop_invariant_regression/Nargo.toml new file mode 100644 index 00000000000..9590789f52e --- /dev/null +++ b/noir/noir-repo/test_programs/execution_success/loop_invariant_regression/Nargo.toml @@ -0,0 +1,7 @@ +[package] +name = "loop_invariant_regression" +type = "bin" +authors = [""] +compiler_version = ">=0.38.0" + +[dependencies] \ No newline at end of file diff --git a/noir/noir-repo/test_programs/execution_success/loop_invariant_regression/Prover.toml b/noir/noir-repo/test_programs/execution_success/loop_invariant_regression/Prover.toml new file mode 100644 index 00000000000..18680c805a7 --- /dev/null +++ b/noir/noir-repo/test_programs/execution_success/loop_invariant_regression/Prover.toml @@ -0,0 +1,2 @@ +x = "2" +y = "3" diff --git a/noir/noir-repo/test_programs/execution_success/loop_invariant_regression/src/main.nr b/noir/noir-repo/test_programs/execution_success/loop_invariant_regression/src/main.nr new file mode 100644 index 00000000000..25f6e92f868 --- /dev/null +++ b/noir/noir-repo/test_programs/execution_success/loop_invariant_regression/src/main.nr @@ -0,0 +1,13 @@ +// Tests a simple loop where we expect loop invariant instructions +// to be hoisted to the loop's pre-header block. 
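+// Both `x * y` and the follow-up multiplication by `x` depend only on the
+// function inputs, not on the loop index, so both multiplications are invariant.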
+fn main(x: u32, y: u32) { + loop(4, x, y); +} + +fn loop(upper_bound: u32, x: u32, y: u32) { + for _ in 0..upper_bound { + let mut z = x * y; + z = z * x; + assert_eq(z, 12); + } +} diff --git a/noir/noir-repo/test_programs/execution_success/reference_counts/Nargo.toml b/noir/noir-repo/test_programs/execution_success/reference_counts/Nargo.toml new file mode 100644 index 00000000000..ae787e0ccb9 --- /dev/null +++ b/noir/noir-repo/test_programs/execution_success/reference_counts/Nargo.toml @@ -0,0 +1,7 @@ +[package] +name = "reference_counts" +type = "bin" +authors = [""] +compiler_version = ">=0.35.0" + +[dependencies] diff --git a/noir/noir-repo/test_programs/execution_success/reference_counts/Prover.toml b/noir/noir-repo/test_programs/execution_success/reference_counts/Prover.toml new file mode 100644 index 00000000000..c01dd9462d8 --- /dev/null +++ b/noir/noir-repo/test_programs/execution_success/reference_counts/Prover.toml @@ -0,0 +1,2 @@ +x = 5 +b = true diff --git a/noir/noir-repo/test_programs/execution_success/reference_counts/src/main.nr b/noir/noir-repo/test_programs/execution_success/reference_counts/src/main.nr new file mode 100644 index 00000000000..7ab7de893fa --- /dev/null +++ b/noir/noir-repo/test_programs/execution_success/reference_counts/src/main.nr @@ -0,0 +1,40 @@ +fn main() { + let mut array = [0, 1, 2]; + assert_refcount(array, 1); + + borrow(array, std::mem::array_refcount(array)); + borrow_mut(&mut array, std::mem::array_refcount(array)); + copy_mut(array, std::mem::array_refcount(array)); +} + +fn borrow(array: [Field; 3], rc_before_call: u32) { + assert_refcount(array, rc_before_call); + println(array[0]); +} + +fn borrow_mut(array: &mut [Field; 3], rc_before_call: u32) { + assert_refcount(*array, rc_before_call + 0); // Issue! This should be rc_before_call + 1 + array[0] = 5; + println(array[0]); +} + +fn copy_mut(mut array: [Field; 3], rc_before_call: u32) { + assert_refcount(array, rc_before_call + 0); // Issue! This should be rc_before_call + 1 + array[0] = 6; + println(array[0]); +} + +fn assert_refcount(array: [Field; 3], expected: u32) { + let count = std::mem::array_refcount(array); + + // All refcounts are zero when running this as a constrained program + if std::runtime::is_unconstrained() { + if count != expected { + // Brillig doesn't print the actual & expected arguments on assertion failure + println(f"actual = {count}, expected = {expected}"); + } + assert_eq(count, expected); + } else { + assert_eq(count, 0); + } +}