From 3aaea4f699132aaae33ebc701356c53785753eb3 Mon Sep 17 00:00:00 2001
From: laurent <laurent.mazare@gmail.com>
Date: Thu, 26 Sep 2024 10:20:51 +0200
Subject: [PATCH] Avoid the faster cuda kernels by default.

---
 candle-examples/examples/flux/main.rs | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/candle-examples/examples/flux/main.rs b/candle-examples/examples/flux/main.rs
index 17c406b4a..24b1fa2bc 100644
--- a/candle-examples/examples/flux/main.rs
+++ b/candle-examples/examples/flux/main.rs
@@ -44,6 +44,10 @@ struct Args {
 
     #[arg(long, value_enum, default_value = "schnell")]
     model: Model,
+
+    /// Use the faster cuda kernels (buggy at the moment) instead of forcing the dmmv ones.
+    #[arg(long)]
+    no_dmmv: bool,
 }
 
 #[derive(Debug, Clone, Copy, clap::ValueEnum, PartialEq, Eq)]
@@ -65,6 +69,7 @@ fn run(args: Args) -> Result<()> {
         decode_only,
         model,
         quantized,
+        ..
     } = args;
     let width = width.unwrap_or(1360);
     let height = height.unwrap_or(768);
@@ -244,5 +249,7 @@ fn run(args: Args) -> Result<()> {
 
 fn main() -> Result<()> {
     let args = Args::parse();
+    #[cfg(feature = "cuda")]
+    candle::quantized::cuda::set_force_dmmv(!args.no_dmmv);
     run(args)
 }