Default to avoiding the faster CUDA kernels.

huggingface · Sep 26, 2024 · 3aaea4f · 3aaea4f
1 parent fd3b53f
commit 3aaea4f
Showing 1 changed file with 7 additions and 0 deletions.
diff --git a/candle-examples/examples/flux/main.rs b/candle-examples/examples/flux/main.rs
@@ -44,6 +44,10 @@ struct Args {
 
  #[arg(long, value_enum, default_value = "schnell")]
  model: Model,
+
+ /// Use the faster kernels which are buggy at the moment.
+ #[arg(long)]
+ no_dmmv: bool,
 }
 
 #[derive(Debug, Clone, Copy, clap::ValueEnum, PartialEq, Eq)]
@@ -65,6 +69,7 @@ fn run(args: Args) -> Result<()> {
  decode_only,
  model,
  quantized,
+ ..
  } = args;
  let width = width.unwrap_or(1360);
  let height = height.unwrap_or(768);
@@ -244,5 +249,7 @@ fn run(args: Args) -> Result<()> {
 
 fn main() -> Result<()> {
  let args = Args::parse();
+ #[cfg(feature = "cuda")]
+ candle::quantized::cuda::set_force_dmmv(!args.no_dmmv);
  run(args)
 }