Merge pull request #5 from asomoza/main
LCM and SSD-1B support added
asomoza authored Nov 10, 2023
2 parents 628a019 + 5ab2fee commit ece48a6
Showing 6 changed files with 66 additions and 13 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.MD
@@ -0,0 +1,13 @@
# Changelog

## 0.1.0 - 2023-11-09

- Initial release
- Windows installer

## 0.1.1 - 2023-11-10

- Support for LCM models
- Support for SSD-1B models
- Support for LCM LoRAs
- Now requires diffusers >= 0.23
4 changes: 3 additions & 1 deletion README.md
@@ -38,11 +38,13 @@ It is highly recommended to use the included VAE with the FP16 fix, since the VA
- Runs completely offline after the first installation.
- Powerful features only available as a desktop application.
- Easy sharing of models and LoRA metadata, since the information is stored in each model, including sample image, sample generation, triggers, and tags for filtering.
- Latent Consistency Models (LCM) and LoRAs for fast inference.
- Segmind Stable Diffusion (SSD-1B) models for VRAM savings.

## Limitations

- Only runs with Stable Diffusion XL models.
- It has the default 75 CLIP token limitation for the prompts.
- It has the default CLIP limit of 75 tokens per prompt.

You can read why [here](https://github.com/ZCode-opensource/image-artisan-xl/blob/main/EXPLANATIONS.MD).
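
As a rough illustration of the LCM and LoRA fast-inference features listed above, here is a minimal sketch using plain diffusers (>= 0.23, matching the new dependency); the model and LoRA IDs are the public Hugging Face ones and are not taken from this repository:

import torch
from diffusers import DiffusionPipeline, LCMScheduler

# Load an SDXL pipeline and swap in the LCM scheduler.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# Apply an LCM LoRA so the base model can denoise in very few steps.
pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")

image = pipe(
    "a portrait photo, studio lighting",
    num_inference_steps=4,  # LCM needs far fewer steps than a regular sampler
    guidance_scale=1.0,     # little or no classifier-free guidance with LCM
).images[0]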

4 changes: 2 additions & 2 deletions pyproject.toml
@@ -4,15 +4,15 @@ build-backend = "setuptools.build_meta"

[project]
name = "iartisanxl"
version = "0.0.1"
version = "0.1.1"
authors = [
{ name="Alvaro Somoza", email="[email protected]" },
]
description = "Dekstop application for generating images using Stable Diffusion."
requires-python = ">=3.11"
dependencies = [
"accelerate>=0.24.1 ",
"diffusers>=0.22.0",
"diffusers>=0.23.0",
"Pillow>=9.3.0",
"PyOpenGL",
"PyOpenGL_accelerate",
5 changes: 5 additions & 0 deletions src/iartisanxl/convert_model/convert_functions.py
@@ -763,6 +763,10 @@ def convert_ldm_unet_checkpoint(
"label_emb.0.2.bias"
]

# Relevant to StableDiffusionUpscalePipeline
if "num_class_embeds" in config:
new_checkpoint["class_embedding.weight"] = unet_state_dict["label_emb.weight"]

new_checkpoint["conv_in.weight"] = unet_state_dict["input_blocks.0.0.weight"]
new_checkpoint["conv_in.bias"] = unet_state_dict["input_blocks.0.0.bias"]

@@ -845,6 +849,7 @@ def convert_ldm_unet_checkpoint(

if len(attentions):
paths = renew_attention_paths(attentions)

meta_path = {
"old": f"input_blocks.{i}.1",
"new": f"down_blocks.{block_id}.attentions.{layer_in_block_id}",
2 changes: 2 additions & 0 deletions src/iartisanxl/generation/schedulers/schedulers.py
@@ -15,6 +15,7 @@
DEISMultistepScheduler,
DDPMScheduler,
DPMSolverSDEScheduler,
LCMScheduler,
)


@@ -68,4 +69,5 @@ class Scheduler:
Scheduler("LMS Karras", LMSDiscreteScheduler, dict(use_karras_sigmas=True)),
Scheduler("Euler Ancestral", EulerAncestralDiscreteScheduler, dict()),
Scheduler("KDPM 2 Ancestral", KDPM2AncestralDiscreteScheduler, dict()),
Scheduler("LCM", LCMScheduler, dict()),
]
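
For context, a registry entry is presumably resolved into a configured scheduler instance along these lines (a sketch only: the field names name, scheduler_class, and scheduler_args, the schedulers list variable, and the pipe object are assumptions, since the actual wiring is not part of this diff):

# Look up the entry the user picked and instantiate it from the
# pipeline's current scheduler config.
selected = next(s for s in schedulers if s.name == "LCM")
pipe.scheduler = selected.scheduler_class.from_config(
    pipe.scheduler.config, **selected.scheduler_args
)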
51 changes: 41 additions & 10 deletions src/iartisanxl/pipelines/txt_pipeline.py
@@ -358,6 +358,20 @@ def _get_add_time_ids(
add_time_ids = torch.tensor([add_time_ids], dtype=dtype)
return add_time_ids

def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
assert len(w.shape) == 1
w = w * 1000.0

half_dim = embedding_dim // 2
emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
emb = w.to(dtype)[:, None] * emb[None, :]
emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
if embedding_dim % 2 == 1: # zero pad
emb = torch.nn.functional.pad(emb, (0, 1)) # pylint: disable=not-callable
assert emb.shape == (w.shape[0], embedding_dim)
return emb
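
# The helper above is the standard sinusoidal guidance-scale embedding used
# with LCM-distilled UNets: w is scaled by 1000 and mapped to concatenated
# sin/cos features. A quick shape check with hypothetical values:
#
#   w = torch.tensor([8.0 - 1.0])  # guidance_scale - 1
#   emb = pipeline.get_guidance_scale_embedding(w, embedding_dim=256)
#   assert emb.shape == (1, 256)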

@torch.no_grad()
def __call__(
self,
@@ -387,6 +401,10 @@ def __call__(
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
self.logger.debug("Using device: %s", device)

do_classifier_free_guidance = False
if guidance_scale > 1:
do_classifier_free_guidance = True
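# With an LCM checkpoint the caller would presumably pass guidance_scale <= 1,
# so the negative-prompt branches below are skipped and guidance is injected
# through timestep_cond instead (an assumption about intended usage).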

status_update("Encoding the prompt...")
text_encoder_lora_scale = (
cross_attention_kwargs.get("scale", None)
@@ -455,16 +473,25 @@ def __call__(
negative_add_time_ids = add_time_ids

status_update("Preparing emdeddings...")
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
add_text_embeds = torch.cat(
[negative_pooled_prompt_embeds, add_text_embeds], dim=0
)
add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)

if do_classifier_free_guidance:
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
add_text_embeds = torch.cat(
[negative_pooled_prompt_embeds, add_text_embeds], dim=0
)
add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)

prompt_embeds = prompt_embeds.to(device)
add_text_embeds = add_text_embeds.to(device)
add_time_ids = add_time_ids.to(device)

timestep_cond = None
if self.unet.config.time_cond_proj_dim is not None:
guidance_scale_tensor = torch.tensor(guidance_scale - 1).repeat(1)
timestep_cond = self.get_guidance_scale_embedding(
guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim
).to(device=device, dtype=latents.dtype)
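# time_cond_proj_dim is only set on LCM-style distilled UNets; for those, the
# (guidance_scale - 1) embedding above conditions the UNet directly in place
# of classifier-free guidance.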

status_update("Generating image...")
num_warmup_steps = max(
len(timesteps) - num_inference_steps * self.scheduler.order, 0
@@ -478,7 +505,9 @@ def __call__(
return

# expand the latents
latent_model_input = torch.cat([latents] * 2)
latent_model_input = (
torch.cat([latents] * 2) if do_classifier_free_guidance else latents
)
latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)

# predict the noise residual
@@ -491,6 +520,7 @@ def __call__(
latent_model_input,
t,
encoder_hidden_states=prompt_embeds,
timestep_cond=timestep_cond,
cross_attention_kwargs=cross_attention_kwargs,
added_cond_kwargs=added_cond_kwargs,
return_dict=False,
@@ -501,10 +531,11 @@ def __call__(
return

# perform guidance
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
noise_pred = noise_pred_uncond + guidance_scale * (
noise_pred_text - noise_pred_uncond
)
if do_classifier_free_guidance:
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
noise_pred = noise_pred_uncond + guidance_scale * (
noise_pred_text - noise_pred_uncond
)

# compute the previous noisy sample x_t -> x_t-1
latents = self.scheduler.step(
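End to end, the new mode would be exercised with something like this (guidance_scale and num_inference_steps appear in this diff; the prompt parameter name and the rest of the call signature are assumptions):

# Hypothetical call on an already-constructed pipeline with an LCM model loaded.
image = pipeline(
    prompt="a watercolor landscape",
    num_inference_steps=6,  # LCM converges in a handful of steps
    guidance_scale=1.0,     # <= 1 keeps do_classifier_free_guidance False
)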
