Merge pull request #5 from asomoza/main
LCM and SSD-1B support added
asomoza authored Nov 10, 2023
2 parents 628a019 + 5ab2fee commit ece48a6
Showing 6 changed files with 66 additions and 13 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.MD
@@ -0,0 +1,13 @@
# Changelog

## 0.1.0 - 2023-11-09

- Initial release
- Windows installer

## 0.1.1 - 2023-11-10

- Support for LCM models
- Support for SSD-1B models
- Support for LCM LoRAs
- Now requires diffusers >= 0.23
4 changes: 3 additions & 1 deletion README.md
@@ -38,11 +38,13 @@ It is highly recommended to use the included VAE with the FP16 fix, since the VA
- Runs completely offline after the first installation.
- Powerful features only available as a desktop application.
- Easy sharing of models and LoRA metadata, since the information is stored in each model, including sample image, sample generation, triggers, and tags for filtering.
- Latent Consistency Models (LCM) and LoRAs for fast inference.
- Segmind Stable Diffusion (SSD-1B) models for VRAM savings.

## Limitations

- Only runs with Stable Diffusion XL models.
- It has the default 75 CLIP token limitation for the prompts.
- It has the default CLIP limit of 75 tokens per prompt.

You can read why [here](https://github.com/ZCode-opensource/image-artisan-xl/blob/main/EXPLANATIONS.MD).
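
As a rough illustration of the LCM and LoRA fast-inference features listed above, here is a minimal sketch using plain diffusers (>= 0.23, matching the new dependency); the model and LoRA IDs are the public Hugging Face ones and are not taken from this repository:

import torch
from diffusers import DiffusionPipeline, LCMScheduler

# Load an SDXL pipeline and swap in the LCM scheduler.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

# Apply an LCM LoRA so the base model can denoise in very few steps.
pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")

image = pipe(
    "a portrait photo, studio lighting",
    num_inference_steps=4,  # LCM needs far fewer steps than a regular sampler
    guidance_scale=1.0,     # little or no classifier-free guidance with LCM
).images[0]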

4 changes: 2 additions & 2 deletions pyproject.toml
@@ -4,15 +4,15 @@ build-backend = "setuptools.build_meta"

[project]
name = "iartisanxl"
version = "0.0.1"
version = "0.1.1"
authors = [
{ name="Alvaro Somoza", email="[email protected]" },
]
description = "Dekstop application for generating images using Stable Diffusion."
requires-python = ">=3.11"
dependencies = [
"accelerate>=0.24.1 ",
"diffusers>=0.22.0",
"diffusers>=0.23.0",
"Pillow>=9.3.0",
"PyOpenGL",
"PyOpenGL_accelerate",
5 changes: 5 additions & 0 deletions src/iartisanxl/convert_model/convert_functions.py
@@ -763,6 +763,10 @@ def convert_ldm_unet_checkpoint(
"label_emb.0.2.bias"
]

# Relevant to StableDiffusionUpscalePipeline
if "num_class_embeds" in config:
new_checkpoint["class_embedding.weight"] = unet_state_dict["label_emb.weight"]

new_checkpoint["conv_in.weight"] = unet_state_dict["input_blocks.0.0.weight"]
new_checkpoint["conv_in.bias"] = unet_state_dict["input_blocks.0.0.bias"]

@@ -845,6 +849,7 @@ def convert_ldm_unet_checkpoint(

if len(attentions):
paths = renew_attention_paths(attentions)

meta_path = {
"old": f"input_blocks.{i}.1",
"new": f"down_blocks.{block_id}.attentions.{layer_in_block_id}",
2 changes: 2 additions & 0 deletions src/iartisanxl/generation/schedulers/schedulers.py
@@ -15,6 +15,7 @@
DEISMultistepScheduler,
DDPMScheduler,
DPMSolverSDEScheduler,
LCMScheduler,
)


@@ -68,4 +69,5 @@ class Scheduler:
Scheduler("LMS Karras", LMSDiscreteScheduler, dict(use_karras_sigmas=True)),
Scheduler("Euler Ancestral", EulerAncestralDiscreteScheduler, dict()),
Scheduler("KDPM 2 Ancestral", KDPM2AncestralDiscreteScheduler, dict()),
Scheduler("LCM", LCMScheduler, dict()),
]
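
For context, a registry entry is presumably resolved into a configured scheduler instance along these lines (a sketch only: the field names name, scheduler_class, and scheduler_args, the schedulers list variable, and the pipe object are assumptions, since the actual wiring is not part of this diff):

# Look up the entry the user picked and instantiate it from the
# pipeline's current scheduler config.
selected = next(s for s in schedulers if s.name == "LCM")
pipe.scheduler = selected.scheduler_class.from_config(
    pipe.scheduler.config, **selected.scheduler_args
)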
51 changes: 41 additions & 10 deletions src/iartisanxl/pipelines/txt_pipeline.py
@@ -358,6 +358,20 @@ def _get_add_time_ids(
add_time_ids = torch.tensor([add_time_ids], dtype=dtype)
return add_time_ids

def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
assert len(w.shape) == 1
w = w * 1000.0

half_dim = embedding_dim // 2
emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
emb = w.to(dtype)[:, None] * emb[None, :]
emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
if embedding_dim % 2 == 1: # zero pad
emb = torch.nn.functional.pad(emb, (0, 1)) # pylint: disable=not-callable
assert emb.shape == (w.shape[0], embedding_dim)
return emb
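
# The helper above is the standard sinusoidal guidance-scale embedding used
# with LCM-distilled UNets: w is scaled by 1000 and mapped to concatenated
# sin/cos features. A quick shape check with hypothetical values:
#
#   w = torch.tensor([8.0 - 1.0])  # guidance_scale - 1
#   emb = pipeline.get_guidance_scale_embedding(w, embedding_dim=256)
#   assert emb.shape == (1, 256)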

@torch.no_grad()
def __call__(
self,
@@ -387,6 +401,10 @@ def __call__(
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
self.logger.debug("Using device: %s", device)

do_classifier_free_guidance = False
if guidance_scale > 1:
do_classifier_free_guidance = True
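# With an LCM checkpoint the caller would presumably pass guidance_scale <= 1,
# so the negative-prompt branches below are skipped and guidance is injected
# through timestep_cond instead (an assumption about intended usage).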

status_update("Encoding the prompt...")
text_encoder_lora_scale = (
cross_attention_kwargs.get("scale", None)
@@ -455,16 +473,25 @@ def __call__(
negative_add_time_ids = add_time_ids

status_update("Preparing emdeddings...")
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
add_text_embeds = torch.cat(
[negative_pooled_prompt_embeds, add_text_embeds], dim=0
)
add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)

if do_classifier_free_guidance:
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
add_text_embeds = torch.cat(
[negative_pooled_prompt_embeds, add_text_embeds], dim=0
)
add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0)

prompt_embeds = prompt_embeds.to(device)
add_text_embeds = add_text_embeds.to(device)
add_time_ids = add_time_ids.to(device)

timestep_cond = None
if self.unet.config.time_cond_proj_dim is not None:
guidance_scale_tensor = torch.tensor(guidance_scale - 1).repeat(1)
timestep_cond = self.get_guidance_scale_embedding(
guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim
).to(device=device, dtype=latents.dtype)
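# time_cond_proj_dim is only set on LCM-style distilled UNets; for those, the
# (guidance_scale - 1) embedding above conditions the UNet directly in place
# of classifier-free guidance.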

status_update("Generating image...")
num_warmup_steps = max(
len(timesteps) - num_inference_steps * self.scheduler.order, 0
@@ -478,7 +505,9 @@ def __call__(
return

# expand the latents
latent_model_input = torch.cat([latents] * 2)
latent_model_input = (
torch.cat([latents] * 2) if do_classifier_free_guidance else latents
)
latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)

# predict the noise residual
@@ -491,6 +520,7 @@ def __call__(
latent_model_input,
t,
encoder_hidden_states=prompt_embeds,
timestep_cond=timestep_cond,
cross_attention_kwargs=cross_attention_kwargs,
added_cond_kwargs=added_cond_kwargs,
return_dict=False,
@@ -501,10 +531,11 @@ def __call__(
return

# perform guidance
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
noise_pred = noise_pred_uncond + guidance_scale * (
noise_pred_text - noise_pred_uncond
)
if do_classifier_free_guidance:
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
noise_pred = noise_pred_uncond + guidance_scale * (
noise_pred_text - noise_pred_uncond
)

# compute the previous noisy sample x_t -> x_t-1
latents = self.scheduler.step(
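End to end, the new mode would be exercised with something like this (guidance_scale and num_inference_steps appear in this diff; the prompt parameter name and the rest of the call signature are assumptions):

# Hypothetical call on an already-constructed pipeline with an LCM model loaded.
image = pipeline(
    prompt="a watercolor landscape",
    num_inference_steps=6,  # LCM converges in a handful of steps
    guidance_scale=1.0,     # <= 1 keeps do_classifier_free_guidance False
)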
