4 changes: 2 additions & 2 deletions modules/util/triton_mm_8bit.py
@@ -47,7 +47,7 @@
)

@triton.jit
-def __mm_kernel(
+def _mm_kernel(
a_ptr, b_ptr, c_ptr,
M, N, K,
stride_am, stride_ak,
@@ -109,7 +109,7 @@ def mm_8bit(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:

def grid(META):
return (triton.cdiv(N, META['BLOCK_SIZE_N']) , triton.cdiv(M, META['BLOCK_SIZE_M']), )
-__mm_kernel[grid](
+_mm_kernel[grid](
a, b, c,
M, N, K,
a.stride(0), a.stride(1),
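A brief note on the rename above: a Triton kernel is launched through a grid callable that receives the autotuned meta-parameters (the `META['BLOCK_SIZE_*']` lookups in the hunk), and the bracketed call site must use the same name as the `@triton.jit` definition. Below is a minimal, self-contained sketch of that launch pattern; the copy kernel, its arguments, and `BLOCK_SIZE=256` are illustrative assumptions, not code from triton_mm_8bit.py.

```python
import torch
import triton
import triton.language as tl


@triton.jit
def _copy_kernel(x_ptr, y_ptr, n_elements, BLOCK_SIZE: tl.constexpr):
    # Each program instance copies one BLOCK_SIZE-wide chunk.
    pid = tl.program_id(axis=0)
    offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
    mask = offsets < n_elements
    x = tl.load(x_ptr + offsets, mask=mask)
    tl.store(y_ptr + offsets, x, mask=mask)


x = torch.randn(1024, device="cuda")
y = torch.empty_like(x)


def grid(META):
    # Same cdiv pattern as the mm_8bit launch: one program per block of work.
    return (triton.cdiv(x.numel(), META["BLOCK_SIZE"]),)


# The bracket syntax binds the grid; the kernel is then called like a function.
_copy_kernel[grid](x, y, x.numel(), BLOCK_SIZE=256)
```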
12 changes: 6 additions & 6 deletions requirements-cuda.txt
@@ -1,10 +1,10 @@
# pytorch
--extra-index-url https://download.pytorch.org/whl/cu128
-torch==2.8.0+cu128
-torchvision==0.23.0+cu128
-onnxruntime-gpu==1.22.0
-nvidia-nccl-cu12==2.27.3; sys_platform == "linux"
-triton-windows==3.4.0.post20; sys_platform == "win32"
+torch==2.9.1+cu128
+torchvision==0.24.1+cu128
+onnxruntime-gpu==1.23.2
+nvidia-nccl-cu12==2.27.5; sys_platform == "linux"
+triton-windows==3.5.1.post24; sys_platform == "win32"

# optimizers
-bitsandbytes==0.46.0 # bitsandbytes for 8-bit optimizers and weight quantization
+bitsandbytes==0.49.1 # bitsandbytes for 8-bit optimizers and weight quantization
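As a quick post-install check of the CUDA pins above, a hedged sketch that assumes nothing beyond a working install of the +cu128 wheels:

```python
# Sanity check after installing requirements-cuda.txt: confirm the pinned
# torch build and the CUDA 12.8 runtime are actually the ones in use.
import torch

print(torch.__version__)           # expected to start with "2.9.1"
print(torch.version.cuda)          # expected "12.8" for the +cu128 wheels
print(torch.cuda.is_available())   # True once a matching driver is present
```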
6 changes: 3 additions & 3 deletions requirements-default.txt
@@ -1,7 +1,7 @@
# pytorch
-torch==2.8.0
-torchvision==0.23.0
-onnxruntime==1.22.1
+torch==2.9.1
+torchvision==0.24.1
+onnxruntime==1.23.2

# optimizers
# TODO
10 changes: 5 additions & 5 deletions requirements-global.txt
@@ -13,16 +13,16 @@ yt-dlp #no pinned version, frequently updated for compatibility with sites
scenedetect==0.6.6

# pytorch
-accelerate==1.7.0
-safetensors==0.5.3
-tensorboard==2.19.0
-pytorch-lightning==2.5.1.post0
+accelerate==1.12.0
+safetensors==0.7.0
+tensorboard==2.20.0
+pytorch-lightning==2.6.0

# diffusion models
#Note: check whether Qwen bugs in diffusers have been fixed before upgrading diffusers (see BaseQwenSetup):
-e git+https://github.com/huggingface/diffusers.git@256e010#egg=diffusers
gguf==0.17.1
-transformers==4.56.2
+transformers==4.57.3
sentencepiece==0.2.1 # transitive dependency of transformers for tokenizer loading
omegaconf==2.3.0 # needed to load stable diffusion from single ckpt files
invisible-watermark==0.2.0 # needed for the SDXL pipeline
6 changes: 3 additions & 3 deletions requirements-rocm.txt
Contributor
We can add bitsandbytes here but not in requirements-global.txt or requirements-cpu.txt because macOS doesn't support 8-bit optimizers and that's like 90% of what we use bitsandbytes for.

Collaborator
Sounds good, but that needs to be separate (and tested!); I don't have an AMD GPU ;).
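To illustrate the point made in the thread above, a minimal sketch of guarding 8-bit optimizer use by availability; `make_optimizer` is a hypothetical helper rather than code from this repository, and it assumes bitsandbytes is only installed where the platform-specific requirements files pull it in.

```python
# Hypothetical sketch: use bitsandbytes' 8-bit AdamW where the package is
# installed (CUDA/ROCm platforms) and fall back to plain AdamW elsewhere,
# e.g. on macOS, which does not support the 8-bit optimizers.
import importlib.util

import torch


def make_optimizer(params, lr=1e-4):
    if importlib.util.find_spec("bitsandbytes") is not None:
        import bitsandbytes as bnb
        # Stores Adam state in 8 bits instead of 32, shrinking optimizer memory.
        return bnb.optim.AdamW8bit(params, lr=lr)
    return torch.optim.AdamW(params, lr=lr)
```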

@@ -3,9 +3,9 @@

# pytorch
--extra-index-url https://download.pytorch.org/whl/rocm6.3
-torch==2.7.1+rocm6.3 #intentionally not upgraded because of reported problems
-torchvision==0.22.1+rocm6.3
-onnxruntime==1.22.1
+torch==2.9.1+rocm6.3
+torchvision==0.24.1+rocm6.3
+onnxruntime==1.23.2

# optimizers
# TODO