We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 769215a, commit b18f54f. Copy full SHA for b18f54f
hqq/utils/vllm.py
@@ -463,8 +463,6 @@ def get_quant_method(
463
class HQQGemLiteVLLMLinear(HQQBaseVLLMLinear):
464
"""Linear HQQ VLLM with GemLite backend"""
465
466
- gemlite_packing_bitwidth = 32
467
-
468
def __init__(
469
self,
470
quant_config: QuantizationConfig,
@@ -492,7 +490,6 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
492
490
layer.scale.view(-1, 1),
493
491
layer.zero.view(-1, 1),
494
bias=None,
495
- packing_bitwidth=HQQGemLiteVLLMLinear.gemlite_packing_bitwidth,
496
)
497
498
layer.gemlite_linear = gemlite_linear
0 commit comments