From 521157e390aa8bca62953e251495257e334a9477 Mon Sep 17 00:00:00 2001
From: Chang Xu
Date: Thu, 28 Dec 2023 21:36:33 +0800
Subject: [PATCH] [Cherry-Pick]Cp fit paddle26 (#1823)

---
 paddleslim/quant/advanced/gptq.py             | 19 +++++++++++++------
 paddleslim/quant/advanced/piecewise_search.py |  3 +++
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/paddleslim/quant/advanced/gptq.py b/paddleslim/quant/advanced/gptq.py
index 96566858f..5ae47205c 100644
--- a/paddleslim/quant/advanced/gptq.py
+++ b/paddleslim/quant/advanced/gptq.py
@@ -106,8 +106,9 @@ def fasterquant(self,
         H = self.hessian
         del self.hessian
         dead = paddle.where(paddle.diag(H) == 0)
-        H[dead, dead] = 1
-        W[:, dead] = 0
+        if dead[0].shape[0] != 0:
+            H[dead, dead] = 1
+            W[:, dead] = 0
         del dead
         if actorder:
             perm = paddle.argsort(paddle.diag(H), descending=True)
@@ -122,9 +123,15 @@ def fasterquant(self,
         damp = percdamp * paddle.mean(paddle.diag(H))
         diag = paddle.arange(self.columns)
         H[diag, diag] += damp
-
-        H = paddle.inverse(H)
-        H = paddle.linalg.cholesky(H, upper=True)
+        try:
+            H = paddle.inverse(H)
+            H = paddle.linalg.cholesky(H, upper=True)
+        except Exception:
+            print('Skipping GPTQ for this layer.')
+            print(
+                'To apply GPTQ to this layer, try setting a larger damp_percent or increasing the number of samples.'
+            )
+            return
         Hinv = H

         for i1 in range(0, self.columns, blocksize):
@@ -182,4 +189,4 @@ def fasterquant(self,
         self.quantized = True

         del H, Q, Hinv, W, Losses
-        paddle.device.cuda.empty_cache()
+        paddle.device.cuda.empty_cache()
\ No newline at end of file

diff --git a/paddleslim/quant/advanced/piecewise_search.py b/paddleslim/quant/advanced/piecewise_search.py
index e326f2e55..a95b2a1c7 100644
--- a/paddleslim/quant/advanced/piecewise_search.py
+++ b/paddleslim/quant/advanced/piecewise_search.py
@@ -97,6 +97,8 @@ def search(self, layer_name, sampled_input, act_abs_max, weight):
             mask_for_search = paddle.where(labels == centroids.argsort()[i],
                                            1., 0.)
             mask_for_ones = paddle.where(mask_for_search == 0., 1., 0.)
+            mask_for_search = mask_for_search.cast(dtype)
+            mask_for_ones = mask_for_ones.cast(dtype)

             while alpha <= alpha_max:
                 if alpha < 1:
@@ -125,6 +127,7 @@ def search(self, layer_name, sampled_input, act_abs_max, weight):
                 if smooth_scale_out is not None:
                     mask_for_ones_new = paddle.where(
                         smooth_scale_out == 0., 1., 0.)
+                    mask_for_ones_new = mask_for_ones_new.cast(dtype)
                     mask_for_ones *= mask_for_ones_new
                 smooth_scale_ = smooth_scale_out + smooth_scale
                 smooth_scale_tmp = smooth_scale_ + mask_for_ones
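
A minimal, self-contained sketch of the two failure modes the gptq.py hunks guard against, assuming stand-in values for the Hessian H, columns, and percdamp (none of these come from PaddleSlim; in fasterquant() the Hessian is accumulated from calibration data). Single-argument paddle.where returns index tensors that can be empty, and paddle.linalg.cholesky raises when the damped Hessian is not positive definite:

    import paddle

    # Stand-in Hessian; in fasterquant() H is accumulated from calibration data.
    columns = 4
    percdamp = 0.01
    H = paddle.eye(columns)

    # Single-argument paddle.where returns a tuple of index tensors, which is
    # empty when no diagonal entry is zero -- hence the new guard in the patch.
    dead = paddle.where(paddle.diag(H) == 0)
    if dead[0].shape[0] != 0:
        H[dead, dead] = 1

    # Damp the diagonal, then invert and factorize. Cholesky raises when the
    # damped Hessian is not positive definite, which the patch turns into a
    # per-layer skip instead of a crash.
    damp = percdamp * paddle.mean(paddle.diag(H))
    diag = paddle.arange(columns)
    H[diag, diag] += damp
    try:
        Hinv = paddle.linalg.cholesky(paddle.inverse(H), upper=True)
    except Exception:
        Hinv = None  # the real code prints a hint and returns early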
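
Similarly, a hedged sketch of why the piecewise_search.py hunks cast their masks, assuming a made-up labels tensor and a dtype of 'float16' (in search() these come from the clustering step and the layer's weights). paddle.where(cond, 1., 0.) yields float32 masks, and the "fit paddle26" title suggests Paddle 2.6 no longer tolerates mixing those masks with float16/bfloat16 tensors in the later elementwise ops, so the patch aligns dtypes up front:

    import paddle

    dtype = 'float16'                        # assumed working dtype of the layer
    labels = paddle.to_tensor([0, 1, 0, 1])  # stand-in cluster labels

    # paddle.where(cond, 1., 0.) produces float32 masks regardless of the
    # surrounding computation's dtype.
    mask_for_search = paddle.where(labels == 0, 1., 0.)
    mask_for_ones = paddle.where(mask_for_search == 0., 1., 0.)

    # The patch casts the masks so that later elementwise ops against
    # float16 tensors (e.g. the smooth scales) use one consistent dtype.
    mask_for_search = mask_for_search.cast(dtype)
    mask_for_ones = mask_for_ones.cast(dtype)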