From 65fcbf5f60351d49ee4b16ad17020a1bdc5e35d6 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Thu, 12 Dec 2024 21:34:54 +0000
Subject: [PATCH] Bump bitsandbytes.

The new version contains improvements to state_dict loading/saving for
LLM.int8 and promises improved speed on some HW.
---
 invokeai/backend/quantization/bnb_llm_int8.py | 7 ++-----
 pyproject.toml                                | 2 +-
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/invokeai/backend/quantization/bnb_llm_int8.py b/invokeai/backend/quantization/bnb_llm_int8.py
index 02f94936e9..52b342e96c 100644
--- a/invokeai/backend/quantization/bnb_llm_int8.py
+++ b/invokeai/backend/quantization/bnb_llm_int8.py
@@ -25,12 +25,9 @@ class InvokeInt8Params(bnb.nn.Int8Params):
             self.CB = self.data
             self.SCB = self.SCB.cuda()
         else:
-            # we store the 8-bit rows-major weight
-            # we convert this weight to the turning/ampere weight during the first inference pass
+            # We quantize the weight and store in 8bit row-major
             B = self.data.contiguous().half().cuda(device)
-            CB, CBt, SCB, SCBt, coo_tensorB = bnb.functional.double_quant(B)
-            del CBt
-            del SCBt
+            CB, SCB, _ = bnb.functional.int8_vectorwise_quant(B)
             self.data = CB
             self.CB = CB
             self.SCB = SCB
diff --git a/pyproject.toml b/pyproject.toml
index c9cca90a03..1a989e93f1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,7 +34,7 @@ classifiers = [
 dependencies = [
     # Core generation dependencies, pinned for reproducible builds.
     "accelerate==1.0.1",
-    "bitsandbytes==0.43.3; sys_platform!='darwin'",
+    "bitsandbytes==0.45.0; sys_platform!='darwin'",
     "clip_anytorch==2.6.0",  # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip",
     "compel==2.0.2",
     "controlnet-aux==0.0.7",