Bump bitsandbytes. The new version contains improvements to state_dict loading/saving for LLM.int8 and promises improved speed on some hardware.

This commit is contained in:
Ryan Dick 2024-12-12 21:34:54 +00:00
parent d3916dbdb6
commit 65fcbf5f60
2 changed files with 3 additions and 6 deletions

View File

@ -25,12 +25,9 @@ class InvokeInt8Params(bnb.nn.Int8Params):
self.CB = self.data self.CB = self.data
self.SCB = self.SCB.cuda() self.SCB = self.SCB.cuda()
else: else:
# we store the 8-bit rows-major weight # We quantize the weight and store in 8bit row-major
# we convert this weight to the turning/ampere weight during the first inference pass
B = self.data.contiguous().half().cuda(device) B = self.data.contiguous().half().cuda(device)
CB, CBt, SCB, SCBt, coo_tensorB = bnb.functional.double_quant(B) CB, SCB, _ = bnb.functional.int8_vectorwise_quant(B)
del CBt
del SCBt
self.data = CB self.data = CB
self.CB = CB self.CB = CB
self.SCB = SCB self.SCB = SCB

View File

@ -34,7 +34,7 @@ classifiers = [
dependencies = [ dependencies = [
# Core generation dependencies, pinned for reproducible builds. # Core generation dependencies, pinned for reproducible builds.
"accelerate==1.0.1", "accelerate==1.0.1",
"bitsandbytes==0.43.3; sys_platform!='darwin'", "bitsandbytes==0.45.0; sys_platform!='darwin'",
"clip_anytorch==2.6.0", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip", "clip_anytorch==2.6.0", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip",
"compel==2.0.2", "compel==2.0.2",
"controlnet-aux==0.0.7", "controlnet-aux==0.0.7",