From 65fcbf5f60351d49ee4b16ad17020a1bdc5e35d6 Mon Sep 17 00:00:00 2001
From: Ryan Dick
Date: Thu, 12 Dec 2024 21:34:54 +0000
Subject: [PATCH] Bump bitsandbytes.

The new version contains improvements to state_dict loading/saving for
LLM.int8 and promises improved speed on some HW.
---
 invokeai/backend/quantization/bnb_llm_int8.py | 7 ++-----
 pyproject.toml                                | 2 +-
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/invokeai/backend/quantization/bnb_llm_int8.py b/invokeai/backend/quantization/bnb_llm_int8.py
index 02f94936e9..52b342e96c 100644
--- a/invokeai/backend/quantization/bnb_llm_int8.py
+++ b/invokeai/backend/quantization/bnb_llm_int8.py
@@ -25,12 +25,9 @@ class InvokeInt8Params(bnb.nn.Int8Params):
             self.CB = self.data
             self.SCB = self.SCB.cuda()
         else:
-            # we store the 8-bit rows-major weight
-            # we convert this weight to the turning/ampere weight during the first inference pass
+            # We quantize the weight and store in 8bit row-major
             B = self.data.contiguous().half().cuda(device)
-            CB, CBt, SCB, SCBt, coo_tensorB = bnb.functional.double_quant(B)
-            del CBt
-            del SCBt
+            CB, SCB, _ = bnb.functional.int8_vectorwise_quant(B)
             self.data = CB
             self.CB = CB
             self.SCB = SCB
diff --git a/pyproject.toml b/pyproject.toml
index c9cca90a03..1a989e93f1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,7 +34,7 @@ classifiers = [
 dependencies = [
     # Core generation dependencies, pinned for reproducible builds.
     "accelerate==1.0.1",
-    "bitsandbytes==0.43.3; sys_platform!='darwin'",
+    "bitsandbytes==0.45.0; sys_platform!='darwin'",
     "clip_anytorch==2.6.0",  # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip",
     "compel==2.0.2",
     "controlnet-aux==0.0.7",