Bump bitsandbytes. The new version contains improvements to state_dict loading/saving for LLM.int8 and promises improved speed on some hardware.

This commit is contained in:
Ryan Dick 2024-12-12 21:34:54 +00:00
parent d3916dbdb6
commit 65fcbf5f60
2 changed files with 3 additions and 6 deletions

View File

@ -25,12 +25,9 @@ class InvokeInt8Params(bnb.nn.Int8Params):
self.CB = self.data self.CB = self.data
self.SCB = self.SCB.cuda() self.SCB = self.SCB.cuda()
else: else:
# we store the 8-bit rows-major weight # We quantize the weight and store in 8bit row-major
# we convert this weight to the turning/ampere weight during the first inference pass
B = self.data.contiguous().half().cuda(device) B = self.data.contiguous().half().cuda(device)
CB, CBt, SCB, SCBt, coo_tensorB = bnb.functional.double_quant(B) CB, SCB, _ = bnb.functional.int8_vectorwise_quant(B)
del CBt
del SCBt
self.data = CB self.data = CB
self.CB = CB self.CB = CB
self.SCB = SCB self.SCB = SCB

View File

@ -34,7 +34,7 @@ classifiers = [
dependencies = [ dependencies = [
# Core generation dependencies, pinned for reproducible builds. # Core generation dependencies, pinned for reproducible builds.
"accelerate==1.0.1", "accelerate==1.0.1",
"bitsandbytes==0.43.3; sys_platform!='darwin'", "bitsandbytes==0.45.0; sys_platform!='darwin'",
"clip_anytorch==2.6.0", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip", "clip_anytorch==2.6.0", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip",
"compel==2.0.2", "compel==2.0.2",
"controlnet-aux==0.0.7", "controlnet-aux==0.0.7",