Bump bitsandbytes. The new version contains improvements to state_dict loading/saving for LLM.int8 and promises improved speed on some hardware.

This commit is contained in:
Ryan Dick 2024-12-12 21:34:54 +00:00
parent d3916dbdb6
commit 65fcbf5f60
2 changed files with 3 additions and 6 deletions

View File

@ -25,12 +25,9 @@ class InvokeInt8Params(bnb.nn.Int8Params):
self.CB = self.data
self.SCB = self.SCB.cuda()
else:
# we store the 8-bit rows-major weight
# we convert this weight to the turning/ampere weight during the first inference pass
# We quantize the weight and store in 8bit row-major
B = self.data.contiguous().half().cuda(device)
CB, CBt, SCB, SCBt, coo_tensorB = bnb.functional.double_quant(B)
del CBt
del SCBt
CB, SCB, _ = bnb.functional.int8_vectorwise_quant(B)
self.data = CB
self.CB = CB
self.SCB = SCB

View File

@ -34,7 +34,7 @@ classifiers = [
dependencies = [
# Core generation dependencies, pinned for reproducible builds.
"accelerate==1.0.1",
"bitsandbytes==0.43.3; sys_platform!='darwin'",
"bitsandbytes==0.45.0; sys_platform!='darwin'",
"clip_anytorch==2.6.0", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip",
"compel==2.0.2",
"controlnet-aux==0.0.7",