mirror of
https://github.com/invoke-ai/InvokeAI.git
synced 2025-01-05 10:27:02 +08:00
Bump bitsandbytes. The new version contains improvements to state_dict loading/saving for LLM.int8 and promises improved speed on some HW.
This commit is contained in:
parent
d3916dbdb6
commit
65fcbf5f60
@ -25,12 +25,9 @@ class InvokeInt8Params(bnb.nn.Int8Params):
|
||||
self.CB = self.data
|
||||
self.SCB = self.SCB.cuda()
|
||||
else:
|
||||
# we store the 8-bit rows-major weight
|
||||
# we convert this weight to the Turing/Ampere weight during the first inference pass
|
||||
# We quantize the weight and store in 8bit row-major
|
||||
B = self.data.contiguous().half().cuda(device)
|
||||
CB, CBt, SCB, SCBt, coo_tensorB = bnb.functional.double_quant(B)
|
||||
del CBt
|
||||
del SCBt
|
||||
CB, SCB, _ = bnb.functional.int8_vectorwise_quant(B)
|
||||
self.data = CB
|
||||
self.CB = CB
|
||||
self.SCB = SCB
|
||||
|
@ -34,7 +34,7 @@ classifiers = [
|
||||
dependencies = [
|
||||
# Core generation dependencies, pinned for reproducible builds.
|
||||
"accelerate==1.0.1",
|
||||
"bitsandbytes==0.43.3; sys_platform!='darwin'",
|
||||
"bitsandbytes==0.45.0; sys_platform!='darwin'",
|
||||
"clip_anytorch==2.6.0", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip",
|
||||
"compel==2.0.2",
|
||||
"controlnet-aux==0.0.7",
|
||||
|
Loading…
Reference in New Issue
Block a user