mirror of
https://github.com/invoke-ai/InvokeAI.git
synced 2025-01-07 03:17:05 +08:00
Bump bitsandbytes. The new version contains improvements to state_dict loading/saving for LLM.int8 and promises improved speed on some HW.
This commit is contained in:
parent
d3916dbdb6
commit
65fcbf5f60
@ -25,12 +25,9 @@ class InvokeInt8Params(bnb.nn.Int8Params):
|
|||||||
self.CB = self.data
|
self.CB = self.data
|
||||||
self.SCB = self.SCB.cuda()
|
self.SCB = self.SCB.cuda()
|
||||||
else:
|
else:
|
||||||
# we store the 8-bit rows-major weight
|
# We quantize the weight and store in 8bit row-major
|
||||||
# we convert this weight to the turning/ampere weight during the first inference pass
|
|
||||||
B = self.data.contiguous().half().cuda(device)
|
B = self.data.contiguous().half().cuda(device)
|
||||||
CB, CBt, SCB, SCBt, coo_tensorB = bnb.functional.double_quant(B)
|
CB, SCB, _ = bnb.functional.int8_vectorwise_quant(B)
|
||||||
del CBt
|
|
||||||
del SCBt
|
|
||||||
self.data = CB
|
self.data = CB
|
||||||
self.CB = CB
|
self.CB = CB
|
||||||
self.SCB = SCB
|
self.SCB = SCB
|
||||||
|
@ -34,7 +34,7 @@ classifiers = [
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
# Core generation dependencies, pinned for reproducible builds.
|
# Core generation dependencies, pinned for reproducible builds.
|
||||||
"accelerate==1.0.1",
|
"accelerate==1.0.1",
|
||||||
"bitsandbytes==0.43.3; sys_platform!='darwin'",
|
"bitsandbytes==0.45.0; sys_platform!='darwin'",
|
||||||
"clip_anytorch==2.6.0", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip",
|
"clip_anytorch==2.6.0", # replacing "clip @ https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip",
|
||||||
"compel==2.0.2",
|
"compel==2.0.2",
|
||||||
"controlnet-aux==0.0.7",
|
"controlnet-aux==0.0.7",
|
||||||
|
Loading…
Reference in New Issue
Block a user