Mirror of https://github.com/invoke-ai/InvokeAI.git
reduce VRAM memory usage by half during model loading
* This moves the call to half() before model.to(device) to avoid copying the full-precision model to the GPU. Improves speed and reduces memory usage dramatically.
* This fix contributed by @mh-dm (Mihai)
commit 5c43988862 (parent 99122708ca)
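The ordering matters because model.to(self.device) copies the weights at whatever precision they currently have: casting to fp16 first means only half-size tensors ever cross to the GPU, so peak VRAM during loading drops by roughly half. A minimal sketch of the pattern, not code from this repository (load_half is an illustrative helper):

import torch

# Order this commit replaces: the fp32 weights are copied to the GPU
# first and only cast to fp16 afterwards, so peak VRAM during loading
# equals the size of the full-precision model:
#     model.to(device)   # full fp32 copy lands in VRAM
#     model.half()       # cast happens after the transfer

def load_half(model: torch.nn.Module, device: torch.device) -> torch.nn.Module:
    """Cast to fp16 while the weights are still in CPU RAM, then transfer."""
    model.half()       # casts every floating-point param/buffer in place on the CPU
    model.to(device)   # only the fp16 tensors are copied to the GPU
    return model.eval()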
@@ -536,9 +536,6 @@ class Generate:
         sd = pl_sd['state_dict']
         model = instantiate_from_config(config.model)
         m, u = model.load_state_dict(sd, strict=False)
-        model.to(self.device)
-        model.eval()
-
         if self.full_precision:
             print(
@@ -549,6 +546,8 @@ class Generate:
                 '>> Using half precision math. Call with --full_precision to use more accurate but VRAM-intensive full precision.'
             )
             model.half()
+        model.to(self.device)
+        model.eval()
 
         # usage statistics
         toc = time.time()
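To confirm the saving locally, PyTorch's allocator counters make the difference easy to see. A rough sketch under stated assumptions: it needs a CUDA device, and make_model / peak_vram_mb are hypothetical helpers standing in for the actual checkpoint load:

import torch

def peak_vram_mb(load_fn) -> float:
    """Run load_fn() and return the peak CUDA memory it allocated, in MiB."""
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    load_fn()
    return torch.cuda.max_memory_allocated() / 2**20

device = torch.device('cuda')

def make_model() -> torch.nn.Module:
    # Arbitrary stand-in (~512 MiB of fp32 weights), not the SD checkpoint.
    return torch.nn.Sequential(*[torch.nn.Linear(4096, 4096) for _ in range(8)])

# Old order: fp32 tensors are transferred, then cast on the GPU.
print('to() then half():', peak_vram_mb(lambda: make_model().to(device).half()))
# New order: fp16 tensors are transferred; peak should be roughly half.
print('half() then to():', peak_vram_mb(lambda: make_model().half().to(device)))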