mirror of
https://github.com/invoke-ai/InvokeAI.git
synced 2025-01-08 11:57:36 +08:00
Adding support for .bin files from huggingface concepts (#498)
* Adding support for .bin files from huggingface concepts * Updating documentation to include huggingface .bin info
This commit is contained in:
parent
79b445b0ca
commit
4951e66103
@ -1,6 +1,8 @@
|
||||
# **Personalizing Text-to-Image Generation**
|
||||
|
||||
You may personalize the generated images to provide your own styles or objects by training a new LDM checkpoint and introducing a new vocabulary to the fixed model.
|
||||
You may personalize the generated images to provide your own styles or objects by training a new LDM checkpoint and introducing a new vocabulary to the fixed model as a (.pt) embeddings file. Alternatively, you may use or train HuggingFace Concepts embeddings files (.bin) from https://huggingface.co/sd-concepts-library and its associated notebooks.
|
||||
|
||||
**Training**
|
||||
|
||||
To train, prepare a folder that contains images sized at 512x512 and execute the following:
|
||||
|
||||
@ -26,9 +28,11 @@ On a RTX3090, the process for SD will take ~1h @1.6 iterations/sec.
|
||||
|
||||
_Note_: According to the associated paper, the optimal number of images is 3-5. Your model may not converge if you use more images than that.
|
||||
|
||||
Training will run indefinately, but you may wish to stop it before the heat death of the universe, when you find a low loss epoch or around ~5000 iterations.
|
||||
Training will run indefinitely, but you may wish to stop it before the heat death of the universe, when you find a low loss epoch or around ~5000 iterations.
|
||||
|
||||
Once the model is trained, specify the trained .pt file when starting dream using
|
||||
**Running**
|
||||
|
||||
Once the model is trained, specify the trained .pt or .bin file when starting dream using
|
||||
|
||||
```
|
||||
(ldm) ~/stable-diffusion$ python3 ./scripts/dream.py --embedding_path /path/to/embedding.pt --full_precision
|
||||
@ -46,7 +50,7 @@ This also works with image2image
|
||||
dream> "waterfall and rainbow in the style of *" --init_img=./init-images/crude_drawing.png --strength=0.5 -s100 -n4
|
||||
```
|
||||
|
||||
It's also possible to train multiple token (modify the placeholder string in `configs/stable-diffusion/v1-finetune.yaml`) and combine LDM checkpoints using:
|
||||
For .pt files it's also possible to train multiple tokens (modify the placeholder string in `configs/stable-diffusion/v1-finetune.yaml`) and combine LDM checkpoints using:
|
||||
|
||||
```
|
||||
(ldm) ~/stable-diffusion$ python3 ./scripts/merge_embeddings.py \
|
||||
|
@ -24,9 +24,9 @@ def get_clip_token_for_string(tokenizer, string):
|
||||
return_tensors='pt',
|
||||
)
|
||||
tokens = batch_encoding['input_ids']
|
||||
assert (
|
||||
""" assert (
|
||||
torch.count_nonzero(tokens - 49407) == 2
|
||||
), f"String '{string}' maps to more than a single token. Please use another string"
|
||||
), f"String '{string}' maps to more than a single token. Please use another string" """
|
||||
|
||||
return tokens[0, 1]
|
||||
|
||||
@ -57,8 +57,9 @@ class EmbeddingManager(nn.Module):
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
self.string_to_token_dict = {}
|
||||
self.embedder = embedder
|
||||
|
||||
self.string_to_token_dict = {}
|
||||
self.string_to_param_dict = nn.ParameterDict()
|
||||
|
||||
self.initial_embeddings = (
|
||||
@ -217,12 +218,28 @@ class EmbeddingManager(nn.Module):
|
||||
|
||||
def load(self, ckpt_path, full=True):
|
||||
ckpt = torch.load(ckpt_path, map_location='cpu')
|
||||
self.string_to_token_dict = ckpt["string_to_token"]
|
||||
self.string_to_param_dict = ckpt["string_to_param"]
|
||||
|
||||
# Handle .pt textual inversion files
|
||||
if 'string_to_token' in ckpt and 'string_to_param' in ckpt:
|
||||
self.string_to_token_dict = ckpt["string_to_token"]
|
||||
self.string_to_param_dict = ckpt["string_to_param"]
|
||||
|
||||
# Handle .bin textual inversion files from Huggingface Concepts
|
||||
# https://huggingface.co/sd-concepts-library
|
||||
else:
|
||||
for token_str in list(ckpt.keys()):
|
||||
token = get_clip_token_for_string(self.embedder.tokenizer, token_str)
|
||||
self.string_to_token_dict[token_str] = token
|
||||
ckpt[token_str] = torch.nn.Parameter(ckpt[token_str])
|
||||
|
||||
self.string_to_param_dict.update(ckpt)
|
||||
|
||||
if not full:
|
||||
for key, value in self.string_to_param_dict.items():
|
||||
self.string_to_param_dict[key] = torch.nn.Parameter(value.half())
|
||||
|
||||
print(f'Added terms: {", ".join(self.string_to_param_dict.keys())}')
|
||||
|
||||
def get_embedding_norms_squared(self):
|
||||
all_params = torch.cat(
|
||||
list(self.string_to_param_dict.values()), axis=0
|
||||
|
Loading…
Reference in New Issue
Block a user