Merge branch 'stripped-models' into git-lfs

Billy 2025-03-18 14:57:58 +11:00
commit 6375214878
144 changed files with 2248 additions and 2481 deletions

View File

@@ -44,7 +44,12 @@ jobs:
       - name: check for changed frontend files
         if: ${{ inputs.always_run != true }}
         id: changed-files
-        uses: tj-actions/changed-files@v42
+        # Pinned to the _hash_ for v45.0.9 to prevent supply-chain attacks.
+        # See:
+        # - CVE-2025-30066
+        # - https://www.stepsecurity.io/blog/harden-runner-detection-tj-actions-changed-files-action-is-compromised
+        # - https://github.com/tj-actions/changed-files/issues/2463
+        uses: tj-actions/changed-files@a284dc1814e3fd07f2e34267fc8f81227ed29fb8
         with:
           files_yaml: |
             frontend:
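The comment above replaces a floating tag with a commit SHA, the standard mitigation for the tj-actions/changed-files compromise tracked as CVE-2025-30066; the same pin is applied in the workflow files that follow. A quick way to sanity-check such a pin before bumping it is to resolve the tag remotely and compare it with the hash in the workflow. The sketch below only assumes git is on PATH; the repo URL, tag, and SHA are copied from this hunk.

import subprocess

REPO = "https://github.com/tj-actions/changed-files"
TAG = "v45.0.9"
PINNED_SHA = "a284dc1814e3fd07f2e34267fc8f81227ed29fb8"

def resolve_tag(repo: str, tag: str) -> str:
    # `git ls-remote` prints "<sha>\t<ref>" lines; the peeled ref (^{}) is the
    # commit an annotated tag points at, so prefer it when present.
    out = subprocess.run(
        ["git", "ls-remote", repo, f"refs/tags/{tag}", f"refs/tags/{tag}^{{}}"],
        check=True, capture_output=True, text=True,
    ).stdout
    shas = {ref: sha for sha, ref in (line.split("\t") for line in out.splitlines())}
    return shas.get(f"refs/tags/{tag}^{{}}") or shas[f"refs/tags/{tag}"]

if __name__ == "__main__":
    sha = resolve_tag(REPO, TAG)
    print("OK" if sha == PINNED_SHA else f"MISMATCH: tag now resolves to {sha}")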

View File

@@ -44,7 +44,12 @@ jobs:
       - name: check for changed frontend files
         if: ${{ inputs.always_run != true }}
         id: changed-files
-        uses: tj-actions/changed-files@v42
+        # Pinned to the _hash_ for v45.0.9 to prevent supply-chain attacks.
+        # See:
+        # - CVE-2025-30066
+        # - https://www.stepsecurity.io/blog/harden-runner-detection-tj-actions-changed-files-action-is-compromised
+        # - https://github.com/tj-actions/changed-files/issues/2463
+        uses: tj-actions/changed-files@a284dc1814e3fd07f2e34267fc8f81227ed29fb8
         with:
           files_yaml: |
             frontend:

View File

@@ -43,7 +43,12 @@ jobs:
       - name: check for changed python files
         if: ${{ inputs.always_run != true }}
         id: changed-files
-        uses: tj-actions/changed-files@v42
+        # Pinned to the _hash_ for v45.0.9 to prevent supply-chain attacks.
+        # See:
+        # - CVE-2025-30066
+        # - https://www.stepsecurity.io/blog/harden-runner-detection-tj-actions-changed-files-action-is-compromised
+        # - https://github.com/tj-actions/changed-files/issues/2463
+        uses: tj-actions/changed-files@a284dc1814e3fd07f2e34267fc8f81227ed29fb8
         with:
           files_yaml: |
             python:

View File

@@ -77,7 +77,12 @@ jobs:
       - name: check for changed python files
         if: ${{ inputs.always_run != true }}
         id: changed-files
-        uses: tj-actions/changed-files@v42
+        # Pinned to the _hash_ for v45.0.9 to prevent supply-chain attacks.
+        # See:
+        # - CVE-2025-30066
+        # - https://www.stepsecurity.io/blog/harden-runner-detection-tj-actions-changed-files-action-is-compromised
+        # - https://github.com/tj-actions/changed-files/issues/2463
+        uses: tj-actions/changed-files@a284dc1814e3fd07f2e34267fc8f81227ed29fb8
         with:
           files_yaml: |
             python:

View File

@@ -42,7 +42,12 @@ jobs:
       - name: check for changed files
         if: ${{ inputs.always_run != true }}
         id: changed-files
-        uses: tj-actions/changed-files@v42
+        # Pinned to the _hash_ for v45.0.9 to prevent supply-chain attacks.
+        # See:
+        # - CVE-2025-30066
+        # - https://www.stepsecurity.io/blog/harden-runner-detection-tj-actions-changed-files-action-is-compromised
+        # - https://github.com/tj-actions/changed-files/issues/2463
+        uses: tj-actions/changed-files@a284dc1814e3fd07f2e34267fc8f81227ed29fb8
         with:
           files_yaml: |
             src:

View File

@@ -1,79 +0,0 @@
model:
target: cldm.cldm.ControlLDM
params:
linear_start: 0.00085
linear_end: 0.0120
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: "jpg"
cond_stage_key: "txt"
control_key: "hint"
image_size: 64
channels: 4
cond_stage_trainable: false
conditioning_key: crossattn
monitor: val/loss_simple_ema
scale_factor: 0.18215
use_ema: False
only_mid_control: False
control_stage_config:
target: cldm.cldm.ControlNet
params:
image_size: 32 # unused
in_channels: 4
hint_channels: 3
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_heads: 8
use_spatial_transformer: True
transformer_depth: 1
context_dim: 768
use_checkpoint: True
legacy: False
unet_config:
target: cldm.cldm.ControlledUnetModel
params:
image_size: 32 # unused
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_heads: 8
use_spatial_transformer: True
transformer_depth: 1
context_dim: 768
use_checkpoint: True
legacy: False
first_stage_config:
target: ldm.models.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder

View File

@@ -1,85 +0,0 @@
model:
target: cldm.cldm.ControlLDM
params:
linear_start: 0.00085
linear_end: 0.0120
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: "jpg"
cond_stage_key: "txt"
control_key: "hint"
image_size: 64
channels: 4
cond_stage_trainable: false
conditioning_key: crossattn
monitor: val/loss_simple_ema
scale_factor: 0.18215
use_ema: False
only_mid_control: False
control_stage_config:
target: cldm.cldm.ControlNet
params:
use_checkpoint: True
image_size: 32 # unused
in_channels: 4
hint_channels: 3
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_head_channels: 64 # need to fix for flash-attn
use_spatial_transformer: True
use_linear_in_transformer: True
transformer_depth: 1
context_dim: 1024
legacy: False
unet_config:
target: cldm.cldm.ControlledUnetModel
params:
use_checkpoint: True
image_size: 32 # unused
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_head_channels: 64 # need to fix for flash-attn
use_spatial_transformer: True
use_linear_in_transformer: True
transformer_depth: 1
context_dim: 1024
legacy: False
first_stage_config:
target: ldm.models.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
#attn_type: "vanilla-xformers"
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
params:
freeze: True
layer: "penultimate"

View File

@@ -1,98 +0,0 @@
model:
target: sgm.models.diffusion.DiffusionEngine
params:
scale_factor: 0.13025
disable_first_stage_autocast: True
denoiser_config:
target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
params:
num_idx: 1000
weighting_config:
target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
scaling_config:
target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
discretization_config:
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
network_config:
target: sgm.modules.diffusionmodules.openaimodel.UNetModel
params:
adm_in_channels: 2816
num_classes: sequential
use_checkpoint: True
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions: [4, 2]
num_res_blocks: 2
channel_mult: [1, 2, 4]
num_head_channels: 64
use_spatial_transformer: True
use_linear_in_transformer: True
transformer_depth: [1, 2, 10] # note: the first is unused (due to attn_res starting at 2) 32, 16, 8 --> 64, 32, 16
context_dim: 2048
spatial_transformer_attn_type: softmax-xformers
legacy: False
conditioner_config:
target: sgm.modules.GeneralConditioner
params:
emb_models:
# crossattn cond
- is_trainable: False
input_key: txt
target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
params:
layer: hidden
layer_idx: 11
# crossattn and vector cond
- is_trainable: False
input_key: txt
target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
params:
arch: ViT-bigG-14
version: laion2b_s39b_b160k
freeze: True
layer: penultimate
always_return_pooled: True
legacy: False
# vector cond
- is_trainable: False
input_key: original_size_as_tuple
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
params:
outdim: 256 # multiplied by two
# vector cond
- is_trainable: False
input_key: crop_coords_top_left
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
params:
outdim: 256 # multiplied by two
# vector cond
- is_trainable: False
input_key: target_size_as_tuple
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
params:
outdim: 256 # multiplied by two
first_stage_config:
target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
attn_type: vanilla-xformers
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult: [1, 2, 4, 4]
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity

View File

@@ -1,98 +0,0 @@
model:
target: sgm.models.diffusion.DiffusionEngine
params:
scale_factor: 0.13025
disable_first_stage_autocast: True
denoiser_config:
target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
params:
num_idx: 1000
weighting_config:
target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
scaling_config:
target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
discretization_config:
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
network_config:
target: sgm.modules.diffusionmodules.openaimodel.UNetModel
params:
adm_in_channels: 2816
num_classes: sequential
use_checkpoint: True
in_channels: 9
out_channels: 4
model_channels: 320
attention_resolutions: [4, 2]
num_res_blocks: 2
channel_mult: [1, 2, 4]
num_head_channels: 64
use_spatial_transformer: True
use_linear_in_transformer: True
transformer_depth: [1, 2, 10] # note: the first is unused (due to attn_res starting at 2) 32, 16, 8 --> 64, 32, 16
context_dim: 2048
spatial_transformer_attn_type: softmax-xformers
legacy: False
conditioner_config:
target: sgm.modules.GeneralConditioner
params:
emb_models:
# crossattn cond
- is_trainable: False
input_key: txt
target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
params:
layer: hidden
layer_idx: 11
# crossattn and vector cond
- is_trainable: False
input_key: txt
target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
params:
arch: ViT-bigG-14
version: laion2b_s39b_b160k
freeze: True
layer: penultimate
always_return_pooled: True
legacy: False
# vector cond
- is_trainable: False
input_key: original_size_as_tuple
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
params:
outdim: 256 # multiplied by two
# vector cond
- is_trainable: False
input_key: crop_coords_top_left
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
params:
outdim: 256 # multiplied by two
# vector cond
- is_trainable: False
input_key: target_size_as_tuple
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
params:
outdim: 256 # multiplied by two
first_stage_config:
target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
attn_type: vanilla-xformers
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult: [1, 2, 4, 4]
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity

View File

@@ -1,91 +0,0 @@
model:
target: sgm.models.diffusion.DiffusionEngine
params:
scale_factor: 0.13025
disable_first_stage_autocast: True
denoiser_config:
target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
params:
num_idx: 1000
weighting_config:
target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
scaling_config:
target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
discretization_config:
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
network_config:
target: sgm.modules.diffusionmodules.openaimodel.UNetModel
params:
adm_in_channels: 2560
num_classes: sequential
use_checkpoint: True
in_channels: 4
out_channels: 4
model_channels: 384
attention_resolutions: [4, 2]
num_res_blocks: 2
channel_mult: [1, 2, 4, 4]
num_head_channels: 64
use_spatial_transformer: True
use_linear_in_transformer: True
transformer_depth: 4
context_dim: [1280, 1280, 1280, 1280] # 1280
spatial_transformer_attn_type: softmax-xformers
legacy: False
conditioner_config:
target: sgm.modules.GeneralConditioner
params:
emb_models:
# crossattn and vector cond
- is_trainable: False
input_key: txt
target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
params:
arch: ViT-bigG-14
version: laion2b_s39b_b160k
legacy: False
freeze: True
layer: penultimate
always_return_pooled: True
# vector cond
- is_trainable: False
input_key: original_size_as_tuple
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
params:
outdim: 256 # multiplied by two
# vector cond
- is_trainable: False
input_key: crop_coords_top_left
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
params:
outdim: 256 # multiplied by two
# vector cond
- is_trainable: False
input_key: aesthetic_score
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
params:
outdim: 256 # multiplied by one
first_stage_config:
target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
attn_type: vanilla-xformers
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult: [1, 2, 4, 4]
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity

View File

@@ -1,110 +0,0 @@
model:
base_learning_rate: 5.0e-03
target: invokeai.backend.stable_diffusion.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.00085
linear_end: 0.0120
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: image
cond_stage_key: caption
image_size: 64
channels: 4
cond_stage_trainable: true # Note: different from the one we trained before
conditioning_key: crossattn
monitor: val/loss_simple_ema
scale_factor: 0.18215
use_ema: False
embedding_reg_weight: 0.0
personalization_config:
target: invokeai.backend.stable_diffusion.embedding_manager.EmbeddingManager
params:
placeholder_strings: ["*"]
initializer_words: ["sculpture"]
per_image_tokens: false
num_vectors_per_token: 1
progressive_words: False
unet_config:
target: invokeai.backend.stable_diffusion.diffusionmodules.openaimodel.UNetModel
params:
image_size: 32 # unused
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_heads: 8
use_spatial_transformer: True
transformer_depth: 1
context_dim: 768
use_checkpoint: True
legacy: False
first_stage_config:
target: invokeai.backend.stable_diffusion.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: invokeai.backend.stable_diffusion.encoders.modules.FrozenCLIPEmbedder
data:
target: main.DataModuleFromConfig
params:
batch_size: 1
num_workers: 2
wrap: false
train:
target: invokeai.backend.stable_diffusion.data.personalized.PersonalizedBase
params:
size: 512
set: train
per_image_tokens: false
repeats: 100
validation:
target: invokeai.backend.stable_diffusion.data.personalized.PersonalizedBase
params:
size: 512
set: val
per_image_tokens: false
repeats: 10
lightning:
modelcheckpoint:
params:
every_n_train_steps: 500
callbacks:
image_logger:
target: main.ImageLogger
params:
batch_frequency: 500
max_images: 8
increase_log_steps: False
trainer:
benchmark: True
max_steps: 4000000
# max_steps: 4000

View File

@@ -1,103 +0,0 @@
model:
base_learning_rate: 5.0e-03
target: invokeai.backend.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.00085
linear_end: 0.0120
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: image
cond_stage_key: caption
image_size: 64
channels: 4
cond_stage_trainable: true # Note: different from the one we trained before
conditioning_key: crossattn
monitor: val/loss_simple_ema
scale_factor: 0.18215
use_ema: False
embedding_reg_weight: 0.0
personalization_config:
target: invokeai.backend.stable_diffusion.embedding_manager.EmbeddingManager
params:
placeholder_strings: ["*"]
initializer_words: ["painting"]
per_image_tokens: false
num_vectors_per_token: 1
unet_config:
target: invokeai.backend.stable_diffusion.diffusionmodules.openaimodel.UNetModel
params:
image_size: 32 # unused
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_heads: 8
use_spatial_transformer: True
transformer_depth: 1
context_dim: 768
use_checkpoint: True
legacy: False
first_stage_config:
target: invokeai.backend.stable_diffusion.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: invokeai.backend.stable_diffusion.encoders.modules.FrozenCLIPEmbedder
data:
target: main.DataModuleFromConfig
params:
batch_size: 2
num_workers: 16
wrap: false
train:
target: invokeai.backend.stable_diffusion.data.personalized_style.PersonalizedBase
params:
size: 512
set: train
per_image_tokens: false
repeats: 100
validation:
target: invokeai.backend.stable_diffusion.data.personalized_style.PersonalizedBase
params:
size: 512
set: val
per_image_tokens: false
repeats: 10
lightning:
callbacks:
image_logger:
target: main.ImageLogger
params:
batch_frequency: 500
max_images: 8
increase_log_steps: False
trainer:
benchmark: True

View File

@@ -1,80 +0,0 @@
model:
base_learning_rate: 1.0e-04
target: invokeai.backend.models.diffusion.ddpm.LatentDiffusion
params:
parameterization: "v"
linear_start: 0.00085
linear_end: 0.0120
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: "jpg"
cond_stage_key: "txt"
image_size: 64
channels: 4
cond_stage_trainable: false # Note: different from the one we trained before
conditioning_key: crossattn
monitor: val/loss_simple_ema
scale_factor: 0.18215
use_ema: False
scheduler_config: # 10000 warmup steps
target: invokeai.backend.stable_diffusion.lr_scheduler.LambdaLinearScheduler
params:
warm_up_steps: [ 10000 ]
cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
f_start: [ 1.e-6 ]
f_max: [ 1. ]
f_min: [ 1. ]
personalization_config:
target: invokeai.backend.stable_diffusion.embedding_manager.EmbeddingManager
params:
placeholder_strings: ["*"]
initializer_words: ['sculpture']
per_image_tokens: false
num_vectors_per_token: 1
progressive_words: False
unet_config:
target: invokeai.backend.stable_diffusion.diffusionmodules.openaimodel.UNetModel
params:
image_size: 32 # unused
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_heads: 8
use_spatial_transformer: True
transformer_depth: 1
context_dim: 768
use_checkpoint: True
legacy: False
first_stage_config:
target: invokeai.backend.stable_diffusion.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: invokeai.backend.stable_diffusion.encoders.modules.WeightedFrozenCLIPEmbedder

View File

@@ -1,79 +0,0 @@
model:
base_learning_rate: 1.0e-04
target: invokeai.backend.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.00085
linear_end: 0.0120
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: "jpg"
cond_stage_key: "txt"
image_size: 64
channels: 4
cond_stage_trainable: false # Note: different from the one we trained before
conditioning_key: crossattn
monitor: val/loss_simple_ema
scale_factor: 0.18215
use_ema: False
scheduler_config: # 10000 warmup steps
target: invokeai.backend.stable_diffusion.lr_scheduler.LambdaLinearScheduler
params:
warm_up_steps: [ 10000 ]
cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
f_start: [ 1.e-6 ]
f_max: [ 1. ]
f_min: [ 1. ]
personalization_config:
target: invokeai.backend.stable_diffusion.embedding_manager.EmbeddingManager
params:
placeholder_strings: ["*"]
initializer_words: ['sculpture']
per_image_tokens: false
num_vectors_per_token: 1
progressive_words: False
unet_config:
target: invokeai.backend.stable_diffusion.diffusionmodules.openaimodel.UNetModel
params:
image_size: 32 # unused
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_heads: 8
use_spatial_transformer: True
transformer_depth: 1
context_dim: 768
use_checkpoint: True
legacy: False
first_stage_config:
target: invokeai.backend.stable_diffusion.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: invokeai.backend.stable_diffusion.encoders.modules.WeightedFrozenCLIPEmbedder

View File

@@ -1,79 +0,0 @@
model:
base_learning_rate: 7.5e-05
target: invokeai.backend.models.diffusion.ddpm.LatentInpaintDiffusion
params:
linear_start: 0.00085
linear_end: 0.0120
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: "jpg"
cond_stage_key: "txt"
image_size: 64
channels: 4
cond_stage_trainable: false # Note: different from the one we trained before
conditioning_key: hybrid # important
monitor: val/loss_simple_ema
scale_factor: 0.18215
finetune_keys: null
scheduler_config: # 10000 warmup steps
target: invokeai.backend.stable_diffusion.lr_scheduler.LambdaLinearScheduler
params:
warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch
cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
f_start: [ 1.e-6 ]
f_max: [ 1. ]
f_min: [ 1. ]
personalization_config:
target: invokeai.backend.stable_diffusion.embedding_manager.EmbeddingManager
params:
placeholder_strings: ["*"]
initializer_words: ['sculpture']
per_image_tokens: false
num_vectors_per_token: 8
progressive_words: False
unet_config:
target: invokeai.backend.stable_diffusion.diffusionmodules.openaimodel.UNetModel
params:
image_size: 32 # unused
in_channels: 9 # 4 data + 4 downscaled image + 1 mask
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_heads: 8
use_spatial_transformer: True
transformer_depth: 1
context_dim: 768
use_checkpoint: True
legacy: False
first_stage_config:
target: invokeai.backend.stable_diffusion.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: invokeai.backend.stable_diffusion.encoders.modules.WeightedFrozenCLIPEmbedder

View File

@@ -1,110 +0,0 @@
model:
base_learning_rate: 5.0e-03
target: invokeai.backend.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.00085
linear_end: 0.0120
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: image
cond_stage_key: caption
image_size: 64
channels: 4
cond_stage_trainable: true # Note: different from the one we trained before
conditioning_key: crossattn
monitor: val/loss_simple_ema
scale_factor: 0.18215
use_ema: False
embedding_reg_weight: 0.0
personalization_config:
target: invokeai.backend.stable_diffusion.embedding_manager.EmbeddingManager
params:
placeholder_strings: ["*"]
initializer_words: ['sculpture']
per_image_tokens: false
num_vectors_per_token: 6
progressive_words: False
unet_config:
target: invokeai.backend.stable_diffusion.diffusionmodules.openaimodel.UNetModel
params:
image_size: 32 # unused
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_heads: 8
use_spatial_transformer: True
transformer_depth: 1
context_dim: 768
use_checkpoint: True
legacy: False
first_stage_config:
target: invokeai.backend.stable_diffusion.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: invokeai.backend.stable_diffusion.encoders.modules.FrozenCLIPEmbedder
data:
target: main.DataModuleFromConfig
params:
batch_size: 1
num_workers: 2
wrap: false
train:
target: invokeai.backend.stable_diffusion.data.personalized.PersonalizedBase
params:
size: 512
set: train
per_image_tokens: false
repeats: 100
validation:
target: invokeai.backend.stable_diffusion.data.personalized.PersonalizedBase
params:
size: 512
set: val
per_image_tokens: false
repeats: 10
lightning:
modelcheckpoint:
params:
every_n_train_steps: 500
callbacks:
image_logger:
target: main.ImageLogger
params:
batch_frequency: 500
max_images: 5
increase_log_steps: False
trainer:
benchmark: False
max_steps: 6200
# max_steps: 4000

View File

@@ -1,68 +0,0 @@
model:
base_learning_rate: 1.0e-4
target: invokeai.backend.stable_diffusion.diffusion.ddpm.LatentDiffusion
params:
parameterization: "v"
linear_start: 0.00085
linear_end: 0.0120
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: "jpg"
cond_stage_key: "txt"
image_size: 64
channels: 4
cond_stage_trainable: false
conditioning_key: crossattn
monitor: val/loss_simple_ema
scale_factor: 0.18215
use_ema: False # we set this to false because this is an inference only config
unet_config:
target: invokeai.backend.stable_diffusion.diffusionmodules.openaimodel.UNetModel
params:
use_checkpoint: True
use_fp16: True
image_size: 32 # unused
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_head_channels: 64 # need to fix for flash-attn
use_spatial_transformer: True
use_linear_in_transformer: True
transformer_depth: 1
context_dim: 1024
legacy: False
first_stage_config:
target: invokeai.backend.stable_diffusion.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
#attn_type: "vanilla-xformers"
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: invokeai.backend.stable_diffusion.encoders.modules.FrozenOpenCLIPEmbedder
params:
freeze: True
layer: "penultimate"

View File

@@ -1,67 +0,0 @@
model:
base_learning_rate: 1.0e-4
target: invokeai.backend.stable_diffusion.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.00085
linear_end: 0.0120
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: "jpg"
cond_stage_key: "txt"
image_size: 64
channels: 4
cond_stage_trainable: false
conditioning_key: crossattn
monitor: val/loss_simple_ema
scale_factor: 0.18215
use_ema: False # we set this to false because this is an inference only config
unet_config:
target: invokeai.backend.stable_diffusion.diffusionmodules.openaimodel.UNetModel
params:
use_checkpoint: True
use_fp16: True
image_size: 32 # unused
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_head_channels: 64 # need to fix for flash-attn
use_spatial_transformer: True
use_linear_in_transformer: True
transformer_depth: 1
context_dim: 1024
legacy: False
first_stage_config:
target: invokeai.backend.stable_diffusion.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
#attn_type: "vanilla-xformers"
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: invokeai.backend.stable_diffusion.encoders.modules.FrozenOpenCLIPEmbedder
params:
freeze: True
layer: "penultimate"

View File

@@ -1,159 +0,0 @@
model:
base_learning_rate: 5.0e-05
target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
params:
linear_start: 0.00085
linear_end: 0.0120
parameterization: "v"
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: "jpg"
cond_stage_key: "txt"
image_size: 64
channels: 4
cond_stage_trainable: false
conditioning_key: hybrid
scale_factor: 0.18215
monitor: val/loss_simple_ema
finetune_keys: null
use_ema: False
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
use_checkpoint: True
image_size: 32 # unused
in_channels: 9
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_head_channels: 64 # need to fix for flash-attn
use_spatial_transformer: True
use_linear_in_transformer: True
transformer_depth: 1
context_dim: 1024
legacy: False
first_stage_config:
target: ldm.models.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
#attn_type: "vanilla-xformers"
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: [ ]
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
params:
freeze: True
layer: "penultimate"
data:
target: ldm.data.laion.WebDataModuleFromConfig
params:
tar_base: null # for concat as in LAION-A
p_unsafe_threshold: 0.1
filter_word_list: "data/filters.yaml"
max_pwatermark: 0.45
batch_size: 8
num_workers: 6
multinode: True
min_size: 512
train:
shards:
- "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-0/{00000..18699}.tar -"
- "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-1/{00000..18699}.tar -"
- "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-2/{00000..18699}.tar -"
- "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-3/{00000..18699}.tar -"
- "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-4/{00000..18699}.tar -" #{00000-94333}.tar"
shuffle: 10000
image_key: jpg
image_transforms:
- target: torchvision.transforms.Resize
params:
size: 512
interpolation: 3
- target: torchvision.transforms.RandomCrop
params:
size: 512
postprocess:
target: ldm.data.laion.AddMask
params:
mode: "512train-large"
p_drop: 0.25
# NOTE use enough shards to avoid empty validation loops in workers
validation:
shards:
- "pipe:aws s3 cp s3://deep-floyd-s3/datasets/laion_cleaned-part5/{93001..94333}.tar - "
shuffle: 0
image_key: jpg
image_transforms:
- target: torchvision.transforms.Resize
params:
size: 512
interpolation: 3
- target: torchvision.transforms.CenterCrop
params:
size: 512
postprocess:
target: ldm.data.laion.AddMask
params:
mode: "512train-large"
p_drop: 0.25
lightning:
find_unused_parameters: True
modelcheckpoint:
params:
every_n_train_steps: 5000
callbacks:
metrics_over_trainsteps_checkpoint:
params:
every_n_train_steps: 10000
image_logger:
target: main.ImageLogger
params:
enable_autocast: False
disabled: False
batch_frequency: 1000
max_images: 4
increase_log_steps: False
log_first_step: False
log_images_kwargs:
use_ema_scope: False
inpaint: False
plot_progressive_rows: False
plot_diffusion_rows: False
N: 4
unconditional_guidance_scale: 5.0
unconditional_guidance_label: [""]
ddim_steps: 50 # todo check these out for depth2img,
ddim_eta: 0.0 # todo check these out for depth2img,
trainer:
benchmark: True
val_check_interval: 5000000
num_sanity_val_steps: 0
accumulate_grad_batches: 1

View File

@@ -1,158 +0,0 @@
model:
base_learning_rate: 5.0e-05
target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion
params:
linear_start: 0.00085
linear_end: 0.0120
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: "jpg"
cond_stage_key: "txt"
image_size: 64
channels: 4
cond_stage_trainable: false
conditioning_key: hybrid
scale_factor: 0.18215
monitor: val/loss_simple_ema
finetune_keys: null
use_ema: False
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
use_checkpoint: True
image_size: 32 # unused
in_channels: 9
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_head_channels: 64 # need to fix for flash-attn
use_spatial_transformer: True
use_linear_in_transformer: True
transformer_depth: 1
context_dim: 1024
legacy: False
first_stage_config:
target: ldm.models.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
#attn_type: "vanilla-xformers"
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: [ ]
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
params:
freeze: True
layer: "penultimate"
data:
target: ldm.data.laion.WebDataModuleFromConfig
params:
tar_base: null # for concat as in LAION-A
p_unsafe_threshold: 0.1
filter_word_list: "data/filters.yaml"
max_pwatermark: 0.45
batch_size: 8
num_workers: 6
multinode: True
min_size: 512
train:
shards:
- "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-0/{00000..18699}.tar -"
- "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-1/{00000..18699}.tar -"
- "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-2/{00000..18699}.tar -"
- "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-3/{00000..18699}.tar -"
- "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-4/{00000..18699}.tar -" #{00000-94333}.tar"
shuffle: 10000
image_key: jpg
image_transforms:
- target: torchvision.transforms.Resize
params:
size: 512
interpolation: 3
- target: torchvision.transforms.RandomCrop
params:
size: 512
postprocess:
target: ldm.data.laion.AddMask
params:
mode: "512train-large"
p_drop: 0.25
# NOTE use enough shards to avoid empty validation loops in workers
validation:
shards:
- "pipe:aws s3 cp s3://deep-floyd-s3/datasets/laion_cleaned-part5/{93001..94333}.tar - "
shuffle: 0
image_key: jpg
image_transforms:
- target: torchvision.transforms.Resize
params:
size: 512
interpolation: 3
- target: torchvision.transforms.CenterCrop
params:
size: 512
postprocess:
target: ldm.data.laion.AddMask
params:
mode: "512train-large"
p_drop: 0.25
lightning:
find_unused_parameters: True
modelcheckpoint:
params:
every_n_train_steps: 5000
callbacks:
metrics_over_trainsteps_checkpoint:
params:
every_n_train_steps: 10000
image_logger:
target: main.ImageLogger
params:
enable_autocast: False
disabled: False
batch_frequency: 1000
max_images: 4
increase_log_steps: False
log_first_step: False
log_images_kwargs:
use_ema_scope: False
inpaint: False
plot_progressive_rows: False
plot_diffusion_rows: False
N: 4
unconditional_guidance_scale: 5.0
unconditional_guidance_label: [""]
ddim_steps: 50 # todo check these out for depth2img,
ddim_eta: 0.0 # todo check these out for depth2img,
trainer:
benchmark: True
val_check_interval: 5000000
num_sanity_val_steps: 0
accumulate_grad_batches: 1

View File

@@ -1,74 +0,0 @@
model:
base_learning_rate: 5.0e-07
target: ldm.models.diffusion.ddpm.LatentDepth2ImageDiffusion
params:
linear_start: 0.00085
linear_end: 0.0120
num_timesteps_cond: 1
log_every_t: 200
timesteps: 1000
first_stage_key: "jpg"
cond_stage_key: "txt"
image_size: 64
channels: 4
cond_stage_trainable: false
conditioning_key: hybrid
scale_factor: 0.18215
monitor: val/loss_simple_ema
finetune_keys: null
use_ema: False
depth_stage_config:
target: ldm.modules.midas.api.MiDaSInference
params:
model_type: "dpt_hybrid"
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
use_checkpoint: True
image_size: 32 # unused
in_channels: 5
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_head_channels: 64 # need to fix for flash-attn
use_spatial_transformer: True
use_linear_in_transformer: True
transformer_depth: 1
context_dim: 1024
legacy: False
first_stage_config:
target: ldm.models.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
#attn_type: "vanilla-xformers"
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: [ ]
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
params:
freeze: True
layer: "penultimate"

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,59 +0,0 @@
# This is an example file with default and example settings.
# You should not copy this whole file into your config.
# Only add the settings you need to change to your config file.
# Internal metadata - do not edit:
schema_version: 4.0.2
# Put user settings here - see https://invoke-ai.github.io/InvokeAI/configuration/:
host: 127.0.0.1
port: 9090
allow_origins: []
allow_credentials: true
allow_methods:
- '*'
allow_headers:
- '*'
log_tokenization: false
patchmatch: true
models_dir: models
convert_cache_dir: models/.convert_cache
download_cache_dir: models/.download_cache
legacy_conf_dir: configs
db_dir: databases
outputs_dir: outputs
custom_nodes_dir: nodes
style_presets_dir: style_presets
workflow_thumbnails_dir: workflow_thumbnails
log_handlers:
- console
log_format: color
log_level: info
log_sql: false
log_level_network: warning
use_memory_db: false
dev_reload: false
profile_graphs: false
profiles_dir: profiles
log_memory_usage: false
device_working_mem_gb: 3.0
enable_partial_loading: false
keep_ram_copy_of_weights: true
lazy_offload: true
device: auto
precision: auto
sequential_guidance: false
attention_type: auto
attention_slice_size: auto
force_tiled_decode: false
pil_compress_level: 1
max_queue_size: 10000
clear_queue_on_startup: false
node_cache_size: 512
hashing_algorithm: blake3_single
remote_api_tokens:
- url_regex: cool-models.com
token: my_secret_token
- url_regex: nifty-models.com
token: some_other_token
scan_models_on_startup: false

View File

@@ -1,4 +0,0 @@
# Internal metadata - do not edit:
schema_version: 4.0.2
# Put user settings here - see https://invoke-ai.github.io/InvokeAI/configuration/:

View File

@@ -40,10 +40,10 @@ from invokeai.backend.util.devices import TorchDevice
@invocation(
"compel",
title="Prompt",
title="Prompt - SD1.5",
tags=["prompt", "compel"],
category="conditioning",
version="1.2.0",
version="1.2.1",
)
class CompelInvocation(BaseInvocation):
"""Parse prompt using compel package to conditioning."""
@@ -233,10 +233,10 @@ class SDXLPromptInvocationBase:
@invocation(
"sdxl_compel_prompt",
title="SDXL Prompt",
title="Prompt - SDXL",
tags=["sdxl", "compel", "prompt"],
category="conditioning",
version="1.2.0",
version="1.2.1",
)
class SDXLCompelPromptInvocation(BaseInvocation, SDXLPromptInvocationBase):
"""Parse prompt using compel package to conditioning."""
@@ -327,10 +327,10 @@ class SDXLCompelPromptInvocation(BaseInvocation, SDXLPromptInvocationBase):
@invocation(
"sdxl_refiner_compel_prompt",
title="SDXL Refiner Prompt",
title="Prompt - SDXL Refiner",
tags=["sdxl", "compel", "prompt"],
category="conditioning",
version="1.1.1",
version="1.1.2",
)
class SDXLRefinerCompelPromptInvocation(BaseInvocation, SDXLPromptInvocationBase):
"""Parse prompt using compel package to conditioning."""
@@ -376,10 +376,10 @@ class CLIPSkipInvocationOutput(BaseInvocationOutput):
@invocation(
"clip_skip",
title="CLIP Skip",
title="Apply CLIP Skip - SD1.5, SDXL",
tags=["clipskip", "clip", "skip"],
category="conditioning",
version="1.1.0",
version="1.1.1",
)
class CLIPSkipInvocation(BaseInvocation):
"""Skip layers in clip text_encoder model."""

View File

@@ -87,7 +87,7 @@ class ControlOutput(BaseInvocationOutput):
control: ControlField = OutputField(description=FieldDescriptions.control)
@invocation("controlnet", title="ControlNet", tags=["controlnet"], category="controlnet", version="1.1.2")
@invocation("controlnet", title="ControlNet - SD1.5, SDXL", tags=["controlnet"], category="controlnet", version="1.1.3")
class ControlNetInvocation(BaseInvocation):
"""Collects ControlNet info to pass to other nodes"""

View File

@@ -127,10 +127,10 @@ def get_scheduler(
@invocation(
"denoise_latents",
title="Denoise Latents",
title="Denoise - SD1.5, SDXL",
tags=["latents", "denoise", "txt2img", "t2i", "t2l", "img2img", "i2i", "l2l"],
category="latents",
version="1.5.3",
version="1.5.4",
)
class DenoiseLatentsInvocation(BaseInvocation):
"""Denoises noisy latents to decodable images"""

View File

@@ -59,6 +59,7 @@ class UIType(str, Enum, metaclass=MetaEnum):
ControlLoRAModel = "ControlLoRAModelField"
SigLipModel = "SigLipModelField"
FluxReduxModel = "FluxReduxModelField"
LlavaOnevisionModel = "LLaVAModelField"
# endregion
# region Misc Field Types
@@ -205,6 +206,7 @@ class FieldDescriptions:
freeu_b2 = "Scaling factor for stage 2 to amplify the contributions of backbone features."
instantx_control_mode = "The control mode for InstantX ControlNet union models. Ignored for other ControlNet models. The standard mapping is: canny (0), tile (1), depth (2), blur (3), pose (4), gray (5), low quality (6). Negative values will be treated as 'None'."
flux_redux_conditioning = "FLUX Redux conditioning tensor"
vllm_model = "The VLLM model to use"
class ImageField(BaseModel):

View File

@@ -21,10 +21,10 @@ class FluxControlLoRALoaderOutput(BaseInvocationOutput):
@invocation(
"flux_control_lora_loader",
title="Flux Control LoRA",
title="Control LoRA - FLUX",
tags=["lora", "model", "flux"],
category="model",
version="1.1.0",
version="1.1.1",
classification=Classification.Prototype,
)
class FluxControlLoRALoaderInvocation(BaseInvocation):

View File

@@ -37,10 +37,10 @@ class FluxModelLoaderOutput(BaseInvocationOutput):
@invocation(
"flux_model_loader",
title="Flux Main Model",
title="Main Model - FLUX",
tags=["model", "flux"],
category="model",
version="1.0.5",
version="1.0.6",
classification=Classification.Prototype,
)
class FluxModelLoaderInvocation(BaseInvocation):

View File

@@ -26,10 +26,10 @@ from invokeai.backend.stable_diffusion.diffusion.conditioning_data import Condit
@invocation(
"flux_text_encoder",
title="FLUX Text Encoding",
title="Prompt - FLUX",
tags=["prompt", "conditioning", "flux"],
category="conditioning",
version="1.1.1",
version="1.1.2",
classification=Classification.Prototype,
)
class FluxTextEncoderInvocation(BaseInvocation):

View File

@@ -22,10 +22,10 @@ from invokeai.backend.util.devices import TorchDevice
@invocation(
"flux_vae_decode",
title="FLUX Latents to Image",
title="Latents to Image - FLUX",
tags=["latents", "image", "vae", "l2i", "flux"],
category="latents",
version="1.0.1",
version="1.0.2",
)
class FluxVaeDecodeInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Generates an image from latents."""

View File

@@ -19,10 +19,10 @@ from invokeai.backend.util.devices import TorchDevice
@invocation(
"flux_vae_encode",
title="FLUX Image to Latents",
title="Image to Latents - FLUX",
tags=["latents", "image", "vae", "i2l", "flux"],
category="latents",
version="1.0.0",
version="1.0.1",
)
class FluxVaeEncodeInvocation(BaseInvocation):
"""Encodes an image into latents."""

View File

@@ -19,9 +19,9 @@ class IdealSizeOutput(BaseInvocationOutput):
@invocation(
"ideal_size",
title="Ideal Size",
title="Ideal Size - SD1.5, SDXL",
tags=["latents", "math", "ideal_size"],
version="1.0.4",
version="1.0.5",
)
class IdealSizeInvocation(BaseInvocation):
"""Calculates the ideal size for generation to avoid duplication"""

View File

@@ -31,10 +31,10 @@ from invokeai.backend.util.devices import TorchDevice
@invocation(
"i2l",
title="Image to Latents",
title="Image to Latents - SD1.5, SDXL",
tags=["latents", "image", "vae", "i2l"],
category="latents",
version="1.1.0",
version="1.1.1",
)
class ImageToLatentsInvocation(BaseInvocation):
"""Encodes an image into latents."""

View File

@@ -69,7 +69,13 @@ CLIP_VISION_MODEL_MAP: dict[Literal["ViT-L", "ViT-H", "ViT-G"], StarterModel] =
}
@invocation("ip_adapter", title="IP-Adapter", tags=["ip_adapter", "control"], category="ip_adapter", version="1.5.0")
@invocation(
"ip_adapter",
title="IP-Adapter - SD1.5, SDXL",
tags=["ip_adapter", "control"],
category="ip_adapter",
version="1.5.1",
)
class IPAdapterInvocation(BaseInvocation):
"""Collects IP-Adapter info to pass to other nodes."""

View File

@@ -31,10 +31,10 @@ from invokeai.backend.util.devices import TorchDevice
@invocation(
"l2i",
title="Latents to Image",
title="Latents to Image - SD1.5, SDXL",
tags=["latents", "image", "vae", "l2i"],
category="latents",
version="1.3.1",
version="1.3.2",
)
class LatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Generates an image from latents."""

View File

@@ -0,0 +1,60 @@
from typing import Any
import torch
from PIL.Image import Image
from pydantic import field_validator
from invokeai.app.invocations.baseinvocation import BaseInvocation, invocation
from invokeai.app.invocations.fields import FieldDescriptions, ImageField, InputField, UIComponent, UIType
from invokeai.app.invocations.model import ModelIdentifierField
from invokeai.app.invocations.primitives import StringOutput
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.llava_onevision_model import LlavaOnevisionModel
from invokeai.backend.util.devices import TorchDevice
@invocation("llava_onevision_vllm", title="LLaVA OneVision VLLM", tags=["vllm"], category="vllm", version="1.0.0")
class LlavaOnevisionVllmInvocation(BaseInvocation):
"""Run a LLaVA OneVision VLLM model."""
images: list[ImageField] | ImageField | None = InputField(default=None, max_length=3, description="Input image.")
prompt: str = InputField(
default="",
description="Input text prompt.",
ui_component=UIComponent.Textarea,
)
vllm_model: ModelIdentifierField = InputField(
title="LLaVA Model Type",
description=FieldDescriptions.vllm_model,
ui_type=UIType.LlavaOnevisionModel,
)
@field_validator("images", mode="before")
def listify_images(cls, v: Any) -> list:
if v is None:
return v
if not isinstance(v, list):
return [v]
return v
def _get_images(self, context: InvocationContext) -> list[Image]:
if self.images is None:
return []
image_fields = self.images if isinstance(self.images, list) else [self.images]
return [context.images.get_pil(image_field.image_name, "RGB") for image_field in image_fields]
@torch.no_grad()
def invoke(self, context: InvocationContext) -> StringOutput:
images = self._get_images(context)
with context.models.load(self.vllm_model) as vllm_model:
assert isinstance(vllm_model, LlavaOnevisionModel)
output = vllm_model.run(
prompt=self.prompt,
images=images,
device=TorchDevice.choose_torch_device(),
dtype=TorchDevice.choose_torch_dtype(),
)
return StringOutput(value=output)
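The images field in the new invocation above accepts a single image, a list of images, or None, and the mode="before" validator normalises everything to a list before pydantic checks the union. A minimal standalone sketch of that accept-one-or-many pattern, assuming pydantic v2 and using Item/Request as hypothetical stand-ins for the InvokeAI types:

from typing import Any

from pydantic import BaseModel, field_validator

class Item(BaseModel):
    name: str

class Request(BaseModel):
    items: list[Item] | Item | None = None

    @field_validator("items", mode="before")
    @classmethod
    def listify_items(cls, v: Any) -> Any:
        # Runs before normal validation, so a bare item gets wrapped in a list.
        if v is None or isinstance(v, list):
            return v
        return [v]

print(Request(items={"name": "a"}).items)    # [Item(name='a')]
print(Request(items=[{"name": "b"}]).items)  # [Item(name='b')]
print(Request().items)                       # None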

View File

@@ -610,10 +610,10 @@ class LatentsMetaOutput(LatentsOutput, MetadataOutput):
@invocation(
"denoise_latents_meta",
title="Denoise Latents + metadata",
title=f"{DenoiseLatentsInvocation.UIConfig.title} + Metadata",
tags=["latents", "denoise", "txt2img", "t2i", "t2l", "img2img", "i2i", "l2l"],
category="latents",
version="1.1.0",
version="1.1.1",
)
class DenoiseLatentsMetaInvocation(DenoiseLatentsInvocation, WithMetadata):
def invoke(self, context: InvocationContext) -> LatentsMetaOutput:
@@ -675,10 +675,10 @@ class DenoiseLatentsMetaInvocation(DenoiseLatentsInvocation, WithMetadata):
@invocation(
"flux_denoise_meta",
title="Flux Denoise + metadata",
title=f"{FluxDenoiseInvocation.UIConfig.title} + Metadata",
tags=["flux", "latents", "denoise", "txt2img", "t2i", "t2l", "img2img", "i2i", "l2l"],
category="latents",
version="1.0.0",
version="1.0.1",
)
class FluxDenoiseLatentsMetaInvocation(FluxDenoiseInvocation, WithMetadata):
"""Run denoising process with a FLUX transformer model + metadata."""

View File

@@ -122,10 +122,10 @@ class ModelIdentifierOutput(BaseInvocationOutput):
@invocation(
"model_identifier",
title="Model identifier",
title="Any Model",
tags=["model"],
category="model",
version="1.0.0",
version="1.0.1",
classification=Classification.Prototype,
)
class ModelIdentifierInvocation(BaseInvocation):
@@ -144,10 +144,10 @@ class ModelIdentifierInvocation(BaseInvocation):
@invocation(
"main_model_loader",
title="Main Model",
title="Main Model - SD1.5",
tags=["model"],
category="model",
version="1.0.3",
version="1.0.4",
)
class MainModelLoaderInvocation(BaseInvocation):
"""Loads a main model, outputting its submodels."""
@@ -244,7 +244,7 @@ class LoRASelectorOutput(BaseInvocationOutput):
lora: LoRAField = OutputField(description="LoRA model and weight", title="LoRA")
@invocation("lora_selector", title="LoRA Selector", tags=["model"], category="model", version="1.0.1")
@invocation("lora_selector", title="LoRA Model - SD1.5", tags=["model"], category="model", version="1.0.2")
class LoRASelectorInvocation(BaseInvocation):
"""Selects a LoRA model and weight."""
@@ -257,7 +257,9 @@ class LoRASelectorInvocation(BaseInvocation):
return LoRASelectorOutput(lora=LoRAField(lora=self.lora, weight=self.weight))
@invocation("lora_collection_loader", title="LoRA Collection Loader", tags=["model"], category="model", version="1.1.0")
@invocation(
"lora_collection_loader", title="LoRA Collection - SD1.5", tags=["model"], category="model", version="1.1.1"
)
class LoRACollectionLoader(BaseInvocation):
"""Applies a collection of LoRAs to the provided UNet and CLIP models."""
@@ -320,10 +322,10 @@ class SDXLLoRALoaderOutput(BaseInvocationOutput):
@invocation(
"sdxl_lora_loader",
title="SDXL LoRA",
title="LoRA Model - SDXL",
tags=["lora", "model"],
category="model",
version="1.0.3",
version="1.0.4",
)
class SDXLLoRALoaderInvocation(BaseInvocation):
"""Apply selected lora to unet and text_encoder."""
@@ -400,10 +402,10 @@ class SDXLLoRALoaderInvocation(BaseInvocation):
@invocation(
"sdxl_lora_collection_loader",
title="SDXL LoRA Collection Loader",
title="LoRA Collection - SDXL",
tags=["model"],
category="model",
version="1.1.0",
version="1.1.1",
)
class SDXLLoRACollectionLoader(BaseInvocation):
"""Applies a collection of SDXL LoRAs to the provided UNet and CLIP models."""
@@ -469,7 +471,9 @@ class SDXLLoRACollectionLoader(BaseInvocation):
return output
@invocation("vae_loader", title="VAE", tags=["vae", "model"], category="model", version="1.0.3")
@invocation(
"vae_loader", title="VAE Model - SD1.5, SDXL, SD3, FLUX", tags=["vae", "model"], category="model", version="1.0.4"
)
class VAELoaderInvocation(BaseInvocation):
"""Loads a VAE model, outputting a VaeLoaderOutput"""
@@ -496,10 +500,10 @@ class SeamlessModeOutput(BaseInvocationOutput):
@invocation(
"seamless",
title="Seamless",
title="Apply Seamless - SD1.5, SDXL",
tags=["seamless", "model"],
category="model",
version="1.0.1",
version="1.0.2",
)
class SeamlessModeInvocation(BaseInvocation):
"""Applies the seamless transformation to the Model UNet and VAE."""
@@ -539,7 +543,7 @@ class SeamlessModeInvocation(BaseInvocation):
return SeamlessModeOutput(unet=unet, vae=vae)
@invocation("freeu", title="FreeU", tags=["freeu"], category="unet", version="1.0.1")
@invocation("freeu", title="Apply FreeU - SD1.5, SDXL", tags=["freeu"], category="unet", version="1.0.2")
class FreeUInvocation(BaseInvocation):
"""
Applies FreeU to the UNet. Suggested values (b1/b2/s1/s2):

View File

@@ -72,10 +72,10 @@ class NoiseOutput(BaseInvocationOutput):
@invocation(
"noise",
title="Noise",
title="Create Latent Noise",
tags=["latents", "noise"],
category="latents",
version="1.0.2",
version="1.0.3",
)
class NoiseInvocation(BaseInvocation):
"""Generates latent noise."""

View File

@@ -32,10 +32,10 @@ from invokeai.backend.util.devices import TorchDevice
@invocation(
"sd3_denoise",
title="SD3 Denoise",
title="Denoise - SD3",
tags=["image", "sd3"],
category="image",
version="1.1.0",
version="1.1.1",
classification=Classification.Prototype,
)
class SD3DenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):

View File

@@ -21,10 +21,10 @@ from invokeai.backend.util.devices import TorchDevice
@invocation(
"sd3_i2l",
title="SD3 Image to Latents",
title="Image to Latents - SD3",
tags=["image", "latents", "vae", "i2l", "sd3"],
category="image",
version="1.0.0",
version="1.0.1",
classification=Classification.Prototype,
)
class SD3ImageToLatentsInvocation(BaseInvocation, WithMetadata, WithBoard):

View File

@@ -24,10 +24,10 @@ from invokeai.backend.util.devices import TorchDevice
@invocation(
"sd3_l2i",
title="SD3 Latents to Image",
title="Latents to Image - SD3",
tags=["latents", "image", "vae", "l2i", "sd3"],
category="latents",
version="1.3.1",
version="1.3.2",
)
class SD3LatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Generates an image from latents."""

View File

@@ -30,10 +30,10 @@ class Sd3ModelLoaderOutput(BaseInvocationOutput):
@invocation(
"sd3_model_loader",
title="SD3 Main Model",
title="Main Model - SD3",
tags=["model", "sd3"],
category="model",
version="1.0.0",
version="1.0.1",
classification=Classification.Prototype,
)
class Sd3ModelLoaderInvocation(BaseInvocation):

View File

@@ -29,10 +29,10 @@ SD3_T5_MAX_SEQ_LEN = 256
@invocation(
"sd3_text_encoder",
title="SD3 Text Encoding",
title="Prompt - SD3",
tags=["prompt", "conditioning", "sd3"],
category="conditioning",
version="1.0.0",
version="1.0.1",
classification=Classification.Prototype,
)
class Sd3TextEncoderInvocation(BaseInvocation):

View File

@@ -24,7 +24,7 @@ class SDXLRefinerModelLoaderOutput(BaseInvocationOutput):
vae: VAEField = OutputField(description=FieldDescriptions.vae, title="VAE")
@invocation("sdxl_model_loader", title="SDXL Main Model", tags=["model", "sdxl"], category="model", version="1.0.3")
@invocation("sdxl_model_loader", title="Main Model - SDXL", tags=["model", "sdxl"], category="model", version="1.0.4")
class SDXLModelLoaderInvocation(BaseInvocation):
"""Loads an sdxl base model, outputting its submodels."""
@@ -58,10 +58,10 @@ class SDXLModelLoaderInvocation(BaseInvocation):
@invocation(
"sdxl_refiner_model_loader",
title="SDXL Refiner Model",
title="Refiner Model - SDXL",
tags=["model", "sdxl", "refiner"],
category="model",
version="1.0.3",
version="1.0.4",
)
class SDXLRefinerModelLoaderInvocation(BaseInvocation):
"""Loads an sdxl refiner model, outputting its submodels."""

View File

@@ -45,7 +45,11 @@ class T2IAdapterOutput(BaseInvocationOutput):
@invocation(
"t2i_adapter", title="T2I-Adapter", tags=["t2i_adapter", "control"], category="t2i_adapter", version="1.0.3"
"t2i_adapter",
title="T2I-Adapter - SD1.5, SDXL",
tags=["t2i_adapter", "control"],
category="t2i_adapter",
version="1.0.4",
)
class T2IAdapterInvocation(BaseInvocation):
"""Collects T2I-Adapter info to pass to other nodes."""

View File

@@ -53,11 +53,11 @@ def crop_controlnet_data(control_data: ControlNetData, latent_region: TBLR) -> C
@invocation(
"tiled_multi_diffusion_denoise_latents",
title="Tiled Multi-Diffusion Denoise Latents",
title="Tiled Multi-Diffusion Denoise - SD1.5, SDXL",
tags=["upscale", "denoise"],
category="latents",
classification=Classification.Beta,
version="1.0.0",
version="1.0.1",
)
class TiledMultiDiffusionDenoiseLatents(BaseInvocation):
"""Tiled Multi-Diffusion denoising.

View File

@@ -570,7 +570,10 @@ ValueToInsertTuple: TypeAlias = tuple[
str | None, # destination (optional)
int | None, # retried_from_item_id (optional, this is always None for new items)
]
"""A type alias for the tuple of values to insert into the session queue table."""
"""A type alias for the tuple of values to insert into the session queue table.
**If you change this, be sure to update the `enqueue_batch` and `retry_items_by_id` methods in the session queue service!**
"""
def prepare_values_to_insert(

View File

@@ -27,6 +27,7 @@ from invokeai.app.services.session_queue.session_queue_common import (
SessionQueueItemDTO,
SessionQueueItemNotFoundError,
SessionQueueStatus,
ValueToInsertTuple,
calc_session_count,
prepare_values_to_insert,
)
@@ -689,7 +690,7 @@ class SqliteSessionQueue(SessionQueueBase):
"""Retries the given queue items"""
try:
cursor = self._conn.cursor()
values_to_insert: list[tuple] = []
values_to_insert: list[ValueToInsertTuple] = []
retried_item_ids: list[int] = []
for item_id in item_ids:
@@ -715,16 +716,16 @@
else queue_item.item_id
)
value_to_insert = (
value_to_insert: ValueToInsertTuple = (
queue_item.queue_id,
queue_item.batch_id,
queue_item.destination,
field_values_json,
queue_item.origin,
queue_item.priority,
workflow_json,
cloned_session_json,
cloned_session.id,
queue_item.batch_id,
field_values_json,
queue_item.priority,
workflow_json,
queue_item.origin,
queue_item.destination,
retried_from_item_id,
)
values_to_insert.append(value_to_insert)
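The reorder above lines the tuple literal up with ValueToInsertTuple and types the list, so a checker can flag mis-ordered rows before they reach the INSERT. A minimal sketch of why the annotation helps, with made-up field names rather than the real session-queue columns:

from typing import TypeAlias

QueueRow: TypeAlias = tuple[
    str,         # queue_id
    str,         # batch_id
    int,         # priority
    str | None,  # destination (optional)
]

def build_row(queue_id: str, batch_id: str, priority: int, destination: str | None) -> QueueRow:
    # With the alias in the return annotation, putting priority in the wrong slot
    # becomes a mypy/pyright error instead of a silently mis-ordered INSERT row.
    return (queue_id, batch_id, priority, destination)

rows: list[QueueRow] = [build_row("default", "batch-1", 0, None)]
print(rows)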

View File

@@ -1,11 +1,11 @@
{
"id": "default_686bb1d0-d086-4c70-9fa3-2f600b922023",
"name": "ESRGAN Upscaling with Canny ControlNet",
"name": "Upscaler - SD1.5, ESRGAN",
"author": "InvokeAI",
"description": "Sample workflow for using Upscaling with ControlNet with SD1.5",
"description": "Sample workflow for using ESRGAN to upscale with ControlNet with SD1.5",
"version": "2.1.0",
"contact": "invoke@invoke.ai",
"tags": "upscaling, controlnet, default",
"tags": "sd1.5, upscaling, control",
"notes": "",
"exposedFields": [
{
@ -185,14 +185,7 @@
},
"control_model": {
"name": "control_model",
"label": "Control Model (select Canny)",
"value": {
"key": "a7b9c76f-4bc5-42aa-b918-c1c458a5bb24",
"hash": "blake3:260c7f8e10aefea9868cfc68d89970e91033bd37132b14b903e70ee05ebf530e",
"name": "sd-controlnet-canny",
"base": "sd-1",
"type": "controlnet"
}
"label": "Control Model (select Canny)"
},
"control_weight": {
"name": "control_weight",
@ -295,14 +288,7 @@
"inputs": {
"model": {
"name": "model",
"label": "",
"value": {
"key": "5cd43ca0-dd0a-418d-9f7e-35b2b9d5e106",
"hash": "blake3:6987f323017f597213cc3264250edf57056d21a40a0a85d83a1a33a7d44dc41a",
"name": "Deliberate_v5",
"base": "sd-1",
"type": "main"
}
"label": ""
}
},
"isOpen": true,
@ -849,4 +835,4 @@
"targetHandle": "image_resolution"
}
]
}
}

View File

@ -1,11 +1,11 @@
{
"id": "default_cbf0e034-7b54-4b2c-b670-3b1e2e4b4a88",
"name": "FLUX Image to Image",
"name": "Image to Image - FLUX",
"author": "InvokeAI",
"description": "A simple image-to-image workflow using a FLUX dev model. ",
"version": "1.1.0",
"contact": "",
"tags": "image2image, flux, image-to-image, image to image",
"tags": "flux, image to image",
"notes": "Prerequisite model downloads: T5 Encoder, CLIP-L Encoder, and FLUX VAE. Quantized and un-quantized versions can be found in the starter models tab within your Model Manager. We recommend using FLUX dev models for image-to-image workflows. The image-to-image performance with FLUX schnell models is poor.",
"exposedFields": [
{
@ -201,36 +201,15 @@
},
"t5_encoder_model": {
"name": "t5_encoder_model",
"label": "",
"value": {
"key": "d18d5575-96b6-4da3-b3d8-eb58308d6705",
"hash": "random:f2f9ed74acdfb4bf6fec200e780f6c25f8dd8764a35e65d425d606912fdf573a",
"name": "t5_bnb_int8_quantized_encoder",
"base": "any",
"type": "t5_encoder"
}
"label": ""
},
"clip_embed_model": {
"name": "clip_embed_model",
"label": "",
"value": {
"key": "5a19d7e5-8d98-43cd-8a81-87515e4b3b4e",
"hash": "random:4bd08514c08fb6ff04088db9aeb45def3c488e8b5fd09a35f2cc4f2dc346f99f",
"name": "clip-vit-large-patch14",
"base": "any",
"type": "clip_embed"
}
"label": ""
},
"vae_model": {
"name": "vae_model",
"label": "",
"value": {
"key": "9172beab-5c1d-43f0-b2f0-6e0b956710d9",
"hash": "random:c54dde288e5fa2e6137f1c92e9d611f598049e6f16e360207b6d96c9f5a67ba0",
"name": "FLUX.1-schnell_ae",
"base": "flux",
"type": "vae"
}
"label": ""
}
}
},

View File

@ -1,11 +1,11 @@
{
"id": "default_dec5a2e9-f59c-40d9-8869-a056751d79b8",
"name": "Face Detailer with IP-Adapter & Canny (See Note in Details)",
"name": "Face Detailer - SD1.5",
"author": "kosmoskatten",
"description": "A workflow to add detail to and improve faces. This workflow is most effective when used with a model that creates realistic outputs. ",
"version": "2.1.0",
"contact": "invoke@invoke.ai",
"tags": "face detailer, IP-Adapter, Canny",
"tags": "sd1.5, reference image, control",
"notes": "Set this image as the blur mask: https://i.imgur.com/Gxi61zP.png",
"exposedFields": [
{
@ -136,14 +136,7 @@
},
"control_model": {
"name": "control_model",
"label": "Control Model (select canny)",
"value": {
"key": "5bdaacf7-a7a3-4fb8-b394-cc0ffbb8941d",
"hash": "blake3:260c7f8e10aefea9868cfc68d89970e91033bd37132b14b903e70ee05ebf530e",
"name": "sd-controlnet-canny",
"base": "sd-1",
"type": "controlnet"
}
"label": "Control Model (select canny)"
},
"control_weight": {
"name": "control_weight",
@ -197,14 +190,7 @@
},
"ip_adapter_model": {
"name": "ip_adapter_model",
"label": "IP-Adapter Model (select IP Adapter Face)",
"value": {
"key": "1cc210bb-4d0a-4312-b36c-b5d46c43768e",
"hash": "blake3:3d669dffa7471b357b4df088b99ffb6bf4d4383d5e0ef1de5ec1c89728a3d5a5",
"name": "ip_adapter_sd15",
"base": "sd-1",
"type": "ip_adapter"
}
"label": "IP-Adapter Model (select IP Adapter Face)"
},
"clip_vision_model": {
"name": "clip_vision_model",

View File

@ -1,11 +1,11 @@
{
"id": "default_444fe292-896b-44fd-bfc6-c0b5d220fffc",
"name": "FLUX Text to Image",
"name": "Text to Image - FLUX",
"author": "InvokeAI",
"description": "A simple text-to-image workflow using FLUX dev or schnell models.",
"version": "1.1.0",
"contact": "",
"tags": "text2image, flux, text to image",
"tags": "flux, text to image",
"notes": "Prerequisite model downloads: T5 Encoder, CLIP-L Encoder, and FLUX VAE. Quantized and un-quantized versions can be found in the starter models tab within your Model Manager. We recommend 4 steps for FLUX schnell models and 30 steps for FLUX dev models.",
"exposedFields": [
{
@ -169,36 +169,15 @@
},
"t5_encoder_model": {
"name": "t5_encoder_model",
"label": "",
"value": {
"key": "d18d5575-96b6-4da3-b3d8-eb58308d6705",
"hash": "random:f2f9ed74acdfb4bf6fec200e780f6c25f8dd8764a35e65d425d606912fdf573a",
"name": "t5_bnb_int8_quantized_encoder",
"base": "any",
"type": "t5_encoder"
}
"label": ""
},
"clip_embed_model": {
"name": "clip_embed_model",
"label": "",
"value": {
"key": "5a19d7e5-8d98-43cd-8a81-87515e4b3b4e",
"hash": "random:4bd08514c08fb6ff04088db9aeb45def3c488e8b5fd09a35f2cc4f2dc346f99f",
"name": "clip-vit-large-patch14",
"base": "any",
"type": "clip_embed"
}
"label": ""
},
"vae_model": {
"name": "vae_model",
"label": "",
"value": {
"key": "9172beab-5c1d-43f0-b2f0-6e0b956710d9",
"hash": "random:c54dde288e5fa2e6137f1c92e9d611f598049e6f16e360207b6d96c9f5a67ba0",
"name": "FLUX.1-schnell_ae",
"base": "flux",
"type": "vae"
}
"label": ""
}
}
},

View File

@ -1,11 +1,11 @@
{
"id": "default_2d05e719-a6b9-4e64-9310-b875d3b2f9d2",
"name": "Multi ControlNet (Canny & Depth)",
"name": "Text to Image - SD1.5, Control",
"author": "InvokeAI",
"description": "A sample workflow using canny & depth ControlNets to guide the generation process. ",
"version": "2.1.0",
"contact": "invoke@invoke.ai",
"tags": "ControlNet, canny, depth",
"tags": "sd1.5, control, text to image",
"notes": "",
"exposedFields": [
{
@ -217,14 +217,7 @@
},
"control_model": {
"name": "control_model",
"label": "Control Model (select canny)",
"value": {
"key": "5bdaacf7-a7a3-4fb8-b394-cc0ffbb8941d",
"hash": "blake3:260c7f8e10aefea9868cfc68d89970e91033bd37132b14b903e70ee05ebf530e",
"name": "sd-controlnet-canny",
"base": "sd-1",
"type": "controlnet"
}
"label": "Control Model (select canny)"
},
"control_weight": {
"name": "control_weight",
@ -371,14 +364,7 @@
},
"control_model": {
"name": "control_model",
"label": "Control Model (select depth)",
"value": {
"key": "87e8855c-671f-4c9e-bbbb-8ed47ccb4aac",
"hash": "blake3:2550bf22a53942dfa28ab2fed9d10d80851112531f44d977168992edf9d0534c",
"name": "control_v11f1p_sd15_depth",
"base": "sd-1",
"type": "controlnet"
}
"label": "Control Model (select depth)"
},
"control_weight": {
"name": "control_weight",

View File

@ -1,11 +1,11 @@
{
"id": "default_f96e794f-eb3e-4d01-a960-9b4e43402bcf",
"name": "MultiDiffusion SD1.5",
"name": "Upscaler - SD1.5, MultiDiffusion",
"author": "Invoke",
"description": "A workflow to upscale an input image with tiled upscaling, using SD1.5 based models.",
"version": "1.0.0",
"contact": "invoke@invoke.ai",
"tags": "tiled, upscaling, sdxl",
"tags": "sd1.5, upscaling",
"notes": "",
"exposedFields": [
{
@ -135,14 +135,7 @@
"inputs": {
"model": {
"name": "model",
"label": "",
"value": {
"key": "e7b402e5-62e5-4acb-8c39-bee6bdb758ab",
"hash": "c8659e796168d076368256b57edbc1b48d6dafc1712f1bb37cc57c7c06889a6b",
"name": "526mix",
"base": "sd-1",
"type": "main"
}
"label": ""
}
}
},
@ -384,21 +377,11 @@
},
"image": {
"name": "image",
"label": "Image to Upscale",
"value": {
"image_name": "ee7009f7-a35d-488b-a2a6-21237ef5ae05.png"
}
"label": "Image to Upscale"
},
"image_to_image_model": {
"name": "image_to_image_model",
"label": "",
"value": {
"key": "38bb1a29-8ede-42ba-b77f-64b3478896eb",
"hash": "blake3:e52fdbee46a484ebe9b3b20ea0aac0a35a453ab6d0d353da00acfd35ce7a91ed",
"name": "4xNomosWebPhoto_esrgan",
"base": "sdxl",
"type": "spandrel_image_to_image"
}
"label": ""
},
"tile_size": {
"name": "tile_size",
@ -437,14 +420,7 @@
"inputs": {
"model": {
"name": "model",
"label": "ControlNet Model - Choose a Tile ControlNet",
"value": {
"key": "20645e4d-ef97-4c5a-9243-b834a3483925",
"hash": "f0812e13758f91baf4e54b7dbb707b70642937d3b2098cd2b94cc36d3eba308e",
"name": "tile",
"base": "sd-1",
"type": "controlnet"
}
"label": "ControlNet Model - Choose a Tile ControlNet"
}
}
},

View File

@ -1,11 +1,11 @@
{
"id": "default_35658541-6d41-4a20-8ec5-4bf2561faed0",
"name": "MultiDiffusion SDXL",
"name": "Upscaler - SDXL, MultiDiffusion",
"author": "Invoke",
"description": "A workflow to upscale an input image with tiled upscaling, using SDXL based models.",
"version": "1.1.0",
"contact": "invoke@invoke.ai",
"tags": "tiled, upscaling, sdxl",
"tags": "sdxl, upscaling",
"notes": "",
"exposedFields": [
{
@ -341,14 +341,7 @@
"inputs": {
"model": {
"name": "model",
"label": "ControlNet Model - Choose a Tile ControlNet",
"value": {
"key": "74f4651f-0ace-4b7b-b616-e98360257797",
"hash": "blake3:167a5b84583aaed3e5c8d660b45830e82e1c602743c689d3c27773c6c8b85b4a",
"name": "controlnet-tile-sdxl-1.0",
"base": "sdxl",
"type": "controlnet"
}
"label": "ControlNet Model - Choose a Tile ControlNet"
}
}
},
@ -801,14 +794,7 @@
"inputs": {
"vae_model": {
"name": "vae_model",
"label": "",
"value": {
"key": "ff926845-090e-4d46-b81e-30289ee47474",
"hash": "9705ab1c31fa96b308734214fb7571a958621c7a9247eed82b7d277145f8d9fa",
"name": "VAEFix",
"base": "sdxl",
"type": "vae"
}
"label": ""
}
}
},
@ -832,14 +818,7 @@
"inputs": {
"model": {
"name": "model",
"label": "SDXL Model",
"value": {
"key": "ab191f73-68d2-492c-8aec-b438a8cf0f45",
"hash": "blake3:2d50e940627e3bf555f015280ec0976d5c1fa100f7bc94e95ffbfc770e98b6fe",
"name": "CustomXLv7",
"base": "sdxl",
"type": "main"
}
"label": "SDXL Model"
}
}
},

View File

@ -1,11 +1,11 @@
{
"id": "default_d7a1c60f-ca2f-4f90-9e33-75a826ca6d8f",
"name": "Prompt from File",
"name": "Text to Image - SD1.5, Prompt from File",
"author": "InvokeAI",
"description": "Sample workflow using Prompt from File node",
"version": "2.1.0",
"contact": "invoke@invoke.ai",
"tags": "text2image, prompt from file, default, text to image",
"tags": "sd1.5, text to image",
"notes": "",
"exposedFields": [
{
@ -513,4 +513,4 @@
"targetHandle": "vae"
}
]
}
}

View File

@ -10,6 +10,7 @@ _default workflows_ on app startup.
An exception will be raised during sync if this is not set correctly.
- Default workflows appear on the "Default Workflows" tab of the Workflow
Library.
- Default workflows should not reference any user-created or user-installed resources, including images and models. For example, if a default workflow references Juggernaut as an SDXL model, a user's installed copy of Juggernaut will have a different UUID, so loading the workflow may show a warning even though the model is present. Ship default workflows without references to these resources; a sketch of how embedded model values can be stripped follows below.
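The workflow JSON changes in this commit apply that rule by dropping the embedded value blocks from model inputs. Below is a rough, hypothetical Python sketch of the idea; the nodes[].data.inputs nesting and the presence of a "key" field in model values are assumptions based on the snippets in this commit, not the definitive schema.

import json
from pathlib import Path

def strip_installed_resource_values(workflow_path: Path) -> None:
    """Remove install-specific model references from a default workflow file."""
    workflow = json.loads(workflow_path.read_text())
    for node in workflow.get("nodes", []):
        inputs = node.get("data", {}).get("inputs", {})  # assumed nesting
        for field in inputs.values():
            if not isinstance(field, dict):
                continue
            value = field.get("value")
            # Model references are dicts carrying an install-specific "key"/"hash";
            # drop them so the shipped workflow never points at a user's local UUIDs.
            if isinstance(value, dict) and "key" in value:
                del field["value"]
    workflow_path.write_text(json.dumps(workflow, indent=2) + "\n")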
After adding or updating default workflows, you **must** start the app up and
load them to ensure:

View File

@ -1,11 +1,11 @@
{
"id": "default_dbe46d95-22aa-43fb-9c16-94400d0ce2fd",
"name": "SD3.5 Text to Image",
"name": "Text to Image - SD3.5",
"author": "InvokeAI",
"description": "Sample text to image workflow for Stable Diffusion 3.5",
"version": "1.0.0",
"contact": "invoke@invoke.ai",
"tags": "text2image, SD3.5, text to image",
"tags": "SD3.5, text to image",
"notes": "",
"exposedFields": [
{
@ -38,14 +38,7 @@
"inputs": {
"model": {
"name": "model",
"label": "",
"value": {
"key": "f7b20be9-92a8-4cfb-bca4-6c3b5535c10b",
"hash": "placeholder",
"name": "stable-diffusion-3.5-medium",
"base": "sd-3",
"type": "main"
}
"label": ""
},
"t5_encoder_model": {
"name": "t5_encoder_model",

View File

@ -5,7 +5,7 @@
"description": "Sample text to image workflow for Stable Diffusion 1.5/2",
"version": "2.1.0",
"contact": "invoke@invoke.ai",
"tags": "text2image, SD1.5, SD2, text to image",
"tags": "SD1.5, text to image",
"notes": "",
"exposedFields": [
{
@ -417,4 +417,4 @@
"targetHandle": "vae"
}
]
}
}

View File

@ -5,7 +5,7 @@
"description": "Sample text to image workflow for SDXL",
"version": "2.1.0",
"contact": "invoke@invoke.ai",
"tags": "text2image, SDXL, text to image",
"tags": "SDXL, text to image",
"notes": "",
"exposedFields": [
{
@ -46,14 +46,7 @@
"inputs": {
"vae_model": {
"name": "vae_model",
"label": "VAE (use the FP16 model)",
"value": {
"key": "f20f9e5c-1bce-4c46-a84d-34ebfa7df069",
"hash": "blake3:9705ab1c31fa96b308734214fb7571a958621c7a9247eed82b7d277145f8d9fa",
"name": "sdxl-vae-fp16-fix",
"base": "sdxl",
"type": "vae"
}
"label": "VAE (use the FP16 model)"
}
},
"isOpen": true,
@ -203,14 +196,7 @@
"inputs": {
"model": {
"name": "model",
"label": "",
"value": {
"key": "4a63b226-e8ff-4da4-854e-0b9f04b562ba",
"hash": "blake3:d279309ea6e5ee6e8fd52504275865cc280dac71cbf528c5b07c98b888bddaba",
"name": "dreamshaper-xl-v2-turbo",
"base": "sdxl",
"type": "main"
}
"label": ""
}
},
"isOpen": true,
@ -715,4 +701,4 @@
"targetHandle": "style"
}
]
}
}

View File

@ -1,11 +1,11 @@
{
"id": "default_e71d153c-2089-43c7-bd2c-f61f37d4c1c1",
"name": "Text to Image with LoRA",
"name": "Text to Image - SD1.5, LoRA",
"author": "InvokeAI",
"description": "Simple text to image workflow with a LoRA",
"version": "2.1.0",
"contact": "invoke@invoke.ai",
"tags": "text to image, lora, text to image",
"tags": "sd1.5, text to image, lora",
"notes": "",
"exposedFields": [
{

View File

@ -1,11 +1,11 @@
{
"id": "default_43b0d7f7-6a12-4dcf-a5a4-50c940cbee29",
"name": "Tiled Upscaling (Beta)",
"name": "Upscaler - SD1.5, Tiled",
"author": "Invoke",
"description": "A workflow to upscale an input image with tiled upscaling. ",
"version": "2.1.0",
"contact": "invoke@invoke.ai",
"tags": "tiled, upscaling, sd1.5",
"tags": "sd1.5, upscaling",
"notes": "",
"exposedFields": [
{
@ -86,14 +86,7 @@
},
"ip_adapter_model": {
"name": "ip_adapter_model",
"label": "IP-Adapter Model (select ip_adapter_sd15)",
"value": {
"key": "1cc210bb-4d0a-4312-b36c-b5d46c43768e",
"hash": "blake3:3d669dffa7471b357b4df088b99ffb6bf4d4383d5e0ef1de5ec1c89728a3d5a5",
"name": "ip_adapter_sd15",
"base": "sd-1",
"type": "ip_adapter"
}
"label": "IP-Adapter Model (select ip_adapter_sd15)"
},
"clip_vision_model": {
"name": "clip_vision_model",
@ -201,14 +194,7 @@
},
"control_model": {
"name": "control_model",
"label": "Control Model (select contro_v11f1e_sd15_tile)",
"value": {
"key": "773843c8-db1f-4502-8f65-59782efa7960",
"hash": "blake3:f0812e13758f91baf4e54b7dbb707b70642937d3b2098cd2b94cc36d3eba308e",
"name": "control_v11f1e_sd15_tile",
"base": "sd-1",
"type": "controlnet"
}
"label": "Control Model (select control_v11f1e_sd15_tile)"
},
"control_weight": {
"name": "control_weight",
@ -1816,4 +1802,4 @@
"targetHandle": "unet"
}
]
}
}

View File

@ -8,12 +8,12 @@ class WorkflowThumbnailServiceBase(ABC):
"""Base class for workflow thumbnail services"""
@abstractmethod
def get_path(self, workflow_id: str) -> Path:
def get_path(self, workflow_id: str, with_hash: bool = True) -> Path:
"""Gets the path to a workflow thumbnail"""
pass
@abstractmethod
def get_url(self, workflow_id: str) -> str | None:
def get_url(self, workflow_id: str, with_hash: bool = True) -> str | None:
"""Gets the URL of a workflow thumbnail"""
pass

View File

@ -41,7 +41,7 @@ class WorkflowThumbnailFileStorageDisk(WorkflowThumbnailServiceBase):
except Exception as e:
raise WorkflowThumbnailFileSaveException from e
def get_path(self, workflow_id: str) -> Path:
def get_path(self, workflow_id: str, with_hash: bool = True) -> Path:
workflow = self._invoker.services.workflow_records.get(workflow_id).workflow
if workflow.meta.category is WorkflowCategory.Default:
default_thumbnails_dir = Path(__file__).parent / Path("default_workflow_thumbnails")
@ -51,7 +51,7 @@ class WorkflowThumbnailFileStorageDisk(WorkflowThumbnailServiceBase):
return path
def get_url(self, workflow_id: str) -> str | None:
def get_url(self, workflow_id: str, with_hash: bool = True) -> str | None:
path = self.get_path(workflow_id)
if not self._validate_path(path):
return
@ -59,7 +59,8 @@ class WorkflowThumbnailFileStorageDisk(WorkflowThumbnailServiceBase):
url = self._invoker.services.urls.get_workflow_thumbnail_url(workflow_id)
# The image URL never changes, so we must add random query string to it to prevent caching
url += f"?{uuid_string()}"
if with_hash:
url += f"?{uuid_string()}"
return url
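The new with_hash flag lets callers choose between a cache-busted URL and a stable one. A standalone sketch of the same pattern follows; the helper name and URL are illustrative only.

from uuid import uuid4

def thumbnail_url(base_url: str, with_hash: bool = True) -> str:
    # The thumbnail URL itself never changes, so a random query string forces
    # browsers to refetch it; pass with_hash=False when a stable, cacheable
    # URL is preferable.
    return f"{base_url}?{uuid4().hex}" if with_hash else base_url

print(thumbnail_url("http://localhost:9090/api/v1/workflows/abc/thumbnail"))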

View File

@ -0,0 +1,49 @@
from pathlib import Path
from typing import Optional
import torch
from PIL.Image import Image
from transformers import AutoProcessor, LlavaOnevisionForConditionalGeneration, LlavaOnevisionProcessor
from invokeai.backend.raw_model import RawModel
class LlavaOnevisionModel(RawModel):
def __init__(self, vllm_model: LlavaOnevisionForConditionalGeneration, processor: LlavaOnevisionProcessor):
self._vllm_model = vllm_model
self._processor = processor
@classmethod
def load_from_path(cls, path: str | Path):
vllm_model = LlavaOnevisionForConditionalGeneration.from_pretrained(path, local_files_only=True)
assert isinstance(vllm_model, LlavaOnevisionForConditionalGeneration)
processor = AutoProcessor.from_pretrained(path, local_files_only=True)
assert isinstance(processor, LlavaOnevisionProcessor)
return cls(vllm_model, processor)
def run(self, prompt: str, images: list[Image], device: torch.device, dtype: torch.dtype) -> str:
# TODO(ryand): Tune the max number of images that are useful for the model.
if len(images) > 3:
raise ValueError(
f"{len(images)} images were provided as input to the LLaVA OneVision model. "
"Pass <=3 images for good performance."
)
# Define a chat history and use `apply_chat_template` to get correctly formatted prompt.
# "content" is a list of dicts with types "text" or "image".
content = [{"type": "text", "text": prompt}]
# Add the correct number of images.
for _ in images:
content.append({"type": "image"})
conversation = [{"role": "user", "content": content}]
prompt = self._processor.apply_chat_template(conversation, add_generation_prompt=True)
inputs = self._processor(images=images or None, text=prompt, return_tensors="pt").to(device=device, dtype=dtype)
output = self._vllm_model.generate(**inputs, max_new_tokens=400, do_sample=False)
output_str: str = self._processor.decode(output[0][2:], skip_special_tokens=True)
# The output_str will include the prompt, so we extract the response.
response = output_str.split("assistant\n", 1)[1].strip()
return response
def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
self._vllm_model.to(device=device, dtype=dtype)
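A hypothetical usage sketch for the new wrapper, assuming a CUDA device and a locally downloaded model directory; the model path and image filename are placeholders.

from pathlib import Path

import torch
from PIL import Image

from invokeai.backend.llava_onevision_model import LlavaOnevisionModel

device, dtype = torch.device("cuda"), torch.bfloat16

model = LlavaOnevisionModel.load_from_path(Path("/models/llava-onevision-qwen2-0.5b"))  # placeholder path
model.to(device=device, dtype=dtype)

image = Image.open("photo.png").convert("RGB")  # placeholder image
print(model.run("Describe this image in one sentence.", [image], device, dtype))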

View File

@ -90,6 +90,7 @@ class ModelType(str, Enum):
SpandrelImageToImage = "spandrel_image_to_image"
SigLIP = "siglip"
FluxRedux = "flux_redux"
LlavaOnevision = "llava_onevision"
class SubModelType(str, Enum):
@ -624,6 +625,13 @@ class FluxReduxConfig(LegacyProbeMixin, ModelConfigBase):
format: Literal[ModelFormat.Checkpoint] = ModelFormat.Checkpoint
class LlavaOnevisionConfig(DiffusersConfigBase, LegacyProbeMixin, ModelConfigBase):
"""Model config for Llava Onevision models."""
type: Literal[ModelType.LlavaOnevision] = ModelType.LlavaOnevision
format: Literal[ModelFormat.Diffusers] = ModelFormat.Diffusers
def get_model_discriminator_value(v: Any) -> str:
"""
Computes the discriminator value for a model config.
@ -690,6 +698,7 @@ AnyModelConfig = Annotated[
Annotated[CLIPGEmbedDiffusersConfig, CLIPGEmbedDiffusersConfig.get_tag()],
Annotated[SigLIPConfig, SigLIPConfig.get_tag()],
Annotated[FluxReduxConfig, FluxReduxConfig.get_tag()],
Annotated[LlavaOnevisionConfig, LlavaOnevisionConfig.get_tag()],
],
Discriminator(get_model_discriminator_value),
]

View File

@ -141,6 +141,7 @@ class ModelProbe(object):
"SD3Transformer2DModel": ModelType.Main,
"CLIPTextModelWithProjection": ModelType.CLIPEmbed,
"SiglipModel": ModelType.SigLIP,
"LlavaOnevisionForConditionalGeneration": ModelType.LlavaOnevision,
}
TYPE2VARIANT: Dict[ModelType, Callable[[str], Optional[AnyVariant]]] = {ModelType.CLIPEmbed: get_clip_variant_type}
@ -767,6 +768,11 @@ class FluxReduxCheckpointProbe(CheckpointProbeBase):
return BaseModelType.Flux
class LlavaOnevisionCheckpointProbe(CheckpointProbeBase):
def get_base_type(self) -> BaseModelType:
raise NotImplementedError()
########################################################
# classes for probing folders
#######################################################
@ -1047,6 +1053,11 @@ class FluxReduxFolderProbe(FolderProbeBase):
raise NotImplementedError()
class LlaveOnevisionFolderProbe(FolderProbeBase):
def get_base_type(self) -> BaseModelType:
return BaseModelType.Any
class T2IAdapterFolderProbe(FolderProbeBase):
def get_base_type(self) -> BaseModelType:
config_file = self.model_path / "config.json"
@ -1082,6 +1093,7 @@ ModelProbe.register_probe("diffusers", ModelType.T2IAdapter, T2IAdapterFolderPro
ModelProbe.register_probe("diffusers", ModelType.SpandrelImageToImage, SpandrelImageToImageFolderProbe)
ModelProbe.register_probe("diffusers", ModelType.SigLIP, SigLIPFolderProbe)
ModelProbe.register_probe("diffusers", ModelType.FluxRedux, FluxReduxFolderProbe)
ModelProbe.register_probe("diffusers", ModelType.LlavaOnevision, LlaveOnevisionFolderProbe)
ModelProbe.register_probe("checkpoint", ModelType.Main, PipelineCheckpointProbe)
ModelProbe.register_probe("checkpoint", ModelType.VAE, VaeCheckpointProbe)
@ -1095,5 +1107,6 @@ ModelProbe.register_probe("checkpoint", ModelType.T2IAdapter, T2IAdapterCheckpoi
ModelProbe.register_probe("checkpoint", ModelType.SpandrelImageToImage, SpandrelImageToImageCheckpointProbe)
ModelProbe.register_probe("checkpoint", ModelType.SigLIP, SigLIPCheckpointProbe)
ModelProbe.register_probe("checkpoint", ModelType.FluxRedux, FluxReduxCheckpointProbe)
ModelProbe.register_probe("checkpoint", ModelType.LlavaOnevision, LlavaOnevisionCheckpointProbe)
ModelProbe.register_probe("onnx", ModelType.ONNX, ONNXFolderProbe)

View File

@ -0,0 +1,32 @@
from pathlib import Path
from typing import Optional
from invokeai.backend.llava_onevision_model import LlavaOnevisionModel
from invokeai.backend.model_manager.config import (
AnyModel,
AnyModelConfig,
BaseModelType,
ModelFormat,
ModelType,
SubModelType,
)
from invokeai.backend.model_manager.load.load_default import ModelLoader
from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry
@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.LlavaOnevision, format=ModelFormat.Diffusers)
class LlavaOnevisionModelLoader(ModelLoader):
"""Class for loading LLaVA Onevision VLLM models."""
def _load_model(
self,
config: AnyModelConfig,
submodel_type: Optional[SubModelType] = None,
) -> AnyModel:
if submodel_type is not None:
raise ValueError("Unexpected submodel requested for LLaVA OneVision model.")
model_path = Path(config.path)
model = LlavaOnevisionModel.load_from_path(model_path)
model.to(dtype=self._torch_dtype)
return model

View File

@ -614,6 +614,16 @@ flux_redux = StarterModel(
)
# endregion
# region LlavaOnevisionModel
llava_onevision = StarterModel(
name="LLaVA Onevision Qwen2 0.5B",
base=BaseModelType.Any,
source="llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
description="LLaVA Onevision VLLM model",
type=ModelType.LlavaOnevision,
)
# endregion
# List of starter models, displayed on the frontend.
# The order/sort of this list is not changed by the frontend - set it how you want it here.
STARTER_MODELS: list[StarterModel] = [
@ -683,6 +693,7 @@ STARTER_MODELS: list[StarterModel] = [
clip_l_encoder,
siglip,
flux_redux,
llava_onevision,
]
sd1_bundle: list[StarterModel] = [

View File

@ -116,6 +116,8 @@
"dontShowMeThese": "Don't show me these",
"editor": "Editor",
"error": "Error",
"error_withCount_one": "{{count}} error",
"error_withCount_other": "{{count}} errors",
"file": "File",
"folder": "Folder",
"format": "format",
@ -844,6 +846,7 @@
"starterModels": "Starter Models",
"starterModelsInModelManager": "Starter Models can be found in Model Manager",
"controlLora": "Control LoRA",
"llavaOnevision": "LLaVA OneVision",
"syncModels": "Sync Models",
"textualInversions": "Textual Inversions",
"triggerPhrases": "Trigger Phrases",
@ -1205,6 +1208,7 @@
"informationalPopoversDisabled": "Informational Popovers Disabled",
"informationalPopoversDisabledDesc": "Informational popovers have been disabled. Enable them in Settings.",
"enableModelDescriptions": "Enable Model Descriptions in Dropdowns",
"enableHighlightFocusedRegions": "Highlight Focused Regions",
"modelDescriptionsDisabled": "Model Descriptions in Dropdowns Disabled",
"modelDescriptionsDisabledDesc": "Model descriptions in dropdowns have been disabled. Enable them in Settings.",
"enableInvisibleWatermark": "Enable Invisible Watermark",
@ -1765,7 +1769,9 @@
"containerPlaceholder": "Empty Container",
"headingPlaceholder": "Empty Heading",
"textPlaceholder": "Empty Text",
"workflowBuilderAlphaWarning": "The workflow builder is currently in alpha. There may be breaking changes before the stable release."
"workflowBuilderAlphaWarning": "The workflow builder is currently in alpha. There may be breaking changes before the stable release.",
"minimum": "Minimum",
"maximum": "Maximum"
}
},
"controlLayers": {
@ -2317,6 +2323,7 @@
"newUserExperience": {
"toGetStartedLocal": "To get started, make sure to download or import models needed to run Invoke. Then, enter a prompt in the box and click <StrongComponent>Invoke</StrongComponent> to generate your first image. Select a prompt template to improve results. You can choose to save your images directly to the <StrongComponent>Gallery</StrongComponent> or edit them to the <StrongComponent>Canvas</StrongComponent>.",
"toGetStarted": "To get started, enter a prompt in the box and click <StrongComponent>Invoke</StrongComponent> to generate your first image. Select a prompt template to improve results. You can choose to save your images directly to the <StrongComponent>Gallery</StrongComponent> or edit them to the <StrongComponent>Canvas</StrongComponent>.",
"toGetStartedWorkflow": "To get started, fill in the fields on the left and press <StrongComponent>Invoke</StrongComponent> to generate your image. Want to explore more workflows? Click the <StrongComponent>folder icon</StrongComponent> next to the workflow title to see a list of other templates you can try.",
"gettingStartedSeries": "Want more guidance? Check out our <LinkComponent>Getting Started Series</LinkComponent> for tips on unlocking the full potential of the Invoke Studio.",
"lowVRAMMode": "For best performance, follow our <LinkComponent>Low VRAM guide</LinkComponent>.",
"noModelsInstalled": "It looks like you don't have any models installed! You can <DownloadStarterModelsButton>download a starter model bundle</DownloadStarterModelsButton> or <ImportModelsButton>import models</ImportModelsButton>."
@ -2324,8 +2331,8 @@
"whatsNew": {
"whatsNewInInvoke": "What's New in Invoke",
"items": [
"Memory Management: New setting for users with Nvidia GPUs to reduce VRAM usage.",
"Performance: Continued improvements to overall application performance and responsiveness."
"Workflows: New and improved Workflow Library.",
"FLUX: Support for FLUX Redux in Workflows and Canvas."
],
"readReleaseNotes": "Read Release Notes",
"watchRecentReleaseVideos": "Watch Recent Release Videos",

View File

@ -110,7 +110,10 @@
"layout": "Schema",
"row": "Riga",
"column": "Colonna",
"saveChanges": "Salva modifiche"
"saveChanges": "Salva modifiche",
"error_withCount_one": "{{count}} errore",
"error_withCount_many": "{{count}} errori",
"error_withCount_other": "{{count}} errori"
},
"gallery": {
"galleryImageSize": "Dimensione dell'immagine",
@ -1771,20 +1774,24 @@
"divider": "Divisore",
"container": "Contenitore",
"text": "Testo",
"numberInput": "Ingresso numerico"
"numberInput": "Ingresso numerico",
"containerRowLayout": "Contenitore (disposizione riga)",
"containerColumnLayout": "Contenitore (disposizione colonna)"
},
"loadMore": "Carica altro",
"searchPlaceholder": "Cerca per nome, descrizione o etichetta",
"filterByTags": "Filtra per etichetta",
"shared": "Condiviso",
"browseWorkflows": "Sfoglia i flussi di lavoro",
"resetTags": "Reimposta le etichette",
"allLoaded": "Tutti i flussi di lavoro caricati",
"saveChanges": "Salva modifiche",
"yourWorkflows": "I tuoi flussi di lavoro",
"recentlyOpened": "Aperto di recente",
"workflowThumbnail": "Miniatura del flusso di lavoro",
"private": "Privato"
"private": "Privato",
"deselectAll": "Deseleziona tutto",
"noRecentWorkflows": "Nessun flusso di lavoro recente",
"view": "Visualizza"
},
"accordions": {
"compositing": {
@ -2334,7 +2341,8 @@
"toGetStarted": "Per iniziare, inserisci un prompt nella casella e fai clic su <StrongComponent>Invoke</StrongComponent> per generare la tua prima immagine. Seleziona un modello di prompt per migliorare i risultati. Puoi scegliere di salvare le tue immagini direttamente nella <StrongComponent>Galleria</StrongComponent> o modificarle nella <StrongComponent>Tela</StrongComponent>.",
"noModelsInstalled": "Sembra che non hai installato alcun modello! Puoi <DownloadStarterModelsButton>scaricare un pacchetto di modelli di avvio</DownloadStarterModelsButton> o <ImportModelsButton>importare modelli</ImportModelsButton>.",
"toGetStartedLocal": "Per iniziare, assicurati di scaricare o importare i modelli necessari per eseguire Invoke. Quindi, inserisci un prompt nella casella e fai clic su <StrongComponent>Invoke</StrongComponent> per generare la tua prima immagine. Seleziona un modello di prompt per migliorare i risultati. Puoi scegliere di salvare le tue immagini direttamente nella <StrongComponent>Galleria</StrongComponent> o modificarle nella <StrongComponent>Tela</StrongComponent>.",
"lowVRAMMode": "Per prestazioni ottimali, segui la nostra <LinkComponent>guida per bassa VRAM</LinkComponent>."
"lowVRAMMode": "Per prestazioni ottimali, segui la nostra <LinkComponent>guida per bassa VRAM</LinkComponent>.",
"toGetStartedWorkflow": "Per iniziare, compila i campi a sinistra e premi <StrongComponent>Invoke</StrongComponent> per generare la tua immagine. Vuoi esplorare altri flussi di lavoro? Fai clic sull'<StrongComponent>icona della cartella</StrongComponent> accanto al titolo del flusso di lavoro per visualizzare un elenco di altri modelli che puoi provare."
},
"whatsNew": {
"whatsNewInInvoke": "Novità in Invoke",

View File

@ -2300,7 +2300,6 @@
"filterByTags": "Lọc Theo Nhãn",
"recentlyOpened": "Mở Gần Đây",
"private": "Cá Nhân",
"resetTags": "Khởi Động Lại Nhãn",
"loadMore": "Tải Thêm"
},
"upscaling": {

View File

@ -19,11 +19,13 @@ import { $store } from 'app/store/nanostores/store';
import { createStore } from 'app/store/store';
import type { PartialAppConfig } from 'app/types/invokeai';
import Loading from 'common/components/Loading/Loading';
import type { WorkflowTagCategory } from 'features/nodes/store/workflowLibrarySlice';
import type { WorkflowSortOption, WorkflowTagCategory } from 'features/nodes/store/workflowLibrarySlice';
import {
$workflowLibraryCategoriesOptions,
$workflowLibrarySortOptions,
$workflowLibraryTagCategoriesOptions,
DEFAULT_WORKFLOW_LIBRARY_CATEGORIES,
DEFAULT_WORKFLOW_LIBRARY_SORT_OPTIONS,
DEFAULT_WORKFLOW_LIBRARY_TAG_CATEGORIES,
} from 'features/nodes/store/workflowLibrarySlice';
import type { WorkflowCategory } from 'features/nodes/types/workflow';
@ -55,6 +57,7 @@ interface Props extends PropsWithChildren {
logo?: ReactNode;
workflowCategories?: WorkflowCategory[];
workflowTagCategories?: WorkflowTagCategory[];
workflowSortOptions?: WorkflowSortOption[];
loggingOverrides?: LoggingOverrides;
}
@ -76,6 +79,7 @@ const InvokeAIUI = ({
logo,
workflowCategories,
workflowTagCategories,
workflowSortOptions,
loggingOverrides,
}: Props) => {
useLayoutEffect(() => {
@ -221,6 +225,16 @@ const InvokeAIUI = ({
};
}, [workflowTagCategories]);
useEffect(() => {
if (workflowSortOptions) {
$workflowLibrarySortOptions.set(workflowSortOptions);
}
return () => {
$workflowLibrarySortOptions.set(DEFAULT_WORKFLOW_LIBRARY_SORT_OPTIONS);
};
}, [workflowSortOptions]);
useEffect(() => {
if (socketOptions) {
$socketOptions.set(socketOptions);

View File

@ -0,0 +1,43 @@
import type { Selector } from '@reduxjs/toolkit';
import { useAppStore } from 'app/store/nanostores/store';
import type { RootState } from 'app/store/store';
import { useEffect, useState } from 'react';
/**
* A hook that returns a debounced value from the app state.
*
* @param selector The redux selector
* @param debounceMs The debounce time in milliseconds
* @returns The debounced value
*/
export const useDebouncedAppSelector = <T>(selector: Selector<RootState, T>, debounceMs: number = 300) => {
const store = useAppStore();
const [value, setValue] = useState<T>(() => selector(store.getState()));
useEffect(() => {
let prevValue = selector(store.getState());
let timeout: number | null = null;
const unsubscribe = store.subscribe(() => {
const value = selector(store.getState());
if (value !== prevValue) {
if (timeout !== null) {
window.clearTimeout(timeout);
}
timeout = window.setTimeout(() => {
setValue(value);
prevValue = value;
}, debounceMs);
}
});
return () => {
unsubscribe();
if (timeout !== null) {
window.clearTimeout(timeout);
}
};
}, [debounceMs, selector, store]);
return value;
};

View File

@ -27,7 +27,8 @@ export type AppFeature =
| 'bulkDownload'
| 'starterModels'
| 'hfToken'
| 'retryQueueItem';
| 'retryQueueItem'
| 'cancelAndClearAll';
/**
* A disable-able Stable Diffusion feature
*/

View File

@ -0,0 +1,56 @@
import { Box, type BoxProps, type SystemStyleObject } from '@invoke-ai/ui-library';
import { useAppSelector } from 'app/store/storeHooks';
import { type FocusRegionName, useFocusRegion, useIsRegionFocused } from 'common/hooks/focus';
import { selectSystemShouldEnableHighlightFocusedRegions } from 'features/system/store/systemSlice';
import { memo, useMemo, useRef } from 'react';
interface FocusRegionWrapperProps extends BoxProps {
region: FocusRegionName;
focusOnMount?: boolean;
}
const FOCUS_REGION_STYLES: SystemStyleObject = {
position: 'relative',
'&[data-highlighted="true"]::after': {
borderColor: 'blue.700',
},
'&::after': {
content: '""',
position: 'absolute',
inset: 0,
zIndex: 1,
borderRadius: 'base',
border: '2px solid',
borderColor: 'transparent',
pointerEvents: 'none',
transition: 'border-color 0.1s ease-in-out',
},
};
export const FocusRegionWrapper = memo(
({ region, focusOnMount = false, sx, children, ...boxProps }: FocusRegionWrapperProps) => {
const shouldHighlightFocusedRegions = useAppSelector(selectSystemShouldEnableHighlightFocusedRegions);
const ref = useRef<HTMLDivElement>(null);
const options = useMemo(() => ({ focusOnMount }), [focusOnMount]);
useFocusRegion(region, ref, options);
const isFocused = useIsRegionFocused(region);
const isHighlighted = isFocused && shouldHighlightFocusedRegions;
return (
<Box
ref={ref}
tabIndex={-1}
sx={useMemo(() => ({ ...FOCUS_REGION_STYLES, ...sx }), [sx])}
data-highlighted={isHighlighted}
{...boxProps}
>
{children}
</Box>
);
}
);
FocusRegionWrapper.displayName = 'FocusRegionWrapper';

View File

@ -30,7 +30,7 @@ const log = logger('system');
/**
* The names of the focus regions.
*/
type FocusRegionName = 'gallery' | 'layers' | 'canvas' | 'workflows' | 'viewer';
export type FocusRegionName = 'gallery' | 'layers' | 'canvas' | 'workflows' | 'viewer';
/**
* A map of focus regions to the elements that are part of that region.

View File

@ -1,28 +1,33 @@
import { Divider, Flex } from '@invoke-ai/ui-library';
import { Divider, Flex, type SystemStyleObject } from '@invoke-ai/ui-library';
import { useAppSelector } from 'app/store/storeHooks';
import { useFocusRegion } from 'common/hooks/focus';
import { FocusRegionWrapper } from 'common/components/FocusRegionWrapper';
import { CanvasAddEntityButtons } from 'features/controlLayers/components/CanvasAddEntityButtons';
import { CanvasEntityList } from 'features/controlLayers/components/CanvasEntityList/CanvasEntityList';
import { EntityListSelectedEntityActionBar } from 'features/controlLayers/components/CanvasEntityList/EntityListSelectedEntityActionBar';
import { selectHasEntities } from 'features/controlLayers/store/selectors';
import { memo, useRef } from 'react';
import { memo } from 'react';
import { ParamDenoisingStrength } from './ParamDenoisingStrength';
const FOCUS_REGION_STYLES: SystemStyleObject = {
width: 'full',
height: 'full',
};
export const CanvasLayersPanelContent = memo(() => {
const hasEntities = useAppSelector(selectHasEntities);
const layersPanelFocusRef = useRef<HTMLDivElement>(null);
useFocusRegion('layers', layersPanelFocusRef);
return (
<Flex ref={layersPanelFocusRef} flexDir="column" gap={2} w="full" h="full">
<EntityListSelectedEntityActionBar />
<Divider py={0} />
<ParamDenoisingStrength />
<Divider py={0} />
{!hasEntities && <CanvasAddEntityButtons />}
{hasEntities && <CanvasEntityList />}
</Flex>
<FocusRegionWrapper region="layers" sx={FOCUS_REGION_STYLES}>
<Flex flexDir="column" gap={2} w="full" h="full">
<EntityListSelectedEntityActionBar />
<Divider py={0} />
<ParamDenoisingStrength />
<Divider py={0} />
{!hasEntities && <CanvasAddEntityButtons />}
{hasEntities && <CanvasEntityList />}
</Flex>
</FocusRegionWrapper>
);
});

View File

@ -1,6 +1,14 @@
import { ContextMenu, Flex, IconButton, Menu, MenuButton, MenuList } from '@invoke-ai/ui-library';
import {
ContextMenu,
Flex,
IconButton,
Menu,
MenuButton,
MenuList,
type SystemStyleObject,
} from '@invoke-ai/ui-library';
import { useAppSelector } from 'app/store/storeHooks';
import { useFocusRegion } from 'common/hooks/focus';
import { FocusRegionWrapper } from 'common/components/FocusRegionWrapper';
import { CanvasAlertsPreserveMask } from 'features/controlLayers/components/CanvasAlerts/CanvasAlertsPreserveMask';
import { CanvasAlertsSelectedEntityStatus } from 'features/controlLayers/components/CanvasAlerts/CanvasAlertsSelectedEntityStatus';
import { CanvasAlertsSendingToGallery } from 'features/controlLayers/components/CanvasAlerts/CanvasAlertsSendingTo';
@ -18,11 +26,16 @@ import { Transform } from 'features/controlLayers/components/Transform/Transform
import { CanvasManagerProviderGate } from 'features/controlLayers/contexts/CanvasManagerProviderGate';
import { selectDynamicGrid, selectShowHUD } from 'features/controlLayers/store/canvasSettingsSlice';
import { GatedImageViewer } from 'features/gallery/components/ImageViewer/ImageViewer';
import { memo, useCallback, useRef } from 'react';
import { memo, useCallback } from 'react';
import { PiDotsThreeOutlineVerticalFill } from 'react-icons/pi';
import { CanvasAlertsInvocationProgress } from './CanvasAlerts/CanvasAlertsInvocationProgress';
const FOCUS_REGION_STYLES: SystemStyleObject = {
width: 'full',
height: 'full',
};
const MenuContent = () => {
return (
<CanvasManagerProviderGate>
@ -35,7 +48,6 @@ const MenuContent = () => {
};
export const CanvasMainPanelContent = memo(() => {
const ref = useRef<HTMLDivElement>(null);
const dynamicGrid = useAppSelector(selectDynamicGrid);
const showHUD = useAppSelector(selectShowHUD);
@ -43,82 +55,81 @@ export const CanvasMainPanelContent = memo(() => {
return <MenuContent />;
}, []);
useFocusRegion('canvas', ref);
return (
<Flex
tabIndex={-1}
ref={ref}
borderRadius="base"
position="relative"
flexDirection="column"
height="full"
width="full"
gap={2}
alignItems="center"
justifyContent="center"
overflow="hidden"
>
<CanvasManagerProviderGate>
<CanvasToolbar />
</CanvasManagerProviderGate>
<ContextMenu<HTMLDivElement> renderMenu={renderMenu} withLongPress={false}>
{(ref) => (
<Flex
ref={ref}
position="relative"
w="full"
h="full"
bg={dynamicGrid ? 'base.850' : 'base.900'}
borderRadius="base"
overflow="hidden"
>
<InvokeCanvasComponent />
<CanvasManagerProviderGate>
<Flex
position="absolute"
flexDir="column"
top={1}
insetInlineStart={1}
pointerEvents="none"
gap={2}
alignItems="flex-start"
>
{showHUD && <CanvasHUD />}
<CanvasAlertsSelectedEntityStatus />
<CanvasAlertsPreserveMask />
<CanvasAlertsSendingToGallery />
<CanvasAlertsInvocationProgress />
</Flex>
<Flex position="absolute" top={1} insetInlineEnd={1}>
<Menu>
<MenuButton as={IconButton} icon={<PiDotsThreeOutlineVerticalFill />} colorScheme="base" />
<MenuContent />
</Menu>
</Flex>
</CanvasManagerProviderGate>
</Flex>
)}
</ContextMenu>
<Flex position="absolute" bottom={4} gap={2} align="center" justify="center">
<FocusRegionWrapper region="canvas" sx={FOCUS_REGION_STYLES}>
<Flex
tabIndex={-1}
borderRadius="base"
position="relative"
flexDirection="column"
height="full"
width="full"
gap={2}
alignItems="center"
justifyContent="center"
overflow="hidden"
>
<CanvasManagerProviderGate>
<StagingAreaIsStagingGate>
<StagingAreaToolbar />
</StagingAreaIsStagingGate>
<CanvasToolbar />
</CanvasManagerProviderGate>
</Flex>
<Flex position="absolute" bottom={4}>
<ContextMenu<HTMLDivElement> renderMenu={renderMenu} withLongPress={false}>
{(ref) => (
<Flex
ref={ref}
position="relative"
w="full"
h="full"
bg={dynamicGrid ? 'base.850' : 'base.900'}
borderRadius="base"
overflow="hidden"
>
<InvokeCanvasComponent />
<CanvasManagerProviderGate>
<Flex
position="absolute"
flexDir="column"
top={1}
insetInlineStart={1}
pointerEvents="none"
gap={2}
alignItems="flex-start"
>
{showHUD && <CanvasHUD />}
<CanvasAlertsSelectedEntityStatus />
<CanvasAlertsPreserveMask />
<CanvasAlertsSendingToGallery />
<CanvasAlertsInvocationProgress />
</Flex>
<Flex position="absolute" top={1} insetInlineEnd={1}>
<Menu>
<MenuButton as={IconButton} icon={<PiDotsThreeOutlineVerticalFill />} colorScheme="base" />
<MenuContent />
</Menu>
</Flex>
</CanvasManagerProviderGate>
</Flex>
)}
</ContextMenu>
<Flex position="absolute" bottom={4} gap={2} align="center" justify="center">
<CanvasManagerProviderGate>
<StagingAreaIsStagingGate>
<StagingAreaToolbar />
</StagingAreaIsStagingGate>
</CanvasManagerProviderGate>
</Flex>
<Flex position="absolute" bottom={4}>
<CanvasManagerProviderGate>
<Filter />
<Transform />
<SelectObject />
</CanvasManagerProviderGate>
</Flex>
<CanvasManagerProviderGate>
<Filter />
<Transform />
<SelectObject />
<CanvasDropArea />
</CanvasManagerProviderGate>
<GatedImageViewer />
</Flex>
<CanvasManagerProviderGate>
<CanvasDropArea />
</CanvasManagerProviderGate>
<GatedImageViewer />
</Flex>
</FocusRegionWrapper>
);
});

View File

@ -7,6 +7,7 @@ import { Box, Flex, Heading } from '@invoke-ai/ui-library';
import { useStore } from '@nanostores/react';
import { getStore } from 'app/store/nanostores/store';
import { useAppSelector } from 'app/store/storeHooks';
import { $focusedRegion } from 'common/hooks/focus';
import { setFileToPaste } from 'features/controlLayers/components/CanvasPasteModal';
import { DndDropOverlay } from 'features/dnd/DndDropOverlay';
import type { DndTargetState } from 'features/dnd/types';
@ -99,10 +100,18 @@ export const FullscreenDropzone = memo(() => {
return;
}
const focusedRegion = $focusedRegion.get();
// While on the canvas tab and when pasting a single image, canvas may want to create a new layer. Let it handle
// the paste event.
const [firstImageFile] = files;
if (!isImageViewerOpen && activeTab === 'canvas' && files.length === 1 && firstImageFile) {
if (
focusedRegion === 'canvas' &&
!isImageViewerOpen &&
activeTab === 'canvas' &&
files.length === 1 &&
firstImageFile
) {
setFileToPaste(firstImageFile);
return;
}

View File

@ -1,6 +1,15 @@
import { Box, Button, Collapse, Divider, Flex, IconButton, useDisclosure } from '@invoke-ai/ui-library';
import {
Box,
Button,
Collapse,
Divider,
Flex,
IconButton,
type SystemStyleObject,
useDisclosure,
} from '@invoke-ai/ui-library';
import { useAppDispatch, useAppSelector } from 'app/store/storeHooks';
import { useFocusRegion } from 'common/hooks/focus';
import { FocusRegionWrapper } from 'common/components/FocusRegionWrapper';
import { GalleryHeader } from 'features/gallery/components/GalleryHeader';
import { selectBoardSearchText } from 'features/gallery/store/gallerySelectors';
import { boardSearchTextChanged } from 'features/gallery/store/gallerySlice';
@ -20,14 +29,20 @@ import { Gallery } from './Gallery';
const COLLAPSE_STYLES: CSSProperties = { flexShrink: 0, minHeight: 0 };
const FOCUS_REGION_STYLES: SystemStyleObject = {
width: 'full',
height: 'full',
position: 'relative',
flexDirection: 'column',
display: 'flex',
};
const GalleryPanelContent = () => {
const { t } = useTranslation();
const boardSearchText = useAppSelector(selectBoardSearchText);
const dispatch = useAppDispatch();
const boardSearchDisclosure = useDisclosure({ defaultIsOpen: !!boardSearchText.length });
const imperativePanelGroupRef = useRef<ImperativePanelGroupHandle>(null);
const galleryPanelFocusRef = useRef<HTMLDivElement>(null);
useFocusRegion('gallery', galleryPanelFocusRef);
const boardsListPanelOptions = useMemo<UsePanelOptions>(
() => ({
@ -50,7 +65,7 @@ const GalleryPanelContent = () => {
}, [boardSearchText.length, boardSearchDisclosure, boardsListPanel, dispatch]);
return (
<Flex ref={galleryPanelFocusRef} position="relative" flexDirection="column" h="full" w="full" tabIndex={-1}>
<FocusRegionWrapper region="gallery" sx={FOCUS_REGION_STYLES}>
<Flex alignItems="center" justifyContent="space-between" w="full">
<Flex flexGrow={1} flexBasis={0}>
<Button
@ -99,7 +114,7 @@ const GalleryPanelContent = () => {
<Gallery />
</Panel>
</PanelGroup>
</Flex>
</FocusRegionWrapper>
);
};

View File

@ -1,6 +1,6 @@
import { Box, Flex, IconButton } from '@invoke-ai/ui-library';
import { Box, IconButton, type SystemStyleObject } from '@invoke-ai/ui-library';
import { useAppSelector } from 'app/store/storeHooks';
import { useFocusRegion } from 'common/hooks/focus';
import { FocusRegionWrapper } from 'common/components/FocusRegionWrapper';
import { useAssertSingleton } from 'common/hooks/useAssertSingleton';
import { CompareToolbar } from 'features/gallery/components/ImageViewer/CompareToolbar';
import CurrentImagePreview from 'features/gallery/components/ImageViewer/CurrentImagePreview';
@ -9,7 +9,7 @@ import { ImageComparisonDroppable } from 'features/gallery/components/ImageViewe
import { ViewerToolbar } from 'features/gallery/components/ImageViewer/ViewerToolbar';
import { selectHasImageToCompare } from 'features/gallery/store/gallerySelectors';
import type { ReactNode } from 'react';
import { memo, useRef } from 'react';
import { memo } from 'react';
import { useHotkeys } from 'react-hotkeys-hook';
import { useTranslation } from 'react-i18next';
import { PiXBold } from 'react-icons/pi';
@ -25,29 +25,24 @@ const useFocusRegionOptions = {
focusOnMount: true,
};
const FOCUS_REGION_STYLES: SystemStyleObject = {
width: 'full',
height: 'full',
position: 'absolute',
flexDirection: 'column',
inset: 0,
alignItems: 'center',
justifyContent: 'center',
overflow: 'hidden',
};
export const ImageViewer = memo(({ closeButton }: Props) => {
useAssertSingleton('ImageViewer');
const hasImageToCompare = useAppSelector(selectHasImageToCompare);
const [containerRef, containerDims] = useMeasure<HTMLDivElement>();
const ref = useRef<HTMLDivElement>(null);
useFocusRegion('viewer', ref, useFocusRegionOptions);
return (
<Flex
ref={ref}
tabIndex={-1}
layerStyle="first"
borderRadius="base"
position="absolute"
flexDirection="column"
top={0}
right={0}
bottom={0}
left={0}
alignItems="center"
justifyContent="center"
overflow="hidden"
>
<FocusRegionWrapper region="viewer" sx={FOCUS_REGION_STYLES} layerStyle="first" {...useFocusRegionOptions}>
{hasImageToCompare && <CompareToolbar />}
{!hasImageToCompare && <ViewerToolbar closeButton={closeButton} />}
<Box ref={containerRef} w="full" h="full" p={2}>
@ -55,7 +50,7 @@ export const ImageViewer = memo(({ closeButton }: Props) => {
{hasImageToCompare && <ImageComparison containerDims={containerDims} />}
</Box>
<ImageComparisonDroppable />
</Flex>
</FocusRegionWrapper>
);
});

View File

@ -7,6 +7,7 @@ import { LOADING_SYMBOL, useHasImages } from 'features/gallery/hooks/useHasImage
import { $installModelsTab } from 'features/modelManagerV2/subpanels/InstallModels';
import { useFeatureStatus } from 'features/system/hooks/useFeatureStatus';
import { selectIsLocal } from 'features/system/store/configSlice';
import { selectActiveTab } from 'features/ui/store/uiSelectors';
import { setActiveTab } from 'features/ui/store/uiSlice';
import type { PropsWithChildren } from 'react';
import { memo, useCallback, useMemo } from 'react';
@ -19,6 +20,7 @@ export const NoContentForViewer = memo(() => {
const [mainModels, { data }] = useMainModels();
const isLocal = useAppSelector(selectIsLocal);
const isEnabled = useFeatureStatus('starterModels');
const activeTab = useAppSelector(selectActiveTab);
const { t } = useTranslation();
const showStarterBundles = useMemo(() => {
@ -38,10 +40,10 @@ export const NoContentForViewer = memo(() => {
}
return (
<Flex flexDir="column" gap={8} alignItems="center" textAlign="center" maxW="600px">
<Flex flexDir="column" gap={8} alignItems="center" textAlign="center" maxW="400px">
<InvokeLogoIcon w={32} h={32} />
<Flex flexDir="column" gap={4} alignItems="center" textAlign="center">
{isLocal ? <GetStartedLocal /> : <GetStartedCommercial />}
{isLocal ? <GetStartedLocal /> : activeTab === 'workflows' ? <GetStartedWorkflows /> : <GetStartedCommercial />}
{showStarterBundles && <StarterBundlesCallout />}
<Divider />
<GettingStartedVideosCallout />
@ -103,6 +105,14 @@ const GetStartedCommercial = () => {
);
};
const GetStartedWorkflows = () => {
return (
<Text fontSize="md" color="base.200">
<Trans i18nKey="newUserExperience.toGetStartedWorkflow" components={{ StrongComponent }} />
</Text>
);
};
const GettingStartedVideosCallout = () => {
return (
<Text fontSize="md" color="base.200">

View File

@ -16,6 +16,7 @@ import {
useEmbeddingModels,
useFluxReduxModels,
useIPAdapterModels,
useLLaVAModels,
useLoRAModels,
useMainModels,
useRefinerModels,
@ -126,6 +127,12 @@ const ModelList = () => {
[fluxReduxModels, searchTerm, filteredModelType]
);
const [llavaOneVisionModels, { isLoading: isLoadingLlavaOneVisionModels }] = useLLaVAModels();
const filteredLlavaOneVisionModels = useMemo(
() => modelsFilter(llavaOneVisionModels, searchTerm, filteredModelType),
[llavaOneVisionModels, searchTerm, filteredModelType]
);
const totalFilteredModels = useMemo(() => {
return (
filteredMainModels.length +
@ -236,6 +243,17 @@ const ModelList = () => {
{!isLoadingClipEmbedModels && filteredClipEmbedModels.length > 0 && (
<ModelListWrapper title={t('modelManager.clipEmbed')} modelList={filteredClipEmbedModels} key="clip-embed" />
)}
{/* LLaVA OneVision List */}
{isLoadingLlavaOneVisionModels && <FetchingModelsLoader loadingMessage="Loading LLaVA OneVision Models..." />}
{!isLoadingLlavaOneVisionModels && filteredLlavaOneVisionModels.length > 0 && (
<ModelListWrapper
title={t('modelManager.llavaOnevision')}
modelList={filteredLlavaOneVisionModels}
key="llava-onevision"
/>
)}
{/* Spandrel Image to Image List */}
{isLoadingSpandrelImageToImageModels && (
<FetchingModelsLoader loadingMessage="Loading Image-to-Image Models..." />

View File

@ -27,6 +27,7 @@ export const ModelTypeFilter = memo(() => {
control_lora: t('modelManager.controlLora'),
siglip: t('modelManager.siglip'),
flux_redux: t('modelManager.fluxRedux'),
llava_onevision: t('modelManager.llavaOnevision'),
}),
[t]
);

View File

@ -1,10 +1,10 @@
import { Flex } from '@invoke-ai/ui-library';
import type { SystemStyleObject } from '@invoke-ai/ui-library';
import { FocusRegionWrapper } from 'common/components/FocusRegionWrapper';
import { IAINoContentFallback } from 'common/components/IAIImageFallback';
import { useFocusRegion } from 'common/hooks/focus';
import { AddNodeCmdk } from 'features/nodes/components/flow/AddNodeCmdk/AddNodeCmdk';
import TopPanel from 'features/nodes/components/flow/panels/TopPanel/TopPanel';
import WorkflowEditorSettings from 'features/nodes/components/flow/panels/TopRightPanel/WorkflowEditorSettings';
import { memo, useRef } from 'react';
import { memo } from 'react';
import { useTranslation } from 'react-i18next';
import { PiFlowArrowBold } from 'react-icons/pi';
import { useGetOpenAPISchemaQuery } from 'services/api/endpoints/appInfo';
@ -13,24 +13,20 @@ import { Flow } from './flow/Flow';
import BottomLeftPanel from './flow/panels/BottomLeftPanel/BottomLeftPanel';
import MinimapPanel from './flow/panels/MinimapPanel/MinimapPanel';
const FOCUS_REGION_STYLES: SystemStyleObject = {
position: 'relative',
width: 'full',
height: 'full',
alignItems: 'center',
justifyContent: 'center',
};
const NodeEditor = () => {
const { data, isLoading } = useGetOpenAPISchemaQuery();
const { t } = useTranslation();
const ref = useRef<HTMLDivElement>(null);
useFocusRegion('workflows', ref);
return (
<Flex
tabIndex={-1}
ref={ref}
layerStyle="first"
position="relative"
width="full"
height="full"
borderRadius="base"
alignItems="center"
justifyContent="center"
>
<FocusRegionWrapper region="workflows" layerStyle="first" sx={FOCUS_REGION_STYLES}>
{data && (
<>
<Flow />
@ -42,7 +38,7 @@ const NodeEditor = () => {
)}
<WorkflowEditorSettings />
{isLoading && <IAINoContentFallback label={t('nodes.loadingNodes')} icon={PiFlowArrowBold} />}
</Flex>
</FocusRegionWrapper>
);
};

View File

@ -252,7 +252,7 @@ export const Flow = memo(() => {
id: 'pasteSelection',
category: 'workflows',
callback: pasteSelection,
options: { preventDefault: true },
options: { enabled: isWorkflowsFocused, preventDefault: true },
dependencies: [pasteSelection],
});
@ -260,7 +260,7 @@ export const Flow = memo(() => {
id: 'pasteSelectionWithEdges',
category: 'workflows',
callback: pasteSelectionWithEdges,
options: { preventDefault: true },
options: { enabled: isWorkflowsFocused, preventDefault: true },
dependencies: [pasteSelectionWithEdges],
});
@ -270,7 +270,7 @@ export const Flow = memo(() => {
callback: () => {
dispatch(undo());
},
options: { enabled: mayUndo, preventDefault: true },
options: { enabled: isWorkflowsFocused && mayUndo, preventDefault: true },
dependencies: [mayUndo],
});
@ -280,7 +280,7 @@ export const Flow = memo(() => {
callback: () => {
dispatch(redo());
},
options: { enabled: mayRedo, preventDefault: true },
options: { enabled: isWorkflowsFocused && mayRedo, preventDefault: true },
dependencies: [mayRedo],
});

View File

@ -3,24 +3,35 @@ import { useFloatField } from 'features/nodes/components/flow/nodes/Invocation/f
import type { FieldComponentProps } from 'features/nodes/components/flow/nodes/Invocation/fields/inputs/types';
import { NO_DRAG_CLASS } from 'features/nodes/types/constants';
import type { FloatFieldInputInstance, FloatFieldInputTemplate } from 'features/nodes/types/field';
import type { NodeFieldFloatSettings } from 'features/nodes/types/workflow';
import { memo } from 'react';
export const FloatFieldInput = memo((props: FieldComponentProps<FloatFieldInputInstance, FloatFieldInputTemplate>) => {
const { defaultValue, onChange, value, min, max, step, fineStep } = useFloatField(props);
export const FloatFieldInput = memo(
(
props: FieldComponentProps<FloatFieldInputInstance, FloatFieldInputTemplate, { settings?: NodeFieldFloatSettings }>
) => {
const { nodeId, field, fieldTemplate, settings } = props;
const { defaultValue, onChange, min, max, step, fineStep } = useFloatField(
nodeId,
field.name,
fieldTemplate,
settings
);
return (
<CompositeNumberInput
defaultValue={defaultValue}
onChange={onChange}
value={value}
min={min}
max={max}
step={step}
fineStep={fineStep}
className={NO_DRAG_CLASS}
flex="1 1 0"
/>
);
});
return (
<CompositeNumberInput
defaultValue={defaultValue}
onChange={onChange}
value={field.value}
min={min}
max={max}
step={step}
fineStep={fineStep}
className={NO_DRAG_CLASS}
flex="1 1 0"
/>
);
}
);
FloatFieldInput.displayName = 'FloatFieldInput ';

View File

@ -3,18 +3,27 @@ import { useFloatField } from 'features/nodes/components/flow/nodes/Invocation/f
import type { FieldComponentProps } from 'features/nodes/components/flow/nodes/Invocation/fields/inputs/types';
import { NO_DRAG_CLASS } from 'features/nodes/types/constants';
import type { FloatFieldInputInstance, FloatFieldInputTemplate } from 'features/nodes/types/field';
import type { NodeFieldFloatSettings } from 'features/nodes/types/workflow';
import { memo } from 'react';
export const FloatFieldInputAndSlider = memo(
(props: FieldComponentProps<FloatFieldInputInstance, FloatFieldInputTemplate>) => {
const { defaultValue, onChange, value, min, max, step, fineStep } = useFloatField(props);
(
props: FieldComponentProps<FloatFieldInputInstance, FloatFieldInputTemplate, { settings?: NodeFieldFloatSettings }>
) => {
const { nodeId, field, fieldTemplate, settings } = props;
const { defaultValue, onChange, min, max, step, fineStep } = useFloatField(
nodeId,
field.name,
fieldTemplate,
settings
);
return (
<>
<CompositeSlider
defaultValue={defaultValue}
onChange={onChange}
value={value}
value={field.value}
min={min}
max={max}
step={step}
@ -27,7 +36,7 @@ export const FloatFieldInputAndSlider = memo(
<CompositeNumberInput
defaultValue={defaultValue}
onChange={onChange}
value={value}
value={field.value}
min={min}
max={max}
step={step}

View File

@ -3,26 +3,37 @@ import { useFloatField } from 'features/nodes/components/flow/nodes/Invocation/f
import type { FieldComponentProps } from 'features/nodes/components/flow/nodes/Invocation/fields/inputs/types';
import { NO_DRAG_CLASS } from 'features/nodes/types/constants';
import type { FloatFieldInputInstance, FloatFieldInputTemplate } from 'features/nodes/types/field';
import type { NodeFieldFloatSettings } from 'features/nodes/types/workflow';
import { memo } from 'react';
export const FloatFieldSlider = memo((props: FieldComponentProps<FloatFieldInputInstance, FloatFieldInputTemplate>) => {
const { defaultValue, onChange, value, min, max, step, fineStep } = useFloatField(props);
export const FloatFieldSlider = memo(
(
props: FieldComponentProps<FloatFieldInputInstance, FloatFieldInputTemplate, { settings?: NodeFieldFloatSettings }>
) => {
const { nodeId, field, fieldTemplate, settings } = props;
const { defaultValue, onChange, min, max, step, fineStep } = useFloatField(
nodeId,
field.name,
fieldTemplate,
settings
);
return (
<CompositeSlider
defaultValue={defaultValue}
onChange={onChange}
value={value}
min={min}
max={max}
step={step}
fineStep={fineStep}
className={NO_DRAG_CLASS}
marks
withThumbTooltip
flex="1 1 0"
/>
);
});
return (
<CompositeSlider
defaultValue={defaultValue}
onChange={onChange}
value={field.value}
min={min}
max={max}
step={step}
fineStep={fineStep}
className={NO_DRAG_CLASS}
marks
withThumbTooltip
flex="1 1 0"
/>
);
}
);
FloatFieldSlider.displayName = 'FloatFieldSlider';

View File

@ -1,65 +1,89 @@
import { NUMPY_RAND_MAX } from 'app/constants';
import { useAppDispatch } from 'app/store/storeHooks';
import type { FieldComponentProps } from 'features/nodes/components/flow/nodes/Invocation/fields/inputs/types';
import { roundDownToMultiple, roundUpToMultiple } from 'common/util/roundDownToMultiple';
import { fieldFloatValueChanged } from 'features/nodes/store/nodesSlice';
import type { FloatFieldInputInstance, FloatFieldInputTemplate } from 'features/nodes/types/field';
import type { FloatFieldInputTemplate } from 'features/nodes/types/field';
import { constrainNumber } from 'features/nodes/util/constrainNumber';
import { isNil } from 'lodash-es';
import { useCallback, useMemo } from 'react';
export const useFloatField = (props: FieldComponentProps<FloatFieldInputInstance, FloatFieldInputTemplate>) => {
const { nodeId, field, fieldTemplate } = props;
export const useFloatField = (
nodeId: string,
fieldName: string,
fieldTemplate: FloatFieldInputTemplate,
overrides: { min?: number; max?: number; step?: number } = {}
) => {
const { min: overrideMin, max: overrideMax, step: overrideStep } = overrides;
const dispatch = useAppDispatch();
const onChange = useCallback(
(value: number) => {
dispatch(fieldFloatValueChanged({ nodeId, fieldName: field.name, value }));
},
[dispatch, field.name, nodeId]
);
const min = useMemo(() => {
let min = -NUMPY_RAND_MAX;
if (!isNil(fieldTemplate.minimum)) {
min = fieldTemplate.minimum;
}
if (!isNil(fieldTemplate.exclusiveMinimum)) {
min = fieldTemplate.exclusiveMinimum + 0.01;
}
return min;
}, [fieldTemplate.exclusiveMinimum, fieldTemplate.minimum]);
const max = useMemo(() => {
let max = NUMPY_RAND_MAX;
if (!isNil(fieldTemplate.maximum)) {
max = fieldTemplate.maximum;
}
if (!isNil(fieldTemplate.exclusiveMaximum)) {
max = fieldTemplate.exclusiveMaximum - 0.01;
}
return max;
}, [fieldTemplate.exclusiveMaximum, fieldTemplate.maximum]);
const step = useMemo(() => {
if (overrideStep !== undefined) {
return overrideStep;
}
if (isNil(fieldTemplate.multipleOf)) {
return 0.1;
}
return fieldTemplate.multipleOf;
}, [fieldTemplate.multipleOf]);
}, [fieldTemplate.multipleOf, overrideStep]);
const fineStep = useMemo(() => {
if (overrideStep !== undefined) {
return overrideStep;
}
if (isNil(fieldTemplate.multipleOf)) {
return 0.01;
}
return fieldTemplate.multipleOf;
}, [fieldTemplate.multipleOf]);
}, [fieldTemplate.multipleOf, overrideStep]);
const min = useMemo(() => {
let min = -NUMPY_RAND_MAX;
if (overrideMin !== undefined) {
min = overrideMin;
} else if (!isNil(fieldTemplate.minimum)) {
min = fieldTemplate.minimum;
} else if (!isNil(fieldTemplate.exclusiveMinimum)) {
min = fieldTemplate.exclusiveMinimum + 0.01;
}
return roundUpToMultiple(min, step);
}, [fieldTemplate.exclusiveMinimum, fieldTemplate.minimum, overrideMin, step]);
const max = useMemo(() => {
let max = NUMPY_RAND_MAX;
if (overrideMax !== undefined) {
max = overrideMax;
} else if (!isNil(fieldTemplate.maximum)) {
max = fieldTemplate.maximum;
} else if (!isNil(fieldTemplate.exclusiveMaximum)) {
max = fieldTemplate.exclusiveMaximum - 0.01;
}
return roundDownToMultiple(max, step);
}, [fieldTemplate.exclusiveMaximum, fieldTemplate.maximum, overrideMax, step]);
const constrainValue = useCallback(
(v: number) =>
constrainNumber(v, { min, max, step: step }, { min: overrideMin, max: overrideMax, step: overrideStep }),
[max, min, overrideMax, overrideMin, overrideStep, step]
);
const onChange = useCallback(
(value: number) => {
dispatch(fieldFloatValueChanged({ nodeId, fieldName, value }));
},
[dispatch, fieldName, nodeId]
);
return {
defaultValue: fieldTemplate.default,
onChange,
value: field.value,
min,
max,
step,
fineStep,
constrainValue,
};
};
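
With this change the hook snaps the effective minimum up, and the effective maximum down, onto the step grid before handing them to the inputs. A hedged sketch of the rounding helpers it relies on (the real ones live in common/util/roundDownToMultiple and may differ in detail), plus a small worked example:

```ts
// Sketch of step-grid rounding for the slider bounds.
const roundUpToMultiple = (value: number, multiple: number): number => Math.ceil(value / multiple) * multiple;
const roundDownToMultiple = (value: number, multiple: number): number => Math.floor(value / multiple) * multiple;

// Example: a template with minimum 0.05 and maximum 0.97, combined with an
// override step of 0.1, yields an effective range of roughly [0.1, 0.9];
// both ends then land on reachable slider positions (modulo float noise).
roundUpToMultiple(0.05, 0.1); // ~0.1
roundDownToMultiple(0.97, 0.1); // ~0.9
```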

View File

@ -62,6 +62,8 @@ import {
isIntegerGeneratorFieldInputTemplate,
isIPAdapterModelFieldInputInstance,
isIPAdapterModelFieldInputTemplate,
isLLaVAModelFieldInputInstance,
isLLaVAModelFieldInputTemplate,
isLoRAModelFieldInputInstance,
isLoRAModelFieldInputTemplate,
isMainModelFieldInputInstance,
@ -95,6 +97,8 @@ import {
} from 'features/nodes/types/field';
import type { NodeFieldElement } from 'features/nodes/types/workflow';
import { memo } from 'react';
import type { Equals } from 'tsafe';
import { assert } from 'tsafe';
import BoardFieldInputComponent from './inputs/BoardFieldInputComponent';
import BooleanFieldInputComponent from './inputs/BooleanFieldInputComponent';
@ -110,6 +114,7 @@ import FluxReduxModelFieldInputComponent from './inputs/FluxReduxModelFieldInput
import FluxVAEModelFieldInputComponent from './inputs/FluxVAEModelFieldInputComponent';
import ImageFieldInputComponent from './inputs/ImageFieldInputComponent';
import IPAdapterModelFieldInputComponent from './inputs/IPAdapterModelFieldInputComponent';
import LLaVAModelFieldInputComponent from './inputs/LLaVAModelFieldInputComponent';
import LoRAModelFieldInputComponent from './inputs/LoRAModelFieldInputComponent';
import MainModelFieldInputComponent from './inputs/MainModelFieldInputComponent';
import RefinerModelFieldInputComponent from './inputs/RefinerModelFieldInputComponent';
@ -157,6 +162,8 @@ export const InputFieldRenderer = memo(({ nodeId, fieldName, settings }: Props)
return <StringFieldInput nodeId={nodeId} field={field} fieldTemplate={template} />;
} else if (settings.component === 'textarea') {
return <StringFieldTextarea nodeId={nodeId} field={field} fieldTemplate={template} />;
} else {
assert<Equals<never, typeof settings.component>>(false, 'Unexpected settings.component');
}
}
@ -171,32 +178,47 @@ export const InputFieldRenderer = memo(({ nodeId, fieldName, settings }: Props)
if (!isIntegerFieldInputInstance(field)) {
return null;
}
if (settings?.type !== 'integer-field-config') {
if (!settings || settings.type !== 'integer-field-config') {
return <IntegerFieldInput nodeId={nodeId} field={field} fieldTemplate={template} />;
}
if (settings.component === 'number-input') {
return <IntegerFieldInput nodeId={nodeId} field={field} fieldTemplate={template} />;
} else if (settings.component === 'slider') {
return <IntegerFieldSlider nodeId={nodeId} field={field} fieldTemplate={template} />;
} else if (settings.component === 'number-input-and-slider') {
return <IntegerFieldInputAndSlider nodeId={nodeId} field={field} fieldTemplate={template} />;
return <IntegerFieldInput nodeId={nodeId} field={field} fieldTemplate={template} settings={settings} />;
}
if (settings.component === 'slider') {
return <IntegerFieldSlider nodeId={nodeId} field={field} fieldTemplate={template} settings={settings} />;
}
if (settings.component === 'number-input-and-slider') {
return <IntegerFieldInputAndSlider nodeId={nodeId} field={field} fieldTemplate={template} settings={settings} />;
}
assert<Equals<never, typeof settings.component>>(false, 'Unexpected settings.component');
}
if (isFloatFieldInputTemplate(template)) {
if (!isFloatFieldInputInstance(field)) {
return null;
}
if (settings?.type !== 'float-field-config') {
if (!settings || settings.type !== 'float-field-config') {
return <FloatFieldInput nodeId={nodeId} field={field} fieldTemplate={template} />;
}
if (settings.component === 'number-input') {
return <FloatFieldInput nodeId={nodeId} field={field} fieldTemplate={template} />;
} else if (settings.component === 'slider') {
return <FloatFieldSlider nodeId={nodeId} field={field} fieldTemplate={template} />;
} else if (settings.component === 'number-input-and-slider') {
return <FloatFieldInputAndSlider nodeId={nodeId} field={field} fieldTemplate={template} />;
return <FloatFieldInput nodeId={nodeId} field={field} fieldTemplate={template} settings={settings} />;
}
if (settings.component === 'slider') {
return <FloatFieldSlider nodeId={nodeId} field={field} fieldTemplate={template} settings={settings} />;
}
if (settings.component === 'number-input-and-slider') {
return <FloatFieldInputAndSlider nodeId={nodeId} field={field} fieldTemplate={template} settings={settings} />;
}
assert<Equals<never, typeof settings.component>>(false, 'Unexpected settings.component');
}
if (isIntegerFieldCollectionInputTemplate(template)) {
@ -303,6 +325,13 @@ export const InputFieldRenderer = memo(({ nodeId, fieldName, settings }: Props)
return <ControlLoRAModelFieldInputComponent nodeId={nodeId} field={field} fieldTemplate={template} />;
}
if (isLLaVAModelFieldInputTemplate(template)) {
if (!isLLaVAModelFieldInputInstance(field)) {
return null;
}
return <LLaVAModelFieldInputComponent nodeId={nodeId} field={field} fieldTemplate={template} />;
}
if (isFluxVAEModelFieldInputTemplate(template)) {
if (!isFluxVAEModelFieldInputInstance(field)) {
return null;
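
The assert<Equals<never, typeof settings.component>> calls added in this file are compile-time exhaustiveness checks built with tsafe. A small self-contained sketch of the pattern, using a made-up union so it stands alone:

```ts
import type { Equals } from 'tsafe';
import { assert } from 'tsafe';

type NumberFieldComponent = 'number-input' | 'slider' | 'number-input-and-slider';

// Once every variant is handled, `component` is narrowed to `never`, so the
// type-level assertion holds. Add a new variant to the union without handling
// it and Equals<never, ...> becomes false, turning the omission into a compile
// error; the runtime assert(false) only fires if the types were wrong anyway.
const describeComponent = (component: NumberFieldComponent): string => {
  if (component === 'number-input') {
    return 'plain number input';
  }
  if (component === 'slider') {
    return 'slider only';
  }
  if (component === 'number-input-and-slider') {
    return 'number input plus slider';
  }
  assert<Equals<never, typeof component>>(false, 'Unexpected component');
};
```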

View File

@ -1,4 +1,5 @@
import { Flex, Text } from '@invoke-ai/ui-library';
import { Flex, ListItem, Text, UnorderedList } from '@invoke-ai/ui-library';
import { useInputFieldErrors } from 'features/nodes/hooks/useInputFieldErrors';
import { useInputFieldInstance } from 'features/nodes/hooks/useInputFieldInstance';
import { useInputFieldTemplate } from 'features/nodes/hooks/useInputFieldTemplate';
import { useFieldTypeName } from 'features/nodes/hooks/usePrettyFieldType';
@ -17,6 +18,7 @@ export const InputFieldTooltipContent = memo(({ nodeId, fieldName }: Props) => {
const fieldInstance = useInputFieldInstance(nodeId, fieldName);
const fieldTemplate = useInputFieldTemplate(nodeId, fieldName);
const fieldTypeName = useFieldTypeName(fieldTemplate.type);
const fieldErrors = useInputFieldErrors(nodeId, fieldName);
const fieldTitle = useMemo(() => {
if (fieldInstance.label && fieldTemplate.title) {
@ -42,6 +44,16 @@ export const InputFieldTooltipContent = memo(({ nodeId, fieldName }: Props) => {
<Text>
{t('common.input')}: {startCase(fieldTemplate.input)}
</Text>
{fieldErrors.length > 0 && (
<>
<Text color="error.500">{t('common.error_withCount', { count: fieldErrors.length })}:</Text>
<UnorderedList>
{fieldErrors.map(({ issue }) => (
<ListItem key={issue}>{issue}</ListItem>
))}
</UnorderedList>
</>
)}
</Flex>
);
});
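
The error heading above relies on an i18next key whose text is chosen by count. For reference, a minimal sketch of count-driven pluralization (the locale strings here are assumptions, not the project's actual entries):

```ts
import i18n from 'i18next';

// Hypothetical locale entries: i18next appends the `_one` / `_other` suffix
// based on the `count` option passed to t(). initImmediate: false makes init
// synchronous here because the resources are inline.
void i18n.init({
  lng: 'en',
  initImmediate: false,
  resources: {
    en: {
      translation: {
        common: {
          error_withCount_one: '{{count}} error',
          error_withCount_other: '{{count}} errors',
        },
      },
    },
  },
});

i18n.t('common.error_withCount', { count: 1 }); // "1 error"
i18n.t('common.error_withCount', { count: 3 }); // "3 errors"
```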

View File

@ -3,23 +3,37 @@ import type { FieldComponentProps } from 'features/nodes/components/flow/nodes/I
import { useIntegerField } from 'features/nodes/components/flow/nodes/Invocation/fields/IntegerField/useIntegerField';
import { NO_DRAG_CLASS } from 'features/nodes/types/constants';
import type { IntegerFieldInputInstance, IntegerFieldInputTemplate } from 'features/nodes/types/field';
import type { NodeFieldIntegerSettings } from 'features/nodes/types/workflow';
import { memo } from 'react';
export const IntegerFieldInput = memo(
(props: FieldComponentProps<IntegerFieldInputInstance, IntegerFieldInputTemplate>) => {
const { defaultValue, onChange, value, min, max, step, fineStep } = useIntegerField(props);
(
props: FieldComponentProps<
IntegerFieldInputInstance,
IntegerFieldInputTemplate,
{ settings?: NodeFieldIntegerSettings }
>
) => {
const { nodeId, field, fieldTemplate, settings } = props;
const { defaultValue, onChange, min, max, step, fineStep, constrainValue } = useIntegerField(
nodeId,
field.name,
fieldTemplate,
settings
);
return (
<CompositeNumberInput
defaultValue={defaultValue}
onChange={onChange}
value={value}
value={field.value}
min={min}
max={max}
step={step}
fineStep={fineStep}
className={NO_DRAG_CLASS}
flex="1 1 0"
constrainValue={constrainValue}
/>
);
}
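
The constrainValue callback wired into CompositeNumberInput delegates to constrainNumber from features/nodes/util/constrainNumber. A hypothetical sketch of what such a helper might do (the real implementation may differ): overrides win over the template-derived constraints, and the value is clamped into range and then snapped onto the step grid.

```ts
type NumberConstraints = { min?: number; max?: number; step?: number };

// Hypothetical constrainNumber-style helper, for illustration only.
const constrainNumberSketch = (
  value: number,
  constraints: Required<NumberConstraints>,
  overrides: NumberConstraints = {}
): number => {
  const min = overrides.min ?? constraints.min;
  const max = overrides.max ?? constraints.max;
  const step = overrides.step ?? constraints.step;
  // Clamp into [min, max], then snap to the step grid anchored at min.
  const clamped = Math.min(Math.max(value, min), max);
  return Math.min(max, min + Math.round((clamped - min) / step) * step);
};

constrainNumberSketch(7.3, { min: 0, max: 10, step: 0.5 }); // 7.5
```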

Some files were not shown because too many files have changed in this diff.