diff --git a/toolkit/stable_diffusion_model.py b/toolkit/stable_diffusion_model.py index 2f1030c20..2150c8c33 100644 --- a/toolkit/stable_diffusion_model.py +++ b/toolkit/stable_diffusion_model.py @@ -1012,6 +1012,12 @@ def load_model(self): self.vae: 'AutoencoderKL' = pipe.vae.to(self.vae_device_torch, dtype=self.vae_torch_dtype) self.vae.eval() self.vae.requires_grad_(False) + # Flux VAE loaded from ComfyUI single-file checkpoints may not carry these config values + if self.is_flux: + if getattr(self.vae.config, 'scaling_factor', None) is None: + self.vae.config.scaling_factor = 0.3611 + if getattr(self.vae.config, 'shift_factor', None) is None: + self.vae.config.shift_factor = 0.1159 VAE_SCALE_FACTOR = 2 ** (len(self.vae.config['block_out_channels']) - 1) self.vae_scale_factor = VAE_SCALE_FACTOR self.unet.to(self.device_torch, dtype=dtype) @@ -2449,7 +2455,7 @@ def encode_prompt( prompt_embeds, pooled_prompt_embeds = train_tools.encode_prompts_flux( self.tokenizer, # list self.text_encoder, # list - prompt, + [prompt if prompt is not None else ''] if isinstance(prompt, str) or prompt is None else prompt, truncate=not long_prompts, max_length=512, dropout_prob=dropout_prob, diff --git a/toolkit/train_tools.py b/toolkit/train_tools.py index 78e2183c3..0f4b84000 100644 --- a/toolkit/train_tools.py +++ b/toolkit/train_tools.py @@ -528,6 +528,13 @@ def encode_prompts_flux( device = text_encoder[0].device dtype = text_encoder[0].dtype + # Normalize prompts: ensure every element is a non-None string so the + # CLIP/T5 tokenizers never receive None or False as input + if isinstance(prompts, list): + prompts = [str(p) if p is not None and p is not False else '' for p in prompts] + else: + prompts = [str(prompts) if prompts is not None and prompts is not False else ''] + batch_size = len(prompts) # clip