Skip to content

Commit e57809e

Browse files
authored
Merge branch 'main' into lstein/feat/multi-gpu
2 parents 1c0067f + 0787c6c commit e57809e

File tree

197 files changed

+6954
-4244
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

197 files changed

+6954
-4244
lines changed

invokeai/app/invocations/controlnet_image_processors.py

Lines changed: 47 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -165,13 +165,13 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
165165
title="Canny Processor",
166166
tags=["controlnet", "canny"],
167167
category="controlnet",
168-
version="1.3.2",
168+
version="1.3.3",
169169
)
170170
class CannyImageProcessorInvocation(ImageProcessorInvocation):
171171
"""Canny edge detection for ControlNet"""
172172

173-
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
174-
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
173+
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
174+
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
175175
low_threshold: int = InputField(
176176
default=100, ge=0, le=255, description="The low threshold of the Canny pixel gradient (0-255)"
177177
)
@@ -199,13 +199,13 @@ def run_processor(self, image: Image.Image) -> Image.Image:
199199
title="HED (softedge) Processor",
200200
tags=["controlnet", "hed", "softedge"],
201201
category="controlnet",
202-
version="1.2.2",
202+
version="1.2.3",
203203
)
204204
class HedImageProcessorInvocation(ImageProcessorInvocation):
205205
"""Applies HED edge detection to image"""
206206

207-
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
208-
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
207+
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
208+
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
209209
# safe not supported in controlnet_aux v0.0.3
210210
# safe: bool = InputField(default=False, description=FieldDescriptions.safe_mode)
211211
scribble: bool = InputField(default=False, description=FieldDescriptions.scribble_mode)
@@ -228,13 +228,13 @@ def run_processor(self, image: Image.Image) -> Image.Image:
228228
title="Lineart Processor",
229229
tags=["controlnet", "lineart"],
230230
category="controlnet",
231-
version="1.2.2",
231+
version="1.2.3",
232232
)
233233
class LineartImageProcessorInvocation(ImageProcessorInvocation):
234234
"""Applies line art processing to image"""
235235

236-
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
237-
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
236+
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
237+
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
238238
coarse: bool = InputField(default=False, description="Whether to use coarse mode")
239239

240240
def run_processor(self, image: Image.Image) -> Image.Image:
@@ -250,13 +250,13 @@ def run_processor(self, image: Image.Image) -> Image.Image:
250250
title="Lineart Anime Processor",
251251
tags=["controlnet", "lineart", "anime"],
252252
category="controlnet",
253-
version="1.2.2",
253+
version="1.2.3",
254254
)
255255
class LineartAnimeImageProcessorInvocation(ImageProcessorInvocation):
256256
"""Applies line art anime processing to image"""
257257

258-
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
259-
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
258+
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
259+
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
260260

261261
def run_processor(self, image: Image.Image) -> Image.Image:
262262
processor = LineartAnimeProcessor()
@@ -273,15 +273,15 @@ def run_processor(self, image: Image.Image) -> Image.Image:
273273
title="Midas Depth Processor",
274274
tags=["controlnet", "midas"],
275275
category="controlnet",
276-
version="1.2.3",
276+
version="1.2.4",
277277
)
278278
class MidasDepthImageProcessorInvocation(ImageProcessorInvocation):
279279
"""Applies Midas depth processing to image"""
280280

281281
a_mult: float = InputField(default=2.0, ge=0, description="Midas parameter `a_mult` (a = a_mult * PI)")
282282
bg_th: float = InputField(default=0.1, ge=0, description="Midas parameter `bg_th`")
283-
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
284-
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
283+
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
284+
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
285285
# depth_and_normal not supported in controlnet_aux v0.0.3
286286
# depth_and_normal: bool = InputField(default=False, description="whether to use depth and normal mode")
287287

@@ -304,13 +304,13 @@ def run_processor(self, image):
304304
title="Normal BAE Processor",
305305
tags=["controlnet"],
306306
category="controlnet",
307-
version="1.2.2",
307+
version="1.2.3",
308308
)
309309
class NormalbaeImageProcessorInvocation(ImageProcessorInvocation):
310310
"""Applies NormalBae processing to image"""
311311

312-
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
313-
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
312+
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
313+
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
314314

315315
def run_processor(self, image):
316316
normalbae_processor = NormalBaeDetector.from_pretrained("lllyasviel/Annotators")
@@ -321,13 +321,13 @@ def run_processor(self, image):
321321

322322

323323
@invocation(
324-
"mlsd_image_processor", title="MLSD Processor", tags=["controlnet", "mlsd"], category="controlnet", version="1.2.2"
324+
"mlsd_image_processor", title="MLSD Processor", tags=["controlnet", "mlsd"], category="controlnet", version="1.2.3"
325325
)
326326
class MlsdImageProcessorInvocation(ImageProcessorInvocation):
327327
"""Applies MLSD processing to image"""
328328

329-
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
330-
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
329+
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
330+
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
331331
thr_v: float = InputField(default=0.1, ge=0, description="MLSD parameter `thr_v`")
332332
thr_d: float = InputField(default=0.1, ge=0, description="MLSD parameter `thr_d`")
333333

@@ -344,13 +344,13 @@ def run_processor(self, image):
344344

345345

346346
@invocation(
347-
"pidi_image_processor", title="PIDI Processor", tags=["controlnet", "pidi"], category="controlnet", version="1.2.2"
347+
"pidi_image_processor", title="PIDI Processor", tags=["controlnet", "pidi"], category="controlnet", version="1.2.3"
348348
)
349349
class PidiImageProcessorInvocation(ImageProcessorInvocation):
350350
"""Applies PIDI processing to image"""
351351

352-
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
353-
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
352+
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
353+
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
354354
safe: bool = InputField(default=False, description=FieldDescriptions.safe_mode)
355355
scribble: bool = InputField(default=False, description=FieldDescriptions.scribble_mode)
356356

@@ -371,13 +371,13 @@ def run_processor(self, image):
371371
title="Content Shuffle Processor",
372372
tags=["controlnet", "contentshuffle"],
373373
category="controlnet",
374-
version="1.2.2",
374+
version="1.2.3",
375375
)
376376
class ContentShuffleImageProcessorInvocation(ImageProcessorInvocation):
377377
"""Applies content shuffle processing to image"""
378378

379-
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
380-
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
379+
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
380+
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
381381
h: int = InputField(default=512, ge=0, description="Content shuffle `h` parameter")
382382
w: int = InputField(default=512, ge=0, description="Content shuffle `w` parameter")
383383
f: int = InputField(default=256, ge=0, description="Content shuffle `f` parameter")
@@ -401,7 +401,7 @@ def run_processor(self, image):
401401
title="Zoe (Depth) Processor",
402402
tags=["controlnet", "zoe", "depth"],
403403
category="controlnet",
404-
version="1.2.2",
404+
version="1.2.3",
405405
)
406406
class ZoeDepthImageProcessorInvocation(ImageProcessorInvocation):
407407
"""Applies Zoe depth processing to image"""
@@ -417,15 +417,15 @@ def run_processor(self, image):
417417
title="Mediapipe Face Processor",
418418
tags=["controlnet", "mediapipe", "face"],
419419
category="controlnet",
420-
version="1.2.3",
420+
version="1.2.4",
421421
)
422422
class MediapipeFaceProcessorInvocation(ImageProcessorInvocation):
423423
"""Applies mediapipe face processing to image"""
424424

425425
max_faces: int = InputField(default=1, ge=1, description="Maximum number of faces to detect")
426426
min_confidence: float = InputField(default=0.5, ge=0, le=1, description="Minimum confidence for face detection")
427-
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
428-
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
427+
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
428+
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
429429

430430
def run_processor(self, image):
431431
mediapipe_face_processor = MediapipeFaceDetector()
@@ -444,16 +444,16 @@ def run_processor(self, image):
444444
title="Leres (Depth) Processor",
445445
tags=["controlnet", "leres", "depth"],
446446
category="controlnet",
447-
version="1.2.2",
447+
version="1.2.3",
448448
)
449449
class LeresImageProcessorInvocation(ImageProcessorInvocation):
450450
"""Applies leres processing to image"""
451451

452452
thr_a: float = InputField(default=0, description="Leres parameter `thr_a`")
453453
thr_b: float = InputField(default=0, description="Leres parameter `thr_b`")
454454
boost: bool = InputField(default=False, description="Whether to use boost mode")
455-
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
456-
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
455+
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
456+
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
457457

458458
def run_processor(self, image):
459459
leres_processor = LeresDetector.from_pretrained("lllyasviel/Annotators")
@@ -473,7 +473,7 @@ def run_processor(self, image):
473473
title="Tile Resample Processor",
474474
tags=["controlnet", "tile"],
475475
category="controlnet",
476-
version="1.2.2",
476+
version="1.2.3",
477477
)
478478
class TileResamplerProcessorInvocation(ImageProcessorInvocation):
479479
"""Tile resampler processor"""
@@ -513,13 +513,13 @@ def run_processor(self, img):
513513
title="Segment Anything Processor",
514514
tags=["controlnet", "segmentanything"],
515515
category="controlnet",
516-
version="1.2.3",
516+
version="1.2.4",
517517
)
518518
class SegmentAnythingProcessorInvocation(ImageProcessorInvocation):
519519
"""Applies segment anything processing to image"""
520520

521-
detect_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.detect_res)
522-
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
521+
detect_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.detect_res)
522+
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
523523

524524
def run_processor(self, image):
525525
# segment_anything_processor = SamDetector.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints")
@@ -560,12 +560,12 @@ def show_anns(self, anns: List[Dict]):
560560
title="Color Map Processor",
561561
tags=["controlnet"],
562562
category="controlnet",
563-
version="1.2.2",
563+
version="1.2.3",
564564
)
565565
class ColorMapImageProcessorInvocation(ImageProcessorInvocation):
566566
"""Generates a color map from the provided image"""
567567

568-
color_map_tile_size: int = InputField(default=64, ge=0, description=FieldDescriptions.tile_size)
568+
color_map_tile_size: int = InputField(default=64, ge=1, description=FieldDescriptions.tile_size)
569569

570570
def run_processor(self, image: Image.Image):
571571
np_image = np.array(image, dtype=np.uint8)
@@ -592,15 +592,15 @@ def run_processor(self, image: Image.Image):
592592
title="Depth Anything Processor",
593593
tags=["controlnet", "depth", "depth anything"],
594594
category="controlnet",
595-
version="1.1.1",
595+
version="1.1.2",
596596
)
597597
class DepthAnythingImageProcessorInvocation(ImageProcessorInvocation):
598598
"""Generates a depth map based on the Depth Anything algorithm"""
599599

600600
model_size: DEPTH_ANYTHING_MODEL_SIZES = InputField(
601601
default="small", description="The size of the depth model to use"
602602
)
603-
resolution: int = InputField(default=512, ge=64, multiple_of=64, description=FieldDescriptions.image_res)
603+
resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
604604

605605
def run_processor(self, image: Image.Image):
606606
depth_anything_detector = DepthAnythingDetector()
@@ -615,15 +615,15 @@ def run_processor(self, image: Image.Image):
615615
title="DW Openpose Image Processor",
616616
tags=["controlnet", "dwpose", "openpose"],
617617
category="controlnet",
618-
version="1.1.0",
618+
version="1.1.1",
619619
)
620620
class DWOpenposeImageProcessorInvocation(ImageProcessorInvocation):
621621
"""Generates an openpose pose from an image using DWPose"""
622622

623623
draw_body: bool = InputField(default=True)
624624
draw_face: bool = InputField(default=False)
625625
draw_hands: bool = InputField(default=False)
626-
image_resolution: int = InputField(default=512, ge=0, description=FieldDescriptions.image_res)
626+
image_resolution: int = InputField(default=512, ge=1, description=FieldDescriptions.image_res)
627627

628628
def run_processor(self, image: Image.Image):
629629
dw_openpose = DWOpenposeDetector()
@@ -642,15 +642,15 @@ def run_processor(self, image: Image.Image):
642642
title="Heuristic Resize",
643643
tags=["image, controlnet"],
644644
category="image",
645-
version="1.0.0",
645+
version="1.0.1",
646646
classification=Classification.Prototype,
647647
)
648648
class HeuristicResizeInvocation(BaseInvocation):
649649
"""Resize an image using a heuristic method. Preserves edge maps."""
650650

651651
image: ImageField = InputField(description="The image to resize")
652-
width: int = InputField(default=512, gt=0, description="The width to resize to (px)")
653-
height: int = InputField(default=512, gt=0, description="The height to resize to (px)")
652+
width: int = InputField(default=512, ge=1, description="The width to resize to (px)")
653+
height: int = InputField(default=512, ge=1, description="The height to resize to (px)")
654654

655655
def invoke(self, context: InvocationContext) -> ImageOutput:
656656
image = context.images.get_pil(self.image.image_name, "RGB")

invokeai/app/invocations/latent.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,14 @@
33
import math
44
from contextlib import ExitStack
55
from functools import singledispatchmethod
6-
from typing import Any, Iterator, List, Literal, Optional, Tuple, Union
6+
from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union
77

88
import einops
99
import numpy as np
1010
import numpy.typing as npt
1111
import torch
1212
import torchvision
1313
import torchvision.transforms as T
14-
from diffusers import AutoencoderKL, AutoencoderTiny
1514
from diffusers.configuration_utils import ConfigMixin
1615
from diffusers.image_processor import VaeImageProcessor
1716
from diffusers.models.adapter import T2IAdapter
@@ -21,9 +20,12 @@
2120
LoRAXFormersAttnProcessor,
2221
XFormersAttnProcessor,
2322
)
23+
from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
24+
from diffusers.models.autoencoders.autoencoder_tiny import AutoencoderTiny
2425
from diffusers.models.unets.unet_2d_condition import UNet2DConditionModel
25-
from diffusers.schedulers import DPMSolverSDEScheduler
26-
from diffusers.schedulers import SchedulerMixin as Scheduler
26+
from diffusers.schedulers.scheduling_dpmsolver_sde import DPMSolverSDEScheduler
27+
from diffusers.schedulers.scheduling_tcd import TCDScheduler
28+
from diffusers.schedulers.scheduling_utils import SchedulerMixin as Scheduler
2729
from PIL import Image, ImageFilter
2830
from pydantic import field_validator
2931
from torchvision.transforms.functional import resize as tv_resize
@@ -521,9 +523,10 @@ def _concat_regional_text_embeddings(
521523
)
522524

523525
if is_sdxl:
524-
return SDXLConditioningInfo(
525-
embeds=text_embedding, pooled_embeds=pooled_embedding, add_time_ids=add_time_ids
526-
), regions
526+
return (
527+
SDXLConditioningInfo(embeds=text_embedding, pooled_embeds=pooled_embedding, add_time_ids=add_time_ids),
528+
regions,
529+
)
527530
return BasicConditioningInfo(embeds=text_embedding), regions
528531

529532
def get_conditioning_data(
@@ -825,7 +828,7 @@ def init_scheduler(
825828
denoising_start: float,
826829
denoising_end: float,
827830
seed: int,
828-
) -> Tuple[int, List[int], int]:
831+
) -> Tuple[int, List[int], int, Dict[str, Any]]:
829832
assert isinstance(scheduler, ConfigMixin)
830833
if scheduler.config.get("cpu_only", False):
831834
scheduler.set_timesteps(steps, device="cpu")
@@ -853,13 +856,15 @@ def init_scheduler(
853856
timesteps = timesteps[t_start_idx : t_start_idx + t_end_idx]
854857
num_inference_steps = len(timesteps) // scheduler.order
855858

856-
scheduler_step_kwargs = {}
859+
scheduler_step_kwargs: Dict[str, Any] = {}
857860
scheduler_step_signature = inspect.signature(scheduler.step)
858861
if "generator" in scheduler_step_signature.parameters:
859862
# At some point, someone decided that schedulers that accept a generator should use the original seed with
860863
# all bits flipped. I don't know the original rationale for this, but now we must keep it like this for
861864
# reproducibility.
862-
scheduler_step_kwargs = {"generator": torch.Generator(device=device).manual_seed(seed ^ 0xFFFFFFFF)}
865+
scheduler_step_kwargs.update({"generator": torch.Generator(device=device).manual_seed(seed ^ 0xFFFFFFFF)})
866+
if isinstance(scheduler, TCDScheduler):
867+
scheduler_step_kwargs.update({"eta": 1.0})
863868

864869
return num_inference_steps, timesteps, init_timestep, scheduler_step_kwargs
865870

invokeai/backend/stable_diffusion/schedulers/schedulers.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
LCMScheduler,
1414
LMSDiscreteScheduler,
1515
PNDMScheduler,
16+
TCDScheduler,
1617
UniPCMultistepScheduler,
1718
)
1819

@@ -40,4 +41,5 @@
4041
"dpmpp_sde_k": (DPMSolverSDEScheduler, {"use_karras_sigmas": True, "noise_sampler_seed": 0}),
4142
"unipc": (UniPCMultistepScheduler, {"cpu_only": True}),
4243
"lcm": (LCMScheduler, {}),
44+
"tcd": (TCDScheduler, {}),
4345
}

0 commit comments

Comments
 (0)