-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutil.py
More file actions
135 lines (115 loc) · 6.37 KB
/
util.py
File metadata and controls
135 lines (115 loc) · 6.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import diffusers
import numpy as np
from PIL import Image
import torch
import argparse
from diffusers import StableDiffusionInpaintPipeline, StableDiffusionInpaintPipelineLegacy, StableDiffusionXLInpaintPipeline
from diffusers.schedulers import DDIMScheduler, DPMSolverMultistepScheduler, EulerAncestralDiscreteScheduler, PNDMScheduler
# Lookup table from scheduler name (upper-case key) to the diffusers scheduler class.
# The keys are the options listed in the --scheduler help text; get_pipe() upper-cases
# the requested name before indexing into this dict.
schedulers = {
"DDIM": DDIMScheduler,
"DPM++": DPMSolverMultistepScheduler,
"EULERA": EulerAncestralDiscreteScheduler,
"PNDM": PNDMScheduler
}
def generate_image(pipe, prompt, base_size, nsteps):
    """Run the pipeline once on a square canvas and return its first image.

    Args:
        pipe: Callable pipeline. It is invoked with the keyword arguments
            ``prompt``, ``num_inference_steps``, ``height`` and ``width``.
        prompt: Prompt forwarded unchanged to the pipeline.
        base_size: Value used for both ``height`` and ``width``.
        nsteps: Value passed as ``num_inference_steps``.

    Returns:
        The first entry of the ``images`` attribute of the pipeline result.
    """
    call_kwargs = {
        "prompt": prompt,
        "num_inference_steps": nsteps,
        "height": base_size,
        "width": base_size,
    }
    result = pipe(**call_kwargs)
    return result.images[0]
def generate_image_with_inpainting_pipeline(pipe, prompt, base_size, pipe_args):
    """
    Generate an image from scratch through an inpainting pipeline.

    Inpainting models (e.g. runwayml/stable-diffusion-inpainting) do not support the normal pipeline, so a
    non-inpainted image is obtained by handing the inpainting pipeline an all-black canvas together with an
    all-white (255) mask of the same shape.

    Args:
        pipe: Callable inpainting pipeline; called with ``prompt``, ``image``, ``mask_image`` and ``**pipe_args``.
        prompt: Prompt forwarded to the pipeline.
        base_size: Side length of the square canvas and mask.
        pipe_args: Mapping of extra keyword arguments forwarded to the pipeline.

    Returns:
        The first entry of the ``images`` attribute of the pipeline result.
    """
    canvas_shape = (base_size, base_size, 3)

    # Black RGB canvas: there is no existing content to preserve.
    blank_canvas = Image.fromarray(np.zeros(canvas_shape, dtype=np.uint8))
    # Every mask value is 255, i.e. the mask covers the whole canvas.
    full_mask = Image.fromarray(np.full(canvas_shape, 255, dtype=np.uint8))

    result = pipe(prompt=prompt, image=blank_canvas, mask_image=full_mask, **pipe_args)
    return result.images[0]
def next_image(pipe, image, base_size, prompt, shiftx, shifty, pipe_args):
    """Given an image, uses inpainting to produce the next image (which overlaps with the previous image).

    The pixels are moved ``shiftx`` columns to the left and ``shifty`` rows up; the strip vacated on the
    right/bottom is zero-filled and marked with 255 in the mask handed to the pipeline.

    Args:
        pipe: Callable inpainting pipeline; called with ``prompt``, ``image``, ``mask_image`` and ``**pipe_args``.
        image: Previous frame; its ``size`` must equal ``(base_size, base_size)``.
        base_size: Side length of the square frame.
        prompt: Prompt forwarded to the pipeline.
        shiftx: Number of columns to shift by.
        shifty: Number of rows to shift by.
        pipe_args: Mapping of extra keyword arguments forwarded to the pipeline.

    Returns:
        The first entry of the ``images`` attribute of the pipeline result.

    Raises:
        AssertionError: If ``image.size`` is not ``(base_size, base_size)``.
    """
    expected_size = (base_size, base_size)
    assert image.size == expected_size, f"Expected image of size {(base_size, base_size)} but pipeline output has shape {image.size}."

    pixels = np.array(image)

    # Drop the leftmost columns and append an equally wide black strip on the right.
    right_pad = np.zeros((base_size, shiftx, 3), dtype=np.uint8)
    pixels = np.concatenate([pixels[:, shiftx:], right_pad], 1)

    # Drop the topmost rows and append an equally tall black strip at the bottom.
    bottom_pad = np.zeros((shifty, base_size, 3), dtype=np.uint8)
    pixels = np.concatenate([pixels[shifty:], bottom_pad], 0)

    shifted = Image.fromarray(pixels)

    # Mask is 255 exactly over the freshly appended strips and 0 elsewhere.
    mask_pixels = np.zeros((base_size, base_size, 3), dtype=np.uint8)
    mask_pixels[:, base_size - shiftx:] = 255
    mask_pixels[base_size - shifty:, :] = 255
    inpaint_mask = Image.fromarray(mask_pixels)

    result = pipe(prompt=prompt, image=shifted, mask_image=inpaint_mask, **pipe_args)
    return result.images[0]
def get_argparser():
    """Build the command-line argument parser.

    Only help texts differ from the previous version (missing space, unbalanced
    parenthesis, wrong expansion of "cfg", misspelled pipeline class names); option
    strings, destinations, types and defaults are unchanged.

    Returns:
        argparse.ArgumentParser: Parser with one positional ``prompts`` argument and the
        optional generation, scrolling and model-selection flags defined below.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("prompts", nargs="+", help="Prompt list to use for generation. Separate prompts using "
                                                   "the '|' character.")
    # NOTE(review): the default is a list of strings, whereas a value supplied on the command
    # line arrives as a single string (nargs="?") - confirm the caller handles both forms.
    parser.add_argument("-np", "--negative-prompt", nargs="?",
                        default=["split image", "two images", "watermark", "text"],
                        help="Negative prompt (optional). "
                             "Defaults to \"split image, two images, watermark, text\".")
    parser.add_argument("-cfg", "--guidance_scale", type=float, default=7.5,
                        help="Classifier-free guidance scale")
    parser.add_argument("-s", "--steps", type=int, default=30, help="Number of steps to use for SD gen.")
    parser.add_argument("-as", "--attn-slicing", action='store_true',
                        help="Whether to use attention slicing. Attention slicing reduces memory usage at the cost of "
                             "increased inference time (unless your memory was already maxed out, in which case it "
                             "might be faster!)")
    parser.add_argument("-ii", "--init-image", nargs="?",
                        help="Path to the init image. Will be scaled to the required resolution. If left blank, "
                             "the init image is generated with SD.")
    parser.add_argument("-d", "--direction", default="H", choices=["H", "V"],
                        help="Horizontal (H) or vertical (V) scroll mode. Default H.")
    parser.add_argument("-sh", "--shift", default=256, type=int,
                        help="How much (in pixels) to shift an image before applying inpainting to fill the space. "
                             "Default 256.")
    # "stabilityai/stable-diffusion-2-inpainting" is another option
    parser.add_argument("-m", "--model", default="runwayml/stable-diffusion-inpainting",
                        help="Stable Diffusion model to use. Can specify a local path or a HuggingFace model.")
    # No `choices=` here on purpose: get_pipe() upper-cases the name before looking it up,
    # so lower-case spellings are accepted.
    parser.add_argument("-sc", "--scheduler", default="DPM++",
                        help=f"Scheduler. Options are {', '.join(schedulers.keys())}.")
    parser.add_argument("-r", "--res", default=512, type=int,
                        help="Resolution to run SD at. 512 recommended (768 for XL). Consider also modifying 'shift' "
                             "if you modify this. If larger values cause your GPU VRAM to max out, try using "
                             "attention slicing (-as).")
    parser.add_argument("--legacy", action='store_true',
                        help="Uses StableDiffusionInpaintPipelineLegacy rather than "
                             "StableDiffusionInpaintPipeline. This is useful for running non-inpainting or old "
                             "models.")
    parser.add_argument("--xl", action='store_true', help="XL model support. Note: only inpainting XL "
                                                          "models are supported.")
    return parser
def get_pipe(name, scheduler_name: str, attn_slicing: bool, legacy: bool, xl: bool = False):
    """Load an inpainting pipeline, swap in the requested scheduler and move it to CUDA.

    Args:
        name: Model identifier or path. A name ending in ``.safetensors`` is loaded with
            ``from_single_file``; anything else with ``from_pretrained``.
        scheduler_name: Key into ``schedulers``; matched case-insensitively via ``upper()``.
        attn_slicing: When true, ``enable_attention_slicing()`` is called on the pipeline.
        legacy: Selects ``StableDiffusionInpaintPipelineLegacy`` (ignored when ``xl`` is true).
        xl: Selects ``StableDiffusionXLInpaintPipeline``; takes precedence over ``legacy``.

    Returns:
        The configured pipeline after ``.to("cuda")``.

    Raises:
        AssertionError: If ``scheduler_name`` is not one of the known scheduler keys.
    """
    scheduler_key = scheduler_name.upper()
    assert scheduler_key in schedulers, f"Scheduler '{scheduler_name}' not found. Options are {', '.join(schedulers.keys())}."

    # xl wins over legacy; the plain inpainting pipeline is the fallback.
    loader = (
        StableDiffusionXLInpaintPipeline if xl
        else StableDiffusionInpaintPipelineLegacy if legacy
        else StableDiffusionInpaintPipeline
    )

    # Both entry points receive the same arguments; only the loading method differs.
    load = loader.from_single_file if name.endswith(".safetensors") else loader.from_pretrained
    pipe = load(
        name,
        revision="fp16",
        torch_dtype=torch.float16,
        load_safety_checker=False,
    )

    pipe.safety_checker = None  # A single black image causes a lot of problems for this scroller

    # Rebuild the chosen scheduler from the config of the scheduler the pipeline shipped with.
    scheduler_cls = schedulers[scheduler_key]
    pipe.scheduler = scheduler_cls.from_config(pipe.scheduler.config)
    print("Scheduler:", pipe.scheduler)

    pipe = pipe.to("cuda")
    if attn_slicing:
        pipe.enable_attention_slicing()
    return pipe