diff --git a/StreamDiffusionTD/install_tensorrt.py b/StreamDiffusionTD/install_tensorrt.py new file mode 100644 index 00000000..2d140a40 --- /dev/null +++ b/StreamDiffusionTD/install_tensorrt.py @@ -0,0 +1,165 @@ +""" +Standalone TensorRT installation script for StreamDiffusionTD +This is a self-contained version that doesn't rely on the streamdiffusion package imports +""" + +import platform +import subprocess +import sys +from typing import Optional + + +def run_pip(command: str): + """Run pip command with proper error handling""" + return subprocess.check_call([sys.executable, "-m", "pip"] + command.split()) + + +def is_installed(package_name: str) -> bool: + """Check if a package is installed""" + try: + __import__(package_name.replace("-", "_")) + return True + except ImportError: + return False + + +def version(package_name: str) -> Optional[str]: + """Get version of installed package""" + try: + import importlib.metadata + + return importlib.metadata.version(package_name) + except: + return None + + +def get_cuda_version_from_torch() -> Optional[str]: + try: + import torch + except ImportError: + return None + + cuda_version = torch.version.cuda + if cuda_version: + # Return full version like "12.8" for better detection + major_minor = ".".join(cuda_version.split(".")[:2]) + return major_minor + return None + + +def install(cu: Optional[str] = None): + if cu is None: + cu = get_cuda_version_from_torch() + + if cu is None: + print("Could not detect CUDA version. Please specify manually.") + return + + print(f"Detected CUDA version: {cu}") + print("Installing TensorRT requirements...") + + # Determine CUDA major version for package selection + cuda_major = cu.split(".")[0] if cu else "12" + cuda_version_float = float(cu) if cu else 12.0 + + # Skip nvidia-pyindex - it's broken with pip 25.3+ and not actually needed + # The NVIDIA index is already accessible via pip config or environment variables + + # Uninstall old TensorRT versions + if is_installed("tensorrt"): + current_version_str = version("tensorrt") + if current_version_str: + try: + from packaging.version import Version + + current_version = Version(current_version_str) + if current_version < Version("10.8.0"): + print("Uninstalling old TensorRT version...") + run_pip("uninstall -y tensorrt") + except: + # If packaging is not available, check version string directly + if current_version_str.startswith("9."): + print("Uninstalling old TensorRT version...") + run_pip("uninstall -y tensorrt") + + # For CUDA 12.8+ (RTX 5090/Blackwell support), use TensorRT 10.8+ + if cuda_version_float >= 12.8: + print("Installing TensorRT 10.8+ for CUDA 12.8+ (Blackwell GPU support)...") + + # Install cuDNN 9 for CUDA 12 + cudnn_name = "nvidia-cudnn-cu12" + print(f"Installing cuDNN: {cudnn_name}") + run_pip(f"install {cudnn_name} --no-cache-dir") + + # Install TensorRT for CUDA 12 (RTX 5090/Blackwell support) + tensorrt_version = "tensorrt-cu12" + print(f"Installing TensorRT for CUDA {cu}: {tensorrt_version}") + run_pip(f"install {tensorrt_version} --no-cache-dir") + + elif cuda_major == "12": + print("Installing TensorRT for CUDA 12.x...") + + # Install cuDNN for CUDA 12 + cudnn_name = "nvidia-cudnn-cu12" + print(f"Installing cuDNN: {cudnn_name}") + run_pip(f"install {cudnn_name} --no-cache-dir") + + # Install TensorRT for CUDA 12 + tensorrt_version = "tensorrt-cu12" + print(f"Installing TensorRT for CUDA {cu}: {tensorrt_version}") + run_pip(f"install {tensorrt_version} --no-cache-dir") + + elif cuda_major == "11": + print("Installing TensorRT for CUDA 11.x...") + + # Install cuDNN for CUDA 11 + cudnn_name = "nvidia-cudnn-cu11==8.9.4.25" + print(f"Installing cuDNN: {cudnn_name}") + run_pip(f"install {cudnn_name} --no-cache-dir") + + # Install TensorRT for CUDA 11 + tensorrt_version = "tensorrt==9.0.1.post11.dev4" + print(f"Installing TensorRT for CUDA {cu}: {tensorrt_version}") + run_pip( + f"install --pre --extra-index-url https://pypi.nvidia.com {tensorrt_version} --no-cache-dir" + ) + else: + print(f"Unsupported CUDA version: {cu}") + print("Supported versions: CUDA 11.x, 12.x") + return + + # Install additional TensorRT tools + if not is_installed("polygraphy"): + print("Installing polygraphy...") + run_pip( + "install polygraphy==0.49.24 --extra-index-url https://pypi.ngc.nvidia.com --no-cache-dir" + ) + if not is_installed("onnx_graphsurgeon"): + print("Installing onnx-graphsurgeon...") + run_pip( + "install onnx-graphsurgeon==0.5.8 --extra-index-url https://pypi.ngc.nvidia.com --no-cache-dir" + ) + if platform.system() == "Windows" and not is_installed("pywin32"): + print("Installing pywin32...") + run_pip("install pywin32==306 --no-cache-dir") + + # Pin onnx 1.18 + onnxruntime-gpu 1.24 together: + # - onnx 1.18 exports IR 11; modelopt needs FLOAT4E2M1 added in 1.18 + # - onnx 1.19+ exports IR 12 (ORT 1.24 max) and removes float32_to_bfloat16 (onnx-gs needs it) + # - onnxruntime-gpu 1.24 supports IR 11; never co-install CPU onnxruntime (shared files conflict) + print("Pinning onnx==1.18.0 + onnxruntime-gpu==1.24.3...") + run_pip("install onnx==1.18.0 onnxruntime-gpu==1.24.3 --no-cache-dir") + + # FP8 quantization dependencies (CUDA 12 only) + # nvidia-modelopt requires cupy; pin cupy 13.x + numpy<2 for mediapipe compat + if cuda_major == "12": + print("Installing FP8 quantization dependencies (nvidia-modelopt, cupy, numpy)...") + run_pip( + 'install "nvidia-modelopt[onnx]" "cupy-cuda12x==13.6.0" "numpy==1.26.4" --no-cache-dir' + ) + + print("TensorRT installation completed successfully!") + + +if __name__ == "__main__": + install() diff --git a/setup.py b/setup.py index 4255f52c..e4d2973a 100644 --- a/setup.py +++ b/setup.py @@ -4,11 +4,11 @@ from setuptools import find_packages, setup + # Copied from pip_utils.py to avoid import def _check_torch_installed(): try: import torch - import torchvision except Exception: msg = ( "Missing required pre-installed packages: torch, torchvision\n" @@ -19,16 +19,18 @@ def _check_torch_installed(): raise RuntimeError(msg) if not torch.version.cuda: - raise RuntimeError("Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package.") + raise RuntimeError( + "Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package." + ) def get_cuda_constraint(): - cuda_version = os.environ.get("STREAMDIFFUSION_CUDA_VERSION") or \ - os.environ.get("CUDA_VERSION") + cuda_version = os.environ.get("STREAMDIFFUSION_CUDA_VERSION") or os.environ.get("CUDA_VERSION") if not cuda_version: try: import torch + cuda_version = torch.version.cuda except Exception: # might not be available during wheel build, so we have to ignore @@ -56,10 +58,9 @@ def get_cuda_constraint(): "Pillow>=12.1.1", # CVE-2026-25990: out-of-bounds write in PSD loading "fire==0.7.1", "omegaconf==2.3.0", - "onnx==1.18.0", # onnx-graphsurgeon 0.5.8 requires onnx.helper.float32_to_bfloat16 (removed in onnx 1.19+) - "onnxruntime==1.24.3", - "onnxruntime-gpu==1.24.3", - "polygraphy==0.49.26", + "onnx==1.18.0", # IR 11 — modelopt needs FLOAT4E2M1 (added in 1.18); float32_to_bfloat16 present (removed in 1.19+) + "onnxruntime-gpu==1.24.3", # TRT EP, supports IR 11; never co-install CPU onnxruntime — shared files conflict + "polygraphy==0.49.24", "protobuf>=4.25.8,<5", # mediapipe 0.10.21 requires protobuf 4.x; 4.25.8 fixes CVE-2025-4565; CVE-2026-0994 (JSON DoS) accepted risk for local pipeline "colored==2.3.1", "pywin32==311;sys_platform == 'win32'", @@ -82,7 +83,9 @@ def deps_list(*pkgs): extras = {} extras["xformers"] = deps_list("xformers") extras["torch"] = deps_list("torch", "accelerate") -extras["tensorrt"] = deps_list("protobuf", "cuda-python", "onnx", "onnxruntime", "onnxruntime-gpu", "colored", "polygraphy", "onnx-graphsurgeon") +extras["tensorrt"] = deps_list( + "protobuf", "cuda-python", "onnx", "onnxruntime-gpu", "colored", "polygraphy", "onnx-graphsurgeon" +) extras["controlnet"] = deps_list("onnx-graphsurgeon", "controlnet-aux") extras["ipadapter"] = deps_list("diffusers-ipadapter", "mediapipe", "insightface") diff --git a/src/streamdiffusion/modules/ipadapter_module.py b/src/streamdiffusion/modules/ipadapter_module.py index b886c2a0..b283799f 100644 --- a/src/streamdiffusion/modules/ipadapter_module.py +++ b/src/streamdiffusion/modules/ipadapter_module.py @@ -1,7 +1,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Optional, Tuple, Any +from typing import Dict, Optional, Tuple, Any from enum import Enum import torch @@ -40,6 +40,158 @@ class IPAdapterConfig: insightface_model_name: Optional[str] = None +# --------------------------------------------------------------------------- +# IP-Adapter model path mapping by base model architecture and adapter type +# --------------------------------------------------------------------------- +# None means the variant is unavailable for that architecture — callers fall +# back to REGULAR automatically. +IPADAPTER_MODEL_MAP: Dict[tuple, Optional[Dict[str, str]]] = { + ("SD1.5", IPAdapterType.REGULAR): { + "model_path": "h94/IP-Adapter/models/ip-adapter_sd15.bin", + "image_encoder_path": "h94/IP-Adapter/models/image_encoder", + }, + ("SD1.5", IPAdapterType.PLUS): { + "model_path": "h94/IP-Adapter/models/ip-adapter-plus_sd15.safetensors", + "image_encoder_path": "h94/IP-Adapter/models/image_encoder", + }, + ("SD1.5", IPAdapterType.FACEID): { + "model_path": "h94/IP-Adapter-FaceID/ip-adapter-faceid_sd15.bin", + "image_encoder_path": "h94/IP-Adapter/models/image_encoder", + }, + ("SD2.1", IPAdapterType.REGULAR): None, # not available from h94 (ip-adapter_sd21.bin was never released) + ("SD2.1", IPAdapterType.PLUS): None, # not available from h94 + ("SD2.1", IPAdapterType.FACEID): None, # not available from h94 + ("SDXL", IPAdapterType.REGULAR): { + "model_path": "h94/IP-Adapter/sdxl_models/ip-adapter_sdxl.bin", + "image_encoder_path": "h94/IP-Adapter/sdxl_models/image_encoder", + }, + ("SDXL", IPAdapterType.PLUS): { + "model_path": "h94/IP-Adapter/sdxl_models/ip-adapter-plus_sdxl_vit-h.safetensors", + "image_encoder_path": "h94/IP-Adapter/sdxl_models/image_encoder", + }, + ("SDXL", IPAdapterType.FACEID): { + "model_path": "h94/IP-Adapter-FaceID/ip-adapter-faceid_sdxl.bin", + "image_encoder_path": "h94/IP-Adapter/sdxl_models/image_encoder", + }, +} + +# Set of all known HF model paths — used to distinguish known vs custom paths. +# Custom/local paths are never overridden. +_KNOWN_IPADAPTER_PATHS: frozenset = frozenset( + entry["model_path"] + for entry in IPADAPTER_MODEL_MAP.values() + if entry is not None +) + +_KNOWN_ENCODER_PATHS: frozenset = frozenset({ + "h94/IP-Adapter/models/image_encoder", + "h94/IP-Adapter/sdxl_models/image_encoder", +}) + + +def _normalize_model_type(detected_model_type: str, is_sdxl: bool) -> Optional[str]: + """Map model detection strings to IPADAPTER_MODEL_MAP keys.""" + if is_sdxl: + return "SDXL" + return { + "SD1.5": "SD1.5", + "SD15": "SD1.5", + "SD2.1": "SD2.1", + "SD21": "SD2.1", + "SDXL": "SDXL", + }.get(detected_model_type) + + +def resolve_ipadapter_paths( + cfg: Dict[str, Any], + detected_model_type: str, + is_sdxl: bool, +) -> Dict[str, Any]: + """Validate and auto-resolve IP-Adapter model/encoder paths for the detected base model. + + Mutates *cfg* in-place and returns it. Custom/local paths are never overridden. + + Args: + cfg: Single IP-Adapter config dict (keys: ipadapter_model_path, image_encoder_path, type, ...). + detected_model_type: Value from detect_model() e.g. "SD1.5", "SD2.1", "SDXL". + is_sdxl: Whether the base model is SDXL-family (takes precedence over detected_model_type). + + Returns: + The (potentially mutated) cfg dict. + """ + current_model_path = cfg.get("ipadapter_model_path") or "" + current_encoder_path = cfg.get("image_encoder_path") or "" + + # Parse adapter type, default to REGULAR + try: + adapter_type = IPAdapterType(cfg.get("type", "regular")) + except ValueError: + adapter_type = IPAdapterType.REGULAR + + # Normalize to map key; unknown types are left unchanged + norm_type = _normalize_model_type(detected_model_type, is_sdxl) + if norm_type is None: + logger.warning( + f"IP-Adapter auto-resolution: unknown model type '{detected_model_type}' — " + f"cannot validate compatibility. Ensure ipadapter_model_path is correct for this model." + ) + return cfg + + # Custom/local path — respect it, only log info + if current_model_path and current_model_path not in _KNOWN_IPADAPTER_PATHS: + logger.info( + f"IP-Adapter: custom model path '{current_model_path}' — " + f"skipping auto-resolution (manual compatibility check required for {detected_model_type})." + ) + return cfg + + # Look up the correct entry for this architecture + type + target_entry = IPADAPTER_MODEL_MAP.get((norm_type, adapter_type)) + + # Variant unavailable for this architecture — fall back to REGULAR with warning + if target_entry is None: + logger.warning( + f"IP-Adapter type '{adapter_type.value}' is not available for {detected_model_type}. " + f"Falling back to 'regular' adapter type." + ) + adapter_type = IPAdapterType.REGULAR + cfg["type"] = adapter_type.value + target_entry = IPADAPTER_MODEL_MAP.get((norm_type, adapter_type)) + + if target_entry is None: + logger.warning( + f"IP-Adapter: no compatible adapter exists for {detected_model_type} " + f"(type='{adapter_type.value}'). No IP-Adapter was released for this architecture. " + f"IP-Adapter will be disabled for this model." + ) + cfg["enabled"] = False + return cfg + + correct_model_path = target_entry["model_path"] + correct_encoder_path = target_entry["image_encoder_path"] + + # Resolve model path + if current_model_path != correct_model_path: + logger.warning( + f"IP-Adapter auto-resolution: '{current_model_path}' is incompatible with " + f"{detected_model_type} (cross_attention_dim mismatch). " + f"Resolving to '{correct_model_path}'." + ) + cfg["ipadapter_model_path"] = correct_model_path + else: + logger.info(f"IP-Adapter: '{current_model_path}' is compatible with {detected_model_type}.") + + # Resolve encoder path (only if it's a known HF encoder — custom encoders untouched) + if current_encoder_path in _KNOWN_ENCODER_PATHS and current_encoder_path != correct_encoder_path: + logger.info( + f"IP-Adapter: resolving image encoder " + f"'{current_encoder_path}' → '{correct_encoder_path}'." + ) + cfg["image_encoder_path"] = correct_encoder_path + + return cfg + + class IPAdapterModule(OrchestratorUser): """IP-Adapter embedding hook provider. diff --git a/src/streamdiffusion/tools/install-tensorrt.py b/src/streamdiffusion/tools/install-tensorrt.py index 46ea28b4..116ac5bf 100644 --- a/src/streamdiffusion/tools/install-tensorrt.py +++ b/src/streamdiffusion/tools/install-tensorrt.py @@ -1,10 +1,10 @@ +import platform from typing import Literal, Optional import fire from packaging.version import Version -from ..pip_utils import is_installed, run_pip, version, get_cuda_major -import platform +from ..pip_utils import get_cuda_major, is_installed, run_pip, version def install(cu: Optional[Literal["11", "12"]] = get_cuda_major()): @@ -20,28 +20,34 @@ def install(cu: Optional[Literal["11", "12"]] = get_cuda_major()): cudnn_package, trt_package = ( ("nvidia-cudnn-cu12==9.7.1.26", "tensorrt==10.12.0.36") - if cu == "12" else - ("nvidia-cudnn-cu11==8.9.7.29", "tensorrt==9.0.1.post11.dev4") + if cu == "12" + else ("nvidia-cudnn-cu11==8.9.7.29", "tensorrt==9.0.1.post11.dev4") ) if not is_installed(trt_package): run_pip(f"install {cudnn_package} --no-cache-dir") run_pip(f"install --extra-index-url https://pypi.nvidia.com {trt_package} --no-cache-dir") if not is_installed("polygraphy"): - run_pip( - "install polygraphy==0.49.24 --extra-index-url https://pypi.ngc.nvidia.com" - ) + run_pip("install polygraphy==0.49.24 --extra-index-url https://pypi.ngc.nvidia.com") if not is_installed("onnx_graphsurgeon"): + run_pip("install onnx-graphsurgeon==0.5.8 --extra-index-url https://pypi.ngc.nvidia.com") + if platform.system() == "Windows" and not is_installed("pywin32"): + run_pip("install pywin32==306") + if platform.system() == "Windows" and not is_installed("triton"): + run_pip("install triton-windows==3.4.0.post21") + + # Pin onnx 1.18 + onnxruntime-gpu 1.24 together: + # - onnx 1.18 exports IR 11; modelopt needs FLOAT4E2M1 added in 1.18 + # - onnx 1.19+ exports IR 12 (ORT 1.24 max) and removes float32_to_bfloat16 (onnx-gs needs it) + # - onnxruntime-gpu 1.24 supports IR 11; never co-install CPU onnxruntime (shared files conflict) + run_pip("install onnx==1.18.0 onnxruntime-gpu==1.24.3 --no-cache-dir") + + # FP8 quantization dependencies (CUDA 12 only) + # nvidia-modelopt requires cupy; pin cupy 13.x + numpy<2 for mediapipe compat + if cu == "12": run_pip( - "install onnx-graphsurgeon==0.5.8 --extra-index-url https://pypi.ngc.nvidia.com" - ) - if platform.system() == 'Windows' and not is_installed("pywin32"): - run_pip( - "install pywin32==306" - ) - if platform.system() == 'Windows' and not is_installed("triton"): - run_pip( - "install triton-windows==3.4.0.post21" + 'install "nvidia-modelopt[onnx]" "cupy-cuda12x==13.6.0" "numpy==1.26.4"' + " --no-cache-dir" ) diff --git a/src/streamdiffusion/wrapper.py b/src/streamdiffusion/wrapper.py index cd41efc4..47604f1a 100644 --- a/src/streamdiffusion/wrapper.py +++ b/src/streamdiffusion/wrapper.py @@ -350,6 +350,11 @@ def __init__( seed=seed, ) + # Offload text encoders to CPU after initial encoding to free ~1.6 GB VRAM (SDXL). + # They are reloaded on-demand before each prompt re-encoding call. + if acceleration == "tensorrt": + self._offload_text_encoders() + # Set wrapper reference on parameter updater so it can access pipeline structure self.stream._param_updater.wrapper = self @@ -413,13 +418,17 @@ def prepare( # Handle both single prompt and prompt blending if isinstance(prompt, str): # Single prompt mode (legacy interface) - self.stream.prepare( - prompt, - negative_prompt, - num_inference_steps=num_inference_steps, - guidance_scale=guidance_scale, - delta=delta, - ) + self._reload_text_encoders() + try: + self.stream.prepare( + prompt, + negative_prompt, + num_inference_steps=num_inference_steps, + guidance_scale=guidance_scale, + delta=delta, + ) + finally: + self._offload_text_encoders() # Apply seed blending if provided if seed_list is not None: @@ -435,15 +444,20 @@ def prepare( # Prepare with first prompt to initialize the pipeline first_prompt = prompt[0][0] - self.stream.prepare( - first_prompt, - negative_prompt, - num_inference_steps=num_inference_steps, - guidance_scale=guidance_scale, - delta=delta, - ) + self._reload_text_encoders() + try: + self.stream.prepare( + first_prompt, + negative_prompt, + num_inference_steps=num_inference_steps, + guidance_scale=guidance_scale, + delta=delta, + ) + finally: + self._offload_text_encoders() # Then apply prompt blending (and seed blending if provided) + # update_stream_params handles its own reload/offload self.update_stream_params( prompt_list=prompt, negative_prompt=negative_prompt, @@ -455,6 +469,31 @@ def prepare( else: raise TypeError(f"prepare: prompt must be str or List[Tuple[str, float]], got {type(prompt)}") + def _offload_text_encoders(self) -> None: + """Move text encoders to CPU to free VRAM (~1.6 GB for SDXL). + + Called automatically after initial prepare() when using TRT acceleration. + Text encoders are reloaded to GPU before each prompt re-encoding call. + """ + pipe = self.stream.pipe + if hasattr(pipe, "text_encoder") and pipe.text_encoder is not None: + if next(pipe.text_encoder.parameters(), None) is not None: + pipe.text_encoder = pipe.text_encoder.to("cpu") + if hasattr(pipe, "text_encoder_2") and pipe.text_encoder_2 is not None: + if next(pipe.text_encoder_2.parameters(), None) is not None: + pipe.text_encoder_2 = pipe.text_encoder_2.to("cpu") + torch.cuda.empty_cache() + logger.debug("[VRAM] Text encoders offloaded to CPU") + + def _reload_text_encoders(self) -> None: + """Move text encoders back to GPU before prompt re-encoding.""" + pipe = self.stream.pipe + if hasattr(pipe, "text_encoder") and pipe.text_encoder is not None: + pipe.text_encoder = pipe.text_encoder.to(self.device) + if hasattr(pipe, "text_encoder_2") and pipe.text_encoder_2 is not None: + pipe.text_encoder_2 = pipe.text_encoder_2.to(self.device) + logger.debug("[VRAM] Text encoders reloaded to GPU") + def update_prompt( self, prompt: Union[str, List[Tuple[str, float]]], @@ -501,8 +540,12 @@ def update_prompt( # Clear the blending caches to avoid conflicts self.stream._param_updater.clear_caches() - # Use the legacy single prompt update - self.stream.update_prompt(prompt) + # Reload text encoders to GPU for re-encoding, then offload when done. + self._reload_text_encoders() + try: + self.stream.update_prompt(prompt) + finally: + self._offload_text_encoders() elif isinstance(prompt, list): # Prompt blending mode @@ -513,7 +556,7 @@ def update_prompt( if len(current_prompts) <= 1 and warn_about_conflicts: logger.warning("update_prompt: Switching from single prompt to prompt blending mode.") - # Apply prompt blending + # Apply prompt blending (update_stream_params handles reload/offload internally) self.update_stream_params( prompt_list=prompt, negative_prompt=negative_prompt, @@ -598,29 +641,37 @@ def update_stream_params( safety_checker_threshold : Optional[float] The threshold for the safety checker. """ - # Handle all parameters via parameter updater (including ControlNet) - self.stream._param_updater.update_stream_params( - num_inference_steps=num_inference_steps, - guidance_scale=guidance_scale, - delta=delta, - t_index_list=t_index_list, - seed=seed, - prompt_list=prompt_list, - negative_prompt=negative_prompt, - prompt_interpolation_method=prompt_interpolation_method, - seed_list=seed_list, - seed_interpolation_method=seed_interpolation_method, - normalize_prompt_weights=normalize_prompt_weights, - normalize_seed_weights=normalize_seed_weights, - controlnet_config=controlnet_config, - ipadapter_config=ipadapter_config, - image_preprocessing_config=image_preprocessing_config, - image_postprocessing_config=image_postprocessing_config, - latent_preprocessing_config=latent_preprocessing_config, - latent_postprocessing_config=latent_postprocessing_config, - cache_maxframes=cache_maxframes, - cache_interval=cache_interval, - ) + # Reload text encoders to GPU if a new prompt needs encoding. + needs_encoding = prompt_list is not None or negative_prompt is not None + if needs_encoding: + self._reload_text_encoders() + try: + # Handle all parameters via parameter updater (including ControlNet) + self.stream._param_updater.update_stream_params( + num_inference_steps=num_inference_steps, + guidance_scale=guidance_scale, + delta=delta, + t_index_list=t_index_list, + seed=seed, + prompt_list=prompt_list, + negative_prompt=negative_prompt, + prompt_interpolation_method=prompt_interpolation_method, + seed_list=seed_list, + seed_interpolation_method=seed_interpolation_method, + normalize_prompt_weights=normalize_prompt_weights, + normalize_seed_weights=normalize_seed_weights, + controlnet_config=controlnet_config, + ipadapter_config=ipadapter_config, + image_preprocessing_config=image_preprocessing_config, + image_postprocessing_config=image_postprocessing_config, + latent_preprocessing_config=latent_preprocessing_config, + latent_postprocessing_config=latent_postprocessing_config, + cache_maxframes=cache_maxframes, + cache_interval=cache_interval, + ) + finally: + if needs_encoding: + self._offload_text_encoders() if use_safety_checker is not None: self.use_safety_checker = use_safety_checker and (self._acceleration == "tensorrt") if safety_checker_threshold is not None: @@ -1202,7 +1253,15 @@ def _load_model( self._is_sdxl = is_sdxl logger.info(f"_load_model: Detected model type: {model_type} (confidence: {confidence:.2f})") - + + # Auto-resolve IP-Adapter model/encoder paths for detected architecture. + # Runs once here so both pre-TRT and post-TRT installation paths see the resolved cfg. + if use_ipadapter and ipadapter_config: + from streamdiffusion.modules.ipadapter_module import resolve_ipadapter_paths + _ip_cfgs = ipadapter_config if isinstance(ipadapter_config, list) else [ipadapter_config] + for _ip_cfg in _ip_cfgs: + resolve_ipadapter_paths(_ip_cfg, model_type, is_sdxl) + # DEPRECATED: THIS WILL LOAD LCM_LORA IF USE_LCM_LORA IS TRUE # Validate backwards compatibility LCM LoRA selection using proper model detection if hasattr(self, 'use_lcm_lora') and self.use_lcm_lora is not None: @@ -1534,74 +1593,90 @@ def _load_model( # CRITICAL: Install IPAdapter module BEFORE TensorRT compilation to ensure processors are baked into engines if use_ipadapter and ipadapter_config and not hasattr(stream, '_ipadapter_module'): - try: - from streamdiffusion.modules.ipadapter_module import IPAdapterModule, IPAdapterConfig, IPAdapterType - logger.info("Installing IPAdapter module before TensorRT compilation...") - - # Snapshot processors before install — IPAdapter.set_ip_adapter() replaces them - # before load_state_dict(), so a failure leaves the UNet in corrupted state - _saved_unet_processors = {name: proc for name, proc in stream.unet.attn_processors.items()} - - # Use first config if list provided - cfg = ipadapter_config[0] if isinstance(ipadapter_config, list) else ipadapter_config - ip_cfg = IPAdapterConfig( - style_image_key=cfg.get('style_image_key') or 'ipadapter_main', - num_image_tokens=cfg.get('num_image_tokens', 4), - ipadapter_model_path=cfg['ipadapter_model_path'], - image_encoder_path=cfg['image_encoder_path'], - style_image=cfg.get('style_image'), - scale=cfg.get('scale', 1.0), - type=IPAdapterType(cfg.get('type', "regular")), - insightface_model_name=cfg.get('insightface_model_name'), + # Check if auto-resolution disabled IP-Adapter (e.g. no adapter released for this arch) + _cfg_check = ipadapter_config[0] if isinstance(ipadapter_config, list) else ipadapter_config + if _cfg_check.get('enabled', True) is False: + logger.info( + "IP-Adapter disabled by auto-resolution (no compatible adapter for this model). Skipping." ) - ip_module = IPAdapterModule(ip_cfg) - ip_module.install(stream) - # Expose for later updates - stream._ipadapter_module = ip_module - logger.info("IPAdapter module installed successfully before TensorRT compilation") - - # Cleanup after IPAdapter installation - import gc - gc.collect() - torch.cuda.empty_cache() - torch.cuda.synchronize() - - except torch.cuda.OutOfMemoryError as oom_error: - logger.error(f"CUDA Out of Memory during early IPAdapter installation: {oom_error}") - logger.error("Try reducing batch size, using smaller models, or increasing GPU memory") - raise RuntimeError("Insufficient VRAM for IPAdapter installation. Consider using a GPU with more memory or reducing model complexity.") - - except RuntimeError as rt_error: - if "size mismatch" in str(rt_error): - unet_dim = getattr(getattr(stream, 'unet', None), 'config', None) - unet_cross_attn = getattr(unet_dim, 'cross_attention_dim', 'unknown') if unet_dim else 'unknown' + use_ipadapter_trt = False + else: + try: + from streamdiffusion.modules.ipadapter_module import IPAdapterModule, IPAdapterConfig, IPAdapterType + logger.info("Installing IPAdapter module before TensorRT compilation...") + + # Snapshot processors before install — IPAdapter.set_ip_adapter() replaces them + # before load_state_dict(), so a failure leaves the UNet in corrupted state + _saved_unet_processors = {name: proc for name, proc in stream.unet.attn_processors.items()} + + # Use first config if list provided + cfg = ipadapter_config[0] if isinstance(ipadapter_config, list) else ipadapter_config + ip_cfg = IPAdapterConfig( + style_image_key=cfg.get('style_image_key') or 'ipadapter_main', + num_image_tokens=cfg.get('num_image_tokens', 4), + ipadapter_model_path=cfg['ipadapter_model_path'], + image_encoder_path=cfg['image_encoder_path'], + style_image=cfg.get('style_image'), + scale=cfg.get('scale', 1.0), + type=IPAdapterType(cfg.get('type', "regular")), + insightface_model_name=cfg.get('insightface_model_name'), + ) + ip_module = IPAdapterModule(ip_cfg) + ip_module.install(stream) + # Expose for later updates + stream._ipadapter_module = ip_module + logger.info("IPAdapter module installed successfully before TensorRT compilation") + + # Cleanup after IPAdapter installation + import gc + gc.collect() + torch.cuda.empty_cache() + torch.cuda.synchronize() + + except torch.cuda.OutOfMemoryError as oom_error: + logger.error(f"CUDA Out of Memory during early IPAdapter installation: {oom_error}") + logger.error("Try reducing batch size, using smaller models, or increasing GPU memory") + raise RuntimeError("Insufficient VRAM for IPAdapter installation. Consider using a GPU with more memory or reducing model complexity.") + + except RuntimeError as rt_error: + if "size mismatch" in str(rt_error): + unet_dim = getattr(getattr(stream, 'unet', None), 'config', None) + unet_cross_attn = getattr(unet_dim, 'cross_attention_dim', 'unknown') if unet_dim else 'unknown' + logger.warning( + f"IP-Adapter weights are incompatible with this model " + f"(UNet cross_attention_dim={unet_cross_attn}). " + f"Checkpoint dimension does not match — this may be a custom model path " + f"that could not be auto-resolved. " + f"Check ipadapter_model_path in td_config.yaml. " + f"Skipping IP-Adapter and continuing without it." + ) + # Restore original processors — IPAdapter.set_ip_adapter() already replaced + # them before load_state_dict() failed, leaving the UNet in a corrupted state + try: + stream.unet.set_attn_processor(_saved_unet_processors) + logger.info("Restored original UNet attention processors after IP-Adapter failure.") + except Exception as restore_err: + logger.warning(f"Could not restore UNet processors: {restore_err}") + use_ipadapter_trt = False + else: + import traceback + traceback.print_exc() + logger.error("Failed to install IPAdapterModule before TensorRT compilation") + raise + + except Exception as e: + import traceback + traceback.print_exc() logger.warning( - f"IP-Adapter weights are incompatible with this model " - f"(UNet cross_attention_dim={unet_cross_attn}). " - f"Checkpoint dimension does not match. " - f"SD-Turbo is SD2.1-based (dim=1024) — use h94/IP-Adapter/models/ip-adapter_sd21.bin " - f"or disable IP-Adapter in td_config.yaml. " - f"Skipping IP-Adapter and continuing without it." + f"Failed to install IPAdapterModule: {e}. " + f"Continuing without IP-Adapter." ) - # Restore original processors — IPAdapter.set_ip_adapter() already replaced - # them before load_state_dict() failed, leaving the UNet in a corrupted state try: stream.unet.set_attn_processor(_saved_unet_processors) logger.info("Restored original UNet attention processors after IP-Adapter failure.") except Exception as restore_err: logger.warning(f"Could not restore UNet processors: {restore_err}") use_ipadapter_trt = False - else: - import traceback - traceback.print_exc() - logger.error("Failed to install IPAdapterModule before TensorRT compilation") - raise - - except Exception: - import traceback - traceback.print_exc() - logger.error("Failed to install IPAdapterModule before TensorRT compilation") - raise # NOTE: When IPAdapter is enabled, we must pass num_ip_layers. We cannot know it until after # installing processors in the export wrapper. We construct the wrapper first to discover it,