diff --git a/StreamDiffusionTD/install_tensorrt.py b/StreamDiffusionTD/install_tensorrt.py
new file mode 100644
index 00000000..2d140a40
--- /dev/null
+++ b/StreamDiffusionTD/install_tensorrt.py
@@ -0,0 +1,165 @@
+"""
+Standalone TensorRT installation script for StreamDiffusionTD
+This is a self-contained version that doesn't rely on the streamdiffusion package imports
+"""
+
+import platform
+import subprocess
+import sys
+from typing import Optional
+
+
+def run_pip(command: str):
+    """Run ``python -m pip`` with *command* split on whitespace; a non-zero exit raises CalledProcessError."""
+    return subprocess.check_call([sys.executable, "-m", "pip", *command.split()])
+
+
+def is_installed(package_name: str) -> bool:
+    """Return True if *package_name* imports, or is a registered distribution (e.g. pywin32)."""
+    try:
+        __import__(package_name.replace("-", "_"))
+        return True
+    except ImportError:
+        return version(package_name) is not None
+
+
+def version(package_name: str) -> Optional[str]:
+    """Return the installed distribution version of *package_name*, or None if it cannot be determined."""
+    try:
+        import importlib.metadata
+
+        return importlib.metadata.version(package_name)
+    except Exception:  # not bare: KeyboardInterrupt/SystemExit must still propagate
+        return None
+
+
+def get_cuda_version_from_torch() -> Optional[str]:
+    """Return torch's CUDA version as "major.minor" (e.g. "12.8"), or None if unavailable."""
+    try:
+        import torch
+    except ImportError:
+        return None
+
+    cuda_version = torch.version.cuda
+    if not cuda_version:
+        return None
+    # Keep only the first two components so "12.8" style detection works
+    return ".".join(cuda_version.split(".")[:2])
+
+
+def install(cu: Optional[str] = None):
+    """Install TensorRT, cuDNN and ONNX tooling for CUDA version *cu* (detected via torch if None)."""
+    if cu is None:
+        cu = get_cuda_version_from_torch()
+    if cu is None:
+        print("Could not detect CUDA version. Please specify manually.")
+        return
+
+    print(f"Detected CUDA version: {cu}")
+    print("Installing TensorRT requirements...")
+
+    # Major picks the package family; (major, minor) ints order correctly and tolerate "12.8.1"
+    cuda_major = cu.split(".")[0] if cu else "12"
+    cuda_version = tuple(int(p) for p in cu.split(".")[:2] if p.isdigit()) if cu else (12, 0)
+
+    # Skip nvidia-pyindex - it's broken with pip 25.3+ and not actually needed
+    # The NVIDIA index is already accessible via pip config or environment variables
+
+    # Uninstall old TensorRT versions
+    if is_installed("tensorrt"):
+        current_version_str = version("tensorrt")
+        if current_version_str:
+            try:
+                from packaging.version import Version
+
+                current_version = Version(current_version_str)
+                if current_version < Version("10.8.0"):
+                    print("Uninstalling old TensorRT version...")
+                    run_pip("uninstall -y tensorrt")
+            except Exception:
+                # packaging unavailable (or the check above failed): test the version string directly
+                if current_version_str.startswith("9."):
+                    print("Uninstalling old TensorRT version...")
+                    run_pip("uninstall -y tensorrt")
+
+    # For CUDA 12.8+ (RTX 5090/Blackwell support), use TensorRT 10.8+
+    if cuda_version >= (12, 8):
+        print("Installing TensorRT 10.8+ for CUDA 12.8+ (Blackwell GPU support)...")
+
+        # Install cuDNN 9 for CUDA 12
+        cudnn_name = "nvidia-cudnn-cu12"
+        print(f"Installing cuDNN: {cudnn_name}")
+        run_pip(f"install {cudnn_name} --no-cache-dir")
+
+        # Install TensorRT for CUDA 12 (RTX 5090/Blackwell support)
+        tensorrt_version = "tensorrt-cu12"
+        print(f"Installing TensorRT for CUDA {cu}: {tensorrt_version}")
+        run_pip(f"install {tensorrt_version} --no-cache-dir")
+
+    elif cuda_major == "12":
+        print("Installing TensorRT for CUDA 12.x...")
+
+        # Install cuDNN for CUDA 12
+        cudnn_name = "nvidia-cudnn-cu12"
+        print(f"Installing cuDNN: {cudnn_name}")
+        run_pip(f"install {cudnn_name} --no-cache-dir")
+
+        # Install TensorRT for CUDA 12
+        tensorrt_version = "tensorrt-cu12"
+        print(f"Installing TensorRT for CUDA {cu}: {tensorrt_version}")
+        run_pip(f"install {tensorrt_version} --no-cache-dir")
+
+    elif cuda_major == "11":
+        print("Installing TensorRT for CUDA 11.x...")
+
+        # Install cuDNN for CUDA 11
+        cudnn_name = "nvidia-cudnn-cu11==8.9.4.25"
+        print(f"Installing cuDNN: {cudnn_name}")
+        run_pip(f"install {cudnn_name} --no-cache-dir")
+
+        # Install TensorRT for CUDA 11
+        tensorrt_version = "tensorrt==9.0.1.post11.dev4"
+        print(f"Installing TensorRT for CUDA {cu}: {tensorrt_version}")
+        run_pip(
+            f"install --pre --extra-index-url https://pypi.nvidia.com {tensorrt_version} --no-cache-dir"
+        )
+    else:
+        print(f"Unsupported CUDA version: {cu}")
+        print("Supported versions: CUDA 11.x, 12.x")
+        return
+
+    # Install additional TensorRT tools
+    if not is_installed("polygraphy"):
+        print("Installing polygraphy...")
+        run_pip(
+            "install polygraphy==0.49.24 --extra-index-url https://pypi.ngc.nvidia.com --no-cache-dir"
+        )
+    if not is_installed("onnx_graphsurgeon"):
+        print("Installing onnx-graphsurgeon...")
+        run_pip(
+            "install onnx-graphsurgeon==0.5.8 --extra-index-url https://pypi.ngc.nvidia.com --no-cache-dir"
+        )
+    if platform.system() == "Windows" and not is_installed("pywin32"):
+        print("Installing pywin32...")
+        run_pip("install pywin32==306 --no-cache-dir")
+
+    # Pin onnx 1.18 + onnxruntime-gpu 1.24 together:
+    #   - onnx 1.18 exports IR 11; modelopt needs FLOAT4E2M1 added in 1.18
+    #   - onnx 1.19+ exports IR 12 (ORT 1.24 max) and removes float32_to_bfloat16 (onnx-gs needs it)
+    #   - onnxruntime-gpu 1.24 supports IR 11; never co-install CPU onnxruntime (shared files conflict)
+    print("Pinning onnx==1.18.0 + onnxruntime-gpu==1.24.3...")
+    run_pip("install onnx==1.18.0 onnxruntime-gpu==1.24.3 --no-cache-dir")
+
+    # FP8 quantization dependencies (CUDA 12 only)
+    # nvidia-modelopt requires cupy; pin cupy 13.x + numpy<2 for mediapipe compat
+    if cuda_major == "12":
+        print("Installing FP8 quantization dependencies (nvidia-modelopt, cupy, numpy)...")
+        # run_pip passes argv straight to pip (no shell), so the requirement
+        # specifiers must not be wrapped in literal quote characters.
+        run_pip("install nvidia-modelopt[onnx] cupy-cuda12x==13.6.0 numpy==1.26.4 --no-cache-dir")
+
+    print("TensorRT installation completed successfully!")
+
+
+if __name__ == "__main__":
+    install()  # no argument: the CUDA version is detected from the installed torch build
diff --git a/setup.py b/setup.py
index 4255f52c..e4d2973a 100644
--- a/setup.py
+++ b/setup.py
@@ -4,11 +4,11 @@
 
 from setuptools import find_packages, setup
 
+
 # Copied from pip_utils.py to avoid import
 def _check_torch_installed():
     try:
         import torch
-        import torchvision
     except Exception:
         msg = (
             "Missing required pre-installed packages: torch, torchvision\n"
@@ -19,16 +19,18 @@ def _check_torch_installed():
         raise RuntimeError(msg)
 
     if not torch.version.cuda:
-        raise RuntimeError("Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package.")
+        raise RuntimeError(
+            "Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package."
+        )
 
 
 def get_cuda_constraint():
-    cuda_version = os.environ.get("STREAMDIFFUSION_CUDA_VERSION") or \
-                    os.environ.get("CUDA_VERSION")
+    cuda_version = os.environ.get("STREAMDIFFUSION_CUDA_VERSION") or os.environ.get("CUDA_VERSION")
 
     if not cuda_version:
         try:
             import torch
+
             cuda_version = torch.version.cuda
         except Exception:
             # might not be available during wheel build, so we have to ignore
@@ -56,10 +58,9 @@ def get_cuda_constraint():
     "Pillow>=12.1.1",  # CVE-2026-25990: out-of-bounds write in PSD loading
     "fire==0.7.1",
     "omegaconf==2.3.0",
-    "onnx==1.18.0",  # onnx-graphsurgeon 0.5.8 requires onnx.helper.float32_to_bfloat16 (removed in onnx 1.19+)
-    "onnxruntime==1.24.3",
-    "onnxruntime-gpu==1.24.3",
-    "polygraphy==0.49.26",
+    "onnx==1.18.0",  # IR 11 — modelopt needs FLOAT4E2M1 (added in 1.18); float32_to_bfloat16 present (removed in 1.19+)
+    "onnxruntime-gpu==1.24.3",  # TRT EP, supports IR 11; never co-install CPU onnxruntime — shared files conflict
+    "polygraphy==0.49.24",
     "protobuf>=4.25.8,<5",  # mediapipe 0.10.21 requires protobuf 4.x; 4.25.8 fixes CVE-2025-4565; CVE-2026-0994 (JSON DoS) accepted risk for local pipeline
     "colored==2.3.1",
     "pywin32==311;sys_platform == 'win32'",
@@ -82,7 +83,9 @@ def deps_list(*pkgs):
 extras = {}
 extras["xformers"] = deps_list("xformers")
 extras["torch"] = deps_list("torch", "accelerate")
-extras["tensorrt"] = deps_list("protobuf", "cuda-python", "onnx", "onnxruntime", "onnxruntime-gpu", "colored", "polygraphy", "onnx-graphsurgeon")
+extras["tensorrt"] = deps_list(
+    "protobuf", "cuda-python", "onnx", "onnxruntime-gpu", "colored", "polygraphy", "onnx-graphsurgeon"
+)
 extras["controlnet"] = deps_list("onnx-graphsurgeon", "controlnet-aux")
 extras["ipadapter"] = deps_list("diffusers-ipadapter", "mediapipe", "insightface")
 
diff --git a/src/streamdiffusion/modules/ipadapter_module.py b/src/streamdiffusion/modules/ipadapter_module.py
index b886c2a0..b283799f 100644
--- a/src/streamdiffusion/modules/ipadapter_module.py
+++ b/src/streamdiffusion/modules/ipadapter_module.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import Optional, Tuple, Any
+from typing import Dict, Optional, Tuple, Any
 from enum import Enum
 import torch
 
@@ -40,6 +40,158 @@ class IPAdapterConfig:
     insightface_model_name: Optional[str] = None
 
 
+# ---------------------------------------------------------------------------
+# IP-Adapter model path mapping, keyed by (base model architecture, adapter type)
+# ---------------------------------------------------------------------------
+# None means the variant is unavailable for that architecture — resolve_ipadapter_paths
+# falls back to REGULAR, and disables IP-Adapter when REGULAR is None as well.
+IPADAPTER_MODEL_MAP: Dict[tuple, Optional[Dict[str, str]]] = {
+    ("SD1.5", IPAdapterType.REGULAR): {
+        "model_path": "h94/IP-Adapter/models/ip-adapter_sd15.bin",
+        "image_encoder_path": "h94/IP-Adapter/models/image_encoder",
+    },
+    ("SD1.5", IPAdapterType.PLUS): {
+        "model_path": "h94/IP-Adapter/models/ip-adapter-plus_sd15.safetensors",
+        "image_encoder_path": "h94/IP-Adapter/models/image_encoder",
+    },
+    ("SD1.5", IPAdapterType.FACEID): {
+        "model_path": "h94/IP-Adapter-FaceID/ip-adapter-faceid_sd15.bin",
+        "image_encoder_path": "h94/IP-Adapter/models/image_encoder",
+    },
+    ("SD2.1", IPAdapterType.REGULAR): None,  # not available from h94 (ip-adapter_sd21.bin was never released)
+    ("SD2.1", IPAdapterType.PLUS): None,    # not available from h94
+    ("SD2.1", IPAdapterType.FACEID): None,  # not available from h94
+    ("SDXL", IPAdapterType.REGULAR): {
+        "model_path": "h94/IP-Adapter/sdxl_models/ip-adapter_sdxl.bin",
+        "image_encoder_path": "h94/IP-Adapter/sdxl_models/image_encoder",
+    },
+    ("SDXL", IPAdapterType.PLUS): {
+        "model_path": "h94/IP-Adapter/sdxl_models/ip-adapter-plus_sdxl_vit-h.safetensors",
+        "image_encoder_path": "h94/IP-Adapter/sdxl_models/image_encoder",
+    },
+    ("SDXL", IPAdapterType.FACEID): {
+        "model_path": "h94/IP-Adapter-FaceID/ip-adapter-faceid_sdxl.bin",
+        "image_encoder_path": "h94/IP-Adapter/sdxl_models/image_encoder",
+    },
+}
+
+# Hugging Face paths listed in IPADAPTER_MODEL_MAP. A configured path that is
+# not in these sets is treated as custom/local and is never overridden.
+_KNOWN_IPADAPTER_PATHS: frozenset = frozenset(
+    spec["model_path"] for spec in IPADAPTER_MODEL_MAP.values() if spec is not None
+)
+
+# Encoder paths come from the same map, so the two sets cannot drift apart.
+_KNOWN_ENCODER_PATHS: frozenset = frozenset(
+    spec["image_encoder_path"]
+    for spec in IPADAPTER_MODEL_MAP.values()
+    if spec is not None
+)
+
+
+def _normalize_model_type(detected_model_type: str, is_sdxl: bool) -> Optional[str]:
+    """Return the IPADAPTER_MODEL_MAP architecture key for a detection string, or None if unknown."""
+    if is_sdxl:  # the SDXL flag wins over whatever string was detected
+        return "SDXL"
+    aliases = {
+        "SD1.5": ("SD1.5", "SD15"),
+        "SD2.1": ("SD2.1", "SD21"),
+        "SDXL": ("SDXL",),
+    }
+    matches = (key for key, names in aliases.items() if detected_model_type in names)
+    return next(matches, None)
+
+
+def resolve_ipadapter_paths(
+    cfg: Dict[str, Any],
+    detected_model_type: str,
+    is_sdxl: bool,
+) -> Dict[str, Any]:
+    """Validate and auto-resolve IP-Adapter model/encoder paths for the detected base model.
+
+    Mutates *cfg* in-place and returns it. Custom/local paths are never overridden.
+
+    Args:
+        cfg: Single IP-Adapter config dict (keys: ipadapter_model_path, image_encoder_path, type, ...).
+        detected_model_type: Value from detect_model() e.g. "SD1.5", "SD2.1", "SDXL".
+        is_sdxl: Whether the base model is SDXL-family (takes precedence over detected_model_type).
+
+    Returns:
+        The (potentially mutated) cfg dict.
+    """
+    current_model_path = cfg.get("ipadapter_model_path") or ""
+    current_encoder_path = cfg.get("image_encoder_path") or ""
+
+    # Parse adapter type, default to REGULAR
+    try:
+        adapter_type = IPAdapterType(cfg.get("type", "regular"))
+    except ValueError:
+        adapter_type = IPAdapterType.REGULAR
+
+    # Normalize to map key; unknown types are left unchanged
+    norm_type = _normalize_model_type(detected_model_type, is_sdxl)
+    if norm_type is None:
+        logger.warning(
+            f"IP-Adapter auto-resolution: unknown model type '{detected_model_type}' — "
+            f"cannot validate compatibility. Ensure ipadapter_model_path is correct for this model."
+        )
+        return cfg
+
+    # Custom/local path — respect it, only log info
+    if current_model_path and current_model_path not in _KNOWN_IPADAPTER_PATHS:
+        logger.info(
+            f"IP-Adapter: custom model path '{current_model_path}' — "
+            f"skipping auto-resolution (manual compatibility check required for {detected_model_type})."
+        )
+        return cfg
+
+    # Look up the correct entry for this architecture + type
+    target_entry = IPADAPTER_MODEL_MAP.get((norm_type, adapter_type))
+
+    # Variant unavailable for this architecture — fall back to REGULAR with warning
+    if target_entry is None:
+        logger.warning(
+            f"IP-Adapter type '{adapter_type.value}' is not available for {detected_model_type}. "
+            f"Falling back to 'regular' adapter type."
+        )
+        adapter_type = IPAdapterType.REGULAR
+        cfg["type"] = adapter_type.value
+        target_entry = IPADAPTER_MODEL_MAP.get((norm_type, adapter_type))
+
+    if target_entry is None:
+        logger.warning(
+            f"IP-Adapter: no compatible adapter exists for {detected_model_type} "
+            f"(type='{adapter_type.value}'). No IP-Adapter was released for this architecture. "
+            f"IP-Adapter will be disabled for this model."
+        )
+        cfg["enabled"] = False
+        return cfg
+
+    correct_model_path = target_entry["model_path"]
+    correct_encoder_path = target_entry["image_encoder_path"]
+
+    # Resolve model path
+    if current_model_path != correct_model_path:
+        logger.warning(
+            f"IP-Adapter auto-resolution: '{current_model_path}' is incompatible with "
+            f"{detected_model_type} (cross_attention_dim mismatch). "
+            f"Resolving to '{correct_model_path}'."
+        )
+        cfg["ipadapter_model_path"] = correct_model_path
+    else:
+        logger.info(f"IP-Adapter: '{current_model_path}' is compatible with {detected_model_type}.")
+
+    # Resolve encoder path when it is unset or a known HF encoder — custom encoders untouched
+    if current_encoder_path != correct_encoder_path and current_encoder_path in {"", *_KNOWN_ENCODER_PATHS}:
+        logger.info(
+            f"IP-Adapter: resolving image encoder "
+            f"'{current_encoder_path}' → '{correct_encoder_path}'."
+        )
+        cfg["image_encoder_path"] = correct_encoder_path
+
+    return cfg
+
+
 class IPAdapterModule(OrchestratorUser):
     """IP-Adapter embedding hook provider.
 
diff --git a/src/streamdiffusion/tools/install-tensorrt.py b/src/streamdiffusion/tools/install-tensorrt.py
index 46ea28b4..116ac5bf 100644
--- a/src/streamdiffusion/tools/install-tensorrt.py
+++ b/src/streamdiffusion/tools/install-tensorrt.py
@@ -1,10 +1,10 @@
+import platform
 from typing import Literal, Optional
 
 import fire
 from packaging.version import Version
 
-from ..pip_utils import is_installed, run_pip, version, get_cuda_major
-import platform
+from ..pip_utils import get_cuda_major, is_installed, run_pip, version
 
 
 def install(cu: Optional[Literal["11", "12"]] = get_cuda_major()):
@@ -20,28 +20,34 @@ def install(cu: Optional[Literal["11", "12"]] = get_cuda_major()):
 
     cudnn_package, trt_package = (
         ("nvidia-cudnn-cu12==9.7.1.26", "tensorrt==10.12.0.36")
-        if cu == "12" else
-        ("nvidia-cudnn-cu11==8.9.7.29", "tensorrt==9.0.1.post11.dev4")
+        if cu == "12"
+        else ("nvidia-cudnn-cu11==8.9.7.29", "tensorrt==9.0.1.post11.dev4")
     )
     if not is_installed(trt_package):
         run_pip(f"install {cudnn_package} --no-cache-dir")
         run_pip(f"install --extra-index-url https://pypi.nvidia.com {trt_package} --no-cache-dir")
 
     if not is_installed("polygraphy"):
-        run_pip(
-            "install polygraphy==0.49.24 --extra-index-url https://pypi.ngc.nvidia.com"
-        )
+        run_pip("install polygraphy==0.49.24 --extra-index-url https://pypi.ngc.nvidia.com")
     if not is_installed("onnx_graphsurgeon"):
+        run_pip("install onnx-graphsurgeon==0.5.8 --extra-index-url https://pypi.ngc.nvidia.com")
+    if platform.system() == "Windows" and not is_installed("pywin32"):
+        run_pip("install pywin32==306")
+    if platform.system() == "Windows" and not is_installed("triton"):
+        run_pip("install triton-windows==3.4.0.post21")
+
+    # Pin onnx 1.18 + onnxruntime-gpu 1.24 together:
+    #   - onnx 1.18 exports IR 11; modelopt needs FLOAT4E2M1 added in 1.18
+    #   - onnx 1.19+ exports IR 12 (ORT 1.24 max) and removes float32_to_bfloat16 (onnx-gs needs it)
+    #   - onnxruntime-gpu 1.24 supports IR 11; never co-install CPU onnxruntime (shared files conflict)
+    run_pip("install onnx==1.18.0 onnxruntime-gpu==1.24.3 --no-cache-dir")
+
+    # FP8 quantization dependencies (CUDA 12 only)
+    # nvidia-modelopt requires cupy; pin cupy 13.x + numpy<2 for mediapipe compat
+    if cu == "12":
         run_pip(
-            "install onnx-graphsurgeon==0.5.8 --extra-index-url https://pypi.ngc.nvidia.com"
-        )
-    if platform.system() == 'Windows' and not is_installed("pywin32"):
-        run_pip(
-            "install pywin32==306"
-        )
-    if platform.system() == 'Windows' and not is_installed("triton"):
-        run_pip(
-            "install triton-windows==3.4.0.post21"
+            'install "nvidia-modelopt[onnx]" "cupy-cuda12x==13.6.0" "numpy==1.26.4"'
+            " --no-cache-dir"
         )
 
 
diff --git a/src/streamdiffusion/wrapper.py b/src/streamdiffusion/wrapper.py
index cd41efc4..47604f1a 100644
--- a/src/streamdiffusion/wrapper.py
+++ b/src/streamdiffusion/wrapper.py
@@ -350,6 +350,11 @@ def __init__(
             seed=seed,
         )
 
+        # Offload text encoders to CPU after initial encoding to free ~1.6 GB VRAM (SDXL).
+        # They are reloaded on-demand before each prompt re-encoding call.
+        if acceleration == "tensorrt":
+            self._offload_text_encoders()
+
         # Set wrapper reference on parameter updater so it can access pipeline structure
         self.stream._param_updater.wrapper = self
 
@@ -413,13 +418,17 @@ def prepare(
         # Handle both single prompt and prompt blending
         if isinstance(prompt, str):
             # Single prompt mode (legacy interface)
-            self.stream.prepare(
-                prompt,
-                negative_prompt,
-                num_inference_steps=num_inference_steps,
-                guidance_scale=guidance_scale,
-                delta=delta,
-            )
+            self._reload_text_encoders()
+            try:
+                self.stream.prepare(
+                    prompt,
+                    negative_prompt,
+                    num_inference_steps=num_inference_steps,
+                    guidance_scale=guidance_scale,
+                    delta=delta,
+                )
+            finally:
+                self._offload_text_encoders()
 
             # Apply seed blending if provided
             if seed_list is not None:
@@ -435,15 +444,20 @@ def prepare(
 
             # Prepare with first prompt to initialize the pipeline
             first_prompt = prompt[0][0]
-            self.stream.prepare(
-                first_prompt,
-                negative_prompt,
-                num_inference_steps=num_inference_steps,
-                guidance_scale=guidance_scale,
-                delta=delta,
-            )
+            self._reload_text_encoders()
+            try:
+                self.stream.prepare(
+                    first_prompt,
+                    negative_prompt,
+                    num_inference_steps=num_inference_steps,
+                    guidance_scale=guidance_scale,
+                    delta=delta,
+                )
+            finally:
+                self._offload_text_encoders()
 
             # Then apply prompt blending (and seed blending if provided)
+            # update_stream_params handles its own reload/offload
             self.update_stream_params(
                 prompt_list=prompt,
                 negative_prompt=negative_prompt,
@@ -455,6 +469,31 @@ def prepare(
         else:
             raise TypeError(f"prepare: prompt must be str or List[Tuple[str, float]], got {type(prompt)}")
 
+    def _offload_text_encoders(self) -> None:
+        """Move the pipeline's text encoders to CPU to free VRAM (~1.6 GB for SDXL).
+
+        Counterpart of _reload_text_encoders(), which puts them back on the GPU
+        before prompts are re-encoded. Encoders that expose no parameters are skipped.
+        """
+        pipe = self.stream.pipe
+        for attr in ("text_encoder", "text_encoder_2"):
+            encoder = getattr(pipe, attr, None)
+            if encoder is None:
+                continue
+            if next(encoder.parameters(), None) is not None:
+                setattr(pipe, attr, encoder.to("cpu"))
+        torch.cuda.empty_cache()
+        logger.debug("[VRAM] Text encoders offloaded to CPU")
+
+    def _reload_text_encoders(self) -> None:
+        """Move text encoders back to ``self.device`` ahead of prompt re-encoding."""
+        pipe = self.stream.pipe
+        for attr in ("text_encoder", "text_encoder_2"):
+            encoder = getattr(pipe, attr, None)
+            if encoder is not None:
+                setattr(pipe, attr, encoder.to(self.device))
+        logger.debug("[VRAM] Text encoders reloaded to GPU")
+
     def update_prompt(
         self,
         prompt: Union[str, List[Tuple[str, float]]],
@@ -501,8 +540,12 @@ def update_prompt(
                 # Clear the blending caches to avoid conflicts
                 self.stream._param_updater.clear_caches()
 
-            # Use the legacy single prompt update
-            self.stream.update_prompt(prompt)
+            # Reload text encoders to GPU for re-encoding, then offload when done.
+            self._reload_text_encoders()
+            try:
+                self.stream.update_prompt(prompt)
+            finally:
+                self._offload_text_encoders()
 
         elif isinstance(prompt, list):
             # Prompt blending mode
@@ -513,7 +556,7 @@ def update_prompt(
             if len(current_prompts) <= 1 and warn_about_conflicts:
                 logger.warning("update_prompt: Switching from single prompt to prompt blending mode.")
 
-            # Apply prompt blending
+            # Apply prompt blending (update_stream_params handles reload/offload internally)
             self.update_stream_params(
                 prompt_list=prompt,
                 negative_prompt=negative_prompt,
@@ -598,29 +641,37 @@ def update_stream_params(
         safety_checker_threshold : Optional[float]
             The threshold for the safety checker.
         """
-        # Handle all parameters via parameter updater (including ControlNet)
-        self.stream._param_updater.update_stream_params(
-            num_inference_steps=num_inference_steps,
-            guidance_scale=guidance_scale,
-            delta=delta,
-            t_index_list=t_index_list,
-            seed=seed,
-            prompt_list=prompt_list,
-            negative_prompt=negative_prompt,
-            prompt_interpolation_method=prompt_interpolation_method,
-            seed_list=seed_list,
-            seed_interpolation_method=seed_interpolation_method,
-            normalize_prompt_weights=normalize_prompt_weights,
-            normalize_seed_weights=normalize_seed_weights,
-            controlnet_config=controlnet_config,
-            ipadapter_config=ipadapter_config,
-            image_preprocessing_config=image_preprocessing_config,
-            image_postprocessing_config=image_postprocessing_config,
-            latent_preprocessing_config=latent_preprocessing_config,
-            latent_postprocessing_config=latent_postprocessing_config,
-            cache_maxframes=cache_maxframes,
-            cache_interval=cache_interval,
-        )
+        # Reload text encoders to GPU if a new prompt needs encoding.
+        needs_encoding = prompt_list is not None or negative_prompt is not None
+        if needs_encoding:
+            self._reload_text_encoders()
+        try:
+            # Handle all parameters via parameter updater (including ControlNet)
+            self.stream._param_updater.update_stream_params(
+                num_inference_steps=num_inference_steps,
+                guidance_scale=guidance_scale,
+                delta=delta,
+                t_index_list=t_index_list,
+                seed=seed,
+                prompt_list=prompt_list,
+                negative_prompt=negative_prompt,
+                prompt_interpolation_method=prompt_interpolation_method,
+                seed_list=seed_list,
+                seed_interpolation_method=seed_interpolation_method,
+                normalize_prompt_weights=normalize_prompt_weights,
+                normalize_seed_weights=normalize_seed_weights,
+                controlnet_config=controlnet_config,
+                ipadapter_config=ipadapter_config,
+                image_preprocessing_config=image_preprocessing_config,
+                image_postprocessing_config=image_postprocessing_config,
+                latent_preprocessing_config=latent_preprocessing_config,
+                latent_postprocessing_config=latent_postprocessing_config,
+                cache_maxframes=cache_maxframes,
+                cache_interval=cache_interval,
+            )
+        finally:
+            if needs_encoding:
+                self._offload_text_encoders()
         if use_safety_checker is not None:
             self.use_safety_checker = use_safety_checker and (self._acceleration == "tensorrt")
         if safety_checker_threshold is not None:
@@ -1202,7 +1253,15 @@ def _load_model(
         self._is_sdxl = is_sdxl
         
         logger.info(f"_load_model: Detected model type: {model_type} (confidence: {confidence:.2f})")
-        
+
+        # Auto-resolve IP-Adapter model/encoder paths for detected architecture.
+        # Runs once here so both pre-TRT and post-TRT installation paths see the resolved cfg.
+        if use_ipadapter and ipadapter_config:
+            from streamdiffusion.modules.ipadapter_module import resolve_ipadapter_paths
+            _ip_cfgs = ipadapter_config if isinstance(ipadapter_config, list) else [ipadapter_config]
+            for _ip_cfg in _ip_cfgs:
+                resolve_ipadapter_paths(_ip_cfg, model_type, is_sdxl)
+
         # DEPRECATED: THIS WILL LOAD LCM_LORA IF USE_LCM_LORA IS TRUE
         # Validate backwards compatibility LCM LoRA selection using proper model detection
         if hasattr(self, 'use_lcm_lora') and self.use_lcm_lora is not None:
@@ -1534,74 +1593,90 @@ def _load_model(
                 
                 # CRITICAL: Install IPAdapter module BEFORE TensorRT compilation to ensure processors are baked into engines
                 if use_ipadapter and ipadapter_config and not hasattr(stream, '_ipadapter_module'):
-                    try:
-                        from streamdiffusion.modules.ipadapter_module import IPAdapterModule, IPAdapterConfig, IPAdapterType
-                        logger.info("Installing IPAdapter module before TensorRT compilation...")
-
-                        # Snapshot processors before install — IPAdapter.set_ip_adapter() replaces them
-                        # before load_state_dict(), so a failure leaves the UNet in corrupted state
-                        _saved_unet_processors = {name: proc for name, proc in stream.unet.attn_processors.items()}
-
-                        # Use first config if list provided
-                        cfg = ipadapter_config[0] if isinstance(ipadapter_config, list) else ipadapter_config
-                        ip_cfg = IPAdapterConfig(
-                            style_image_key=cfg.get('style_image_key') or 'ipadapter_main',
-                            num_image_tokens=cfg.get('num_image_tokens', 4),
-                            ipadapter_model_path=cfg['ipadapter_model_path'],
-                            image_encoder_path=cfg['image_encoder_path'],
-                            style_image=cfg.get('style_image'),
-                            scale=cfg.get('scale', 1.0),
-                            type=IPAdapterType(cfg.get('type', "regular")),
-                            insightface_model_name=cfg.get('insightface_model_name'),
+                    # Check if auto-resolution disabled IP-Adapter (e.g. no adapter released for this arch)
+                    _cfg_check = ipadapter_config[0] if isinstance(ipadapter_config, list) else ipadapter_config
+                    if _cfg_check.get('enabled', True) is False:
+                        logger.info(
+                            "IP-Adapter disabled by auto-resolution (no compatible adapter for this model). Skipping."
                         )
-                        ip_module = IPAdapterModule(ip_cfg)
-                        ip_module.install(stream)
-                        # Expose for later updates
-                        stream._ipadapter_module = ip_module
-                        logger.info("IPAdapter module installed successfully before TensorRT compilation")
-                        
-                        # Cleanup after IPAdapter installation
-                        import gc
-                        gc.collect()
-                        torch.cuda.empty_cache()
-                        torch.cuda.synchronize()
-                        
-                    except torch.cuda.OutOfMemoryError as oom_error:
-                        logger.error(f"CUDA Out of Memory during early IPAdapter installation: {oom_error}")
-                        logger.error("Try reducing batch size, using smaller models, or increasing GPU memory")
-                        raise RuntimeError("Insufficient VRAM for IPAdapter installation. Consider using a GPU with more memory or reducing model complexity.")
-
-                    except RuntimeError as rt_error:
-                        if "size mismatch" in str(rt_error):
-                            unet_dim = getattr(getattr(stream, 'unet', None), 'config', None)
-                            unet_cross_attn = getattr(unet_dim, 'cross_attention_dim', 'unknown') if unet_dim else 'unknown'
+                        use_ipadapter_trt = False
+                    else:
+                        try:
+                            from streamdiffusion.modules.ipadapter_module import IPAdapterModule, IPAdapterConfig, IPAdapterType
+                            logger.info("Installing IPAdapter module before TensorRT compilation...")
+
+                            # Snapshot processors before install — IPAdapter.set_ip_adapter() replaces them
+                            # before load_state_dict(), so a failure leaves the UNet in a corrupted state
+                            _saved_unet_processors = {name: proc for name, proc in stream.unet.attn_processors.items()}
+
+                            # Use first config if list provided
+                            cfg = ipadapter_config[0] if isinstance(ipadapter_config, list) else ipadapter_config
+                            ip_cfg = IPAdapterConfig(
+                                style_image_key=cfg.get('style_image_key') or 'ipadapter_main',
+                                num_image_tokens=cfg.get('num_image_tokens', 4),
+                                ipadapter_model_path=cfg['ipadapter_model_path'],
+                                image_encoder_path=cfg['image_encoder_path'],
+                                style_image=cfg.get('style_image'),
+                                scale=cfg.get('scale', 1.0),
+                                type=IPAdapterType(cfg.get('type', "regular")),
+                                insightface_model_name=cfg.get('insightface_model_name'),
+                            )
+                            ip_module = IPAdapterModule(ip_cfg)
+                            ip_module.install(stream)
+                            # Expose for later updates
+                            stream._ipadapter_module = ip_module
+                            logger.info("IPAdapter module installed successfully before TensorRT compilation")
+
+                            # Cleanup after IPAdapter installation
+                            import gc
+                            gc.collect()
+                            torch.cuda.empty_cache()
+                            torch.cuda.synchronize()
+
+                        except torch.cuda.OutOfMemoryError as oom_error:
+                            logger.error(f"CUDA Out of Memory during early IPAdapter installation: {oom_error}")
+                            logger.error("Try reducing batch size, using smaller models, or increasing GPU memory")
+                            raise RuntimeError("Insufficient VRAM for IPAdapter installation. Consider using a GPU with more memory or reducing model complexity.")
+
+                        except RuntimeError as rt_error:
+                            if "size mismatch" in str(rt_error):
+                                unet_dim = getattr(getattr(stream, 'unet', None), 'config', None)
+                                unet_cross_attn = getattr(unet_dim, 'cross_attention_dim', 'unknown') if unet_dim else 'unknown'
+                                logger.warning(
+                                    f"IP-Adapter weights are incompatible with this model "
+                                    f"(UNet cross_attention_dim={unet_cross_attn}). "
+                                    f"Checkpoint dimension does not match — this may be a custom model path "
+                                    f"that could not be auto-resolved. "
+                                    f"Check ipadapter_model_path in td_config.yaml. "
+                                    f"Skipping IP-Adapter and continuing without it."
+                                )
+                                # Restore original processors — IPAdapter.set_ip_adapter() already replaced
+                                # them before load_state_dict() failed, leaving the UNet in a corrupted state
+                                try:
+                                    stream.unet.set_attn_processor(_saved_unet_processors)
+                                    logger.info("Restored original UNet attention processors after IP-Adapter failure.")
+                                except Exception as restore_err:
+                                    logger.warning(f"Could not restore UNet processors: {restore_err}")
+                                use_ipadapter_trt = False
+                            else:
+                                import traceback
+                                traceback.print_exc()
+                                logger.error("Failed to install IPAdapterModule before TensorRT compilation")
+                                raise
+
+                        except Exception as e:
+                            import traceback
+                            traceback.print_exc()
                             logger.warning(
-                                f"IP-Adapter weights are incompatible with this model "
-                                f"(UNet cross_attention_dim={unet_cross_attn}). "
-                                f"Checkpoint dimension does not match. "
-                                f"SD-Turbo is SD2.1-based (dim=1024) — use h94/IP-Adapter/models/ip-adapter_sd21.bin "
-                                f"or disable IP-Adapter in td_config.yaml. "
-                                f"Skipping IP-Adapter and continuing without it."
+                                f"Failed to install IPAdapterModule: {e}. "
+                                f"Continuing without IP-Adapter."
                             )
-                            # Restore original processors — IPAdapter.set_ip_adapter() already replaced
-                            # them before load_state_dict() failed, leaving the UNet in a corrupted state
                             try:
                                 stream.unet.set_attn_processor(_saved_unet_processors)
                                 logger.info("Restored original UNet attention processors after IP-Adapter failure.")
                             except Exception as restore_err:
                                 logger.warning(f"Could not restore UNet processors: {restore_err}")
                             use_ipadapter_trt = False
-                        else:
-                            import traceback
-                            traceback.print_exc()
-                            logger.error("Failed to install IPAdapterModule before TensorRT compilation")
-                            raise
-
-                    except Exception:
-                        import traceback
-                        traceback.print_exc()
-                        logger.error("Failed to install IPAdapterModule before TensorRT compilation")
-                        raise
 
                 # NOTE: When IPAdapter is enabled, we must pass num_ip_layers. We cannot know it until after
                 # installing processors in the export wrapper. We construct the wrapper first to discover it,