diff --git a/Install_StreamDiffusion.bat b/Install_StreamDiffusion.bat
new file mode 100644
index 00000000..22d1a5ab
--- /dev/null
+++ b/Install_StreamDiffusion.bat
@@ -0,0 +1,43 @@
+@echo off
+echo ========================================
+echo StreamDiffusionTD v0.3.1 Installation
+echo Daydream Fork with StreamV2V
+echo ========================================
+echo.
+
+:: Prerequisite checks
+echo Checking prerequisites...
+
+py -3.11 --version >nul 2>&1
+if errorlevel 1 (
+    echo ERROR: Python 3.11 not found via py launcher.
+    echo Install Python 3.11 from https://python.org and ensure the py launcher is available.
+    pause
+    exit /b 1
+)
+
+git --version >nul 2>&1
+if errorlevel 1 (
+    echo ERROR: Git not found in PATH.
+    echo Install Git from https://git-scm.com/ ^(required for pip git+ packages^).
+    pause
+    exit /b 1
+)
+
+where cl.exe >nul 2>&1
+if errorlevel 1 (
+    echo WARNING: C++ compiler ^(cl.exe^) not found. Some packages may require it to build.
+    echo If installation fails, install Visual Studio Build Tools from:
+    echo https://visualstudio.microsoft.com/visual-cpp-build-tools/
+    echo.
+)
+
+echo Prerequisites OK. Starting installation...
+echo.
+
+cd /d "%~dp0"
+cd StreamDiffusion-installer
+
+py -3.11 -m sd_installer --base-folder "%~dp0." install --cuda cu128 --no-cache
+
+pause
diff --git a/Install_TensorRT.bat b/Install_TensorRT.bat
new file mode 100644
index 00000000..f269db7b
--- /dev/null
+++ b/Install_TensorRT.bat
@@ -0,0 +1,34 @@
+@echo off
+echo ========================================
+echo StreamDiffusionTD TensorRT Installation
+echo ========================================
+echo.
+
+cd /d "%~dp0"
+
+:: Check venv exists before trying to activate
+if not exist "venv\Scripts\activate.bat" (
+    echo ERROR: Virtual environment not found at venv\Scripts\activate.bat
+    echo Run Install_StreamDiffusion.bat first to create the environment.
+    pause
+    exit /b 1
+)
+
+echo Activating virtual environment...
+call "venv\Scripts\activate.bat"
+
+if "%VIRTUAL_ENV%" == "" (
+    echo ERROR: Failed to activate virtual environment.
+    pause
+    exit /b 1
+)
+echo Virtual environment activated: %VIRTUAL_ENV%
+
+echo.
+echo Installing TensorRT via CLI...
+cd StreamDiffusion-installer
+python -m sd_installer install-tensorrt
+
+echo.
+echo TensorRT installation finished
+pause
diff --git a/Start_StreamDiffusion.bat b/Start_StreamDiffusion.bat
new file mode 100644
index 00000000..50d5164d
--- /dev/null
+++ b/Start_StreamDiffusion.bat
@@ -0,0 +1,14 @@
+@echo off
+cd /d "%~dp0"
+
+:: Load runtime environment variables if set_env.bat exists
+if exist "%~dp0set_env.bat" call "%~dp0set_env.bat"
+
+if exist venv (
+    call venv\Scripts\activate.bat
+    venv\Scripts\python.exe StreamDiffusionTD\td_main.py
+) else (
+    call .venv\Scripts\activate.bat
+    .venv\Scripts\python.exe StreamDiffusionTD\td_main.py
+)
+pause
diff --git a/StreamDiffusion-installer b/StreamDiffusion-installer
new file mode 160000
index 00000000..24a5693b
--- /dev/null
+++ b/StreamDiffusion-installer
@@ -0,0 +1 @@
+Subproject commit 24a5693b07868fd679111b4dd2de5ddc753a2cc0
diff --git a/StreamDiffusionTD/install_tensorrt.py b/StreamDiffusionTD/install_tensorrt.py
new file mode 100644
index 00000000..5f169ae3
--- /dev/null
+++ b/StreamDiffusionTD/install_tensorrt.py
@@ -0,0 +1,157 @@
+"""
+Standalone TensorRT installation script for StreamDiffusionTD
+This is a self-contained version that doesn't rely on the streamdiffusion package imports
+
+Version pins aligned with sd_installer/tensorrt.py and src/streamdiffusion/tools/install-tensorrt.py
+"""
+
+import platform
+import subprocess
+import sys
+from typing import Optional
+
+# Canonical version pins — keep in sync with sd_installer/tensorrt.py
+TENSORRT_PINS = {
+    "cu12": {
+        "cudnn": "nvidia-cudnn-cu12==9.7.1.26",
+        "tensorrt": "tensorrt==10.12.0.36",
+    },
+    "cu11": {
+        "cudnn": "nvidia-cudnn-cu11==8.9.7.29",
+        "tensorrt": "tensorrt==9.0.1.post11.dev4",
+    },
+    "polygraphy": "polygraphy==0.49.26",
+    "onnx_graphsurgeon": "onnx-graphsurgeon==0.5.8",
+    "pywin32": "pywin32==311",
+    "triton_windows": "triton-windows==3.4.0.post21",
+}
+
+
+def run_pip(command: str):
+    """Run pip command with proper error handling"""
+    return subprocess.check_call([sys.executable, "-m", "pip"] + command.split())
+
+
+def is_installed(package_name: str) -> bool:
+    """Check if a package is installed"""
+    try:
+        __import__(package_name.replace("-", "_"))
+        return True
+    except ImportError:
+        return False
+
+
+def version(package_name: str) -> Optional[str]:
+    """Get version of installed package"""
+    try:
+        import importlib.metadata
+        return importlib.metadata.version(package_name)
+    except Exception:
+        return None
+
+
+def get_cuda_version_from_torch() -> Optional[str]:
+    try:
+        import torch
+    except ImportError:
+        return None
+
+    cuda_version = torch.version.cuda
+    if cuda_version:
+        # Return full version like "12.8" for better detection
+        major_minor = ".".join(cuda_version.split(".")[:2])
+        return major_minor
+    return None
+
+
+def install(cu: Optional[str] = None):
+    if cu is None:
+        cu = get_cuda_version_from_torch()
+
+    if cu is None:
+        print("Could not detect CUDA version. Please specify manually.")
+        return
+
+    print(f"Detected CUDA version: {cu}")
+    print("Installing TensorRT requirements...")
+
+    # Determine CUDA major version for package selection
+    cuda_major = cu.split(".")[0] if cu else "12"
+    cuda_minor = int(cu.split(".")[1]) if "." in cu else 0  # int compare: float() would rank "12.10" below "12.8"
+
+    # Uninstall old TensorRT versions (anything below 10.8)
+    if is_installed("tensorrt"):
+        current_version_str = version("tensorrt")
+        if current_version_str:
+            try:
+                from packaging.version import Version
+                needs_uninstall = Version(current_version_str) < Version("10.8.0")
+            except ImportError:
+                # packaging not available - compare by major version
+                try:
+                    major = int(current_version_str.split(".")[0])
+                    needs_uninstall = major < 10
+                except (ValueError, IndexError):
+                    needs_uninstall = False
+            if needs_uninstall:
+                print("Uninstalling old TensorRT version...")
+                run_pip("uninstall -y tensorrt")
+
+    if cuda_major == "12":
+        pins = TENSORRT_PINS["cu12"]
+        if cuda_minor >= 8:
+            print("Installing TensorRT 10.12+ for CUDA 12.8+ (Blackwell GPU support)...")
+        else:
+            print("Installing TensorRT for CUDA 12.x...")
+
+        cudnn_name = pins["cudnn"]
+        tensorrt_pkg = pins["tensorrt"]
+
+        print(f"Installing cuDNN: {cudnn_name}")
+        run_pip(f"install {cudnn_name} --no-cache-dir")
+
+        print(f"Installing TensorRT for CUDA {cu}: {tensorrt_pkg}")
+        run_pip(f"install --extra-index-url https://pypi.nvidia.com {tensorrt_pkg} --no-cache-dir")
+
+    elif cuda_major == "11":
+        pins = TENSORRT_PINS["cu11"]
+        print("Installing TensorRT for CUDA 11.x...")
+
+        cudnn_name = pins["cudnn"]
+        tensorrt_pkg = pins["tensorrt"]
+
+        print(f"Installing cuDNN: {cudnn_name}")
+        run_pip(f"install {cudnn_name} --no-cache-dir")
+
+        print(f"Installing TensorRT for CUDA {cu}: {tensorrt_pkg}")
+        run_pip(
+            f"install --pre --extra-index-url https://pypi.nvidia.com {tensorrt_pkg} --no-cache-dir"
+        )
+    else:
+        print(f"Unsupported CUDA version: {cu}")
+        print("Supported versions: CUDA 11.x, 12.x")
+        return
+
+    # Install additional TensorRT tools (pinned versions)
+    if not is_installed("polygraphy"):
+        print("Installing polygraphy...")
+        run_pip(
+            f"install {TENSORRT_PINS['polygraphy']} --extra-index-url https://pypi.ngc.nvidia.com --no-cache-dir"
+        )
+    if not is_installed("onnx_graphsurgeon"):
+        print("Installing onnx-graphsurgeon...")
+        run_pip(
+            f"install {TENSORRT_PINS['onnx_graphsurgeon']} --extra-index-url https://pypi.ngc.nvidia.com --no-cache-dir"
+        )
+    if platform.system() == "Windows" and not is_installed("pywin32"):
+        print("Installing pywin32...")
+        run_pip(f"install {TENSORRT_PINS['pywin32']} --no-cache-dir")
+    if platform.system() == "Windows" and not is_installed("triton"):
+        print("Installing triton-windows...")
+        run_pip(f"install {TENSORRT_PINS['triton_windows']} --no-cache-dir")
+
+    print("TensorRT installation completed successfully!")
+
+
+if __name__ == "__main__":
+    install()
diff --git a/set_env.bat b/set_env.bat
new file mode 100644
index 00000000..519f3124
--- /dev/null
+++ b/set_env.bat
@@ -0,0 +1,22 @@
+@echo off
+:: StreamDiffusionTD Runtime Environment Variables
+:: Called automatically by Start_StreamDiffusion.bat if this file exists.
+:: Edit values here to tune GPU memory and CUDA behavior.
+
+:: Reduce CUDA memory fragmentation (required for large models at 512x512+)
+set PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128,expandable_segments:True
+
+:: Lazy CUDA module loading — speeds up startup, reduces VRAM footprint
+set CUDA_MODULE_LOADING=LAZY
+
+:: L2 cache persistence (Ampere+ only, compute 8.0+)
+:: Set to "0" to disable. Default: "1" (enabled, 64 MB reserved)
+set SDTD_L2_PERSIST=1
+set SDTD_L2_PERSIST_MB=64
+
+:: HuggingFace offline mode — set to "1" to use cached models only (no downloads)
+:: set HF_HUB_OFFLINE=1
+:: set TRANSFORMERS_OFFLINE=1
+
+:: Uncomment to override CUDA version detected by setup.py (e.g., for CI)
+:: set STREAMDIFFUSION_CUDA_VERSION=12.8
diff --git a/setup.py b/setup.py
index 4255f52c..d226b9d5 100644
--- a/setup.py
+++ b/setup.py
@@ -4,11 +4,11 @@ from setuptools import find_packages, setup
 
+# Copied from pip_utils.py to avoid import
 def _check_torch_installed():
     try:
         import torch
-        import torchvision
     except Exception:
         msg = (
             "Missing required pre-installed packages: torch, torchvision\n"
@@ -19,16 +19,18 @@ def _check_torch_installed():
         raise RuntimeError(msg)
 
     if not torch.version.cuda:
-        raise RuntimeError("Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package.")
+        raise RuntimeError(
+            "Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package."
+        )
 
 
 def get_cuda_constraint():
-    cuda_version = os.environ.get("STREAMDIFFUSION_CUDA_VERSION") or \
-        os.environ.get("CUDA_VERSION")
+    cuda_version = os.environ.get("STREAMDIFFUSION_CUDA_VERSION") or os.environ.get("CUDA_VERSION")
     if not cuda_version:
         try:
             import torch
+
             cuda_version = torch.version.cuda
         except Exception:
             # might not be available during wheel build, so we have to ignore
@@ -53,15 +55,14 @@ get_cuda_constraint():
     "transformers==4.56.0",
     "accelerate==1.13.0",
     "huggingface_hub==0.35.0",
-    "Pillow>=12.1.1",  # CVE-2026-25990: out-of-bounds write in PSD loading
+    "Pillow>=12.2.0",  # CVE-2026-25990: out-of-bounds write in PSD loading; 12.2.0 verified
     "fire==0.7.1",
     "omegaconf==2.3.0",
-    "onnx==1.18.0",  # onnx-graphsurgeon 0.5.8 requires onnx.helper.float32_to_bfloat16 (removed in onnx 1.19+)
-    "onnxruntime==1.24.3",
-    "onnxruntime-gpu==1.24.3",
+    "onnx==1.18.0",  # IR 11 — modelopt needs FLOAT4E2M1 (added in 1.18); float32_to_bfloat16 present (removed in 1.19+)
+    "onnxruntime-gpu==1.24.4",  # TRT EP, supports IR 11; never co-install CPU onnxruntime — shared files conflict
     "polygraphy==0.49.26",
     "protobuf>=4.25.8,<5",  # mediapipe 0.10.21 requires protobuf 4.x; 4.25.8 fixes CVE-2025-4565; CVE-2026-0994 (JSON DoS) accepted risk for local pipeline
-    "colored==2.3.1",
+    "colored==2.3.2",
     "pywin32==311;sys_platform == 'win32'",
     "onnx-graphsurgeon==0.5.8",
     "controlnet-aux==0.0.10",
@@ -82,7 +83,9 @@ def deps_list(*pkgs):
 
 extras = {}
 extras["xformers"] = deps_list("xformers")
 extras["torch"] = deps_list("torch", "accelerate")
-extras["tensorrt"] = deps_list("protobuf", "cuda-python", "onnx", "onnxruntime", "onnxruntime-gpu", "colored", "polygraphy", "onnx-graphsurgeon")
+extras["tensorrt"] = deps_list(
+    "protobuf", "cuda-python", "onnx", "onnxruntime-gpu", "colored", "polygraphy", "onnx-graphsurgeon"
+)
 extras["controlnet"] = deps_list("onnx-graphsurgeon", "controlnet-aux")
 extras["ipadapter"] = deps_list("diffusers-ipadapter", "mediapipe", "insightface")
diff --git a/src/streamdiffusion/pip_utils.py b/src/streamdiffusion/pip_utils.py
index 9395c548..4a28c0a0 100644
--- a/src/streamdiffusion/pip_utils.py
+++ b/src/streamdiffusion/pip_utils.py
@@ -17,7 +17,6 @@ def _check_torch_installed():
     try:
         import torch
-        import torchvision  # type: ignore
     except Exception:
         msg = (
             "Missing required pre-installed packages: torch, torchvision\n"
@@ -28,13 +27,16 @@ def _check_torch_installed():
         raise RuntimeError(msg)
 
     if not torch.version.cuda:
-        raise RuntimeError("Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package.")
+        raise RuntimeError(
+            "Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package."
+        )
 
 
 def get_cuda_version() -> str | None:
     _check_torch_installed()
     import torch
+
     return torch.version.cuda
@@ -67,7 +69,7 @@ def is_installed(package: str) -> bool:
 
 def run_python(command: str, env: Dict[str, str] | None = None) -> str:
     run_kwargs = {
-        "args": f"\"{python}\" {command}",
+        "args": f'"{python}" {command}',
         "shell": True,
         "env": os.environ if env is None else env,
         "encoding": "utf8",
diff --git a/src/streamdiffusion/tools/install-tensorrt.py b/src/streamdiffusion/tools/install-tensorrt.py
index 46ea28b4..696960f1 100644
--- a/src/streamdiffusion/tools/install-tensorrt.py
+++ b/src/streamdiffusion/tools/install-tensorrt.py
@@ -1,10 +1,10 @@
+import platform
 from typing import Literal, Optional
 
 import fire
 from packaging.version import Version
 
-from ..pip_utils import is_installed, run_pip, version, get_cuda_major
-import platform
+from ..pip_utils import get_cuda_major, is_installed, run_pip, version
 
 
 def install(cu: Optional[Literal["11", "12"]] = get_cuda_major()):
@@ -20,28 +20,34 @@ def install(cu: Optional[Literal["11", "12"]] = get_cuda_major()):
     cudnn_package, trt_package = (
         ("nvidia-cudnn-cu12==9.7.1.26", "tensorrt==10.12.0.36")
-        if cu == "12" else
-        ("nvidia-cudnn-cu11==8.9.7.29", "tensorrt==9.0.1.post11.dev4")
+        if cu == "12"
+        else ("nvidia-cudnn-cu11==8.9.7.29", "tensorrt==9.0.1.post11.dev4")
     )
 
     if not is_installed(trt_package):
         run_pip(f"install {cudnn_package} --no-cache-dir")
         run_pip(f"install --extra-index-url https://pypi.nvidia.com {trt_package} --no-cache-dir")
 
     if not is_installed("polygraphy"):
-        run_pip(
-            "install polygraphy==0.49.24 --extra-index-url https://pypi.ngc.nvidia.com"
-        )
+        run_pip("install polygraphy==0.49.26 --extra-index-url https://pypi.ngc.nvidia.com")
     if not is_installed("onnx_graphsurgeon"):
+        run_pip("install onnx-graphsurgeon==0.5.8 --extra-index-url https://pypi.ngc.nvidia.com")
+    if platform.system() == "Windows" and not is_installed("pywin32"):
+        run_pip("install pywin32==311")
+    if platform.system() == "Windows" and not is_installed("triton"):
+        run_pip("install triton-windows==3.4.0.post21")
+
+    # Pin onnx 1.18 + onnxruntime-gpu 1.24 together:
+    # - onnx 1.18 exports IR 11; modelopt needs FLOAT4E2M1 added in 1.18
+    # - onnx 1.19+ exports IR 12 (ORT 1.24 max) and removes float32_to_bfloat16 (onnx-gs needs it)
+    # - onnxruntime-gpu 1.24 supports IR 11; never co-install CPU onnxruntime (shared files conflict)
+    run_pip("install onnx==1.18.0 onnxruntime-gpu==1.24.4 --no-cache-dir")
+
+    # FP8 quantization dependencies (CUDA 12 only)
+    # nvidia-modelopt requires cupy; pin cupy 13.x + numpy<2 for mediapipe compat
+    if cu == "12":
         run_pip(
-            "install onnx-graphsurgeon==0.5.8 --extra-index-url https://pypi.ngc.nvidia.com"
-        )
-    if platform.system() == 'Windows' and not is_installed("pywin32"):
-        run_pip(
-            "install pywin32==306"
-        )
-    if platform.system() == 'Windows' and not is_installed("triton"):
-        run_pip(
-            "install triton-windows==3.4.0.post21"
+            'install "nvidia-modelopt[onnx]" "cupy-cuda12x==13.6.0" "numpy==1.26.4"'
+            " --no-cache-dir"
         )