From 9678c7b747ccbbccd6948f1e7a9c4066c7242f3e Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Thu, 19 Mar 2026 01:34:03 -0600 Subject: [PATCH] mem: disable ONNX mem_pattern and cpu_mem_arena to reduce idle memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set enable_mem_pattern=False and enable_cpu_mem_arena=False on SessionOptions for both YoloX and Detectron2 ONNX sessions. These flags control pre-allocation strategies that trade memory for speed on repeated inference. With both disabled, peak memory drops ~36% (553→351 MB) on the YoloX model with negligible latency impact. --- CHANGELOG.md | 3 +++ unstructured_inference/models/detectron2onnx.py | 5 +++++ unstructured_inference/models/yolox.py | 5 +++++ 3 files changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e9b58ffd..ce576053 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ - Store routing in LayoutElement +### Enhancement +- Disable ONNX Runtime memory pattern and CPU memory arena on YoloX and Detectron2 sessions to reduce idle memory after inference + ## 1.5.2 ### Fix diff --git a/unstructured_inference/models/detectron2onnx.py b/unstructured_inference/models/detectron2onnx.py index 79cd0a1a..650846a3 100644 --- a/unstructured_inference/models/detectron2onnx.py +++ b/unstructured_inference/models/detectron2onnx.py @@ -115,8 +115,13 @@ def initialize( ] providers = [provider for provider in ordered_providers if provider in available_providers] + sess_options = onnxruntime.SessionOptions() + sess_options.enable_mem_pattern = False + sess_options.enable_cpu_mem_arena = False + self.model = onnxruntime.InferenceSession( model_path, + sess_options=sess_options, providers=providers, ) self.model_path = model_path diff --git a/unstructured_inference/models/yolox.py b/unstructured_inference/models/yolox.py index 932242ec..3b2cee19 100644 --- a/unstructured_inference/models/yolox.py +++ b/unstructured_inference/models/yolox.py @@ -80,8 +80,13 @@ def initialize(self, model_path: str, label_map: dict): ] providers = [provider for provider in ordered_providers if provider in available_providers] + sess_options = onnxruntime.SessionOptions() + sess_options.enable_mem_pattern = False + sess_options.enable_cpu_mem_arena = False + self.model = onnxruntime.InferenceSession( model_path, + sess_options=sess_options, providers=providers, )