From 9dcf745b5e482da63c6a021a2ece936e75065ea2 Mon Sep 17 00:00:00 2001 From: Patrick Heneise Date: Sat, 21 Mar 2026 00:20:56 -0600 Subject: [PATCH 1/2] fix: remove memory_efficient_threshold incompatible with MLX 0.22+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `memory_efficient_threshold` parameter was removed from `scaled_dot_product_attention` in MLX 0.22+. Remove it from the two SDPA call sites in mmdit.py to restore compatibility. Also guard the `DiffusionKitInferenceContext.os_spec()` call in `__init__.py` — it raises an IndexError on macOS when the system profiler output format doesn't match the expected tab layout, crashing generation after the image is already decoded. Co-Authored-By: Claude Sonnet 4.6 --- python/src/diffusionkit/mlx/__init__.py | 13 ++++++++----- python/src/diffusionkit/mlx/mmdit.py | 10 ---------- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/python/src/diffusionkit/mlx/__init__.py b/python/src/diffusionkit/mlx/__init__.py index 3d41ab3..9daed24 100644 --- a/python/src/diffusionkit/mlx/__init__.py +++ b/python/src/diffusionkit/mlx/__init__.py @@ -510,11 +510,14 @@ def generate_image( logger.info(f"Peak memory: {log['peak_memory']:.1f}GB") logger.info("============= Inference Context =============") - ic = DiffusionKitInferenceContext() - logger.info("Operating System:") - pprint(ic.os_spec()) - logger.info("Device:") - pprint(ic.device_spec()) + try: + ic = DiffusionKitInferenceContext() + logger.info("Operating System:") + pprint(ic.os_spec()) + logger.info("Device:") + pprint(ic.device_spec()) + except Exception: + pass # unload VAE Decoder model after decoding in low memory mode if self.low_memory_mode: diff --git a/python/src/diffusionkit/mlx/mmdit.py b/python/src/diffusionkit/mlx/mmdit.py index 3394466..3e8eb5c 100644 --- a/python/src/diffusionkit/mlx/mmdit.py +++ b/python/src/diffusionkit/mlx/mmdit.py @@ -633,11 +633,6 @@ def rearrange_for_sdpa(t): multimodal_sdpa_inputs["k"], positional_encodings ) - if self.config.low_memory_mode: - multimodal_sdpa_inputs[ - "memory_efficient_threshold" - ] = SDPA_FLASH_ATTN_THRESHOLD - # Compute multi-modal SDPA sdpa_outputs = ( self.sdpa(**multimodal_sdpa_inputs) @@ -726,11 +721,6 @@ def rearrange_for_sdpa(t): multimodal_sdpa_inputs["k"], positional_encodings ) - if self.config.low_memory_mode: - multimodal_sdpa_inputs[ - "memory_efficient_threshold" - ] = SDPA_FLASH_ATTN_THRESHOLD - # Compute multi-modal SDPA sdpa_outputs = ( self.sdpa(**multimodal_sdpa_inputs) From a5635ead68c8c70d9c1763345e5c8c22ff04c912 Mon Sep 17 00:00:00 2001 From: Patrick Heneise Date: Sat, 21 Mar 2026 00:28:20 -0600 Subject: [PATCH 2/2] docs: update login command from huggingface-cli to hf Co-Authored-By: Claude Sonnet 4.6 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9ecb5a7..a664afd 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Once you accept the terms, sign in with your Hugging Face hub READ token as belo > If using a fine-grained token, it is also necessary to [edit permissions](https://huggingface.co/settings/tokens) to allow `Read access to contents of all public gated repos you can access` ```bash -huggingface-cli login --token YOUR_HF_HUB_TOKEN +hf auth login --token YOUR_HF_HUB_TOKEN ```