From 9dcf745b5e482da63c6a021a2ece936e75065ea2 Mon Sep 17 00:00:00 2001
From: Patrick Heneise <patrick@nevados.solar>
Date: Sat, 21 Mar 2026 00:20:56 -0600
Subject: [PATCH 1/2] fix: remove memory_efficient_threshold incompatible with
 MLX 0.22+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The `memory_efficient_threshold` parameter was removed from
`scaled_dot_product_attention` in MLX 0.22, so passing it fails on
MLX 0.22+. Stop setting it (it was only passed in low-memory mode) at
the two SDPA call sites in mmdit.py to restore compatibility.

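Also wrap the inference-context logging in `__init__.py`
(`DiffusionKitInferenceContext()`, `os_spec()` and `device_spec()`)
in a try/except. `os_spec()` raises an IndexError on macOS when the
system profiler output format doesn't match the expected tab layout,
crashing generation after the image is already decoded. Any exception
raised while collecting this diagnostic info is now silently ignored.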

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 python/src/diffusionkit/mlx/__init__.py | 13 ++++++++-----
 python/src/diffusionkit/mlx/mmdit.py    | 10 ----------
 2 files changed, 8 insertions(+), 15 deletions(-)
diff --git a/python/src/diffusionkit/mlx/__init__.py b/python/src/diffusionkit/mlx/__init__.py
index 3d41ab3..9daed24 100644
--- a/python/src/diffusionkit/mlx/__init__.py
+++ b/python/src/diffusionkit/mlx/__init__.py
@@ -510,11 +510,14 @@ def generate_image(
             logger.info(f"Peak memory: {log['peak_memory']:.1f}GB")
 
             logger.info("============= Inference Context =============")
-            ic = DiffusionKitInferenceContext()
-            logger.info("Operating System:")
-            pprint(ic.os_spec())
-            logger.info("Device:")
-            pprint(ic.device_spec())
+            try:
+                ic = DiffusionKitInferenceContext()
+                logger.info("Operating System:")
+                pprint(ic.os_spec())
+                logger.info("Device:")
+                pprint(ic.device_spec())
+            except Exception:
+                pass
 
         # unload VAE Decoder model after decoding in low memory mode
         if self.low_memory_mode:
diff --git a/python/src/diffusionkit/mlx/mmdit.py b/python/src/diffusionkit/mlx/mmdit.py
index 3394466..3e8eb5c 100644
--- a/python/src/diffusionkit/mlx/mmdit.py
+++ b/python/src/diffusionkit/mlx/mmdit.py
@@ -633,11 +633,6 @@ def rearrange_for_sdpa(t):
                 multimodal_sdpa_inputs["k"], positional_encodings
             )
 
-        if self.config.low_memory_mode:
-            multimodal_sdpa_inputs[
-                "memory_efficient_threshold"
-            ] = SDPA_FLASH_ATTN_THRESHOLD
-
         # Compute multi-modal SDPA
         sdpa_outputs = (
             self.sdpa(**multimodal_sdpa_inputs)
@@ -726,11 +721,6 @@ def rearrange_for_sdpa(t):
                 multimodal_sdpa_inputs["k"], positional_encodings
             )
 
-        if self.config.low_memory_mode:
-            multimodal_sdpa_inputs[
-                "memory_efficient_threshold"
-            ] = SDPA_FLASH_ATTN_THRESHOLD
-
         # Compute multi-modal SDPA
         sdpa_outputs = (
             self.sdpa(**multimodal_sdpa_inputs)

From a5635ead68c8c70d9c1763345e5c8c22ff04c912 Mon Sep 17 00:00:00 2001
From: Patrick Heneise <patrick@nevados.solar>
Date: Sat, 21 Mar 2026 00:28:20 -0600
Subject: [PATCH 2/2] docs: update login command from huggingface-cli to hf

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9ecb5a7..a664afd 100644
--- a/README.md
+++ b/README.md
@@ -43,7 +43,7 @@ Once you accept the terms, sign in with your Hugging Face hub READ token as belo
 > If using a fine-grained token, it is also necessary to [edit permissions](https://huggingface.co/settings/tokens) to allow `Read access to contents of all public gated repos you can access`
 
 ```bash
-huggingface-cli login --token YOUR_HF_HUB_TOKEN
+hf auth login --token YOUR_HF_HUB_TOKEN
 ```
 
 </details>