argmaxinc · PatrickHeneise · Mar 21, 2026 · Mar 21, 2026
diff --git a/README.md b/README.md
@@ -43,7 +43,7 @@ Once you accept the terms, sign in with your Hugging Face hub READ token as belo
 > If using a fine-grained token, it is also necessary to [edit permissions](https://huggingface.co/settings/tokens) to allow `Read access to contents of all public gated repos you can access`
 
 ```bash
-huggingface-cli login --token YOUR_HF_HUB_TOKEN
+hf auth login --token YOUR_HF_HUB_TOKEN
 ```
 
 </details>

diff --git a/python/src/diffusionkit/mlx/__init__.py b/python/src/diffusionkit/mlx/__init__.py
@@ -510,11 +510,14 @@ def generate_image(
             logger.info(f"Peak memory: {log['peak_memory']:.1f}GB")
 
             logger.info("============= Inference Context =============")
-            ic = DiffusionKitInferenceContext()
-            logger.info("Operating System:")
-            pprint(ic.os_spec())
-            logger.info("Device:")
-            pprint(ic.device_spec())
+            try:
+                ic = DiffusionKitInferenceContext()
+                logger.info("Operating System:")
+                pprint(ic.os_spec())
+                logger.info("Device:")
+                pprint(ic.device_spec())
+            except Exception as e:  # diagnostics are best-effort; never fail the run
+                logger.warning(f"Skipping inference context report: {e!r}")
 
         # unload VAE Decoder model after decoding in low memory mode
         if self.low_memory_mode:

diff --git a/python/src/diffusionkit/mlx/mmdit.py b/python/src/diffusionkit/mlx/mmdit.py
@@ -633,11 +633,6 @@ def rearrange_for_sdpa(t):
                 multimodal_sdpa_inputs["k"], positional_encodings
             )
 
-        if self.config.low_memory_mode:
-            multimodal_sdpa_inputs[
-                "memory_efficient_threshold"
-            ] = SDPA_FLASH_ATTN_THRESHOLD
-
         # Compute multi-modal SDPA
         sdpa_outputs = (
             self.sdpa(**multimodal_sdpa_inputs)
@@ -726,11 +721,6 @@ def rearrange_for_sdpa(t):
                 multimodal_sdpa_inputs["k"], positional_encodings
             )
 
-        if self.config.low_memory_mode:
-            multimodal_sdpa_inputs[
-                "memory_efficient_threshold"
-            ] = SDPA_FLASH_ATTN_THRESHOLD
-
         # Compute multi-modal SDPA
         sdpa_outputs = (
             self.sdpa(**multimodal_sdpa_inputs)