diff --git a/README.md b/README.md index 9ecb5a7..a664afd 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Once you accept the terms, sign in with your Hugging Face hub READ token as belo > If using a fine-grained token, it is also necessary to [edit permissions](https://huggingface.co/settings/tokens) to allow `Read access to contents of all public gated repos you can access` ```bash -huggingface-cli login --token YOUR_HF_HUB_TOKEN +hf auth login --token YOUR_HF_HUB_TOKEN ``` diff --git a/python/src/diffusionkit/mlx/__init__.py b/python/src/diffusionkit/mlx/__init__.py index 3d41ab3..9daed24 100644 --- a/python/src/diffusionkit/mlx/__init__.py +++ b/python/src/diffusionkit/mlx/__init__.py @@ -510,11 +510,14 @@ def generate_image( logger.info(f"Peak memory: {log['peak_memory']:.1f}GB") logger.info("============= Inference Context =============") - ic = DiffusionKitInferenceContext() - logger.info("Operating System:") - pprint(ic.os_spec()) - logger.info("Device:") - pprint(ic.device_spec()) + try: + ic = DiffusionKitInferenceContext() + logger.info("Operating System:") + pprint(ic.os_spec()) + logger.info("Device:") + pprint(ic.device_spec()) + except Exception: + pass # unload VAE Decoder model after decoding in low memory mode if self.low_memory_mode: diff --git a/python/src/diffusionkit/mlx/mmdit.py b/python/src/diffusionkit/mlx/mmdit.py index 3394466..3e8eb5c 100644 --- a/python/src/diffusionkit/mlx/mmdit.py +++ b/python/src/diffusionkit/mlx/mmdit.py @@ -633,11 +633,6 @@ def rearrange_for_sdpa(t): multimodal_sdpa_inputs["k"], positional_encodings ) - if self.config.low_memory_mode: - multimodal_sdpa_inputs[ - "memory_efficient_threshold" - ] = SDPA_FLASH_ATTN_THRESHOLD - # Compute multi-modal SDPA sdpa_outputs = ( self.sdpa(**multimodal_sdpa_inputs) @@ -726,11 +721,6 @@ def rearrange_for_sdpa(t): multimodal_sdpa_inputs["k"], positional_encodings ) - if self.config.low_memory_mode: - multimodal_sdpa_inputs[ - "memory_efficient_threshold" - ] = SDPA_FLASH_ATTN_THRESHOLD - # Compute multi-modal SDPA sdpa_outputs = ( self.sdpa(**multimodal_sdpa_inputs)