diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index 5120f24162..6118ab9f4b 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -139,8 +139,8 @@ def create_app(
     )
     app.add_middleware(
         CORSMiddleware,
-        allow_origins=["*"],
-        allow_credentials=True,
+        allow_origins=server_settings.cors_allow_origins,
+        allow_credentials="*" not in server_settings.cors_allow_origins,
         allow_methods=["*"],
         allow_headers=["*"],
     )
diff --git a/llama_cpp/server/model.py b/llama_cpp/server/model.py
index 11bd363b56..0790b616da 100644
--- a/llama_cpp/server/model.py
+++ b/llama_cpp/server/model.py
@@ -198,9 +198,10 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
             assert (
                 settings.hf_tokenizer_config_path is not None
             ), "hf_tokenizer_config_path must be set for hf-tokenizer-config"
-            chat_handler = llama_cpp.llama_chat_format.hf_tokenizer_config_to_chat_completion_handler(
-                json.load(open(settings.hf_tokenizer_config_path))
-            )
+            with open(settings.hf_tokenizer_config_path, encoding="utf-8") as f:  # JSON is UTF-8; do not depend on the locale default
+                chat_handler = llama_cpp.llama_chat_format.hf_tokenizer_config_to_chat_completion_handler(
+                    json.load(f)
+                )
 
         tokenizer: Optional[llama_cpp.BaseLlamaTokenizer] = None
         if settings.hf_pretrained_model_name_or_path is not None:
diff --git a/llama_cpp/server/settings.py b/llama_cpp/server/settings.py
index 13c9512419..fc09248f7d 100644
--- a/llama_cpp/server/settings.py
+++ b/llama_cpp/server/settings.py
@@ -216,6 +216,10 @@ class ServerSettings(BaseSettings):
         default=None,
         description="API key for authentication. If set all requests need to be authenticated.",
     )
+    cors_allow_origins: List[str] = Field(
+        default=["*"],
+        description="Allowed CORS origins. Use ['*'] to allow all origins (not recommended for production).",
+    )
     interrupt_requests: bool = Field(
         default=True,
         description="Whether to interrupt requests when a new request is received.",