diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index 5120f24162..6118ab9f4b 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -139,8 +139,8 @@ def create_app( ) app.add_middleware( CORSMiddleware, - allow_origins=["*"], - allow_credentials=True, + allow_origins=server_settings.cors_allow_origins, + allow_credentials="*" not in server_settings.cors_allow_origins, allow_methods=["*"], allow_headers=["*"], ) diff --git a/llama_cpp/server/model.py b/llama_cpp/server/model.py index 11bd363b56..0790b616da 100644 --- a/llama_cpp/server/model.py +++ b/llama_cpp/server/model.py @@ -198,9 +198,10 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama: assert ( settings.hf_tokenizer_config_path is not None ), "hf_tokenizer_config_path must be set for hf-tokenizer-config" - chat_handler = llama_cpp.llama_chat_format.hf_tokenizer_config_to_chat_completion_handler( - json.load(open(settings.hf_tokenizer_config_path)) - ) + with open(settings.hf_tokenizer_config_path) as f: + chat_handler = llama_cpp.llama_chat_format.hf_tokenizer_config_to_chat_completion_handler( + json.load(f) + ) tokenizer: Optional[llama_cpp.BaseLlamaTokenizer] = None if settings.hf_pretrained_model_name_or_path is not None: diff --git a/llama_cpp/server/settings.py b/llama_cpp/server/settings.py index 13c9512419..fc09248f7d 100644 --- a/llama_cpp/server/settings.py +++ b/llama_cpp/server/settings.py @@ -216,6 +216,10 @@ class ServerSettings(BaseSettings): default=None, description="API key for authentication. If set all requests need to be authenticated.", ) + cors_allow_origins: List[str] = Field( + default=["*"], + description="Allowed CORS origins. Use ['*'] to allow all origins (not recommended for production).", + ) interrupt_requests: bool = Field( default=True, description="Whether to interrupt requests when a new request is received.",