From 0f70e3e0cd90326b768bb6b212e47dd5987e20ce Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 1 Jan 2026 12:49:19 +0100 Subject: [PATCH 1/8] arg: support remote preset --- common/arg.cpp | 151 +++++++++++++++++++++++++++++--------------- common/download.cpp | 15 +++-- common/download.h | 6 ++ common/preset.cpp | 77 +++++++++++++++++++++- common/preset.h | 11 +++- docs/preset.md | 50 +++++++++++++++ 6 files changed, 253 insertions(+), 57 deletions(-) create mode 100644 docs/preset.md diff --git a/common/arg.cpp b/common/arg.cpp index 62d31393c43..4d4f91e96f5 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -6,6 +6,7 @@ #include "log.h" #include "sampling.h" #include "download.h" +#include "preset.h" // fix problem with std::min and std::max #if defined(_WIN32) @@ -268,6 +269,42 @@ static void parse_tensor_buffer_overrides(const std::string & value, std::vector } } +static std::string clean_file_name(const std::string & fname) { + std::string clean_fname = fname; + string_replace_all(clean_fname, "\\", "/"); + string_replace_all(clean_fname, "/", ""); + return clean_fname; +} + +static bool common_params_handle_remote_preset(common_params & params, llama_example ex) { + GGML_ASSERT(!params.model.hf_repo.empty()); + + const bool offline = params.offline; + std::string model_endpoint = get_model_endpoint(); + auto preset_url = model_endpoint + params.model.hf_repo + "/resolve/main/preset.ini"; + + // prepare local path for caching + auto preset_fname = clean_file_name(params.model.hf_repo + "_preset.ini"); + auto preset_path = fs_get_cache_file(preset_fname); + bool has_preset = common_download_file_single(preset_url, preset_path, params.hf_token, offline); + + // remote preset is optional, so we don't error out if not found + if (has_preset) { + LOG_INF("applying remote preset from %s\n", preset_url.c_str()); + common_preset_context ctx(ex, /* only_remote_allowed */ true); + common_preset global; // unused for now + auto remote_presets = ctx.load_from_ini(preset_path, global); + if (remote_presets.find(COMMON_PRESET_DEFAULT_NAME) != remote_presets.end()) { + common_preset & preset = remote_presets.at(COMMON_PRESET_DEFAULT_NAME); + preset.apply_to_params(params); + } else { + throw std::runtime_error("Remote preset.ini does not contain [" + std::string(COMMON_PRESET_DEFAULT_NAME) + "] section"); + } + } + + return has_preset; +} + struct handle_model_result { bool found_mmproj = false; common_params_model mmproj; @@ -309,9 +346,7 @@ static handle_model_result common_params_handle_model( // make sure model path is present (for caching purposes) if (model.path.empty()) { // this is to avoid different repo having same file name, or same file name in different subdirs - std::string filename = model.hf_repo + "_" + model.hf_file; - // to make sure we don't have any slashes in the filename - string_replace_all(filename, "/", "_"); + std::string filename = clean_file_name(model.hf_repo + "_" + model.hf_file); model.path = fs_get_cache_file(filename); } @@ -425,61 +460,75 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context } }; - std::set seen_args; + auto parse_cli_args = [&]() { + std::set seen_args; - for (int i = 1; i < argc; i++) { - const std::string arg_prefix = "--"; + for (int i = 1; i < argc; i++) { + const std::string arg_prefix = "--"; - std::string arg = argv[i]; - if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) { - std::replace(arg.begin(), arg.end(), '_', '-'); - } - if (arg_to_options.find(arg) == arg_to_options.end()) { - throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str())); - } - if (!seen_args.insert(arg).second) { - LOG_WRN("DEPRECATED: argument '%s' specified multiple times, use comma-separated values instead (only last value will be used)\n", arg.c_str()); - } - auto & tmp = arg_to_options[arg]; - auto opt = *tmp.first; - bool is_positive = tmp.second; - if (opt.has_value_from_env()) { - fprintf(stderr, "warn: %s environment variable is set, but will be overwritten by command line argument %s\n", opt.env, arg.c_str()); - } - try { - if (opt.handler_void) { - opt.handler_void(params); - continue; + std::string arg = argv[i]; + if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) { + std::replace(arg.begin(), arg.end(), '_', '-'); } - if (opt.handler_bool) { - opt.handler_bool(params, is_positive); - continue; + if (arg_to_options.find(arg) == arg_to_options.end()) { + throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str())); } - - // arg with single value - check_arg(i); - std::string val = argv[++i]; - if (opt.handler_int) { - opt.handler_int(params, std::stoi(val)); - continue; + if (!seen_args.insert(arg).second) { + LOG_WRN("DEPRECATED: argument '%s' specified multiple times, use comma-separated values instead (only last value will be used)\n", arg.c_str()); } - if (opt.handler_string) { - opt.handler_string(params, val); - continue; + auto & tmp = arg_to_options[arg]; + auto opt = *tmp.first; + bool is_positive = tmp.second; + if (opt.has_value_from_env()) { + fprintf(stderr, "warn: %s environment variable is set, but will be overwritten by command line argument %s\n", opt.env, arg.c_str()); } + try { + if (opt.handler_void) { + opt.handler_void(params); + continue; + } + if (opt.handler_bool) { + opt.handler_bool(params, is_positive); + continue; + } - // arg with 2 values - check_arg(i); - std::string val2 = argv[++i]; - if (opt.handler_str_str) { - opt.handler_str_str(params, val, val2); - continue; - } - } catch (std::exception & e) { - throw std::invalid_argument(string_format( - "error while handling argument \"%s\": %s\n\n" - "usage:\n%s\n\nto show complete usage, run with -h", - arg.c_str(), e.what(), opt.to_string().c_str())); + // arg with single value + check_arg(i); + std::string val = argv[++i]; + if (opt.handler_int) { + opt.handler_int(params, std::stoi(val)); + continue; + } + if (opt.handler_string) { + opt.handler_string(params, val); + continue; + } + + // arg with 2 values + check_arg(i); + std::string val2 = argv[++i]; + if (opt.handler_str_str) { + opt.handler_str_str(params, val, val2); + continue; + } + } catch (std::exception & e) { + throw std::invalid_argument(string_format( + "error while handling argument \"%s\": %s\n\n" + "usage:\n%s\n\nto show complete usage, run with -h", + arg.c_str(), e.what(), opt.to_string().c_str())); + } + } + }; + + // parse the first time to get -hf option (used for remote preset) + parse_cli_args(); + + // maybe handle remote preset + if (!params.model.hf_repo.empty()) { + bool has_preset = common_params_handle_remote_preset(params, ctx_arg.ex); + if (has_preset) { + // re-parse CLI args to override preset values + parse_cli_args(); } } diff --git a/common/download.cpp b/common/download.cpp index ef874725607..d0aa3860812 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -769,10 +769,10 @@ std::pair> common_remote_get_content(const std::string #if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB) -static bool common_download_file_single(const std::string & url, - const std::string & path, - const std::string & bearer_token, - bool offline) { +bool common_download_file_single(const std::string & url, + const std::string & path, + const std::string & bearer_token, + bool offline) { if (!offline) { return common_download_file_single_online(url, path, bearer_token); } @@ -1096,6 +1096,13 @@ std::string common_docker_resolve_model(const std::string &) { throw std::runtime_error("download functionality is not enabled in this build"); } +bool common_download_file_single(const std::string &, + const std::string &, + const std::string &, + bool) { + throw std::runtime_error("download functionality is not enabled in this build"); +} + #endif // LLAMA_USE_CURL || LLAMA_USE_HTTPLIB std::vector common_list_cached_models() { diff --git a/common/download.h b/common/download.h index d1321e6e90e..5f42527af8d 100644 --- a/common/download.h +++ b/common/download.h @@ -52,6 +52,12 @@ bool common_download_model( // returns list of cached models std::vector common_list_cached_models(); +// download single file from url to local path +bool common_download_file_single(const std::string & url, + const std::string & path, + const std::string & bearer_token, + bool offline); + // resolve and download model from Docker registry // return local path to downloaded model file std::string common_docker_resolve_model(const std::string & docker); diff --git a/common/preset.cpp b/common/preset.cpp index e2fc18c5dad..949fe001109 100644 --- a/common/preset.cpp +++ b/common/preset.cpp @@ -16,6 +16,46 @@ static std::string rm_leading_dashes(const std::string & str) { return str.substr(pos); } +// only allow a subset of args for remote presets for security reasons +// do not add more args unless absolutely necessary +// args that output to files are strictly prohibited +static std::set get_remote_preset_whitelist(std::map & key_to_opt) { + static const std::set allowed_options = { + "model-url", + "hf-repo", + "hf-repo-draft", + "hf-repo-v", // vocoder + "hf-file-v", // vocoder + "mmproj-url", + "pooling", + "jinja", + "batch-size", + "ubatch-size", + "cache-reuse", + // note: sampling params are automatically allowed by default + // negated args will be added automatically + }; + + std::set allowed_keys; + + for (const auto & it : key_to_opt) { + const std::string & key = it.first; + const common_arg & opt = it.second; + if (allowed_options.find(key) != allowed_options.end() || opt.is_sparam) { + allowed_keys.insert(key); + // also add variant keys (args without leading dashes and env vars) + for (const auto & arg : opt.get_args()) { + allowed_keys.insert(rm_leading_dashes(arg)); + } + for (const auto & env : opt.get_env()) { + allowed_keys.insert(env); + } + } + } + + return allowed_keys; +} + std::vector common_preset::to_args(const std::string & bin_path) const { std::vector args; @@ -121,6 +161,29 @@ void common_preset::merge(const common_preset & other) { } } +void common_preset::apply_to_params(common_params & params) const { + for (const auto & [opt, val] : options) { + // apply each option to params + if (opt.handler_string) { + opt.handler_string(params, val); + } else if (opt.handler_int) { + opt.handler_int(params, std::stoi(val)); + } else if (opt.handler_bool) { + opt.handler_bool(params, common_arg_utils::is_truthy(val)); + } else if (opt.handler_str_str) { + // not supported yet + throw std::runtime_error(string_format( + "%s: option with two values is not supported yet", + __func__ + )); + } else if (opt.handler_void) { + opt.handler_void(params); + } else { + GGML_ABORT("unknown handler type"); + } + } +} + static std::map> parse_ini_from_file(const std::string & path) { std::map> parsed; @@ -230,10 +293,16 @@ static std::string parse_bool_arg(const common_arg & arg, const std::string & ke return value; } -common_preset_context::common_preset_context(llama_example ex) +common_preset_context::common_preset_context(llama_example ex, bool only_remote_allowed) : ctx_params(common_params_parser_init(default_params, ex)) { common_params_add_preset_options(ctx_params.options); key_to_opt = get_map_key_opt(ctx_params); + + // setup allowed keys if only_remote_allowed is true + if (only_remote_allowed) { + filter_allowed_keys = true; + allowed_keys = get_remote_preset_whitelist(key_to_opt); + } } common_presets common_preset_context::load_from_ini(const std::string & path, common_preset & global) const { @@ -250,6 +319,12 @@ common_presets common_preset_context::load_from_ini(const std::string & path, co LOG_DBG("loading preset: %s\n", preset.name.c_str()); for (const auto & [key, value] : section.second) { LOG_DBG("option: %s = %s\n", key.c_str(), value.c_str()); + if (filter_allowed_keys && allowed_keys.find(key) == allowed_keys.end()) { + throw std::runtime_error(string_format( + "option '%s' is not allowed in remote presets", + key.c_str() + )); + } if (key_to_opt.find(key) != key_to_opt.end()) { const auto & opt = key_to_opt.at(key); if (is_bool_arg(opt)) { diff --git a/common/preset.h b/common/preset.h index 3a84d1be29c..11ba6ef8124 100644 --- a/common/preset.h +++ b/common/preset.h @@ -6,6 +6,7 @@ #include #include #include +#include // // INI preset parser and writer @@ -40,6 +41,9 @@ struct common_preset { // merge another preset into this one, overwriting existing options void merge(const common_preset & other); + + // apply preset options to common_params + void apply_to_params(common_params & params) const; }; // interface for multiple presets in one file @@ -50,7 +54,12 @@ struct common_preset_context { common_params default_params; // unused for now common_params_context ctx_params; std::map key_to_opt; - common_preset_context(llama_example ex); + + bool filter_allowed_keys = false; + std::set allowed_keys; + + // if only_remote_allowed is true, only accept whitelisted keys + common_preset_context(llama_example ex, bool only_remote_allowed = false); // load presets from INI file common_presets load_from_ini(const std::string & path, common_preset & global) const; diff --git a/docs/preset.md b/docs/preset.md new file mode 100644 index 00000000000..daea562cc55 --- /dev/null +++ b/docs/preset.md @@ -0,0 +1,50 @@ +# llama.cpp INI preset + +## Introduction + +INI preset is a feature that was added in [PR#17859](https://github.com/ggml-org/llama.cpp/pull/17859). The goal is to allow writing reusable and sharable parameter presets in llama.cpp + +### Using preset on server + +When using multiple models on server (router mode), INI preset file can be used to configure model-specific parameters. Please refer to [server documentations](../tools/server/README.md) for more. + +### Using a remote preset + +> [!NOTE] +> +> This feature is currently only supported via the `-hf` option + +For GGUF models stored on Hugging Face, you can create a file named `preset.ini` in the root directory of the repository that contains specific configurations for the current model. + +Example: + +```ini +hf-repo-draft = username/my-draft-model-GGUF +temp = 0.5 +top-k = 20 +top-p = 0.95 +``` + +For security reason, only certain options are allowed. Please refer to [preset.cpp](../common/preset.cpp) for the list of allowed options. + +Example usage: + +Provided your repo is `username/my-model-with-preset` having a `preset.ini` with the content above. + +```sh +llama-cli -hf username/my-model-with-preset + +# equivalent to +llama-cli -hf username/my-model-with-preset \ + --hf-repo-draft username/my-draft-model-GGUF \ + --temp 0.5 \ + --top-k 20 \ + --top-p 0.95 +``` + +You can also optionally override preset args by specifying them in the arguments: + +```sh +# forcing temp = 0.1 +llama-cli -hf username/my-model-with-preset --temp 0.1 +``` From f9a97375cb539c7f195b473871a3a1eccf9da3d8 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 1 Jan 2026 12:50:45 +0100 Subject: [PATCH 2/8] proof reading --- docs/preset.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/preset.md b/docs/preset.md index daea562cc55..c11c5dc22fe 100644 --- a/docs/preset.md +++ b/docs/preset.md @@ -1,20 +1,20 @@ -# llama.cpp INI preset +# llama.cpp INI Presets ## Introduction -INI preset is a feature that was added in [PR#17859](https://github.com/ggml-org/llama.cpp/pull/17859). The goal is to allow writing reusable and sharable parameter presets in llama.cpp +The INI preset feature, introduced in [PR#17859](https://github.com/ggml-org/llama.cpp/pull/17859), allows users to create reusable and shareable parameter configurations for llama.cpp. -### Using preset on server +### Using Presets with the Server -When using multiple models on server (router mode), INI preset file can be used to configure model-specific parameters. Please refer to [server documentations](../tools/server/README.md) for more. +When running multiple models on the server (router mode), INI preset files can be used to configure model-specific parameters. Please refer to the [server documentation](../tools/server/README.md) for more details. -### Using a remote preset +### Using a Remote Preset > [!NOTE] > -> This feature is currently only supported via the `-hf` option +> This feature is currently only supported via the `-hf` option. -For GGUF models stored on Hugging Face, you can create a file named `preset.ini` in the root directory of the repository that contains specific configurations for the current model. +For GGUF models hosted on Hugging Face, you can include a `preset.ini` file in the root directory of the repository to define specific configurations for that model. Example: @@ -25,16 +25,16 @@ top-k = 20 top-p = 0.95 ``` -For security reason, only certain options are allowed. Please refer to [preset.cpp](../common/preset.cpp) for the list of allowed options. +For security reasons, only certain options are allowed. Please refer to [preset.cpp](../common/preset.cpp) for the complete list of permitted options. Example usage: -Provided your repo is `username/my-model-with-preset` having a `preset.ini` with the content above. +Assuming your repository `username/my-model-with-preset` contains a `preset.ini` with the configuration above: ```sh llama-cli -hf username/my-model-with-preset -# equivalent to +# This is equivalent to: llama-cli -hf username/my-model-with-preset \ --hf-repo-draft username/my-draft-model-GGUF \ --temp 0.5 \ @@ -42,9 +42,9 @@ llama-cli -hf username/my-model-with-preset \ --top-p 0.95 ``` -You can also optionally override preset args by specifying them in the arguments: +You can also override preset arguments by specifying them on the command line: ```sh -# forcing temp = 0.1 +# Force temp = 0.1, overriding the preset value llama-cli -hf username/my-model-with-preset --temp 0.1 ``` From 9935820cb881f83e1eda2f37cf44ded6930a2cb2 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Tue, 6 Jan 2026 16:48:40 +0100 Subject: [PATCH 3/8] allow one HF repo to point to multiple HF repos --- common/arg.cpp | 13 +++++++++++++ common/download.cpp | 2 +- common/preset.cpp | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 4d4f91e96f5..d8ece34a3b7 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -296,6 +296,7 @@ static bool common_params_handle_remote_preset(common_params & params, llama_exa auto remote_presets = ctx.load_from_ini(preset_path, global); if (remote_presets.find(COMMON_PRESET_DEFAULT_NAME) != remote_presets.end()) { common_preset & preset = remote_presets.at(COMMON_PRESET_DEFAULT_NAME); + LOG_INF("\n%s", preset.to_ini().c_str()); // to_ini already added trailing newline preset.apply_to_params(params); } else { throw std::runtime_error("Remote preset.ini does not contain [" + std::string(COMMON_PRESET_DEFAULT_NAME) + "] section"); @@ -525,11 +526,23 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context // maybe handle remote preset if (!params.model.hf_repo.empty()) { + std::string cli_hf_repo = params.model.hf_repo; bool has_preset = common_params_handle_remote_preset(params, ctx_arg.ex); + + // special case: if hf_repo explicitly set by preset, we need to preserve it (ignore CLI value) + // this is useful when we have one HF repo pointing to other HF repos (one model - multiple GGUFs) + std::string preset_hf_repo = params.model.hf_repo; + bool preset_has_hf_repo = preset_hf_repo != cli_hf_repo; + if (has_preset) { // re-parse CLI args to override preset values parse_cli_args(); } + + // preserve hf_repo from preset if needed + if (preset_has_hf_repo) { + params.model.hf_repo = preset_hf_repo; + } } postprocess_cpu_params(params.cpuparams, nullptr); diff --git a/common/download.cpp b/common/download.cpp index d0aa3860812..3bff968623e 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -952,7 +952,7 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons } else if (res_code == 401) { throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token"); } else { - throw std::runtime_error(string_format("error from HF API, response code: %ld, data: %s", res_code, res_str.c_str())); + throw std::runtime_error(string_format("error from HF API (%s), response code: %ld, data: %s", url.c_str(), res_code, res_str.c_str())); } // check response diff --git a/common/preset.cpp b/common/preset.cpp index 949fe001109..aec14e07692 100644 --- a/common/preset.cpp +++ b/common/preset.cpp @@ -19,7 +19,7 @@ static std::string rm_leading_dashes(const std::string & str) { // only allow a subset of args for remote presets for security reasons // do not add more args unless absolutely necessary // args that output to files are strictly prohibited -static std::set get_remote_preset_whitelist(std::map & key_to_opt) { +static std::set get_remote_preset_whitelist(const std::map & key_to_opt) { static const std::set allowed_options = { "model-url", "hf-repo", From 9e173f9061e181ea5d07e29731c6863269db4629 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Tue, 6 Jan 2026 17:00:59 +0100 Subject: [PATCH 4/8] docs: mention about multiple GGUF use case --- docs/preset.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/preset.md b/docs/preset.md index c11c5dc22fe..be50bb99266 100644 --- a/docs/preset.md +++ b/docs/preset.md @@ -48,3 +48,13 @@ You can also override preset arguments by specifying them on the command line: # Force temp = 0.1, overriding the preset value llama-cli -hf username/my-model-with-preset --temp 0.1 ``` + +If you want to define multiple preset configurations for one or more GGUF models, you can create a blank HF repo for each preset. Each HF repo should contain a `preset.ini` file that references the actual model(s): + +```ini +hf-repo = user/my-model-main +hf-repo-draft = user/my-model-draft +temp = 0.8 +ctx-size = 1024 +; (and other configurations) +``` From 74a33726868de463966233e9fba10a6b44cb1d77 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 8 Jan 2026 14:47:50 +0100 Subject: [PATCH 5/8] correct clean_file_name --- common/arg.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 64da5d293e9..1dcf7e86b29 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -271,8 +271,8 @@ static void parse_tensor_buffer_overrides(const std::string & value, std::vector static std::string clean_file_name(const std::string & fname) { std::string clean_fname = fname; - string_replace_all(clean_fname, "\\", "/"); - string_replace_all(clean_fname, "/", ""); + string_replace_all(clean_fname, "\\", "_"); + string_replace_all(clean_fname, "/", "_"); return clean_fname; } From 7fccd041f33cef619920f7ee4dc238d7cb410386 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 8 Jan 2026 15:29:06 +0100 Subject: [PATCH 6/8] download: also return HTTP status code --- common/arg.cpp | 5 ++- common/download.cpp | 82 +++++++++++++++++++++++++++------------------ common/download.h | 10 +++--- 3 files changed, 60 insertions(+), 37 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index a80f336a67d..72750a3cba0 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -286,7 +286,8 @@ static bool common_params_handle_remote_preset(common_params & params, llama_exa // prepare local path for caching auto preset_fname = clean_file_name(params.model.hf_repo + "_preset.ini"); auto preset_path = fs_get_cache_file(preset_fname); - bool has_preset = common_download_file_single(preset_url, preset_path, params.hf_token, offline); + const int status = common_download_file_single(preset_url, preset_path, params.hf_token, offline); + const bool has_preset = status >= 200 && status < 400; // remote preset is optional, so we don't error out if not found if (has_preset) { @@ -301,6 +302,8 @@ static bool common_params_handle_remote_preset(common_params & params, llama_exa } else { throw std::runtime_error("Remote preset.ini does not contain [" + std::string(COMMON_PRESET_DEFAULT_NAME) + "] section"); } + } else { + LOG_INF("%s", "no remote preset found, skipping\n"); } return has_preset; diff --git a/common/download.cpp b/common/download.cpp index eca43e23afe..4b3e7930602 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -157,6 +157,10 @@ static std::string read_etag(const std::string & path) { return none; } +static bool is_http_status_ok(int status) { + return status >= 200 && status < 400; +} + #ifdef LLAMA_USE_CURL // @@ -306,12 +310,14 @@ static bool common_download_head(CURL * curl, } // download one single file from remote URL to local path -static bool common_download_file_single_online(const std::string & url, +// returns status code or -1 on error +static int common_download_file_single_online(const std::string & url, const std::string & path, const std::string & bearer_token, const common_header_list & custom_headers) { static const int max_attempts = 3; static const int retry_delay_seconds = 2; + for (int i = 0; i < max_attempts; ++i) { std::string etag; @@ -371,7 +377,7 @@ static bool common_download_file_single_online(const std::string & url, LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); if (remove(path.c_str()) != 0) { LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); - return false; + return -1; } } @@ -380,14 +386,14 @@ static bool common_download_file_single_online(const std::string & url, if (std::filesystem::exists(path_temporary)) { if (remove(path_temporary.c_str()) != 0) { LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str()); - return false; + return -1; } } if (std::filesystem::exists(path)) { if (remove(path.c_str()) != 0) { LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); - return false; + return -1; } } } @@ -414,23 +420,27 @@ static bool common_download_file_single_online(const std::string & url, long http_code = 0; curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); - if (http_code < 200 || http_code >= 400) { + + int status = static_cast(http_code); + if (!is_http_status_ok(http_code)) { LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code); - return false; + return status; // TODO: maybe only return on certain codes } if (rename(path_temporary.c_str(), path.c_str()) != 0) { LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); - return false; + return -1; } + + return static_cast(http_code); } else { LOG_INF("%s: using cached file: %s\n", __func__, path.c_str()); - } - break; + return 304; // Not Modified - fake cached response + } } - return true; + return -1; // max attempts reached } std::pair> common_remote_get_content(const std::string & url, const common_remote_params & params) { @@ -625,7 +635,8 @@ static bool common_pull_file(httplib::Client & cli, } // download one single file from remote URL to local path -static bool common_download_file_single_online(const std::string & url, +// returns status code or -1 on error +static int common_download_file_single_online(const std::string & url, const std::string & path, const std::string & bearer_token, const common_header_list & custom_headers) { @@ -659,8 +670,10 @@ static bool common_download_file_single_online(const std::string & url, LOG_WRN("%s: HEAD invalid http status code received: %d\n", __func__, head ? head->status : -1); if (file_exists) { LOG_INF("%s: Using cached file (HEAD failed): %s\n", __func__, path.c_str()); - return true; + return head->status; } + return head->status; // cannot use cached file, return raw status code + // TODO: maybe retry only on certain codes } std::string etag; @@ -692,12 +705,12 @@ static bool common_download_file_single_online(const std::string & url, if (file_exists) { if (!should_download_from_scratch) { LOG_INF("%s: using cached file: %s\n", __func__, path.c_str()); - return true; + return 304; // 304 Not Modified - fake cached response } LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); if (remove(path.c_str()) != 0) { LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); - return false; + return -1; } } @@ -709,7 +722,7 @@ static bool common_download_file_single_online(const std::string & url, existing_size = std::filesystem::file_size(path_temporary); } else if (remove(path_temporary.c_str()) != 0) { LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str()); - return false; + return -1; } } @@ -730,15 +743,16 @@ static bool common_download_file_single_online(const std::string & url, if (std::rename(path_temporary.c_str(), path.c_str()) != 0) { LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); - return false; + return -1; } if (!etag.empty()) { write_etag(path, etag); } - break; + + return head->status; // TODO: use actual GET status? } - return true; + return -1; // max attempts reached } std::pair> common_remote_get_content(const std::string & url, @@ -777,22 +791,22 @@ std::pair> common_remote_get_content(const std::string #if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB) -static bool common_download_file_single(const std::string & url, - const std::string & path, - const std::string & bearer_token, - bool offline, - const common_header_list & headers) { +int common_download_file_single(const std::string & url, + const std::string & path, + const std::string & bearer_token, + bool offline, + const common_header_list & headers) { if (!offline) { return common_download_file_single_online(url, path, bearer_token, headers); } if (!std::filesystem::exists(path)) { LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str()); - return false; + return -1; } LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str()); - return true; + return -1; } // download multiple files from remote URLs to local paths @@ -810,7 +824,8 @@ static bool common_download_file_multiple(const std::vector & it) -> bool { - return common_download_file_single(it.first, it.second, bearer_token, offline, headers); + const int http_status = common_download_file_single(it.first, it.second, bearer_token, offline, headers); + return is_http_status_ok(http_status); }, item ) @@ -837,7 +852,8 @@ bool common_download_model(const common_params_model & model, return false; } - if (!common_download_file_single(model.url, model.path, bearer_token, offline, headers)) { + const int http_status = common_download_file_single(model.url, model.path, bearer_token, offline, headers); + if (!is_http_status_ok(http_status)) { return false; } @@ -1094,7 +1110,8 @@ std::string common_docker_resolve_model(const std::string & docker) { std::string local_path = fs_get_cache_file(model_filename); const std::string blob_url = url_prefix + "/blobs/" + gguf_digest; - if (!common_download_file_single(blob_url, local_path, token, false, {})) { + const int http_status = common_download_file_single(blob_url, local_path, token, false, {}); + if (!is_http_status_ok(http_status)) { throw std::runtime_error("Failed to download Docker Model"); } @@ -1120,10 +1137,11 @@ std::string common_docker_resolve_model(const std::string &) { throw std::runtime_error("download functionality is not enabled in this build"); } -bool common_download_file_single(const std::string &, - const std::string &, - const std::string &, - const common_header_list &) { +int common_download_file_single(const std::string &, + const std::string &, + const std::string &, + bool, + const common_header_list &) { throw std::runtime_error("download functionality is not enabled in this build"); } diff --git a/common/download.h b/common/download.h index a4803eee0b6..c79be2f90eb 100644 --- a/common/download.h +++ b/common/download.h @@ -66,10 +66,12 @@ bool common_download_model( std::vector common_list_cached_models(); // download single file from url to local path -bool common_download_file_single(const std::string & url, - const std::string & path, - const std::string & bearer_token, - const common_header_list & headers = {}); +// returns status code or -1 on error +int common_download_file_single(const std::string & url, + const std::string & path, + const std::string & bearer_token, + bool offline, + const common_header_list & headers = {}); // resolve and download model from Docker registry // return local path to downloaded model file From 36b6b9856dd5a26f90faf55fcdbab64b9d80d2a2 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 8 Jan 2026 15:56:48 +0100 Subject: [PATCH 7/8] fix case with cache file used --- common/download.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/download.cpp b/common/download.cpp index 4b3e7930602..b8b9b45b852 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -670,7 +670,7 @@ static int common_download_file_single_online(const std::string & url, LOG_WRN("%s: HEAD invalid http status code received: %d\n", __func__, head ? head->status : -1); if (file_exists) { LOG_INF("%s: Using cached file (HEAD failed): %s\n", __func__, path.c_str()); - return head->status; + return 304; // 304 Not Modified - fake cached response } return head->status; // cannot use cached file, return raw status code // TODO: maybe retry only on certain codes From 38cd7fb4f665c6bae7c355260d2c87371584297c Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 8 Jan 2026 17:51:06 +0100 Subject: [PATCH 8/8] fix --offline option --- common/download.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/download.cpp b/common/download.cpp index b8b9b45b852..a1e0e518e9a 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -806,7 +806,7 @@ int common_download_file_single(const std::string & url, } LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str()); - return -1; + return 304; // Not Modified - fake cached response } // download multiple files from remote URLs to local paths