From 26d0e9b2eb6e30c137b442ffe65c9642b31ca2e0 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 19 Mar 2026 00:00:52 +0100 Subject: [PATCH 1/8] wip: server_tools --- common/arg.cpp | 8 + common/common.h | 3 + tools/server/CMakeLists.txt | 2 + tools/server/README-dev.md | 16 + tools/server/README.md | 8 + tools/server/server-tools.cpp | 596 ++++++++++++++++++++++++++++++++++ tools/server/server-tools.h | 7 + tools/server/server.cpp | 10 + 8 files changed, 650 insertions(+) create mode 100644 tools/server/server-tools.cpp create mode 100644 tools/server/server-tools.h diff --git a/common/arg.cpp b/common/arg.cpp index 666339a0945..f54cab4449d 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2848,6 +2848,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.webui_mcp_proxy = value; } ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_WEBUI_MCP_PROXY")); + add_opt(common_arg( + {"--tools"}, + {"--no-tools"}, + string_format("experimental: whether to enable tools for AI agents - do not enable in untrusted environments (default: %s)", params.server_tools ? 
"enabled" : "disabled"), + [](common_params & params, bool value) { + params.server_tools = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_TOOLS")); add_opt(common_arg( {"--webui"}, {"--no-webui"}, diff --git a/common/common.h b/common/common.h index 073ef566d2d..25f798ff0ff 100644 --- a/common/common.h +++ b/common/common.h @@ -569,6 +569,9 @@ struct common_params { bool endpoint_props = false; // only control POST requests, not GET bool endpoint_metrics = false; + // enable built-in tools + bool server_tools = false; + // router server configs std::string models_dir = ""; // directory containing models for the router server std::string models_preset = ""; // directory containing model presets for the router server diff --git a/tools/server/CMakeLists.txt b/tools/server/CMakeLists.txt index 5621a51b226..fc4cb5dcc8a 100644 --- a/tools/server/CMakeLists.txt +++ b/tools/server/CMakeLists.txt @@ -13,6 +13,8 @@ add_library(${TARGET} STATIC server-common.h server-context.cpp server-context.h + server-tools.cpp + server-tools.h ) if (BUILD_SHARED_LIBS) diff --git a/tools/server/README-dev.md b/tools/server/README-dev.md index 3fea3042f72..8318c5852d8 100644 --- a/tools/server/README-dev.md +++ b/tools/server/README-dev.md @@ -95,6 +95,22 @@ The framework automatically starts a `llama-server` instance, sends requests, an For detailed instructions, see the [test documentation](./tests/README.md). +### API for tools + +This endpoint is intended to be used internally by the Web UI and subject to change or to be removed in the future. + +**GET /tools** + +Get a list of tools, the tool definition is in OAI-compat format. + +**POST /tools** + +Invoke a tool call, request body is a JSON object with: +- `tool` (string): the name of the tool +- `params` (object): a mapping from argument name (string) to argument value + +Returns JSON object, the schema depends on the tool itself. 
+ ### Notable Related PRs - Initial server implementation: https://github.com/ggml-org/llama.cpp/pull/1443 diff --git a/tools/server/README.md b/tools/server/README.md index da16ddc756e..c0fd8b06610 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -1433,6 +1433,14 @@ curl http://localhost:8080/v1/messages/count_tokens \ {"input_tokens": 10} ``` +## Server built-in tools + +The server exposes a REST API under `/tools` that allows the Web UI to call built-in tools. This endpoint is intended to be used internally by the Web UI and subject to change or to be removed in the future. + +**Please do NOT use this endpoint in a downstream application** + +For further documentation about this endpoint, please refer to [server internal documentation](./README-dev.md) + ## Using multiple models `llama-server` can be launched in a **router mode** that exposes an API for dynamically loading and unloading models. The main process (the "router") automatically forwards each request to the appropriate model instance. 
diff --git a/tools/server/server-tools.cpp b/tools/server/server-tools.cpp new file mode 100644 index 00000000000..a040c42051f --- /dev/null +++ b/tools/server/server-tools.cpp @@ -0,0 +1,596 @@ +#include "server-tools.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + +// +// internal helpers +// + +static std::vector to_cstr_vec(const std::vector & v) { + std::vector r; + r.reserve(v.size() + 1); + for (const auto & s : v) { + r.push_back(const_cast(s.c_str())); + } + r.push_back(nullptr); + return r; +} + +struct run_proc_result { + std::string output; + int exit_code = -1; + bool timed_out = false; +}; + +static run_proc_result run_process( + const std::vector & args, + size_t max_output, + int timeout_secs) { + run_proc_result res; + + subprocess_s proc; + auto argv = to_cstr_vec(args); + + int options = subprocess_option_no_window + | subprocess_option_combined_stdout_stderr + | subprocess_option_inherit_environment + | subprocess_option_search_user_path; + + if (subprocess_create(argv.data(), options, &proc) != 0) { + res.output = "failed to spawn process"; + return res; + } + + std::atomic done{false}; + std::atomic timed_out{false}; + + std::thread timeout_thread([&]() { + auto deadline = std::chrono::steady_clock::now() + std::chrono::seconds(timeout_secs); + while (!done.load()) { + if (std::chrono::steady_clock::now() >= deadline) { + timed_out.store(true); + subprocess_terminate(&proc); + return; + } + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + }); + + FILE * f = subprocess_stdout(&proc); + std::string output; + bool truncated = false; + if (f) { + char buf[4096]; + while (fgets(buf, sizeof(buf), f) != nullptr) { + if (!truncated) { + size_t len = strlen(buf); + if (output.size() + len <= max_output) { + output.append(buf, len); + } else { + output.append(buf, max_output - output.size()); + truncated = true; + } + } + } + } + + done.store(true); + 
if (timeout_thread.joinable()) { + timeout_thread.join(); + } + + subprocess_join(&proc, &res.exit_code); + subprocess_destroy(&proc); + + res.output = output; + res.timed_out = timed_out.load(); + if (truncated) { + res.output += "\n[output truncated]"; + } + return res; +} + +// simple glob: * matches non-/ chars, ** matches anything including / +static bool glob_match(const char * pattern, const char * str) { + if (*pattern == '\0') { + return *str == '\0'; + } + if (pattern[0] == '*' && pattern[1] == '*') { + const char * p = pattern + 2; + if (*p == '/') p++; + if (glob_match(p, str)) return true; + if (*str != '\0') return glob_match(pattern, str + 1); + return false; + } + if (*pattern == '*') { + const char * p = pattern + 1; + for (; *str != '\0' && *str != '/'; str++) { + if (glob_match(p, str)) return true; + } + return glob_match(p, str); + } + if (*pattern == '?' && *str != '\0' && *str != '/') { + return glob_match(pattern + 1, str + 1); + } + if (*pattern == *str) { + return glob_match(pattern + 1, str + 1); + } + return false; +} + +static bool glob_match(const std::string & pattern, const std::string & str) { + return glob_match(pattern.c_str(), str.c_str()); +} + +// +// base struct +// + +struct server_tool { + std::string name; + json definition; + bool permission_write = false; + virtual ~server_tool() = default; + virtual json to_json() = 0; + virtual json invoke(json params) = 0; +}; + +// +// read_file: read a file with optional line range and line-number prefix +// + +static constexpr size_t SERVER_TOOL_READ_FILE_MAX_SIZE = 16 * 1024; // 16 KB + +struct server_tool_read_file : server_tool { + server_tool_read_file() { name = "read_file"; permission_write = false; } + + json to_json() override { + return { + {"type", "function"}, + {"function", { + {"name", name}, + {"description", "Read the contents of a file. Optionally specify a 1-based line range. " + "If append_loc is true, each line is prefixed with its line number (e.g. 
\"1\u2192 ...\")."}, + {"parameters", { + {"type", "object"}, + {"properties", { + {"path", {{"type", "string"}, {"description", "Path to the file"}}}, + {"start_line", {{"type", "integer"}, {"description", "First line to read, 1-based (default: 1)"}}}, + {"end_line", {{"type", "integer"}, {"description", "Last line to read, 1-based inclusive (default: end of file)"}}}, + {"append_loc", {{"type", "boolean"}, {"description", "Prefix each line with its line number"}}}, + }}, + {"required", json::array({"path"})}, + }}, + }}, + }; + } + + json invoke(json params) override { + std::string path = params.at("path").get(); + int start_line = json_value(params, "start_line", 1); + int end_line = json_value(params, "end_line", -1); // -1 = no limit + bool append_loc = json_value(params, "append_loc", false); + + std::error_code ec; + uintmax_t file_size = fs::file_size(path, ec); + if (ec) { + return {{"error", "cannot stat file: " + ec.message()}}; + } + if (file_size > SERVER_TOOL_READ_FILE_MAX_SIZE && end_line == -1) { + return {{"error", string_format( + "file too large (%zu bytes, max %zu). 
Use start_line/end_line to read a portion.", + (size_t)file_size, SERVER_TOOL_READ_FILE_MAX_SIZE)}}; + } + + std::ifstream f(path); + if (!f) { + return {{"error", "failed to open file: " + path}}; + } + + std::string result; + std::string line; + int lineno = 0; + + while (std::getline(f, line)) { + lineno++; + if (lineno < start_line) continue; + if (end_line != -1 && lineno > end_line) break; + + std::string out_line; + if (append_loc) { + out_line = std::to_string(lineno) + "\u2192 " + line + "\n"; + } else { + out_line = line + "\n"; + } + + if (result.size() + out_line.size() > SERVER_TOOL_READ_FILE_MAX_SIZE) { + result += "[output truncated]"; + break; + } + result += out_line; + } + + return {{"content", result}}; + } +}; + +// +// file_glob_search: find files matching a glob pattern under a base directory +// + +static constexpr size_t SERVER_TOOL_FILE_SEARCH_MAX_RESULTS = 100; + +struct server_tool_file_glob_search : server_tool { + server_tool_file_glob_search() { name = "file_glob_search"; permission_write = false; } + + json to_json() override { + return { + {"type", "function"}, + {"function", { + {"name", name}, + {"description", "Recursively search for files matching a glob pattern under a directory."}, + {"parameters", { + {"type", "object"}, + {"properties", { + {"path", {{"type", "string"}, {"description", "Base directory to search in"}}}, + {"include", {{"type", "string"}, {"description", "Glob pattern for files to include (e.g. \"**/*.cpp\"). 
Default: **"}}}, + {"exclude", {{"type", "string"}, {"description", "Glob pattern for files to exclude"}}}, + }}, + {"required", json::array({"path"})}, + }}, + }}, + }; + } + + json invoke(json params) override { + std::string base = params.at("path").get(); + std::string include = json_value(params, "include", std::string("**")); + std::string exclude = json_value(params, "exclude", std::string("")); + + json files = json::array(); + + std::error_code ec; + for (const auto & entry : fs::recursive_directory_iterator(base, + fs::directory_options::skip_permission_denied, ec)) { + if (!entry.is_regular_file()) continue; + + std::string rel = fs::relative(entry.path(), base, ec).string(); + if (ec) continue; + std::replace(rel.begin(), rel.end(), '\\', '/'); + + if (!glob_match(include, rel)) continue; + if (!exclude.empty() && glob_match(exclude, rel)) continue; + + files.push_back(entry.path().string()); + if (files.size() >= SERVER_TOOL_FILE_SEARCH_MAX_RESULTS) { + break; + } + } + + return {{"files", files}, {"count", files.size()}}; + } +}; + +// +// grep_search: search for a regex pattern in files +// + +static constexpr size_t SERVER_TOOL_GREP_SEARCH_MAX_RESULTS = 100; + +struct server_tool_grep_search : server_tool { + server_tool_grep_search() { name = "grep_search"; permission_write = false; } + + json to_json() override { + return { + {"type", "function"}, + {"function", { + {"name", name}, + {"description", "Search for a regex pattern in files under a path. 
Returns matching lines."}, + {"parameters", { + {"type", "object"}, + {"properties", { + {"path", {{"type", "string"}, {"description", "File or directory to search in"}}}, + {"pattern", {{"type", "string"}, {"description", "Regular expression pattern to search for"}}}, + {"include", {{"type", "string"}, {"description", "Glob pattern to filter files (default: **)"}}}, + {"exclude", {{"type", "string"}, {"description", "Glob pattern to exclude files"}}}, + {"return_line_numbers", {{"type", "boolean"}, {"description", "If true, include line numbers in results"}}}, + }}, + {"required", json::array({"path", "pattern"})}, + }}, + }}, + }; + } + + json invoke(json params) override { + std::string path = params.at("path").get(); + std::string pat_str = params.at("pattern").get(); + std::string include = json_value(params, "include", std::string("**")); + std::string exclude = json_value(params, "exclude", std::string("")); + bool show_lineno = json_value(params, "return_line_numbers", false); + + std::regex pattern; + try { + pattern = std::regex(pat_str); + } catch (const std::regex_error & e) { + return {{"error", std::string("invalid regex: ") + e.what()}}; + } + + json matches = json::array(); + size_t total = 0; + + auto search_file = [&](const fs::path & fpath) { + std::ifstream f(fpath); + if (!f) return; + std::string line; + int lineno = 0; + while (std::getline(f, line) && total < SERVER_TOOL_GREP_SEARCH_MAX_RESULTS) { + lineno++; + if (std::regex_search(line, pattern)) { + json match = {{"file", fpath.string()}, {"content", line}}; + if (show_lineno) { + match["line"] = lineno; + } + matches.push_back(match); + total++; + } + } + }; + + std::error_code ec; + if (fs::is_regular_file(path, ec)) { + search_file(path); + } else if (fs::is_directory(path, ec)) { + for (const auto & entry : fs::recursive_directory_iterator(path, + fs::directory_options::skip_permission_denied, ec)) { + if (!entry.is_regular_file()) continue; + if (total >= 
SERVER_TOOL_GREP_SEARCH_MAX_RESULTS) break; + + std::string rel = fs::relative(entry.path(), path, ec).string(); + if (ec) continue; + std::replace(rel.begin(), rel.end(), '\\', '/'); + + if (!glob_match(include, rel)) continue; + if (!exclude.empty() && glob_match(exclude, rel)) continue; + + search_file(entry.path()); + } + } else { + return {{"error", "path does not exist: " + path}}; + } + + return {{"matches", matches}, {"count", total}}; + } +}; + +// +// exec_shell_command: run an arbitrary shell command +// + +static constexpr size_t SERVER_TOOL_EXEC_SHELL_COMMAND_MAX_OUTPUT_SIZE = 16 * 1024; // 16 KB +static constexpr int SERVER_TOOL_EXEC_SHELL_COMMAND_MAX_TIMEOUT = 60; // seconds + +struct server_tool_exec_shell_command : server_tool { + server_tool_exec_shell_command() { name = "exec_shell_command"; permission_write = true; } + + json to_json() override { + return { + {"type", "function"}, + {"function", { + {"name", name}, + {"description", "Execute a shell command and return its output (stdout and stderr combined)."}, + {"parameters", { + {"type", "object"}, + {"properties", { + {"command", {{"type", "string"}, {"description", "Shell command to execute"}}}, + {"timeout", {{"type", "integer"}, {"description", string_format("Timeout in seconds (default 10, max %d)", SERVER_TOOL_EXEC_SHELL_COMMAND_MAX_TIMEOUT)}}}, + {"max_output_size", {{"type", "integer"}, {"description", string_format("Maximum output size in bytes (default %zu)", SERVER_TOOL_EXEC_SHELL_COMMAND_MAX_OUTPUT_SIZE)}}}, + }}, + {"required", json::array({"command"})}, + }}, + }}, + }; + } + + json invoke(json params) override { + std::string command = params.at("command").get(); + int timeout = json_value(params, "timeout", 10); + size_t max_output = (size_t) json_value(params, "max_output_size", (int) SERVER_TOOL_EXEC_SHELL_COMMAND_MAX_OUTPUT_SIZE); + + timeout = std::min(timeout, SERVER_TOOL_EXEC_SHELL_COMMAND_MAX_TIMEOUT); + max_output = std::min(max_output, 
SERVER_TOOL_EXEC_SHELL_COMMAND_MAX_OUTPUT_SIZE); + +#ifdef _WIN32 + std::vector args = {"cmd", "/c", command}; +#else + std::vector args = {"sh", "-c", command}; +#endif + + auto res = run_process(args, max_output, timeout); + + json out = {{"output", res.output}, {"exit_code", res.exit_code}}; + if (res.timed_out) { + out["timed_out"] = true; + } + return out; + } +}; + +// +// write_file: create or overwrite a file +// + +struct server_tool_write_file : server_tool { + server_tool_write_file() { name = "write_file"; permission_write = true; } + + json to_json() override { + return { + {"type", "function"}, + {"function", { + {"name", name}, + {"description", "Write content to a file, creating it (including parent directories) if it does not exist."}, + {"parameters", { + {"type", "object"}, + {"properties", { + {"path", {{"type", "string"}, {"description", "Path of the file to write"}}}, + {"content", {{"type", "string"}, {"description", "Content to write"}}}, + }}, + {"required", json::array({"path", "content"})}, + }}, + }}, + }; + } + + json invoke(json params) override { + std::string path = params.at("path").get(); + std::string content = params.at("content").get(); + + std::error_code ec; + fs::path fpath(path); + if (fpath.has_parent_path()) { + fs::create_directories(fpath.parent_path(), ec); + if (ec) { + return {{"error", "failed to create directories: " + ec.message()}}; + } + } + + std::ofstream f(path, std::ios::binary); + if (!f) { + return {{"error", "failed to open file for writing: " + path}}; + } + f << content; + if (!f) { + return {{"error", "failed to write file: " + path}}; + } + + return {{"result", "file written successfully"}, {"path", path}, {"bytes", content.size()}}; + } +}; + +// +// edit_file: apply a unified diff via git apply +// + +struct server_tool_edit_file : server_tool { + server_tool_edit_file() { name = "edit_file"; permission_write = true; } + + json to_json() override { + return { + {"type", "function"}, + {"function", { 
+ {"name", name}, + {"description", "Apply a unified diff to edit one or more files using git apply."}, + {"parameters", { + {"type", "object"}, + {"properties", { + {"diff", {{"type", "string"}, {"description", "Unified diff content in git diff format"}}}, + }}, + {"required", json::array({"diff"})}, + }}, + }}, + }; + } + + json invoke(json params) override { + std::string diff = params.at("diff").get(); + + // write diff to a temporary file + static std::atomic counter{0}; + std::string tmp_path = (fs::temp_directory_path() / + ("llama_patch_" + std::to_string(++counter) + ".patch")).string(); + + { + std::ofstream f(tmp_path, std::ios::binary); + if (!f) { + return {{"error", "failed to create temp patch file"}}; + } + f << diff; + } + + auto res = run_process({"git", "apply", tmp_path}, 4096, 10); + + std::error_code ec; + fs::remove(tmp_path, ec); + + if (res.exit_code != 0) { + return {{"error", "git apply failed (exit " + std::to_string(res.exit_code) + "): " + res.output}}; + } + return {{"result", "patch applied successfully"}}; + } +}; + +// +// public API +// + +static std::vector> build_tools() { + std::vector> tools; + tools.push_back(std::make_unique()); + tools.push_back(std::make_unique()); + tools.push_back(std::make_unique()); + tools.push_back(std::make_unique()); + tools.push_back(std::make_unique()); + tools.push_back(std::make_unique()); + return tools; +} + +static json server_tools_list() { + auto tools = build_tools(); + json result = json::array(); + for (const auto & t : tools) { + result.push_back(t->to_json()); + } + return result; +} + +static json server_tool_call(const std::string & name, const json & params) { + auto tools = build_tools(); + for (auto & t : tools) { + if (t->name == name) { + return t->invoke(params); + } + } + return {{"error", "unknown tool: " + name}}; +} + +server_http_context::handler_t server_tools_get = [](const server_http_req &) -> server_http_res_ptr { + auto res = std::make_unique(); + try { + json tools 
= server_tools_list(); + res->data = safe_json_to_str(tools); + } catch (const std::exception & e) { + SRV_ERR("got exception: %s\n", e.what()); + res->status = 500; + res->data = safe_json_to_str(format_error_response(e.what(), ERROR_TYPE_SERVER)); + } + return res; +}; + +server_http_context::handler_t server_tools_post = [](const server_http_req & req) -> server_http_res_ptr { + auto res = std::make_unique(); + try { + json body = json::parse(req.body); + std::string tool_name = body.at("tool").get(); + json params = body.value("params", json::object()); + json result = server_tool_call(tool_name, params); + res->data = safe_json_to_str(result); + } catch (const json::exception & e) { + res->status = 400; + res->data = safe_json_to_str(format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST)); + } catch (const std::exception & e) { + SRV_ERR("got exception: %s\n", e.what()); + res->status = 500; + res->data = safe_json_to_str(format_error_response(e.what(), ERROR_TYPE_SERVER)); + } + return res; +}; diff --git a/tools/server/server-tools.h b/tools/server/server-tools.h new file mode 100644 index 00000000000..141235d7993 --- /dev/null +++ b/tools/server/server-tools.h @@ -0,0 +1,7 @@ +#pragma once + +#include "server-common.h" +#include "server-http.h" + +extern server_http_context::handler_t server_tools_get; +extern server_http_context::handler_t server_tools_post; \ No newline at end of file diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 0bd6fda17d2..d1db1ed1ea0 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -2,6 +2,7 @@ #include "server-http.h" #include "server-models.h" #include "server-cors-proxy.h" +#include "server-tools.h" #include "arg.h" #include "common.h" @@ -211,6 +212,15 @@ int main(int argc, char ** argv) { ctx_http.get ("/cors-proxy", ex_wrapper(proxy_handler_get)); ctx_http.post("/cors-proxy", ex_wrapper(proxy_handler_post)); } + // EXPERIMENTAL built-in tools + if (params.server_tools) { + 
SRV_WRN("%s", "-----------------\n"); + SRV_WRN("%s", "Built-in tools are enabled, do not expose server to untrusted environments\n"); + SRV_WRN("%s", "This feature is EXPERIMENTAL and may be changed in the future\n"); + SRV_WRN("%s", "-----------------\n"); + ctx_http.get ("/tools", ex_wrapper(server_tools_get)); + ctx_http.post("/tools", ex_wrapper(server_tools_post)); + } // // Start the server From 7f9f53124bf1a0ed0110332ed1377a728328e99c Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 19 Mar 2026 23:03:33 +0100 Subject: [PATCH 2/8] refactor --- tools/server/README-dev.md | 43 +++++- tools/server/server-tools.cpp | 270 ++++++++++++++++++++++++++++++---- 2 files changed, 282 insertions(+), 31 deletions(-) diff --git a/tools/server/README-dev.md b/tools/server/README-dev.md index 8318c5852d8..326cb357b44 100644 --- a/tools/server/README-dev.md +++ b/tools/server/README-dev.md @@ -101,7 +101,12 @@ This endpoint is intended to be used internally by the Web UI and subject to cha **GET /tools** -Get a list of tools, the tool definition is in OAI-compat format. +Get a list of tools, each tool has these fields: +- `tool` (string): the ID name of the tool, to be used in POST call. Example: `read_file` +- `displayName` (string): the name to be displayed on UI. Example: `Read file` +- `type` (string): always be `"builtin"` for now +- `permissions` (object): a mapping string --> boolean that indicates the permission required by this tool. This is useful for the UI to ask the user before calling the tool. For now, the only permission supported is `"write"` +- `definition` (object): the OAI-compat definition of this tool **POST /tools** @@ -109,7 +114,41 @@ Invoke a tool call, request body is a JSON object with: - `tool` (string): the name of the tool - `params` (object): a mapping from argument name (string) to argument value -Returns JSON object, the schema depends on the tool itself. +Returns JSON object. There are two response formats: + +Format 1: Plain text. 
The text will be placed into a field called `plain_text_response`, example: + +```json +{ + "plain_text_response": "this is a text response" +} +``` + +The client should extract this value and place it inside message content (note: content is no longer a JSON), example + +```json +{ + "role": "tool", + "content": "this is a text response" +} +``` + +Format 2: Normal JSON response, example: + +```json +{ + "error": "cannot open this file" +} +``` + +That requires `JSON.stringify` when formatted to message content: + +```json +{ + "role": "tool", + "content": "{\"error\":\"cannot open this file\"}" +} +``` ### Notable Related PRs diff --git a/tools/server/server-tools.cpp b/tools/server/server-tools.cpp index a040c42051f..d5c7921efd2 100644 --- a/tools/server/server-tools.cpp +++ b/tools/server/server-tools.cpp @@ -139,11 +139,25 @@ static bool glob_match(const std::string & pattern, const std::string & str) { struct server_tool { std::string name; + std::string displayName; json definition; bool permission_write = false; + virtual ~server_tool() = default; - virtual json to_json() = 0; + virtual json get_definition() = 0; virtual json invoke(json params) = 0; + + json to_json() { + return { + {"displayName", displayName}, + {"tool", name}, + {"type", "builtin"}, + {"permissions", json{ + {"write", permission_write} + }}, + {"definition", get_definition()}, + }; + } }; // @@ -153,9 +167,13 @@ struct server_tool { static constexpr size_t SERVER_TOOL_READ_FILE_MAX_SIZE = 16 * 1024; // 16 KB struct server_tool_read_file : server_tool { - server_tool_read_file() { name = "read_file"; permission_write = false; } + server_tool_read_file() { + name = "read_file"; + displayName = "Read file"; + permission_write = false; + } - json to_json() override { + json get_definition() override { return { {"type", "function"}, {"function", { @@ -221,7 +239,7 @@ struct server_tool_read_file : server_tool { result += out_line; } - return {{"content", result}}; + return 
{{"plain_text_response", result}}; } }; @@ -232,9 +250,13 @@ struct server_tool_read_file : server_tool { static constexpr size_t SERVER_TOOL_FILE_SEARCH_MAX_RESULTS = 100; struct server_tool_file_glob_search : server_tool { - server_tool_file_glob_search() { name = "file_glob_search"; permission_write = false; } + server_tool_file_glob_search() { + name = "file_glob_search"; + displayName = "File search"; + permission_write = false; + } - json to_json() override { + json get_definition() override { return { {"type", "function"}, {"function", { @@ -258,7 +280,8 @@ struct server_tool_file_glob_search : server_tool { std::string include = json_value(params, "include", std::string("**")); std::string exclude = json_value(params, "exclude", std::string("")); - json files = json::array(); + std::ostringstream output_text; + size_t count = 0; std::error_code ec; for (const auto & entry : fs::recursive_directory_iterator(base, @@ -272,13 +295,15 @@ struct server_tool_file_glob_search : server_tool { if (!glob_match(include, rel)) continue; if (!exclude.empty() && glob_match(exclude, rel)) continue; - files.push_back(entry.path().string()); - if (files.size() >= SERVER_TOOL_FILE_SEARCH_MAX_RESULTS) { + output_text << entry.path().string() << "\n"; + if (++count >= SERVER_TOOL_FILE_SEARCH_MAX_RESULTS) { break; } } - return {{"files", files}, {"count", files.size()}}; + output_text << "\n---\nTotal matches: " << count << "\n"; + + return {{"plain_text_response", output_text.str()}}; } }; @@ -289,9 +314,13 @@ struct server_tool_file_glob_search : server_tool { static constexpr size_t SERVER_TOOL_GREP_SEARCH_MAX_RESULTS = 100; struct server_tool_grep_search : server_tool { - server_tool_grep_search() { name = "grep_search"; permission_write = false; } + server_tool_grep_search() { + name = "grep_search"; + displayName = "Grep search"; + permission_write = false; + } - json to_json() override { + json get_definition() override { return { {"type", "function"}, {"function", { @@ 
-326,7 +355,7 @@ struct server_tool_grep_search : server_tool { return {{"error", std::string("invalid regex: ") + e.what()}}; } - json matches = json::array(); + std::ostringstream output_text; size_t total = 0; auto search_file = [&](const fs::path & fpath) { @@ -337,11 +366,11 @@ struct server_tool_grep_search : server_tool { while (std::getline(f, line) && total < SERVER_TOOL_GREP_SEARCH_MAX_RESULTS) { lineno++; if (std::regex_search(line, pattern)) { - json match = {{"file", fpath.string()}, {"content", line}}; + output_text << fpath.string() << ":"; if (show_lineno) { - match["line"] = lineno; + output_text << lineno << ":"; } - matches.push_back(match); + output_text << line << "\n"; total++; } } @@ -369,7 +398,9 @@ struct server_tool_grep_search : server_tool { return {{"error", "path does not exist: " + path}}; } - return {{"matches", matches}, {"count", total}}; + output_text << "\n\n---\nTotal matches: " << total << "\n"; + + return {{"plain_text_response", output_text.str()}}; } }; @@ -381,9 +412,13 @@ static constexpr size_t SERVER_TOOL_EXEC_SHELL_COMMAND_MAX_OUTPUT_SIZE = 16 * 10 static constexpr int SERVER_TOOL_EXEC_SHELL_COMMAND_MAX_TIMEOUT = 60; // seconds struct server_tool_exec_shell_command : server_tool { - server_tool_exec_shell_command() { name = "exec_shell_command"; permission_write = true; } + server_tool_exec_shell_command() { + name = "exec_shell_command"; + displayName = "Execute shell command"; + permission_write = true; + } - json to_json() override { + json get_definition() override { return { {"type", "function"}, {"function", { @@ -418,11 +453,13 @@ struct server_tool_exec_shell_command : server_tool { auto res = run_process(args, max_output, timeout); - json out = {{"output", res.output}, {"exit_code", res.exit_code}}; + std::string text_output = res.output; + text_output += string_format("\n[exit code: %d]", res.exit_code); if (res.timed_out) { - out["timed_out"] = true; + text_output += " [exit due to timed out]"; } - return 
out; + + return {{"plain_text_response", text_output}}; } }; @@ -431,14 +468,18 @@ struct server_tool_exec_shell_command : server_tool { // struct server_tool_write_file : server_tool { - server_tool_write_file() { name = "write_file"; permission_write = true; } + server_tool_write_file() { + name = "write_file"; + displayName = "Write file"; + permission_write = true; + } - json to_json() override { + json get_definition() override { return { {"type", "function"}, {"function", { {"name", name}, - {"description", "Write content to a file, creating it (including parent directories) if it does not exist."}, + {"description", "Write content to a file, creating it (including parent directories) if it does not exist. May use with edit_file for more complex edits."}, {"parameters", { {"type", "object"}, {"properties", { @@ -478,18 +519,188 @@ struct server_tool_write_file : server_tool { }; // -// edit_file: apply a unified diff via git apply +// edit_file: edit file content via line-based changes // struct server_tool_edit_file : server_tool { - server_tool_edit_file() { name = "edit_file"; permission_write = true; } + server_tool_edit_file() { + name = "edit_file"; + displayName = "Edit file"; + permission_write = true; + } + + json get_definition() override { + return { + {"type", "function"}, + {"function", { + {"name", name}, + {"description", + "Edit a file by applying a list of line-based changes. " + "Each change targets a 1-based inclusive line range and has a mode: " + "\"replace\" (replace lines with content), " + "\"delete\" (remove lines, content must be empty string), " + "\"append\" (insert content after lineEnd). " + "Set lineStart to -1 to target the end of file (lineEnd is ignored in that case). " + "Changes must not overlap. 
They are applied in reverse line order automatically."}, + {"parameters", { + {"type", "object"}, + {"properties", { + {"path", {{"type", "string"}, {"description", "Path to the file to edit"}}}, + {"changes", { + {"type", "array"}, + {"description", "List of changes to apply"}, + {"items", { + {"type", "object"}, + {"properties", { + {"mode", {{"type", "string"}, {"description", "\"replace\", \"delete\", or \"append\""}}}, + {"lineStart", {{"type", "integer"}, {"description", "First line of the range (1-based); use -1 for end of file"}}}, + {"lineEnd", {{"type", "integer"}, {"description", "Last line of the range (1-based, inclusive); ignored when lineStart is -1"}}}, + {"content", {{"type", "string"}, {"description", "Content to insert; must be empty string for delete mode"}}}, + }}, + {"required", json::array({"mode", "lineStart", "lineEnd", "content"})}, + }}, + }}, + }}, + {"required", json::array({"path", "changes"})}, + }}, + }}, + }; + } + + json invoke(json params) override { + std::string path = params.at("path").get(); + const json & changes = params.at("changes"); + + if (!changes.is_array()) { + return {{"error", "\"changes\" must be an array"}}; + } + + // read file into lines + std::ifstream fin(path); + if (!fin) { + return {{"error", "failed to open file: " + path}}; + } + std::vector lines; + { + std::string line; + while (std::getline(fin, line)) { + lines.push_back(line); + } + } + fin.close(); + + // validate and collect changes, then sort descending by lineStart + struct change_entry { + std::string mode; + int line_start; // 1-based + int line_end; // 1-based inclusive + std::string content; + }; + std::vector entries; + entries.reserve(changes.size()); + + for (const auto & ch : changes) { + change_entry e; + e.mode = ch.at("mode").get(); + e.line_start = ch.at("lineStart").get(); + e.line_end = ch.at("lineEnd").get(); + e.content = ch.at("content").get(); + + if (e.mode != "replace" && e.mode != "delete" && e.mode != "append") { + return 
{{"error", "invalid mode \"" + e.mode + "\"; must be replace, delete, or append"}}; + } + if (e.mode == "delete" && !e.content.empty()) { + return {{"error", "content must be empty string for delete mode"}}; + } + int n = (int) lines.size(); + if (e.line_start == -1) { + // -1 means end of file; lineEnd is ignored — normalize to point past last line + e.line_start = n + 1; + e.line_end = n + 1; + } else { + if (e.line_start < 1 || e.line_end < e.line_start) { + return {{"error", string_format("invalid line range [%d, %d]", e.line_start, e.line_end)}}; + } + if (e.line_end > n) { + return {{"error", string_format("lineEnd %d exceeds file length %d", e.line_end, n)}}; + } + } + entries.push_back(std::move(e)); + } + + // sort descending so earlier-indexed changes don't shift later ones + std::sort(entries.begin(), entries.end(), [](const change_entry & a, const change_entry & b) { + return a.line_start > b.line_start; + }); + + // apply changes (0-based indices internally) + for (const auto & e : entries) { + int idx_start = e.line_start - 1; // 0-based + int idx_end = e.line_end - 1; // 0-based inclusive + + // split content into lines (preserve trailing newline awareness) + std::vector new_lines; + if (!e.content.empty()) { + std::istringstream ss(e.content); + std::string ln; + while (std::getline(ss, ln)) { + new_lines.push_back(ln); + } + // if content ends with \n, getline consumed it — no extra empty line needed + // if content does NOT end with \n, last line is still captured correctly + } + + if (e.mode == "replace") { + // erase [idx_start, idx_end] and insert new_lines + lines.erase(lines.begin() + idx_start, lines.begin() + idx_end + 1); + lines.insert(lines.begin() + idx_start, new_lines.begin(), new_lines.end()); + } else if (e.mode == "delete") { + lines.erase(lines.begin() + idx_start, lines.begin() + idx_end + 1); + } else { // append + // idx_end + 1 may equal lines.size() when lineStart == -1 (end of file) + lines.insert(lines.begin() + idx_end + 
1, new_lines.begin(), new_lines.end()); + } + } + + // write file back + std::ofstream fout(path, std::ios::binary); + if (!fout) { + return {{"error", "failed to open file for writing: " + path}}; + } + for (size_t i = 0; i < lines.size(); i++) { + fout << lines[i]; + if (i + 1 < lines.size()) { + fout << "\n"; + } + } + if (!lines.empty()) { + fout << "\n"; + } + if (!fout) { + return {{"error", "failed to write file: " + path}}; + } + + return {{"result", "file edited successfully"}, {"path", path}, {"lines", (int) lines.size()}}; + } +}; + +// +// apply_diff: apply a unified diff via git apply +// + +struct server_tool_apply_diff : server_tool { + server_tool_apply_diff() { + name = "apply_diff"; + displayName = "Apply diff"; + permission_write = true; + } - json to_json() override { + json get_definition() override { return { {"type", "function"}, {"function", { {"name", name}, - {"description", "Apply a unified diff to edit one or more files using git apply."}, + {"description", "Apply a unified diff to edit one or more files using git apply. 
Use this instead of edit_file when the changes are complex."}, {"parameters", { {"type", "object"}, {"properties", { @@ -541,6 +752,7 @@ static std::vector> build_tools() { tools.push_back(std::make_unique()); tools.push_back(std::make_unique()); tools.push_back(std::make_unique()); + tools.push_back(std::make_unique()); return tools; } From 718bfb0777018927e9cbc3f4b8cdae6d27a4849b Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 19 Mar 2026 23:08:34 +0100 Subject: [PATCH 3/8] displayName -> display_name --- tools/server/README-dev.md | 2 +- tools/server/server-tools.cpp | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/server/README-dev.md b/tools/server/README-dev.md index 326cb357b44..f9fae5a6cc8 100644 --- a/tools/server/README-dev.md +++ b/tools/server/README-dev.md @@ -103,7 +103,7 @@ This endpoint is intended to be used internally by the Web UI and subject to cha Get a list of tools, each tool has these fields: - `tool` (string): the ID name of the tool, to be used in POST call. Example: `read_file` -- `displayName` (string): the name to be displayed on UI. Example: `Read file` +- `display_name` (string): the name to be displayed on UI. Example: `Read file` - `type` (string): always be `"builtin"` for now - `permissions` (object): a mapping string --> boolean that indicates the permission required by this tool. This is useful for the UI to ask the user before calling the tool. 
For now, the only permission supported is `"write"` - `definition` (object): the OAI-compat definition of this tool diff --git a/tools/server/server-tools.cpp b/tools/server/server-tools.cpp index d5c7921efd2..7e371bfb81e 100644 --- a/tools/server/server-tools.cpp +++ b/tools/server/server-tools.cpp @@ -139,7 +139,7 @@ static bool glob_match(const std::string & pattern, const std::string & str) { struct server_tool { std::string name; - std::string displayName; + std::string display_name; json definition; bool permission_write = false; @@ -149,7 +149,7 @@ struct server_tool { json to_json() { return { - {"displayName", displayName}, + {"display_name", display_name}, {"tool", name}, {"type", "builtin"}, {"permissions", json{ @@ -169,7 +169,7 @@ static constexpr size_t SERVER_TOOL_READ_FILE_MAX_SIZE = 16 * 1024; // 16 KB struct server_tool_read_file : server_tool { server_tool_read_file() { name = "read_file"; - displayName = "Read file"; + display_name = "Read file"; permission_write = false; } @@ -252,7 +252,7 @@ static constexpr size_t SERVER_TOOL_FILE_SEARCH_MAX_RESULTS = 100; struct server_tool_file_glob_search : server_tool { server_tool_file_glob_search() { name = "file_glob_search"; - displayName = "File search"; + display_name = "File search"; permission_write = false; } @@ -316,7 +316,7 @@ static constexpr size_t SERVER_TOOL_GREP_SEARCH_MAX_RESULTS = 100; struct server_tool_grep_search : server_tool { server_tool_grep_search() { name = "grep_search"; - displayName = "Grep search"; + display_name = "Grep search"; permission_write = false; } @@ -414,7 +414,7 @@ static constexpr int SERVER_TOOL_EXEC_SHELL_COMMAND_MAX_TIMEOUT = 60; struct server_tool_exec_shell_command : server_tool { server_tool_exec_shell_command() { name = "exec_shell_command"; - displayName = "Execute shell command"; + display_name = "Execute shell command"; permission_write = true; } @@ -470,7 +470,7 @@ struct server_tool_exec_shell_command : server_tool { struct server_tool_write_file : 
server_tool { server_tool_write_file() { name = "write_file"; - displayName = "Write file"; + display_name = "Write file"; permission_write = true; } @@ -525,7 +525,7 @@ struct server_tool_write_file : server_tool { struct server_tool_edit_file : server_tool { server_tool_edit_file() { name = "edit_file"; - displayName = "Edit file"; + display_name = "Edit file"; permission_write = true; } @@ -691,7 +691,7 @@ struct server_tool_edit_file : server_tool { struct server_tool_apply_diff : server_tool { server_tool_apply_diff() { name = "apply_diff"; - displayName = "Apply diff"; + display_name = "Apply diff"; permission_write = true; } From 6aba54e7d710f228e8c652c1eb548c54eb1450c5 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 19 Mar 2026 23:16:10 +0100 Subject: [PATCH 4/8] snake_case everywhere --- tools/server/server-tools.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/server/server-tools.cpp b/tools/server/server-tools.cpp index 7e371bfb81e..7f7d7a6d568 100644 --- a/tools/server/server-tools.cpp +++ b/tools/server/server-tools.cpp @@ -539,8 +539,8 @@ struct server_tool_edit_file : server_tool { "Each change targets a 1-based inclusive line range and has a mode: " "\"replace\" (replace lines with content), " "\"delete\" (remove lines, content must be empty string), " - "\"append\" (insert content after lineEnd). " - "Set lineStart to -1 to target the end of file (lineEnd is ignored in that case). " + "\"append\" (insert content after line_end). " + "Set line_start to -1 to target the end of file (line_end is ignored in that case). " "Changes must not overlap. 
They are applied in reverse line order automatically."}, {"parameters", { {"type", "object"}, @@ -552,12 +552,12 @@ struct server_tool_edit_file : server_tool { {"items", { {"type", "object"}, {"properties", { - {"mode", {{"type", "string"}, {"description", "\"replace\", \"delete\", or \"append\""}}}, - {"lineStart", {{"type", "integer"}, {"description", "First line of the range (1-based); use -1 for end of file"}}}, - {"lineEnd", {{"type", "integer"}, {"description", "Last line of the range (1-based, inclusive); ignored when lineStart is -1"}}}, - {"content", {{"type", "string"}, {"description", "Content to insert; must be empty string for delete mode"}}}, + {"mode", {{"type", "string"}, {"description", "\"replace\", \"delete\", or \"append\""}}}, + {"line_start", {{"type", "integer"}, {"description", "First line of the range (1-based); use -1 for end of file"}}}, + {"line_end", {{"type", "integer"}, {"description", "Last line of the range (1-based, inclusive); ignored when line_start is -1"}}}, + {"content", {{"type", "string"}, {"description", "Content to insert; must be empty string for delete mode"}}}, }}, - {"required", json::array({"mode", "lineStart", "lineEnd", "content"})}, + {"required", json::array({"mode", "line_start", "line_end", "content"})}, }}, }}, }}, @@ -589,7 +589,7 @@ struct server_tool_edit_file : server_tool { } fin.close(); - // validate and collect changes, then sort descending by lineStart + // validate and collect changes, then sort descending by line_start struct change_entry { std::string mode; int line_start; // 1-based @@ -602,8 +602,8 @@ struct server_tool_edit_file : server_tool { for (const auto & ch : changes) { change_entry e; e.mode = ch.at("mode").get(); - e.line_start = ch.at("lineStart").get(); - e.line_end = ch.at("lineEnd").get(); + e.line_start = ch.at("line_start").get(); + e.line_end = ch.at("line_end").get(); e.content = ch.at("content").get(); if (e.mode != "replace" && e.mode != "delete" && e.mode != "append") { @@ 
-614,7 +614,7 @@ struct server_tool_edit_file : server_tool { } int n = (int) lines.size(); if (e.line_start == -1) { - // -1 means end of file; lineEnd is ignored — normalize to point past last line + // -1 means end of file; line_end is ignored — normalize to point past last line e.line_start = n + 1; e.line_end = n + 1; } else { @@ -622,7 +622,7 @@ struct server_tool_edit_file : server_tool { return {{"error", string_format("invalid line range [%d, %d]", e.line_start, e.line_end)}}; } if (e.line_end > n) { - return {{"error", string_format("lineEnd %d exceeds file length %d", e.line_end, n)}}; + return {{"error", string_format("line_end %d exceeds file length %d", e.line_end, n)}}; } } entries.push_back(std::move(e)); @@ -657,7 +657,7 @@ struct server_tool_edit_file : server_tool { } else if (e.mode == "delete") { lines.erase(lines.begin() + idx_start, lines.begin() + idx_end + 1); } else { // append - // idx_end + 1 may equal lines.size() when lineStart == -1 (end of file) + // idx_end + 1 may equal lines.size() when line_start == -1 (end of file) lines.insert(lines.begin() + idx_end + 1, new_lines.begin(), new_lines.end()); } } From c33fd6f10c399bb5da7fc7cc1842bcb09a68166f Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 19 Mar 2026 23:25:22 +0100 Subject: [PATCH 5/8] rm redundant field --- tools/server/server-tools.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/server/server-tools.cpp b/tools/server/server-tools.cpp index 7f7d7a6d568..d7c116abd81 100644 --- a/tools/server/server-tools.cpp +++ b/tools/server/server-tools.cpp @@ -140,8 +140,7 @@ static bool glob_match(const std::string & pattern, const std::string & str) { struct server_tool { std::string name; std::string display_name; - json definition; - bool permission_write = false; + bool permission_write = false; virtual ~server_tool() = default; virtual json get_definition() = 0; @@ -592,8 +591,8 @@ struct server_tool_edit_file : server_tool { // validate and 
collect changes, then sort descending by line_start struct change_entry { std::string mode; - int line_start; // 1-based - int line_end; // 1-based inclusive + int line_start; // 1-based + int line_end; // 1-based inclusive std::string content; }; std::vector entries; From b0a1b31477d8c68ea7d23bce4b3748de27e88441 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Sat, 21 Mar 2026 01:11:57 +0100 Subject: [PATCH 6/8] change arg to --tools all --- common/arg.cpp | 11 +-- common/common.h | 2 +- tools/server/server-tools.cpp | 129 ++++++++++++++++------------------ tools/server/server-tools.h | 23 +++++- tools/server/server.cpp | 8 ++- 5 files changed, 94 insertions(+), 79 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index ba2afc77a70..98070d43e25 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2849,11 +2849,12 @@ common_params_context common_params_parser_init(common_params & params, llama_ex } ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_WEBUI_MCP_PROXY")); add_opt(common_arg( - {"--tools"}, - {"--no-tools"}, - string_format("experimental: whether to enable tools for AI agents - do not enable in untrusted environments (default: %s)", params.server_tools ? 
"enabled" : "disabled"), - [](common_params & params, bool value) { - params.server_tools = value; + {"--tools"}, "TOOL1,TOOL2,...", + "experimental: whether to enable built-in tools for AI agents - do not enable in untrusted environments (default: no tools)\n" + "specify \"all\" to enable all tools\n" + "available tools: read_file, file_glob_search, grep_search, exec_shell_command, write_file, edit_file, apply_diff", + [](common_params & params, const std::string & value) { + params.server_tools = parse_csv_row(value); } ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_TOOLS")); add_opt(common_arg( diff --git a/common/common.h b/common/common.h index 9fd1b4dbbe1..fde5ba996ed 100644 --- a/common/common.h +++ b/common/common.h @@ -614,7 +614,7 @@ struct common_params { bool endpoint_metrics = false; // enable built-in tools - bool server_tools = false; + std::vector server_tools; // router server configs std::string models_dir = ""; // directory containing models for the router server diff --git a/tools/server/server-tools.cpp b/tools/server/server-tools.cpp index d7c116abd81..5e89a5668b7 100644 --- a/tools/server/server-tools.cpp +++ b/tools/server/server-tools.cpp @@ -29,8 +29,8 @@ static std::vector to_cstr_vec(const std::vector & v) { struct run_proc_result { std::string output; - int exit_code = -1; - bool timed_out = false; + int exit_code = -1; + bool timed_out = false; }; static run_proc_result run_process( @@ -133,31 +133,17 @@ static bool glob_match(const std::string & pattern, const std::string & str) { return glob_match(pattern.c_str(), str.c_str()); } -// -// base struct -// - -struct server_tool { - std::string name; - std::string display_name; - bool permission_write = false; - - virtual ~server_tool() = default; - virtual json get_definition() = 0; - virtual json invoke(json params) = 0; - - json to_json() { - return { - {"display_name", display_name}, - {"tool", name}, - {"type", "builtin"}, - {"permissions", json{ - {"write", 
permission_write} - }}, - {"definition", get_definition()}, - }; - } -}; +json server_tool::to_json() { + return { + {"display_name", display_name}, + {"tool", name}, + {"type", "builtin"}, + {"permissions", json{ + {"write", permission_write} + }}, + {"definition", get_definition()}, + }; +} // // read_file: read a file with optional line range and line-number prefix @@ -533,7 +519,7 @@ struct server_tool_edit_file : server_tool { {"type", "function"}, {"function", { {"name", name}, - {"description", + {"description", "Edit a file by applying a list of line-based changes. " "Each change targets a 1-based inclusive line range and has a mode: " "\"replace\" (replace lines with content), " @@ -755,17 +741,56 @@ static std::vector> build_tools() { return tools; } -static json server_tools_list() { - auto tools = build_tools(); - json result = json::array(); - for (const auto & t : tools) { - result.push_back(t->to_json()); +void server_tools::setup(const std::vector & enabled_tools) { + if (!enabled_tools.empty()) { + std::unordered_set enabled_set(enabled_tools.begin(), enabled_tools.end()); + auto all_tools = build_tools(); + + tools.clear(); + for (auto & t : all_tools) { + if (enabled_set.count(t->name) > 0 || enabled_set.count("all") > 0) { + tools.push_back(std::move(t)); + } + } } - return result; + + handle_get = [this](const server_http_req &) -> server_http_res_ptr { + auto res = std::make_unique(); + try { + json result = json::array(); + for (const auto & t : tools) { + result.push_back(t->to_json()); + } + res->data = safe_json_to_str(result); + } catch (const std::exception & e) { + SRV_ERR("got exception: %s\n", e.what()); + res->status = 500; + res->data = safe_json_to_str(format_error_response(e.what(), ERROR_TYPE_SERVER)); + } + return res; + }; + + handle_post = [this](const server_http_req & req) -> server_http_res_ptr { + auto res = std::make_unique(); + try { + json body = json::parse(req.body); + std::string tool_name = body.at("tool").get(); + 
json params = body.value("params", json::object()); + json result = invoke(tool_name, params); + res->data = safe_json_to_str(result); + } catch (const json::exception & e) { + res->status = 400; + res->data = safe_json_to_str(format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST)); + } catch (const std::exception & e) { + SRV_ERR("got exception: %s\n", e.what()); + res->status = 500; + res->data = safe_json_to_str(format_error_response(e.what(), ERROR_TYPE_SERVER)); + } + return res; + }; } -static json server_tool_call(const std::string & name, const json & params) { - auto tools = build_tools(); +json server_tools::invoke(const std::string & name, const json & params) { for (auto & t : tools) { if (t->name == name) { return t->invoke(params); @@ -773,35 +798,3 @@ static json server_tool_call(const std::string & name, const json & params) { } return {{"error", "unknown tool: " + name}}; } - -server_http_context::handler_t server_tools_get = [](const server_http_req &) -> server_http_res_ptr { - auto res = std::make_unique(); - try { - json tools = server_tools_list(); - res->data = safe_json_to_str(tools); - } catch (const std::exception & e) { - SRV_ERR("got exception: %s\n", e.what()); - res->status = 500; - res->data = safe_json_to_str(format_error_response(e.what(), ERROR_TYPE_SERVER)); - } - return res; -}; - -server_http_context::handler_t server_tools_post = [](const server_http_req & req) -> server_http_res_ptr { - auto res = std::make_unique(); - try { - json body = json::parse(req.body); - std::string tool_name = body.at("tool").get(); - json params = body.value("params", json::object()); - json result = server_tool_call(tool_name, params); - res->data = safe_json_to_str(result); - } catch (const json::exception & e) { - res->status = 400; - res->data = safe_json_to_str(format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST)); - } catch (const std::exception & e) { - SRV_ERR("got exception: %s\n", e.what()); - res->status = 500; - res->data = 
safe_json_to_str(format_error_response(e.what(), ERROR_TYPE_SERVER)); - } - return res; -}; diff --git a/tools/server/server-tools.h b/tools/server/server-tools.h index 141235d7993..444ef5f8098 100644 --- a/tools/server/server-tools.h +++ b/tools/server/server-tools.h @@ -3,5 +3,24 @@ #include "server-common.h" #include "server-http.h" -extern server_http_context::handler_t server_tools_get; -extern server_http_context::handler_t server_tools_post; \ No newline at end of file +struct server_tool { + std::string name; + std::string display_name; + bool permission_write = false; + + virtual ~server_tool() = default; + virtual json get_definition() = 0; + virtual json invoke(json params) = 0; + + json to_json(); +}; + +struct server_tools { + std::vector> tools; + + void setup(const std::vector & enabled_tools); + json invoke(const std::string & name, const json & params); + + server_http_context::handler_t handle_get; + server_http_context::handler_t handle_post; +}; diff --git a/tools/server/server.cpp b/tools/server/server.cpp index d1db1ed1ea0..2a0cf1bcf90 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -125,6 +125,7 @@ int main(int argc, char ** argv) { // register API routes server_routes routes(params, ctx_server); + server_tools tools; bool is_router_server = params.model.path.empty(); std::optional models_routes{}; @@ -213,13 +214,14 @@ int main(int argc, char ** argv) { ctx_http.post("/cors-proxy", ex_wrapper(proxy_handler_post)); } // EXPERIMENTAL built-in tools - if (params.server_tools) { + if (!params.server_tools.empty()) { + tools.setup(params.server_tools); SRV_WRN("%s", "-----------------\n"); SRV_WRN("%s", "Built-in tools are enabled, do not expose server to untrusted environments\n"); SRV_WRN("%s", "This feature is EXPERIMENTAL and may be changed in the future\n"); SRV_WRN("%s", "-----------------\n"); - ctx_http.get ("/tools", ex_wrapper(server_tools_get)); - ctx_http.post("/tools", ex_wrapper(server_tools_post)); + 
ctx_http.get ("/tools", ex_wrapper(tools.handle_get)); + ctx_http.post("/tools", ex_wrapper(tools.handle_post)); } // From b648215eb2353c46a5023666319d002227b9b4de Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 23 Mar 2026 12:32:33 +0100 Subject: [PATCH 7/8] add readme mention --- tools/server/README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/server/README.md b/tools/server/README.md index 72f3c8e5342..bb4edf6e6da 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -290,6 +290,12 @@ It is currently available in the following endpoints: For more details, please refer to [multimodal documentation](../../docs/multimodal.md) +### Built-in tools support + +The server includes a set of built-in tools that enable the LLM to access the local file system directly from the Web UI. + +To use this feature, start the server with `--tools all`. You can also enable only specific tools by passing a comma-separated list: `--tools name1,name2,...`. Run `--help` for the full list of available tool names. + ## Build `llama-server` is built alongside everything else from the root of the project From e4cc43a809f1e00b3392cf39a7be28a9a0fa516b Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 23 Mar 2026 12:35:19 +0100 Subject: [PATCH 8/8] llama-gen-docs --- tools/cli/README.md | 10 +++++++--- tools/completion/README.md | 7 +++++-- tools/server/README.md | 12 ++++++++---- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/tools/cli/README.md b/tools/cli/README.md index c344cab2a8d..840976a8848 100644 --- a/tools/cli/README.md +++ b/tools/cli/README.md @@ -134,7 +134,7 @@ | `--mirostat-lr N` | Mirostat learning rate, parameter eta (default: 0.10) | | `--mirostat-ent N` | Mirostat target entropy, parameter tau (default: 5.00) | | `-l, --logit-bias TOKEN_ID(+/-)BIAS` | modifies the likelihood of token appearing in the completion,
i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',
or `--logit-bias 15043-1` to decrease likelihood of token ' Hello' | -| `--grammar GRAMMAR` | BNF-like grammar to constrain generations (see samples in grammars/ dir) (default: '') | +| `--grammar GRAMMAR` | BNF-like grammar to constrain generations (see samples in grammars/ dir) | | `--grammar-file FNAME` | file to read grammar from | | `-j, --json-schema SCHEMA` | JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object
For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead | | `-jf, --json-schema-file FILE` | File containing a JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object
For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead | @@ -147,7 +147,8 @@ | -------- | ----------- | | `--display-prompt, --no-display-prompt` | whether to print prompt at generation (default: true) | | `-co, --color [on\|off\|auto]` | Colorize output to distinguish prompt and user input from generations ('on', 'off', or 'auto', default: 'auto')
'auto' enables colors when output is to a terminal | -| `--ctx-checkpoints, --swa-checkpoints N` | max number of context checkpoints to create per slot (default: 8)[(more info)](https://github.com/ggml-org/llama.cpp/pull/15293)
(env: LLAMA_ARG_CTX_CHECKPOINTS) | +| `-ctxcp, --ctx-checkpoints, --swa-checkpoints N` | max number of context checkpoints to create per slot (default: 32)[(more info)](https://github.com/ggml-org/llama.cpp/pull/15293)
(env: LLAMA_ARG_CTX_CHECKPOINTS) | +| `-cpent, --checkpoint-every-n-tokens N` | create a checkpoint every n tokens during prefill (processing), -1 to disable (default: 8192)
(env: LLAMA_ARG_CHECKPOINT_EVERY_NT) | | `-cram, --cache-ram N` | set the maximum cache size in MiB (default: 8192, -1 - no limit, 0 - disable)[(more info)](https://github.com/ggml-org/llama.cpp/pull/16391)
(env: LLAMA_ARG_CACHE_RAM) | | `--context-shift, --no-context-shift` | whether to use context shift on infinite text generation (default: disabled)
(env: LLAMA_ARG_CONTEXT_SHIFT) | | `-sys, --system-prompt PROMPT` | system prompt to use with model (if applicable, depending on chat template) | @@ -172,9 +173,12 @@ | `--chat-template-kwargs STRING` | sets additional params for the json template parser, must be a valid json object string, e.g. '{"key1":"value1","key2":"value2"}'
(env: LLAMA_CHAT_TEMPLATE_KWARGS) | | `--jinja, --no-jinja` | whether to use jinja template engine for chat (default: enabled)
(env: LLAMA_ARG_JINJA) | | `--reasoning-format FORMAT` | controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:
- none: leaves thoughts unparsed in `message.content`
- deepseek: puts thoughts in `message.reasoning_content`
- deepseek-legacy: keeps `` tags in `message.content` while also populating `message.reasoning_content`
(default: auto)
(env: LLAMA_ARG_THINK) | -| `--reasoning-budget N` | controls the amount of thinking allowed; currently only one of: -1 for unrestricted thinking budget, or 0 to disable thinking (default: -1)
(env: LLAMA_ARG_THINK_BUDGET) | +| `-rea, --reasoning [on\|off\|auto]` | Use reasoning/thinking in the chat ('on', 'off', or 'auto', default: 'auto' (detect from template))
(env: LLAMA_ARG_REASONING) | +| `--reasoning-budget N` | token budget for thinking: -1 for unrestricted, 0 for immediate end, N>0 for token budget (default: -1)
(env: LLAMA_ARG_THINK_BUDGET) | +| `--reasoning-budget-message MESSAGE` | message injected before the end-of-thinking tag when reasoning budget is exhausted (default: none)
(env: LLAMA_ARG_THINK_BUDGET_MESSAGE) | | `--chat-template JINJA_TEMPLATE` | set custom jinja chat template (default: template taken from model's metadata)
if suffix/prefix are specified, template will be disabled
only commonly used templates are accepted (unless --jinja is set before this flag):
list of built-in templates:
bailing, bailing-think, bailing2, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone-moe, exaone3, exaone4, falcon3, gemma, gigachat, glmedge, gpt-oss, granite, grok-2, hunyuan-dense, hunyuan-moe, kimi-k2, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-tekken, monarch, openchat, orion, pangu-embedded, phi3, phi4, rwkv-world, seed_oss, smolvlm, solar-open, vicuna, vicuna-orca, yandex, zephyr
(env: LLAMA_ARG_CHAT_TEMPLATE) | | `--chat-template-file JINJA_TEMPLATE_FILE` | set custom jinja chat template file (default: template taken from model's metadata)
if suffix/prefix are specified, template will be disabled
only commonly used templates are accepted (unless --jinja is set before this flag):
list of built-in templates:
bailing, bailing-think, bailing2, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone-moe, exaone3, exaone4, falcon3, gemma, gigachat, glmedge, gpt-oss, granite, grok-2, hunyuan-dense, hunyuan-moe, kimi-k2, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-tekken, monarch, openchat, orion, pangu-embedded, phi3, phi4, rwkv-world, seed_oss, smolvlm, solar-open, vicuna, vicuna-orca, yandex, zephyr
(env: LLAMA_ARG_CHAT_TEMPLATE_FILE) | +| `--skip-chat-parsing, --no-skip-chat-parsing` | force a pure content parser, even if a Jinja template is specified; model will output everything in the content section, including any reasoning and/or tool calls (default: disabled)
(env: LLAMA_ARG_SKIP_CHAT_PARSING) | | `--simple-io` | use basic IO for better compatibility in subprocesses and limited consoles | | `--draft, --draft-n, --draft-max N` | number of tokens to draft for speculative decoding (default: 16)
(env: LLAMA_ARG_DRAFT_MAX) | | `--draft-min, --draft-n-min N` | minimum number of draft tokens to use for speculative decoding (default: 0)
(env: LLAMA_ARG_DRAFT_MIN) | diff --git a/tools/completion/README.md b/tools/completion/README.md index b5eeba73349..25884ed92d0 100644 --- a/tools/completion/README.md +++ b/tools/completion/README.md @@ -217,7 +217,7 @@ llama-completion.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf --ignore-eos -n -1 | `--mirostat-lr N` | Mirostat learning rate, parameter eta (default: 0.10) | | `--mirostat-ent N` | Mirostat target entropy, parameter tau (default: 5.00) | | `-l, --logit-bias TOKEN_ID(+/-)BIAS` | modifies the likelihood of token appearing in the completion,
i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',
or `--logit-bias 15043-1` to decrease likelihood of token ' Hello' | -| `--grammar GRAMMAR` | BNF-like grammar to constrain generations (see samples in grammars/ dir) (default: '') | +| `--grammar GRAMMAR` | BNF-like grammar to constrain generations (see samples in grammars/ dir) | | `--grammar-file FNAME` | file to read grammar from | | `-j, --json-schema SCHEMA` | JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object
For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead | | `-jf, --json-schema-file FILE` | File containing a JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object
For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead | @@ -252,9 +252,12 @@ llama-completion.exe -m models\gemma-1.1-7b-it.Q4_K_M.gguf --ignore-eos -n -1 | `-gaw, --grp-attn-w N` | group-attention width (default: 512)
(env: LLAMA_ARG_GRP_ATTN_W) | | `--jinja, --no-jinja` | whether to use jinja template engine for chat (default: disabled)
(env: LLAMA_ARG_JINJA) | | `--reasoning-format FORMAT` | controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:
- none: leaves thoughts unparsed in `message.content`
- deepseek: puts thoughts in `message.reasoning_content`
- deepseek-legacy: keeps `` tags in `message.content` while also populating `message.reasoning_content`
(default: auto)
(env: LLAMA_ARG_THINK) | -| `--reasoning-budget N` | controls the amount of thinking allowed; currently only one of: -1 for unrestricted thinking budget, or 0 to disable thinking (default: -1)
(env: LLAMA_ARG_THINK_BUDGET) | +| `-rea, --reasoning [on\|off\|auto]` | Use reasoning/thinking in the chat ('on', 'off', or 'auto', default: 'auto' (detect from template))
(env: LLAMA_ARG_REASONING) | +| `--reasoning-budget N` | token budget for thinking: -1 for unrestricted, 0 for immediate end, N>0 for token budget (default: -1)
(env: LLAMA_ARG_THINK_BUDGET) | +| `--reasoning-budget-message MESSAGE` | message injected before the end-of-thinking tag when reasoning budget is exhausted (default: none)
(env: LLAMA_ARG_THINK_BUDGET_MESSAGE) | | `--chat-template JINJA_TEMPLATE` | set custom jinja chat template (default: template taken from model's metadata)
if suffix/prefix are specified, template will be disabled
only commonly used templates are accepted (unless --jinja is set before this flag):
list of built-in templates:
bailing, bailing-think, bailing2, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone-moe, exaone3, exaone4, falcon3, gemma, gigachat, glmedge, gpt-oss, granite, grok-2, hunyuan-dense, hunyuan-moe, kimi-k2, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-tekken, monarch, openchat, orion, pangu-embedded, phi3, phi4, rwkv-world, seed_oss, smolvlm, solar-open, vicuna, vicuna-orca, yandex, zephyr
(env: LLAMA_ARG_CHAT_TEMPLATE) | | `--chat-template-file JINJA_TEMPLATE_FILE` | set custom jinja chat template file (default: template taken from model's metadata)
if suffix/prefix are specified, template will be disabled
only commonly used templates are accepted (unless --jinja is set before this flag):
list of built-in templates:
bailing, bailing-think, bailing2, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone-moe, exaone3, exaone4, falcon3, gemma, gigachat, glmedge, gpt-oss, granite, grok-2, hunyuan-dense, hunyuan-moe, kimi-k2, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-tekken, monarch, openchat, orion, pangu-embedded, phi3, phi4, rwkv-world, seed_oss, smolvlm, solar-open, vicuna, vicuna-orca, yandex, zephyr
(env: LLAMA_ARG_CHAT_TEMPLATE_FILE) | +| `--skip-chat-parsing, --no-skip-chat-parsing` | force a pure content parser, even if a Jinja template is specified; model will output everything in the content section, including any reasoning and/or tool calls (default: disabled)
(env: LLAMA_ARG_SKIP_CHAT_PARSING) | | `--simple-io` | use basic IO for better compatibility in subprocesses and limited consoles | diff --git a/tools/server/README.md b/tools/server/README.md index bb4edf6e6da..cb53678416f 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -151,7 +151,7 @@ For the full list of features, please refer to [server's changelog](https://gith | `--mirostat-lr N` | Mirostat learning rate, parameter eta (default: 0.10) | | `--mirostat-ent N` | Mirostat target entropy, parameter tau (default: 5.00) | | `-l, --logit-bias TOKEN_ID(+/-)BIAS` | modifies the likelihood of token appearing in the completion,
i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',
or `--logit-bias 15043-1` to decrease likelihood of token ' Hello' | -| `--grammar GRAMMAR` | BNF-like grammar to constrain generations (see samples in grammars/ dir) (default: '') | +| `--grammar GRAMMAR` | BNF-like grammar to constrain generations (see samples in grammars/ dir) | | `--grammar-file FNAME` | file to read grammar from | | `-j, --json-schema SCHEMA` | JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object
For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead | | `-jf, --json-schema-file FILE` | File containing a JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object
For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead | @@ -164,7 +164,8 @@ For the full list of features, please refer to [server's changelog](https://gith | -------- | ----------- | | `-lcs, --lookup-cache-static FNAME` | path to static lookup cache to use for lookup decoding (not updated by generation) | | `-lcd, --lookup-cache-dynamic FNAME` | path to dynamic lookup cache to use for lookup decoding (updated by generation) | -| `--ctx-checkpoints, --swa-checkpoints N` | max number of context checkpoints to create per slot (default: 8)[(more info)](https://github.com/ggml-org/llama.cpp/pull/15293)
(env: LLAMA_ARG_CTX_CHECKPOINTS) | +| `-ctxcp, --ctx-checkpoints, --swa-checkpoints N` | max number of context checkpoints to create per slot (default: 32)[(more info)](https://github.com/ggml-org/llama.cpp/pull/15293)
(env: LLAMA_ARG_CTX_CHECKPOINTS) | +| `-cpent, --checkpoint-every-n-tokens N` | create a checkpoint every n tokens during prefill (processing), -1 to disable (default: 8192)
(env: LLAMA_ARG_CHECKPOINT_EVERY_NT) | | `-cram, --cache-ram N` | set the maximum cache size in MiB (default: 8192, -1 - no limit, 0 - disable)[(more info)](https://github.com/ggml-org/llama.cpp/pull/16391)
(env: LLAMA_ARG_CACHE_RAM) | | `-kvu, --kv-unified, -no-kvu, --no-kv-unified` | use single unified KV buffer shared across all sequences (default: enabled if number of slots is auto)
(env: LLAMA_ARG_KV_UNIFIED) | | `--context-shift, --no-context-shift` | whether to use context shift on infinite text generation (default: disabled)
(env: LLAMA_ARG_CONTEXT_SHIFT) | @@ -192,6 +193,8 @@ For the full list of features, please refer to [server's changelog](https://gith | `--api-prefix PREFIX` | prefix path the server serves from, without the trailing slash (default: )
(env: LLAMA_ARG_API_PREFIX) | | `--webui-config JSON` | JSON that provides default WebUI settings (overrides WebUI defaults)
(env: LLAMA_ARG_WEBUI_CONFIG) | | `--webui-config-file PATH` | JSON file that provides default WebUI settings (overrides WebUI defaults)
(env: LLAMA_ARG_WEBUI_CONFIG_FILE) | +| `--webui-mcp-proxy, --no-webui-mcp-proxy` | experimental: whether to enable MCP CORS proxy - do not enable in untrusted environments (default: disabled)
(env: LLAMA_ARG_WEBUI_MCP_PROXY) | +| `--tools TOOL1,TOOL2,...` | experimental: whether to enable built-in tools for AI agents - do not enable in untrusted environments (default: no tools)
specify "all" to enable all tools
available tools: read_file, file_glob_search, grep_search, exec_shell_command, write_file, edit_file, apply_diff
(env: LLAMA_ARG_TOOLS) | | `--webui, --no-webui` | whether to enable the Web UI (default: enabled)
(env: LLAMA_ARG_WEBUI) | | `--embedding, --embeddings` | restrict to only support embedding use case; use only with dedicated embedding models (default: disabled)
(env: LLAMA_ARG_EMBEDDINGS) | | `--rerank, --reranking` | enable reranking endpoint on server (default: disabled)
(env: LLAMA_ARG_RERANKING) | @@ -215,11 +218,12 @@ For the full list of features, please refer to [server's changelog](https://gith | `--models-autoload, --no-models-autoload` | for router server, whether to automatically load models (default: enabled)
(env: LLAMA_ARG_MODELS_AUTOLOAD) | | `--jinja, --no-jinja` | whether to use jinja template engine for chat (default: enabled)
(env: LLAMA_ARG_JINJA) | | `--reasoning-format FORMAT` | controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:
- none: leaves thoughts unparsed in `message.content`
- deepseek: puts thoughts in `message.reasoning_content`
- deepseek-legacy: keeps `<think>` tags in `message.content` while also populating `message.reasoning_content`
(default: auto)
(env: LLAMA_ARG_THINK) | -| `-rea, --resoning [on\|off\|auto]` | Use reasoning/thinking in the chat ('on', 'off', or 'auto', default: 'auto' (detect from template))
(env: LLAMA_ARG_REASONING) | +| `-rea, --reasoning [on\|off\|auto]` | Use reasoning/thinking in the chat ('on', 'off', or 'auto', default: 'auto' (detect from template))
(env: LLAMA_ARG_REASONING) | | `--reasoning-budget N` | token budget for thinking: -1 for unrestricted, 0 for immediate end, N>0 for token budget (default: -1)
(env: LLAMA_ARG_THINK_BUDGET) | | `--reasoning-budget-message MESSAGE` | message injected before the end-of-thinking tag when reasoning budget is exhausted (default: none)
(env: LLAMA_ARG_THINK_BUDGET_MESSAGE) | | `--chat-template JINJA_TEMPLATE` | set custom jinja chat template (default: template taken from model's metadata)
if suffix/prefix are specified, template will be disabled
only commonly used templates are accepted (unless --jinja is set before this flag):
list of built-in templates:
bailing, bailing-think, bailing2, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone-moe, exaone3, exaone4, falcon3, gemma, gigachat, glmedge, gpt-oss, granite, grok-2, hunyuan-dense, hunyuan-moe, kimi-k2, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-tekken, monarch, openchat, orion, pangu-embedded, phi3, phi4, rwkv-world, seed_oss, smolvlm, solar-open, vicuna, vicuna-orca, yandex, zephyr
(env: LLAMA_ARG_CHAT_TEMPLATE) | | `--chat-template-file JINJA_TEMPLATE_FILE` | set custom jinja chat template file (default: template taken from model's metadata)
if suffix/prefix are specified, template will be disabled
only commonly used templates are accepted (unless --jinja is set before this flag):
list of built-in templates:
bailing, bailing-think, bailing2, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone-moe, exaone3, exaone4, falcon3, gemma, gigachat, glmedge, gpt-oss, granite, grok-2, hunyuan-dense, hunyuan-moe, kimi-k2, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-tekken, monarch, openchat, orion, pangu-embedded, phi3, phi4, rwkv-world, seed_oss, smolvlm, solar-open, vicuna, vicuna-orca, yandex, zephyr
(env: LLAMA_ARG_CHAT_TEMPLATE_FILE) | +| `--skip-chat-parsing, --no-skip-chat-parsing` | force a pure content parser, even if a Jinja template is specified; model will output everything in the content section, including any reasoning and/or tool calls (default: disabled)
(env: LLAMA_ARG_SKIP_CHAT_PARSING) | | `--prefill-assistant, --no-prefill-assistant` | whether to prefill the assistant's response if the last message is an assistant message (default: prefill enabled)
when this flag is set, if the last message is an assistant message then it will be treated as a full message and not prefilled

(env: LLAMA_ARG_PREFILL_ASSISTANT) | | `-sps, --slot-prompt-similarity SIMILARITY` | how much the prompt of a request must match the prompt of a slot in order to use that slot (default: 0.10, 0.0 = disabled) | | `--lora-init-without-apply` | load LoRA adapters without applying them (apply later via POST /lora-adapters) (default: disabled) | @@ -234,7 +238,7 @@ For the full list of features, please refer to [server's changelog](https://gith | `-ngld, --gpu-layers-draft, --n-gpu-layers-draft N` | max. number of draft model layers to store in VRAM, either an exact number, 'auto', or 'all' (default: auto)
(env: LLAMA_ARG_N_GPU_LAYERS_DRAFT) | | `-md, --model-draft FNAME` | draft model for speculative decoding (default: unused)
(env: LLAMA_ARG_MODEL_DRAFT) | | `--spec-replace TARGET DRAFT` | translate the string in TARGET into DRAFT if the draft model and main model are not compatible | -| `--spec-type [none\|ngram-cache\|ngram-simple\|ngram-map-k\|ngram-map-k4v\|ngram-mod]` | type of speculative decoding to use when no draft model is provided (default: none) | +| `--spec-type [none\|ngram-cache\|ngram-simple\|ngram-map-k\|ngram-map-k4v\|ngram-mod]` | type of speculative decoding to use when no draft model is provided (default: none)

(env: LLAMA_ARG_SPEC_TYPE) | | `--spec-ngram-size-n N` | ngram size N for ngram-simple/ngram-map speculative decoding, length of lookup n-gram (default: 12) | | `--spec-ngram-size-m N` | ngram size M for ngram-simple/ngram-map speculative decoding, length of draft m-gram (default: 48) | | `--spec-ngram-min-hits N` | minimum hits for ngram-map speculative decoding (default: 1) |