Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions common/chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -601,18 +601,18 @@ bool common_chat_templates_was_explicit(const struct common_chat_templates * tmp
return tmpls->has_explicit_template;
}

const char * common_chat_templates_source(const struct common_chat_templates * tmpls, const char * variant) {
if (variant != nullptr) {
if (strcmp(variant, "tool_use") == 0) {
std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant) {
if (!variant.empty()) {
if (variant == "tool_use") {
if (tmpls->template_tool_use) {
return tmpls->template_tool_use->source().c_str();
return tmpls->template_tool_use->source();
}
return nullptr;
return "";
} else {
LOG_DBG("%s: unknown template variant: %s\n", __func__, variant);
LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str());
}
}
return tmpls->template_default->source().c_str();
return tmpls->template_default->source();
}

common_chat_templates_ptr common_chat_templates_init(
Expand Down
2 changes: 1 addition & 1 deletion common/chat.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ common_chat_templates_ptr common_chat_templates_init(
const std::string & eos_token_override = "");

bool common_chat_templates_was_explicit(const struct common_chat_templates * tmpls);
const char * common_chat_templates_source(const struct common_chat_templates * tmpls, const char * variant = nullptr);
std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant = "");


struct common_chat_params common_chat_templates_apply(
Expand Down
32 changes: 27 additions & 5 deletions tools/cli/cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,16 @@ struct cli_context {

std::string generate_completion(result_timings & out_timings) {
server_response_reader rd = ctx_server.get_response_reader();
auto formatted = format_chat();
{
// TODO: reduce some copies here in the future
server_task task = server_task(SERVER_TASK_TYPE_COMPLETION);
task.id = rd.get_new_id();
task.index = 0;
task.params = defaults; // copy
task.cli_input = messages; // copy
task.cli_files = input_files; // copy
task.id = rd.get_new_id();
task.index = 0;
task.params = defaults; // copy
task.cli_prompt = formatted.prompt; // copy
task.cli_files = input_files; // copy
task.cli = true;
rd.post_task({std::move(task)});
}

Expand Down Expand Up @@ -156,6 +158,26 @@ struct cli_context {
return content;
}
}

// Apply the model's chat template to the accumulated conversation
// (`messages`) and return the resulting prompt/parameters.
// NOTE(review): tools, json_schema and grammar are intentionally left
// empty for now (see the TODO markers below).
common_chat_params format_chat() {
auto meta = ctx_server.get_meta();
auto & chat_params = meta.chat_params;

common_chat_templates_inputs inputs;
// convert the OAI-compatible message list into the internal format
inputs.messages = common_chat_msgs_parse_oaicompat(messages);
inputs.tools = {}; // TODO
inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE;
inputs.json_schema = ""; // TODO
inputs.grammar = ""; // TODO
inputs.use_jinja = chat_params.use_jinja;
// the CLI never issues parallel tool calls
inputs.parallel_tool_calls = false;
// always append the generation prompt for the assistant turn
inputs.add_generation_prompt = true;
inputs.reasoning_format = chat_params.reasoning_format;
inputs.enable_thinking = chat_params.enable_thinking;

// Apply chat template to the list of messages
return common_chat_templates_apply(chat_params.tmpls.get(), inputs);
}
};

int main(int argc, char ** argv) {
Expand Down
4 changes: 2 additions & 2 deletions tools/server/server-common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -831,7 +831,7 @@ static void handle_media(
// used by /chat/completions endpoint
json oaicompat_chat_params_parse(
json & body, /* openai api json semantics */
const oaicompat_parser_options & opt,
const server_chat_params & opt,
std::vector<raw_buffer> & out_files)
{
json llama_params;
Expand Down Expand Up @@ -1012,7 +1012,7 @@ json oaicompat_chat_params_parse(
}

// Apply chat template to the list of messages
auto chat_params = common_chat_templates_apply(opt.tmpls, inputs);
auto chat_params = common_chat_templates_apply(opt.tmpls.get(), inputs);

/* Append assistant prefilled message */
if (prefill_assistant_message) {
Expand Down
14 changes: 7 additions & 7 deletions tools/server/server-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,25 +274,25 @@ std::vector<server_tokens> tokenize_input_prompts(
// OAI utils
//

// used by /completions endpoint
json oaicompat_completion_params_parse(const json & body);

// Chat-template configuration shared by the /chat/completions parser and
// the CLI (renamed from oaicompat_parser_options).
//
// NOTE(review): this span contained interleaved removed/added diff lines
// (old name `oaicompat_parser_options` and raw `common_chat_templates *`
// mixed with the new struct); only the post-change definition is kept.
struct server_chat_params {
    bool use_jinja;
    bool prefill_assistant;
    common_reasoning_format reasoning_format;
    std::map<std::string, std::string> chat_template_kwargs; // mapping key --> json value
    common_chat_templates_ptr tmpls;                         // owning smart pointer to the loaded templates
    bool allow_image;
    bool allow_audio;
    bool enable_thinking = true;
    std::string media_path;
};

// used by /completions endpoint
json oaicompat_completion_params_parse(const json & body);

// used by /chat/completions endpoint
json oaicompat_chat_params_parse(
json & body, /* openai api json semantics */
const oaicompat_parser_options & opt,
const server_chat_params & opt,
std::vector<raw_buffer> & out_files);

// convert Anthropic Messages API format to OpenAI Chat Completions API format
Expand Down
Loading
Loading