Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions .github/workflows/llm-deployment-plugin-rocksdb-storage-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
name: LLM Deployment Plugin RocksDB Storage CI

# CI job that validates the RocksDB model storage integration for LLMDeploymentPlugin.
# Covers:
# - Constructor correctly initialises LLMModelStorage when a db is provided
# - Thread-local RequestContext propagates JWT user_id into audit entries
# - Empty model_id is rejected with a clear error
# - findBestSource skips "local" sources whose model files are absent
#
# No GPU hardware required — all tests exercise in-process logic only.

on:
pull_request:
types: [opened, synchronize, reopened]
paths:
- 'src/llm/llm_deployment_plugin.cpp'
- 'include/llm/llm_deployment_plugin.h'
- 'src/llm/llm_model_storage.cpp'
- 'include/llm/llm_model_storage.h'
- 'tests/llm/test_llm_deployment_plugin.cpp'
- '.github/workflows/llm-deployment-plugin-rocksdb-storage-ci.yml'
push:
branches:
- main
- develop
paths:
- 'src/llm/llm_deployment_plugin.cpp'
- 'include/llm/llm_deployment_plugin.h'
- 'src/llm/llm_model_storage.cpp'
- 'include/llm/llm_model_storage.h'
- 'tests/llm/test_llm_deployment_plugin.cpp'
- '.github/workflows/llm-deployment-plugin-rocksdb-storage-ci.yml'
workflow_dispatch:

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
ci-scope-classifier:
permissions:
contents: read
uses: ./.github/workflows/ci-scope-classifier.yml

llm-deployment-rocksdb:
needs: ci-scope-classifier
if: needs.ci-scope-classifier.outputs.has_llm_changes == 'true'
name: LLM Deployment Plugin RocksDB Storage Tests
runs-on: ubuntu-latest
permissions:
contents: read

steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
submodules: recursive

- name: Install build dependencies
run: |
sudo apt-get update -qq
sudo apt-get install -y --no-install-recommends \
cmake ninja-build g++ \
libgtest-dev \
pkg-config

- name: Configure CMake (CPU only, LLM enabled)
run: |
cmake -B build -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DTHEMIS_ENABLE_LLM=ON \
-DTHEMIS_ENABLE_CUDA=OFF \
-DTHEMIS_BUILD_TESTS=ON

- name: Build LLM deployment plugin test
run: cmake --build build --target themisdb_tests -j$(nproc)

- name: Run LLM deployment plugin tests
run: |
cd build
ctest --output-on-failure -R "LLMDeploymentPlugin|ModelDownloader"

- name: Write job summary
if: always()
run: |
echo "## 🚀 LLM Deployment Plugin RocksDB Storage CI" >> "$GITHUB_STEP_SUMMARY"
echo "" >> "$GITHUB_STEP_SUMMARY"
echo "| Parameter | Value |" >> "$GITHUB_STEP_SUMMARY"
echo "|-----------|-------|" >> "$GITHUB_STEP_SUMMARY"
echo "| **Build type** | Release (CPU only, CUDA=OFF) |" >> "$GITHUB_STEP_SUMMARY"
echo "| **Test filter** | \`LLMDeploymentPlugin\` + \`ModelDownloader\` |" >> "$GITHUB_STEP_SUMMARY"
echo "| **Event** | \`${{ github.event_name }}\` |" >> "$GITHUB_STEP_SUMMARY"
echo "| **Branch** | \`${{ github.ref_name }}\` |" >> "$GITHUB_STEP_SUMMARY"
echo "| **Commit** | \`${{ github.sha }}\` |" >> "$GITHUB_STEP_SUMMARY"
echo "| **Triggered by** | ${{ github.actor }} |" >> "$GITHUB_STEP_SUMMARY"
echo "" >> "$GITHUB_STEP_SUMMARY"
echo "Validates RocksDB \`LLMModelStorage\` initialisation in \`LLMDeploymentPlugin\`, thread-local \`RequestContext\` JWT user propagation into audit entries, empty model_id rejection, and \`findBestSource\` local-source availability checking." >> "$GITHUB_STEP_SUMMARY"
28 changes: 20 additions & 8 deletions include/llm/llm_deployment_plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@

#include "llm/model_downloader.h"
#include "llm/llm_plugin_interface.h"
// NOTE: LLMModelStorage is forward-declared to avoid link dependency until implementation exists
// #include "llm/llm_model_storage.h"
#include "llm/llm_model_storage.h"
#include "storage/base_entity.h"
#include "storage/rocksdb_wrapper.h"
#include <string>
Expand All @@ -41,10 +40,6 @@ namespace llm {

using json = nlohmann::json;

// Forward declaration to avoid linker errors until llm_model_storage.cpp is implemented
class LLMModelStorage;
struct LLMModelMetadata;

/**
* @brief Deployment mode for LLM models
*/
Expand Down Expand Up @@ -170,10 +165,27 @@ class LLMDeploymentPlugin {
explicit LLMDeploymentPlugin(const DeploymentConfig& config);

~LLMDeploymentPlugin() = default;

// ═══════════════════════════════════════════════════════════
// Core Deployment Operations
// Thread-local request context (JWT user propagation)
// ═══════════════════════════════════════════════════════════

/// Authentication context set by HTTP handlers before invoking deployment operations.
struct RequestContext {
std::string user_id; ///< Authenticated user / service account
std::string client_ip; ///< Originating client IP address (may be empty)
};

/// Set the authentication context for the calling thread.
/// Must be called before any method that performs audit logging.
static void setRequestContext(const RequestContext& ctx) noexcept;

/// Clear the authentication context for the calling thread.
static void clearRequestContext() noexcept;

/// Return the user_id from the thread-local request context, or @p fallback.
static std::string currentUserId(const char* fallback = "system") noexcept;
Comment on lines +181 to +187


/**
* @brief Deploy a model (download if needed, verify, make available)
Expand Down
8 changes: 4 additions & 4 deletions src/llm/FUTURE_ENHANCEMENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ This document covers planned enhancements to the LLM module beyond what is track
`llm_deployment_plugin.cpp` line 273 has: "Store in BaseEntity storage (RocksDB) - TODO: Uncomment when `llm_model_storage.cpp` exists". The plugin currently operates in "Filesystem-only mode" (line 136). Model metadata is not persisted to the database, breaking admin query ("list all deployed models") across restarts.

**Implementation Notes:**
- `[ ]` Implement `llm_model_storage.cpp` providing `LLMModelStorage::save(model_id, metadata)` and `load(model_id)` using the existing `StorageEngine` API with key prefix `llm_model::`.
- `[ ]` Uncomment the RocksDB persistence block at line 273; inject `StorageEngine*` into `LLMDeploymentPlugin` constructor.
- `[ ]` Implement `TODO(enhancement)` at line 916: check `model_id` existence before deployment to surface clear errors for unknown model IDs.
- `[ ]` Implement `TODO(feature)` at line 175: propagate authenticated user context from the request JWT into `audit.user` instead of hardcoding `"system"`.
- `[x]` Implement `llm_model_storage.cpp` providing `LLMModelStorage::save(model_id, metadata)` and `load(model_id)` using the existing `StorageEngine` API with key prefix `llm_model::`.
- `[x]` Uncomment the RocksDB persistence block at line 273; inject `StorageEngine*` into `LLMDeploymentPlugin` constructor.
- `[x]` Implement `TODO(enhancement)` at line 916: check `model_id` existence before deployment to surface clear errors for unknown model IDs.
Comment on lines +54 to +55
- `[x]` Implement `TODO(feature)` at line 175: propagate authenticated user context from the request JWT into `audit.user` instead of hardcoding `"system"`.

---

Expand Down
101 changes: 82 additions & 19 deletions src/llm/llm_deployment_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
*/

#include "llm/llm_deployment_plugin.h"
#include "llm/llm_model_storage.h"
#include "utils/logger.h"
#include "utils/checksum_utils.h"
#include <fstream>
Expand Down Expand Up @@ -105,16 +106,43 @@ std::chrono::system_clock::time_point iso8601ToTime(const std::string& iso) {

} // anonymous namespace

// ============================================================================
// Thread-local request context
// ============================================================================

namespace {
struct TLSRequestContext {
std::string user_id;
std::string client_ip;
bool is_set = false;
};
static thread_local TLSRequestContext tls_deploy_ctx;
} // namespace

void LLMDeploymentPlugin::setRequestContext(const RequestContext& ctx) noexcept {
tls_deploy_ctx.user_id = ctx.user_id;
tls_deploy_ctx.client_ip = ctx.client_ip;
tls_deploy_ctx.is_set = true;
}

void LLMDeploymentPlugin::clearRequestContext() noexcept {
tls_deploy_ctx = {};
}

std::string LLMDeploymentPlugin::currentUserId(const char* fallback) noexcept {
if (tls_deploy_ctx.is_set && !tls_deploy_ctx.user_id.empty()) {
return tls_deploy_ctx.user_id;
}
return fallback ? fallback : "system";
}

// ============================================================================
// LLMDeploymentPlugin Implementation
// ============================================================================

LLMDeploymentPlugin::LLMDeploymentPlugin(const DeploymentConfig& config)
: config_(config), downloader_(std::make_unique<ModelDownloader>()) {

// NOTE: BaseEntity storage integration is prepared but disabled until
// llm_model_storage.cpp is implemented to avoid linker errors
/*
// Initialize BaseEntity storage if enabled
if (config_.use_base_entity_storage && config_.db) {
LLMModelStorage::Config storage_config;
Expand All @@ -131,9 +159,6 @@ LLMDeploymentPlugin::LLMDeploymentPlugin(const DeploymentConfig& config)
} else {
LOG_INFO("LLM Deployment Plugin: Filesystem-only mode (no BaseEntity storage)");
}
*/

LOG_INFO("LLM Deployment Plugin: Filesystem-only mode (BaseEntity storage TODO)");

// Create cache directory if it doesn't exist
if (!fs::exists(config_.cache_directory)) {
Expand All @@ -157,7 +182,7 @@ LLMDeploymentPlugin::LLMDeploymentPlugin(const DeploymentConfig& config)
}
}

// Load model registry from filesystem (BaseEntity loading not yet implemented)
// Load model registry from filesystem
loadModelRegistry();

LOG_INFO("LLM Deployment Plugin initialized (mode: {}, cache: {})",
Expand All @@ -172,7 +197,16 @@ std::optional<ModelStatus> LLMDeploymentPlugin::deployModel(const std::string& m
audit.operation = "deploy";
audit.model_id = model_id;
audit.timestamp = std::chrono::system_clock::now();
audit.user = "system"; // TODO(feature): Implement user context tracking for audit compliance
audit.user = currentUserId();

// Reject empty model identifiers immediately with a clear error
if (model_id.empty()) {
audit.success = false;
audit.error_message = "model_id must not be empty";
logAudit(audit);
LOG_ERROR("{}", audit.error_message);
return std::nullopt;
}

try {
std::string model_path = getModelPath(model_id);
Expand Down Expand Up @@ -270,8 +304,7 @@ std::optional<ModelStatus> LLMDeploymentPlugin::deployModel(const std::string& m
status.checksum_type = "sha256";
}

// Store in BaseEntity storage (RocksDB) - TODO: Uncomment when llm_model_storage.cpp exists
/*
// Store in BaseEntity storage (RocksDB)
if (config_.use_base_entity_storage && model_storage_) {
bool stored = saveModelToStorage(status, model_path);
if (stored) {
Expand All @@ -280,7 +313,6 @@ std::optional<ModelStatus> LLMDeploymentPlugin::deployModel(const std::string& m
LOG_WARN("Failed to store model metadata in RocksDB: {}", model_id);
}
}
*/

// Update registry (filesystem fallback)
auto it = std::find_if(model_registry_.begin(), model_registry_.end(),
Expand Down Expand Up @@ -492,7 +524,7 @@ std::optional<ModelStatus> LLMDeploymentPlugin::updateModel(const std::string& m
audit.operation = "update";
audit.model_id = model_id;
audit.timestamp = std::chrono::system_clock::now();
audit.user = "system";
audit.user = currentUserId();

// Force re-download
auto result = deployModel(model_id, true);
Expand All @@ -517,7 +549,7 @@ bool LLMDeploymentPlugin::removeModel(const std::string& model_id, bool force) {
audit.operation = "remove";
audit.model_id = model_id;
audit.timestamp = std::chrono::system_clock::now();
audit.user = "system";
audit.user = currentUserId();

auto status = getModelStatus(model_id);
if (!status) {
Expand Down Expand Up @@ -895,6 +927,12 @@ void LLMDeploymentPlugin::loadModelRegistry() {
}

std::optional<ModelSource> LLMDeploymentPlugin::findBestSource(const std::string& model_id) {
// Validate model_id before attempting any source lookup
if (model_id.empty()) {
LOG_ERROR("findBestSource: model_id must not be empty");
return std::nullopt;
}

if (config_.sources.empty()) {
// Default: try Ollama
ModelSource source;
Expand All @@ -912,12 +950,37 @@ std::optional<ModelSource> LLMDeploymentPlugin::findBestSource(const std::string
return a.priority > b.priority;
});

// Return first source (highest priority)
// TODO(enhancement): In the future, this could check if model_id exists
// in each source's model list and select the best match
LOG_DEBUG("Selected source '{}' (priority {}) for model '{}'",
sorted_sources[0].type, sorted_sources[0].priority, model_id);
return sorted_sources[0];
// Iterate sources in priority order; for "local" sources verify the model
// file is actually present before committing to that source so callers
// receive a clear "model not found" error rather than a confusing copy failure.
for (const auto& src : sorted_sources) {
if (src.type == "local") {
fs::path src_path(src.location);
// If the location is a directory, check whether the expected model
// file (derived from model_id) exists inside it.
if (fs::is_directory(src_path)) {
std::string sanitized = model_id;
std::replace(sanitized.begin(), sanitized.end(), ':', '_');
std::replace(sanitized.begin(), sanitized.end(), '/', '_');
src_path /= sanitized;
// Also try with .gguf extension if no extension present
if (!fs::exists(src_path)) {
src_path += ".gguf";
}
}
if (!fs::exists(src_path)) {
LOG_DEBUG("Local source '{}' does not contain model '{}', skipping",
src.location, model_id);
continue;
}
}
LOG_DEBUG("Selected source '{}' (priority {}) for model '{}'",
src.type, src.priority, model_id);
return src;
}

LOG_ERROR("No source contains model '{}'; checked {} source(s)", model_id, sorted_sources.size());
return std::nullopt;
}

std::string LLMDeploymentPlugin::getModelPath(const std::string& model_id) const {
Expand Down
Loading
Loading