Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/cli/modules/launch_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,14 @@ def start_server(host: str = "0.0.0.0", port: int = 8001, reload: bool = False):
logger.info(" - POST /openarc/load Load a model")
logger.info(" - POST /openarc/unload Unload a model")
logger.info(" - GET /openarc/status Get model status")
logger.info(" - GET /openarc/metrics Get hardware telemetry")
logger.info(" - POST /openarc/models/update Update model configuration")
logger.info(" - POST /openarc/bench Run inference benchmark")
logger.info(" - GET /openarc/downloader List active model downloads")
logger.info(" - POST /openarc/downloader Start a model download")
Comment on lines +91 to +95
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The endpoint list printed at startup doesn’t include new routes added in this PR (GET /openarc/version and GET /openarc/models). Please keep this list in sync with the server routes so users can discover the full API surface.

Copilot uses AI. Check for mistakes.
logger.info(" - DELETE /openarc/downloader Cancel a model download")
logger.info(" - POST /openarc/downloader/pause Pause a model download")
logger.info(" - POST /openarc/downloader/resume Resume a model download")
logger.info("--------------------------------")
logger.info("OpenAI compatible endpoints:")
logger.info(" - GET /v1/models")
Expand Down
234 changes: 234 additions & 0 deletions src/server/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@
import base64
from contextlib import asynccontextmanager
from typing import Any, AsyncIterator, Dict, List, Optional
import psutil
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

psutil is imported at module import time, but it is not listed as a project dependency (see pyproject.toml). This will prevent the server from starting in a clean install. Either add psutil to dependencies, or move telemetry behind an optional import / feature flag so the core server can run without it.

Suggested change
import psutil
try:
import psutil
except ImportError:
psutil = None

Copilot uses AI. Check for mistakes.
from pydantic import BaseModel

from src.server.downloader import global_downloader
from src.server.models.requests_management import (
DownloaderRequest,
DownloaderActionRequest,
)

Comment on lines +18 to 23
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new endpoints import src.server.downloader and src.server.models.requests_management, but those modules do not exist in the repository. As written, src.server.main will raise ModuleNotFoundError on startup. Please add these modules to the PR (or update the imports to match existing locations/types).

Suggested change
from src.server.downloader import global_downloader
from src.server.models.requests_management import (
DownloaderRequest,
DownloaderActionRequest,
)
try:
from src.server.downloader import global_downloader
from src.server.models.requests_management import (
DownloaderRequest,
DownloaderActionRequest,
)
except ModuleNotFoundError:
class _MissingDownloader:
def __getattr__(self, name: str) -> Any:
raise RuntimeError(
"Downloader support is unavailable because "
"'src.server.downloader' or "
"'src.server.models.requests_management' could not be imported."
)
global_downloader = _MissingDownloader()
class DownloaderRequest(BaseModel):
url: str
output_path: Optional[str] = None
class DownloaderActionRequest(BaseModel):
action: str
request_id: Optional[str] = None

Copilot uses AI. Check for mistakes.
from fastapi import Depends, FastAPI, HTTPException, Request, File, Form, UploadFile
from fastapi.exceptions import RequestValidationError
Expand Down Expand Up @@ -245,6 +253,232 @@ async def get_status():
"""Get registry status showing all loaded models."""
return await _registry.status()

class UpdateModelConfigRequest(BaseModel):
    """Request body for POST /openarc/models/update."""

    # Filesystem path of the model directory whose openarc.json is updated.
    # NOTE(review): arbitrary paths are accepted here — see endpoint handler.
    model_path: str
    # Keys merged (shallow dict.update) into the existing openarc.json.
    config: Dict[str, Any]


@app.post("/openarc/models/update", dependencies=[Depends(verify_api_key)])
async def update_local_model_config(req: UpdateModelConfigRequest):
    """Merge ``req.config`` into ``<model_path>/openarc.json``.

    Reads the existing config (if any), shallow-merges the submitted keys
    on top, and writes the result back.

    Raises:
        HTTPException: 404 if the model directory does not exist,
            400 if the existing openarc.json contains invalid JSON,
            500 if the config cannot be read or written.
    """
    import json
    from pathlib import Path

    # NOTE(review): model_path is an arbitrary filesystem path, making this
    # an arbitrary file-write primitive even behind the API key. Consider
    # restricting it to the OpenArc models cache directory.
    target_path = Path(req.model_path)
    if not target_path.exists() or not target_path.is_dir():
        raise HTTPException(status_code=404, detail="Model directory not found")

    config_path = target_path / "openarc.json"

    current_config = {}
    if config_path.exists():
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                current_config = json.load(f)
        except json.JSONDecodeError as e:
            # Surface corruption to the caller instead of silently
            # overwriting a config we could not parse.
            raise HTTPException(
                status_code=400,
                detail=f"Existing openarc.json contains invalid JSON: {str(e)}",
            )
        except Exception as e:
            raise HTTPException(
                status_code=500,
                detail=f"Failed to read existing config: {str(e)}",
            )

    current_config.update(req.config)

    try:
        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(current_config, f, indent=4)
        return {"status": "success", "config": current_config}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to save config: {str(e)}")


@app.get("/openarc/models", dependencies=[Depends(verify_api_key)])
async def get_local_models(path: Optional[str] = None):
    """List model folders and their openarc.json metadata.

    Args:
        path: Directory to scan; defaults to ~/.cache/openarc/models.

    Returns:
        ``{"models": [...]}`` with one entry per subdirectory; entries
        fall back to folder-name defaults when openarc.json is missing
        or unreadable.
    """
    import json
    from pathlib import Path

    # NOTE(review): accepting an arbitrary path lets any API-key holder list
    # server directories; consider restricting to the models cache root.
    if path:
        target_path = Path(path)
    else:
        target_path = Path.home() / ".cache" / "openarc" / "models"

    models = []
    if target_path.exists() and target_path.is_dir():
        for entry in target_path.iterdir():
            if not entry.is_dir():
                continue

            folder_name = entry.name
            config_path = entry / "openarc.json"
            has_config = config_path.exists()

            model_name = folder_name
            model_type = None
            # Fix: initialize unconditionally — previously config_data was
            # unbound (NameError) when the config existed but failed to load.
            config_data = {}

            if has_config:
                try:
                    with open(config_path, "r", encoding="utf-8") as f:
                        config_data = json.load(f)
                    model_name = config_data.get("model_name", model_name)
                    model_type = config_data.get("model_type")
                except Exception:
                    # Unreadable/invalid config: keep folder-name defaults.
                    config_data = {}

            models.append(
                {
                    "id": folder_name,
                    "path": str(entry),
                    "model_name": model_name,
                    "model_type": model_type,
                    "engine": config_data.get("engine"),
                    "vlm_type": config_data.get("vlm_type"),
                    "draft_model_path": config_data.get("draft_model_path"),
                    "draft_device": config_data.get("draft_device"),
                    "num_assistant_tokens": config_data.get("num_assistant_tokens"),
                    "assistant_confidence_threshold": config_data.get(
                        "assistant_confidence_threshold"
                    ),
                    "runtime_config": config_data.get("runtime_config", {}),
                    "has_config": has_config,
                }
            )
    return {"models": models}


@app.get("/openarc/version", dependencies=[Depends(verify_api_key)])
async def get_version():
    """Report the OpenArc server version string."""
    # NOTE(review): hard-coded value; sourcing it from package metadata
    # (importlib.metadata.version) would keep it in sync with pyproject.toml
    # — TODO confirm the installed package name.
    return {"version": "v2.0.4"}
Comment on lines +344 to +346
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/openarc/version returns a hard-coded string (v2.0.4) which is already inconsistent with the package version declared in pyproject.toml (2.0). Consider sourcing this from package metadata (e.g., importlib.metadata.version(...)) so it stays correct across releases.

Copilot uses AI. Check for mistakes.


def get_intel_gpu_metrics():
    """Enumerate OpenVINO devices and return static device telemetry.

    Returns:
        dict with keys:
            "cpu": {"id", "name"} — the system CPU (name from OpenVINO
                when available, else "System CPU").
            "gpus": list of per-GPU dicts with VRAM totals; ``usage`` and
                ``used_vram`` are mocked at 0 for now.
            "npus": list of {"id", "name"} for NPU devices.

        If OpenVINO (or psutil) is not installed, the defaults are
        returned unchanged, so callers always get a well-formed payload.
    """
    gpus = []
    cpu_info = {"id": "CPU", "name": "System CPU"}
    npus = []

    try:
        import openvino as ov
        import psutil

        # NOTE(review): a fresh Core is built on every call; device names
        # are static, so caching them would cut per-request overhead.
        core = ov.Core()
        devices = core.available_devices
        for device in devices:
            try:
                name = core.get_property(device, "FULL_DEVICE_NAME")
            except Exception:
                name = device

            if "CPU" in device:
                cpu_info["name"] = str(name)
            elif "NPU" in device:
                npus.append({"id": device, "name": str(name)})
            elif "GPU" in device:
                # Usage/used-VRAM are mocked at 0 for now: standard Python
                # cannot easily read Intel GPU telemetry without root or
                # external tools.
                usage = 0.0
                total_vram_mb = 0
                used_vram_mb = 0
                is_shared = False

                # Try to get memory info if OpenVINO exposes it.
                try:
                    total_vram = core.get_property(device, "DEVICE_TOTAL_MEM_SIZE")
                    total_vram_mb = total_vram // (1024 * 1024)
                except Exception:
                    # If OpenVINO cannot report DEVICE_TOTAL_MEM_SIZE it is
                    # usually an integrated GPU sharing system memory; report
                    # system RAM but flag it as shared.
                    vm = psutil.virtual_memory()
                    total_vram_mb = vm.total // (1024 * 1024)
                    is_shared = True

                gpus.append(
                    {
                        "id": device,
                        "name": str(name),
                        "total_vram": int(total_vram_mb),
                        "used_vram": int(used_vram_mb),
                        "usage": float(usage),
                        "is_shared": is_shared,
                    }
                )
    except ImportError:
        # openvino/psutil not installed: return the empty defaults.
        pass
    except Exception as e:
        import logging

        # Use a named logger with lazy %-args instead of the root logger
        # with an eager f-string.
        logging.getLogger(__name__).error("Failed to query OpenVINO devices: %s", e)

    return {"cpu": cpu_info, "gpus": gpus, "npus": npus}


@app.get("/openarc/metrics", dependencies=[Depends(verify_api_key)])
async def get_metrics():
    """Return hardware telemetry: CPU usage, RAM, and OpenVINO GPU/NPU info."""
    import psutil

    memory = psutil.virtual_memory()
    device_info = get_intel_gpu_metrics()

    # NOTE(review): psutil.cpu_percent() without an interval reports usage
    # since the previous call and commonly returns 0.0 the first time.
    cpu_entry = {
        "id": device_info["cpu"]["id"],
        "name": device_info["cpu"]["name"],
        "cores": psutil.cpu_count(logical=False) or 1,
        "threads": psutil.cpu_count(logical=True) or 1,
        "usage": psutil.cpu_percent(),
    }

    mib = 1024 * 1024
    return {
        "cpus": [cpu_entry],
        "total_ram": memory.total // mib,
        "used_ram": memory.used // mib,
        "gpus": device_info["gpus"],
        "npus": device_info["npus"],
    }


@app.post("/openarc/downloader", dependencies=[Depends(verify_api_key)])
async def start_download(request: DownloaderRequest):
    """Start a model download; 400 if one is already in progress."""
    started = await global_downloader.start(request.model_name, request.path)
    if not started:
        return JSONResponse(
            status_code=400,
            content={"status": "error", "message": "Download already in progress."},
        )
    return {"status": "success", "message": "Model download started successfully."}


@app.get("/openarc/downloader", dependencies=[Depends(verify_api_key)])
async def list_downloads():
    """Return the status of all known model download tasks."""
    tasks = global_downloader.list_tasks()
    return {"models": tasks}


@app.delete("/openarc/downloader", dependencies=[Depends(verify_api_key)])
async def cancel_download(request: DownloaderActionRequest):
    """Cancel a model download; 404 if no matching task exists."""
    cancelled = global_downloader.cancel(request.model_name)
    if not cancelled:
        return JSONResponse(
            status_code=404,
            content={"status": "error", "message": "Download task not found."},
        )
    return {
        "status": "success",
        "message": "Model download cancelled successfully.",
    }


@app.post("/openarc/downloader/pause", dependencies=[Depends(verify_api_key)])
async def pause_download(request: DownloaderActionRequest):
    """Pause an active model download; 404 if none is active."""
    paused = global_downloader.pause(request.model_name)
    if not paused:
        return JSONResponse(
            status_code=404,
            content={"status": "error", "message": "Active download task not found."},
        )
    return {"status": "success", "message": "Model download paused successfully."}


@app.post("/openarc/downloader/resume", dependencies=[Depends(verify_api_key)])
async def resume_download(request: DownloaderActionRequest):
    """Resume a paused download (implemented as a fresh start)."""
    resumed = await global_downloader.start(request.model_name)
    if not resumed:
        return JSONResponse(
            status_code=400,
            content={"status": "error", "message": "Download already in progress."},
        )
    return {"status": "success", "message": "Model download resumed successfully."}

@app.post("/openarc/bench", dependencies=[Depends(verify_api_key)])
async def benchmark(request: OpenArcBenchRequest):
"""Benchmark endpoint that accepts pre-encoded input_ids and returns only metrics."""
Expand Down
Loading