Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/cli/modules/launch_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,14 @@ def start_server(host: str = "0.0.0.0", port: int = 8001, reload: bool = False):
logger.info(" - POST /openarc/load Load a model")
logger.info(" - POST /openarc/unload Unload a model")
logger.info(" - GET /openarc/status Get model status")
logger.info(" - GET /openarc/metrics Get hardware telemetry")
logger.info(" - POST /openarc/models/update Update model configuration")
logger.info(" - POST /openarc/bench Run inference benchmark")
logger.info(" - GET /openarc/downloader List active model downloads")
logger.info(" - POST /openarc/downloader Start a model download")
Comment on lines +91 to +95
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The endpoint list printed at startup doesn’t include new routes added in this PR (GET /openarc/version and GET /openarc/models). Please keep this list in sync with the server routes so users can discover the full API surface.

Copilot uses AI. Check for mistakes.
logger.info(" - DELETE /openarc/downloader Cancel a model download")
logger.info(" - POST /openarc/downloader/pause Pause a model download")
logger.info(" - POST /openarc/downloader/resume Resume a model download")
logger.info("--------------------------------")
logger.info("OpenAI compatible endpoints:")
logger.info(" - GET /v1/models")
Expand Down
234 changes: 234 additions & 0 deletions src/server/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@
import base64
from contextlib import asynccontextmanager
from typing import Any, AsyncIterator, Dict, List, Optional
import psutil
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

psutil is imported at module import time, but it is not listed as a project dependency (see pyproject.toml). This will prevent the server from starting in a clean install. Either add psutil to dependencies, or move telemetry behind an optional import / feature flag so the core server can run without it.

Suggested change
import psutil
try:
import psutil
except ImportError:
psutil = None

Copilot uses AI. Check for mistakes.
from pydantic import BaseModel

from src.server.downloader import global_downloader
from src.server.models.requests_management import (
DownloaderRequest,
DownloaderActionRequest,
)

Comment on lines +18 to 23
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new endpoints import src.server.downloader and src.server.models.requests_management, but those modules do not exist in the repository. As written, src.server.main will raise ModuleNotFoundError on startup. Please add these modules to the PR (or update the imports to match existing locations/types).

Suggested change
from src.server.downloader import global_downloader
from src.server.models.requests_management import (
DownloaderRequest,
DownloaderActionRequest,
)
try:
from src.server.downloader import global_downloader
from src.server.models.requests_management import (
DownloaderRequest,
DownloaderActionRequest,
)
except ModuleNotFoundError:
class _MissingDownloader:
def __getattr__(self, name: str) -> Any:
raise RuntimeError(
"Downloader support is unavailable because "
"'src.server.downloader' or "
"'src.server.models.requests_management' could not be imported."
)
global_downloader = _MissingDownloader()
class DownloaderRequest(BaseModel):
url: str
output_path: Optional[str] = None
class DownloaderActionRequest(BaseModel):
action: str
request_id: Optional[str] = None

Copilot uses AI. Check for mistakes.
from fastapi import Depends, FastAPI, HTTPException, Request, File, Form, UploadFile
from fastapi.exceptions import RequestValidationError
Expand Down Expand Up @@ -245,6 +253,232 @@ async def get_status():
"""Get registry status showing all loaded models."""
return await _registry.status()

class UpdateModelConfigRequest(BaseModel):
    """Request body for POST /openarc/models/update."""

    # Filesystem path of the model directory whose openarc.json is updated.
    # NOTE(review): arbitrary paths are accepted here — see endpoint handler.
    model_path: str
    # Keys merged (shallow dict.update) into the existing openarc.json.
    config: Dict[str, Any]


@app.post("/openarc/models/update", dependencies=[Depends(verify_api_key)])
async def update_local_model_config(req: UpdateModelConfigRequest):
    """Merge ``req.config`` into ``<model_path>/openarc.json``.

    Reads the existing config (if any), shallow-merges the submitted keys
    on top, and writes the result back.

    Raises:
        HTTPException: 404 if the model directory does not exist,
            400 if the existing openarc.json contains invalid JSON,
            500 if the config cannot be read or written.
    """
    import json
    from pathlib import Path

    # NOTE(review): model_path is an arbitrary filesystem path, making this
    # an arbitrary file-write primitive even behind the API key. Consider
    # restricting it to the OpenArc models cache directory.
    target_path = Path(req.model_path)
    if not target_path.exists() or not target_path.is_dir():
        raise HTTPException(status_code=404, detail="Model directory not found")

    config_path = target_path / "openarc.json"

    current_config = {}
    if config_path.exists():
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                current_config = json.load(f)
        except json.JSONDecodeError as e:
            # Surface corruption to the caller instead of silently
            # overwriting a config we could not parse.
            raise HTTPException(
                status_code=400,
                detail=f"Existing openarc.json contains invalid JSON: {str(e)}",
            )
        except Exception as e:
            raise HTTPException(
                status_code=500,
                detail=f"Failed to read existing config: {str(e)}",
            )

    current_config.update(req.config)

    try:
        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(current_config, f, indent=4)
        return {"status": "success", "config": current_config}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to save config: {str(e)}")


@app.get("/openarc/models", dependencies=[Depends(verify_api_key)])
async def get_local_models(path: Optional[str] = None):
    """List model folders and their openarc.json metadata.

    Args:
        path: Directory to scan; defaults to ~/.cache/openarc/models.

    Returns:
        ``{"models": [...]}`` with one entry per subdirectory; entries
        fall back to folder-name defaults when openarc.json is missing
        or unreadable.
    """
    import json
    from pathlib import Path

    # NOTE(review): accepting an arbitrary path lets any API-key holder list
    # server directories; consider restricting to the models cache root.
    if path:
        target_path = Path(path)
    else:
        target_path = Path.home() / ".cache" / "openarc" / "models"

    models = []
    if target_path.exists() and target_path.is_dir():
        for entry in target_path.iterdir():
            if not entry.is_dir():
                continue

            folder_name = entry.name
            config_path = entry / "openarc.json"
            has_config = config_path.exists()

            model_name = folder_name
            model_type = None
            # Fix: initialize unconditionally — previously config_data was
            # unbound (NameError) when the config existed but failed to load.
            config_data = {}

            if has_config:
                try:
                    with open(config_path, "r", encoding="utf-8") as f:
                        config_data = json.load(f)
                    model_name = config_data.get("model_name", model_name)
                    model_type = config_data.get("model_type")
                except Exception:
                    # Unreadable/invalid config: keep folder-name defaults.
                    config_data = {}

            models.append(
                {
                    "id": folder_name,
                    "path": str(entry),
                    "model_name": model_name,
                    "model_type": model_type,
                    "engine": config_data.get("engine"),
                    "vlm_type": config_data.get("vlm_type"),
                    "draft_model_path": config_data.get("draft_model_path"),
                    "draft_device": config_data.get("draft_device"),
                    "num_assistant_tokens": config_data.get("num_assistant_tokens"),
                    "assistant_confidence_threshold": config_data.get(
                        "assistant_confidence_threshold"
                    ),
                    "runtime_config": config_data.get("runtime_config", {}),
                    "has_config": has_config,
                }
            )
    return {"models": models}


@app.get("/openarc/version", dependencies=[Depends(verify_api_key)])
async def get_version():
    """Report the OpenArc server version string."""
    # NOTE(review): hard-coded value; sourcing it from package metadata
    # (importlib.metadata.version) would keep it in sync with pyproject.toml
    # — TODO confirm the installed package name.
    return {"version": "v2.0.4"}
Comment on lines +344 to +346
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/openarc/version returns a hard-coded string (v2.0.4) which is already inconsistent with the package version declared in pyproject.toml (2.0). Consider sourcing this from package metadata (e.g., importlib.metadata.version(...)) so it stays correct across releases.

Copilot uses AI. Check for mistakes.


def get_intel_gpu_metrics():
    """Enumerate OpenVINO devices and return static device telemetry.

    Returns:
        dict with keys:
            "cpu": {"id", "name"} — the system CPU (name from OpenVINO
                when available, else "System CPU").
            "gpus": list of per-GPU dicts with VRAM totals; ``usage`` and
                ``used_vram`` are mocked at 0 for now.
            "npus": list of {"id", "name"} for NPU devices.

        If OpenVINO (or psutil) is not installed, the defaults are
        returned unchanged, so callers always get a well-formed payload.
    """
    gpus = []
    cpu_info = {"id": "CPU", "name": "System CPU"}
    npus = []

    try:
        import openvino as ov
        import psutil

        # NOTE(review): a fresh Core is built on every call; device names
        # are static, so caching them would cut per-request overhead.
        core = ov.Core()
        devices = core.available_devices
        for device in devices:
            try:
                name = core.get_property(device, "FULL_DEVICE_NAME")
            except Exception:
                name = device

            if "CPU" in device:
                cpu_info["name"] = str(name)
            elif "NPU" in device:
                npus.append({"id": device, "name": str(name)})
            elif "GPU" in device:
                # Usage/used-VRAM are mocked at 0 for now: standard Python
                # cannot easily read Intel GPU telemetry without root or
                # external tools.
                usage = 0.0
                total_vram_mb = 0
                used_vram_mb = 0
                is_shared = False

                # Try to get memory info if OpenVINO exposes it.
                try:
                    total_vram = core.get_property(device, "DEVICE_TOTAL_MEM_SIZE")
                    total_vram_mb = total_vram // (1024 * 1024)
                except Exception:
                    # If OpenVINO cannot report DEVICE_TOTAL_MEM_SIZE it is
                    # usually an integrated GPU sharing system memory; report
                    # system RAM but flag it as shared.
                    vm = psutil.virtual_memory()
                    total_vram_mb = vm.total // (1024 * 1024)
                    is_shared = True

                gpus.append(
                    {
                        "id": device,
                        "name": str(name),
                        "total_vram": int(total_vram_mb),
                        "used_vram": int(used_vram_mb),
                        "usage": float(usage),
                        "is_shared": is_shared,
                    }
                )
    except ImportError:
        # openvino/psutil not installed: return the empty defaults.
        pass
    except Exception as e:
        import logging

        # Use a named logger with lazy %-args instead of the root logger
        # with an eager f-string.
        logging.getLogger(__name__).error("Failed to query OpenVINO devices: %s", e)

    return {"cpu": cpu_info, "gpus": gpus, "npus": npus}


@app.get("/openarc/metrics", dependencies=[Depends(verify_api_key)])
async def get_metrics():
    """Return hardware telemetry: CPU usage, RAM, and OpenVINO GPU/NPU info."""
    import psutil

    memory = psutil.virtual_memory()
    device_info = get_intel_gpu_metrics()

    # NOTE(review): psutil.cpu_percent() without an interval reports usage
    # since the previous call and commonly returns 0.0 the first time.
    cpu_entry = {
        "id": device_info["cpu"]["id"],
        "name": device_info["cpu"]["name"],
        "cores": psutil.cpu_count(logical=False) or 1,
        "threads": psutil.cpu_count(logical=True) or 1,
        "usage": psutil.cpu_percent(),
    }

    mib = 1024 * 1024
    return {
        "cpus": [cpu_entry],
        "total_ram": memory.total // mib,
        "used_ram": memory.used // mib,
        "gpus": device_info["gpus"],
        "npus": device_info["npus"],
    }


@app.post("/openarc/downloader", dependencies=[Depends(verify_api_key)])
async def start_download(request: DownloaderRequest):
    """Start a model download; 400 if one is already in progress."""
    started = await global_downloader.start(request.model_name, request.path)
    if not started:
        return JSONResponse(
            status_code=400,
            content={"status": "error", "message": "Download already in progress."},
        )
    return {"status": "success", "message": "Model download started successfully."}


@app.get("/openarc/downloader", dependencies=[Depends(verify_api_key)])
async def list_downloads():
    """Return the status of all known model download tasks."""
    tasks = global_downloader.list_tasks()
    return {"models": tasks}


@app.delete("/openarc/downloader", dependencies=[Depends(verify_api_key)])
async def cancel_download(request: DownloaderActionRequest):
    """Cancel a model download; 404 if no matching task exists."""
    cancelled = global_downloader.cancel(request.model_name)
    if not cancelled:
        return JSONResponse(
            status_code=404,
            content={"status": "error", "message": "Download task not found."},
        )
    return {
        "status": "success",
        "message": "Model download cancelled successfully.",
    }


@app.post("/openarc/downloader/pause", dependencies=[Depends(verify_api_key)])
async def pause_download(request: DownloaderActionRequest):
    """Pause an active model download; 404 if none is active."""
    paused = global_downloader.pause(request.model_name)
    if not paused:
        return JSONResponse(
            status_code=404,
            content={"status": "error", "message": "Active download task not found."},
        )
    return {"status": "success", "message": "Model download paused successfully."}


@app.post("/openarc/downloader/resume", dependencies=[Depends(verify_api_key)])
async def resume_download(request: DownloaderActionRequest):
    """Resume a paused download (implemented as a fresh start)."""
    resumed = await global_downloader.start(request.model_name)
    if not resumed:
        return JSONResponse(
            status_code=400,
            content={"status": "error", "message": "Download already in progress."},
        )
    return {"status": "success", "message": "Model download resumed successfully."}

@app.post("/openarc/bench", dependencies=[Depends(verify_api_key)])
async def benchmark(request: OpenArcBenchRequest):
"""Benchmark endpoint that accepts pre-encoded input_ids and returns only metrics."""
Expand Down
Loading