Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
- **Prometheus `/metrics` endpoint** (P4-J). Exposes prom-client's
default Node.js metrics (event-loop lag, heap, GC, etc.) plus
per-request `http_requests_total{method,route,status}` and
`http_request_duration_seconds{method,route,status}` series.
Route labels use the Express pattern (`/v1/customer/:id`) not the
rendered path, so cardinality stays bounded.
Authentication is OPTIONAL: leaving `METRICS_BEARER_TOKEN` unset
keeps the endpoint open (the usual private-network deployment);
setting it requires `Authorization: Bearer <token>` on every scrape.
Token comparison is constant-time.
- **`migration` field on `GET /healthz`** (P4-I). Body now reports the
last applied migration name from `SequelizeMeta` (e.g.
`"20260519000000-idempotency-keys"`). Lets a rolling-deploy caller
Expand Down
148 changes: 148 additions & 0 deletions app/middleware/metrics.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2026 Aaron K. Clark
"use strict";

/**
* Prometheus metrics + middleware.
*
* Why
* /healthz is binary (up/down). For SLO work — error rates, p95
* latency by route, request throughput — we need a richer surface
* (per-tenant breakdowns are deliberately excluded; see "What's
* NOT exposed" below). Prometheus is the de-facto pull-based standard and
* plays nicely with Grafana, Alertmanager, and most operators'
* existing infra.
*
* What's exposed
* - prom-client default Node.js metrics (event-loop lag, heap,
* GC, etc.) — auto-registered.
* - `http_requests_total{method,route,status}` counter — one
* bump per HTTP request, labelled with the original Express
* route pattern (e.g. `/v1/customer/:id` not the rendered
* `/v1/customer/42`) so cardinality stays bounded.
* - `http_request_duration_seconds{method,route,status}`
* histogram — for p50/p95/p99 latency calculations in
* Prometheus.
*
* What's NOT exposed
* - authKey or req.companyId labels. Putting either in label
* cardinality would explode the metric store (one time-series
* per unique key). Per-tenant metrics belong in structured
* logs aggregated server-side.
* - Path-as-label without route pattern. `req.route?.path`
* gives us the pattern after Express has matched; on a 404
* `req.route` is undefined and we fall back to `'<unknown>'`
* to keep the cardinality cap intact.
*
* Auth
* - Default: /metrics is OPEN. The intended deployment puts
* Prometheus on the same private network and lets the
* reverse proxy gate exposure.
* - Operators on shared infra can set METRICS_BEARER_TOKEN
* to require `Authorization: Bearer <token>` on the scrape.
* If the env var is unset, no auth is enforced. The token
* comparison is constant-time, so an attacker can't recover
* the token character-by-character from response timing.
*/

const crypto = require('crypto');
const promClient = require('prom-client');

// Dedicated registry: every metric declared in this module is registered
// here (via `registers`) and the scrape handler renders from it.
const registry = new promClient.Registry();
promClient.collectDefaultMetrics({ register: registry });

// Both HTTP series carry the same three labels.
const HTTP_LABEL_NAMES = ['method', 'route', 'status'];

// Histogram bucket upper bounds, in seconds. Sized to a JSON API: most
// requests in the 1-50ms range, tail in the 100ms-2s range, with a few
// headroom buckets for outliers (cold migrations, DB stalls).
const DURATION_BUCKETS_SECONDS = [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10];

// Request counter: one increment per finished response.
const httpRequestsTotal = new promClient.Counter({
  name: 'http_requests_total',
  help: 'Total HTTP requests, labelled by method, route pattern, and status code.',
  labelNames: [...HTTP_LABEL_NAMES],
  registers: [registry],
});

// Latency histogram: one observation per finished response.
const httpRequestDuration = new promClient.Histogram({
  name: 'http_request_duration_seconds',
  help: 'HTTP request duration in seconds, labelled by method, route, status.',
  labelNames: [...HTTP_LABEL_NAMES],
  buckets: [...DURATION_BUCKETS_SECONDS],
  registers: [registry],
});

/**
 * Express middleware: time each request and, once the response has
 * finished, record one `http_requests_total` increment and one
 * `http_request_duration_seconds` observation.
 *
 * The `route` label is the matched Express route pattern prefixed with
 * `req.baseUrl`, so a route declared on a sub-mounted router reports its
 * full pattern (`/v1` + `/customer/:id`) rather than only the
 * router-relative part. Requests that never matched a route (e.g. 404s)
 * have no `req.route`; they are all recorded under `'<unknown>'` so the
 * label set does not grow per distinct mistyped URL.
 *
 * Only requests that actually reach this middleware are recorded, so it
 * should be mounted ahead of anything that may end a response.
 *
 * @param {object} req - Express request; `method`, `baseUrl` and `route`
 *   are read when the response finishes.
 * @param {object} res - Express response; a `finish` listener is attached
 *   and `statusCode` is read inside it.
 * @param {Function} next - Continues the middleware chain; invoked
 *   synchronously after the listener is attached.
 * @returns {void}
 */
function metricsMiddleware(req, res, next) {
  const startedAt = process.hrtime.bigint();

  res.on('finish', () => {
    // hrtime is in nanoseconds; Prometheus convention is seconds.
    const elapsedSec = Number(process.hrtime.bigint() - startedAt) / 1e9;

    // req.route is only populated once a route has matched. baseUrl is
    // the mount path of the router that owns the route ('' when the
    // router is mounted at '/').
    const pattern = req.route && req.route.path;
    const route = pattern ? `${req.baseUrl || ''}${pattern}` : '<unknown>';

    const labels = {
      method: req.method,
      route,
      status: String(res.statusCode),
    };
    httpRequestsTotal.inc(labels);
    httpRequestDuration.observe(labels, elapsedSec);
  });

  next();
}

/**
 * Optional bearer-token check for the /metrics endpoint. Kept as a
 * standalone function so the gate can be unit-tested without starting
 * the server.
 *
 * Reads `METRICS_BEARER_TOKEN` on every call. When it is unset or empty
 * the gate is disabled and every request is allowed. Otherwise the
 * request must carry `Authorization: Bearer <token>`; the scheme name is
 * matched case-insensitively, as HTTP auth schemes are.
 *
 * @param {object} req - Express-like request exposing `get(headerName)`.
 * @returns {boolean} `true` when the scrape may proceed, `false` otherwise.
 */
function checkMetricsAuth(req) {
  const required = process.env.METRICS_BEARER_TOKEN;
  if (!required) return true; // gate disabled

  const header = typeof req.get === 'function' ? req.get('Authorization') : undefined;
  if (!header) return false;

  const match = /^Bearer\s+(.+)$/i.exec(header);
  if (!match) return false;

  // timingSafeEqual needs equal-length inputs. Comparing SHA-256 digests
  // gives both sides a fixed length, so every candidate token takes the
  // same code path regardless of its length.
  const suppliedDigest = crypto.createHash('sha256').update(match[1]).digest();
  const requiredDigest = crypto.createHash('sha256').update(required).digest();
  return crypto.timingSafeEqual(suppliedDigest, requiredDigest);
}

/**
 * GET /metrics handler. Replies 401 when the optional bearer gate
 * rejects the request; otherwise emits the registry's text-format
 * payload with the registry's own Content-Type.
 *
 * The payload is rendered BEFORE any success header is written, so a
 * collection failure never leaves a half-prepared metrics response.
 * A failure is handed to `next` (Express 4 does not forward rejections
 * from async handlers on its own); when no `next` is supplied, e.g. in a
 * direct unit call, the error is rethrown to the caller.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @param {Function} [next] - Express continuation, used only to report
 *   a failure while rendering the metrics.
 * @returns {Promise<*>} Resolves once a response was sent or the error
 *   was delegated.
 */
async function metricsHandler(req, res, next) {
  if (!checkMetricsAuth(req)) {
    return res.status(401).json({ message: 'Unauthorized.' });
  }

  let payload;
  try {
    payload = await registry.metrics();
  } catch (err) {
    if (typeof next === 'function') return next(err);
    throw err;
  }

  res.setHeader('Content-Type', registry.contentType);
  return res.status(200).send(payload);
}

// Public surface of this module: the request observer, the /metrics
// handler, and the auth gate and registry the handler relies on (the
// gate is exported so it can be unit-tested in isolation).
module.exports = {
metricsMiddleware,
metricsHandler,
checkMetricsAuth,
registry,
};
6 changes: 6 additions & 0 deletions app/routers/router.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ const inventoryTransactionSchemas = require('../schemas/inventorytransaction.sch
// of the API process and reachability of the database.
router.get('/healthz', health.healthz);

// Prometheus scrape endpoint. Auth is optional, gated by
// METRICS_BEARER_TOKEN env var; unset => unauthenticated, the
// usual private-network deployment pattern.
const { metricsHandler } = require('../middleware/metrics.js');
router.get('/metrics', metricsHandler);

// attachAuth runs on every /v1/* request and populates
// req.authKey / req.isMaster / req.companyId without rejecting.
// Existing controllers still have their inline authKey check; once
Expand Down
38 changes: 38 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"pg-hstore": "^2.3.4",
"pino": "^10.3.1",
"pino-http": "^11.0.0",
"prom-client": "^15.1.3",
"sequelize": "^6.37.8",
"sequelize-cli": "^6.6.5",
"swagger-ui-express": "^5.0.1",
Expand Down
6 changes: 6 additions & 0 deletions server.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ const db = require('./app/config/db.config.js');
const log = require('./app/config/logger.js');
const router = require('./app/routers/router.js');
const { errorHandler, notFound } = require('./app/middleware/error-handler.js');
const { metricsMiddleware } = require('./app/middleware/metrics.js');

const app = express();

Expand Down Expand Up @@ -154,6 +155,11 @@ if (rateLimitMax !== 0) {
app.use('/v1', v1Limiter);
}

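// Metrics observer. Mounted BEFORE the router so it sees every
// request that reaches the router (including 404s). Responses ended
// by middleware registered earlier (e.g. the /v1 rate limiter above)
// never reach it and are not counted. The /metrics handler itself is
// registered inside the router.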
app.use(metricsMiddleware);

app.use('/', router);

// 404 fallthrough + global error handler. Order matters — these
Expand Down
104 changes: 104 additions & 0 deletions tests/api/metrics.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright 2026 Aaron K. Clark
//
// HTTP smoke tests for the /metrics scrape endpoint (P4-J).

import { describe, test, expect, vi, beforeAll, beforeEach, afterEach } from 'vitest';
import request from 'supertest';
import express from 'express';

// Replace the DB config module with an in-memory stub for this test
// file: `sequelize.query` resolves to an empty array and every model
// export is an empty object.
vi.mock('../../app/config/db.config.js', () => ({
sequelize: {
query: vi.fn().mockResolvedValue([]),
QueryTypes: { SELECT: 'SELECT' },
},
Sequelize: {},
Customer: {}, TimeEntry: {}, Worker: {}, BillingType: {},
InventoryItem: {}, Company: {}, Job: {}, Invoice: {},
CustomerPayment: {}, InvoiceJob: {}, ProductEntry: {},
VersionInfo: {}, PurchaseOrderVendor: {}, PurchaseOrderHeader: {},
PurchaseOrderLine: {}, InventoryTransaction: {},
ApiKey: {}, ApiMaster: {},
}));

// Express app shared by every test in this file; built once in beforeAll.
let app;

beforeAll(async () => {
  // The modules are loaded dynamically, after the vi.mock() call above
  // has been evaluated. The router's `default` export is used when
  // present, otherwise a plain require() is the fallback.
  const routerModule = await import('../../app/routers/router.js');
  const router = routerModule.default || require('../../app/routers/router.js');
  const metrics = await import('../../app/middleware/metrics.js');

  // Middleware order: JSON body parser, metrics observer, then the router.
  app = express();
  app.use(express.json());
  app.use(metrics.metricsMiddleware);
  app.use('/', router);
});

describe('GET /metrics', () => {
  // These cases exercise the OPEN endpoint, so they must not depend on
  // whatever METRICS_BEARER_TOKEN the developer or CI environment has
  // exported. Clear it for each test and restore it afterwards.
  const ORIGINAL = process.env.METRICS_BEARER_TOKEN;

  beforeEach(() => {
    delete process.env.METRICS_BEARER_TOKEN;
  });
  afterEach(() => {
    if (ORIGINAL === undefined) delete process.env.METRICS_BEARER_TOKEN;
    else process.env.METRICS_BEARER_TOKEN = ORIGINAL;
  });

  test('route is mounted (not 404)', async () => {
    const res = await request(app).get('/metrics');
    expect(res.status).not.toBe(404);
  });

  test('returns Prometheus text-format on a default scrape', async () => {
    const res = await request(app).get('/metrics');
    expect(res.status).toBe(200);
    // prom-client sets `text/plain; version=0.0.4; charset=utf-8`.
    expect(res.headers['content-type']).toMatch(/text\/plain/);
    // Should include at least one default Node.js metric line.
    expect(res.text).toMatch(/^# HELP/m);
    expect(res.text).toMatch(/^# TYPE/m);
    expect(res.text).toMatch(/process_cpu_user_seconds_total|nodejs_/);
  });

  test('the http_* metrics are registered (visible in scrape output)', async () => {
    // Fire a couple of upstream requests to give the middleware a
    // chance to record samples — though vi.mock + nested CJS requires
    // (P5-M) sometimes interpose between the middleware and the
    // registered counter. The hard requirement pinned here is only
    // that the metrics ARE registered, so a future refactor that
    // drops a declaration shows up as a failure.
    await request(app).get('/healthz');
    await request(app).get('/v1/whoami').set('authKey', 'any');
    const res = await request(app).get('/metrics');
    expect(res.text).toContain('http_requests_total');
    expect(res.text).toContain('http_request_duration_seconds');
    expect(res.text).toMatch(/# TYPE http_requests_total counter/);
    expect(res.text).toMatch(/# TYPE http_request_duration_seconds histogram/);
  });

  test('does not require authKey header (orthogonal to API auth)', async () => {
    const res = await request(app).get('/metrics');
    expect(res.status).not.toBe(403);
  });
});

describe('GET /metrics — METRICS_BEARER_TOKEN gate', () => {
  // Value present before this suite touched the variable; restored
  // after every test.
  const savedToken = process.env.METRICS_BEARER_TOKEN;

  // Issue a scrape, attaching an Authorization header only when given.
  const scrape = (authorization) => {
    const pending = request(app).get('/metrics');
    return authorization === undefined
      ? pending
      : pending.set('Authorization', authorization);
  };

  beforeEach(() => {
    process.env.METRICS_BEARER_TOKEN = 'secret-test-token';
  });

  afterEach(() => {
    if (savedToken !== undefined) {
      process.env.METRICS_BEARER_TOKEN = savedToken;
      return;
    }
    delete process.env.METRICS_BEARER_TOKEN;
  });

  test('401 when the env var is set but no Authorization header is supplied', async () => {
    const { status } = await scrape();
    expect(status).toBe(401);
  });

  test('401 when the Authorization header carries the wrong token', async () => {
    const { status } = await scrape('Bearer not-the-token');
    expect(status).toBe(401);
  });

  test('200 when the Authorization header carries the right token', async () => {
    const { status } = await scrape('Bearer secret-test-token');
    expect(status).toBe(200);
  });
});
Loading