diff --git a/components/status/ComponentStatus.ts b/components/status/ComponentStatus.ts index 540a09551..cb22fed42 100644 --- a/components/status/ComponentStatus.ts +++ b/components/status/ComponentStatus.ts @@ -5,7 +5,7 @@ * component's status with methods for status management. */ -import { type ComponentStatusLevel, COMPONENT_STATUS_LEVELS } from './types.ts'; +import { type ComponentStatusLevel, type ComponentStatusSource, COMPONENT_STATUS_LEVELS } from './types.ts'; /** * Component status information class @@ -19,12 +19,23 @@ export class ComponentStatus { public message?: string; /** Error information if status is 'error' */ public error?: Error | string; + /** How the status was set */ + public source?: ComponentStatusSource; + /** Epoch ms when this status should auto-clear */ + public expiresAt?: number; + /** Number of times this status has been reported */ + public occurrenceCount: number; + /** When this status was first reported */ + public firstOccurrence: Date; - constructor(status: ComponentStatusLevel, message?: string, error?: Error | string) { + constructor(status: ComponentStatusLevel, message?: string, error?: Error | string, source?: ComponentStatusSource) { this.lastChecked = new Date(); this.status = status; this.message = message; this.error = error; + this.source = source; + this.occurrenceCount = 1; + this.firstOccurrence = this.lastChecked; } /** diff --git a/components/status/ComponentStatusRegistry.ts b/components/status/ComponentStatusRegistry.ts index 1a76a18eb..9864081aa 100644 --- a/components/status/ComponentStatusRegistry.ts +++ b/components/status/ComponentStatusRegistry.ts @@ -8,6 +8,7 @@ import { ComponentStatus } from './ComponentStatus.ts'; import { type ComponentStatusLevel, + type ComponentStatusSource, COMPONENT_STATUS_LEVELS, type AggregatedComponentStatus, type ComponentApplicationStatus, @@ -43,7 +44,8 @@ export class ComponentStatusRegistry { componentName: string, status: ComponentStatusLevel, message?: string, - 
error?: Error | string + error?: Error | string, + source?: ComponentStatusSource ): void { if (!componentName || typeof componentName !== 'string') { throw new ComponentStatusOperationError( @@ -61,7 +63,16 @@ export class ComponentStatusRegistry { ); } - this.statusMap.set(componentName, new ComponentStatus(status, message, error)); + const existing = this.statusMap.get(componentName); + if (existing && existing.status === status) { + existing.lastChecked = new Date(); + existing.message = message; + if (error !== undefined) existing.error = error; + if (source !== undefined) existing.source = source; + existing.occurrenceCount++; + } else { + this.statusMap.set(componentName, new ComponentStatus(status, message, error, source)); + } } /** diff --git a/components/status/api.ts b/components/status/api.ts index 02c98898c..b32ad4927 100644 --- a/components/status/api.ts +++ b/components/status/api.ts @@ -7,7 +7,7 @@ import { componentStatusRegistry } from './registry.ts'; import { ComponentStatus } from './ComponentStatus.ts'; -import { COMPONENT_STATUS_LEVELS } from './types.ts'; +import { COMPONENT_STATUS_LEVELS, type ComponentStatusSource } from './types.ts'; /** * Component Status Builder @@ -15,9 +15,11 @@ import { COMPONENT_STATUS_LEVELS } from './types.ts'; */ export class ComponentStatusBuilder { private componentName: string; + private source: ComponentStatusSource; - constructor(componentName: string) { + constructor(componentName: string, source: ComponentStatusSource = 'explicit') { this.componentName = componentName; + this.source = source; } /** @@ -26,7 +28,7 @@ export class ComponentStatusBuilder { * @returns this for chaining */ healthy(message?: string): this { - componentStatusRegistry.setStatus(this.componentName, COMPONENT_STATUS_LEVELS.HEALTHY, message); + componentStatusRegistry.setStatus(this.componentName, COMPONENT_STATUS_LEVELS.HEALTHY, message, undefined, this.source); return this; } @@ -36,7 +38,7 @@ export class ComponentStatusBuilder { 
* @returns this for chaining */ warning(message: string): this { - componentStatusRegistry.setStatus(this.componentName, COMPONENT_STATUS_LEVELS.WARNING, message); + componentStatusRegistry.setStatus(this.componentName, COMPONENT_STATUS_LEVELS.WARNING, message, undefined, this.source); return this; } @@ -47,7 +49,7 @@ export class ComponentStatusBuilder { * @returns this for chaining */ error(message: string, error?: Error): this { - componentStatusRegistry.setStatus(this.componentName, COMPONENT_STATUS_LEVELS.ERROR, message, error); + componentStatusRegistry.setStatus(this.componentName, COMPONENT_STATUS_LEVELS.ERROR, message, error, this.source); return this; } @@ -57,7 +59,7 @@ export class ComponentStatusBuilder { * @returns this for chaining */ loading(message?: string): this { - componentStatusRegistry.setStatus(this.componentName, COMPONENT_STATUS_LEVELS.LOADING, message || 'Loading...'); + componentStatusRegistry.setStatus(this.componentName, COMPONENT_STATUS_LEVELS.LOADING, message || 'Loading...', undefined, this.source); return this; } @@ -67,7 +69,7 @@ export class ComponentStatusBuilder { * @returns this for chaining */ unknown(message?: string): this { - componentStatusRegistry.setStatus(this.componentName, COMPONENT_STATUS_LEVELS.UNKNOWN, message); + componentStatusRegistry.setStatus(this.componentName, COMPONENT_STATUS_LEVELS.UNKNOWN, message, undefined, this.source); return this; } diff --git a/components/status/crossThread.ts b/components/status/crossThread.ts index 339e46418..0cf6b8bce 100644 --- a/components/status/crossThread.ts +++ b/components/status/crossThread.ts @@ -303,6 +303,8 @@ export class StatusAggregator { let mostRecentCheckTime = 0; let latestMessage: string | undefined; let error: Error | string | undefined; + let source: string | undefined; + let totalOccurrenceCount = 0; const statusCounts = new Map(); const abnormalities = new Map(); @@ -340,6 +342,10 @@ export class StatusAggregator { if (status.error && !error) { error = 
status.error;
 			}
+
+			// Track source and occurrence count
+			if (status.source) source = status.source;
+			if (status.occurrenceCount) totalOccurrenceCount += status.occurrenceCount;
 		}
 
 		// Determine overall status (priority: error > warning > loading > unknown > healthy)
@@ -368,6 +374,8 @@ export class StatusAggregator {
 			lastChecked: lastCheckedTimes,
 			latestMessage,
 			error,
+			source: source as any,
+			occurrenceCount: totalOccurrenceCount || undefined,
 		};
 
 		// Only add abnormalities if there are any
diff --git a/components/status/healthChecks.ts b/components/status/healthChecks.ts
new file mode 100644
index 000000000..f3518b704
--- /dev/null
+++ b/components/status/healthChecks.ts
@@ -0,0 +1,127 @@
+/**
+ * System Health Checks
+ *
+ * Periodic monitoring of system resources (disk, memory, CPU) that reports
+ * status via the component status system. Health check statuses self-heal:
+ * when the metric returns to normal, status reverts to healthy.
+ *
+ * Runs on the main thread only.
+ */
+
+import { componentStatusRegistry } from './registry.ts';
+import { COMPONENT_STATUS_LEVELS } from './types.ts';
+
+interface ThresholdConfig {
+	warning: number;
+	error: number;
+}
+
+interface HealthCheckConfig {
+	enabled?: boolean;
+	intervalSeconds?: number;
+	thresholds?: {
+		disk?: ThresholdConfig;
+		memory?: ThresholdConfig;
+		cpu?: ThresholdConfig;
+	};
+}
+
+const DEFAULT_THRESHOLDS: Record<string, ThresholdConfig> = {
+	disk: { warning: 80, error: 95 },
+	memory: { warning: 80, error: 95 },
+	cpu: { warning: 85, error: 95 },
+};
+
+const DEFAULT_INTERVAL_SECONDS = 60;
+
+function setHealthStatus(name: string, percent: number, thresholds: ThresholdConfig, label: string) {
+	const key = `system.${name}`;
+	if (percent >= thresholds.error) {
+		componentStatusRegistry.setStatus(
+			key, COMPONENT_STATUS_LEVELS.ERROR,
+			`${label} at ${percent.toFixed(1)}% utilization`, undefined, 'health-check'
+		);
+	} else if (percent >= thresholds.warning) {
+		componentStatusRegistry.setStatus(
+			key, COMPONENT_STATUS_LEVELS.WARNING,
+			`${label} at ${percent.toFixed(1)}% utilization`, undefined, 'health-check'
+		);
+	} else {
+		componentStatusRegistry.setStatus(
+			key, COMPONENT_STATUS_LEVELS.HEALTHY,
+			`${label} at ${percent.toFixed(1)}% utilization`, undefined, 'health-check'
+		);
+	}
+}
+
+async function checkDisk(thresholds: ThresholdConfig) {
+	try {
+		const si = await import('systeminformation');
+		const fsSizes = await si.fsSize();
+		// Check the filesystem with the highest usage
+		let worstPercent = 0;
+		let worstMount = '';
+		for (const fs of fsSizes) {
+			if (fs.use > worstPercent) {
+				worstPercent = fs.use;
+				worstMount = fs.mount;
+			}
+		}
+		if (worstPercent > 0) {
+			setHealthStatus('disk', worstPercent, thresholds, `Disk (${worstMount})`);
+		}
+	} catch {
+		// systeminformation may not be available in all environments
+	}
+}
+
+async function checkMemory(thresholds: ThresholdConfig) {
+	try {
+		const si = await import('systeminformation');
+		const mem = await si.mem();
+		if (mem.total > 0) {
+			const usedPercent = ((mem.total - mem.available) / mem.total) * 100;
+			setHealthStatus('memory', usedPercent, thresholds, 'Memory');
+		}
+	} catch {
+		// systeminformation may not be available in all environments
+	}
+}
+
+async function checkCPU(thresholds: ThresholdConfig) {
+	try {
+		const si = await import('systeminformation');
+		const load = await si.currentLoad();
+		if (load.currentLoad !== undefined) {
+			setHealthStatus('cpu', load.currentLoad, thresholds, 'CPU');
+		}
+	} catch {
+		// systeminformation may not be available in all environments
+	}
+}
+
+async function runChecks(thresholds: Record<string, ThresholdConfig>) {
+	await Promise.all([
+		checkDisk(thresholds.disk),
+		checkMemory(thresholds.memory),
+		checkCPU(thresholds.cpu),
+	]);
+}
+
+export function startHealthChecks(config?: HealthCheckConfig) {
+	if (config?.enabled === false) return;
+	// Merge per level so a partial override (e.g. only `warning`) keeps the default for the other level
+	const thresholds = {
+		disk: { ...DEFAULT_THRESHOLDS.disk, ...config?.thresholds?.disk },
+		memory: { ...DEFAULT_THRESHOLDS.memory, ...config?.thresholds?.memory },
+		cpu: { ...DEFAULT_THRESHOLDS.cpu, ...config?.thresholds?.cpu },
+	};
+	const intervalMs = (config?.intervalSeconds || DEFAULT_INTERVAL_SECONDS) * 1000;
+
+	// Run immediately (fire-and-forget: each check catches its own errors)
+	void runChecks(thresholds);
+
+	// Then periodically
+	const timer = setInterval(() => void runChecks(thresholds), intervalMs);
+	timer.unref();
+}
diff --git a/components/status/hierarchy.ts b/components/status/hierarchy.ts
new file mode 100644
index 000000000..d7d84fee4
--- /dev/null
+++ b/components/status/hierarchy.ts
@@ -0,0 +1,105 @@
+/**
+ * Status Hierarchy Builder
+ *
+ * Builds a tree-structured view of component status from the flat status map.
+ * Component names are split on '.' to create parent-child relationships.
+ * Parent status rolls up from children (worst status wins).
+ */
+
+import {
+	type ComponentStatusLevel,
+	type ComponentStatusSource,
+	type AggregatedComponentStatus,
+	COMPONENT_STATUS_LEVELS,
+} from './types.ts';
+
+export interface StatusNode {
+	status: ComponentStatusLevel;
+	message?: string;
+	source?: ComponentStatusSource;
+	lastChecked?: AggregatedComponentStatus['lastChecked'];
+	occurrenceCount?: number;
+	error?: Error | string;
+	children?: Record<string, StatusNode>;
+}
+
+const STATUS_PRIORITY: Record<ComponentStatusLevel, number> = {
+	[COMPONENT_STATUS_LEVELS.ERROR]: 4,
+	[COMPONENT_STATUS_LEVELS.WARNING]: 3,
+	[COMPONENT_STATUS_LEVELS.LOADING]: 2,
+	[COMPONENT_STATUS_LEVELS.UNKNOWN]: 1,
+	[COMPONENT_STATUS_LEVELS.HEALTHY]: 0,
+};
+
+/**
+ * Build a hierarchical status tree from a flat map of aggregated statuses.
+ * Names are split on '.' to create parent/child structure.
+ *
+ * Example:
+ *   'system.disk' -> { system: { children: { disk: { status: 'warning', ... } } } }
+ *   'replication' -> { replication: { status: 'error', ... 
} }
+ */
+export function buildHierarchy(
+	statuses: Map<string, AggregatedComponentStatus>
+): Record<string, StatusNode> {
+	const root: Record<string, StatusNode> = Object.create(null); // null prototype: segments like 'constructor' must not hit inherited keys
+
+	for (const [name, status] of statuses) {
+		const parts = name.split('.');
+		let currentLevel = root;
+
+		for (let i = 0; i < parts.length; i++) {
+			const part = parts[i];
+			if (!currentLevel[part]) {
+				currentLevel[part] = {
+					status: COMPONENT_STATUS_LEVELS.HEALTHY,
+				};
+			}
+			const node = currentLevel[part];
+
+			if (i === parts.length - 1) {
+				// Leaf node -- set actual status from aggregated data
+				node.status = status.status;
+				node.message = status.latestMessage;
+				node.source = status.source;
+				node.lastChecked = status.lastChecked;
+				node.occurrenceCount = status.occurrenceCount;
+				if (status.error) node.error = status.error;
+			} else {
+				// Intermediate node -- ensure children map exists (null prototype, same reason as root)
+				if (!node.children) node.children = Object.create(null);
+				currentLevel = node.children;
+			}
+		}
+	}
+
+	rollUpStatus(root);
+	return root;
+}
+
+/**
+ * Roll up status from children to parents.
+ * A parent's status is the worst (highest priority) status among its children.
+ */
+function rollUpStatus(nodes: Record<string, StatusNode>): ComponentStatusLevel {
+	let worstStatus: ComponentStatusLevel = COMPONENT_STATUS_LEVELS.HEALTHY;
+
+	for (const node of Object.values(nodes)) {
+		let nodeStatus = node.status;
+
+		if (node.children) {
+			const childWorst = rollUpStatus(node.children);
+			// Parent status = worst of own status and children's worst
+			if (STATUS_PRIORITY[childWorst] > STATUS_PRIORITY[nodeStatus]) {
+				nodeStatus = childWorst;
+				node.status = nodeStatus;
+			}
+		}
+
+		if (STATUS_PRIORITY[nodeStatus] > STATUS_PRIORITY[worstStatus]) {
+			worstStatus = nodeStatus;
+		}
+	}
+
+	return worstStatus;
+}
diff --git a/components/status/internal.ts b/components/status/internal.ts
index 3ca0e72bc..065d12460 100644
--- a/components/status/internal.ts
+++ b/components/status/internal.ts
@@ -9,6 +9,9 @@ import type { ComponentStatusLevel } from './types.ts';
 
 import { componentStatusRegistry } from './registry.ts';
 import { ComponentStatusRegistry } from './ComponentStatusRegistry.ts';
+import { initLogBridge } from './logBridge.ts';
+import { startHealthChecks } from './healthChecks.ts';
+import { isMainThread } from 'node:worker_threads';
 
 // Internal classes and types
 export { ComponentStatus } from './ComponentStatus.ts';
@@ -25,6 +28,18 @@ export * from './errors.ts';
 
 // All type definitions
 export * from './types.ts';
+// Log bridge and health checks
+export { initLogBridge } from './logBridge.ts';
+export { startHealthChecks } from './healthChecks.ts';
+
+// Initialize the log-to-status bridge immediately so logger.status() calls work
+initLogBridge();
+
+// Start health checks on the main thread only
+if (isMainThread) {
+	startHealthChecks();
+}
+
 // Internal query functions for Harper core
 export const query = {
 	/**
diff --git a/components/status/logBridge.ts b/components/status/logBridge.ts
new file mode 100644
index 000000000..03cc3b3a2
--- /dev/null
+++ b/components/status/logBridge.ts
@@ -0,0 +1,123 @@
+/**
+ * Log-to-Status Bridge
+ *
+ * Handles the connection between logger.status() calls and the component status system.
+ * When code uses logger.status({ problem: 'key' }).error('message'), this module
+ * receives the call and updates the ComponentStatusRegistry accordingly.
+ */
+
+import { componentStatusRegistry } from './registry.ts';
+import { COMPONENT_STATUS_LEVELS } from './types.ts';
+import type { StatusOptions } from '../../utility/logging/logger.ts';
+import { setStatusHandler } from '../../utility/logging/harper_logger.js';
+
+const LOG_TO_STATUS_LEVEL = {
+	fatal: COMPONENT_STATUS_LEVELS.ERROR,
+	error: COMPONENT_STATUS_LEVELS.ERROR,
+	warn: COMPONENT_STATUS_LEVELS.WARNING,
+	notify: COMPONENT_STATUS_LEVELS.WARNING,
+	info: COMPONENT_STATUS_LEVELS.WARNING,
+	debug: COMPONENT_STATUS_LEVELS.WARNING,
+	trace: COMPONENT_STATUS_LEVELS.WARNING,
+} as const;
+
+const expiryTimers = new Map<string, ReturnType<typeof setTimeout>>();
+
+function formatMessage(args: any[]): string {
+	return args
+		.map((a) => {
+			if (typeof a === 'string') return a;
+			if (a instanceof Error) return a.message;
+			try {
+				return String(a);
+			} catch {
+				return '[unserializable]';
+			}
+		})
+		.join(' ')
+		.substring(0, 500);
+}
+
+/**
+ * Status handler called by the logger's .status() wrapper.
+ * This is registered with harper_logger.js via setStatusHandler().
+ */
+export function handleStatusLog(
+	options: StatusOptions,
+	level: string | null,
+	componentTag: string | undefined,
+	args: any[]
+) {
+	// A null level means the caller supplied no log method: the status is registered
+	// (or resolved) from `options` alone, with no message. A call with a level set
+	// carries the log arguments and is handled further down; for the same key it
+	// replaces the level and message recorded here.
+	if (level === null) {
+		// Immediate status-only registration (no log output)
+		if (options.resolves) {
+			clearExpiry(options.resolves);
+			componentStatusRegistry.setStatus(
+				options.resolves, COMPONENT_STATUS_LEVELS.HEALTHY, 'Resolved', undefined, 'log'
+			);
+		} else if (options.problem) {
+			const statusLevel = options.level ? (LOG_TO_STATUS_LEVEL[options.level] || COMPONENT_STATUS_LEVELS.WARNING) : COMPONENT_STATUS_LEVELS.ERROR;
+			componentStatusRegistry.setStatus(options.problem, statusLevel, undefined, undefined, 'log');
+			if (options.expires) {
+				scheduleExpiry(options.problem, options.expires * 1000);
+			} else {
+				clearExpiry(options.problem);
+			}
+		}
+		return;
+	}
+
+	const message = formatMessage(args);
+
+	if (options.resolves) {
+		clearExpiry(options.resolves);
+		componentStatusRegistry.setStatus(
+			options.resolves, COMPONENT_STATUS_LEVELS.HEALTHY, message || 'Resolved', undefined, 'log'
+		);
+		return;
+	}
+
+	if (options.problem) {
+		const key = options.problem;
+		const statusLevel = LOG_TO_STATUS_LEVEL[level] || COMPONENT_STATUS_LEVELS.WARNING;
+		const errorArg = args.find((a) => a instanceof Error);
+
+		componentStatusRegistry.setStatus(key, statusLevel, message, errorArg, 'log');
+
+		if (options.expires) {
+			scheduleExpiry(key, options.expires * 1000);
+		} else {
+			clearExpiry(key);
+		}
+	}
+}
+
+function scheduleExpiry(key: string, ms: number) {
+	clearExpiry(key);
+	const timer = setTimeout(() => {
+		componentStatusRegistry.setStatus(key, COMPONENT_STATUS_LEVELS.HEALTHY, 'Auto-expired', undefined, 'log');
+		expiryTimers.delete(key);
+	}, ms);
+	timer.unref();
+	expiryTimers.set(key, timer);
+}
+
+function clearExpiry(key: string) {
+	const existing = expiryTimers.get(key);
+	if (existing) {
+		clearTimeout(existing);
+		expiryTimers.delete(key);
+	}
+}
+
+/**
+ * Initialize the log-to-status bridge by registering handleStatusLog
+ * with the harper logger's setStatusHandler.
+ */
+export function initLogBridge() {
+	setStatusHandler(handleStatusLog);
+}
diff --git a/components/status/types.ts b/components/status/types.ts
index 4c09031ad..e66d3600b 100644
--- a/components/status/types.ts
+++ b/components/status/types.ts
@@ -18,6 +18,8 @@ export const COMPONENT_STATUS_LEVELS = {
 
 export type ComponentStatusLevel = (typeof COMPONENT_STATUS_LEVELS)[keyof typeof COMPONENT_STATUS_LEVELS];
 
+export type ComponentStatusSource = 'log' | 'explicit' | 'health-check';
+
 /**
  * Component status information as a plain object
  */
@@ -32,6 +34,10 @@ export interface ComponentStatusSummary {
 	error?: Error | string;
 	/** Worker index for cross-thread tracking */
 	workerIndex?: number;
+	/** How the status was set */
+	source?: ComponentStatusSource;
+	/** Number of times this status has been reported */
+	occurrenceCount?: number;
 }
 
 /**
@@ -69,6 +75,10 @@ export interface AggregatedComponentStatus {
 	latestMessage?: string;
 	/** Any error from any thread */
 	error?: Error | string;
+	/** How the status was set */
+	source?: ComponentStatusSource;
+	/** Number of times this status has been reported (across all threads) */
+	occurrenceCount?: number;
 }
 
 /**
diff --git a/resources/DatabaseTransaction.ts b/resources/DatabaseTransaction.ts
index a16cb3516..d91ec2bf5 100644
--- a/resources/DatabaseTransaction.ts
+++ b/resources/DatabaseTransaction.ts
@@ -139,6 +139,7 @@ export class DatabaseTransaction implements Transaction {
 					!this.overloadChecked &&
 					performance.now() - outstandingCommitStart > MAX_OUTSTANDING_TXN_DURATION
 				) {
+					harperLogger.status({ problem: 'database.write-queue-overloaded', expires: 60 });
 					throw new ServerError('Outstanding write transactions have too long of queue, please try again later', 503);
 				}
 				this.overloadChecked = true; // only check this once, don't interrupt ongoing transactions that have already made writes
@@ -377,7 +378,7 @@ function startMonitoringTxns() {
 		for (const txn of trackedTxns) {
 			if (txn.timeout <= 0) {
 				const url = 
txn.getContext()?.url; - harperLogger.error( + harperLogger.status({ problem: 'database.txn-open-too-long' }).error( `Transaction was open too long and has been committed, from table: ${ txn.db?.name + (url ? ' path: ' + url : '') }`, diff --git a/resources/RecordEncoder.ts b/resources/RecordEncoder.ts index ac4a7f41d..24ac52a9c 100644 --- a/resources/RecordEncoder.ts +++ b/resources/RecordEncoder.ts @@ -323,7 +323,7 @@ export class RecordEncoder extends Encoder { } // else a normal entry return options?.valueAsBuffer ? buffer : decodeFromDatabase(() => super.decode(buffer, options), this.rootStore); } catch (error) { - harperLogger.error('Error decoding record', error, 'data: ' + buffer.slice(0, 40).toString('hex')); + harperLogger.status({ problem: 'database.record-decode' }).error('Error decoding record', error, 'data: ' + buffer.slice(0, 40).toString('hex')); return null; } } @@ -493,13 +493,13 @@ setInterval(() => { if (txn.openTimer) { if (txn.openTimer > 3) { if (txn.openTimer > 60) { - harperLogger.error( + harperLogger.status({ problem: 'database.read-txn-critical' }).error( 'Read transaction detected that has been open too long (over 15 minutes), ending transaction', txn ); txn.done(); } else - harperLogger.error( + harperLogger.status({ problem: 'database.read-txn-long', expires: 60 }).error( 'Read transaction detected that has been open too long (over one minute), make sure read transactions are quickly closed', txn ); diff --git a/resources/auditStore.ts b/resources/auditStore.ts index 9c08a6d26..f76344444 100644 --- a/resources/auditStore.ts +++ b/resources/auditStore.ts @@ -210,7 +210,7 @@ export function openAuditStore(rootStore) { for (const time of auditStore.getKeys({ reverse: true, limit: 1 })) { if (time > Date.now()) { timestampErrored = true; - harperLogger.error( + harperLogger.status({ problem: 'system.time-reversal' }).error( 'The current time is before the last recorded entry in the audit log. 
Time reversal can undermine the integrity of data tracking and certificate validation and the time must be corrected.' ); } diff --git a/resources/databases.ts b/resources/databases.ts index f91e2fbd9..16bcf56dd 100644 --- a/resources/databases.ts +++ b/resources/databases.ts @@ -537,14 +537,14 @@ function initStores( attributesUpdated = true; } } catch (error) { - logger.error(`Error trying to update attribute`, attribute, existingAttributes, indices, error); + logger.status({ problem: 'database.attribute-update' }).error(`Error trying to update attribute`, attribute, existingAttributes, indices, error); } } for (const existingAttribute of existingAttributes) { const attribute = attributes.find((attribute) => attribute.name === existingAttribute.name); if (!attribute) { if (existingAttribute.isPrimaryKey) { - logger.error('Unable to remove existing primary key attribute', existingAttribute); + logger.status({ problem: 'database.primary-key' }).error('Unable to remove existing primary key attribute', existingAttribute); continue; } if (existingAttribute.indexed) { diff --git a/server/fastifyRoutes.ts b/server/fastifyRoutes.ts index d1f9fba94..fb0e80a65 100644 --- a/server/fastifyRoutes.ts +++ b/server/fastifyRoutes.ts @@ -84,7 +84,7 @@ export async function customFunctionsServer() { //generate a Fastify server instance server = fastifyServer = await buildServer(isHttps); } catch (err) { - harperLogger.error(`Custom Functions buildServer error: ${err}`); + harperLogger.status({ problem: 'custom-functions.build' }).error(`Custom Functions buildServer error: ${err}`); throw err; } @@ -92,13 +92,13 @@ export async function customFunctionsServer() { //make sure the process waits for the server to be fully instantiated before moving forward await server.ready(); } catch (err) { - harperLogger.error(`Custom Functions server.ready() error: ${err}`); + harperLogger.status({ problem: 'custom-functions.ready' }).error(`Custom Functions server.ready() error: ${err}`); throw 
err; } // fastify can't clean up properly server.server.cantCleanupProperly = true; } catch (err) { - harperLogger.error(`Custom Functions ${process.pid} Error: ${err}`); + harperLogger.status({ problem: 'custom-functions.init' }).error(`Custom Functions ${process.pid} Error: ${err}`); harperLogger.error(err); process.exit(1); } @@ -141,9 +141,9 @@ function buildRouteFolder(routesFolder, projectName) { })) .after((err, instance, next) => { if (err?.message) { - harperLogger.error(err.message); + harperLogger.status({ problem: 'custom-functions.routes' }).error(err.message); } else if (err) { - harperLogger.error(err); + harperLogger.status({ problem: 'custom-functions.routes' }).error(err); } next(); }); diff --git a/server/http.ts b/server/http.ts index 6df394e38..753c5bc80 100644 --- a/server/http.ts +++ b/server/http.ts @@ -74,7 +74,7 @@ export function deliverSocket(fdOrSocket, port, data) { if (data) socket.emit('data', data); } else if (retries < 5) retry(retries + 1); else { - harperLogger.error(`Server on port ${port} was not registered`); + harperLogger.status({ problem: 'server.port-registration' }).error(`Server on port ${port} was not registered`); socket.destroy(); } }, 1000); diff --git a/server/operationsServer.ts b/server/operationsServer.ts index 47e3ae9b1..9d44ec797 100644 --- a/server/operationsServer.ts +++ b/server/operationsServer.ts @@ -74,13 +74,13 @@ async function operationsServer(options: ServerOptions & { resources?: Resources } } catch (err) { server.close(); - harperLogger.error(err); + harperLogger.status({ problem: 'operations-server.config' }).error(err); harperLogger.error(`Error configuring operations server`); throw err; } } catch (err) { console.error(`Failed to build server on ${process.pid}`, err); - harperLogger.fatal(err); + harperLogger.status({ problem: 'operations-server.fatal' }).fatal(err); process.exit(1); } } diff --git a/server/serverHelpers/serverHandlers.js b/server/serverHelpers/serverHandlers.js index 
686b28d05..5d668ba6f 100644 --- a/server/serverHelpers/serverHandlers.js +++ b/server/serverHelpers/serverHandlers.js @@ -27,7 +27,7 @@ function handleServerUncaughtException(err) { os.EOL }Terminating ${isMainThread ? 'HDB' : 'thread'}.`; console.error(message); - harperLogger.fatal(message); + harperLogger.status({ problem: 'system.uncaught-exception' }).fatal(message); process.exit(1); } diff --git a/server/status/index.ts b/server/status/index.ts index 5efd2ccdf..d5b9c152e 100644 --- a/server/status/index.ts +++ b/server/status/index.ts @@ -4,6 +4,7 @@ import { loggerWithTag } from '../../utility/logging/logger.ts'; import { validateStatus } from '../../validation/statusValidator.ts'; import { type StatusId, type StatusValueMap, type StatusRecord, DEFAULT_STATUS_ID } from './definitions.ts'; import { internal as statusInternal, type AggregatedComponentStatus } from '../../components/status/index.ts'; +import { buildHierarchy, type StatusNode } from '../../components/status/hierarchy.ts'; import { restartNeeded } from '../../components/requestRestart.ts'; export { clearStatus as clear, getStatus as get, setStatus as set }; @@ -75,6 +76,7 @@ interface AggregatedComponentStatusWithName extends AggregatedComponentStatus { interface AllStatusSummary { systemStatus: Promise>; componentStatus: AggregatedComponentStatusWithName[]; + hierarchy: Record; restartRequired: boolean; } @@ -91,12 +93,16 @@ async function getAllStatus(): Promise { }) ); + // Build hierarchical view + const hierarchy = buildHierarchy(aggregatedStatuses); + // Get restart flag status const restartRequired = restartNeeded(); return { systemStatus: statusRecords as Promise>, componentStatus: componentStatusArray, + hierarchy, restartRequired, }; } diff --git a/server/threads/manageThreads.js b/server/threads/manageThreads.js index 552ebb4f9..2c133e42d 100644 --- a/server/threads/manageThreads.js +++ b/server/threads/manageThreads.js @@ -196,7 +196,7 @@ function startWorker(path, options = {}) { 
worker.on('error', (error) => { // log errors, and it also important that we catch errors so we can recover if a thread dies (in a recoverable // way) - harperLogger.error(`Worker index ${options.workerIndex} error:`, error); + harperLogger.status({ problem: 'threads.worker-error', expires: 120 }).error(`Worker index ${options.workerIndex} error:`, error); }); worker.on('exit', (_code) => { workers.splice(workers.indexOf(worker), 1); @@ -205,7 +205,7 @@ function startWorker(path, options = {}) { if (worker.unexpectedRestarts < MAX_UNEXPECTED_RESTARTS) { options.unexpectedRestarts = worker.unexpectedRestarts + 1; startWorker(path, options); - } else harperLogger.error(`Thread has been restarted ${worker.restarts} times and will not be restarted`); + } else harperLogger.status({ problem: 'threads.restart-limit' }).error(`Thread has been restarted ${worker.restarts} times and will not be restarted`); } }); workers.push(worker); @@ -240,7 +240,7 @@ async function restartWorkers( // some reason, so we need to reset it to the correct path. 
process.chdir(process.cwd()); } catch (e) { - harperLogger.error('Unable to reestablish current working directory', e); + harperLogger.status({ problem: 'threads.cwd' }).error('Unable to reestablish current working directory', e); } // problematic cyclic dependency, bind late const { resetRestartNeeded } = require('../../components/requestRestart.ts'); diff --git a/server/threads/threadServer.js b/server/threads/threadServer.js index 77113677b..ac72b6ce0 100644 --- a/server/threads/threadServer.js +++ b/server/threads/threadServer.js @@ -64,7 +64,7 @@ process.on('uncaughtException', (error) => { if (error.isHandled) return; if (error.code === 'ECONNRESET' || error.code === 'ECONNREFUSED') return; // that's what network connections do if (error.message === 'write EIO') return; // that means the terminal is closed - harperLogger.error('uncaughtException', error); + harperLogger.status({ problem: 'threads.uncaught-exception' }).error('uncaughtException', error); }); env.initSync(); exports.globals = globals; @@ -217,7 +217,7 @@ function listenOnPorts() { else if (createReuseportFd) listen_on = { fd: createReuseportFd(+port, '::') }; else listen_on = { port }; } catch (error) { - harperLogger.error(`Unable to bind to port ${port}`, error); + harperLogger.status({ problem: 'server.port-bind' }).error(`Unable to bind to port ${port}`, error); continue; } listening.push( diff --git a/utility/logging/harper_logger.js b/utility/logging/harper_logger.js index 17dc10b26..37e21b9b3 100644 --- a/utility/logging/harper_logger.js +++ b/utility/logging/harper_logger.js @@ -218,6 +218,9 @@ class HarperLogger extends Console { withTag(tag) { return loggerWithTag(tag, true, this); } + status(options) { + return statusLogger(options, this); + } forComponent(_name) { // to be replaced return this; @@ -258,6 +261,8 @@ module.exports = { startOnMainThread: updateLogSettings, errorToString, disableStdio, + setStatusHandler, + status: (options) => statusLogger(options, mainLogger), }; /** 
@@ -297,6 +302,9 @@ module.exports.externalLogger = {
 	forComponent(name) {
 		return externalLogger.forComponent(name);
 	},
+	status(options) {
+		return externalLogger.status(options);
+	},
 };
 
 _assignPackageExport('logger', module.exports.externalLogger);
@@ -441,9 +449,46 @@ function stdioLogging() {
 	}
 }
 
+let statusHandler;
+function setStatusHandler(handler) {
+	statusHandler = handler;
+}
+/**
+ * Wrap `logger` so that a log call also reports a component status.
+ * A bare `status(options)` call reports the status on its own; when a log method is
+ * chained synchronously, only the chained call reports (with its level and message).
+ */
+function statusLogger(options, logger, tag) {
+	let chained = false;
+	// Defer the status-only report by one microtask: reporting eagerly made every chained
+	// call reach the handler twice (once with a null level, then again with the real
+	// level and message).
+	queueMicrotask(() => {
+		if (chained || !statusHandler) return;
+		try {
+			statusHandler(options, null, tag || logger.tag || currentTag, []);
+		} catch (error) {
+			// a throw from a microtask would surface as an uncaught exception, so log it instead
+			if (typeof logger.error === 'function') logger.error('Unable to report status', error);
+		}
+	});
+	const wrapper = {};
+	for (const level of ['notify', 'fatal', 'error', 'warn', 'info', 'debug', 'trace']) {
+		wrapper[level] = function (...args) {
+			chained = true;
+			// guard: a tagged/conditional logger may not provide a method for every level
+			if (typeof logger[level] === 'function') logger[level](...args);
+			if (statusHandler) {
+				statusHandler(options, level, tag || logger.tag || currentTag, args);
+			}
+		};
+	}
+	return wrapper;
+}
+
 function loggerWithTag(tag, conditional, logger = mainLogger) {
 	tag = tag.replace(/ /g, '-'); // tag can't have spaces
-	return {
+	const taggedLogger = {
 		notify: logWithTag(logger.notify, 'notify'),
 		fatal: logWithTag(logger.fatal, 'fatal'),
 		error: logWithTag(logger.error, 'error'),
@@ -451,7 +496,11 @@ function loggerWithTag(tag, conditional, logger = mainLogger) {
 		info: logWithTag(logger.info, 'info'),
 		debug: logWithTag(logger.debug, 'debug'),
 		trace: logWithTag(logger.trace, 'trace'),
+		status(options) {
+			return statusLogger(options, taggedLogger, tag);
+		},
 	};
+	return taggedLogger;
 	function logWithTag(loggerMethod, level) {
 		return !conditional || logger.level <= LOG_LEVEL_HIERARCHY[level]
 			? function (...args) {
diff --git a/utility/logging/logger.ts b/utility/logging/logger.ts
index c1d42d3e5..e9ccc9487 100644
--- a/utility/logging/logger.ts
+++ b/utility/logging/logger.ts
@@ -8,11 +8,33 @@ for (let level of ['trace', 'debug', 'info', 'warn', 'error', 'fatal', 'notify']
 		logger[level] = harperLogger[level];
 	}
 }
+logger.status = harperLogger.status;
 
 export function loggerWithTag(tag: string): Logger {
 	return harperLogger.loggerWithTag(tag, true) as Logger;
 }
 
+export interface StatusOptions {
+	/** A key identifying the problem. Sets/updates a status entry at this key. */
+	problem?: string;
+	/** A key to resolve. Clears the status entry at this key back to healthy. */
+	resolves?: string;
+	/** Seconds until the status auto-clears. Only valid with `problem`. */
+	expires?: number;
+	/** Status level for status-only calls (no chained log method). Defaults to 'error'. */
+	level?: string;
+}
+
+export interface StatusLogger {
+	notify: (...args: any[]) => void;
+	fatal: (...args: any[]) => void;
+	error: (...args: any[]) => void;
+	warn: (...args: any[]) => void;
+	info: (...args: any[]) => void;
+	debug: (...args: any[]) => void;
+	trace: (...args: any[]) => void;
+}
+
 export interface Logger {
 	notify?: (...args: any[]) => void;
 	fatal?: (...args: any[]) => void;
@@ -21,4 +43,5 @@ export interface Logger {
 	info?: (...args: any[]) => void;
 	debug?: (...args: any[]) => void;
 	trace?: (...args: any[]) => void;
+	status?: (options: StatusOptions) => StatusLogger;
 }