diff --git a/docs/Taskfile.yaml b/docs/Taskfile.yaml new file mode 100644 index 0000000..319b9e6 --- /dev/null +++ b/docs/Taskfile.yaml @@ -0,0 +1,75 @@ +version: '3' + +vars: + DIAGRAMS_DIR: "{{.TASKFILE_DIR}}/diagrams" # anchored to this Taskfile's directory so tasks work from any working directory + OUTPUT_FORMAT: "png" + +tasks: + generate: + desc: Generate all documentation artifacts (diagrams, etc.) + cmds: + - task: diagrams:render + silent: true + + diagrams: + desc: Generate all architecture diagrams from PlantUML + cmds: + - task: diagrams:render + silent: true + + diagrams:render: + desc: Render PlantUML diagrams to PNG format using Docker + cmds: + - | + set -e + echo "Rendering PlantUML diagrams..." + echo "" + + # Check if PlantUML files exist (directory is quoted so paths containing spaces work) + if ! ls "{{.DIAGRAMS_DIR}}"/*.puml 1>/dev/null 2>&1; then + echo "No PlantUML source files found in {{.DIAGRAMS_DIR}}" + exit 0 + fi + + # Render using Docker (no local installation required); the glob is quoted so PlantUML expands it inside the container, not the host shell + docker run --rm \ + -v "{{.DIAGRAMS_DIR}}":/data \ + plantuml/plantuml:latest \ + -t{{.OUTPUT_FORMAT}} \ + "/data/*.puml" + + echo "" + echo "Diagrams rendered in {{.DIAGRAMS_DIR}}" + echo "" + echo "Generated files:" + ls -1 "{{.DIAGRAMS_DIR}}"/*.{{.OUTPUT_FORMAT}} 2>/dev/null | xargs -n1 basename || echo "No output files found" + silent: true + + diagrams:clean: + desc: Remove generated diagram files + cmds: + - | + rm -f "{{.DIAGRAMS_DIR}}"/*.png "{{.DIAGRAMS_DIR}}"/*.svg + echo "Generated diagram files removed" + silent: true + + diagrams:validate: + desc: Validate PlantUML syntax using Docker + cmds: + - | + set -e + echo "Validating PlantUML diagrams..." + + # Check if PlantUML files exist; the run below uses -checkonly because -syntax reads from stdin rather than files + if !
ls "{{.DIAGRAMS_DIR}}"/*.puml 1>/dev/null 2>&1; then + echo "No PlantUML source files found in {{.DIAGRAMS_DIR}}" + exit 0 + fi + + docker run --rm \ + -v "{{.DIAGRAMS_DIR}}":/data \ + plantuml/plantuml:latest \ + -checkonly \ + "/data/*.puml" + echo "All diagrams are valid" + silent: true diff --git a/docs/architecture/customer-facing-logs.md b/docs/architecture/customer-facing-logs.md new file mode 100644 index 0000000..e15423d --- /dev/null +++ b/docs/architecture/customer-facing-logs.md @@ -0,0 +1,528 @@ +# Customer-Facing Logs + +Status: Draft +Scope (v1): AI Edge (HTTPProxy + WAF) logs only + +## Motivation + +Datum platform services emit operational signals — request logs, security +events, control-plane activity — that customers need visibility into for +debugging, compliance, and security investigation. Today there is no +customer-facing query surface for these logs. Customers running workloads on +AI Edge (Datum's HTTP proxy + WAF product) cannot answer basic questions +like "show me 5xx responses for my proxy in the last hour" or "which +requests did the WAF block." + +This design defines a project-scoped, multi-tenant logs pipeline with a +Loki-compatible query API. AI Edge is the v1 scope: it produces high-volume +access logs and WAF events that are the most acute customer need, and its +log shape exercises every layer of the design without depending on +control-plane audit-log work that lives elsewhere. + +## Goals (v1) + +- Customers can query AI Edge access logs and WAF events for their project + through Grafana, LogCLI, and any Loki-compatible client. +- All logs are tenant-isolated at storage and query time; cross-tenant + reads are structurally impossible. +- Log schemas are declared once by the producing service and surface + automatically as catalog metadata (resource types, label vocabulary, log + definitions).
+- Service teams can see logs from their own service across all consumers + in the service's producer project; customers only see logs scoped to + their own project. This follows GCP's consumer / producer pattern, which + falls out naturally from Milo's project hierarchy (both tenants and + service producers are modelled as projects). +- 7-day default retention for operational logs. Retention is platform-set + in v1; not user-controllable. + +## Non-Goals (v1) + +- Control-plane audit logs. Audit logs are collected by the activity + system (`milo-os/activity`) and stored separately; they do not flow + through this pipeline. +- Customer-configurable log export (`LogSource` in `ExportPolicy`) — + deferred to a follow-on enhancement. +- Body-content redaction via regex; v1 redacts at attribute level only. +- Log-based metrics and alerting derived from log streams. +- Per-project ingestion quota. Volume protection in v1 is platform-set + defaults at the gateway; a `LogIngestionQuota` resource is a follow-on + enhancement. + +## Layers + +### 1. Service Declaration + +Services declare what they emit in their `ServiceConfiguration` +(`services.miloapis.com/v1alpha1`). Two fields participate: + +- `spec.monitoredResourceTypes[]` — already fans out to + `billing.MonitoredResourceType`; now also fans out to a new + `telemetry.MonitoredResourceType`. +- `spec.logs[]` (new) — fans out to `telemetry.LogDefinition`. + +AI Edge declaration: + +```yaml +apiVersion: services.miloapis.com/v1alpha1 +kind: ServiceConfiguration +metadata: + name: networking-datumapis-com +spec: + serviceRef: + name: networking-datumapis-com + phase: Published + monitoredResourceTypes: + - resourceTypeName: networking.datumapis.com/HTTPProxy + displayName: HTTP Proxy + gvk: + group: networking.datumapis.com + kind: HTTPProxy + labels: + - name: resource.group + description: API group of the resource (networking.datumapis.com). + - name: resource.kind + description: Resource kind (HTTPProxy). 
+ - name: resource.name + description: Name of the HTTPProxy instance. + - name: resource.namespace + description: Project namespace the HTTPProxy belongs to. + - name: hostname + description: Hostname the request was received on. + logs: + - logID: networking.datumapis.com/httpproxy-access + displayName: HTTP Proxy Access Log + description: One entry per HTTP request handled by the proxy. + monitoredResourceType: networking.datumapis.com/HTTPProxy + entrySchema: + - name: http.request.id + description: Per-request correlation ID (Envoy x-request-id). + - name: http.request.method + description: HTTP method (GET, POST, etc). + - name: http.response.status_code + description: HTTP response status returned to the client. + - name: url.path + description: Request path. + - name: client.address + description: Client IP. + - name: user_agent.original + description: Verbatim User-Agent header sent by the client. + - name: http.request.duration_ms + description: Request duration in milliseconds. + - name: edge.pop.ingress + description: PoP code that received the request (e.g. cdg1). + - name: edge.pop.upstream + description: PoP that routed to the upstream when different from ingress; empty when handled at ingress. + - name: waf.outcome + description: Summary of WAF decision for this request — allowed, blocked, or challenged. + - name: waf.matched_rules + description: Number of WAF rules that matched on this request. Non-zero implies a paired httpproxy-waf entry exists per matched rule. + destinations: + - type: consumer # written to the customer's project + - type: producer # written to the networking service's producer project + categoryGroups: [allLogs] + + - logID: networking.datumapis.com/httpproxy-waf + displayName: HTTP Proxy WAF Event Log + description: One entry per WAF rule evaluation that matched or blocked. 
+ monitoredResourceType: networking.datumapis.com/HTTPProxy + entrySchema: + - name: http.request.id + description: Matches the http.request.id on the paired httpproxy-access entry. PoP, user agent, response status, and other request-level context are joined from there. + - name: waf.rule.id + description: Identifier of the WAF rule that matched. + - name: waf.action + description: Action taken for this rule — block, log, challenge. + - name: waf.severity + description: Severity classification of the matched rule. + destinations: + - type: consumer + - type: producer + categoryGroups: [allLogs] +``` + +A log entry is written once per declared destination: + +- `consumer` — the customer's project. They query their own project and + see only their data. +- `producer` — the service's producer project (here, the networking + service's project). The Datum networking team queries that project and + sees logs across all consumers, with the originating consumer preserved + on each entry as a `consumer_name` label. + +Producer-only log types (no `consumer` destination) are also supported — +useful for internal diagnostics that should never be visible to +customers. + +### 2. Platform Catalog + +The services operator (`milo-os/telemetry`) owns two new CRDs that the +`ServiceConfiguration` controller fans out into. + +`telemetry.MonitoredResourceType` — instance-identifying label vocabulary +for a resource Kind. 
Parallel to `billing.MonitoredResourceType`: + +```yaml +apiVersion: telemetry.miloapis.com/v1alpha1 +kind: MonitoredResourceType +metadata: + name: networking-datumapis-com-httpproxy +spec: + resourceTypeName: networking.datumapis.com/HTTPProxy + phase: Published + displayName: HTTP Proxy + gvk: + group: networking.datumapis.com + kind: HTTPProxy + labels: + - name: resource.group + - name: resource.kind + - name: resource.name + - name: resource.namespace + - name: hostname +``` + +`LogDefinition` — the log type catalog entry; references +`MonitoredResourceType` by `resourceTypeName`: + +```yaml +apiVersion: telemetry.miloapis.com/v1alpha1 +kind: LogDefinition +metadata: + name: networking-datumapis-com-httpproxy-access +spec: + logID: networking.datumapis.com/httpproxy-access + phase: Published + displayName: HTTP Proxy Access Log + monitoredResourceType: networking.datumapis.com/HTTPProxy + entrySchema: + - name: http.request.id + - name: http.request.method + - name: http.response.status_code + - name: url.path + - name: client.address + - name: user_agent.original + - name: http.request.duration_ms + - name: edge.pop.ingress + - name: edge.pop.upstream + - name: waf.outcome + - name: waf.matched_rules + destinations: + - type: consumer + - type: producer + categoryGroups: [allLogs] +``` + +Both CRDs are server-managed: the `ServiceConfiguration` controller is the +sole writer. Customers read them via standard list/get to populate UIs and +discover available log types. + +### 3. Ingestion Pipeline + +![Ingestion Pipeline](../diagrams/ingestion-pipeline.png) + +AI Edge data-plane components (Envoy + WAF sidecar) emit logs over OTLP to +a regional OTel Collector gateway. Workload identity cannot be relied on +to resolve the project — the source of these logs is typically a service +component (e.g. Envoy) writing to a log sink, not a consumer-authored +application running with the consumer's identity. Tenancy therefore has +to travel on the log record itself. 
+ +Every log record entering the gateway must carry tenancy labels stamped +by the producing service: + +- `tenant.kind` — the type of tenant that generated the log + (`Project`, `Organization`, `User`). +- `tenant.name` — the resource name of the tenant + (e.g. `personal-project-xyz`). + +Records missing these labels are rejected. Services are also responsible +for stamping resource identity labels declared by their +`MonitoredResourceType` (`resource.group`, `resource.kind`, +`resource.name`, `resource.namespace`, and any service-specific labels +such as `hostname`). The gateway enforces the vocabulary; it does not +inject tenancy or instance identity. + +Gateway responsibilities: + +1. Receive OTLP log records. +2. Validate that `tenant.kind` and `tenant.name` are present and refer to + a tenant the caller is authorised to write logs for. +3. Look up the declared `MonitoredResourceType` for the entry's + `resource_type` and validate that emitted resource attributes are a + subset of the declared label vocabulary. Reject undeclared labels. +4. Resolve `tenant_id` from `(tenant.kind, tenant.name)` via the project + catalog. +5. For each declared destination on the matching `LogDefinition`, emit one + log record: + - `consumer` → `tenant_id` resolved from the originating tenant. + - `producer` → `tenant_id` resolved from the service's producer + project, with `consumer_name` set to the originating tenant. +6. Hand the resulting records off to NATS for durable buffering. + +A NATS JetStream subject sits between the gateway and ClickHouse. NATS +gives us: + +- **Backpressure**. If ClickHouse is down or slow, the consumer pauses; + NATS retains the backlog rather than the gateway dropping records. +- **Live tail**. The same stream feeds the Loki `/tail` handler, so tail + doesn't need to poll ClickHouse — see Live Tail below. + +A ClickHouse-writer consumer drains NATS into the `platform_logs` table +in batches. + +### 4. 
Storage + +Shared ClickHouse `platform_logs` table, OTel-aligned schema, `tenant_id` +first in `ORDER BY` and partition key: + +```sql +CREATE TABLE platform_logs ( + tenant_id UInt32, + timestamp UInt64, + observed_timestamp UInt64, + severity_number UInt8, + severity_text LowCardinality(String), + body String, + log_id LowCardinality(String), + resource_type LowCardinality(String), + resource_group LowCardinality(String), + resource_kind LowCardinality(String), + resource_name String, + resource_namespace LowCardinality(String), + consumer_name String, -- empty on consumer-destination rows + attributes_string Map(String, String), + resources_string Map(String, String), + trace_id String, + span_id String +) +ENGINE = MergeTree() +PARTITION BY (tenant_id, toYYYYMM(toDateTime(timestamp / 1e9))) +ORDER BY (tenant_id, resource_type, resource_name, log_id, timestamp) +TTL toDateTime(timestamp / 1e9) + INTERVAL 7 DAY DELETE; +``` + +Top-level columns are chosen for the two common query shapes: + +- **Per-resource**: "give me all access logs for proxy XYZ". Served by + the `(tenant_id, resource_type, resource_name, log_id)` prefix of the + sort key. +- **Per-tenant**: "give me all logs for project X". Served by the + `tenant_id` prefix. + +`log_id`, `resource_type`, `resource_group`, `resource_kind`, and +`resource_namespace` are all low-cardinality and appear in nearly every +query's filter clause. `resource_name` is high-cardinality but is the +primary drill-down key, so it earns a top-level column and a position in +the sort key. `consumer_name` is populated only on producer-destination +rows, so service teams can filter "show me logs for consumer X" without +cross-tenant grants. + +### 5. 
Query API — Loki-Compatible, Project-Scoped + +Customer query surface is a Loki-compatible HTTP API exposed under the +project's control-plane endpoint: + +``` +GET {project-control-plane-endpoint}/telemetry/loki/api/v1/query +GET {project-control-plane-endpoint}/telemetry/loki/api/v1/query_range +GET {project-control-plane-endpoint}/telemetry/loki/api/v1/labels +GET {project-control-plane-endpoint}/telemetry/loki/api/v1/label/{name}/values +GET {project-control-plane-endpoint}/telemetry/loki/api/v1/series +GET {project-control-plane-endpoint}/telemetry/loki/api/v1/tail +``` + +`{project-control-plane-endpoint}` is the same per-project control-plane +URL Milo already issues for Kubernetes API access; the telemetry handler +mounts at `/telemetry/...` under it. The project is therefore resolved +from the endpoint itself — no `{project}` placeholder in the path, no +`X-Scope-OrgID` header. `X-Scope-OrgID` sent by Grafana is ignored. + +The Milo gateway resolves the endpoint to a `tenant_id` and enforces IAM +before the request reaches the Loki handler. The handler itself is a pure +query layer: + +- Parses LogQL. +- Translates to ClickHouse SQL: stream selectors → top-level column + lookups (`tenant_id`, `resource_type`, `resource_name`, `log_id`, …) + where possible, `resources_string` map lookups otherwise; line filters + → `body LIKE` / full-text; parsed field filters → `attributes_string` + lookups. +- Executes with `tenant_id` already injected from the endpoint context. +- Serialises results in Loki's response format. + +Label and series discovery is served from the `MonitoredResourceType` +catalog rather than from ClickHouse, so discovery works on empty projects +and Grafana's stream-selector UI populates correctly on first open. + +Grafana datasource configuration: base URL set to the project's +control-plane endpoint with `/telemetry/` appended, type Loki, no custom +plugin. 
+ +#### Example queries + +Consumer querying their own project: + +```logql +{log_id="networking.datumapis.com/httpproxy-access", resource_name="api-gateway"} + | json | http_response_status_code >= 500 +``` + +Service team querying the networking service's producer project — across +all consumers (a LogQL metric query; v1 support for metric queries is still listed under Open Questions), or drilling into one: + +```logql +# Aggregate error rate by consumer +sum by (consumer_name) ( + rate({log_id="networking.datumapis.com/httpproxy-access"} + | json | http_response_status_code >= 500 [5m]) +) + +# Drill into a specific consumer +{log_id="networking.datumapis.com/httpproxy-access", consumer_name="ecommerce-co"} + | json | http_response_status_code >= 500 +``` + +No cross-tenant grants are needed for either side — each principal has +IAM on the project (consumer or producer) whose endpoint they're querying. + +A secondary `LogQuery` virtual resource (Kubernetes-native, modelled on +`AuditLogQuery` in `milo-os/activity`) is retained for kubectl-native and +GitOps workflows. It shares the same LogQL → SQL translation layer. + +### 6. Access Control + +Milo IAM gates access at the project boundary via standard Kubernetes +RBAC on the project's telemetry endpoint. Because the URL is the project +control-plane endpoint, the same RBAC that protects the rest of the +project's resources protects log queries — no separate access model. + +Consumer vs. producer separation is what gives service teams visibility +across all consumers of their service: a Datum networking SRE needs IAM +only on the networking service's producer project to see access logs for +every customer's `HTTPProxy`. No cross-tenant grant is required. + +## Cross-Cutting Concerns + +### Retention + +Fixed defaults; not user-controllable in v1. + +| Category Group | Retention | +|---|---| +| `allLogs` | 7 days | + +Implemented via the table TTL on the `timestamp` column. Per-project or +per-category retention overrides are a follow-on enhancement.
+ +### Default Enablement + +`allLogs` collection is controlled per project via a `LogCollectionPolicy` +resource, with high-volume log types opt-in, so customers don't get surprise bills for log +volume generated by workload activity they didn't ask to have logged. + +For v1 (AI Edge only): proxy access logs default off (opt-in), WAF events default +on (the volume is bounded by request rate × match rate, not full request +rate). + +### Live Tail + +The Loki `/tail` endpoint is served by a small handler that subscribes to +the NATS subject the ingestion pipeline already writes to, filters by +`tenant_id` and the stream selector from the request, and streams +matching records over the WebSocket. This avoids polling ClickHouse and +keeps tail latency in the low hundreds of milliseconds. + +### Request Correlation + +A single HTTP request through AI Edge produces one access log entry +(`httpproxy-access`) and zero-or-more WAF entries (`httpproxy-waf`, one +per matched rule). All of them carry the same `http.request.id` +(Envoy's `x-request-id`, which already propagates through the filter +chain to the WAF sidecar). That's the join key. + +The model favours denormalisation on the access log for the common case: + +- `waf.outcome` (`allowed` / `blocked` / `challenged`) and + `waf.matched_rules` (count) are stamped directly on the access log, + so the high-frequency "show me blocked requests" query is a single + stream filter, not a join — same shape as GCP Cloud Armor's + `enforcedSecurityPolicy.outcome` on LB access logs. +- The per-rule `httpproxy-waf` entries carry the rule id, action, and + severity, joined back to the access log by `http.request.id` when the + customer needs to drill in to "which rules fired on this request." + +This supports a per-request lifecycle view (one row per request, +expandable to show every WAF rule that fired) without forcing every +query through a join.
The lifecycle view itself is built by a single +`http.request.id` filter across both streams: + +```logql +{log_id=~"networking.datumapis.com/httpproxy-(access|waf)"} + | json | http_request_id="phl94-1779186433904-397d1bd984ce" +``` + +`edge.pop.ingress` (where the request was received) and +`edge.pop.upstream` (where it was routed to, when different) live only +on the access log; WAF entries inherit them by joining on +`http.request.id`. They're emission context, not resource identity — +one `HTTPProxy` serves from many PoPs — so they're stamped at emission +by the data plane and aren't part of the `MonitoredResourceType` +vocabulary. The same reasoning is why the WAF entry schema is lean: +client IP, user agent, response status, PoP — anything that already +exists on the paired access entry is reached via the join rather than +duplicated on every matched-rule row. + +### Redaction + +- Platform-managed denylist of attribute keys always dropped or hashed + at the gateway (`*.token`, `*.password`, `authorization`, ...). +- Customer-configurable `LogRedactionPolicy` resource — attribute-level + drop/hash rules only. +- Body content is **not** redacted in v1. Documented as a constraint; + services are pushed to put structured data in attributes. + +## Fan-Out Summary + +``` +ServiceConfiguration + spec.monitoredResourceTypes[] → billing.MonitoredResourceType (existing) + → telemetry.MonitoredResourceType (new) + spec.logs[] → telemetry.LogDefinition (new) +``` + +## v1 Delivery Slice + +In dependency order: + +1. CRDs: `MonitoredResourceType`, `LogDefinition`, + `LogCollectionPolicy`, `LogRedactionPolicy`, `LogQuery`. +2. Fan-out controllers in this operator for `MonitoredResourceType` and + `LogDefinition`. +3. NATS JetStream subject and ClickHouse `platform_logs` table. +4. OTel Collector gateway with tenancy-label validation + (`tenant.kind` / `tenant.name`), label-vocabulary validation, and + per-destination fan-out (`consumer` / `producer`) into NATS. +5.
ClickHouse writer consumer draining NATS into `platform_logs`. +6. AI Edge data-plane integration: Envoy access log + WAF event OTLP + exporters that stamp tenancy and resource identity labels; + `ServiceConfiguration` for `networking-datumapis-com` with the two log + definitions. +7. Loki API handler at + `{project-control-plane-endpoint}/telemetry/loki/api/v1/...` backed by + a LogQL → SQL translator, plus the NATS-backed `/tail` handler. +8. Catalog-backed labels/series discovery. +9. Grafana datasource documentation. + +## Open Questions + +- Whether `LogCollectionPolicy` is project-scoped or finer-grained (per + `HTTPProxy`). Project-scoped is the simpler v1; finer granularity is a + future enhancement once we see usage patterns. +- Loki LogQL feature subset for v1: instant queries, range queries, + line filters, label filters are required; metric queries + (`rate`, `sum by`, ...) likely deferred to v2. +- How the catalog-backed label discovery handles tenant-specific label + values (e.g. the set of `resource.name` values that actually exist in + the project). Likely a hybrid: label names from catalog, values from + ClickHouse with a short cache. diff --git a/docs/diagrams/ingestion-pipeline.png b/docs/diagrams/ingestion-pipeline.png new file mode 100644 index 0000000..3e7f795 Binary files /dev/null and b/docs/diagrams/ingestion-pipeline.png differ diff --git a/docs/diagrams/ingestion-pipeline.puml b/docs/diagrams/ingestion-pipeline.puml new file mode 100644 index 0000000..d7724f8 --- /dev/null +++ b/docs/diagrams/ingestion-pipeline.puml @@ -0,0 +1,36 @@ +@startuml ingestion-pipeline +!include https://raw.githubusercontent.com/plantuml-stdlib/C4-PlantUML/master/C4_Container.puml + +LAYOUT_LANDSCAPE() +LAYOUT_WITH_LEGEND() + +title Customer-Facing Logs — Ingestion Pipeline (AI Edge v1) + +' External producers +System_Ext(ai_edge, "AI Edge Data Plane", "Envoy + WAF sidecar handling customer HTTP traffic. 
Emits one OTLP log record per request (access) and per WAF rule match (waf). Stamps tenant.kind / tenant.name and resource identity labels on every record.") + +' Catalog and per-tenant policies — read by the gateway via cached informers +System_Ext(catalog, "Telemetry Catalog", "MonitoredResourceType and LogDefinition CRDs published by the telemetry-services-operator. Defines the legal label vocabulary per resource type and the consumer / producer destinations per log.") +System_Ext(policies, "Per-Tenant Policies", "LogCollectionPolicy and LogRedactionPolicy resources.") + +' Ingestion system boundary +System_Boundary(ingest, "Ingestion Pipeline") { + Container(gateway, "OTel Collector Gateway", "OpenTelemetry Collector, regional Deployment", "Validates tenant.kind / tenant.name. Resolves tenant_id via the project catalog. Validates resource attributes against catalog vocabulary. Fans out one record per declared destination (consumer / producer), stamping consumer_name on producer records. Drops/hashes per redaction denylist.") + ContainerQueue(nats, "NATS JetStream", "Durable subject", "Buffers records between the gateway and ClickHouse. Provides backpressure if ClickHouse is down and feeds the live-tail handler.") + Container(writer, "ClickHouse Writer", "Go consumer", "Drains NATS into platform_logs in batches.") + ContainerDb(clickhouse, "ClickHouse", "platform_logs table", "MergeTree, partitioned by (tenant_id, month). Sorted by (tenant_id, resource_type, resource_name, log_id, timestamp).
Table TTL on timestamp.") +} + +' Producer flow (ingress) +Rel_D(ai_edge, gateway, "Emit log records (OTLP/gRPC) with tenancy + resource labels.") + +' Gateway lookups +Rel_R(gateway, catalog, "Validate label vocabulary; resolve destinations", "informer cache") +Rel_R(gateway, policies, "Apply collection / redaction", "informer cache") + +' Successful write path +Rel_D(gateway, nats, "Publish per-destination records", "NATS") +Rel_D(nats, writer, "Consume", "NATS") +Rel_D(writer, clickhouse, "Batch insert", "TCP/9000") + +@enduml