From 2d99343037c862b0b0b1e935701a0bc10b8150f7 Mon Sep 17 00:00:00 2001 From: Noam Ben Simon Date: Thu, 14 May 2026 21:49:32 -0400 Subject: [PATCH 1/3] postgres surgically removed ;) --- .env.example | 13 -- docker/README.md | 199 +++++----------- docker/dsv/docker-compose.dsv-postgresql.yml | 51 ----- ...docker-compose.dsv-redis-kafka-3nodes.yml} | 45 +--- ...yml => docker-compose.dsv-redis-kafka.yml} | 35 +-- .../docker-compose.postgresql-production.yml | 100 -------- .../postgresql/docker-compose.postgresql.yml | 23 -- docker/postgresql/postgresql.conf | 137 ----------- docker/postgresql/scripts/init-primary.sh | 11 - .../postgresql/scripts/replica-entrypoint.sh | 25 -- docs/architecture.md | 6 +- docs/docker.md | 199 ++++------------ docs/kubernetes.md | 214 +++++------------- docs/technologies.md | 119 ++-------- k8s/README.md | 152 ++++++------- k8s/production/app-statefulset.yaml | 15 -- k8s/production/postgres-configmap.yaml | 27 --- k8s/production/postgres.yaml | 81 ------- k8s/testing/app-service.yaml | 3 +- k8s/testing/app-statefulset.yaml | 20 +- k8s/testing/postgres-configmap.yaml | 27 --- k8s/testing/postgres.yaml | 64 ------ pom.xml | 16 +- scripts/test-three-dsv-kafka-nodes.sh | 4 +- .../config/StorageConfig.java | 4 - .../service/secret/SecretService.java | 3 +- src/main/resources/application-dev.properties | 21 -- .../application-single-redis.properties | 4 +- src/main/resources/application.properties | 59 +---- .../resources/application-test.properties | 48 +--- src/test/resources/application.properties | 48 +--- 31 files changed, 273 insertions(+), 1500 deletions(-) delete mode 100644 docker/dsv/docker-compose.dsv-postgresql.yml rename docker/dsv/{docker-compose.dsv-redis-postgresql-3nodes.yml => docker-compose.dsv-redis-kafka-3nodes.yml} (67%) rename docker/dsv/{docker-compose.dsv-redis-postgresql.yml => docker-compose.dsv-redis-kafka.yml} (65%) delete mode 100644 docker/postgresql/docker-compose.postgresql-production.yml delete mode 100644 
docker/postgresql/docker-compose.postgresql.yml delete mode 100644 docker/postgresql/postgresql.conf delete mode 100644 docker/postgresql/scripts/init-primary.sh delete mode 100644 docker/postgresql/scripts/replica-entrypoint.sh delete mode 100644 k8s/production/postgres-configmap.yaml delete mode 100644 k8s/production/postgres.yaml delete mode 100644 k8s/testing/postgres-configmap.yaml delete mode 100644 k8s/testing/postgres.yaml diff --git a/.env.example b/.env.example index b714a3c..89b4094 100644 --- a/.env.example +++ b/.env.example @@ -57,19 +57,6 @@ SPRING_PROFILES_ACTIVE=dev # Spring framework logging # LOGGING_LEVEL_ORG_SPRINGFRAMEWORK=INFO -# ---------------------------------------------------- -# PostgreSQL Configuration (user accounts) -# ---------------------------------------------------- -# Used by Docker Compose and by the app when datasource is configured. -# Set POSTGRES_PASSWORD in .env; other values are optional (defaults shown). -POSTGRES_USER=dsv -POSTGRES_PASSWORD=POSTGRES_PASSWORD -POSTGRES_DB=dsv - -# Production only: replication user for PostgreSQL primary/standby (docker-compose.postgresql-production.yml) -# POSTGRES_REPLICATION_USER=replicator -# POSTGRES_REPLICATION_PASSWORD= - # ---------------------------------------------------- # Security Configuration (Future Use) # ---------------------------------------------------- diff --git a/docker/README.md b/docker/README.md index c44c3cb..5fb1c8a 100644 --- a/docker/README.md +++ b/docker/README.md @@ -1,190 +1,111 @@ # Docker Configuration -This directory contains all Docker-related configuration for the Distributed Secrets Vault. +This directory contains Docker configuration for the Distributed Secrets Vault. 
## Structure -``` +```text docker/ ├── dsv/ │ ├── docker-compose.dsv.yml # App only │ ├── docker-compose.dsv-redis.yml # App + Redis -│ ├── docker-compose.dsv-postgresql.yml # App + PostgreSQL -│ ├── docker-compose.dsv-redis-postgresql.yml # App + Redis + PostgreSQL + Kafka (single app) -│ └── docker-compose.dsv-redis-postgresql-3nodes.yml # Same stack, three DSV app instances (Kafka fanout test) +│ ├── docker-compose.dsv-redis-kafka.yml # App + Redis + Kafka +│ └── docker-compose.dsv-redis-kafka-3nodes.yml # Three DSV app instances ├── redis/ │ ├── docker-compose.redis.yml # Redis only │ └── redis.conf # Redis persistence and security config ├── kafka/ -│ └── docker-compose.kafka.yml # Kafka only (KRaft mode) -├── postgresql/ -│ ├── docker-compose.postgresql.yml # PostgreSQL only (development, single node) -│ ├── docker-compose.postgresql-production.yml # Production: primary + 2 standbys -│ ├── postgresql.conf # Production replication config (used by production compose) -│ └── scripts/ -│ ├── init-primary.sh # Creates replication user (production primary) -│ └── replica-entrypoint.sh # Bootstrap standbys from primary (production) -└── README.md # This file +│ └── docker-compose.kafka.yml # Kafka only, KRaft mode +└── README.md +``` Project root: -├── .env.example # Environment variable template -├── .env # Your local config (gitignored) -└── scripts/test-three-dsv-kafka-nodes.sh # Builds, starts 3-node stack, curls temp Kafka endpoint, checks logs + +```text +.env.example # Environment variable template +.env # Local config, gitignored +scripts/test-three-dsv-kafka-nodes.sh # Builds and verifies the 3-node Kafka fanout stack ``` ## Setup -1. **Create environment file in project root:** - - ```bash - cp .env.example .env - ``` - -2. 
**Set required values in `.env`** (recommended; otherwise dev compose uses defaults): - - ```env - REDIS_PASSWORD=your-secure-password-here - POSTGRES_PASSWORD=your-postgres-password-here - ``` - - If you skip `.env`, the **development** compose files use the same defaults as `.env.example` (`REDIS_PASSWORD`, `POSTGRES_PASSWORD`). Set stronger values in `.env` for any real use. Optionally set `POSTGRES_USER` and `POSTGRES_DB` (defaults: `dsv`). - -3. **Build and start from project root:** - - ```bash - ./mvnw clean package - mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) - docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up --build - ``` - - When you run `docker compose` from the project root (as in the commands below), Compose loads `.env` from the project root. +Create a local environment file from the project root: - **Other compose files:** - - App only: `docker compose -f docker/dsv/docker-compose.dsv.yml up --build` - - App + Redis: `docker compose -f docker/dsv/docker-compose.dsv-redis.yml up --build` - - App + PostgreSQL: `docker compose -f docker/dsv/docker-compose.dsv-postgresql.yml up --build` +```bash +cp .env.example .env +``` - **Three DSV instances (Kafka commit fanout):** do not run this at the same time as the single-app stack on the same machine (shared container names `dsv-redis`, `dsv-postgres`, `dsv-kafka`, and host ports). Stop the other stack first (`docker compose … down`). +Set a Redis password for any real local use: - ```bash - ./mvnw clean package -DskipTests - mkdir -p target/dependency && (cd target/dependency && jar -xf ../*.jar) - docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql-3nodes.yml up -d --build - ``` +```env +REDIS_PASSWORD=your-secure-password-here +SPRING_PROFILES_ACTIVE=dev +``` - Apps listen on **8081**, **8082**, and **8083** (mapped to container port 8080). 
Each instance sets a different `NODE_NAME` so Kafka consumer groups differ and every node receives `secrets-commit` messages. +Build and start the Redis + Kafka stack: - Automated check (build, start, publish once from app1, assert all three containers logged the commit): +```bash +./mvnw clean package -DskipTests +mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml up --build +``` - ```bash - ./scripts/test-three-dsv-kafka-nodes.sh - ``` +The API listens on `http://localhost:8080`. - Manual check after the stack is healthy (prefer **`127.0.0.1`** on WSL2 / Docker Desktop if `localhost` gives `Connection reset by peer`): +## Three App Nodes - ```bash - curl -sS http://127.0.0.1:8081/api/temp-test/kafka - docker logs dsv-app-1 2>&1 | grep -i "Received commit" | tail -3 - docker logs dsv-app-2 2>&1 | grep -i "Received commit" | tail -3 - docker logs dsv-app-3 2>&1 | grep -i "Received commit" | tail -3 - ``` +For local cluster-like testing, run three DSV app instances against the same Redis and Kafka services: -## Environment Variables +```bash +./mvnw clean package -DskipTests +mkdir -p target/dependency && (cd target/dependency && jar -xf ../*.jar) +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka-3nodes.yml up -d --build +``` -| Variable | Description | Default | -| ------------------------------- | --------------------------------- | --------------------------------------------------------- | -| `REDIS_PASSWORD` | Redis authentication password | Placeholder in `.env.example`; set in `.env` for real use | -| `POSTGRES_USER` | PostgreSQL user | `dsv` | -| `POSTGRES_PASSWORD` | PostgreSQL password (required) | Placeholder in `.env.example`; set in `.env` for real use | -| `POSTGRES_DB` | PostgreSQL database name | `dsv` | -| `POSTGRES_REPLICATION_USER` | Replication user (production) | `replicator` | -| `POSTGRES_REPLICATION_PASSWORD` | Replication password (production) | — 
| -| `SPRING_PROFILES_ACTIVE` | Spring Boot profile | `dev` | +Apps listen on `8081`, `8082`, and `8083`. Each instance sets a different `NODE_NAME` so Kafka consumer groups differ and every node receives `secrets-commit` messages. -## Redis Configuration +Automated check: -Redis is configured for durable secret storage with: +```bash +./scripts/test-three-dsv-kafka-nodes.sh +``` -- **AOF persistence**: `appendfsync everysec` (max 1 second data loss) -- **RDB snapshots**: Every 15 minutes if keys changed -- **No eviction**: Secrets are never auto-deleted -- **Password auth**: Required for all connections +Manual check: -See `redis/redis.conf` for full configuration. +```bash +curl -sS http://127.0.0.1:8081/api/temp-test/kafka +docker logs dsv-app-1 2>&1 | grep -i "Received commit" | tail -3 +docker logs dsv-app-2 2>&1 | grep -i "Received commit" | tail -3 +docker logs dsv-app-3 2>&1 | grep -i "Received commit" | tail -3 +``` ## Services -### `redis` - -- **Image**: redis:8.6-alpine -- **Ports**: 6379 -- **Volumes**: Persistent data in `redis-data` volume - -### `postgres` +`redis` stores secret shards durably with AOF persistence and password auth. -- **Image**: postgres:18.2-alpine -- **Ports**: 5432 -- **Volumes**: Persistent data in `postgres-data` volume -- **Healthcheck**: `pg_isready` before app starts -- **Purpose**: User accounts; development uses a single node. For production redundancy, use the production compose (see below). +`kafka` provides commit fanout and ordering infrastructure in KRaft mode. -### `kafka` - -- **Image**: apache/kafka:3.7.0 -- **Ports**: 9092 -- **Volumes**: Persistent data in `kafka-data` volume -- **Purpose**: Message broker for request sequencing. Uses KRaft (ZooKeeper-less) mode. - -### `app` - -- **Build**: From project root (build context `../..`); uses the Dockerfile in the project root. 
-- **Ports**: 8080 -- **Depends on**: Redis and/or PostgreSQL (waits for health checks when present) +`app` is the Spring Boot DSV service built from the repository root Dockerfile. ## Commands -All commands assume you are in the **project root**. - ```bash -# Full dev stack (app + Redis + PostgreSQL) -./mvnw clean package -mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up --build +# Start full local stack +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml up # Start in background -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up -d +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml up -d # View logs -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml logs -f app -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml logs -f redis -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml logs -f postgres +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml logs -f app +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml logs -f redis +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml logs -f kafka # Stop services -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml down +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml down -# Clean slate (removes volumes) -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml down -v - -# Rebuild after code changes -./mvnw clean package && mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up --build +# Clean slate, including volumes +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml down -v ``` -## Production PostgreSQL (multi-node) - -For production, run one primary and two synchronous standbys for redundancy: - -```bash -# From project root. 
Set in .env: POSTGRES_PASSWORD, POSTGRES_REPLICATION_USER (default: replicator), POSTGRES_REPLICATION_PASSWORD -docker compose -f docker/postgresql/docker-compose.postgresql-production.yml up -d -``` - -- **postgres-primary**: Read-write; uses `postgresql/postgresql.conf` (WAL archiving, synchronous replication). Port 5432. -- **postgres-1**, **postgres-2**: Read-only standbys; stream from primary. Application names match `synchronous_standby_names` in `postgresql.conf` so commits wait for at least one standby. -- Scripts: `scripts/init-primary.sh` creates the replication user on first start; `scripts/replica-entrypoint.sh` bootstraps each standby with `pg_basebackup` then starts streaming. - -Applications should connect to the primary (hostname `postgres-primary`) for read-write; standbys can be used for read scaling if desired. - -## Network - -All services communicate on the `dsv-network` bridge network. The app connects to Redis using the hostname `redis` and to PostgreSQL using the hostname `postgres` (dev) or `postgres-primary` (production). +All services communicate on the `dsv-network` bridge network. The app connects to Redis as `redis` and Kafka as `kafka:29092`. 
diff --git a/docker/dsv/docker-compose.dsv-postgresql.yml b/docker/dsv/docker-compose.dsv-postgresql.yml deleted file mode 100644 index 829d583..0000000 --- a/docker/dsv/docker-compose.dsv-postgresql.yml +++ /dev/null @@ -1,51 +0,0 @@ -# Run the Distributed Secrets Vault application with PostgreSQL -name: "Distributed Secrets Vault + PostgreSQL" - -# Environment variables are loaded from ../../.env (project root) -# Copy .env.example to .env and configure as needed - -services: - postgres: - image: postgres:18.2-alpine - container_name: dsv-postgres - ports: - - "5432:5432" - environment: - - LANG=C.UTF-8 - - POSTGRES_USER=${POSTGRES_USER:-dsv} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - - POSTGRES_DB=${POSTGRES_DB:-dsv} - volumes: - - postgres-data:/var/lib/postgresql/data - healthcheck: - test: ["CMD-SHELL", "pg_isready -U \"$$POSTGRES_USER\" -d \"$$POSTGRES_DB\""] - interval: 5s - timeout: 3s - retries: 5 - networks: - - dsv-network - - app: - build: ../.. - container_name: dsv-app - env_file: - - ../../.env - ports: - - "8080:8080" - environment: - - SPRING_PROFILES_ACTIVE=${SPRING_PROFILES_ACTIVE:-dev} - - SPRING_DATASOURCE_URL=jdbc:postgresql://postgres:5432/${POSTGRES_DB:-dsv} - - SPRING_DATASOURCE_USERNAME=${POSTGRES_USER:-dsv} - - SPRING_DATASOURCE_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - depends_on: - postgres: - condition: service_healthy - networks: - - dsv-network - -volumes: - postgres-data: - -networks: - dsv-network: - driver: bridge diff --git a/docker/dsv/docker-compose.dsv-redis-postgresql-3nodes.yml b/docker/dsv/docker-compose.dsv-redis-kafka-3nodes.yml similarity index 67% rename from docker/dsv/docker-compose.dsv-redis-postgresql-3nodes.yml rename to docker/dsv/docker-compose.dsv-redis-kafka-3nodes.yml index 57893b1..48516cf 100644 --- a/docker/dsv/docker-compose.dsv-redis-postgresql-3nodes.yml +++ b/docker/dsv/docker-compose.dsv-redis-kafka-3nodes.yml @@ -1,11 +1,6 @@ -# Same stack as 
docker-compose.dsv-redis-postgresql.yml but three DSV app instances. -# Use this file OR the single-app compose file, not both (same project name and infra container names). -# -# Each app has a distinct NODE_NAME so Kafka consumer groups differ — every instance receives -# the full secrets-commit topic (see application.properties spring.kafka.consumer.group-id). -# +# Same stack as docker-compose.dsv-redis-kafka.yml but with three DSV app instances. # HTTP: localhost:8081 (app1), :8082 (app2), :8083 (app3) -name: "Distributed Secrets Vault + Redis + PostgreSQL" +name: "Distributed Secrets Vault + Redis + Kafka" services: redis: @@ -25,26 +20,6 @@ services: networks: - dsv-network - postgres: - image: postgres:18.2-alpine - container_name: dsv-postgres - ports: - - "5432:5432" - environment: - - LANG=C.UTF-8 - - POSTGRES_USER=${POSTGRES_USER:-dsv} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - - POSTGRES_DB=${POSTGRES_DB:-dsv} - volumes: - - postgres-data:/var/lib/postgresql/data - healthcheck: - test: ["CMD-SHELL", "pg_isready -U \"$$POSTGRES_USER\" -d \"$$POSTGRES_DB\""] - interval: 5s - timeout: 3s - retries: 5 - networks: - - dsv-network - kafka: image: apache/kafka:3.7.0 container_name: dsv-kafka @@ -92,16 +67,11 @@ services: - SPRING_DATA_REDIS_PORT=6379 - SPRING_DATA_REDIS_PASSWORD=${REDIS_PASSWORD:-REDIS_PASSWORD} - SPRING_PROFILES_ACTIVE=${SPRING_PROFILES_ACTIVE:-dev} - - SPRING_DATASOURCE_URL=jdbc:postgresql://postgres:5432/${POSTGRES_DB:-dsv} - - SPRING_DATASOURCE_USERNAME=${POSTGRES_USER:-dsv} - - SPRING_DATASOURCE_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - KAFKA_BOOTSTRAP_SERVERS=kafka:29092 - SEED_DNS_HOST=localhost depends_on: redis: condition: service_healthy - postgres: - condition: service_healthy kafka: condition: service_healthy networks: @@ -120,16 +90,11 @@ services: - SPRING_DATA_REDIS_PORT=6379 - SPRING_DATA_REDIS_PASSWORD=${REDIS_PASSWORD:-REDIS_PASSWORD} - SPRING_PROFILES_ACTIVE=${SPRING_PROFILES_ACTIVE:-dev} - - 
SPRING_DATASOURCE_URL=jdbc:postgresql://postgres:5432/${POSTGRES_DB:-dsv} - - SPRING_DATASOURCE_USERNAME=${POSTGRES_USER:-dsv} - - SPRING_DATASOURCE_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - KAFKA_BOOTSTRAP_SERVERS=kafka:29092 - SEED_DNS_HOST=localhost depends_on: redis: condition: service_healthy - postgres: - condition: service_healthy kafka: condition: service_healthy networks: @@ -148,16 +113,11 @@ services: - SPRING_DATA_REDIS_PORT=6379 - SPRING_DATA_REDIS_PASSWORD=${REDIS_PASSWORD:-REDIS_PASSWORD} - SPRING_PROFILES_ACTIVE=${SPRING_PROFILES_ACTIVE:-dev} - - SPRING_DATASOURCE_URL=jdbc:postgresql://postgres:5432/${POSTGRES_DB:-dsv} - - SPRING_DATASOURCE_USERNAME=${POSTGRES_USER:-dsv} - - SPRING_DATASOURCE_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - KAFKA_BOOTSTRAP_SERVERS=kafka:29092 - SEED_DNS_HOST=localhost depends_on: redis: condition: service_healthy - postgres: - condition: service_healthy kafka: condition: service_healthy networks: @@ -165,7 +125,6 @@ services: volumes: redis-data: - postgres-data: kafka-data: networks: diff --git a/docker/dsv/docker-compose.dsv-redis-postgresql.yml b/docker/dsv/docker-compose.dsv-redis-kafka.yml similarity index 65% rename from docker/dsv/docker-compose.dsv-redis-postgresql.yml rename to docker/dsv/docker-compose.dsv-redis-kafka.yml index fa94c85..1cfa894 100644 --- a/docker/dsv/docker-compose.dsv-redis-postgresql.yml +++ b/docker/dsv/docker-compose.dsv-redis-kafka.yml @@ -1,8 +1,5 @@ -# Run the Distributed Secrets Vault application with Redis and PostgreSQL -name: "Distributed Secrets Vault + Redis + PostgreSQL" - -# Environment variables are loaded from ../../.env (project root) -# Copy .env.example to .env and configure as needed +# Run the Distributed Secrets Vault application with Redis and Kafka +name: "Distributed Secrets Vault + Redis + Kafka" services: redis: @@ -22,26 +19,6 @@ services: networks: - dsv-network - postgres: - image: postgres:18.2-alpine - container_name: dsv-postgres - ports: - - 
"5432:5432" - environment: - - LANG=C.UTF-8 - - POSTGRES_USER=${POSTGRES_USER:-dsv} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - - POSTGRES_DB=${POSTGRES_DB:-dsv} - volumes: - - postgres-data:/var/lib/postgresql/data - healthcheck: - test: ["CMD-SHELL", "pg_isready -U \"$$POSTGRES_USER\" -d \"$$POSTGRES_DB\""] - interval: 5s - timeout: 3s - retries: 5 - networks: - - dsv-network - kafka: image: apache/kafka:3.7.0 container_name: dsv-kafka @@ -50,7 +27,6 @@ services: environment: - KAFKA_NODE_ID=1 - KAFKA_PROCESS_ROLES=broker,controller - # INTERNAL for containers (app); HOST for JVM on machine (metadata must match bootstrap) - KAFKA_LISTENERS=PLAINTEXT_HOST://0.0.0.0:9092,PLAINTEXT_INTERNAL://0.0.0.0:29092,CONTROLLER://0.0.0.0:9093 - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT_HOST://localhost:9092,PLAINTEXT_INTERNAL://kafka:29092 - KAFKA_CONTROLLER_LISTENER_NAMES=CONTROLLER @@ -89,17 +65,11 @@ services: - SPRING_DATA_REDIS_PORT=6379 - SPRING_DATA_REDIS_PASSWORD=${REDIS_PASSWORD:-REDIS_PASSWORD} - SPRING_PROFILES_ACTIVE=${SPRING_PROFILES_ACTIVE:-dev} - - SPRING_DATASOURCE_URL=jdbc:postgresql://postgres:5432/${POSTGRES_DB:-dsv} - - SPRING_DATASOURCE_USERNAME=${POSTGRES_USER:-dsv} - - SPRING_DATASOURCE_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - KAFKA_BOOTSTRAP_SERVERS=kafka:29092 - # Single-node ScaleCube: seed resolves to this container (see ScaleCubeConfig) - SEED_DNS_HOST=localhost depends_on: redis: condition: service_healthy - postgres: - condition: service_healthy kafka: condition: service_healthy networks: @@ -107,7 +77,6 @@ services: volumes: redis-data: - postgres-data: kafka-data: networks: diff --git a/docker/postgresql/docker-compose.postgresql-production.yml b/docker/postgresql/docker-compose.postgresql-production.yml deleted file mode 100644 index 21a1b12..0000000 --- a/docker/postgresql/docker-compose.postgresql-production.yml +++ /dev/null @@ -1,100 +0,0 @@ -# Production PostgreSQL: one primary + two synchronous standbys for 
redundancy. -# Uses docker/postgresql/postgresql.conf (replication, archive, logging). -# Standbys use application_name postgres-1 and postgres-2 to match synchronous_standby_names. -# -# Environment variables are loaded from ../../.env (project root). -# Required: POSTGRES_PASSWORD, POSTGRES_REPLICATION_USER, POSTGRES_REPLICATION_PASSWORD. -name: "PostgreSQL (Production)" - -services: - postgres-primary: - image: postgres:18.2-alpine - container_name: dsv-postgres-primary - hostname: postgres-primary - ports: - - "5432:5432" - environment: - - POSTGRES_USER=${POSTGRES_USER:-dsv} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} - - POSTGRES_DB=${POSTGRES_DB:-dsv} - - POSTGRES_REPLICATION_USER=${POSTGRES_REPLICATION_USER:-replicator} - - POSTGRES_REPLICATION_PASSWORD=${POSTGRES_REPLICATION_PASSWORD} - volumes: - - postgres-primary-data:/var/lib/postgresql/data - - postgres-archive:/var/lib/postgresql/archive - - ./postgresql.conf:/etc/postgresql/postgresql.conf:ro - - ./scripts/init-primary.sh:/docker-entrypoint-initdb.d/01-create-replication-user.sh:ro - command: ["postgres", "-c", "config_file=/etc/postgresql/postgresql.conf"] - healthcheck: - test: ["CMD-SHELL", "pg_isready -U \"$$POSTGRES_USER\" -d \"$$POSTGRES_DB\""] - interval: 5s - timeout: 3s - retries: 5 - networks: - - dsv-network - - postgres-1: - image: postgres:18.2-alpine - container_name: dsv-postgres-1 - hostname: postgres-1 - environment: - - PGDATA=/var/lib/postgresql/data - - POSTGRES_USER=${POSTGRES_USER:-dsv} - - POSTGRES_DB=${POSTGRES_DB:-dsv} - - POSTGRES_PRIMARY_HOST=postgres-primary - - POSTGRES_REPLICATION_USER=${POSTGRES_REPLICATION_USER:-replicator} - - POSTGRES_REPLICATION_PASSWORD=${POSTGRES_REPLICATION_PASSWORD} - - REPLICA_NAME=postgres-1 - volumes: - - postgres-1-data:/var/lib/postgresql/data - - ./scripts/replica-entrypoint.sh:/scripts/replica-entrypoint.sh:ro - command: ["bash", "/scripts/replica-entrypoint.sh"] - depends_on: - postgres-primary: - condition: service_healthy - 
healthcheck: - test: ["CMD-SHELL", "pg_isready -U \"$$POSTGRES_USER\" -d \"$$POSTGRES_DB\""] - interval: 10s - timeout: 5s - retries: 5 - start_period: 60s - networks: - - dsv-network - - postgres-2: - image: postgres:18.2-alpine - container_name: dsv-postgres-2 - hostname: postgres-2 - environment: - - PGDATA=/var/lib/postgresql/data - - POSTGRES_USER=${POSTGRES_USER:-dsv} - - POSTGRES_DB=${POSTGRES_DB:-dsv} - - POSTGRES_PRIMARY_HOST=postgres-primary - - POSTGRES_REPLICATION_USER=${POSTGRES_REPLICATION_USER:-replicator} - - POSTGRES_REPLICATION_PASSWORD=${POSTGRES_REPLICATION_PASSWORD} - - REPLICA_NAME=postgres-2 - volumes: - - postgres-2-data:/var/lib/postgresql/data - - ./scripts/replica-entrypoint.sh:/scripts/replica-entrypoint.sh:ro - command: ["bash", "/scripts/replica-entrypoint.sh"] - depends_on: - postgres-primary: - condition: service_healthy - healthcheck: - test: ["CMD-SHELL", "pg_isready -U \"$$POSTGRES_USER\" -d \"$$POSTGRES_DB\""] - interval: 10s - timeout: 5s - retries: 5 - start_period: 60s - networks: - - dsv-network - -volumes: - postgres-primary-data: - postgres-archive: - postgres-1-data: - postgres-2-data: - -networks: - dsv-network: - driver: bridge diff --git a/docker/postgresql/docker-compose.postgresql.yml b/docker/postgresql/docker-compose.postgresql.yml deleted file mode 100644 index ff61d5c..0000000 --- a/docker/postgresql/docker-compose.postgresql.yml +++ /dev/null @@ -1,23 +0,0 @@ -# Run PostgreSQL with the DSV configuration -# Development: single node. Production multi-node will use a separate compose and docker/postgresql/postgresql.conf. 
-name: "PostgreSQL" - -# Environment variables are loaded from ../../.env (project root) -# Copy .env.example to .env and configure as needed - -services: - postgres: - image: postgres:18.2-alpine - container_name: dsv-postgres - ports: - - "5432:5432" - environment: - - LANG=C.UTF-8 - - POSTGRES_USER=${POSTGRES_USER:-dsv} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - - POSTGRES_DB=${POSTGRES_DB:-dsv} - volumes: - - postgres-data:/var/lib/postgresql/data - -volumes: - postgres-data: diff --git a/docker/postgresql/postgresql.conf b/docker/postgresql/postgresql.conf deleted file mode 100644 index 4950492..0000000 --- a/docker/postgresql/postgresql.conf +++ /dev/null @@ -1,137 +0,0 @@ -# ============================================================================ -# PostgreSQL Configuration for Distributed Secrets Vault -# ============================================================================ -# This PostgreSQL instance serves as one of the user management nodes. - - -# ============================================================================ -# NETWORK CONFIGURATION -# ============================================================================ - -# Listen on all network interfaces (required for Docker containers) -listen_addresses = '*' - -# Maximum number of concurrent client connections -max_connections = 200 - - -# ============================================================================ -# REPLICATION & HIGH AVAILABILITY -# ============================================================================ - -# WAL level must be 'replica' or higher for streaming replication -# This enables standby servers to receive and apply WAL changes -wal_level = replica - -# Maximum number of concurrent WAL sender processes -# This determines how many standby servers can connect simultaneously -max_wal_senders = 10 - -# Maximum number of replication slots to retain WAL files for standbys -# Prevents premature deletion of WAL files needed by 
disconnected standbys -max_replication_slots = 10 - -# Allows read-only queries on standby servers during recovery -hot_standby = on - -# Ensures transaction commits wait for WAL to be written to at least 1 standby -# Provides strong consistency guarantees at the cost of some latency -synchronous_commit = on - -# Requires acknowledgment from ANY 1 of the named standby servers before commit -# This provides automatic failover capability without data loss -synchronous_standby_names = 'ANY 1 (postgres-1, postgres-2)' - - -# ============================================================================ -# MEMORY CONFIGURATION -# ============================================================================ - -# Amount of memory used for caching data/indexes -# Recommended: 25% of total system RAM (adjusted for containerized deployment) -shared_buffers = 1GB - -# Estimate of memory available for disk caching by the OS -# Used by query planner to estimate costs; does not allocate memory -# Set to ~50-75% of total system RAM -effective_cache_size = 3GB - -# Memory used for internal sort operations and hash tables per query -# Increase if you frequently sort large datasets or perform complex joins -work_mem = 16MB - -# Memory used for maintenance operations (VACUUM, CREATE INDEX, ALTER TABLE) -maintenance_work_mem = 256MB - - -# ============================================================================ -# WRITE-AHEAD LOG (WAL) CONFIGURATION -# ============================================================================ - -# Enables continuous archiving and point-in-time recovery (PITR) -# WAL files are copied to archive location for backup purposes -archive_mode = on - -# Command to execute to archive a WAL file segment -# %p = path of file to archive, %f = filename only -# Replace with actual backup destination (S3, NFS, etc.) -archive_command = 'test ! 
-f /var/lib/postgresql/archive/%f && cp %p /var/lib/postgresql/archive/%f' - -# Target fraction (0.0-1.0) of checkpoint interval to complete checkpoint -# Higher values spread out checkpoint I/O load but may increase recovery time -checkpoint_completion_target = 0.9 - - -# ============================================================================ -# LOGGING CONFIGURATION -# ============================================================================ - -# Enables the logging collector background process -# Required for log rotation and centralized log management -logging_collector = on - -# Log all data modification statements (INSERT, UPDATE, DELETE, TRUNCATE, etc.) -# Critical for auditing user data changes in a secrets management system -log_statement = 'mod' - -# Log each successful connection attempt (important for security auditing) -log_connections = on - -# Log session termination (track connection lifecycle for monitoring) -log_disconnections = on - -# Log duration of each completed statement -# Useful for identifying performance issues -log_duration = on - -# Only log statements that take longer than this many milliseconds -# Set to 1000ms (1 second) to capture slow queries without excessive logging -log_min_duration_statement = 1000 - -# Include line numbers in log messages for easier debugging -log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d,app=%a,client=%h ' - - -# ============================================================================ -# SECURITY & AUTHENTICATION -# ============================================================================ - -# Requires SSL/TLS for client connections when properly configured -# Uncomment and configure certificates for production use -# ssl = on -# ssl_cert_file = '/path/to/server.crt' -# ssl_key_file = '/path/to/server.key' - - -# ============================================================================ -# PERFORMANCE TUNING -# ============================================================================ - 
-# Enables query execution plan statistics collection -# Helps the planner make better decisions over time -# Note: May add slight overhead but improves long-term performance -default_statistics_target = 100 - -# Sets the planner's estimate of the cost of a non-sequentially-fetched disk page -# Lower for SSDs (1.1-2.0), default (4.0) for HDDs -# random_page_cost = 1.1 \ No newline at end of file diff --git a/docker/postgresql/scripts/init-primary.sh b/docker/postgresql/scripts/init-primary.sh deleted file mode 100644 index f38ab32..0000000 --- a/docker/postgresql/scripts/init-primary.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -set -e -# Ensure archive directory exists and is writable by postgres (for postgresql.conf archive_command) -mkdir -p /var/lib/postgresql/archive -chown postgres:postgres /var/lib/postgresql/archive -# Create replication user (used by production standbys) -psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL -CREATE USER ${POSTGRES_REPLICATION_USER} WITH REPLICATION PASSWORD '${POSTGRES_REPLICATION_PASSWORD}'; -EOSQL -# Allow replication connections from standbys -echo "host replication ${POSTGRES_REPLICATION_USER} 0.0.0.0/0 scram-sha-256" >> "$PGDATA/pg_hba.conf" diff --git a/docker/postgresql/scripts/replica-entrypoint.sh b/docker/postgresql/scripts/replica-entrypoint.sh deleted file mode 100644 index da23a49..0000000 --- a/docker/postgresql/scripts/replica-entrypoint.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -set -e -PGDATA="${PGDATA:-/var/lib/postgresql/data}" - -# If already a standby (restart), just start Postgres -if [ -f "$PGDATA/standby.signal" ]; then - exec /usr/local/bin/docker-entrypoint.sh postgres -fi - -# Wait for primary to be ready -until PGPASSWORD="$POSTGRES_REPLICATION_PASSWORD" pg_isready -h "$POSTGRES_PRIMARY_HOST" -U "$POSTGRES_REPLICATION_USER"; do - echo "Waiting for primary at $POSTGRES_PRIMARY_HOST..." 
- sleep 2 -done - -# Bootstrap standby from primary -echo "Running pg_basebackup from $POSTGRES_PRIMARY_HOST..." -PGPASSWORD="$POSTGRES_REPLICATION_PASSWORD" pg_basebackup -h "$POSTGRES_PRIMARY_HOST" -U "$POSTGRES_REPLICATION_USER" -D "$PGDATA" -Fp -Xs -P -R -w - -# Set application_name for synchronous_standby_names (must match postgresql.conf: postgres-1, postgres-2) -if [ -n "$REPLICA_NAME" ] && [ -f "$PGDATA/postgresql.auto.conf" ]; then - sed -i "s/'$/ application_name='$REPLICA_NAME'/'/" "$PGDATA/postgresql.auto.conf" -fi - -exec /usr/local/bin/docker-entrypoint.sh postgres diff --git a/docs/architecture.md b/docs/architecture.md index 31006ae..37a827a 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -25,10 +25,10 @@ graph LR --- -2. New user signs up +2. User identity -- User signs up through the gateway using OAuth2-compatible credentials. -- Credentials and account records are stored in PostgreSQL only. +- User identity and authentication are outside the current DSV backend runtime. +- The vault service stores and retrieves secret shards; it no longer depends on a relational database. --- diff --git a/docs/docker.md b/docs/docker.md index 4e32b71..c0ae5f6 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -1,202 +1,91 @@ # Docker Deployment Guide -This guide covers building and running the Distributed Secrets Vault using Docker. +This guide covers building and running Distributed Secrets Vault with Docker. -## Prerequisites - -- Docker and Docker Compose installed -- Java 25+ (for local builds) - -## Quick Start with Docker Compose (Recommended) - -The easiest way to run the application with Redis, PostgreSQL, and Kafka: +## Quick Start ```bash -# 1. Setup environment (from project root; optional for quick start) cp .env.example .env -# Edit .env: set REDIS_PASSWORD and POSTGRES_PASSWORD (dev compose defaults match .env.example if unset) - -# 2. 
Build and start all services -./mvnw clean package +./mvnw clean package -DskipTests mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up --build +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml up --build ``` The application will be available at: -- **API:** `http://localhost:8080` -- **Redis:** `localhost:6379` -- **PostgreSQL:** `localhost:5432` -- **Kafka:** `localhost:9092` +- API: `http://localhost:8080` +- Redis: `localhost:6379` +- Kafka: `localhost:9092` -### Compose file layout +## Compose Files -Compose files live under `docker/`; there is no single `docker-compose.yml` at the root. Use `-f` to choose a file: +| File | Stack | +| --- | --- | +| `docker/dsv/docker-compose.dsv.yml` | App only | +| `docker/dsv/docker-compose.dsv-redis.yml` | App + Redis | +| `docker/dsv/docker-compose.dsv-redis-kafka.yml` | App + Redis + Kafka | +| `docker/dsv/docker-compose.dsv-redis-kafka-3nodes.yml` | Three app nodes + Redis + Kafka | +| `docker/redis/docker-compose.redis.yml` | Redis only | +| `docker/kafka/docker-compose.kafka.yml` | Kafka only | -| File | Stack | -| ----------------------------------------------------- | -------------------------------------------- | -| `dsv/docker-compose.dsv.yml` | App only | -| `dsv/docker-compose.dsv-redis.yml` | App + Redis | -| `dsv/docker-compose.dsv-postgresql.yml` | App + PostgreSQL | -| `dsv/docker-compose.dsv-redis-postgresql.yml` | App + Redis + PostgreSQL + Kafka (full dev stack) | -| `postgresql/docker-compose.postgresql.yml` | PostgreSQL only (single node, dev) | -| `postgresql/docker-compose.postgresql-production.yml` | PostgreSQL primary + 2 standbys (production) | -| `redis/docker-compose.redis.yml` | Redis only | +## Local Cluster Test -### Docker Compose commands - -All commands below assume you are in the **project root**. Use the same `-f` path for the stack you are running. 
+Use the three-node stack to test cluster-like behavior locally: ```bash -# Start full dev stack in foreground (see logs) -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up - -# Start in background -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up -d +./scripts/test-three-dsv-kafka-nodes.sh +``` -# View logs -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml logs -f app -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml logs -f redis -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml logs -f postgres +Or start it manually: -# Stop services -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml down +```bash +./mvnw clean package -DskipTests +mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka-3nodes.yml up -d --build +``` -# Stop and remove volumes (clean slate) -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml down -v +The apps listen on: -# Rebuild after code changes -./mvnw clean package && mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up --build -``` +- `http://127.0.0.1:8081` +- `http://127.0.0.1:8082` +- `http://127.0.0.1:8083` ## Configuration -### Environment variables - -Configuration is managed through the `.env` file in the project root: +Configuration is loaded from `.env` in the project root when you run Docker Compose from the project root: ```env -# Redis REDIS_PASSWORD=your-secure-password - -# PostgreSQL (user accounts) -POSTGRES_USER=dsv -POSTGRES_PASSWORD=your-postgres-password -POSTGRES_DB=dsv - -# Spring profile (dev, prod, test) SPRING_PROFILES_ACTIVE=dev ``` -For production PostgreSQL (primary + standbys), also set: - -```env -POSTGRES_REPLICATION_USER=replicator -POSTGRES_REPLICATION_PASSWORD=your-replication-password -``` - 
-**Security note:** Never commit `.env` to git. Use `.env.example` as a template. - -Docker Compose loads `.env` from the directory from which you run `docker compose`; when using the recommended commands (from project root), that is the project root. - -### Redis configuration - -Redis uses the stock `redis:8.6-alpine` image. Our `redis.conf` is mounted at `/usr/local/etc/redis/redis.conf` and passed explicitly (`redis-server /usr/local/etc/redis/redis.conf --requirepass ${REDIS_PASSWORD:-REDIS_PASSWORD}`) so Redis runs as a vanilla server with no bundled modules; the config file has no `include` and no `loadmodule`. Persistence and security are configured in `docker/redis/redis.conf`: - -- AOF persistence with `everysec` fsync -- RDB snapshots at 15 min, 5 min, and 1 min intervals (when keys change) -- No eviction policy (suitable for secrets storage) -- Password auth required - -### PostgreSQL configuration - -- **Development:** Single node via `docker/postgresql/docker-compose.postgresql.yml` or as part of the full dev stack. Image: `postgres:18.2-alpine`. No custom config file (defaults only). -- **Production:** Multi-node (primary + 2 standbys) via `docker/postgresql/docker-compose.postgresql-production.yml`. Uses `docker/postgresql/postgresql.conf` for replication, WAL archiving, and logging. See **Production PostgreSQL** below. - -## Production PostgreSQL (multi-node) - -For redundancy, run one primary and two synchronous standbys: - -```bash -# From project root. Ensure .env has POSTGRES_PASSWORD, POSTGRES_REPLICATION_PASSWORD (and optionally POSTGRES_REPLICATION_USER) -docker compose -f docker/postgresql/docker-compose.postgresql-production.yml up -d -``` - -- **postgres-primary:** Read-write; port 5432; uses `docker/postgresql/postgresql.conf`. -- **postgres-1, postgres-2:** Read-only standbys streaming from the primary. Application names match `synchronous_standby_names` in `postgresql.conf`. 
+Kafka and Redis connection settings are provided by the compose files for containerized runs. -Applications should connect to the primary (hostname `postgres-primary`) for read-write. See `docker/README.md` for script and network details. +## Redis -## Standalone Docker (without Compose) +Redis stores secret shards. The local config in `docker/redis/redis.conf` enables: -If you need to run the app container without Compose: +- AOF persistence with `appendfsync everysec` +- RDB snapshots +- no key eviction +- password authentication -### Build the image +## Standalone Image ```bash -./mvnw clean package +./mvnw clean package -DskipTests mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) docker build -t distributed-secrets-vault . -``` - -### Run the container - -```bash docker run -p 8080:8080 distributed-secrets-vault ``` -### With environment variables +For a standalone container connected to external services: ```bash -docker run -e "SPRING_PROFILES_ACTIVE=prod" \ +docker run \ + -e "SPRING_PROFILES_ACTIVE=prod" \ -e "SPRING_DATA_REDIS_HOST=redis.example.com" \ -e "SPRING_DATA_REDIS_PASSWORD=yourpassword" \ - -e "SPRING_DATASOURCE_URL=jdbc:postgresql://postgres.example.com:5432/dsv" \ - -e "SPRING_DATASOURCE_USERNAME=dsv" \ - -e "SPRING_DATASOURCE_PASSWORD=yourpostgrespassword" \ + -e "KAFKA_BOOTSTRAP_SERVERS=kafka.example.com:9092" \ -p 8080:8080 distributed-secrets-vault ``` - -## Development - -### Fast rebuild after code changes - -The layered Dockerfile keeps dependency layers cached; only the app layer rebuilds when code changes: - -```bash -./mvnw clean package -mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up --build app -``` - -### Debugging - -Enable remote debugging: - -```bash -docker run -e "JAVA_TOOL_OPTIONS=-agentlib:jdwp=transport=dt_socket,address=5005,server=y,suspend=n" \ - -p 8080:8080 -p 5005:5005 distributed-secrets-vault -``` 
- -Connect your IDE debugger to `localhost:5005`. - -## Alternative: Spring Boot Buildpack - -Build without the project Dockerfile: - -```bash -./mvnw spring-boot:build-image -Dspring-boot.build-image.imageName=distributed-secrets-vault -docker run -p 8080:8080 distributed-secrets-vault -``` - -## Container management - -```bash -docker ps -docker stop -docker rm -docker rmi distributed-secrets-vault -``` - -For more detail (structure, services, production PostgreSQL scripts), see **docker/README.md**. diff --git a/docs/kubernetes.md b/docs/kubernetes.md index e201fd3..532ad4c 100644 --- a/docs/kubernetes.md +++ b/docs/kubernetes.md @@ -1,190 +1,82 @@ -# Kubernetes Architecture & Configuration +# Kubernetes Architecture and Configuration -This document provides an in-depth look at how the Distributed Secrets Vault (DSV) is deployed on Kubernetes (specifically optimized for K3s). The Kubernetes orchestration is designed to enforce strict hardware utilization limits, physical architecture constraints, and high-availability database replication. +The Kubernetes deployment runs DSV as a leaderless app cluster with per-pod Redis sidecars and a Kafka broker. ---- - -## 1. High-Level Architecture - -The cluster is distinctly divided into **Control Plane Node(s)** and **Agent Nodes**. Workloads (DSV App and Postgres databases) are entirely segregated from the Control Plane to ensure networking and load-balancing performance is not impacted by heavy data processing. +## High-Level Architecture ```mermaid graph TD Client([Client]) - - subgraph ControlPlane [Control Plane Node] - Ingress[Traefik Ingress Gateway
api.dsv.local] - Service[dsv-app-service
ClusterIP] - Ingress --> |Routes traffic| Service - end - - Client --> |HTTP/HTTPS| Ingress - - subgraph AgentNodes [Agent Nodes Collection] - direction TB - - subgraph Node1 [Agent Node 1] - direction LR - App1[DSV Worker + Redis Sidecar
Pod] - DB0[(Postgres-0 Primary
Pod)] - end - - subgraph Node2 [Agent Node 2] - direction LR - App2[DSV Worker + Redis Sidecar
Pod] - DB1[(Postgres-1 Replica
Pod)] - end - - subgraph Node3 [Agent Node 3] - direction LR - App3[DSV Worker + Redis Sidecar
Pod] - DB2[(Postgres-2 Replica
Pod)] - end - - subgraph NodeN [Agent Node 4..12] - AppN[DSV Worker + Redis Sidecar
Pod] - end - - subgraph StandbyNodes [Standby Nodes > 12] - Idle[Idle / Pending Pods Wait Here] - end + Ingress[Ingress] + Service[dsv-app-service] + Kafka[(Kafka)] + + Client --> Ingress + Ingress --> Service + + subgraph AppPods[DSV StatefulSet] + App1[DSV App 0 + Redis Sidecar] + App2[DSV App 1 + Redis Sidecar] + App3[DSV App 2 + Redis Sidecar] + AppN[DSV App N + Redis Sidecar] end - Service -.-> |Load Balances| App1 - Service -.-> |Load Balances| App2 - Service -.-> |Load Balances| App3 - Service -.-> |Load Balances| AppN - - DB1 -.-> |Streaming Repl| DB0 - DB2 -.-> |Streaming Repl| DB0 - - classDef control fill:#dbe9f4,stroke:#4a76a8,stroke-width:2px,color:#333; - classDef agent fill:#e8f4e5,stroke:#5c8b52,stroke-width:2px,color:#333; - classDef pod fill:#fdfdfd,stroke:#888,stroke-width:1px,color:#333; - - class ControlPlane control; - class Node1,Node2,Node3,NodeN,StandbyNodes agent; - class App1,App2,App3,AppN,DB0,DB1,DB2 pod; -``` - ---- - -## 2. Workload Segregation + Service --> App1 + Service --> App2 + Service --> App3 + Service --> AppN -We enforce the distinction between Control Plane and Agent Nodes using Kubernetes **Node Affinity** rules. By explicitly denying placement on nodes labeled as `node-role.kubernetes.io/control-plane`, we guarantee the Control Plane handles only API ingresses and cluster state management. - -```yaml -# Present on all Workload Pods (DSV Worker & Postgres) -affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: node-role.kubernetes.io/control-plane - operator: DoesNotExist + App1 <--> Kafka + App2 <--> Kafka + App3 <--> Kafka + AppN <--> Kafka ``` ---- +## DSV App and Redis -## 3. Distributed Secrets Vault Worker (App + Redis) +The app runs as a StatefulSet. Each pod includes: -The primary secret sharing application is deployed as a `StatefulSet` with an upper bound of 12 replicas. 
+- a Spring Boot DSV container +- a Redis sidecar on `localhost:6379` +- a PVC mounted into Redis for durable shard storage -### The Sidecar Model -Because the DSV application expects a tightly coupled Redis instance for persistent secret recovery and fast in-memory queueing, Redis is deployed as a **Sidecar** to the DSV Spring Boot application. They share the same Pod, meaning they share the `localhost` network space. The DSV application can always communicate with its dedicated Redis cache at `localhost:6379`. +This keeps shard storage local to the DSV pod while still allowing Kubernetes to reschedule pods with their persistent volumes. -```mermaid -graph LR - subgraph Pod [DSV App Pod / StatefulSet Replica] - Boot[Spring Boot App
Port: 8080] - Redis[(Redis Engine
Port: 6379)] - Vol[[Persistent Volume]] - - Boot <--> |localhost:6379| Redis - Redis --- Vol - end -``` +## Cluster Discovery -### Resource Limits and Node Affinity -To enforce a strict **one worker per physical node** rule, we implement a `podAntiAffinity` constraint keyed to the `kubernetes.io/hostname`. +`dsv-app-headless` is a headless service that returns DNS records for app pods. ScaleCube uses: -This limits deployment logic to: -* **Max 12 Nodes:** The StatefulSet requests exactly 12 replicas. -* **Insufficient physical hardware:** If the cluster only has 5 agent nodes, 5 Pods are scheduled, and the remaining 7 request "Standby" mode in the `Pending` state. -* **Too much physical hardware:** If there are 15 agent nodes, 12 receive Pods. The other 3 remain empty "Standby" nodes ready to take over instantly if an active node fails. +- `SEED_DNS_HOST=dsv-app-headless.default.svc.cluster.local` +- `SEED_DNS_PORT=4801` +- `CLUSTER_PORT=4801` ---- +The app service `dsv-app-service` separately provides load-balanced HTTP traffic. -## 4. PostgreSQL Cluster Deployment +## Kafka -The PostgreSQL service utilizes a `StatefulSet` capped at exactly 3 replicas. Similar to the DSV app, it has a strict `podAntiAffinity` constraint to distribute the primary and two replicas across 3 independent physical agent nodes. +Kafka runs as a single-broker KRaft StatefulSet in the current manifests. DSV app pods connect through: -### Intelligent Primary/Replica Discovery -To translate the heavy Docker shell scripts into a unified Kubernetes deployment, the Postgres `StatefulSet` mounts a bash wrapper via a `ConfigMap`. Kubernetes organically names StatefulSet pods sequentially: `postgres-0`, `postgres-1`, `postgres-2`. 
- -The wrapper script automatically interprets the current Pod's hostname: - -```mermaid -sequenceDiagram - participant K8s as Kubernetes - participant Wrap as wrapper.sh - participant Primary as postgres-0 (Primary) - participant Replica as postgres-1/2 (Replica) - - K8s->>Wrap: Start Container - Wrap->>Wrap: Extract $HOSTNAME - alt is postgres-0 - Wrap->>Primary: Execute init-primary logic - Primary-->>Wrap: Postgres Engine Started (Read/Write) - else is postgres-1 or postgres-2 - Wrap->>Replica: Execute replica-entrypoint logic - Replica->>Primary: Wait for connection - Replica->>Primary: pg_basebackup - Replica-->>Wrap: Postgres Engine Started (Read-Only) - end +```text +KAFKA_BOOTSTRAP_SERVERS=kafka.default.svc.cluster.local:9092 ``` -Because of this wrapper, there is no need for separate `Primary` and `Replica` configuration files—Kubernetes self-organizes the database roles seamlessly. - ---- - -## 5. DNS and Cluster Discovery - -To facilitate internal communications without going through the external ingress gateway, the manifests rely on **Headless Services**. - -A standard Kubernetes Service (like `dsv-app-service`) provides a single IP that load-balances across all healthy pods. A Headless Service (`ClusterIP: None`) bypasses the proxy and returns the raw A-records for *every matching Pod endpoint*. - -For ScaleCube membership, startup is DNS-based in both production and testing: - -* `SEED_DNS_HOST=dsv-app-headless.default.svc.cluster.local` -* `SEED_DNS_PORT=4801` and `CLUSTER_PORT=4801` -* `dsv-app-headless` publishes port `4801` so each worker can resolve and join currently active worker nodes without hard-coded seed lists. 
-* ScaleCube retry/default behavior is configured in `src/main/resources/application.properties`: - * `scalecube.cluster.default-port=4801` - * `scalecube.dns.resolve.max-attempts=5` - * `scalecube.dns.resolve.retry-delay-ms=1000` +## Testing Environment -`ScaleCubeConfig` resolves `SEED_DNS_HOST` to all available pod IPs and feeds those addresses into ScaleCube membership seed discovery. +`k8s/testing` is intended for Docker Desktop, Minikube, or K3d. It runs three DSV app replicas without the production node-affinity and pod anti-affinity constraints. -* `dsv-app-headless`: Returns all DSV worker pod A-records used for ScaleCube peer discovery. -* `postgres-headless`: Allows the application layer to reliably locate `postgres-0.postgres-headless.default.svc.cluster.local` as the permanent primary database URL. - ---- - -## 6. Kafka Infrastructure (Future) - -To prepare for future 2-Phase Commit coordination via event queuing, the cluster includes a Kafka broker. - -* **Development/Testing:** Deployed as a single-node StatefulSet (`kafka-statefulset.yaml`) alongside a headless service (`kafka-service.yaml`) for broker discovery. Uses KRaft configuration to eliminate the Zookeeper dependency. -* **Production:** Currently a single-replica stateful set for foundational use, but intended to be scaled out if needed. -* **Network mapping:** Exposes port 9092 internally. DSV app pods are configured to reach this broker via the `KAFKA_BOOTSTRAP_SERVERS` environment variable pointing to `kafka.default.svc.cluster.local:9092`. - ---- +```bash +kubectl apply -f k8s/testing/ +kubectl get pods -w +``` -## 7. Testing Environments +## Production Environment -To facilitate local testing via Docker Desktop, Minikube, or K3d without needing a multi-node architecture, the `k8s/testing` directory contains versions of these YAML files with the `podAntiAffinity` and `nodeAffinity` constraints stripped out, and the replica counts reduced to `1`.
+`k8s/production` keeps scheduling controls for a multi-node target: -Because of the intelligent Postgres wrapper, scaling `postgres` to `1` replica simply builds the `postgres-0` StatefulSet and seamlessly behaves as a standalone database! +- app pods avoid control-plane nodes +- app pods use pod anti-affinity to spread across worker nodes +- the app StatefulSet requests up to 12 replicas -ScaleCube discovery remains identical to production in testing: the worker still resolves `dsv-app-headless` DNS and joins peers over port `4801`; only replica counts and scheduling constraints differ. \ No newline at end of file +```bash +kubectl apply -f k8s/production/ +``` diff --git a/docs/technologies.md b/docs/technologies.md index fb5e35b..444c913 100644 --- a/docs/technologies.md +++ b/docs/technologies.md @@ -2,125 +2,52 @@ ## Java 25 -**Why we chose it:** Java is the language and framework the team chose for building a robust backend system. +Java is the backend language for the DSV service. -**Alternatives considered:** -- **Python:** Used for the client application, but not chosen for the backend due to performance considerations in a distributed, latency-sensitive system. +## Spring Boot 4 ---- +Spring Boot provides application wiring, configuration, validation, actuator health endpoints, and the REST API layer through Spring Web MVC. -## Spring Boot 4.0.2 +## Spring Data Redis -**Why we chose it:** Spring Boot provides a production-ready framework with auto-configuration, dependency injection, and a rich ecosystem of integrations (web, data, validation). It dramatically reduces boilerplate and lets us focus on business logic instead of infrastructure wiring. +Redis is the durable shard store. Spring Data Redis provides the Redis client integration used by the repository implementation. 
-**Alternatives considered:** -- **Core Java without Spring:** Would require building production-ready infrastructure (dependency injection, request handling, validation) from scratch, which is not practical for a capstone project. +## Redis 8 -### Spring Web MVC +Redis stores secret shards as key-value data. The Docker configuration enables AOF persistence, RDB snapshots, password authentication, and no eviction. -**Why we chose it:** Provides a straightforward annotation-driven REST API layer (`@RestController`, `@RequestMapping`) that integrates cleanly with the rest of the Spring ecosystem. +## Apache Kafka 3.7 -### Spring Data JPA +Kafka provides commit fanout and ordered messaging infrastructure for distributed mutation coordination. -**Why we chose it:** Gives us a repository abstraction over PostgreSQL, eliminating most SQL boilerplate while still allowing custom queries when needed. +## ScaleCube -### Spring Data Redis +ScaleCube handles cluster membership and peer discovery for DSV app nodes. -**Why we chose it:** Provides a Spring-idiomatic client for Redis, including `RedisTemplate` and repository support, so secret shards can be read and written with the same patterns used for relational data. +## Shamir's Secret Sharing -### Spring Validation +The `codahale/shamir` library implements the cryptographic split/reconstruct primitive used to divide a secret into `n` shards with a `k` shard reconstruction threshold. -**Why we chose it:** Declarative bean validation (`@Valid`, `@NotNull`, etc.) keeps input-validation logic out of service code and produces consistent error responses. +## Maven ---- +Maven builds the Spring Boot application and manages Java dependencies. -## Maven (with Maven Wrapper) +## Docker and Docker Compose -**Why we chose it:** Maven is the most widely used Java build tool and integrates natively with Spring Boot's parent POM. 
The `mvnw` wrapper ensures every developer and CI runner uses the same Maven version without a separate install step. +Docker makes the app, Redis, and Kafka reproducible across developer machines. Docker Compose defines the single-node and three-node local stacks. ---- +## Kubernetes -## PostgreSQL 18 - -**Why we chose it:** PostgreSQL is a proven, open-source relational database with strong ACID guarantees. We use it exclusively for user account and authentication-related data management. For production we run one primary and two synchronous standbys for redundancy. - -**Alternatives considered:** -- **SQLite:** Not suitable for a multi-node, concurrent server environment. - ---- - -## Redis 8 (with AOF Persistence) - -**Why we chose it:** Redis provides fast in-memory storage with configurable durability. We store secret shards in Redis because shard reads and writes must be extremely fast (they happen on every secret retrieval), and AOF persistence with `appendfsync everysec` gives us at most one second of data loss on failure—acceptable for this use case. - -**Configuration highlights:** -- AOF (Append-Only File) persistence enabled -- RDB snapshots every 15 minutes -- No key eviction (secrets are never auto-deleted) -- Password authentication required - -**Alternatives considered:** -- **Pure PostgreSQL for shards:** Would work, but is slower for the high-frequency shard reads/writes and adds unnecessary relational overhead for key-value data. - ---- - -## Shamir's Secret Sharing (`codahale/shamir` 0.7.0) - -**Why we chose it:** Shamir's Secret Sharing is the cryptographic foundation of the entire project. It allows a secret to be split into *n* shards such that any *k* of them can reconstruct the original, while fewer than *k* shards reveal nothing. The `codahale/shamir` library is a well-audited, minimal Java implementation of the algorithm. The Shamir algorithm is the focus of the project and no alternative was considered. 
- ---- - -## Docker & Docker Compose - -**Why we chose it:** Docker makes the entire stack (application, Redis, PostgreSQL) reproducible across developer machines and CI environments with a single command. Docker Compose lets us define multi-container topologies (full dev stack, production PostgreSQL cluster) as version-controlled YAML files. We plan to integrate Kubernetes in the future for production orchestration. - ---- - -## HAProxy - -**Why we chose it:** HAProxy acts as the gateway in front of the leaderless cluster, distributing incoming HTTP requests across all available nodes. It is battle-tested for high-throughput load balancing and supports health checks so failed nodes are automatically removed from rotation. - -**Alternatives considered:** -- **Nginx:** Also a capable reverse proxy, but HAProxy's load-balancing algorithms and health-check semantics are more fine-grained for TCP/HTTP balancing across a cluster. -- **Traefik:** Cloud-native and Kubernetes-aware, but adds complexity we don't need at this stage. - ---- - -## Keepalived - -**Why we chose it:** Keepalived provides a virtual IP (VIP) that floats between gateway instances using VRRP. If the active HAProxy node fails, Keepalived promotes a standby so the cluster remains reachable without a DNS change. - -**Alternatives considered:** -- **Relying on a single HAProxy instance:** Simpler but introduces a single point of failure at the gateway layer, which contradicts the distributed-availability goals of the project. - ---- +Kubernetes manifests under `k8s/` run DSV app pods with Redis sidecars and Kafka for local or production-style orchestration. ## Lombok -**Why we chose it:** Lombok generates repetitive Java boilerplate (getters, setters, constructors, `equals`/`hashCode`, builders) at compile time via annotations. This keeps model and DTO classes concise without sacrificing type safety. 
- -**Alternatives considered:** -- **Java Records:** Suitable for immutable data carriers, but lack the builder pattern and mutable-field support needed for JPA entities. -- **Writing boilerplate by hand:** Too verbose and error-prone for a team that wants to focus on distributed-systems logic. - ---- +Lombok reduces boilerplate in model and DTO classes. ## Apache Commons Pool2 -**Why we chose it:** Commons Pool2 provides the connection-pool implementation underlying Spring Data Redis's Lettuce driver. It ensures Redis connections are reused across requests rather than opened and closed on every operation, which is critical for low-latency shard access. - ---- - -## Apache Kafka 3.7.0 (Future Integration) - -**Why we chose it:** Kafka will provide a persistent, strictly ordered commit log to serve as a distributed queue. We will use Kafka topics to reliably order concurrent mutations (Create, Update, Delete) to the same secret key across multiple nodes, thus establishing a foundation for race-condition tie-breaking in our Two-Phase Commit (2PC) coordinate logic. This prevents using ad-hoc table locks. - -**Alternatives considered:** -- **Redis distributed locks:** Could solve concurrency races, but Kafka guarantees strict event ordering natively without risking deadlocks from crashed nodes holding locks. -- **RDBMS locking:** We want to minimize PostgreSQL serialization load. - ---- +Commons Pool2 backs Lettuce Redis connection pooling. -## Eclipse Temurin 25 (Docker base image) +## Eclipse Temurin 25 -**Why we chose it:** Eclipse Temurin is the Adoptium (formerly AdoptOpenJDK) distribution of OpenJDK. It is free, regularly patched, and widely recommended as the default JDK image for production Docker containers. +The Docker image uses Eclipse Temurin as the Java runtime base image. 
diff --git a/k8s/README.md b/k8s/README.md index 76235d6..f6b106f 100644 --- a/k8s/README.md +++ b/k8s/README.md @@ -1,90 +1,82 @@ # Kubernetes Configuration -This directory contains the Kubernetes (K8s) deployment manifests for the Distributed Secrets Vault. The configuration is tuned for K3s environments but uses standard K8s primitives. +This directory contains Kubernetes manifests for Distributed Secrets Vault. The app runs as a StatefulSet, each app pod has a Redis sidecar for shard storage, and Kafka runs as a StatefulSet for commit messaging. ## Structure -``` +```text k8s/ ├── production/ -│ ├── app-service.yaml # Headless and ClusterIP services for DSV App routing -│ ├── app-statefulset.yaml # StatefulSet for Agent nodes (DSV Spring Boot + Redis sidecar) -│ ├── ingress.yaml # Traefik Ingress configuration to expose the gateway -│ ├── postgres-configmap.yaml # Wrapper shell script for intelligent Postgres primary/replica discovery -│ └── postgres.yaml # StatefulSet for the PostgreSQL user database cluster +│ ├── app-service.yaml +│ ├── app-statefulset.yaml +│ ├── ingress.yaml +│ ├── kafka-service.yaml +│ └── kafka-statefulset.yaml ├── testing/ -│ ├── app-service.yaml # Same as production, for local testing -│ ├── app-statefulset.yaml # 1 Replica, No Node/Pod Anti-Affinity constraints -│ ├── ingress.yaml # Local ingress configuration -│ ├── postgres-configmap.yaml # Wrapper script config -│ └── postgres.yaml # 1 Replica, No Node/Pod Anti-Affinity constraints -└── README.md # This file (You are here) +│ ├── app-service.yaml +│ ├── app-statefulset.yaml +│ ├── ingress.yaml +│ ├── kafka-service.yaml +│ └── kafka-statefulset.yaml +└── README.md +``` + +## Architecture + +- `dsv-app` is a StatefulSet. +- Redis runs as a sidecar inside every `dsv-app` pod and persists data through a per-pod PVC. +- `dsv-app-headless` exposes pod DNS records for ScaleCube peer discovery. +- `dsv-app-service` load-balances HTTP traffic to healthy app pods. 
+- Kafka is available at `kafka.default.svc.cluster.local:9092`. + +The production manifests keep the one-app-pod-per-worker-node placement strategy through node affinity and pod anti-affinity. The testing manifests remove those scheduling constraints for Docker Desktop, Minikube, or K3d. + +## Local Testing + +Build the local image first: + +```bash +./mvnw clean package -DskipTests +mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) +docker build -t dsv-backend:latest . +``` + +Then deploy: + +```bash +kubectl apply -f k8s/testing/ +kubectl get pods -w ``` -## Architecture & Configuration - -The Kubernetes deployment mirrors the production requirements: - -1. **Control Plane & Agent Node Segregation (`production` only)** - - Pods are governed by `nodeAffinity` rules ensuring they do not get scheduled on `node-role.kubernetes.io/control-plane`. - - Control plane naturally serves as the Load Balancer/Gateway entry point via `ingress.yaml`. -2. **Strict Pod Placements (`production` only)** - - Hard limits of 1 DSV App (+ Redis sidecar) per physical hardware node are enforced via `podAntiAffinity` rules matching `kubernetes.io/hostname`. -3. **App Architecture** - - **DSV Worker & Redis:** Redis is deployed as a *sidecar container* inside the DSV `StatefulSet`. If the pod dies, it recovers data natively via Persistent Volumes (`volumeClaimTemplates`). - - **ScaleCube peer discovery:** ScaleCube bootstrap is DNS-based. Each DSV worker resolves `SEED_DNS_HOST` (the headless worker service) and joins peers on `SEED_DNS_PORT`/`CLUSTER_PORT` (`4801`). - - **PostgreSQL Database:** Handled via a `StatefulSet` with replicas. A smart wrapper script mounted from `postgres-configmap.yaml` automatically discovers if a pod is the primary (`postgres-0`) or a replica (e.g., `postgres-1`) based on the hostname provided by the `StatefulSet` and initializes replication logic accordingly. -4. 
**Dynamic Scaling (Standby Strategy)** - - We target 12 total agent nodes natively requesting `replicas: 12`. If fewer physical worker nodes exist (e.g., 5 nodes available), 5 pods run and 7 remain pending gracefully acting as a standby queue. - -## Usage - -### Production (Multi-Node Target) -To run the production deployment onto a properly labeled multi-node system (e.g. standard K3s installation). - -1. Review and apply Secrets/ConfigMaps to fulfill the Environment Variables if needed natively. -2. Apply the production configurations: - ```bash - kubectl apply -f k8s/production/ - ``` - -### Local Testing (Single-Node Dev) -A lightweight version in `testing/` strips away the Affinity constraints and lowers replica counts, making it perfect for Docker Desktop, Minikube, or K3d local development on a single machine. - -1. Apply the testing configurations: - ```bash - kubectl apply -f k8s/testing/ - ``` -2. Verify rollout: - ```bash - kubectl get pods -w - ``` -3. Expose the Ingress endpoint if your local orchestrator requires specific tunings or simply curl the proxy IP endpoint. 
- -## Environment Variables Mapping - -Most configurations mirror the `.env` settings expected globally: - -| Target Container | Variables Set via K8s Manifest | Description / Source Mapping | -| ------------------- | ---------------------------------------------------- | ------------------------------------------ | -| `dsv-app` | `NODE_NAME`, `POD_IP` | StatefulSet pod identity used by ScaleCube | -| `dsv-app` | `CLUSTER_PORT`, `SEED_DNS_PORT` | ScaleCube membership transport port | -| `dsv-app` | `SEED_DNS_HOST` | Headless service DNS used for peer lookup | -| `dsv-app` | `SPRING_DATA_REDIS_HOST="localhost"` | Redis operates as a sidecar container | -| `dsv-app` | `SPRING_DATASOURCE_URL` | Routes to the headless Postgres service | -| `postgres` | `POSTGRES_USER`, `POSTGRES_PASSWORD`, `POSTGRES_DB` | Environment values or `Secret` references | -| `postgres` | `POSTGRES_REPLICATION_USER`, `POSTGRES_PRIMARY_HOST` | Bound dynamically for the StatefulSet init | - -## ScaleCube Discovery Contract - -ScaleCube startup is now fully DNS-based in both `k8s/production` and `k8s/testing`: - -- `SEED_DNS_HOST` points to `dsv-app-headless.default.svc.cluster.local` -- `SEED_DNS_PORT` and `CLUSTER_PORT` are set to `4801` -- `dsv-app-headless` exposes port `4801` to provide discoverable endpoints for cluster membership -- Runtime defaults are sourced from `application.properties`: - - `scalecube.cluster.default-port=4801` - - `scalecube.dns.resolve.max-attempts=5` - - `scalecube.dns.resolve.retry-delay-ms=1000` - -*Note: In production deployments, it's highly recommended to replace hardcoded values (like `POSTGRES_PASSWORD="POSTGRES_PASSWORD"`) inside `k8s/production/postgres.yaml` with a K8s `Secret` before applying.* \ No newline at end of file +The testing app manifest uses `imagePullPolicy: Never`, so the image is never pulled from a registry and must already exist in the image store of the cluster node itself. Docker Desktop shares the host Docker image store; on Minikube or K3d, load the image into the cluster first (for example `minikube image load dsv-backend:latest` or `k3d image import dsv-backend:latest`).
+ +## Production + +```bash +kubectl apply -f k8s/production/ +``` + +Before production use, replace placeholder image and ingress details with the registry image and hostnames for the target cluster. + +## App Environment + +| Variable | Purpose | +| --- | --- | +| `NODE_NAME` | StatefulSet pod identity | +| `POD_IP` | Pod IP used for cluster membership | +| `CLUSTER_PORT` | ScaleCube transport port | +| `SEED_DNS_HOST` | Headless service DNS name for peer discovery | +| `SEED_DNS_PORT` | ScaleCube seed port | +| `SPRING_DATA_REDIS_HOST` | `localhost`, because Redis is a sidecar | +| `SPRING_DATA_REDIS_PORT` | Redis sidecar port | +| `KAFKA_BOOTSTRAP_SERVERS` | Kafka service DNS endpoint | + +## ScaleCube Discovery + +ScaleCube startup is DNS-based: + +- `SEED_DNS_HOST=dsv-app-headless.default.svc.cluster.local` +- `SEED_DNS_PORT=4801` +- `CLUSTER_PORT=4801` + +The headless service exposes port `4801` so each worker can resolve and join active peer pods. diff --git a/k8s/production/app-statefulset.yaml b/k8s/production/app-statefulset.yaml index 850c7ae..9dda4a9 100644 --- a/k8s/production/app-statefulset.yaml +++ b/k8s/production/app-statefulset.yaml @@ -61,21 +61,6 @@ spec: value: "6379" - name: SPRING_PROFILES_ACTIVE value: "prod" - # Postgres connection via the postgres primary service - - name: SPRING_DATASOURCE_URL - value: "jdbc:postgresql://postgres-0.postgres-headless.default.svc.cluster.local:5432/dsv" - - name: SPRING_DATASOURCE_USERNAME - valueFrom: - secretKeyRef: - name: dsv-secrets - key: POSTGRES_USER - optional: true - - name: SPRING_DATASOURCE_PASSWORD - valueFrom: - secretKeyRef: - name: dsv-secrets - key: POSTGRES_PASSWORD - optional: true - name: KAFKA_BOOTSTRAP_SERVERS value: "kafka.default.svc.cluster.local:9092" # Redis Sidecar container within the same Pod diff --git a/k8s/production/postgres-configmap.yaml b/k8s/production/postgres-configmap.yaml deleted file mode 100644 index 077e6d0..0000000 --- a/k8s/production/postgres-configmap.yaml 
+++ /dev/null @@ -1,27 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: postgres-scripts - namespace: default -data: - wrapper.sh: | - #!/bin/bash - set -e - - HOSTNAME=$(hostname) - - if [[ "$HOSTNAME" == "postgres-0" ]]; then - echo "Initializing Primary on $HOSTNAME" - # Injecting standard PostgreSQL config args - # Under the hood it will use the original scripts/init-primary.sh mounted via configmap if needed - # pseudo-code: Setup replication users - exec docker-entrypoint.sh postgres -c config_file=/etc/postgresql/postgresql.conf - else - echo "Initializing Replica on $HOSTNAME" - export REPLICA_NAME=$HOSTNAME - # pseudo-code: bash /scripts/replica-entrypoint.sh - # wait for primary (postgres-0) to become available - # pg_basebackup from postgres-0 - # start replica - exec /scripts/replica-entrypoint.sh - fi diff --git a/k8s/production/postgres.yaml b/k8s/production/postgres.yaml deleted file mode 100644 index e1d78a9..0000000 --- a/k8s/production/postgres.yaml +++ /dev/null @@ -1,81 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: postgres - namespace: default -spec: - # Exactly 3 nodes for Postgres (1 primary, 2 replicas) - replicas: 3 - serviceName: postgres-headless - selector: - matchLabels: - app: postgres - template: - metadata: - labels: - app: postgres - spec: - # Spread Postgres across nodes and avoid control plane - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: node-role.kubernetes.io/control-plane - operator: DoesNotExist - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - postgres - topologyKey: "kubernetes.io/hostname" - containers: - - name: postgres - image: postgres:18.2-alpine - ports: - - containerPort: 5432 - env: - - name: POSTGRES_USER - value: "dsv" - - name: POSTGRES_PASSWORD - value: "POSTGRES_PASSWORD" - - name: POSTGRES_DB - 
value: "dsv" - - name: POSTGRES_REPLICATION_USER - value: "replicator" - - name: POSTGRES_REPLICATION_PASSWORD - value: "POSTGRES_REPLICATION_PASSWORD" - - name: POSTGRES_PRIMARY_HOST - value: "postgres-0.postgres-headless.default.svc.cluster.local" - command: ["bash", "/scripts/wrapper.sh"] - volumeMounts: - - name: postgres-data - mountPath: /var/lib/postgresql/data - - name: postgres-scripts - mountPath: /scripts - volumeClaimTemplates: - - metadata: - name: postgres-data - spec: - accessModes: [ "ReadWriteOnce" ] - resources: - requests: - storage: 5Gi - ---- -apiVersion: v1 -kind: Service -metadata: - name: postgres-headless - namespace: default -spec: - type: ClusterIP - clusterIP: None - selector: - app: postgres - ports: - - port: 5432 - targetPort: 5432 diff --git a/k8s/testing/app-service.yaml b/k8s/testing/app-service.yaml index 6c2c4d2..c44f587 100644 --- a/k8s/testing/app-service.yaml +++ b/k8s/testing/app-service.yaml @@ -26,5 +26,6 @@ spec: - name: cluster port: 4801 targetPort: 4801 - - port: 8080 + - name: http + port: 8080 targetPort: 8080 diff --git a/k8s/testing/app-statefulset.yaml b/k8s/testing/app-statefulset.yaml index 0b8d7a4..2d838be 100644 --- a/k8s/testing/app-statefulset.yaml +++ b/k8s/testing/app-statefulset.yaml @@ -18,6 +18,7 @@ spec: containers: - name: dsv-app image: dsv-backend:latest + imagePullPolicy: Never ports: - containerPort: 4801 name: cluster @@ -42,23 +43,10 @@ spec: value: "localhost" - name: SPRING_DATA_REDIS_PORT value: "6379" + - name: SPRING_DATA_REDIS_PASSWORD + value: "" - name: SPRING_PROFILES_ACTIVE - value: "prod" - # Postgres connection via the postgres primary service - - name: SPRING_DATASOURCE_URL - value: "jdbc:postgresql://postgres-0.postgres-headless.default.svc.cluster.local:5432/dsv" - - name: SPRING_DATASOURCE_USERNAME - valueFrom: - secretKeyRef: - name: dsv-secrets - key: POSTGRES_USER - optional: true - - name: SPRING_DATASOURCE_PASSWORD - valueFrom: - secretKeyRef: - name: dsv-secrets - key: 
POSTGRES_PASSWORD - optional: true + value: "dev" - name: KAFKA_BOOTSTRAP_SERVERS value: "kafka.default.svc.cluster.local:9092" # Redis Sidecar container within the same Pod diff --git a/k8s/testing/postgres-configmap.yaml b/k8s/testing/postgres-configmap.yaml deleted file mode 100644 index 077e6d0..0000000 --- a/k8s/testing/postgres-configmap.yaml +++ /dev/null @@ -1,27 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: postgres-scripts - namespace: default -data: - wrapper.sh: | - #!/bin/bash - set -e - - HOSTNAME=$(hostname) - - if [[ "$HOSTNAME" == "postgres-0" ]]; then - echo "Initializing Primary on $HOSTNAME" - # Injecting standard PostgreSQL config args - # Under the hood it will use the original scripts/init-primary.sh mounted via configmap if needed - # pseudo-code: Setup replication users - exec docker-entrypoint.sh postgres -c config_file=/etc/postgresql/postgresql.conf - else - echo "Initializing Replica on $HOSTNAME" - export REPLICA_NAME=$HOSTNAME - # pseudo-code: bash /scripts/replica-entrypoint.sh - # wait for primary (postgres-0) to become available - # pg_basebackup from postgres-0 - # start replica - exec /scripts/replica-entrypoint.sh - fi diff --git a/k8s/testing/postgres.yaml b/k8s/testing/postgres.yaml deleted file mode 100644 index 847f8a5..0000000 --- a/k8s/testing/postgres.yaml +++ /dev/null @@ -1,64 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: postgres - namespace: default -spec: - # Exactly 3 nodes for Postgres (1 primary, 2 replicas) - replicas: 1 - serviceName: postgres-headless - selector: - matchLabels: - app: postgres - template: - metadata: - labels: - app: postgres - spec: - containers: - - name: postgres - image: postgres:18.2-alpine - ports: - - containerPort: 5432 - env: - - name: POSTGRES_USER - value: "dsv" - - name: POSTGRES_PASSWORD - value: "POSTGRES_PASSWORD" - - name: POSTGRES_DB - value: "dsv" - - name: POSTGRES_REPLICATION_USER - value: "replicator" - - name: 
POSTGRES_REPLICATION_PASSWORD - value: "POSTGRES_REPLICATION_PASSWORD" - - name: POSTGRES_PRIMARY_HOST - value: "postgres-0.postgres-headless.default.svc.cluster.local" - command: ["bash", "/scripts/wrapper.sh"] - volumeMounts: - - name: postgres-data - mountPath: /var/lib/postgresql/data - - name: postgres-scripts - mountPath: /scripts - volumeClaimTemplates: - - metadata: - name: postgres-data - spec: - accessModes: ["ReadWriteOnce"] - resources: - requests: - storage: 5Gi - ---- -apiVersion: v1 -kind: Service -metadata: - name: postgres-headless - namespace: default -spec: - type: ClusterIP - clusterIP: None - selector: - app: postgres - ports: - - port: 5432 - targetPort: 5432 diff --git a/pom.xml b/pom.xml index 62e51db..f6ec1e6 100644 --- a/pom.xml +++ b/pom.xml @@ -42,10 +42,6 @@ org.springframework.boot spring-boot-starter-actuator - - org.springframework.boot - spring-boot-starter-data-jpa -