From 2d99343037c862b0b0b1e935701a0bc10b8150f7 Mon Sep 17 00:00:00 2001 From: Noam Ben Simon Date: Thu, 14 May 2026 21:49:32 -0400 Subject: [PATCH 1/7] postgres surgically removed ;) --- .env.example | 13 -- docker/README.md | 199 +++++----------- docker/dsv/docker-compose.dsv-postgresql.yml | 51 ----- ...docker-compose.dsv-redis-kafka-3nodes.yml} | 45 +--- ...yml => docker-compose.dsv-redis-kafka.yml} | 35 +-- .../docker-compose.postgresql-production.yml | 100 -------- .../postgresql/docker-compose.postgresql.yml | 23 -- docker/postgresql/postgresql.conf | 137 ----------- docker/postgresql/scripts/init-primary.sh | 11 - .../postgresql/scripts/replica-entrypoint.sh | 25 -- docs/architecture.md | 6 +- docs/docker.md | 199 ++++------------ docs/kubernetes.md | 214 +++++------------- docs/technologies.md | 119 ++-------- k8s/README.md | 152 ++++++------- k8s/production/app-statefulset.yaml | 15 -- k8s/production/postgres-configmap.yaml | 27 --- k8s/production/postgres.yaml | 81 ------- k8s/testing/app-service.yaml | 3 +- k8s/testing/app-statefulset.yaml | 20 +- k8s/testing/postgres-configmap.yaml | 27 --- k8s/testing/postgres.yaml | 64 ------ pom.xml | 16 +- scripts/test-three-dsv-kafka-nodes.sh | 4 +- .../config/StorageConfig.java | 4 - .../service/secret/SecretService.java | 3 +- src/main/resources/application-dev.properties | 21 -- .../application-single-redis.properties | 4 +- src/main/resources/application.properties | 59 +---- .../resources/application-test.properties | 48 +--- src/test/resources/application.properties | 48 +--- 31 files changed, 273 insertions(+), 1500 deletions(-) delete mode 100644 docker/dsv/docker-compose.dsv-postgresql.yml rename docker/dsv/{docker-compose.dsv-redis-postgresql-3nodes.yml => docker-compose.dsv-redis-kafka-3nodes.yml} (67%) rename docker/dsv/{docker-compose.dsv-redis-postgresql.yml => docker-compose.dsv-redis-kafka.yml} (65%) delete mode 100644 docker/postgresql/docker-compose.postgresql-production.yml delete mode 100644 
docker/postgresql/docker-compose.postgresql.yml delete mode 100644 docker/postgresql/postgresql.conf delete mode 100644 docker/postgresql/scripts/init-primary.sh delete mode 100644 docker/postgresql/scripts/replica-entrypoint.sh delete mode 100644 k8s/production/postgres-configmap.yaml delete mode 100644 k8s/production/postgres.yaml delete mode 100644 k8s/testing/postgres-configmap.yaml delete mode 100644 k8s/testing/postgres.yaml diff --git a/.env.example b/.env.example index b714a3c..89b4094 100644 --- a/.env.example +++ b/.env.example @@ -57,19 +57,6 @@ SPRING_PROFILES_ACTIVE=dev # Spring framework logging # LOGGING_LEVEL_ORG_SPRINGFRAMEWORK=INFO -# ---------------------------------------------------- -# PostgreSQL Configuration (user accounts) -# ---------------------------------------------------- -# Used by Docker Compose and by the app when datasource is configured. -# Set POSTGRES_PASSWORD in .env; other values are optional (defaults shown). -POSTGRES_USER=dsv -POSTGRES_PASSWORD=POSTGRES_PASSWORD -POSTGRES_DB=dsv - -# Production only: replication user for PostgreSQL primary/standby (docker-compose.postgresql-production.yml) -# POSTGRES_REPLICATION_USER=replicator -# POSTGRES_REPLICATION_PASSWORD= - # ---------------------------------------------------- # Security Configuration (Future Use) # ---------------------------------------------------- diff --git a/docker/README.md b/docker/README.md index c44c3cb..5fb1c8a 100644 --- a/docker/README.md +++ b/docker/README.md @@ -1,190 +1,111 @@ # Docker Configuration -This directory contains all Docker-related configuration for the Distributed Secrets Vault. +This directory contains Docker configuration for the Distributed Secrets Vault. 
## Structure -``` +```text docker/ ├── dsv/ │ ├── docker-compose.dsv.yml # App only │ ├── docker-compose.dsv-redis.yml # App + Redis -│ ├── docker-compose.dsv-postgresql.yml # App + PostgreSQL -│ ├── docker-compose.dsv-redis-postgresql.yml # App + Redis + PostgreSQL + Kafka (single app) -│ └── docker-compose.dsv-redis-postgresql-3nodes.yml # Same stack, three DSV app instances (Kafka fanout test) +│ ├── docker-compose.dsv-redis-kafka.yml # App + Redis + Kafka +│ └── docker-compose.dsv-redis-kafka-3nodes.yml # Three DSV app instances ├── redis/ │ ├── docker-compose.redis.yml # Redis only │ └── redis.conf # Redis persistence and security config ├── kafka/ -│ └── docker-compose.kafka.yml # Kafka only (KRaft mode) -├── postgresql/ -│ ├── docker-compose.postgresql.yml # PostgreSQL only (development, single node) -│ ├── docker-compose.postgresql-production.yml # Production: primary + 2 standbys -│ ├── postgresql.conf # Production replication config (used by production compose) -│ └── scripts/ -│ ├── init-primary.sh # Creates replication user (production primary) -│ └── replica-entrypoint.sh # Bootstrap standbys from primary (production) -└── README.md # This file +│ └── docker-compose.kafka.yml # Kafka only, KRaft mode +└── README.md +``` Project root: -├── .env.example # Environment variable template -├── .env # Your local config (gitignored) -└── scripts/test-three-dsv-kafka-nodes.sh # Builds, starts 3-node stack, curls temp Kafka endpoint, checks logs + +```text +.env.example # Environment variable template +.env # Local config, gitignored +scripts/test-three-dsv-kafka-nodes.sh # Builds and verifies the 3-node Kafka fanout stack ``` ## Setup -1. **Create environment file in project root:** - - ```bash - cp .env.example .env - ``` - -2. 
**Set required values in `.env`** (recommended; otherwise dev compose uses defaults): - - ```env - REDIS_PASSWORD=your-secure-password-here - POSTGRES_PASSWORD=your-postgres-password-here - ``` - - If you skip `.env`, the **development** compose files use the same defaults as `.env.example` (`REDIS_PASSWORD`, `POSTGRES_PASSWORD`). Set stronger values in `.env` for any real use. Optionally set `POSTGRES_USER` and `POSTGRES_DB` (defaults: `dsv`). - -3. **Build and start from project root:** - - ```bash - ./mvnw clean package - mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) - docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up --build - ``` - - When you run `docker compose` from the project root (as in the commands below), Compose loads `.env` from the project root. +Create a local environment file from the project root: - **Other compose files:** - - App only: `docker compose -f docker/dsv/docker-compose.dsv.yml up --build` - - App + Redis: `docker compose -f docker/dsv/docker-compose.dsv-redis.yml up --build` - - App + PostgreSQL: `docker compose -f docker/dsv/docker-compose.dsv-postgresql.yml up --build` +```bash +cp .env.example .env +``` - **Three DSV instances (Kafka commit fanout):** do not run this at the same time as the single-app stack on the same machine (shared container names `dsv-redis`, `dsv-postgres`, `dsv-kafka`, and host ports). Stop the other stack first (`docker compose … down`). +Set a Redis password for any real local use: - ```bash - ./mvnw clean package -DskipTests - mkdir -p target/dependency && (cd target/dependency && jar -xf ../*.jar) - docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql-3nodes.yml up -d --build - ``` +```env +REDIS_PASSWORD=your-secure-password-here +SPRING_PROFILES_ACTIVE=dev +``` - Apps listen on **8081**, **8082**, and **8083** (mapped to container port 8080). 
Each instance sets a different `NODE_NAME` so Kafka consumer groups differ and every node receives `secrets-commit` messages. +Build and start the Redis + Kafka stack: - Automated check (build, start, publish once from app1, assert all three containers logged the commit): +```bash +./mvnw clean package -DskipTests +mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml up --build +``` - ```bash - ./scripts/test-three-dsv-kafka-nodes.sh - ``` +The API listens on `http://localhost:8080`. - Manual check after the stack is healthy (prefer **`127.0.0.1`** on WSL2 / Docker Desktop if `localhost` gives `Connection reset by peer`): +## Three App Nodes - ```bash - curl -sS http://127.0.0.1:8081/api/temp-test/kafka - docker logs dsv-app-1 2>&1 | grep -i "Received commit" | tail -3 - docker logs dsv-app-2 2>&1 | grep -i "Received commit" | tail -3 - docker logs dsv-app-3 2>&1 | grep -i "Received commit" | tail -3 - ``` +For local cluster-like testing, run three DSV app instances against the same Redis and Kafka services: -## Environment Variables +```bash +./mvnw clean package -DskipTests +mkdir -p target/dependency && (cd target/dependency && jar -xf ../*.jar) +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka-3nodes.yml up -d --build +``` -| Variable | Description | Default | -| ------------------------------- | --------------------------------- | --------------------------------------------------------- | -| `REDIS_PASSWORD` | Redis authentication password | Placeholder in `.env.example`; set in `.env` for real use | -| `POSTGRES_USER` | PostgreSQL user | `dsv` | -| `POSTGRES_PASSWORD` | PostgreSQL password (required) | Placeholder in `.env.example`; set in `.env` for real use | -| `POSTGRES_DB` | PostgreSQL database name | `dsv` | -| `POSTGRES_REPLICATION_USER` | Replication user (production) | `replicator` | -| `POSTGRES_REPLICATION_PASSWORD` | Replication password (production) | — 
| -| `SPRING_PROFILES_ACTIVE` | Spring Boot profile | `dev` | +Apps listen on `8081`, `8082`, and `8083`. Each instance sets a different `NODE_NAME` so Kafka consumer groups differ and every node receives `secrets-commit` messages. -## Redis Configuration +Automated check: -Redis is configured for durable secret storage with: +```bash +./scripts/test-three-dsv-kafka-nodes.sh +``` -- **AOF persistence**: `appendfsync everysec` (max 1 second data loss) -- **RDB snapshots**: Every 15 minutes if keys changed -- **No eviction**: Secrets are never auto-deleted -- **Password auth**: Required for all connections +Manual check: -See `redis/redis.conf` for full configuration. +```bash +curl -sS http://127.0.0.1:8081/api/temp-test/kafka +docker logs dsv-app-1 2>&1 | grep -i "Received commit" | tail -3 +docker logs dsv-app-2 2>&1 | grep -i "Received commit" | tail -3 +docker logs dsv-app-3 2>&1 | grep -i "Received commit" | tail -3 +``` ## Services -### `redis` - -- **Image**: redis:8.6-alpine -- **Ports**: 6379 -- **Volumes**: Persistent data in `redis-data` volume - -### `postgres` +`redis` stores secret shards durably with AOF persistence and password auth. -- **Image**: postgres:18.2-alpine -- **Ports**: 5432 -- **Volumes**: Persistent data in `postgres-data` volume -- **Healthcheck**: `pg_isready` before app starts -- **Purpose**: User accounts; development uses a single node. For production redundancy, use the production compose (see below). +`kafka` provides commit fanout and ordering infrastructure in KRaft mode. -### `kafka` - -- **Image**: apache/kafka:3.7.0 -- **Ports**: 9092 -- **Volumes**: Persistent data in `kafka-data` volume -- **Purpose**: Message broker for request sequencing. Uses KRaft (ZooKeeper-less) mode. - -### `app` - -- **Build**: From project root (build context `../..`); uses the Dockerfile in the project root. 
-- **Ports**: 8080 -- **Depends on**: Redis and/or PostgreSQL (waits for health checks when present) +`app` is the Spring Boot DSV service built from the repository root Dockerfile. ## Commands -All commands assume you are in the **project root**. - ```bash -# Full dev stack (app + Redis + PostgreSQL) -./mvnw clean package -mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up --build +# Start full local stack +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml up # Start in background -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up -d +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml up -d # View logs -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml logs -f app -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml logs -f redis -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml logs -f postgres +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml logs -f app +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml logs -f redis +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml logs -f kafka # Stop services -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml down +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml down -# Clean slate (removes volumes) -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml down -v - -# Rebuild after code changes -./mvnw clean package && mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up --build +# Clean slate, including volumes +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml down -v ``` -## Production PostgreSQL (multi-node) - -For production, run one primary and two synchronous standbys for redundancy: - -```bash -# From project root. 
Set in .env: POSTGRES_PASSWORD, POSTGRES_REPLICATION_USER (default: replicator), POSTGRES_REPLICATION_PASSWORD -docker compose -f docker/postgresql/docker-compose.postgresql-production.yml up -d -``` - -- **postgres-primary**: Read-write; uses `postgresql/postgresql.conf` (WAL archiving, synchronous replication). Port 5432. -- **postgres-1**, **postgres-2**: Read-only standbys; stream from primary. Application names match `synchronous_standby_names` in `postgresql.conf` so commits wait for at least one standby. -- Scripts: `scripts/init-primary.sh` creates the replication user on first start; `scripts/replica-entrypoint.sh` bootstraps each standby with `pg_basebackup` then starts streaming. - -Applications should connect to the primary (hostname `postgres-primary`) for read-write; standbys can be used for read scaling if desired. - -## Network - -All services communicate on the `dsv-network` bridge network. The app connects to Redis using the hostname `redis` and to PostgreSQL using the hostname `postgres` (dev) or `postgres-primary` (production). +All services communicate on the `dsv-network` bridge network. The app connects to Redis as `redis` and Kafka as `kafka:29092`. 
diff --git a/docker/dsv/docker-compose.dsv-postgresql.yml b/docker/dsv/docker-compose.dsv-postgresql.yml deleted file mode 100644 index 829d583..0000000 --- a/docker/dsv/docker-compose.dsv-postgresql.yml +++ /dev/null @@ -1,51 +0,0 @@ -# Run the Distributed Secrets Vault application with PostgreSQL -name: "Distributed Secrets Vault + PostgreSQL" - -# Environment variables are loaded from ../../.env (project root) -# Copy .env.example to .env and configure as needed - -services: - postgres: - image: postgres:18.2-alpine - container_name: dsv-postgres - ports: - - "5432:5432" - environment: - - LANG=C.UTF-8 - - POSTGRES_USER=${POSTGRES_USER:-dsv} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - - POSTGRES_DB=${POSTGRES_DB:-dsv} - volumes: - - postgres-data:/var/lib/postgresql/data - healthcheck: - test: ["CMD-SHELL", "pg_isready -U \"$$POSTGRES_USER\" -d \"$$POSTGRES_DB\""] - interval: 5s - timeout: 3s - retries: 5 - networks: - - dsv-network - - app: - build: ../.. - container_name: dsv-app - env_file: - - ../../.env - ports: - - "8080:8080" - environment: - - SPRING_PROFILES_ACTIVE=${SPRING_PROFILES_ACTIVE:-dev} - - SPRING_DATASOURCE_URL=jdbc:postgresql://postgres:5432/${POSTGRES_DB:-dsv} - - SPRING_DATASOURCE_USERNAME=${POSTGRES_USER:-dsv} - - SPRING_DATASOURCE_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - depends_on: - postgres: - condition: service_healthy - networks: - - dsv-network - -volumes: - postgres-data: - -networks: - dsv-network: - driver: bridge diff --git a/docker/dsv/docker-compose.dsv-redis-postgresql-3nodes.yml b/docker/dsv/docker-compose.dsv-redis-kafka-3nodes.yml similarity index 67% rename from docker/dsv/docker-compose.dsv-redis-postgresql-3nodes.yml rename to docker/dsv/docker-compose.dsv-redis-kafka-3nodes.yml index 57893b1..48516cf 100644 --- a/docker/dsv/docker-compose.dsv-redis-postgresql-3nodes.yml +++ b/docker/dsv/docker-compose.dsv-redis-kafka-3nodes.yml @@ -1,11 +1,6 @@ -# Same stack as 
docker-compose.dsv-redis-postgresql.yml but three DSV app instances. -# Use this file OR the single-app compose file, not both (same project name and infra container names). -# -# Each app has a distinct NODE_NAME so Kafka consumer groups differ — every instance receives -# the full secrets-commit topic (see application.properties spring.kafka.consumer.group-id). -# +# Same stack as docker-compose.dsv-redis-kafka.yml but with three DSV app instances. # HTTP: localhost:8081 (app1), :8082 (app2), :8083 (app3) -name: "Distributed Secrets Vault + Redis + PostgreSQL" +name: "Distributed Secrets Vault + Redis + Kafka" services: redis: @@ -25,26 +20,6 @@ services: networks: - dsv-network - postgres: - image: postgres:18.2-alpine - container_name: dsv-postgres - ports: - - "5432:5432" - environment: - - LANG=C.UTF-8 - - POSTGRES_USER=${POSTGRES_USER:-dsv} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - - POSTGRES_DB=${POSTGRES_DB:-dsv} - volumes: - - postgres-data:/var/lib/postgresql/data - healthcheck: - test: ["CMD-SHELL", "pg_isready -U \"$$POSTGRES_USER\" -d \"$$POSTGRES_DB\""] - interval: 5s - timeout: 3s - retries: 5 - networks: - - dsv-network - kafka: image: apache/kafka:3.7.0 container_name: dsv-kafka @@ -92,16 +67,11 @@ services: - SPRING_DATA_REDIS_PORT=6379 - SPRING_DATA_REDIS_PASSWORD=${REDIS_PASSWORD:-REDIS_PASSWORD} - SPRING_PROFILES_ACTIVE=${SPRING_PROFILES_ACTIVE:-dev} - - SPRING_DATASOURCE_URL=jdbc:postgresql://postgres:5432/${POSTGRES_DB:-dsv} - - SPRING_DATASOURCE_USERNAME=${POSTGRES_USER:-dsv} - - SPRING_DATASOURCE_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - KAFKA_BOOTSTRAP_SERVERS=kafka:29092 - SEED_DNS_HOST=localhost depends_on: redis: condition: service_healthy - postgres: - condition: service_healthy kafka: condition: service_healthy networks: @@ -120,16 +90,11 @@ services: - SPRING_DATA_REDIS_PORT=6379 - SPRING_DATA_REDIS_PASSWORD=${REDIS_PASSWORD:-REDIS_PASSWORD} - SPRING_PROFILES_ACTIVE=${SPRING_PROFILES_ACTIVE:-dev} - - 
SPRING_DATASOURCE_URL=jdbc:postgresql://postgres:5432/${POSTGRES_DB:-dsv} - - SPRING_DATASOURCE_USERNAME=${POSTGRES_USER:-dsv} - - SPRING_DATASOURCE_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - KAFKA_BOOTSTRAP_SERVERS=kafka:29092 - SEED_DNS_HOST=localhost depends_on: redis: condition: service_healthy - postgres: - condition: service_healthy kafka: condition: service_healthy networks: @@ -148,16 +113,11 @@ services: - SPRING_DATA_REDIS_PORT=6379 - SPRING_DATA_REDIS_PASSWORD=${REDIS_PASSWORD:-REDIS_PASSWORD} - SPRING_PROFILES_ACTIVE=${SPRING_PROFILES_ACTIVE:-dev} - - SPRING_DATASOURCE_URL=jdbc:postgresql://postgres:5432/${POSTGRES_DB:-dsv} - - SPRING_DATASOURCE_USERNAME=${POSTGRES_USER:-dsv} - - SPRING_DATASOURCE_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - KAFKA_BOOTSTRAP_SERVERS=kafka:29092 - SEED_DNS_HOST=localhost depends_on: redis: condition: service_healthy - postgres: - condition: service_healthy kafka: condition: service_healthy networks: @@ -165,7 +125,6 @@ services: volumes: redis-data: - postgres-data: kafka-data: networks: diff --git a/docker/dsv/docker-compose.dsv-redis-postgresql.yml b/docker/dsv/docker-compose.dsv-redis-kafka.yml similarity index 65% rename from docker/dsv/docker-compose.dsv-redis-postgresql.yml rename to docker/dsv/docker-compose.dsv-redis-kafka.yml index fa94c85..1cfa894 100644 --- a/docker/dsv/docker-compose.dsv-redis-postgresql.yml +++ b/docker/dsv/docker-compose.dsv-redis-kafka.yml @@ -1,8 +1,5 @@ -# Run the Distributed Secrets Vault application with Redis and PostgreSQL -name: "Distributed Secrets Vault + Redis + PostgreSQL" - -# Environment variables are loaded from ../../.env (project root) -# Copy .env.example to .env and configure as needed +# Run the Distributed Secrets Vault application with Redis and Kafka +name: "Distributed Secrets Vault + Redis + Kafka" services: redis: @@ -22,26 +19,6 @@ services: networks: - dsv-network - postgres: - image: postgres:18.2-alpine - container_name: dsv-postgres - ports: - - 
"5432:5432" - environment: - - LANG=C.UTF-8 - - POSTGRES_USER=${POSTGRES_USER:-dsv} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - - POSTGRES_DB=${POSTGRES_DB:-dsv} - volumes: - - postgres-data:/var/lib/postgresql/data - healthcheck: - test: ["CMD-SHELL", "pg_isready -U \"$$POSTGRES_USER\" -d \"$$POSTGRES_DB\""] - interval: 5s - timeout: 3s - retries: 5 - networks: - - dsv-network - kafka: image: apache/kafka:3.7.0 container_name: dsv-kafka @@ -50,7 +27,6 @@ services: environment: - KAFKA_NODE_ID=1 - KAFKA_PROCESS_ROLES=broker,controller - # INTERNAL for containers (app); HOST for JVM on machine (metadata must match bootstrap) - KAFKA_LISTENERS=PLAINTEXT_HOST://0.0.0.0:9092,PLAINTEXT_INTERNAL://0.0.0.0:29092,CONTROLLER://0.0.0.0:9093 - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT_HOST://localhost:9092,PLAINTEXT_INTERNAL://kafka:29092 - KAFKA_CONTROLLER_LISTENER_NAMES=CONTROLLER @@ -89,17 +65,11 @@ services: - SPRING_DATA_REDIS_PORT=6379 - SPRING_DATA_REDIS_PASSWORD=${REDIS_PASSWORD:-REDIS_PASSWORD} - SPRING_PROFILES_ACTIVE=${SPRING_PROFILES_ACTIVE:-dev} - - SPRING_DATASOURCE_URL=jdbc:postgresql://postgres:5432/${POSTGRES_DB:-dsv} - - SPRING_DATASOURCE_USERNAME=${POSTGRES_USER:-dsv} - - SPRING_DATASOURCE_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - KAFKA_BOOTSTRAP_SERVERS=kafka:29092 - # Single-node ScaleCube: seed resolves to this container (see ScaleCubeConfig) - SEED_DNS_HOST=localhost depends_on: redis: condition: service_healthy - postgres: - condition: service_healthy kafka: condition: service_healthy networks: @@ -107,7 +77,6 @@ services: volumes: redis-data: - postgres-data: kafka-data: networks: diff --git a/docker/postgresql/docker-compose.postgresql-production.yml b/docker/postgresql/docker-compose.postgresql-production.yml deleted file mode 100644 index 21a1b12..0000000 --- a/docker/postgresql/docker-compose.postgresql-production.yml +++ /dev/null @@ -1,100 +0,0 @@ -# Production PostgreSQL: one primary + two synchronous standbys for 
redundancy. -# Uses docker/postgresql/postgresql.conf (replication, archive, logging). -# Standbys use application_name postgres-1 and postgres-2 to match synchronous_standby_names. -# -# Environment variables are loaded from ../../.env (project root). -# Required: POSTGRES_PASSWORD, POSTGRES_REPLICATION_USER, POSTGRES_REPLICATION_PASSWORD. -name: "PostgreSQL (Production)" - -services: - postgres-primary: - image: postgres:18.2-alpine - container_name: dsv-postgres-primary - hostname: postgres-primary - ports: - - "5432:5432" - environment: - - POSTGRES_USER=${POSTGRES_USER:-dsv} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} - - POSTGRES_DB=${POSTGRES_DB:-dsv} - - POSTGRES_REPLICATION_USER=${POSTGRES_REPLICATION_USER:-replicator} - - POSTGRES_REPLICATION_PASSWORD=${POSTGRES_REPLICATION_PASSWORD} - volumes: - - postgres-primary-data:/var/lib/postgresql/data - - postgres-archive:/var/lib/postgresql/archive - - ./postgresql.conf:/etc/postgresql/postgresql.conf:ro - - ./scripts/init-primary.sh:/docker-entrypoint-initdb.d/01-create-replication-user.sh:ro - command: ["postgres", "-c", "config_file=/etc/postgresql/postgresql.conf"] - healthcheck: - test: ["CMD-SHELL", "pg_isready -U \"$$POSTGRES_USER\" -d \"$$POSTGRES_DB\""] - interval: 5s - timeout: 3s - retries: 5 - networks: - - dsv-network - - postgres-1: - image: postgres:18.2-alpine - container_name: dsv-postgres-1 - hostname: postgres-1 - environment: - - PGDATA=/var/lib/postgresql/data - - POSTGRES_USER=${POSTGRES_USER:-dsv} - - POSTGRES_DB=${POSTGRES_DB:-dsv} - - POSTGRES_PRIMARY_HOST=postgres-primary - - POSTGRES_REPLICATION_USER=${POSTGRES_REPLICATION_USER:-replicator} - - POSTGRES_REPLICATION_PASSWORD=${POSTGRES_REPLICATION_PASSWORD} - - REPLICA_NAME=postgres-1 - volumes: - - postgres-1-data:/var/lib/postgresql/data - - ./scripts/replica-entrypoint.sh:/scripts/replica-entrypoint.sh:ro - command: ["bash", "/scripts/replica-entrypoint.sh"] - depends_on: - postgres-primary: - condition: service_healthy - 
healthcheck: - test: ["CMD-SHELL", "pg_isready -U \"$$POSTGRES_USER\" -d \"$$POSTGRES_DB\""] - interval: 10s - timeout: 5s - retries: 5 - start_period: 60s - networks: - - dsv-network - - postgres-2: - image: postgres:18.2-alpine - container_name: dsv-postgres-2 - hostname: postgres-2 - environment: - - PGDATA=/var/lib/postgresql/data - - POSTGRES_USER=${POSTGRES_USER:-dsv} - - POSTGRES_DB=${POSTGRES_DB:-dsv} - - POSTGRES_PRIMARY_HOST=postgres-primary - - POSTGRES_REPLICATION_USER=${POSTGRES_REPLICATION_USER:-replicator} - - POSTGRES_REPLICATION_PASSWORD=${POSTGRES_REPLICATION_PASSWORD} - - REPLICA_NAME=postgres-2 - volumes: - - postgres-2-data:/var/lib/postgresql/data - - ./scripts/replica-entrypoint.sh:/scripts/replica-entrypoint.sh:ro - command: ["bash", "/scripts/replica-entrypoint.sh"] - depends_on: - postgres-primary: - condition: service_healthy - healthcheck: - test: ["CMD-SHELL", "pg_isready -U \"$$POSTGRES_USER\" -d \"$$POSTGRES_DB\""] - interval: 10s - timeout: 5s - retries: 5 - start_period: 60s - networks: - - dsv-network - -volumes: - postgres-primary-data: - postgres-archive: - postgres-1-data: - postgres-2-data: - -networks: - dsv-network: - driver: bridge diff --git a/docker/postgresql/docker-compose.postgresql.yml b/docker/postgresql/docker-compose.postgresql.yml deleted file mode 100644 index ff61d5c..0000000 --- a/docker/postgresql/docker-compose.postgresql.yml +++ /dev/null @@ -1,23 +0,0 @@ -# Run PostgreSQL with the DSV configuration -# Development: single node. Production multi-node will use a separate compose and docker/postgresql/postgresql.conf. 
-name: "PostgreSQL" - -# Environment variables are loaded from ../../.env (project root) -# Copy .env.example to .env and configure as needed - -services: - postgres: - image: postgres:18.2-alpine - container_name: dsv-postgres - ports: - - "5432:5432" - environment: - - LANG=C.UTF-8 - - POSTGRES_USER=${POSTGRES_USER:-dsv} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-POSTGRES_PASSWORD} - - POSTGRES_DB=${POSTGRES_DB:-dsv} - volumes: - - postgres-data:/var/lib/postgresql/data - -volumes: - postgres-data: diff --git a/docker/postgresql/postgresql.conf b/docker/postgresql/postgresql.conf deleted file mode 100644 index 4950492..0000000 --- a/docker/postgresql/postgresql.conf +++ /dev/null @@ -1,137 +0,0 @@ -# ============================================================================ -# PostgreSQL Configuration for Distributed Secrets Vault -# ============================================================================ -# This PostgreSQL instance serves as one of the user management nodes. - - -# ============================================================================ -# NETWORK CONFIGURATION -# ============================================================================ - -# Listen on all network interfaces (required for Docker containers) -listen_addresses = '*' - -# Maximum number of concurrent client connections -max_connections = 200 - - -# ============================================================================ -# REPLICATION & HIGH AVAILABILITY -# ============================================================================ - -# WAL level must be 'replica' or higher for streaming replication -# This enables standby servers to receive and apply WAL changes -wal_level = replica - -# Maximum number of concurrent WAL sender processes -# This determines how many standby servers can connect simultaneously -max_wal_senders = 10 - -# Maximum number of replication slots to retain WAL files for standbys -# Prevents premature deletion of WAL files needed by 
disconnected standbys -max_replication_slots = 10 - -# Allows read-only queries on standby servers during recovery -hot_standby = on - -# Ensures transaction commits wait for WAL to be written to at least 1 standby -# Provides strong consistency guarantees at the cost of some latency -synchronous_commit = on - -# Requires acknowledgment from ANY 1 of the named standby servers before commit -# This provides automatic failover capability without data loss -synchronous_standby_names = 'ANY 1 (postgres-1, postgres-2)' - - -# ============================================================================ -# MEMORY CONFIGURATION -# ============================================================================ - -# Amount of memory used for caching data/indexes -# Recommended: 25% of total system RAM (adjusted for containerized deployment) -shared_buffers = 1GB - -# Estimate of memory available for disk caching by the OS -# Used by query planner to estimate costs; does not allocate memory -# Set to ~50-75% of total system RAM -effective_cache_size = 3GB - -# Memory used for internal sort operations and hash tables per query -# Increase if you frequently sort large datasets or perform complex joins -work_mem = 16MB - -# Memory used for maintenance operations (VACUUM, CREATE INDEX, ALTER TABLE) -maintenance_work_mem = 256MB - - -# ============================================================================ -# WRITE-AHEAD LOG (WAL) CONFIGURATION -# ============================================================================ - -# Enables continuous archiving and point-in-time recovery (PITR) -# WAL files are copied to archive location for backup purposes -archive_mode = on - -# Command to execute to archive a WAL file segment -# %p = path of file to archive, %f = filename only -# Replace with actual backup destination (S3, NFS, etc.) -archive_command = 'test ! 
-f /var/lib/postgresql/archive/%f && cp %p /var/lib/postgresql/archive/%f' - -# Target fraction (0.0-1.0) of checkpoint interval to complete checkpoint -# Higher values spread out checkpoint I/O load but may increase recovery time -checkpoint_completion_target = 0.9 - - -# ============================================================================ -# LOGGING CONFIGURATION -# ============================================================================ - -# Enables the logging collector background process -# Required for log rotation and centralized log management -logging_collector = on - -# Log all data modification statements (INSERT, UPDATE, DELETE, TRUNCATE, etc.) -# Critical for auditing user data changes in a secrets management system -log_statement = 'mod' - -# Log each successful connection attempt (important for security auditing) -log_connections = on - -# Log session termination (track connection lifecycle for monitoring) -log_disconnections = on - -# Log duration of each completed statement -# Useful for identifying performance issues -log_duration = on - -# Only log statements that take longer than this many milliseconds -# Set to 1000ms (1 second) to capture slow queries without excessive logging -log_min_duration_statement = 1000 - -# Include line numbers in log messages for easier debugging -log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d,app=%a,client=%h ' - - -# ============================================================================ -# SECURITY & AUTHENTICATION -# ============================================================================ - -# Requires SSL/TLS for client connections when properly configured -# Uncomment and configure certificates for production use -# ssl = on -# ssl_cert_file = '/path/to/server.crt' -# ssl_key_file = '/path/to/server.key' - - -# ============================================================================ -# PERFORMANCE TUNING -# ============================================================================ - 
-# Enables query execution plan statistics collection -# Helps the planner make better decisions over time -# Note: May add slight overhead but improves long-term performance -default_statistics_target = 100 - -# Sets the planner's estimate of the cost of a non-sequentially-fetched disk page -# Lower for SSDs (1.1-2.0), default (4.0) for HDDs -# random_page_cost = 1.1 \ No newline at end of file diff --git a/docker/postgresql/scripts/init-primary.sh b/docker/postgresql/scripts/init-primary.sh deleted file mode 100644 index f38ab32..0000000 --- a/docker/postgresql/scripts/init-primary.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -set -e -# Ensure archive directory exists and is writable by postgres (for postgresql.conf archive_command) -mkdir -p /var/lib/postgresql/archive -chown postgres:postgres /var/lib/postgresql/archive -# Create replication user (used by production standbys) -psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL -CREATE USER ${POSTGRES_REPLICATION_USER} WITH REPLICATION PASSWORD '${POSTGRES_REPLICATION_PASSWORD}'; -EOSQL -# Allow replication connections from standbys -echo "host replication ${POSTGRES_REPLICATION_USER} 0.0.0.0/0 scram-sha-256" >> "$PGDATA/pg_hba.conf" diff --git a/docker/postgresql/scripts/replica-entrypoint.sh b/docker/postgresql/scripts/replica-entrypoint.sh deleted file mode 100644 index da23a49..0000000 --- a/docker/postgresql/scripts/replica-entrypoint.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -set -e -PGDATA="${PGDATA:-/var/lib/postgresql/data}" - -# If already a standby (restart), just start Postgres -if [ -f "$PGDATA/standby.signal" ]; then - exec /usr/local/bin/docker-entrypoint.sh postgres -fi - -# Wait for primary to be ready -until PGPASSWORD="$POSTGRES_REPLICATION_PASSWORD" pg_isready -h "$POSTGRES_PRIMARY_HOST" -U "$POSTGRES_REPLICATION_USER"; do - echo "Waiting for primary at $POSTGRES_PRIMARY_HOST..." 
- sleep 2 -done - -# Bootstrap standby from primary -echo "Running pg_basebackup from $POSTGRES_PRIMARY_HOST..." -PGPASSWORD="$POSTGRES_REPLICATION_PASSWORD" pg_basebackup -h "$POSTGRES_PRIMARY_HOST" -U "$POSTGRES_REPLICATION_USER" -D "$PGDATA" -Fp -Xs -P -R -w - -# Set application_name for synchronous_standby_names (must match postgresql.conf: postgres-1, postgres-2) -if [ -n "$REPLICA_NAME" ] && [ -f "$PGDATA/postgresql.auto.conf" ]; then - sed -i "s/'$/ application_name='$REPLICA_NAME'/'/" "$PGDATA/postgresql.auto.conf" -fi - -exec /usr/local/bin/docker-entrypoint.sh postgres diff --git a/docs/architecture.md b/docs/architecture.md index 31006ae..37a827a 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -25,10 +25,10 @@ graph LR --- -2. New user signs up +2. User identity -- User signs up through the gateway using OAuth2-compatible credentials. -- Credentials and account records are stored in PostgreSQL only. +- User identity and authentication are outside the current DSV backend runtime. +- The vault service stores and retrieves secret shards; it no longer depends on a relational database. --- diff --git a/docs/docker.md b/docs/docker.md index 4e32b71..c0ae5f6 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -1,202 +1,91 @@ # Docker Deployment Guide -This guide covers building and running the Distributed Secrets Vault using Docker. +This guide covers building and running Distributed Secrets Vault with Docker. -## Prerequisites - -- Docker and Docker Compose installed -- Java 25+ (for local builds) - -## Quick Start with Docker Compose (Recommended) - -The easiest way to run the application with Redis, PostgreSQL, and Kafka: +## Quick Start ```bash -# 1. Setup environment (from project root; optional for quick start) cp .env.example .env -# Edit .env: set REDIS_PASSWORD and POSTGRES_PASSWORD (dev compose defaults match .env.example if unset) - -# 2. 
Build and start all services -./mvnw clean package +./mvnw clean package -DskipTests mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up --build +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka.yml up --build ``` The application will be available at: -- **API:** `http://localhost:8080` -- **Redis:** `localhost:6379` -- **PostgreSQL:** `localhost:5432` -- **Kafka:** `localhost:9092` +- API: `http://localhost:8080` +- Redis: `localhost:6379` +- Kafka: `localhost:9092` -### Compose file layout +## Compose Files -Compose files live under `docker/`; there is no single `docker-compose.yml` at the root. Use `-f` to choose a file: +| File | Stack | +| --- | --- | +| `docker/dsv/docker-compose.dsv.yml` | App only | +| `docker/dsv/docker-compose.dsv-redis.yml` | App + Redis | +| `docker/dsv/docker-compose.dsv-redis-kafka.yml` | App + Redis + Kafka | +| `docker/dsv/docker-compose.dsv-redis-kafka-3nodes.yml` | Three app nodes + Redis + Kafka | +| `docker/redis/docker-compose.redis.yml` | Redis only | +| `docker/kafka/docker-compose.kafka.yml` | Kafka only | -| File | Stack | -| ----------------------------------------------------- | -------------------------------------------- | -| `dsv/docker-compose.dsv.yml` | App only | -| `dsv/docker-compose.dsv-redis.yml` | App + Redis | -| `dsv/docker-compose.dsv-postgresql.yml` | App + PostgreSQL | -| `dsv/docker-compose.dsv-redis-postgresql.yml` | App + Redis + PostgreSQL + Kafka (full dev stack) | -| `postgresql/docker-compose.postgresql.yml` | PostgreSQL only (single node, dev) | -| `postgresql/docker-compose.postgresql-production.yml` | PostgreSQL primary + 2 standbys (production) | -| `redis/docker-compose.redis.yml` | Redis only | +## Local Cluster Test -### Docker Compose commands - -All commands below assume you are in the **project root**. Use the same `-f` path for the stack you are running. 
+Use the three-node stack to test cluster-like behavior locally: ```bash -# Start full dev stack in foreground (see logs) -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up - -# Start in background -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up -d +./scripts/test-three-dsv-kafka-nodes.sh +``` -# View logs -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml logs -f app -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml logs -f redis -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml logs -f postgres +Or start it manually: -# Stop services -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml down +```bash +./mvnw clean package -DskipTests +mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) +docker compose -f docker/dsv/docker-compose.dsv-redis-kafka-3nodes.yml up -d --build +``` -# Stop and remove volumes (clean slate) -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml down -v +The apps listen on: -# Rebuild after code changes -./mvnw clean package && mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up --build -``` +- `http://127.0.0.1:8081` +- `http://127.0.0.1:8082` +- `http://127.0.0.1:8083` ## Configuration -### Environment variables - -Configuration is managed through the `.env` file in the project root: +Configuration is loaded from `.env` in the project root when you run Docker Compose from the project root: ```env -# Redis REDIS_PASSWORD=your-secure-password - -# PostgreSQL (user accounts) -POSTGRES_USER=dsv -POSTGRES_PASSWORD=your-postgres-password -POSTGRES_DB=dsv - -# Spring profile (dev, prod, test) SPRING_PROFILES_ACTIVE=dev ``` -For production PostgreSQL (primary + standbys), also set: - -```env -POSTGRES_REPLICATION_USER=replicator -POSTGRES_REPLICATION_PASSWORD=your-replication-password -``` - 
-**Security note:** Never commit `.env` to git. Use `.env.example` as a template. - -Docker Compose loads `.env` from the directory from which you run `docker compose`; when using the recommended commands (from project root), that is the project root. - -### Redis configuration - -Redis uses the stock `redis:8.6-alpine` image. Our `redis.conf` is mounted at `/usr/local/etc/redis/redis.conf` and passed explicitly (`redis-server /usr/local/etc/redis/redis.conf --requirepass ${REDIS_PASSWORD:-REDIS_PASSWORD}`) so Redis runs as a vanilla server with no bundled modules; the config file has no `include` and no `loadmodule`. Persistence and security are configured in `docker/redis/redis.conf`: - -- AOF persistence with `everysec` fsync -- RDB snapshots at 15 min, 5 min, and 1 min intervals (when keys change) -- No eviction policy (suitable for secrets storage) -- Password auth required - -### PostgreSQL configuration - -- **Development:** Single node via `docker/postgresql/docker-compose.postgresql.yml` or as part of the full dev stack. Image: `postgres:18.2-alpine`. No custom config file (defaults only). -- **Production:** Multi-node (primary + 2 standbys) via `docker/postgresql/docker-compose.postgresql-production.yml`. Uses `docker/postgresql/postgresql.conf` for replication, WAL archiving, and logging. See **Production PostgreSQL** below. - -## Production PostgreSQL (multi-node) - -For redundancy, run one primary and two synchronous standbys: - -```bash -# From project root. Ensure .env has POSTGRES_PASSWORD, POSTGRES_REPLICATION_PASSWORD (and optionally POSTGRES_REPLICATION_USER) -docker compose -f docker/postgresql/docker-compose.postgresql-production.yml up -d -``` - -- **postgres-primary:** Read-write; port 5432; uses `docker/postgresql/postgresql.conf`. -- **postgres-1, postgres-2:** Read-only standbys streaming from the primary. Application names match `synchronous_standby_names` in `postgresql.conf`. 
+Kafka and Redis connection settings are provided by the compose files for containerized runs. -Applications should connect to the primary (hostname `postgres-primary`) for read-write. See `docker/README.md` for script and network details. +## Redis -## Standalone Docker (without Compose) +Redis stores secret shards. The local config in `docker/redis/redis.conf` enables: -If you need to run the app container without Compose: +- AOF persistence with `appendfsync everysec` +- RDB snapshots +- no key eviction +- password authentication -### Build the image +## Standalone Image ```bash -./mvnw clean package +./mvnw clean package -DskipTests mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) docker build -t distributed-secrets-vault . -``` - -### Run the container - -```bash docker run -p 8080:8080 distributed-secrets-vault ``` -### With environment variables +For a standalone container connected to external services: ```bash -docker run -e "SPRING_PROFILES_ACTIVE=prod" \ +docker run \ + -e "SPRING_PROFILES_ACTIVE=prod" \ -e "SPRING_DATA_REDIS_HOST=redis.example.com" \ -e "SPRING_DATA_REDIS_PASSWORD=yourpassword" \ - -e "SPRING_DATASOURCE_URL=jdbc:postgresql://postgres.example.com:5432/dsv" \ - -e "SPRING_DATASOURCE_USERNAME=dsv" \ - -e "SPRING_DATASOURCE_PASSWORD=yourpostgrespassword" \ + -e "KAFKA_BOOTSTRAP_SERVERS=kafka.example.com:9092" \ -p 8080:8080 distributed-secrets-vault ``` - -## Development - -### Fast rebuild after code changes - -The layered Dockerfile keeps dependency layers cached; only the app layer rebuilds when code changes: - -```bash -./mvnw clean package -mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) -docker compose -f docker/dsv/docker-compose.dsv-redis-postgresql.yml up --build app -``` - -### Debugging - -Enable remote debugging: - -```bash -docker run -e "JAVA_TOOL_OPTIONS=-agentlib:jdwp=transport=dt_socket,address=5005,server=y,suspend=n" \ - -p 8080:8080 -p 5005:5005 distributed-secrets-vault -``` 
- -Connect your IDE debugger to `localhost:5005`. - -## Alternative: Spring Boot Buildpack - -Build without the project Dockerfile: - -```bash -./mvnw spring-boot:build-image -Dspring-boot.build-image.imageName=distributed-secrets-vault -docker run -p 8080:8080 distributed-secrets-vault -``` - -## Container management - -```bash -docker ps -docker stop -docker rm -docker rmi distributed-secrets-vault -``` - -For more detail (structure, services, production PostgreSQL scripts), see **docker/README.md**. diff --git a/docs/kubernetes.md b/docs/kubernetes.md index e201fd3..532ad4c 100644 --- a/docs/kubernetes.md +++ b/docs/kubernetes.md @@ -1,190 +1,82 @@ -# Kubernetes Architecture & Configuration +# Kubernetes Architecture and Configuration -This document provides an in-depth look at how the Distributed Secrets Vault (DSV) is deployed on Kubernetes (specifically optimized for K3s). The Kubernetes orchestration is designed to enforce strict hardware utilization limits, physical architecture constraints, and high-availability database replication. +The Kubernetes deployment runs DSV as a leaderless app cluster with per-node Redis sidecars and a Kafka broker. ---- - -## 1. High-Level Architecture - -The cluster is distinctly divided into **Control Plane Node(s)** and **Agent Nodes**. Workloads (DSV App and Postgres databases) are entirely segregated from the Control Plane to ensure networking and load-balancing performance is not impacted by heavy data processing. +## High-Level Architecture ```mermaid graph TD Client([Client]) - - subgraph ControlPlane [Control Plane Node] - Ingress[Traefik Ingress Gateway
api.dsv.local] - Service[dsv-app-service
ClusterIP] - Ingress --> |Routes traffic| Service - end - - Client --> |HTTP/HTTPS| Ingress - - subgraph AgentNodes [Agent Nodes Collection] - direction TB - - subgraph Node1 [Agent Node 1] - direction LR - App1[DSV Worker + Redis Sidecar
Pod] - DB0[(Postgres-0 Primary
Pod)] - end - - subgraph Node2 [Agent Node 2] - direction LR - App2[DSV Worker + Redis Sidecar
Pod] - DB1[(Postgres-1 Replica
Pod)] - end - - subgraph Node3 [Agent Node 3] - direction LR - App3[DSV Worker + Redis Sidecar
Pod] - DB2[(Postgres-2 Replica
Pod)] - end - - subgraph NodeN [Agent Node 4..12] - AppN[DSV Worker + Redis Sidecar
Pod] - end - - subgraph StandbyNodes [Standby Nodes > 12] - Idle[Idle / Pending Pods Wait Here] - end + Ingress[Ingress] + Service[dsv-app-service] + Kafka[(Kafka)] + + Client --> Ingress + Ingress --> Service + + subgraph AppPods[DSV StatefulSet] + App1[DSV App 0 + Redis Sidecar] + App2[DSV App 1 + Redis Sidecar] + App3[DSV App 2 + Redis Sidecar] + AppN[DSV App N + Redis Sidecar] end - Service -.-> |Load Balances| App1 - Service -.-> |Load Balances| App2 - Service -.-> |Load Balances| App3 - Service -.-> |Load Balances| AppN - - DB1 -.-> |Streaming Repl| DB0 - DB2 -.-> |Streaming Repl| DB0 - - classDef control fill:#dbe9f4,stroke:#4a76a8,stroke-width:2px,color:#333; - classDef agent fill:#e8f4e5,stroke:#5c8b52,stroke-width:2px,color:#333; - classDef pod fill:#fdfdfd,stroke:#888,stroke-width:1px,color:#333; - - class ControlPlane control; - class Node1,Node2,Node3,NodeN,StandbyNodes agent; - class App1,App2,App3,AppN,DB0,DB1,DB2 pod; -``` - ---- - -## 2. Workload Segregation + Service --> App1 + Service --> App2 + Service --> App3 + Service --> AppN -We enforce the distinction between Control Plane and Agent Nodes using Kubernetes **Node Affinity** rules. By explicitly denying placement on nodes labeled as `node-role.kubernetes.io/control-plane`, we guarantee the Control Plane handles only API ingresses and cluster state management. - -```yaml -# Present on all Workload Pods (DSV Worker & Postgres) -affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: node-role.kubernetes.io/control-plane - operator: DoesNotExist + App1 <--> Kafka + App2 <--> Kafka + App3 <--> Kafka + AppN <--> Kafka ``` ---- +## DSV App and Redis -## 3. Distributed Secrets Vault Worker (App + Redis) +The app runs as a StatefulSet. Each pod includes: -The primary secret sharing application is deployed as a `StatefulSet` with an upper bound of 12 replicas. 
+- a Spring Boot DSV container +- a Redis sidecar on `localhost:6379` +- a PVC mounted into Redis for durable shard storage -### The Sidecar Model -Because the DSV application expects a tightly coupled Redis instance for persistent secret recovery and fast in-memory queueing, Redis is deployed as a **Sidecar** to the DSV Spring Boot application. They share the same Pod, meaning they share the `localhost` network space. The DSV application can always communicate with its dedicated Redis cache at `localhost:6379`. +This keeps shard storage local to the DSV pod while still allowing Kubernetes to reschedule pods with their persistent volumes. -```mermaid -graph LR - subgraph Pod [DSV App Pod / StatefulSet Replica] - Boot[Spring Boot App
Port: 8080] - Redis[(Redis Engine
Port: 6379)] - Vol[[Persistent Volume]] - - Boot <--> |localhost:6379| Redis - Redis --- Vol - end -``` +## Cluster Discovery -### Resource Limits and Node Affinity -To enforce a strict **one worker per physical node** rule, we implement a `podAntiAffinity` constraint keyed to the `kubernetes.io/hostname`. +`dsv-app-headless` is a headless service that returns DNS records for app pods. ScaleCube uses: -This limits deployment logic to: -* **Max 12 Nodes:** The StatefulSet requests exactly 12 replicas. -* **Insufficient physical hardware:** If the cluster only has 5 agent nodes, 5 Pods are scheduled, and the remaining 7 request "Standby" mode in the `Pending` state. -* **Too much physical hardware:** If there are 15 agent nodes, 12 receive Pods. The other 3 remain empty "Standby" nodes ready to take over instantly if an active node fails. +- `SEED_DNS_HOST=dsv-app-headless.default.svc.cluster.local` +- `SEED_DNS_PORT=4801` +- `CLUSTER_PORT=4801` ---- +The app service `dsv-app-service` separately provides load-balanced HTTP traffic. -## 4. PostgreSQL Cluster Deployment +## Kafka -The PostgreSQL service utilizes a `StatefulSet` capped at exactly 3 replicas. Similar to the DSV app, it has a strict `podAntiAffinity` constraint to distribute the primary and two replicas across 3 independent physical agent nodes. +Kafka runs as a single-broker KRaft StatefulSet in the current manifests. DSV app pods connect through: -### Intelligent Primary/Replica Discovery -To translate the heavy Docker shell scripts into a unified Kubernetes deployment, the Postgres `StatefulSet` mounts a bash wrapper via a `ConfigMap`. Kubernetes organically names StatefulSet pods sequentially: `postgres-0`, `postgres-1`, `postgres-2`. 
- -The wrapper script automatically interprets the current Pod's hostname: - -```mermaid -sequenceDiagram - participant K8s as Kubernetes - participant Wrap as wrapper.sh - participant Primary as postgres-0 (Primary) - participant Replica as postgres-1/2 (Replica) - - K8s->>Wrap: Start Container - Wrap->>Wrap: Extract $HOSTNAME - alt is postgres-0 - Wrap->>Primary: Execute init-primary logic - Primary-->>Wrap: Postgres Engine Started (Read/Write) - else is postgres-1 or postgres-2 - Wrap->>Replica: Execute replica-entrypoint logic - Replica->>Primary: Wait for connection - Replica->>Primary: pg_basebackup - Replica-->>Wrap: Postgres Engine Started (Read-Only) - end +```text +KAFKA_BOOTSTRAP_SERVERS=kafka.default.svc.cluster.local:9092 ``` -Because of this wrapper, there is no need for separate `Primary` and `Replica` configuration files—Kubernetes self-organizes the database roles seamlessly. - ---- - -## 5. DNS and Cluster Discovery - -To facilitate internal communications without going through the external ingress gateway, the manifests rely on **Headless Services**. - -A standard Kubernetes Service (like `dsv-app-service`) provides a single IP that load-balances across all healthy pods. A Headless Service (`ClusterIP: None`) bypasses the proxy and returns the raw A-records for *every matching Pod endpoint*. - -For ScaleCube membership, startup is DNS-based in both production and testing: - -* `SEED_DNS_HOST=dsv-app-headless.default.svc.cluster.local` -* `SEED_DNS_PORT=4801` and `CLUSTER_PORT=4801` -* `dsv-app-headless` publishes port `4801` so each worker can resolve and join currently active worker nodes without hard-coded seed lists. 
-* ScaleCube retry/default behavior is configured in `src/main/resources/application.properties`: - * `scalecube.cluster.default-port=4801` - * `scalecube.dns.resolve.max-attempts=5` - * `scalecube.dns.resolve.retry-delay-ms=1000` +## Testing Environment -`ScaleCubeConfig` resolves `SEED_DNS_HOST` to all available pod IPs and feeds those addresses into ScaleCube membership seed discovery. +`k8s/testing` is intended for Docker Desktop, Minikube, or K3d. It runs three DSV app replicas without production node-affinity constraints. -* `dsv-app-headless`: Returns all DSV worker pod A-records used for ScaleCube peer discovery. -* `postgres-headless`: Allows the application layer to reliably locate `postgres-0.postgres-headless.default.svc.cluster.local` as the permanent primary database URL. - ---- - -## 6. Kafka Infrastructure (Future) - -To prepare for future 2-Phase Commit coordination via event queuing, the cluster includes a Kafka broker. - -* **Development/Testing:** Deployed as a single-node StatefulSet (`kafka-statefulset.yaml`) alongside a headless service (`kafka-service.yaml`) for broker discovery. Uses KRaft configuration to eliminate the Zookeeper dependency. -* **Production:** Currently a single-replica stateful set for foundational use, but intended to be scaled out if needed. -* **Network mapping:** Exposes port 9092 internally. DSV app pods are configured to reach this broker via the `KAFKA_BOOTSTRAP_SERVERS` environment variable pointing to `kafka.default.svc.cluster.local:9092`. - ---- +```bash +kubectl apply -f k8s/testing/ +kubectl get pods -w +``` -## 7. Testing Environments +## Production Environment -To facilitate local testing via Docker Desktop, Minikube, or K3d without needing a multi-node architecture, the `k8s/testing` directory contains versions of these YAML files with the `podAntiAffinity` and `nodeAffinity` constraints stripped out, and the replica counts reduced to `1`. 
+`k8s/production` keeps scheduling controls for a multi-node target: -Because of the intelligent Postgres wrapper, scaling `postgres` to `1` replica simply builds the `postgres-0` StatefulSet and seamlessly behaves as a standalone database! +- app pods avoid control-plane nodes +- app pods use pod anti-affinity to spread across worker nodes +- the app StatefulSet requests up to 12 replicas -ScaleCube discovery remains identical to production in testing: the worker still resolves `dsv-app-headless` DNS and joins peers over port `4801`; only replica counts and scheduling constraints differ. \ No newline at end of file +```bash +kubectl apply -f k8s/production/ +``` diff --git a/docs/technologies.md b/docs/technologies.md index fb5e35b..444c913 100644 --- a/docs/technologies.md +++ b/docs/technologies.md @@ -2,125 +2,52 @@ ## Java 25 -**Why we chose it:** Java is the language and framework the team chose for building a robust backend system. +Java is the backend language for the DSV service. -**Alternatives considered:** -- **Python:** Used for the client application, but not chosen for the backend due to performance considerations in a distributed, latency-sensitive system. +## Spring Boot 4 ---- +Spring Boot provides application wiring, configuration, validation, actuator health endpoints, and the REST API layer through Spring Web MVC. -## Spring Boot 4.0.2 +## Spring Data Redis -**Why we chose it:** Spring Boot provides a production-ready framework with auto-configuration, dependency injection, and a rich ecosystem of integrations (web, data, validation). It dramatically reduces boilerplate and lets us focus on business logic instead of infrastructure wiring. +Redis is the durable shard store. Spring Data Redis provides the Redis client integration used by the repository implementation. 
-**Alternatives considered:** -- **Core Java without Spring:** Would require building production-ready infrastructure (dependency injection, request handling, validation) from scratch, which is not practical for a capstone project. +## Redis 8 -### Spring Web MVC +Redis stores secret shards as key-value data. The Docker configuration enables AOF persistence, RDB snapshots, password authentication, and no eviction. -**Why we chose it:** Provides a straightforward annotation-driven REST API layer (`@RestController`, `@RequestMapping`) that integrates cleanly with the rest of the Spring ecosystem. +## Apache Kafka 3.7 -### Spring Data JPA +Kafka provides commit fanout and ordered messaging infrastructure for distributed mutation coordination. -**Why we chose it:** Gives us a repository abstraction over PostgreSQL, eliminating most SQL boilerplate while still allowing custom queries when needed. +## ScaleCube -### Spring Data Redis +ScaleCube handles cluster membership and peer discovery for DSV app nodes. -**Why we chose it:** Provides a Spring-idiomatic client for Redis, including `RedisTemplate` and repository support, so secret shards can be read and written with the same patterns used for relational data. +## Shamir's Secret Sharing -### Spring Validation +The `codahale/shamir` library implements the cryptographic split/reconstruct primitive used to divide a secret into `n` shards with a `k` shard reconstruction threshold. -**Why we chose it:** Declarative bean validation (`@Valid`, `@NotNull`, etc.) keeps input-validation logic out of service code and produces consistent error responses. +## Maven ---- +Maven builds the Spring Boot application and manages Java dependencies. -## Maven (with Maven Wrapper) +## Docker and Docker Compose -**Why we chose it:** Maven is the most widely used Java build tool and integrates natively with Spring Boot's parent POM. 
The `mvnw` wrapper ensures every developer and CI runner uses the same Maven version without a separate install step. +Docker makes the app, Redis, and Kafka reproducible across developer machines. Docker Compose defines the single-node and three-node local stacks. ---- +## Kubernetes -## PostgreSQL 18 - -**Why we chose it:** PostgreSQL is a proven, open-source relational database with strong ACID guarantees. We use it exclusively for user account and authentication-related data management. For production we run one primary and two synchronous standbys for redundancy. - -**Alternatives considered:** -- **SQLite:** Not suitable for a multi-node, concurrent server environment. - ---- - -## Redis 8 (with AOF Persistence) - -**Why we chose it:** Redis provides fast in-memory storage with configurable durability. We store secret shards in Redis because shard reads and writes must be extremely fast (they happen on every secret retrieval), and AOF persistence with `appendfsync everysec` gives us at most one second of data loss on failure—acceptable for this use case. - -**Configuration highlights:** -- AOF (Append-Only File) persistence enabled -- RDB snapshots every 15 minutes -- No key eviction (secrets are never auto-deleted) -- Password authentication required - -**Alternatives considered:** -- **Pure PostgreSQL for shards:** Would work, but is slower for the high-frequency shard reads/writes and adds unnecessary relational overhead for key-value data. - ---- - -## Shamir's Secret Sharing (`codahale/shamir` 0.7.0) - -**Why we chose it:** Shamir's Secret Sharing is the cryptographic foundation of the entire project. It allows a secret to be split into *n* shards such that any *k* of them can reconstruct the original, while fewer than *k* shards reveal nothing. The `codahale/shamir` library is a well-audited, minimal Java implementation of the algorithm. The Shamir algorithm is the focus of the project and no alternative was considered. 
- ---- - -## Docker & Docker Compose - -**Why we chose it:** Docker makes the entire stack (application, Redis, PostgreSQL) reproducible across developer machines and CI environments with a single command. Docker Compose lets us define multi-container topologies (full dev stack, production PostgreSQL cluster) as version-controlled YAML files. We plan to integrate Kubernetes in the future for production orchestration. - ---- - -## HAProxy - -**Why we chose it:** HAProxy acts as the gateway in front of the leaderless cluster, distributing incoming HTTP requests across all available nodes. It is battle-tested for high-throughput load balancing and supports health checks so failed nodes are automatically removed from rotation. - -**Alternatives considered:** -- **Nginx:** Also a capable reverse proxy, but HAProxy's load-balancing algorithms and health-check semantics are more fine-grained for TCP/HTTP balancing across a cluster. -- **Traefik:** Cloud-native and Kubernetes-aware, but adds complexity we don't need at this stage. - ---- - -## Keepalived - -**Why we chose it:** Keepalived provides a virtual IP (VIP) that floats between gateway instances using VRRP. If the active HAProxy node fails, Keepalived promotes a standby so the cluster remains reachable without a DNS change. - -**Alternatives considered:** -- **Relying on a single HAProxy instance:** Simpler but introduces a single point of failure at the gateway layer, which contradicts the distributed-availability goals of the project. - ---- +Kubernetes manifests under `k8s/` run DSV app pods with Redis sidecars and Kafka for local or production-style orchestration. ## Lombok -**Why we chose it:** Lombok generates repetitive Java boilerplate (getters, setters, constructors, `equals`/`hashCode`, builders) at compile time via annotations. This keeps model and DTO classes concise without sacrificing type safety. 
- -**Alternatives considered:** -- **Java Records:** Suitable for immutable data carriers, but lack the builder pattern and mutable-field support needed for JPA entities. -- **Writing boilerplate by hand:** Too verbose and error-prone for a team that wants to focus on distributed-systems logic. - ---- +Lombok reduces boilerplate in model and DTO classes. ## Apache Commons Pool2 -**Why we chose it:** Commons Pool2 provides the connection-pool implementation underlying Spring Data Redis's Lettuce driver. It ensures Redis connections are reused across requests rather than opened and closed on every operation, which is critical for low-latency shard access. - ---- - -## Apache Kafka 3.7.0 (Future Integration) - -**Why we chose it:** Kafka will provide a persistent, strictly ordered commit log to serve as a distributed queue. We will use Kafka topics to reliably order concurrent mutations (Create, Update, Delete) to the same secret key across multiple nodes, thus establishing a foundation for race-condition tie-breaking in our Two-Phase Commit (2PC) coordinate logic. This prevents using ad-hoc table locks. - -**Alternatives considered:** -- **Redis distributed locks:** Could solve concurrency races, but Kafka guarantees strict event ordering natively without risking deadlocks from crashed nodes holding locks. -- **RDBMS locking:** We want to minimize PostgreSQL serialization load. - ---- +Commons Pool2 backs Lettuce Redis connection pooling. -## Eclipse Temurin 25 (Docker base image) +## Eclipse Temurin 25 -**Why we chose it:** Eclipse Temurin is the Adoptium (formerly AdoptOpenJDK) distribution of OpenJDK. It is free, regularly patched, and widely recommended as the default JDK image for production Docker containers. +The Docker image uses Eclipse Temurin as the Java runtime base image. 
diff --git a/k8s/README.md b/k8s/README.md index 76235d6..f6b106f 100644 --- a/k8s/README.md +++ b/k8s/README.md @@ -1,90 +1,82 @@ # Kubernetes Configuration -This directory contains the Kubernetes (K8s) deployment manifests for the Distributed Secrets Vault. The configuration is tuned for K3s environments but uses standard K8s primitives. +This directory contains Kubernetes manifests for Distributed Secrets Vault. The app runs as a StatefulSet, each app pod has a Redis sidecar for shard storage, and Kafka runs as a StatefulSet for commit messaging. ## Structure -``` +```text k8s/ ├── production/ -│ ├── app-service.yaml # Headless and ClusterIP services for DSV App routing -│ ├── app-statefulset.yaml # StatefulSet for Agent nodes (DSV Spring Boot + Redis sidecar) -│ ├── ingress.yaml # Traefik Ingress configuration to expose the gateway -│ ├── postgres-configmap.yaml # Wrapper shell script for intelligent Postgres primary/replica discovery -│ └── postgres.yaml # StatefulSet for the PostgreSQL user database cluster +│ ├── app-service.yaml +│ ├── app-statefulset.yaml +│ ├── ingress.yaml +│ ├── kafka-service.yaml +│ └── kafka-statefulset.yaml ├── testing/ -│ ├── app-service.yaml # Same as production, for local testing -│ ├── app-statefulset.yaml # 1 Replica, No Node/Pod Anti-Affinity constraints -│ ├── ingress.yaml # Local ingress configuration -│ ├── postgres-configmap.yaml # Wrapper script config -│ └── postgres.yaml # 1 Replica, No Node/Pod Anti-Affinity constraints -└── README.md # This file (You are here) +│ ├── app-service.yaml +│ ├── app-statefulset.yaml +│ ├── ingress.yaml +│ ├── kafka-service.yaml +│ └── kafka-statefulset.yaml +└── README.md +``` + +## Architecture + +- `dsv-app` is a StatefulSet. +- Redis runs as a sidecar inside every `dsv-app` pod and persists data through a per-pod PVC. +- `dsv-app-headless` exposes pod DNS records for ScaleCube peer discovery. +- `dsv-app-service` load-balances HTTP traffic to healthy app pods. 
+- Kafka is available at `kafka.default.svc.cluster.local:9092`. + +The production manifests keep the one-app-pod-per-worker-node placement strategy through node affinity and pod anti-affinity. The testing manifests remove those scheduling constraints for Docker Desktop, Minikube, or K3d. + +## Local Testing + +Build the local image first: + +```bash +./mvnw clean package -DskipTests +mkdir -p target/dependency && (cd target/dependency; jar -xf ../*.jar) +docker build -t dsv-backend:latest . +``` + +Then deploy: + +```bash +kubectl apply -f k8s/testing/ +kubectl get pods -w ``` -## Architecture & Configuration - -The Kubernetes deployment mirrors the production requirements: - -1. **Control Plane & Agent Node Segregation (`production` only)** - - Pods are governed by `nodeAffinity` rules ensuring they do not get scheduled on `node-role.kubernetes.io/control-plane`. - - Control plane naturally serves as the Load Balancer/Gateway entry point via `ingress.yaml`. -2. **Strict Pod Placements (`production` only)** - - Hard limits of 1 DSV App (+ Redis sidecar) per physical hardware node are enforced via `podAntiAffinity` rules matching `kubernetes.io/hostname`. -3. **App Architecture** - - **DSV Worker & Redis:** Redis is deployed as a *sidecar container* inside the DSV `StatefulSet`. If the pod dies, it recovers data natively via Persistent Volumes (`volumeClaimTemplates`). - - **ScaleCube peer discovery:** ScaleCube bootstrap is DNS-based. Each DSV worker resolves `SEED_DNS_HOST` (the headless worker service) and joins peers on `SEED_DNS_PORT`/`CLUSTER_PORT` (`4801`). - - **PostgreSQL Database:** Handled via a `StatefulSet` with replicas. A smart wrapper script mounted from `postgres-configmap.yaml` automatically discovers if a pod is the primary (`postgres-0`) or a replica (e.g., `postgres-1`) based on the hostname provided by the `StatefulSet` and initializes replication logic accordingly. -4. 
**Dynamic Scaling (Standby Strategy)** - - We target 12 total agent nodes natively requesting `replicas: 12`. If fewer physical worker nodes exist (e.g., 5 nodes available), 5 pods run and 7 remain pending gracefully acting as a standby queue. - -## Usage - -### Production (Multi-Node Target) -To run the production deployment onto a properly labeled multi-node system (e.g. standard K3s installation). - -1. Review and apply Secrets/ConfigMaps to fulfill the Environment Variables if needed natively. -2. Apply the production configurations: - ```bash - kubectl apply -f k8s/production/ - ``` - -### Local Testing (Single-Node Dev) -A lightweight version in `testing/` strips away the Affinity constraints and lowers replica counts, making it perfect for Docker Desktop, Minikube, or K3d local development on a single machine. - -1. Apply the testing configurations: - ```bash - kubectl apply -f k8s/testing/ - ``` -2. Verify rollout: - ```bash - kubectl get pods -w - ``` -3. Expose the Ingress endpoint if your local orchestrator requires specific tunings or simply curl the proxy IP endpoint. 
- -## Environment Variables Mapping - -Most configurations mirror the `.env` settings expected globally: - -| Target Container | Variables Set via K8s Manifest | Description / Source Mapping | -| ------------------- | ---------------------------------------------------- | ------------------------------------------ | -| `dsv-app` | `NODE_NAME`, `POD_IP` | StatefulSet pod identity used by ScaleCube | -| `dsv-app` | `CLUSTER_PORT`, `SEED_DNS_PORT` | ScaleCube membership transport port | -| `dsv-app` | `SEED_DNS_HOST` | Headless service DNS used for peer lookup | -| `dsv-app` | `SPRING_DATA_REDIS_HOST="localhost"` | Redis operates as a sidecar container | -| `dsv-app` | `SPRING_DATASOURCE_URL` | Routes to the headless Postgres service | -| `postgres` | `POSTGRES_USER`, `POSTGRES_PASSWORD`, `POSTGRES_DB` | Environment values or `Secret` references | -| `postgres` | `POSTGRES_REPLICATION_USER`, `POSTGRES_PRIMARY_HOST` | Bound dynamically for the StatefulSet init | - -## ScaleCube Discovery Contract - -ScaleCube startup is now fully DNS-based in both `k8s/production` and `k8s/testing`: - -- `SEED_DNS_HOST` points to `dsv-app-headless.default.svc.cluster.local` -- `SEED_DNS_PORT` and `CLUSTER_PORT` are set to `4801` -- `dsv-app-headless` exposes port `4801` to provide discoverable endpoints for cluster membership -- Runtime defaults are sourced from `application.properties`: - - `scalecube.cluster.default-port=4801` - - `scalecube.dns.resolve.max-attempts=5` - - `scalecube.dns.resolve.retry-delay-ms=1000` - -*Note: In production deployments, it's highly recommended to replace hardcoded values (like `POSTGRES_PASSWORD="POSTGRES_PASSWORD"`) inside `k8s/production/postgres.yaml` with a K8s `Secret` before applying.* \ No newline at end of file +The testing app manifest uses `imagePullPolicy: Never`, so the image must exist in the local cluster's Docker image store. 
+ +## Production + +```bash +kubectl apply -f k8s/production/ +``` + +Before production use, replace placeholder image and ingress details with the registry image and hostnames for the target cluster. + +## App Environment + +| Variable | Purpose | +| --- | --- | +| `NODE_NAME` | StatefulSet pod identity | +| `POD_IP` | Pod IP used for cluster membership | +| `CLUSTER_PORT` | ScaleCube transport port | +| `SEED_DNS_HOST` | Headless service DNS name for peer discovery | +| `SEED_DNS_PORT` | ScaleCube seed port | +| `SPRING_DATA_REDIS_HOST` | `localhost`, because Redis is a sidecar | +| `SPRING_DATA_REDIS_PORT` | Redis sidecar port | +| `KAFKA_BOOTSTRAP_SERVERS` | Kafka service DNS endpoint | + +## ScaleCube Discovery + +ScaleCube startup is DNS-based: + +- `SEED_DNS_HOST=dsv-app-headless.default.svc.cluster.local` +- `SEED_DNS_PORT=4801` +- `CLUSTER_PORT=4801` + +The headless service exposes port `4801` so each worker can resolve and join active peer pods. diff --git a/k8s/production/app-statefulset.yaml b/k8s/production/app-statefulset.yaml index 850c7ae..9dda4a9 100644 --- a/k8s/production/app-statefulset.yaml +++ b/k8s/production/app-statefulset.yaml @@ -61,21 +61,6 @@ spec: value: "6379" - name: SPRING_PROFILES_ACTIVE value: "prod" - # Postgres connection via the postgres primary service - - name: SPRING_DATASOURCE_URL - value: "jdbc:postgresql://postgres-0.postgres-headless.default.svc.cluster.local:5432/dsv" - - name: SPRING_DATASOURCE_USERNAME - valueFrom: - secretKeyRef: - name: dsv-secrets - key: POSTGRES_USER - optional: true - - name: SPRING_DATASOURCE_PASSWORD - valueFrom: - secretKeyRef: - name: dsv-secrets - key: POSTGRES_PASSWORD - optional: true - name: KAFKA_BOOTSTRAP_SERVERS value: "kafka.default.svc.cluster.local:9092" # Redis Sidecar container within the same Pod diff --git a/k8s/production/postgres-configmap.yaml b/k8s/production/postgres-configmap.yaml deleted file mode 100644 index 077e6d0..0000000 --- a/k8s/production/postgres-configmap.yaml 
+++ /dev/null @@ -1,27 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: postgres-scripts - namespace: default -data: - wrapper.sh: | - #!/bin/bash - set -e - - HOSTNAME=$(hostname) - - if [[ "$HOSTNAME" == "postgres-0" ]]; then - echo "Initializing Primary on $HOSTNAME" - # Injecting standard PostgreSQL config args - # Under the hood it will use the original scripts/init-primary.sh mounted via configmap if needed - # pseudo-code: Setup replication users - exec docker-entrypoint.sh postgres -c config_file=/etc/postgresql/postgresql.conf - else - echo "Initializing Replica on $HOSTNAME" - export REPLICA_NAME=$HOSTNAME - # pseudo-code: bash /scripts/replica-entrypoint.sh - # wait for primary (postgres-0) to become available - # pg_basebackup from postgres-0 - # start replica - exec /scripts/replica-entrypoint.sh - fi diff --git a/k8s/production/postgres.yaml b/k8s/production/postgres.yaml deleted file mode 100644 index e1d78a9..0000000 --- a/k8s/production/postgres.yaml +++ /dev/null @@ -1,81 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: postgres - namespace: default -spec: - # Exactly 3 nodes for Postgres (1 primary, 2 replicas) - replicas: 3 - serviceName: postgres-headless - selector: - matchLabels: - app: postgres - template: - metadata: - labels: - app: postgres - spec: - # Spread Postgres across nodes and avoid control plane - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: node-role.kubernetes.io/control-plane - operator: DoesNotExist - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - postgres - topologyKey: "kubernetes.io/hostname" - containers: - - name: postgres - image: postgres:18.2-alpine - ports: - - containerPort: 5432 - env: - - name: POSTGRES_USER - value: "dsv" - - name: POSTGRES_PASSWORD - value: "POSTGRES_PASSWORD" - - name: POSTGRES_DB - 
value: "dsv" - - name: POSTGRES_REPLICATION_USER - value: "replicator" - - name: POSTGRES_REPLICATION_PASSWORD - value: "POSTGRES_REPLICATION_PASSWORD" - - name: POSTGRES_PRIMARY_HOST - value: "postgres-0.postgres-headless.default.svc.cluster.local" - command: ["bash", "/scripts/wrapper.sh"] - volumeMounts: - - name: postgres-data - mountPath: /var/lib/postgresql/data - - name: postgres-scripts - mountPath: /scripts - volumeClaimTemplates: - - metadata: - name: postgres-data - spec: - accessModes: [ "ReadWriteOnce" ] - resources: - requests: - storage: 5Gi - ---- -apiVersion: v1 -kind: Service -metadata: - name: postgres-headless - namespace: default -spec: - type: ClusterIP - clusterIP: None - selector: - app: postgres - ports: - - port: 5432 - targetPort: 5432 diff --git a/k8s/testing/app-service.yaml b/k8s/testing/app-service.yaml index 6c2c4d2..c44f587 100644 --- a/k8s/testing/app-service.yaml +++ b/k8s/testing/app-service.yaml @@ -26,5 +26,6 @@ spec: - name: cluster port: 4801 targetPort: 4801 - - port: 8080 + - name: http + port: 8080 targetPort: 8080 diff --git a/k8s/testing/app-statefulset.yaml b/k8s/testing/app-statefulset.yaml index 0b8d7a4..2d838be 100644 --- a/k8s/testing/app-statefulset.yaml +++ b/k8s/testing/app-statefulset.yaml @@ -18,6 +18,7 @@ spec: containers: - name: dsv-app image: dsv-backend:latest + imagePullPolicy: Never ports: - containerPort: 4801 name: cluster @@ -42,23 +43,10 @@ spec: value: "localhost" - name: SPRING_DATA_REDIS_PORT value: "6379" + - name: SPRING_DATA_REDIS_PASSWORD + value: "" - name: SPRING_PROFILES_ACTIVE - value: "prod" - # Postgres connection via the postgres primary service - - name: SPRING_DATASOURCE_URL - value: "jdbc:postgresql://postgres-0.postgres-headless.default.svc.cluster.local:5432/dsv" - - name: SPRING_DATASOURCE_USERNAME - valueFrom: - secretKeyRef: - name: dsv-secrets - key: POSTGRES_USER - optional: true - - name: SPRING_DATASOURCE_PASSWORD - valueFrom: - secretKeyRef: - name: dsv-secrets - key: 
POSTGRES_PASSWORD - optional: true + value: "dev" - name: KAFKA_BOOTSTRAP_SERVERS value: "kafka.default.svc.cluster.local:9092" # Redis Sidecar container within the same Pod diff --git a/k8s/testing/postgres-configmap.yaml b/k8s/testing/postgres-configmap.yaml deleted file mode 100644 index 077e6d0..0000000 --- a/k8s/testing/postgres-configmap.yaml +++ /dev/null @@ -1,27 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: postgres-scripts - namespace: default -data: - wrapper.sh: | - #!/bin/bash - set -e - - HOSTNAME=$(hostname) - - if [[ "$HOSTNAME" == "postgres-0" ]]; then - echo "Initializing Primary on $HOSTNAME" - # Injecting standard PostgreSQL config args - # Under the hood it will use the original scripts/init-primary.sh mounted via configmap if needed - # pseudo-code: Setup replication users - exec docker-entrypoint.sh postgres -c config_file=/etc/postgresql/postgresql.conf - else - echo "Initializing Replica on $HOSTNAME" - export REPLICA_NAME=$HOSTNAME - # pseudo-code: bash /scripts/replica-entrypoint.sh - # wait for primary (postgres-0) to become available - # pg_basebackup from postgres-0 - # start replica - exec /scripts/replica-entrypoint.sh - fi diff --git a/k8s/testing/postgres.yaml b/k8s/testing/postgres.yaml deleted file mode 100644 index 847f8a5..0000000 --- a/k8s/testing/postgres.yaml +++ /dev/null @@ -1,64 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: postgres - namespace: default -spec: - # Exactly 3 nodes for Postgres (1 primary, 2 replicas) - replicas: 1 - serviceName: postgres-headless - selector: - matchLabels: - app: postgres - template: - metadata: - labels: - app: postgres - spec: - containers: - - name: postgres - image: postgres:18.2-alpine - ports: - - containerPort: 5432 - env: - - name: POSTGRES_USER - value: "dsv" - - name: POSTGRES_PASSWORD - value: "POSTGRES_PASSWORD" - - name: POSTGRES_DB - value: "dsv" - - name: POSTGRES_REPLICATION_USER - value: "replicator" - - name: 
POSTGRES_REPLICATION_PASSWORD - value: "POSTGRES_REPLICATION_PASSWORD" - - name: POSTGRES_PRIMARY_HOST - value: "postgres-0.postgres-headless.default.svc.cluster.local" - command: ["bash", "/scripts/wrapper.sh"] - volumeMounts: - - name: postgres-data - mountPath: /var/lib/postgresql/data - - name: postgres-scripts - mountPath: /scripts - volumeClaimTemplates: - - metadata: - name: postgres-data - spec: - accessModes: ["ReadWriteOnce"] - resources: - requests: - storage: 5Gi - ---- -apiVersion: v1 -kind: Service -metadata: - name: postgres-headless - namespace: default -spec: - type: ClusterIP - clusterIP: None - selector: - app: postgres - ports: - - port: 5432 - targetPort: 5432 diff --git a/pom.xml b/pom.xml index 62e51db..f6ec1e6 100644 --- a/pom.xml +++ b/pom.xml @@ -42,10 +42,6 @@ org.springframework.boot spring-boot-starter-actuator - - org.springframework.boot - spring-boot-starter-data-jpa - LeaderlessCluster + Gateway[Ingress Gateway
Traefik] --> LeaderlessCluster - subgraph LeaderlessCluster[Leaderless Cluster] + subgraph LeaderlessCluster[Kubernetes Cluster] direction LR - Node1[Cluster Node 1] - Node2[Cluster Node 2] - Node3[Cluster Node 3] + Node1[DSV Worker 1] + Node2[DSV Worker 2] + Node3[DSV Worker 3] Nodes[...] - NodeN[Cluster Node N] + NodeN[DSV Worker N] end Client -->|HTTP/S Requests| Gateway @@ -34,11 +34,11 @@ graph LR 3. A User puts a new secret in the storage -- Client sends the secret with the secret's key to any cluster node -- Gateway attaches request timestamp metadata before forwarding the write -- Receiving node runs two-phase commit for `user:key`: - - voting phase: nodes vote on write-lock ownership - - writing phase: lock owner writes shards and then releases lock +- Client sends the secret with the secret's key to the ingress gateway +- A DSV Worker receives the request and acts as the Coordinating Node +- Receiving node attaches request timestamp metadata and starts two-phase commit via Kafka: + - ordering phase: node publishes a create intent to the strictly ordered Kafka commit log + - writing phase: node distributes shards via ScaleCube and confirms persistence - Receiving node applies Shamir's Secret Sharing in memory, splitting the secret into n shards - Node distributes n-1 shards to other nodes and keeps 1 shard locally - k shards are required to reconstruct the secret (threshold scheme) @@ -48,23 +48,24 @@ graph LR ```mermaid sequenceDiagram participant User - participant Gateway - participant Node as Cluster Node - participant Cluster as Other Nodes - - User->>Gateway: POST /secret {key, value} - Gateway->>Gateway: Attach request timestamp metadata - Gateway->>Node: Forward request + timestamp metadata - Node->>Cluster: Voting phase: request write lock for user:key - Cluster-->>Node: Vote ACKs + participant Ingress as Traefik Ingress + participant Node as Coordinating Node + participant Kafka as Kafka Broker + participant Cluster as Peer Nodes + + 
User->>Ingress: POST /secret {key, value} + Ingress->>Node: Forward HTTP request + Node->>Node: Attach request timestamp metadata + Node->>Kafka: Publish create intent for user:key + Kafka-->>Node: Acknowledge strict ordering + Kafka-->>Cluster: Broadcast intent Node->>Node: Split secret into n shards (in memory)<br/>Plaintext never written to disk
Node->>Cluster: Writing phase: distribute n-1 shards (encrypted in transit) - Node->>Node: Store local shard to disk - Cluster->>Cluster: Store received shards to disk + Node->>Node: Store local shard to Redis + Cluster->>Cluster: Store received shards to Redis Cluster-->>Node: Write ACK - Node->>Cluster: Release write lock - Node-->>Gateway: Success + Version - Gateway-->>User: Secret stored (version) + Node-->>Ingress: Success + Version + Ingress-->>User: Secret stored (version) ``` --- @@ -81,28 +82,28 @@ sequenceDiagram ```mermaid sequenceDiagram participant User - participant Gateway - participant Node as Cluster Node - participant Cluster as Other Nodes - - User->>Gateway: GET /secret/{key}?version={v} - Gateway->>Node: Forward request - Node->>Node: Load local shard from disk - Node->>Cluster: Request k-1 additional shards + participant Ingress as Traefik Ingress + participant Node as Coordinating Node + participant Cluster as Peer Nodes + + User->>Ingress: GET /secret/{key}?version={v} + Ingress->>Node: Forward HTTP request + Node->>Node: Load local shard from Redis + Node->>Cluster: Request k-1 additional shards via ScaleCube Cluster-->>Node: Return shards (encrypted in transit) Node->>Node: Reconstruct plaintext in memory<br/>using Shamir's algorithm (k of n shards)
- Node-->>Gateway: Return secret value - Gateway-->>User: Secret value + Node-->>Ingress: Return secret value + Ingress-->>User: Secret value ``` --- 5. A user updates a stored secret (version control) -- Create and update both use the same two-phase commit flow with distributed write locks. -- Phase 1 (voting phase): nodes vote on lock ownership for `user:key`; writes are blocked on other nodes until lock is released. -- Phase 2 (writing phase): after lock quorum is reached, shards are distributed and persisted; lock is released after commit/rollback. -- The gateway attaches request timestamp metadata to incoming write requests. +- Create and update both use the same two-phase commit flow with Kafka ordering. +- Phase 1 (ordering phase): node publishes update intent to Kafka; concurrent writes are resolved by commit log order. +- Phase 2 (writing phase): shards are distributed and persisted to Redis. +- The DSV Worker attaches request timestamp metadata to incoming write requests.
- Each successful write returns a new secret version - A user can request either a specific version of the secret or the latest - Update creates a new set of shards for the new version (independent from previous version shards) @@ -111,24 +112,24 @@ sequenceDiagram ```mermaid sequenceDiagram participant User - participant Gateway - participant Node1 as Cluster Node - participant Nodes as Other Nodes - - User->>Gateway: PUT /secret/{key} {new_value} - Gateway->>Gateway: Attach request timestamp metadata - Gateway->>Node1: Forward update request + timestamp metadata - Node1->>Nodes: Voting phase: request write lock for user:key - Nodes-->>Node1: Vote responses - Node1->>Node1: Lock quorum reached + participant Ingress as Traefik Ingress + participant Node1 as Coordinating Node + participant Kafka as Kafka Broker + participant Nodes as Peer Nodes + + User->>Ingress: PUT /secret/{key} {new_value} + Ingress->>Node1: Forward HTTP request + Node1->>Node1: Attach request timestamp metadata + Node1->>Kafka: Publish update intent for user:key + Kafka-->>Node1: Acknowledge strict ordering + Kafka-->>Nodes: Broadcast intent Node1->>Node1: Split new secret value into n shards (in memory)<br/>Plaintext never written to disk
Node1->>Nodes: Writing phase: distribute n-1 shards with new version - Node1->>Node1: Store local shard to disk - Nodes->>Nodes: Store received shards to disk + Node1->>Node1: Store local shard to Redis + Nodes->>Nodes: Store received shards to Redis Nodes-->>Node1: Write ACK - Node1->>Nodes: Release write lock - Node1-->>Gateway: Success + New Version - Gateway-->>User: Secret updated (version: N+1) + Node1-->>Ingress: Success + New Version + Ingress-->>User: Secret updated (version: N+1) ``` --- @@ -144,17 +145,20 @@ sequenceDiagram ```mermaid sequenceDiagram participant User - participant Gateway - participant Node as Cluster Node - participant Cluster as Other Nodes - - User->>Gateway: DELETE /secret {key} - Gateway->>Node: Forward delete request - Node->>Cluster: Broadcast delete for key shards + participant Ingress as Traefik Ingress + participant Node as Coordinating Node + participant Kafka as Kafka Broker + participant Cluster as Peer Nodes + + User->>Ingress: DELETE /secret {key} + Ingress->>Node: Forward HTTP request + Node->>Kafka: Publish delete intent for user:key (strict ordering) + Kafka-->>Node: Acknowledge ordering + Node->>Cluster: Broadcast delete for key shards via ScaleCube Cluster-->>Node: Delete ACKs Node->>Node: Verify ACK count >= m-k+1 - Node-->>Gateway: Success (non-reconstructable) - Gateway-->>User: Secret deleted + Node-->>Ingress: Success (non-reconstructable) + Ingress-->>User: Secret deleted ``` --- @@ -204,6 +208,6 @@ graph LR 9. Node failure recovery -- If failure occurs in the **voting phase**, no shard writes are committed and lock requests expire/rollback. -- If failure occurs in the **writing phase**, partially written shards are rolled back using the write transaction ID before lock release. -- Recovered nodes rejoin automatically and only accept writes after lock state is synchronized. +- If failure occurs in the **ordering phase**, no shard writes are committed and the request fails.
+- If failure occurs in the **writing phase**, partially written shards are rolled back. +- Recovered nodes rejoin automatically via ScaleCube and synchronize state from Kafka and peers. diff --git a/docs/challenges.md b/docs/challenges.md index 140ae8d..2c15bf6 100644 --- a/docs/challenges.md +++ b/docs/challenges.md @@ -7,10 +7,10 @@ Reads collect at least `k` shards and reconstruct only in memory. If fewer than `k` shards are available, the read fails deterministically instead of returning partial or stale data. 3. **Create vs Update Under Concurrency** - Create requires non-existent key; update requires existing key. Both use the same lock-based two-phase write flow. This keeps write ordering consistent while preserving operation-specific preconditions. + Create requires non-existent key; update requires existing key. Both use the same Kafka-based two-phase write flow. This keeps write ordering consistent while preserving operation-specific preconditions. 4. **Versioning and Time Metadata** - The gateway attaches request timestamp metadata. Versions are committed in per-key lock order. This avoids relying on a global clock source while maintaining monotonic per-key history. + The DSV Worker attaches request timestamp metadata. Versions are committed in per-key Kafka order. This avoids relying on a global clock source while maintaining monotonic per-key history. 5. **History and Validity Intervals** Each version is independently stored and retrievable. `valid_from`/`valid_to` define active intervals. Intervals are updated during commits so historical reads can be served without ambiguity. @@ -31,8 +31,8 @@ `enc(NAME)` and `secret(NAME)` processing is all-or-nothing; failures roll back staged writes. Callers receive either a fully transformed file or a single error response. 11. **Failure Phases for Writes** - - **Voting phase failure**: lock quorum not reached; no commit. - - **Writing phase failure**: lock held but write quorum fails; partial writes roll back. 
+ - **Ordering phase failure**: Kafka commit log write failed; no intent published. + - **Writing phase failure**: intent published but write quorum fails; partial writes roll back. Phase separation makes recovery behavior explicit and prevents ambiguous outcomes for in-flight writes. 12. **Recovery and Availability** diff --git a/docs/crud/create.md b/docs/crud/create.md index 30cf1ab..e6ada4d 100644 --- a/docs/crud/create.md +++ b/docs/crud/create.md @@ -13,10 +13,10 @@ A client can create a secret only if no secret with the same key already exists. **Error Cases** -- [3. Gateway unable to forward request to node](#3-gateway-unable-to-forward-request-to-node) +- [3. Ingress unable to forward request to node](#3-ingress-unable-to-forward-request-to-node) - [4. Key is already persisted on the receiving node](#4-key-is-already-persisted-on-the-receiving-node) - [5. Key is already persisted on another node](#5-key-is-already-persisted-on-another-node) -- [6. Gateway metadata missing or invalid](#6-gateway-metadata-missing-or-invalid) +- [6. Coordinating Node metadata missing or invalid](#6-coordinating-node-metadata-missing-or-invalid) - [7. M nodes do not send back confirmation for receiving secret](#7-m-nodes-do-not-send-back-confirmation-for-receiving-secret) - [8. M nodes do not send back confirmation for persisting secret](#8-m-nodes-do-not-send-back-confirmation-for-persisting-secret) - [9. Client does not receive response](#9-client-does-not-receive-response) @@ -26,39 +26,40 @@ A client can create a secret only if no secret with the same key already exists. ## 1. Create one secret -- A client submits a secret through the gateway, and the gateway forwards the request into the cluster where one node picks it up. -- The receiving node validates that the key does not already exist, then starts two-phase commit for `user:key`. -- In the **voting phase**, nodes vote on write-lock ownership for the key and block competing writes until lock release. 
-- In the **writing phase**, the lock owner splits the secret into n shards, sends shards to peers, and each node stages its shard in temporary in-memory state. -- The receiving node then submits a persistence request to all nodes and returns success after m persistence confirmations. +- A client submits a secret through the ingress gateway (Traefik), which routes it to a DSV Worker (acting as the Coordinating Node). +- The Coordinating Node validates that the key does not already exist locally, attaches timestamp metadata, and starts the two-phase commit process via Kafka. +- In the **ordering phase**, the node publishes a create intent to the Kafka commit log. All nodes consume this log to establish a globally agreed-upon order, resolving concurrent write races. +- In the **writing phase**, the Coordinating Node splits the secret into n shards using Shamir's algorithm, sends shards to peer nodes (via ScaleCube), and each node stages its shard in temporary in-memory state. +- The Coordinating Node then submits a persistence request to all nodes and returns success after m persistence confirmations. 
- **Response**: `201 Created` ```mermaid sequenceDiagram participant User - participant Gateway - participant Node as Cluster Node - participant Peers as Other Nodes - User->>Gateway: POST /secret {key,value} - Gateway->>Gateway: Attach request timestamp metadata - Gateway->>Node: Forward request + timestamp metadata + participant Ingress as Traefik Ingress + participant Node as Coordinating Node + participant Kafka as Kafka Broker + participant Peers as Peer Nodes + User->>Ingress: POST /secret {key,value} + Ingress->>Node: Forward HTTP request Node->>Node: Check whether key is already persisted locally - Node->>Peers: Voting phase: request write lock for user:key - Peers-->>Node: Vote confirmations + Node->>Node: Attach request timestamp metadata + Node->>Kafka: Publish create intent for user:key + Kafka-->>Node: Acknowledge strict ordering + Kafka-->>Peers: Broadcast create intent Node->>Node: Split secret into n shards using Shamir's algorithm Node->>Peers: Writing phase: send n-1 shards to other nodes - Peers->>Peers: Store shard temporarily and check key state + Peers->>Peers: Store shard temporarily and validate intent Node->>Node: Add local confirmation - Peers-->>Node: Return confirmation or error (key already exists / lock contention) + Peers-->>Node: Return confirmation or error Node->>Node: Wait for confirmations from m nodes Node->>Peers: Submit persistence request for shards Node->>Node: Persist local shard Peers->>Peers: Persist shards Peers-->>Node: Send persistence confirmation Node->>Node: Wait for persistence confirmations from m - 1 nodes - Node->>Peers: Release write lock - Node-->>Gateway: Return success confirmation - Gateway-->>User: "Secret Created" + Node-->>Ingress: Return success confirmation + Ingress-->>User: "Secret Created" ``` --- @@ -66,60 +67,62 @@ sequenceDiagram ## 2. Create two secrets - Two create requests with the same key may be processed concurrently by different nodes. 
-- Nodes and peers use persisted state, temporary state, and voting-phase lock ownership to resolve the conflict. -- The earlier request continues through quorum and persistence, while the later request is rejected. +- Instead of peer-to-peer voting, the nodes publish their create intents to Kafka. +- Kafka strictly orders the requests. The request that appears first in the commit log continues through quorum and persistence. +- The node handling the later request observes the conflict from the commit log and aborts. - The client receives success for the earlier request and an error for the later one. - **Response**: `201 Created` for the earlier request; `409 Conflict` for the later request ```mermaid sequenceDiagram participant User - participant Gateway - participant Node as Cluster Node - participant Peers as Other Nodes + participant Ingress as Traefik Ingress + participant Node1 as Node 1 (Coord) + participant Node2 as Node 2 (Coord) + participant Kafka as Kafka Broker + participant Peers as Peer Nodes + par Secret 1 - User->>Gateway: POST /secret {key,value} - Gateway->>Gateway: Attach request timestamp metadata - Gateway->>Node: Forward request + timestamp metadata - Node->>Node: Check whether key is already persisted locally - Node->>Peers: Voting phase: request write lock for user:key - Peers-->>Node: Lock vote success (arrived first) - Node->>Node: Split secret into n shards using Shamir's algorithm - Node->>Peers: Writing phase: send n-1 shards to other nodes - Peers->>Peers: Store shard temporarily and check key state.
Key is in temporary storage, and this request came first - Node->>Node: Check temporary key state.
Key is in temporary storage, and this request came first - Node->>Node: Add local confirmation - Peers-->>Node: Return confirmation because key is not persisted - Node->>Node: Wait for confirmations from m nodes + User->>Ingress: POST /secret {key, value A} + Ingress->>Node1: Forward HTTP request + Node1->>Node1: Attach timestamp + Node1->>Kafka: Publish intent (arrives 1st) and Secret 2 - User->>Gateway: POST /secret {same key as secret 1, value (may differ)} - Gateway->>Gateway: Attach request timestamp metadata - Gateway->>Node: Forward request + timestamp metadata - Node->>Node: Check whether key is already persisted locally - Node->>Peers: Voting phase: request write lock for user:key - Peers-->>Node: Lock vote failure (request arrived second) - Node-->>Gateway: Send error on failure response(s) or timeout - break after error is sent to user - Gateway-->>User: "Secret 2 failed to create" - end + User->>Ingress: POST /secret {key, value B} + Ingress->>Node2: Forward HTTP request + Node2->>Node2: Attach timestamp + Node2->>Kafka: Publish intent (arrives 2nd) end - Node->>Peers: Submit persistence request for shards - Node->>Node: Persist local shard + + Kafka-->>Node1: Broadcast intent 1 (Wins) + Kafka-->>Node2: Broadcast intent 1 (Notices conflict) + + Note over Node2: Node 2 aborts creation for Secret 2 + Node2-->>Ingress: Return 409 Conflict + Ingress-->>User: "Secret 2 failed to create" + + Note over Node1: Node 1 proceeds with Secret 1 + Node1->>Node1: Split secret into n shards + Node1->>Peers: Writing phase: send shards to other nodes + Peers->>Peers: Store shard temporarily + Peers-->>Node1: Return confirmation + Node1->>Node1: Wait for confirmations from m nodes + Node1->>Peers: Submit persistence request for shards + Node1->>Node1: Persist local shard Peers->>Peers: Persist shards - Peers-->>Node: Send persistence confirmation - Node->>Node: Wait for persistence confirmations from m nodes - Node->>Peers: Release write lock - Node-->>Gateway: Return 
success confirmation - Gateway-->>User: "Secret 1 Created" + Peers-->>Node1: Send persistence confirmation + Node1->>Node1: Wait for persistence confirmations + Node1-->>Ingress: Return success confirmation + Ingress-->>User: "Secret 1 Created" ``` --- -## 3. Gateway unable to forward request to node +## 3. Ingress unable to forward request to node -- The gateway attempts to forward a create request to a cluster node. -- If forwarding times out, the gateway retries with another node. -- After repeated timeouts, the gateway returns: "Could not forward request to node". +- The Traefik ingress attempts to forward a create request to a cluster node. +- If forwarding times out, the ingress retries with another node based on its load balancing configuration. +- After repeated timeouts, the ingress returns a failure. - **Response**: `503 Service Unavailable` --- @@ -142,10 +145,10 @@ sequenceDiagram --- -## 6. Gateway metadata missing or invalid +## 6. Coordinating Node metadata missing or invalid -- The node requires gateway-attached timestamp metadata before starting write coordination. -- If metadata is missing or invalid, creation cannot continue. +- The coordinating node requires a valid timestamp and metadata to publish the intent to Kafka. +- If metadata generation fails, creation cannot continue. - The client receives: "Secret creation error - invalid request metadata". - **Response**: `503 Service Unavailable` diff --git a/docs/crud/delete.md b/docs/crud/delete.md index 2657352..6f210bd 100644 --- a/docs/crud/delete.md +++ b/docs/crud/delete.md @@ -20,49 +20,40 @@ A client can delete a secret by sending a DELETE request specifying the secret k ## 1. Delete one secret -- The client sends a DELETE request to the gateway specifying the secret key. -- The gateway forwards the request to the cluster; the receiving node broadcasts the delete to all n nodes. 
+- The client sends a DELETE request to the ingress gateway (Traefik), which routes it to a DSV Worker (acting as the Coordinating Node). +- The Coordinating Node attaches timestamp metadata and publishes a delete intent to the Kafka commit log. +- Kafka establishes strict ordering for the delete operation relative to any concurrent updates. +- The Coordinating Node broadcasts the delete command to all peer nodes (via ScaleCube). - Each node checks its local storage for a shard matching the key, deletes it, and returns an acknowledgment. -- After m − k + 1 acknowledgments are received (or the timeout is reached with that threshold met), the deletion is confirmed and the client receives a 204 No Content response. +- After m − k + 1 acknowledgments are received, the deletion is confirmed (ensuring fewer than k shards remain). - **Response**: `204 No Content` ```mermaid sequenceDiagram - participant Client - participant Controller as SecretController - participant Service as DeleteSecretService - participant Node1 as Cluster Node 1 - participant Node2 as Cluster Node 2 - participant NodeN as Cluster Node N - - Client->>Controller: DELETE /secret/{key} - activate Controller - Controller->>Controller: Validate DeleteSecretRequest - Controller->>Service: invoke delete(key) - activate Service - Service->>Node1: Broadcast delete shard request - Service->>Node2: Broadcast delete shard request - Service->>NodeN: Broadcast delete shard request - - activate Node1 - Node1->>Node1: Find shard for key - Node1->>Service: Return success acknowledgment - deactivate Node1 - - activate Node2 - Node2->>Node2: Find shard for key - Node2->>Service: Return success acknowledgment - deactivate Node2 - - activate NodeN - NodeN->>NodeN: Find shard for key - NodeN->>Service: Return success acknowledgment - deactivate NodeN - - Service->>Service: Collect acknowledgments (threshold: m − k + 1) - deactivate Service - Controller-->>Client: 204 No Content - deactivate Controller + participant User 
+ participant Ingress as Traefik Ingress + participant Node as Coordinating Node + participant Kafka as Kafka Broker + participant Peers as Peer Nodes + + User->>Ingress: DELETE /secret/{key} + Ingress->>Node: Forward HTTP request + Node->>Node: Attach request timestamp metadata + Node->>Kafka: Publish delete intent for user:key + Kafka-->>Node: Acknowledge strict ordering + Kafka-->>Peers: Broadcast delete intent + Node->>Peers: Broadcast delete shard request (via ScaleCube) + + par Coordinating Node (Local) + Node->>Node: Find shard for key & delete + and Peer Nodes + Peers->>Peers: Find shard for key & delete + Peers-->>Node: Return success acknowledgment + end + + Node->>Node: Collect acknowledgments (threshold: m − k + 1) + Node-->>Ingress: Return 204 No Content + Ingress-->>User: 204 No Content ``` --- diff --git a/docs/crud/retrieve.md b/docs/crud/retrieve.md index 50eb81c..4e6165e 100644 --- a/docs/crud/retrieve.md +++ b/docs/crud/retrieve.md @@ -24,7 +24,7 @@ In every case the receiving node collects at least k shards (the reconstruction - [5. Version Not Found](#5-version-not-found) - [6. Insufficient Shards](#6-insufficient-shards) - [7. Not Authorized to Access Secret](#7-not-authorized-to-access-secret) -- [8. Gateway Unavailable](#8-gateway-unavailable) +- [8. Ingress Unavailable](#8-ingress-unavailable) - [9. Node Unavailable](#9-node-unavailable) - [10. Local Shard Read Failure](#10-local-shard-read-failure) - [11. Version Enumeration Failure](#11-version-enumeration-failure) @@ -35,10 +35,10 @@ In every case the receiving node collects at least k shards (the reconstruction ## 1.
Retrieve Latest Version - Client sends a GET request for a secret key without specifying a version -- Gateway forwards the request to any cluster node (leaderless routing) -- Receiving node consults replicated key metadata to resolve the current latest version number +- Traefik ingress forwards the request to any DSV Worker (Coordinating Node) +- Coordinating Node consults replicated key metadata to resolve the current latest version number - Node loads its own local shard for `user:key:latest-version` from durable storage -- Node requests k-1 additional shards from peer nodes +- Node requests k-1 additional shards from peer nodes via ScaleCube - Node reconstructs the plaintext secret in memory using Shamir's algorithm (k of n shards) - Secret value is returned to the client; plaintext is cleared from memory immediately - **Response**: `200 OK` @@ -46,19 +46,19 @@ In every case the receiving node collects at least k shards (the reconstruction ```mermaid sequenceDiagram participant User - participant Gateway - participant Node as Cluster Node - participant Peers as Other Nodes + participant Ingress as Traefik Ingress + participant Node as Coordinating Node + participant Peers as Peer Nodes - User->>Gateway: GET /secret/{key} - Gateway->>Node: Forward request + User->>Ingress: GET /secret/{key} + Ingress->>Node: Forward HTTP request Node->>Node: Resolve latest version from replicated metadata for {key} Node->>Node: Load local shard for user:key:version from storage Node->>Peers: Request k-1 shards for user:key:version Peers-->>Node: Return shards (encrypted in transit) Node->>Node: Reconstruct plaintext in memory
using Shamir's algorithm (k of n shards) - Node-->>Gateway: Return secret value + version - Gateway-->>User: Secret value + version + Node-->>Ingress: Return secret value + version + Ingress-->>User: Secret value + version ``` --- @@ -66,10 +66,10 @@ sequenceDiagram ## 2. Retrieve Specific Version - Client sends a GET request for a secret key with an explicit version number -- Gateway forwards the request to any cluster node -- Receiving node skips version resolution — it uses the requested version directly +- Traefik ingress forwards the request to any DSV Worker (Coordinating Node) +- Coordinating Node skips version resolution — it uses the requested version directly - Node loads its own local shard for `user:key:requested-version` from durable storage -- Node requests k-1 additional shards from peer nodes for the same version +- Node requests k-1 additional shards from peer nodes via ScaleCube for the same version - Node reconstructs the plaintext secret in memory using Shamir's algorithm (k of n shards) - Secret value for the requested version is returned to the client; plaintext is cleared from memory - **Response**: `200 OK` @@ -77,19 +77,19 @@ sequenceDiagram ```mermaid sequenceDiagram participant User - participant Gateway - participant Node as Cluster Node - participant Peers as Other Nodes + participant Ingress as Traefik Ingress + participant Node as Coordinating Node + participant Peers as Peer Nodes - User->>Gateway: GET /secret/{key}?version={v} - Gateway->>Node: Forward request + User->>Ingress: GET /secret/{key}?version={v} + Ingress->>Node: Forward HTTP request Node->>Node: Load local shard for user:key:v from storage Node->>Peers: Request k-1 shards for user:key:v Peers-->>Node: Return shards (encrypted in transit) Node->>Node: Reconstruct plaintext in memory
using Shamir's algorithm (k of n shards) Node->>Node: Clear plaintext from memory - Node-->>Gateway: Return secret value + version - Gateway-->>User: Secret value (version: v) + Node-->>Ingress: Return secret value + version + Ingress-->>User: Secret value (version: v) ``` --- @@ -97,9 +97,9 @@ sequenceDiagram ## 3. Retrieve All Versions - Client sends a GET request for a secret key requesting all versions -- Gateway forwards the request to any cluster node -- Receiving node queries its local storage to enumerate all known versions of `user:key` -- For each version, the node loads its local shard and requests k-1 shards from peers +- Traefik ingress forwards the request to any DSV Worker (Coordinating Node) +- Coordinating Node queries its local storage to enumerate all known versions of `user:key` +- For each version, the node loads its local shard and requests k-1 shards from peers via ScaleCube - Each version's plaintext is reconstructed independently in memory and added to the result map - Plaintext for each version is cleared from memory as soon as it is added to the map - Node returns the complete map of version → secret value to the client @@ -108,12 +108,12 @@ sequenceDiagram ```mermaid sequenceDiagram participant User - participant Gateway - participant Node as Cluster Node - participant Peers as Other Nodes + participant Ingress as Traefik Ingress + participant Node as Coordinating Node + participant Peers as Peer Nodes - User->>Gateway: GET /secret/{key}/versions - Gateway->>Node: Forward request + User->>Ingress: GET /secret/{key}/versions + Ingress->>Node: Forward HTTP request Node->>Node: Enumerate all stored versions for user:key loop For each version V Node->>Node: Load local shard for user:key:V from storage @@ -122,8 +122,8 @@ sequenceDiagram Node->>Node: Reconstruct plaintext in memory
using Shamir's algorithm (k of n shards) Node->>Node: Add V → secret value to result map
Clear plaintext from memory end - Node-->>Gateway: Return map {version → secret value} - Gateway-->>User: Map of all versions to secret values + Node-->>Ingress: Return map {version → secret value} + Ingress-->>User: Map of all versions to secret values ``` --- @@ -168,7 +168,7 @@ sequenceDiagram - **When it happens**: Authentication fails or authorization rules deny access. - **Handling**: - - Reject early at the gateway when possible; nodes still enforce authorization on every request. + - Reject early at the ingress when possible; nodes still enforce authorization on every request. - Return `401 Unauthorized` for invalid/expired credentials, `403 Forbidden` for valid but insufficient access. - Do not indicate whether the secret exists. - Audit log the denial with request metadata (no plaintext). @@ -176,14 +176,14 @@ sequenceDiagram --- -## 8. Gateway Unavailable +## 8. Ingress Unavailable -- **When it happens**: The gateway is unreachable or returns errors to the client. +- **When it happens**: The Traefik ingress is unreachable or returns errors to the client. - **Handling**: - Clients should retry with exponential backoff and jitter. - - Gateways should be stateless and horizontally scaled behind a load balancer. - - Use health checks and circuit breakers to avoid routing to unhealthy gateways. - - Return `503 Service Unavailable` when the gateway is overloaded. + - Ingress instances should be stateless and horizontally scaled behind a load balancer. + - Use health checks and circuit breakers to avoid routing to unhealthy ingress pods. + - Return `503 Service Unavailable` when the ingress is overloaded. - **Response**: `503 Service Unavailable` --- @@ -192,7 +192,7 @@ sequenceDiagram - **When it happens**: The target node is down or unreachable. - **Handling**: - - Gateway retries on another node; routing is leaderless. + - Ingress retries on another node; routing is leaderless. - Node-to-node shard requests use timeouts and fall back to other peers. 
- If a receiving node cannot reach enough peers to reach k shards, treat as insufficient shards. - Track node health and quarantine flapping nodes temporarily. diff --git a/docs/crud/update.md b/docs/crud/update.md index c50b539..386c257 100644 --- a/docs/crud/update.md +++ b/docs/crud/update.md @@ -13,8 +13,8 @@ A client can update a secret only if a secret with that key already exists. The **Error Cases** -- [3. Gateway unable to forward request to node](#3-gateway-unable-to-forward-request-to-node) -- [4. Gateway metadata missing or invalid](#4-gateway-metadata-missing-or-invalid) +- [3. Ingress unable to forward request to node](#3-ingress-unable-to-forward-request-to-node) +- [4. Coordinating Node metadata missing or invalid](#4-coordinating-node-metadata-missing-or-invalid) - [5. M nodes do not send back confirmation for receiving update](#5-m-nodes-do-not-send-back-confirmation-for-receiving-update) - [6. M nodes do not send back confirmation for persisting update](#6-m-nodes-do-not-send-back-confirmation-for-persisting-update) - [7. Client does not receive response](#7-client-does-not-receive-response) @@ -23,27 +23,29 @@ A client can update a secret only if a secret with that key already exists. The ## 1. Update one secret -- A client submits an updated secret through the gateway, and the gateway forwards the request into the cluster where one node picks it up. -- The receiving node validates that the key already exists, then starts two-phase commit for `user:key`. -- In the **voting phase**, nodes vote on write-lock ownership for the key and block competing writes until lock release. -- In the **writing phase**, the lock owner splits the updated secret into n shards and sends update shards to peers, where each node stages its shard in temporary in-memory state. -- The receiving node then submits a persistence request for the new version to all nodes and returns success after m persistence confirmations. 
+- A client submits an updated secret through the ingress gateway (Traefik), which routes it to a DSV Worker (acting as the Coordinating Node). +- The Coordinating Node validates that the key already exists locally, attaches timestamp metadata, and starts the two-phase commit process via Kafka. +- In the **ordering phase**, the node publishes an update intent to the Kafka commit log. All nodes consume this log to establish a globally agreed-upon order. +- In the **writing phase**, the Coordinating Node splits the updated secret into n shards using Shamir's algorithm, sends update shards to peer nodes (via ScaleCube), and each node stages its shard in temporary in-memory state. +- The Coordinating Node then submits a persistence request for the new version to all nodes and returns success after m persistence confirmations. - **Response**: `200 OK` ```mermaid sequenceDiagram participant User - participant Gateway - participant Node as Cluster Node - participant Peers as Other Nodes - User->>Gateway: PUT /secret {key,newValue} - Gateway->>Gateway: Attach request timestamp metadata - Gateway->>Node: Forward request + timestamp metadata - Node->>Peers: Voting phase: request write lock for user:key - Peers-->>Node: Vote confirmations + participant Ingress as Traefik Ingress + participant Node as Coordinating Node + participant Kafka as Kafka Broker + participant Peers as Peer Nodes + User->>Ingress: PUT /secret {key,newValue} + Ingress->>Node: Forward HTTP request + Node->>Node: Attach request timestamp metadata + Node->>Kafka: Publish update intent for user:key + Kafka-->>Node: Acknowledge strict ordering + Kafka-->>Peers: Broadcast update intent Node->>Node: Split updated secret into n shards using Shamir's algorithm Node->>Peers: Writing phase: send update shards with key and version - Peers->>Peers: Store shard temporarily and check key/version state + Peers->>Peers: Store shard temporarily and validate intent Node->>Node: Add local confirmation if key is persisted 
locally Peers-->>Node: Return confirmation if update is valid Node->>Node: Wait for confirmations from m nodes @@ -51,10 +53,9 @@ sequenceDiagram Node->>Node: Persist local versioned shard Peers->>Peers: Persist versioned shards Peers-->>Node: Send persistence confirmation - Node->>Node: Wait for persistence confirmations from m nodes - Node->>Peers: Release write lock - Node-->>Gateway: Return success confirmation - Gateway-->>User: "Secret Updated" + Node->>Node: Wait for persistence confirmations from m - 1 nodes + Node-->>Ingress: Return success confirmation + Ingress-->>User: "Secret Updated" ``` --- @@ -62,66 +63,70 @@ sequenceDiagram ## 2. Concurrent updates to the same secret - Two update requests for the same key may be processed concurrently by different nodes. -- Nodes and peers use persisted state, temporary state, and voting-phase lock ownership to resolve which update proceeds first. -- The earlier update continues through quorum and persistence, while the later conflicting update is rejected or retried with a newer version. +- Instead of peer-to-peer voting, the nodes publish their update intents to Kafka. +- Kafka strictly orders the requests. The request that appears first in the commit log continues through quorum and persistence as version V+1. +- The node handling the later request observes the conflict from the commit log and aborts or prompts a retry for version V+2. - The client receives success for the accepted update and an error for the rejected one. 
- **Response**: `200 OK` for the accepted update; `409 Conflict` for the rejected update ```mermaid sequenceDiagram participant User - participant Gateway - participant Node as Cluster Node - participant Peers as Other Nodes + participant Ingress as Traefik Ingress + participant Node1 as Node 1 (Coord) + participant Node2 as Node 2 (Coord) + participant Kafka as Kafka Broker + participant Peers as Peer Nodes + par Update 1 - User->>Gateway: PUT /secret {key,valueA} - Gateway->>Gateway: Attach request timestamp metadata - Gateway->>Node: Forward request + timestamp metadata - Node->>Peers: Voting phase: request write lock for user:key - Peers-->>Node: Lock vote success (arrived first) - Node->>Node: Split updated secret into n shards - Node->>Peers: Writing phase: send update shards for version V+1 - Peers->>Peers: Store shard temporarily and check key/version state.
This update arrived first - Node->>Node: Check persisted and temporary key/version state.
This update arrived first - Node->>Node: Add local confirmation - Peers-->>Node: Return confirmation for version V+1 - Node->>Node: Wait for confirmations from m nodes + User->>Ingress: PUT /secret {key,valueA} + Ingress->>Node1: Forward HTTP request + Node1->>Node1: Attach timestamp + Node1->>Kafka: Publish intent (arrives 1st) and Update 2 - User->>Gateway: PUT /secret {same key, valueB} - Gateway->>Gateway: Attach request timestamp metadata - Gateway->>Node: Forward request + timestamp metadata - Node->>Peers: Voting phase: request write lock for user:key - Peers-->>Node: Lock vote failure (arrived second) - Node-->>Gateway: Send error on failure response(s) or timeout - break after error is sent to user - Gateway-->>User: "Update 2 failed" - end + User->>Ingress: PUT /secret {same key, valueB} + Ingress->>Node2: Forward HTTP request + Node2->>Node2: Attach timestamp + Node2->>Kafka: Publish intent (arrives 2nd) end - Node->>Peers: Submit persistence request for version V+1 - Node->>Node: Persist local versioned shard - Peers->>Peers: Persist versioned shards - Peers-->>Node: Send persistence confirmation - Node->>Node: Wait for persistence confirmations from m nodes - Node->>Peers: Release write lock - Node-->>Gateway: Return success confirmation - Gateway-->>User: "Update 1 Accepted" + + Kafka-->>Node1: Broadcast intent 1 (Wins) + Kafka-->>Node2: Broadcast intent 1 (Notices conflict) + + Note over Node2: Node 2 aborts Update 2 + Node2-->>Ingress: Return 409 Conflict + Ingress-->>User: "Update 2 failed" + + Note over Node1: Node 1 proceeds with Update 1 + Node1->>Node1: Split updated secret into n shards + Node1->>Peers: Writing phase: send shards for version V+1 + Peers->>Peers: Store shard temporarily + Peers-->>Node1: Return confirmation + Node1->>Node1: Wait for confirmations from m nodes + Node1->>Peers: Submit persistence request for version V+1 + Node1->>Node1: Persist local shard + Peers->>Peers: Persist shards + Peers-->>Node1: Send
persistence confirmation + Node1->>Node1: Wait for persistence confirmations + Node1-->>Ingress: Return success confirmation + Ingress-->>User: "Update 1 Accepted" ``` --- -## 3. Gateway unable to forward request to node +## 3. Ingress unable to forward request to node -- The gateway attempts to forward an update request into the cluster so a node can pick it up. -- If forwarding times out, the gateway retries with another node. -- After repeated timeouts, the gateway returns: "Could not forward request to node". +- The Traefik ingress attempts to forward an update request into the cluster so a node can pick it up. +- If forwarding times out, the ingress retries with another node based on load balancing configuration. +- After repeated timeouts, the ingress returns a failure. - **Response**: `503 Service Unavailable` --- -## 4. Gateway metadata missing or invalid +## 4. Coordinating Node metadata missing or invalid -- The node requires gateway-attached timestamp metadata before starting write coordination. -- If metadata is missing or invalid, update cannot continue. +- The coordinating node requires a valid timestamp and metadata to publish the intent to Kafka. +- If metadata generation fails, update cannot continue. - The client receives: "Secret update error - invalid request metadata". - **Response**: `503 Service Unavailable` From 03538733b828dfffff2ab189655b79bc8eff0b32 Mon Sep 17 00:00:00 2001 From: Ari Krakauer Date: Mon, 18 May 2026 20:50:54 -0400 Subject: [PATCH 5/7] Fixes --- docs/milestones.md | 4 ++-- docs/scope.md | 9 ++------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/docs/milestones.md b/docs/milestones.md index 832eed4..3b3bc44 100644 --- a/docs/milestones.md +++ b/docs/milestones.md @@ -16,7 +16,7 @@ You will be graded weekly against these milestones. 
| Week | Focus | Running Artifact | Scope / Design Artifact | | ------ | ------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------- | | **1** | Project Bootstrap & Scope Definition | • Secrets service starts and responds to basic API endpoints (`/get`, `/post`, etc)
• Single-node deployment reproducible | • Initial scope document (1–2 pages)
• Explicit assumptions and non-goals | -| **2** | Architecture & API Contract | • Create, update, and retrieve endpoints callable (stubbed allowed)
• Authentication enforced | • API contract frozen
• Initial architecture diagram | +| **2** | Architecture & API Contract | • Create, update, and retrieve endpoints callable (stubbed allowed) | • API contract frozen
• Initial architecture diagram | | **3** | Secret Existence & Duplicate Semantics | • Secret creation succeeds and shards distributed to nodes
• Each node stores only one shard (plaintext never on disk)
• Duplicate create requests fail | • Secret existence and duplicate handling rules | | **4** | Single-Node Update & Versioning | • Updates create new versions
• History preserved with validity intervals | • Validity guarantees | | **5** | Shamir Shard Storage Record Format | • Secret shards stored durably on each node
• Records include shard data, metadata (user:key:version)
| • Shard storage record definition | @@ -24,7 +24,7 @@ You will be graded weekly against these milestones. | **7** | Multi-Node Shard Distribution Path | • Shards distributed to n nodes (configurable)
• Each node stores one shard per secret version
• k-of-n threshold reconstruction tested | • Shard distribution model and failure assumptions | | **8** | Multi-Node Consistency | • Consistent behavior with all nodes healthy
• Reads reconstruct secrets by gathering k shards
• Plaintext only exists in memory during reconstruction | • Updated scope reflecting implemented behavior | | **9** | Failure Handling | • One vault node killed during operation
• System remains usable (up to n-k node failures)
• Secret reconstruction still possible with k available shards | • Failure behavior and degraded modes | -| **10** | Concurrency, History, and Isolation Safety | • Concurrent create/update tested
• Duplicate and update rules enforced correctly | • Concurrency and isolation guarantees | +| **10** | Concurrency and History Safety | • Concurrent create/update tested
• Duplicate and update rules enforced correctly | • Concurrency guarantees | | **11** | Multi-Secret `.env` Creation & Resolution | • `.env encrypt` creates secrets correctly
• `.env expand` resolves secrets correctly
• Duplicate secrets cause full failure | • `.env` transformation and failure semantics | | **12** | History Retrieval & Observability | • Secret history endpoint returns ordered versions with validity ranges
• Logs show create, update, retrieve, and failure | • History query semantics and observability notes | | **13** | Final Demo & Submission | • Live multi-node demo showing create/update, concurrency, failure, recovery, history access, and `.env` workflows | • Final scope document
• Clear statement of guarantees and limitations | diff --git a/docs/scope.md b/docs/scope.md index a00954c..ce86612 100644 --- a/docs/scope.md +++ b/docs/scope.md @@ -13,7 +13,6 @@ A long-running HTTP service that accepts requests for creating, updating, retrie It will: - Accept structured JSON input -- Authenticate requests and derive the caller’s identity - Accept secret **create** requests that establish a new secret - Reject create requests for secrets that already exist - Accept secret **update** requests that create a new version of an existing secret @@ -26,8 +25,6 @@ It will: - expand `secret(NAME)` references by retrieving authoritative secret values - process `enc(NAME)` references by creating new secrets and returning `secret(NAME)` - Fail the entire `.env` request if any referenced secret already exists or cannot be resolved -- Enforce unauthorized rejections without revealing information on internal state -- Enforce caller-scoped isolation on reads and writes - Expose basic health and status endpoints The API validates and forwards requests but does not determine secret existence or authoritative state. @@ -65,7 +62,6 @@ A shared behavioral model implemented consistently across all components. It defines: -- How callers are identified and scoped - What it means for a secret to exist and be retrievable - The distinction between secret creation and secret update - The shared lock/commit flow used by both creation and update @@ -74,9 +70,8 @@ It defines: - How historical secret values are retained - How secret deletion is defined and when a secret is considered non-reconstructable - What identifiers are used to reference secrets -- How isolation is enforced during retrieval and history access - How retries and concurrent requests are handled -- What duplicate and _not found_ errors mean under isolation +- What duplicate and _not found_ errors mean The model must be documented and observable in practice. 
@@ -107,7 +102,7 @@ It will: - Clearly distinguish secret creation from secret update - Define delete request and response behavior, including threshold-based deletion success criteria - Specify duplicate and _not found_ error behavior -- Describe durability, replication, and isolation guarantees +- Describe durability and replication guarantees - Describe secret-keeping and spreading behavior and failure behavior when referenced secrets cannot be resolved - Describe secret history retrieval semantics, including version ordering and validity timestamps - Describe `.env` encryption and expansion semantics, including secret creation and all-or-nothing failure From 349959ace195f26d3a51bc3eb4b6a687bcd3a126 Mon Sep 17 00:00:00 2001 From: Ari Krakauer Date: Mon, 18 May 2026 20:53:51 -0400 Subject: [PATCH 6/7] Removed old cases --- docs/challenges.md | 4 ++-- docs/crud/delete.md | 14 ++------------ docs/crud/retrieve.md | 36 +++++++++++------------------------- 3 files changed, 15 insertions(+), 39 deletions(-) diff --git a/docs/challenges.md b/docs/challenges.md index 2c15bf6..64df778 100644 --- a/docs/challenges.md +++ b/docs/challenges.md @@ -21,8 +21,8 @@ 7. **Retries and Idempotency** Safe retries return existing committed outcomes. Duplicate create returns `409`; duplicate identical update is idempotent. This lets clients retry on timeout without risking duplicate state transitions. -8. **Isolation by Caller** - Secrets are scoped by authenticated identity (`user:key:version`) and cross-tenant leakage is prevented. Authorization checks are enforced on every request path before shard access. +8. **Namespace Isolation** + Secrets are separated into logical namespaces (`user:key:version`), allowing different groups to reuse key names. Precondition checks are enforced on every request path before shard access. 9. **Deterministic Failure Semantics** Precondition failures are stable (`409` for duplicate create, `404` for missing update/retrieve/delete).
Equivalent requests against equivalent cluster state produce the same status code. diff --git a/docs/crud/delete.md b/docs/crud/delete.md index 6f210bd..a231c91 100644 --- a/docs/crud/delete.md +++ b/docs/crud/delete.md @@ -13,8 +13,7 @@ A client can delete a secret by sending a DELETE request specifying the secret k **Error Cases** - [2. Secret not found](#2-secret-not-found) -- [3. Authentication failure](#3-authentication-failure) -- [4. Invalid request](#4-invalid-request) +- [3. Invalid request](#3-invalid-request) --- @@ -67,16 +66,7 @@ sequenceDiagram --- -## 3. Authentication failure - -- The client's credentials are missing, expired, or invalid. -- The request is rejected before reaching the cluster. -- The client receives: "Unauthorized". -- **Response**: `401 Unauthorized` - ---- - -## 4. Invalid request +## 3. Invalid request - The request is malformed, for example the key field is missing or empty. - The controller rejects the request during validation before forwarding it. diff --git a/docs/crud/retrieve.md b/docs/crud/retrieve.md index 4e6165e..005a388 100644 --- a/docs/crud/retrieve.md +++ b/docs/crud/retrieve.md @@ -23,12 +23,11 @@ In every case the receiving node collects at least k shards (the reconstruction - [4. Secret Not Found](#4-secret-not-found) - [5. Version Not Found](#5-version-not-found) - [6. Insufficient Shards](#6-insufficient-shards) -- [7. Not Authorized to Access Secret](#7-not-authorized-to-access-secret) -- [8. Ingress Unavailable](#8-ingress-unavailable) -- [9. Node Unavailable](#9-node-unavailable) -- [10. Local Shard Read Failure](#10-local-shard-read-failure) -- [11. Version Enumeration Failure](#11-version-enumeration-failure) -- [12. Shard Reconstruction Failure](#12-shard-reconstruction-failure) +- [7. Ingress Unavailable](#7-ingress-unavailable) +- [8. Node Unavailable](#8-node-unavailable) +- [9. Local Shard Read Failure](#9-local-shard-read-failure) +- [10. 
Version Enumeration Failure](#10-version-enumeration-failure) +- [11. Shard Reconstruction Failure](#11-shard-reconstruction-failure) --- @@ -130,11 +129,10 @@ sequenceDiagram ## 4. Secret Not Found -- **When it happens**: The key has no recorded versions for the authenticated user. +- **When it happens**: The key has no recorded versions for the provided user space. - **Handling**: - Receiving node checks local metadata first, then consults peers only if metadata is uncertain. - If no node can confirm any version for the key, return `404 Not Found` with a stable error code. - - Do not leak existence across tenants; errors should be scoped to the authenticated user. - Cache the negative lookup briefly to reduce repeated fan-out. - **Response**: `404 Not Found` @@ -164,19 +162,7 @@ sequenceDiagram --- -## 7. Not Authorized to Access Secret - -- **When it happens**: Authentication fails or authorization rules deny access. -- **Handling**: - - Reject early at the ingress when possible; nodes still enforce authorization on every request. - - Return `401 Unauthorized` for invalid/expired credentials, `403 Forbidden` for valid but insufficient access. - - Do not indicate whether the secret exists. - - Audit log the denial with request metadata (no plaintext). -- **Response**: `401 Unauthorized` or `403 Forbidden` - ---- - -## 8. Ingress Unavailable +## 7. Ingress Unavailable - **When it happens**: The Traefik ingress is unreachable or returns errors to the client. - **Handling**: @@ -188,7 +174,7 @@ sequenceDiagram --- -## 9. Node Unavailable +## 8. Node Unavailable - **When it happens**: The target node is down or unreachable. - **Handling**: @@ -200,7 +186,7 @@ sequenceDiagram --- -## 10. Local Shard Read Failure +## 9. Local Shard Read Failure - **When it happens**: Local storage returns an error or corrupted shard data. - **Handling**: @@ -212,7 +198,7 @@ sequenceDiagram --- -## 11. Version Enumeration Failure +## 10. 
Version Enumeration Failure - **When it happens**: The node cannot list versions due to metadata or storage errors. - **Handling**: @@ -224,7 +210,7 @@ sequenceDiagram --- -## 12. Shard Reconstruction Failure +## 11. Shard Reconstruction Failure - **When it happens**: Collected shards fail integrity checks or reconstruction cannot complete. - **Handling**: From efe525190a7b15326515e6dc1f8c038de2780d5a Mon Sep 17 00:00:00 2001 From: Ari Krakauer Date: Mon, 18 May 2026 21:32:07 -0400 Subject: [PATCH 7/7] Fixed deprecated json serializer and deserializer --- .../CommitPubSubIntegrationTest.java | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/test/java/edu/yu/capstone/DistributedSecretsVault/service/communication/CommitPubSubIntegrationTest.java b/src/test/java/edu/yu/capstone/DistributedSecretsVault/service/communication/CommitPubSubIntegrationTest.java index 7deae9c..3af310d 100644 --- a/src/test/java/edu/yu/capstone/DistributedSecretsVault/service/communication/CommitPubSubIntegrationTest.java +++ b/src/test/java/edu/yu/capstone/DistributedSecretsVault/service/communication/CommitPubSubIntegrationTest.java @@ -16,8 +16,8 @@ import org.junit.jupiter.api.Test; import org.springframework.kafka.core.DefaultKafkaProducerFactory; import org.springframework.kafka.core.KafkaTemplate; -import org.springframework.kafka.support.serializer.JsonDeserializer; -import org.springframework.kafka.support.serializer.JsonSerializer; +import org.springframework.kafka.support.serializer.JacksonJsonDeserializer; +import org.springframework.kafka.support.serializer.JacksonJsonSerializer; import org.springframework.kafka.test.EmbeddedKafkaBroker; import org.springframework.kafka.test.EmbeddedKafkaKraftBroker; @@ -41,8 +41,10 @@ /** * Verifies that a commit broadcast is visible to multiple DSV-style consumers. *

- * In production each node uses its own {@code spring.kafka.consumer.group-id} (via - * {@code NODE_NAME}), so every node consumes the full topic — unlike a shared group where + * In production each node uses its own {@code spring.kafka.consumer.group-id} + * (via + * {@code NODE_NAME}), so every node consumes the full topic — unlike a shared + * group where * only one consumer would read each partition. */ public class CommitPubSubIntegrationTest { @@ -89,7 +91,8 @@ void broadcastCommit_reachesThreeIndependentNodes() throws Exception { receiveCommitOnNode(bootstrap, groupId, message.getOperationId(), allNodesSubscribed))); } - // Block until each node consumer has joined the group and been assigned partitions + // Block until each node consumer has joined the group and been assigned + // partitions allNodesSubscribed.await(ASSIGN_TIMEOUT.toSeconds(), TimeUnit.SECONDS); publisher.broadcastCommit(message); @@ -116,7 +119,8 @@ private static Callable receiveCommitOnNode( UUID expectedOperationId, CyclicBarrier allNodesSubscribed) { return () -> { - try (KafkaConsumer consumer = new KafkaConsumer<>(consumerProps(bootstrap, groupId))) { + try (KafkaConsumer consumer = new KafkaConsumer<>( + consumerProps(bootstrap, groupId))) { consumer.subscribe(List.of(KafkaConfig.COMMIT_TOPIC)); long assignDeadline = System.nanoTime() + ASSIGN_TIMEOUT.toNanos(); while (consumer.assignment().isEmpty()) { @@ -146,9 +150,9 @@ private static Map consumerProps(String bootstrap, String groupI props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrap); props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId); props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); - props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, JsonDeserializer.class); - props.put(JsonDeserializer.TRUSTED_PACKAGES, "*"); - props.put(JsonDeserializer.VALUE_DEFAULT_TYPE, CommitMessage.class.getName()); + props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, 
JacksonJsonDeserializer.class); + props.put(JacksonJsonDeserializer.TRUSTED_PACKAGES, "*"); + props.put(JacksonJsonDeserializer.VALUE_DEFAULT_TYPE, CommitMessage.class.getName()); props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false); return props; @@ -158,7 +162,7 @@ private static DefaultKafkaProducerFactory producerFactory(Strin Map props = new HashMap<>(); props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrap); props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class); - props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, JsonSerializer.class); + props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, JacksonJsonSerializer.class); props.put(ProducerConfig.ACKS_CONFIG, "all"); return new DefaultKafkaProducerFactory<>(props); }