diff --git a/README.md b/README.md
index 6c94b5e..9ab1cd1 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,7 @@ The Distributed Secrets Vault is a leaderless cluster system where secrets are s
- [Distributed System Challenges](docs/challenges.md)
- [Workflow Diagrams](docs/crud)
- [Software Architecture Diagrams](docs/architecture.md)
+- [Peer-Only Recovery](docs/peer-recovery.md)
- [Tools & Technologies](docs/technologies.md)
## Installation and Usage Guide
diff --git a/docs/architecture.md b/docs/architecture.md
index 68a5a3b..0b1dffe 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -210,4 +210,6 @@ graph LR
- If failure occurs in the **ordering phase**, no shard writes are committed and the request fails.
- If failure occurs in the **writing phase**, partially written shards are rolled back.
-- Recovered nodes rejoin automatically via ScaleCube and synchronize state from Kafka and peers.
+- Recovered nodes rejoin automatically via ScaleCube, query peers for current shard state, and pull any missing shards locally.
+- On the first cluster startup, there are no peers yet, so recovery exits cleanly as a no-op.
+- After catch-up, the node can be queried for shards and can participate in Shamir reconstruction immediately.
diff --git a/docs/challenges.md b/docs/challenges.md
index 64df778..344ab28 100644
--- a/docs/challenges.md
+++ b/docs/challenges.md
@@ -36,4 +36,4 @@
Phase separation makes recovery behavior explicit and prevents ambiguous outcomes for in-flight writes.
12. **Recovery and Availability**
- Nodes recover from durable storage, and rejoin automatically when healthy. Quorum rules determine whether reads/writes continue or fail fast during degraded periods.
+ Nodes recover from their peers by synchronizing the current shard inventory and fetching missing shards locally. On the first cluster startup, recovery no-ops because there are no peers yet. Quorum rules determine whether reads/writes continue or fail fast during degraded periods.
diff --git a/docs/peer-recovery.md b/docs/peer-recovery.md
new file mode 100644
index 0000000..cdcc363
--- /dev/null
+++ b/docs/peer-recovery.md
@@ -0,0 +1,64 @@
+# Peer Recovery
+
+When a node joins a running cluster, it catches up from its peers by:
+
+1. Discovering peer nodes via ScaleCube.
+2. Asking peers for the secret keys and versions they currently store.
+3. Comparing that cluster inventory with its own local Redis data.
+4. Requesting any missing shards from peers.
+5. Storing recovered shards locally so it can participate in Shamir reconstruction.
+
+---
+
+## First cluster startup
+
+When the very first node starts a brand-new cluster, there are no peers yet. In that case:
+
+- peer discovery returns empty
+- recovery exits cleanly
+- nothing is fetched or stored
+- the node continues normal startup
+
+So first-time boot is safe and effectively a no-op.
+
+---
+
+## New node joining an existing cluster
+
+When a new node joins a cluster that already has data:
+
+- the node discovers existing peers
+- peers export their current state
+- the joining node downloads missing shards
+- the joining node stores those shards in Redis
+- the node becomes queryable for shard reads and Shamir reconstruction
+
+That means newly added nodes are automatically brought up to date.
+
+---
+
+## Internal endpoints
+
+Each node exposes these internal peer-only endpoints:
+
+- `GET /internal/recovery/state`
+- `GET /internal/recovery/shard/{user}/{key}/{version}`
+- `GET /internal/recovery/health`
+
+These are used only by other cluster nodes.
+
+---
+
+## Recovery config usage
+
+Recovery is always on. There is no on/off switch for peer recovery.
+
+The recovery config only tunes how startup behaves:
+
+- `delay-seconds` — how long to wait after startup before checking peers
+- `peer-connectivity-timeout-seconds` — how long to wait for peers to appear
+- `min-required-peers` — how many peers must be visible before catch-up starts
+
+For a brand-new cluster, these settings do not cause problems because the node simply finds no peers and exits recovery cleanly. When a new node joins an existing cluster, the same settings let it wait briefly, discover peers, and pull the missing shards it needs.
+
+---
diff --git a/src/main/java/edu/yu/capstone/DistributedSecretsVault/config/RecoveryConfig.java b/src/main/java/edu/yu/capstone/DistributedSecretsVault/config/RecoveryConfig.java
new file mode 100644
index 0000000..6bc7925
--- /dev/null
+++ b/src/main/java/edu/yu/capstone/DistributedSecretsVault/config/RecoveryConfig.java
@@ -0,0 +1,36 @@
+package edu.yu.capstone.DistributedSecretsVault.config;
+
+import org.springframework.boot.context.properties.ConfigurationProperties;
+import org.springframework.validation.annotation.Validated;
+import lombok.Data;
+
+/**
+ * Configuration for peer-only cluster recovery.
+ *
+ * Recovery is always on. These settings only tune startup timing and logging.
+ */
+@Validated
+@ConfigurationProperties(prefix = "cluster.peer-recovery")
+@Data
+public class RecoveryConfig {
+
+ /**
+ * Delay in seconds before starting recovery after node startup.
+ * Gives Redis and ScaleCube time to initialize.
+ * Default: 2 seconds
+ */
+ private int delaySeconds = 2;
+
+ /**
+ * Maximum time in seconds to wait for peer connectivity during recovery.
+ * Default: 15 seconds
+ */
+ private int peerConnectivityTimeoutSeconds = 15;
+
+ /**
+ * Minimum number of peers required for recovery to proceed.
+ * Default: 1 (at least one other node in the cluster)
+ */
+ private int minRequiredPeers = 1;
+
+}
diff --git a/src/main/java/edu/yu/capstone/DistributedSecretsVault/controller/PeerRecoveryController.java b/src/main/java/edu/yu/capstone/DistributedSecretsVault/controller/PeerRecoveryController.java
new file mode 100644
index 0000000..4439c6f
--- /dev/null
+++ b/src/main/java/edu/yu/capstone/DistributedSecretsVault/controller/PeerRecoveryController.java
@@ -0,0 +1,97 @@
+package edu.yu.capstone.DistributedSecretsVault.controller;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.data.redis.core.StringRedisTemplate;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PathVariable;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+
+import edu.yu.capstone.DistributedSecretsVault.domain.model.SecretKey;
+import edu.yu.capstone.DistributedSecretsVault.domain.model.SecretPart;
+import edu.yu.capstone.DistributedSecretsVault.dto.recovery.NodeStateResponse;
+import edu.yu.capstone.DistributedSecretsVault.dto.recovery.RecoveryHealthResponse;
+import edu.yu.capstone.DistributedSecretsVault.dto.recovery.StateSummary;
+import edu.yu.capstone.DistributedSecretsVault.repository.SecretPartRepository;
+
+/**
+ * Internal peer-recovery RPC endpoints.
+ * These endpoints are always available to other nodes in the cluster.
+ */
+@RestController
+@RequestMapping("/internal/recovery")
+public class PeerRecoveryController {
+ private static final Logger log = LoggerFactory.getLogger(PeerRecoveryController.class);
+
+ private final SecretPartRepository secretPartRepository;
+ private final StringRedisTemplate redisTemplate;
+
+ public PeerRecoveryController(SecretPartRepository secretPartRepository, StringRedisTemplate redisTemplate) {
+ this.secretPartRepository = secretPartRepository;
+ this.redisTemplate = redisTemplate;
+ }
+
+ @GetMapping("/state")
+ public ResponseEntity exportNodeState() {
+ try {
+ Set state = scanLocalState();
+ NodeStateResponse response = new NodeStateResponse();
+ response.setNodeState(new ArrayList<>(state));
+ return ResponseEntity.ok(response);
+ } catch (Exception ex) {
+ log.error("Error exporting peer recovery state", ex);
+ return ResponseEntity.status(500).build();
+ }
+ }
+
+ @GetMapping("/shard/{user}/{key}/{version}")
+ public ResponseEntity getShard(
+ @PathVariable String user,
+ @PathVariable String key,
+ @PathVariable long version) {
+ try {
+ Optional part = secretPartRepository.findPart(new SecretKey(user, key), version);
+ return part.map(ResponseEntity::ok).orElseGet(() -> ResponseEntity.notFound().build());
+ } catch (Exception ex) {
+ log.error("Error retrieving recovery shard for {}:{}:{}", user, key, version, ex);
+ return ResponseEntity.status(500).build();
+ }
+ }
+
+ @GetMapping("/health")
+ public ResponseEntity recoveryHealth() {
+ return ResponseEntity.ok(new RecoveryHealthResponse(true, "Ready to serve peer recovery requests"));
+ }
+
+ private Set scanLocalState() {
+ Set state = new HashSet<>();
+ Set redisKeys = redisTemplate.keys("*:*");
+ if (redisKeys == null || redisKeys.isEmpty()) {
+ return state;
+ }
+
+ for (String redisKey : redisKeys) {
+ if (redisKey == null) {
+ continue;
+ }
+ String[] parts = redisKey.split(":", 2);
+ if (parts.length != 2) {
+ continue;
+ }
+
+ List versions = secretPartRepository.listVersions(new SecretKey(parts[0], parts[1]));
+ for (Long version : versions) {
+ state.add(new StateSummary(parts[0], parts[1], version, "local"));
+ }
+ }
+ return state;
+ }
+}
diff --git a/src/main/java/edu/yu/capstone/DistributedSecretsVault/dto/recovery/NodeStateResponse.java b/src/main/java/edu/yu/capstone/DistributedSecretsVault/dto/recovery/NodeStateResponse.java
new file mode 100644
index 0000000..19c643a
--- /dev/null
+++ b/src/main/java/edu/yu/capstone/DistributedSecretsVault/dto/recovery/NodeStateResponse.java
@@ -0,0 +1,18 @@
+package edu.yu.capstone.DistributedSecretsVault.dto.recovery;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Recovery response listing all secret state known by a node.
+ */
+@Data
+@NoArgsConstructor
+@AllArgsConstructor
+public class NodeStateResponse {
+ private List nodeState = new ArrayList<>();
+}
diff --git a/src/main/java/edu/yu/capstone/DistributedSecretsVault/dto/recovery/RecoveryHealthResponse.java b/src/main/java/edu/yu/capstone/DistributedSecretsVault/dto/recovery/RecoveryHealthResponse.java
new file mode 100644
index 0000000..1b624fe
--- /dev/null
+++ b/src/main/java/edu/yu/capstone/DistributedSecretsVault/dto/recovery/RecoveryHealthResponse.java
@@ -0,0 +1,16 @@
+package edu.yu.capstone.DistributedSecretsVault.dto.recovery;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Recovery endpoint health response.
+ */
+@Data
+@NoArgsConstructor
+@AllArgsConstructor
+public class RecoveryHealthResponse {
+ private boolean ready;
+ private String message;
+}
diff --git a/src/main/java/edu/yu/capstone/DistributedSecretsVault/dto/recovery/StateSummary.java b/src/main/java/edu/yu/capstone/DistributedSecretsVault/dto/recovery/StateSummary.java
new file mode 100644
index 0000000..5ff6fa1
--- /dev/null
+++ b/src/main/java/edu/yu/capstone/DistributedSecretsVault/dto/recovery/StateSummary.java
@@ -0,0 +1,11 @@
+package edu.yu.capstone.DistributedSecretsVault.dto.recovery;
+
+/**
+ * Recovery inventory entry for a secret shard.
+ * Used to advertise which (user:key:version) values exist in the cluster.
+ */
+public record StateSummary(String ownerId, String keyName, long version, String sourceNodeId) {
+ public String toRedisKey() {
+ return ownerId + ":" + keyName + ":" + version;
+ }
+}
diff --git a/src/main/java/edu/yu/capstone/DistributedSecretsVault/service/internal/NodeClient.java b/src/main/java/edu/yu/capstone/DistributedSecretsVault/service/internal/NodeClient.java
index 36b0c6f..d09f158 100644
--- a/src/main/java/edu/yu/capstone/DistributedSecretsVault/service/internal/NodeClient.java
+++ b/src/main/java/edu/yu/capstone/DistributedSecretsVault/service/internal/NodeClient.java
@@ -19,10 +19,11 @@
import edu.yu.capstone.DistributedSecretsVault.dto.internal.DeletePrepareRequest;
import edu.yu.capstone.DistributedSecretsVault.dto.internal.PostPrepareRequest;
import edu.yu.capstone.DistributedSecretsVault.dto.internal.PutPrepareRequest;
+import edu.yu.capstone.DistributedSecretsVault.dto.recovery.NodeStateResponse;
import io.scalecube.services.Microservices;
/**
- * Outbound HTTP client for inter-node communication within the cluster.
+ * Outbound HTTP client for internode communication within the cluster.
*
* Peer discovery is handled via ScaleCube's
* {@link Microservices#serviceEndpoints()},
@@ -159,7 +160,7 @@ public SecretPartsResponse fetchAllSecretParts(String peerUrl, SecretKey key) {
Map parts = restClient.get()
.uri(peerUrl + "/internal/{id}/all?user={user}", key.getName(), key.getOwnerId())
.retrieve()
- .body(new ParameterizedTypeReference