Skip to content

Commit 93df31d

Browse files
feat: add Vault password integration, hook policy controls, and comprehensive backup test coverage
- Add Vault-managed password support as the preferred authentication method (vault.ReadFromVault)
- Add HooksPolicy to Settings with an enabled flag and allowed_commands allowlist
- Add RunHookWithSettings() to enforce hook policy and replace direct exec calls
- Add ResolveRepositoryNameFromConfig() helper to centralize repository resolution logic
- Add recordPasswordSource() and recordRepositoryResolution() metrics
1 parent 015a410 commit 93df31d

17 files changed

Lines changed: 761 additions & 113 deletions

.github/workflows/ci.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ jobs:
3333
- name: Run unit tests with race and coverage
3434
run: go test -short -race -coverprofile=unit.coverage.out -covermode=atomic ./pkg/...
3535

36+
- name: Run backup-focused unit tests with coverage
37+
run: go test -short -race -coverprofile=backup.unit.coverage.out -covermode=atomic ./pkg/backup/...
38+
3639
- name: Enforce unit coverage >= 70%
3740
run: |
3841
COVERAGE=$(go tool cover -func=unit.coverage.out | awk '/^total:/ {gsub("%","",$3); print $3}')
@@ -74,6 +77,7 @@ jobs:
7477
name: ci-unit-artifacts
7578
path: |
7679
unit.coverage.out
80+
backup.unit.coverage.out
7781
flaky-summary.txt
7882
7983
ci-integration:

cmd/backup/quick.go

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -154,31 +154,34 @@ func resolveQuickBackupRepository(rc *eos_io.RuntimeContext) (string, backup.Rep
154154
return "", backup.Repository{}, fmt.Errorf("loading backup configuration: %w", err)
155155
}
156156

157-
repoName := strings.TrimSpace(config.DefaultRepository)
158-
if repoName != "" {
159-
if _, ok := config.Repositories[repoName]; !ok {
160-
return "", backup.Repository{}, fmt.Errorf("default repository %q not found in configuration", repoName)
161-
}
157+
if repoName, err := backup.ResolveRepositoryNameFromConfig(config, ""); err == nil {
162158
repo := config.Repositories[repoName]
159+
backup.RecordRepositoryResolution("quick_default", true)
163160
logger.Info("Using default repository for quick backup",
164161
zap.String("repository", repoName))
165162
return repoName, repo, nil
163+
} else if config.DefaultRepository != "" {
164+
backup.RecordRepositoryResolution("quick_default", false)
165+
return "", backup.Repository{}, err
166166
}
167167

168-
if _, ok := config.Repositories[backup.QuickBackupRepositoryName]; ok {
169-
repo := config.Repositories[backup.QuickBackupRepositoryName]
168+
if repoName, err := backup.ResolveRepositoryNameFromConfig(config, backup.QuickBackupRepositoryName); err == nil {
169+
repo := config.Repositories[repoName]
170+
backup.RecordRepositoryResolution("quick_named", true)
170171
logger.Info("Using quick backup repository from configuration",
171172
zap.String("repository", backup.QuickBackupRepositoryName))
172-
return backup.QuickBackupRepositoryName, repo, nil
173+
return repoName, repo, nil
173174
}
174175

175176
if len(config.Repositories) == 0 {
177+
backup.RecordRepositoryResolution("quick_default", false)
176178
return "", backup.Repository{}, fmt.Errorf("no repositories configured; add at least one in %s", backup.ConfigFile)
177179
}
178180

179181
if len(config.Repositories) == 1 {
180182
for name := range config.Repositories {
181183
repo := config.Repositories[name]
184+
backup.RecordRepositoryResolution("quick_single_repo", true)
182185
logger.Info("Using sole configured repository for quick backup",
183186
zap.String("repository", name))
184187
return name, repo, nil
@@ -191,6 +194,7 @@ func resolveQuickBackupRepository(rc *eos_io.RuntimeContext) (string, backup.Rep
191194
}
192195
sort.Strings(repoNames)
193196

197+
backup.RecordRepositoryResolution("quick_single_repo", false)
194198
return "", backup.Repository{}, eos_err.NewExpectedError(rc.Ctx, fmt.Errorf(
195199
"multiple repositories configured (%s) but no default_repository set; update %s to select one",
196200
strings.Join(repoNames, ", "), backup.ConfigFile))

cmd/backup/update.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,13 @@ Examples:
9393
if profile.Hooks != nil && len(profile.Hooks.PreBackup) > 0 {
9494
logger.Info("Running pre-backup hooks")
9595
for _, hook := range profile.Hooks.PreBackup {
96-
if err := backup.RunHook(rc.Ctx, logger, hook); err != nil {
96+
if err := backup.RunHookWithSettings(rc.Ctx, logger, hook, config.Settings); err != nil {
9797
logger.Error("Pre-backup hook failed",
9898
zap.String("hook", hook),
9999
zap.Error(err))
100100
if profile.Hooks.OnError != nil {
101101
for _, errorHook := range profile.Hooks.OnError {
102-
_ = backup.RunHook(rc.Ctx, logger, errorHook)
102+
_ = backup.RunHookWithSettings(rc.Ctx, logger, errorHook, config.Settings)
103103
}
104104
}
105105
return fmt.Errorf("pre-backup hook failed: %w", err)
@@ -122,7 +122,7 @@ Examples:
122122
// Run error hooks
123123
if profile.Hooks != nil && profile.Hooks.OnError != nil {
124124
for _, hook := range profile.Hooks.OnError {
125-
_ = backup.RunHook(rc.Ctx, logger, hook)
125+
_ = backup.RunHookWithSettings(rc.Ctx, logger, hook, config.Settings)
126126
}
127127
}
128128
return err
@@ -132,7 +132,7 @@ Examples:
132132
if profile.Hooks != nil && len(profile.Hooks.PostBackup) > 0 {
133133
logger.Info("Running post-backup hooks")
134134
for _, hook := range profile.Hooks.PostBackup {
135-
if err := backup.RunHook(rc.Ctx, logger, hook); err != nil {
135+
if err := backup.RunHookWithSettings(rc.Ctx, logger, hook, config.Settings); err != nil {
136136
logger.Warn("Post-backup hook failed",
137137
zap.String("hook", hook),
138138
zap.Error(err))
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Backup Observability Runbook
2+
3+
## Metrics Endpoint
4+
5+
Backup telemetry is exported via Go `expvar` at `/debug/vars`.
6+
7+
Key maps:
8+
9+
- `backup_repository_resolution_total`
10+
- `backup_config_load_total`
11+
- `backup_config_source_total`
12+
- `backup_password_source_total`
13+
- `backup_hook_decision_total`
14+
15+
## High-Signal Keys
16+
17+
Config and path drift:
18+
19+
- `backup_config_load_total.permission_denied_failure`
20+
- `backup_config_source_total.canonical_success`
21+
- `backup_config_source_total.legacy_success`
22+
- `backup_config_source_total.defaults_success`
23+
24+
Credential source health:
25+
26+
- `backup_password_source_total.vault_success`
27+
- `backup_password_source_total.vault_failure`
28+
- `backup_password_source_total.repo_env_success`
29+
- `backup_password_source_total.secrets_env_success`
30+
31+
Hook policy enforcement:
32+
33+
- `backup_hook_decision_total.allowlist_execute_success`
34+
- `backup_hook_decision_total.deny_not_allowlisted_failure`
35+
- `backup_hook_decision_total.deny_bad_arguments_failure`
36+
- `backup_hook_decision_total.disabled_failure`
37+
38+
## Recommended Alerts
39+
40+
- Config access regression:
41+
Trigger if `backup_config_load_total.permission_denied_failure` increases over a 5-minute window.
42+
- Secret hygiene regression:
43+
Trigger if `backup_password_source_total.repo_env_success` or `backup_password_source_total.secrets_env_success` grows faster than `backup_password_source_total.vault_success`.
44+
- Hook policy pressure:
45+
Trigger if `backup_hook_decision_total.deny_not_allowlisted_failure` spikes and `backup_hook_decision_total.allowlist_execute_success` drops.
46+

pkg/backup/client.go

Lines changed: 68 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"github.com/CodeMonkeyCybersecurity/eos/pkg/eos_io"
1919
"github.com/CodeMonkeyCybersecurity/eos/pkg/interaction"
2020
"github.com/CodeMonkeyCybersecurity/eos/pkg/shared"
21+
"github.com/CodeMonkeyCybersecurity/eos/pkg/vault"
2122
"github.com/uptrace/opentelemetry-go-extra/otelzap"
2223
"go.uber.org/zap"
2324
)
@@ -270,83 +271,137 @@ func (c *Client) handleRepositoryNotInitialized(resticOutput string) error {
270271
func (c *Client) getRepositoryPassword() (string, error) {
271272
logger := otelzap.Ctx(c.rc.Ctx)
272273

273-
// 1. Repository-local password file (created by quick backup generator)
274+
// 1. Vault-managed secret (preferred when Vault is configured)
275+
if isVaultConfigured() {
276+
if password, err := c.readPasswordFromVault(); err == nil {
277+
recordPasswordSource("vault", true)
278+
return password, nil
279+
} else {
280+
recordPasswordSource("vault", false)
281+
logger.Warn("Failed to read repository password from Vault",
282+
zap.String("repository", c.repository.Name),
283+
zap.Error(err))
284+
}
285+
}
286+
287+
// 2. Repository-local password file (created by quick backup generator)
274288
localPasswordPath := filepath.Join(c.repository.URL, ".password")
275289
if password, err := readPasswordFile(localPasswordPath); err == nil {
290+
recordPasswordSource("repo_password_file", true)
276291
logger.Debug("Using repository-local password file",
277292
zap.String("path", localPasswordPath))
278293
return password, nil
279294
} else if err != nil && !errors.Is(err, os.ErrNotExist) {
295+
recordPasswordSource("repo_password_file", false)
280296
logger.Warn("Failed to read repository-local password file",
281297
zap.String("path", localPasswordPath),
282298
zap.Error(err))
283299
}
284300

285-
// 2. Global secrets directory fallback (used by managed repositories)
301+
// 3. Global secrets directory fallback (used by managed repositories)
286302
secretsPasswordPath := filepath.Join(secretsDirPath, fmt.Sprintf("%s.password", c.repository.Name))
287303
if password, err := readPasswordFile(secretsPasswordPath); err == nil {
304+
recordPasswordSource("secrets_password_file", true)
288305
logger.Debug("Using secrets directory password file",
289306
zap.String("path", secretsPasswordPath))
290307
return password, nil
291308
} else if err != nil && !errors.Is(err, os.ErrNotExist) {
309+
recordPasswordSource("secrets_password_file", false)
292310
logger.Warn("Failed to read secrets directory password file",
293311
zap.String("path", secretsPasswordPath),
294312
zap.Error(err))
295313
}
296314

297-
// 3. Repository `.env` file (temporary secret storage during Vault testing)
315+
// 4. Repository `.env` file (compatibility fallback)
298316
envPath := filepath.Join(c.repository.URL, ".env")
299317
if password, err := readPasswordFromEnvFile(envPath); err == nil {
318+
recordPasswordSource("repo_env", true)
300319
logger.Debug("Using repository .env file for restic password",
301320
zap.String("path", envPath))
302321
return password, nil
303322
} else if err != nil && !errors.Is(err, os.ErrNotExist) {
323+
recordPasswordSource("repo_env", false)
304324
logger.Warn("Failed to read repository .env file",
305325
zap.String("path", envPath),
306326
zap.Error(err))
307327
}
308328

309-
// 4a. Secrets directory .env file (fallback for non-local repositories)
329+
// 5. Secrets directory .env file (fallback for non-local repositories)
310330
secretsEnvPath := filepath.Join(secretsDirPath, fmt.Sprintf("%s.env", c.repository.Name))
311331
if password, err := readPasswordFromEnvFile(secretsEnvPath); err == nil {
332+
recordPasswordSource("secrets_env", true)
312333
logger.Debug("Using secrets .env file for restic password",
313334
zap.String("path", secretsEnvPath))
314335
return password, nil
315336
} else if err != nil && !errors.Is(err, os.ErrNotExist) {
337+
recordPasswordSource("secrets_env", false)
316338
logger.Warn("Failed to read secrets .env file",
317339
zap.String("path", secretsEnvPath),
318340
zap.Error(err))
319341
}
320342

321-
// 4. Environment variable overrides (least preferred, but supported for manual ops)
322-
if password := strings.TrimSpace(os.Getenv("RESTIC_PASSWORD")); password != "" {
323-
logger.Warn("Using RESTIC_PASSWORD environment variable; prefer password files for security")
324-
return password, nil
325-
}
326-
343+
// 6. RESTIC_PASSWORD_FILE environment override (path to a password file; tried before the raw RESTIC_PASSWORD value)
327344
if passwordFile := strings.TrimSpace(os.Getenv("RESTIC_PASSWORD_FILE")); passwordFile != "" {
328345
if password, err := readPasswordFile(passwordFile); err == nil {
346+
recordPasswordSource("env_var", true)
329347
logger.Warn("Using RESTIC_PASSWORD_FILE override; prefer managed password files",
330348
zap.String("path", passwordFile))
331349
return password, nil
332350
}
351+
recordPasswordSource("env_var", false)
352+
}
353+
354+
// 7. Raw environment variable override
355+
if password := strings.TrimSpace(os.Getenv("RESTIC_PASSWORD")); password != "" {
356+
recordPasswordSource("env_var", true)
357+
logger.Warn("Using RESTIC_PASSWORD environment variable; prefer password files for security")
358+
return password, nil
333359
}
334360

335361
missingErr := fmt.Errorf("restic repository password not found; expected password file at %s, secrets fallback at %s, or RESTIC_PASSWORD in %s",
336362
localPasswordPath, secretsPasswordPath, envPath)
337363

364+
// 8. Interactive wizard fallback
338365
password, wizardErr := c.runPasswordWizard(localPasswordPath, secretsPasswordPath, []string{envPath, secretsEnvPath})
339366
if wizardErr == nil {
367+
recordPasswordSource("wizard", true)
340368
return password, nil
341369
}
342370
if wizardErr != nil && !errors.Is(wizardErr, errPasswordWizardSkipped) {
371+
recordPasswordSource("wizard", false)
343372
logger.Warn("Password setup wizard failed",
344373
zap.Error(wizardErr))
345374
}
346375

347376
return "", missingErr
348377
}
349378

379+
func (c *Client) readPasswordFromVault() (string, error) {
380+
var secret map[string]interface{}
381+
vaultPath := fmt.Sprintf("%s/%s", VaultPasswordPathPrefix, c.repository.Name)
382+
if err := vault.ReadFromVault(c.rc, vaultPath, &secret); err != nil {
383+
return "", err
384+
}
385+
386+
raw, ok := secret[VaultPasswordKey]
387+
if !ok {
388+
return "", fmt.Errorf("vault secret %q missing key %q", vaultPath, VaultPasswordKey)
389+
}
390+
password, ok := raw.(string)
391+
if !ok {
392+
return "", fmt.Errorf("vault secret %q contains non-string password", vaultPath)
393+
}
394+
password = strings.TrimSpace(password)
395+
if password == "" {
396+
return "", fmt.Errorf("vault secret %q contains empty password", vaultPath)
397+
}
398+
return password, nil
399+
}
400+
401+
func isVaultConfigured() bool {
402+
return strings.TrimSpace(os.Getenv("VAULT_ADDR")) != ""
403+
}
404+
350405
// InitRepository initializes a new restic repository
351406
func (c *Client) InitRepository() error {
352407
logger := otelzap.Ctx(c.rc.Ctx)
@@ -839,16 +894,9 @@ func (c *Client) executeHooks(hooks []string, hookType string) error {
839894
ctx, cancel := context.WithTimeout(c.rc.Ctx, HookTimeout)
840895
defer cancel()
841896

842-
cmd := exec.CommandContext(ctx, "sh", "-c", hookCmd)
843-
844-
// Capture output
845-
var stdout, stderr bytes.Buffer
846-
cmd.Stdout = &stdout
847-
cmd.Stderr = &stderr
848-
849897
// Execute hook
850898
start := time.Now()
851-
err := cmd.Run()
899+
err := RunHookWithSettings(ctx, logger, hookCmd, c.config.Settings)
852900
duration := time.Since(start)
853901

854902
// Check for timeout
@@ -866,27 +914,14 @@ func (c *Client) executeHooks(hooks []string, hookType string) error {
866914
zap.String("type", hookType),
867915
zap.String("command", hookCmd),
868916
zap.Duration("duration", duration),
869-
zap.String("stdout", stdout.String()),
870-
zap.String("stderr", stderr.String()),
871917
zap.Error(err))
872-
return fmt.Errorf("hook failed: %w\nstdout: %s\nstderr: %s",
873-
err, stdout.String(), stderr.String())
918+
return fmt.Errorf("hook failed: %w", err)
874919
}
875920

876921
logger.Info("Hook completed successfully",
877922
zap.String("type", hookType),
878923
zap.String("command", hookCmd),
879-
zap.Duration("duration", duration),
880-
zap.Int("stdout_bytes", stdout.Len()),
881-
zap.Int("stderr_bytes", stderr.Len()))
882-
883-
// Log output if present (for debugging)
884-
if stdout.Len() > 0 {
885-
logger.Debug("Hook stdout", zap.String("output", stdout.String()))
886-
}
887-
if stderr.Len() > 0 {
888-
logger.Debug("Hook stderr", zap.String("output", stderr.String()))
889-
}
924+
zap.Duration("duration", duration))
890925
}
891926

892927
logger.Info("All hooks completed successfully",

pkg/backup/client_test.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package backup
33
import (
44
"context"
55
"encoding/json"
6+
"path/filepath"
67
"strings"
78
"testing"
89

@@ -17,6 +18,33 @@ func TestNewClient(t *testing.T) {
1718
Log: logger,
1819
}
1920

21+
tmpDir := t.TempDir()
22+
configPath := filepath.Join(tmpDir, "backup.yaml")
23+
origRead := configReadCandidates
24+
origWritePath := configWritePath
25+
origWriteDir := configWriteDir
26+
t.Cleanup(func() {
27+
configReadCandidates = origRead
28+
configWritePath = origWritePath
29+
configWriteDir = origWriteDir
30+
})
31+
configReadCandidates = []string{configPath}
32+
configWritePath = configPath
33+
configWriteDir = tmpDir
34+
35+
cfg := &Config{
36+
DefaultRepository: "local",
37+
Repositories: map[string]Repository{
38+
"local": {Name: "local", Backend: "local", URL: filepath.Join(tmpDir, "repo")},
39+
},
40+
Profiles: map[string]Profile{
41+
"system": {Name: "system", Repository: "local", Paths: []string{tmpDir}},
42+
},
43+
}
44+
if err := SaveConfig(rc, cfg); err != nil {
45+
t.Fatalf("SaveConfig() error = %v", err)
46+
}
47+
2048
t.Run("create client with default config", func(t *testing.T) {
2149
// Uses the temporary config written by SaveConfig above, which defines the "local" repository
2250
client, err := NewClient(rc, "local")

0 commit comments

Comments
 (0)