diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 8320d92369b..f30aa795683 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -4,6 +4,8 @@ ### Notable Changes +* Breaking change: OAuth tokens for interactive logins (`auth_type = databricks-cli`) are now stored in the OS-native secure store by default (Keychain on macOS, Credential Manager on Windows, Secret Service on Linux) instead of `~/.databricks/token-cache.json`. After upgrading, run `databricks auth login` once per profile to re-authenticate; cached tokens from older versions are not migrated. To keep the previous file-backed storage, set `DATABRICKS_AUTH_STORAGE=plaintext` or add `auth_storage = plaintext` under `[__settings__]` in `~/.databrickscfg` (the env var takes precedence over the config setting), then re-run `databricks auth login`. On systems where the OS keyring is not reachable (e.g. Linux containers without a D-Bus session bus), the CLI transparently falls back to the file cache when reading tokens so legacy `token-cache.json` entries remain accessible without manual configuration. + ### CLI * Added `databricks aitools` command group for installing Databricks skills into your coding agents (Claude Code, Cursor, Codex CLI, OpenCode, GitHub Copilot, Antigravity). Skills are fetched from [github.com/databricks/databricks-agent-skills](https://github.com/databricks/databricks-agent-skills) and either symlinked into each agent's skills directory or copied into the current project. Use `databricks aitools install` to set up, `update` to pull newer versions, `list` to see what's available, and `uninstall` to remove them. diff --git a/acceptance/cmd/auth/describe/u2m-json-output/output.txt b/acceptance/cmd/auth/describe/u2m-json-output/output.txt index 7e2ac070cbc..bd5e6108735 100644 --- a/acceptance/cmd/auth/describe/u2m-json-output/output.txt +++ b/acceptance/cmd/auth/describe/u2m-json-output/output.txt @@ -2,7 +2,7 @@ >>> [CLI] auth describe --profile u2m-profile --output json Warn: [hostmetadata] failed to fetch host metadata for https://u2m-profile.databricks.test, will skip for 1m0s { - "mode": "plaintext", - "location": "~/.databricks/token-cache.json", + "mode": "secure", + "location": "OS keyring (service: databricks-cli)", "source": "default" } diff --git a/acceptance/cmd/auth/describe/u2m-plaintext-config/output.txt b/acceptance/cmd/auth/describe/u2m-plaintext-config/output.txt index d1e522be97b..85c9edca654 100644 --- a/acceptance/cmd/auth/describe/u2m-plaintext-config/output.txt +++ b/acceptance/cmd/auth/describe/u2m-plaintext-config/output.txt @@ -1,7 +1,7 @@ >>> [CLI] auth describe --profile u2m-profile Warn: [hostmetadata] failed to fetch host metadata for https://u2m-profile.databricks.test, will skip for 1m0s -Unable to authenticate: error getting token: cache: token not found +Unable to authenticate: error getting token: cache: no cached credentials; run `databricks auth login` to sign in Token storage: plaintext, ~/.databricks/token-cache.json (from auth_storage in [__settings__] section of [TEST_TMP_DIR]/home/.databrickscfg) ----- Current configuration: diff --git a/acceptance/cmd/auth/describe/u2m-plaintext-default/test.toml b/acceptance/cmd/auth/describe/u2m-plaintext-default/test.toml deleted file mode 100644 index 36c0e7e237b..00000000000 --- a/acceptance/cmd/auth/describe/u2m-plaintext-default/test.toml +++ /dev/null @@ -1,3 +0,0 @@ -Ignore = [ - "home" -] diff --git a/acceptance/cmd/auth/describe/u2m-plaintext-env/output.txt b/acceptance/cmd/auth/describe/u2m-plaintext-env/output.txt index edfdc0a1939..10d1896b040 100644 --- a/acceptance/cmd/auth/describe/u2m-plaintext-env/output.txt +++ b/acceptance/cmd/auth/describe/u2m-plaintext-env/output.txt @@ -1,7 +1,7 @@ >>> [CLI] auth describe --profile u2m-profile Warn: [hostmetadata] failed to fetch host metadata for https://u2m-profile.databricks.test, will skip for 1m0s -Unable to authenticate: error getting token: cache: token not found +Unable to authenticate: error getting token: cache: no cached credentials; run `databricks auth login` to sign in Token storage: plaintext, ~/.databricks/token-cache.json (from DATABRICKS_AUTH_STORAGE environment variable) ----- Current configuration: diff --git a/acceptance/cmd/auth/describe/u2m-plaintext-default/out.test.toml b/acceptance/cmd/auth/describe/u2m-secure-default/out.test.toml similarity index 100% rename from acceptance/cmd/auth/describe/u2m-plaintext-default/out.test.toml rename to acceptance/cmd/auth/describe/u2m-secure-default/out.test.toml diff --git a/acceptance/cmd/auth/describe/u2m-plaintext-default/output.txt b/acceptance/cmd/auth/describe/u2m-secure-default/output.txt similarity index 78% rename from acceptance/cmd/auth/describe/u2m-plaintext-default/output.txt rename to acceptance/cmd/auth/describe/u2m-secure-default/output.txt index 004cc787aaa..fab13663d44 100644 --- a/acceptance/cmd/auth/describe/u2m-plaintext-default/output.txt +++ b/acceptance/cmd/auth/describe/u2m-secure-default/output.txt @@ -1,8 +1,8 @@ >>> [CLI] auth describe --profile u2m-profile Warn: [hostmetadata] failed to fetch host metadata for https://u2m-profile.databricks.test, will skip for 1m0s -Unable to authenticate: error getting token: cache: token not found -Token storage: plaintext, ~/.databricks/token-cache.json (from default) +Unable to authenticate: error getting token: [KEYRING_LOOKUP_ERROR] +Token storage: secure, OS keyring (service: databricks-cli) (from default) ----- Current configuration: ✓ host: https://u2m-profile.databricks.test (from [TEST_TMP_DIR]/home/.databrickscfg config file) diff --git a/acceptance/cmd/auth/describe/u2m-plaintext-default/script b/acceptance/cmd/auth/describe/u2m-secure-default/script similarity index 100% rename from acceptance/cmd/auth/describe/u2m-plaintext-default/script rename to acceptance/cmd/auth/describe/u2m-secure-default/script diff --git a/acceptance/cmd/auth/describe/u2m-secure-default/test.toml b/acceptance/cmd/auth/describe/u2m-secure-default/test.toml new file mode 100644 index 00000000000..d0d5090e6a5 --- /dev/null +++ b/acceptance/cmd/auth/describe/u2m-secure-default/test.toml @@ -0,0 +1,11 @@ +Ignore = [ + "home" +] + +# This test runs against the real OS keyring at Lookup time (no writes). +# macOS produces a clean miss; Linux without a usable D-Bus session bus +# produces a backend error. Normalize both so the assertion stays on the +# resolved storage mode, not the lookup outcome. +[[Repls]] +Old = 'Unable to authenticate: error getting token: .*' +New = 'Unable to authenticate: error getting token: [KEYRING_LOOKUP_ERROR]' diff --git a/acceptance/cmd/auth/logout/stale-account-id-workspace-host/output.txt b/acceptance/cmd/auth/logout/stale-account-id-workspace-host/output.txt index 29c2d40709e..8c3218ad06e 100644 --- a/acceptance/cmd/auth/logout/stale-account-id-workspace-host/output.txt +++ b/acceptance/cmd/auth/logout/stale-account-id-workspace-host/output.txt @@ -36,4 +36,4 @@ logfood (Default) [DATABRICKS_URL] NO === Logged out profile should no longer return a token >>> musterr [CLI] auth token --profile logfood -Error: cache: databricks OAuth is not configured for this host. Try logging in again with `databricks auth login --profile logfood` before retrying. If this fails, please report this issue to the Databricks CLI maintainers at https://github.com/databricks/cli/issues/new +Error: cache: databricks OAuth is not configured for this host. no cached credentials; run `databricks auth login` to sign in diff --git a/acceptance/cmd/auth/token/default-profile/output.txt b/acceptance/cmd/auth/token/default-profile/output.txt index 1a62bd6a7d6..4b386f69e94 100644 --- a/acceptance/cmd/auth/token/default-profile/output.txt +++ b/acceptance/cmd/auth/token/default-profile/output.txt @@ -3,7 +3,7 @@ >>> errcode [CLI] auth token Warn: [hostmetadata] failed to fetch host metadata for https://myworkspace.test, will skip for 1m0s -Error: cache: databricks OAuth is not configured for this host. Try logging in again with `databricks auth login --profile myprofile` before retrying. If this fails, please report this issue to the Databricks CLI maintainers at https://github.com/databricks/cli/issues/new +Error: cache: databricks OAuth is not configured for this host. no cached credentials; run `databricks auth login` to sign in Exit code: 1 diff --git a/acceptance/cmd/auth/token/force-refresh-no-cache/output.txt b/acceptance/cmd/auth/token/force-refresh-no-cache/output.txt index a5fb2467341..bd35319a5a8 100644 --- a/acceptance/cmd/auth/token/force-refresh-no-cache/output.txt +++ b/acceptance/cmd/auth/token/force-refresh-no-cache/output.txt @@ -1,3 +1,3 @@ -Error: cache: databricks OAuth is not configured for this host. Try logging in again with `databricks auth login --profile test-profile` before retrying. If this fails, please report this issue to the Databricks CLI maintainers at https://github.com/databricks/cli/issues/new +Error: cache: databricks OAuth is not configured for this host. no cached credentials; run `databricks auth login` to sign in Exit code: 1 diff --git a/acceptance/cmd/auth/token/output.txt b/acceptance/cmd/auth/token/output.txt index 9299dfe871b..67bf1a3654e 100644 --- a/acceptance/cmd/auth/token/output.txt +++ b/acceptance/cmd/auth/token/output.txt @@ -1,5 +1,5 @@ >>> [CLI] auth token --host [DATABRICKS_URL] -Error: cache: databricks OAuth is not configured for this host. Try logging in again with `databricks auth login --host [DATABRICKS_URL]` before retrying. If this fails, please report this issue to the Databricks CLI maintainers at https://github.com/databricks/cli/issues/new +Error: cache: databricks OAuth is not configured for this host. no cached credentials; run `databricks auth login` to sign in Exit code: 1 diff --git a/acceptance/script.prepare b/acceptance/script.prepare index a8ea942bb46..3d0d59dc14d 100644 --- a/acceptance/script.prepare +++ b/acceptance/script.prepare @@ -1,3 +1,9 @@ +# Force plaintext token storage so acceptance tests exercise the file-backed +# cache rather than the OS keyring, which is not reliably reachable in CI. +# Tests that want to exercise the secure path or the resolver default unset +# or override this in their own script.prepare or script. +export DATABRICKS_AUTH_STORAGE=plaintext + errcode() { # Temporarily disable 'set -e' to prevent the script from exiting on error set +e diff --git a/cmd/auth/auth_test.go b/cmd/auth/auth_test.go index fc7e5d533d4..9dbf04fc664 100644 --- a/cmd/auth/auth_test.go +++ b/cmd/auth/auth_test.go @@ -1,6 +1,8 @@ package auth import ( + "os" + "path/filepath" "testing" "github.com/databricks/cli/cmd/root" @@ -113,7 +115,14 @@ func TestProfileHostConflictTokenViaCobra(t *testing.T) { // pass the conflict check (the command will fail later for other reasons, but // NOT with a conflict error). func TestProfileHostCompatibleViaCobra(t *testing.T) { - t.Setenv("DATABRICKS_CONFIG_FILE", "./testdata/.databrickscfg") + // Copy the fixture into a temp directory so the auth login flow's writes + // (e.g. silent plaintext-fallback persistence on CI runners without a + // usable keyring) cannot dirty the checked-in fixture. + configPath := filepath.Join(t.TempDir(), ".databrickscfg") + fixture, err := os.ReadFile("./testdata/.databrickscfg") + require.NoError(t, err) + require.NoError(t, os.WriteFile(configPath, fixture, 0o600)) + t.Setenv("DATABRICKS_CONFIG_FILE", configPath) ctx := cmdctx.GenerateExecId(t.Context()) cli := root.New(ctx) @@ -125,7 +134,7 @@ func TestProfileHostCompatibleViaCobra(t *testing.T) { "--host", "https://www.host1.test", }) - _, err := cli.ExecuteContextC(ctx) + _, err = cli.ExecuteContextC(ctx) // The command may fail for other reasons (no browser, non-interactive, etc.) // but it should NOT fail with a conflict error. if err != nil { diff --git a/cmd/auth/describe_test.go b/cmd/auth/describe_test.go index 528decf1022..72fee3486bc 100644 --- a/cmd/auth/describe_test.go +++ b/cmd/auth/describe_test.go @@ -240,21 +240,21 @@ func TestResolveTokenStorageInfo(t *testing.T) { want: nil, }, { - name: "databricks-cli with default plaintext", + name: "databricks-cli with default secure", authType: authTypeDatabricksCLI, want: &tokenStorageInfo{ - Mode: "plaintext", - Location: plaintextLocation, + Mode: "secure", + Location: secureLocation, Source: "default", }, }, { - name: "databricks-cli with secure from env", + name: "databricks-cli with plaintext from env", authType: authTypeDatabricksCLI, - envValue: "secure", + envValue: "plaintext", want: &tokenStorageInfo{ - Mode: "secure", - Location: secureLocation, + Mode: "plaintext", + Location: plaintextLocation, Source: "DATABRICKS_AUTH_STORAGE environment variable", }, }, diff --git a/cmd/auth/login.go b/cmd/auth/login.go index d45a77806fd..5ba9729d1ba 100644 --- a/cmd/auth/login.go +++ b/cmd/auth/login.go @@ -144,15 +144,6 @@ a new profile is created. ctx := cmd.Context() profileName := cmd.Flag("profile").Value.String() - // Resolve the cache before the browser step so an unavailable - // keyring surfaces here rather than after OAuth. The probe also - // triggers the OS unlock prompt, which the user can answer during - // OAuth. - tokenCache, mode, err := storage.ResolveCacheForLogin(ctx, "") - if err != nil { - return err - } - // Cluster and Serverless are mutually exclusive. if configureCluster && configureServerless { return errors.New("please either configure serverless or cluster, not both") @@ -178,6 +169,16 @@ a new profile is created. } } + // Resolve the cache before the browser step so an unavailable + // keyring surfaces here rather than after OAuth. The probe also + // triggers the OS unlock prompt, which the user can answer during + // OAuth. Run after input validation so trivially-invalid commands + // fail without probing. + tokenCache, mode, err := storage.ResolveCacheForLogin(ctx, "") + if err != nil { + return err + } + // When interactive and nothing was specified, show a picker that lets // the user re-login to an existing profile, create a new one, or enter // a host URL. With no profiles configured the picker still shows the @@ -294,6 +295,11 @@ a new profile is created. if err = persistentAuth.Challenge(); err != nil { return err } + // Lock secure mode in after a successful keyring write so a later + // transient keyring probe failure cannot silently demote this user + // to plaintext. + storage.PinSecureMode(ctx, mode, storage.StorageModeUnknown) + // At this point, an OAuth token has been successfully minted and stored // in the CLI cache. The rest of the command focuses on: // 1. Workspace selection for SPOG hosts (best-effort); @@ -633,6 +639,7 @@ func discoveryLogin(ctx context.Context, in discoveryLoginInputs) error { if err := persistentAuth.Challenge(); err != nil { return discoveryErr("login via login.databricks.com failed", err) } + storage.PinSecureMode(ctx, in.mode, storage.StorageModeUnknown) discoveredHost := arg.GetDiscoveredHost() if discoveredHost == "" { diff --git a/cmd/auth/token.go b/cmd/auth/token.go index d7c25ecece3..e789346e0fa 100644 --- a/cmd/auth/token.go +++ b/cmd/auth/token.go @@ -285,10 +285,22 @@ func loadToken(ctx context.Context, args loadTokenArgs) (*oauth2.Token, error) { // // Older SDK versions check for a particular substring to determine if // the OAuth authentication type can fall through or if it is a real error. - // This means we need to keep this error message constant for backwards compatibility. + // This means we need to keep "databricks OAuth is not configured for + // this host" present in the error for backwards compatibility. // // This is captured in an acceptance test under "cmd/auth/token". - err = errors.New("cache: databricks OAuth is not configured for this host") + const compatSubstring = "databricks OAuth is not configured for this host" + // When storage's notFoundHintCache wrapped the ErrNotFound with + // an actionable hint (e.g. the post-upgrade "stored credentials + // from older CLI versions are no longer used; run `databricks + // auth login`..." copy), surface it instead of the generic + // "Try logging in again with ... If this fails, please report + // this issue" trailer. The hint is more specific and avoids + // users reporting expected post-upgrade behavior as a bug. + if hint := storage.HintForNotFound(err); hint != "" { + return nil, fmt.Errorf("cache: %s. %s", compatSubstring, hint) + } + err = errors.New("cache: " + compatSubstring) } if rewritten, rewrittenErr := auth.RewriteAuthError(ctx, args.authArguments.Host, args.authArguments.AccountID, args.profileName, err); rewritten { return nil, rewrittenErr @@ -433,6 +445,7 @@ func runInlineLogin(ctx context.Context, profiler profile.Profiler, tokenCache c if err = persistentAuth.Challenge(); err != nil { return "", nil, err } + storage.PinSecureMode(ctx, mode, storage.StorageModeUnknown) clearKeys := oauthLoginClearKeys() clearKeys = append(clearKeys, databrickscfg.ExperimentalIsUnifiedHostKey) diff --git a/cmd/auth/token_test.go b/cmd/auth/token_test.go index f3b3cdd8c0c..40ca29f588b 100644 --- a/cmd/auth/token_test.go +++ b/cmd/auth/token_test.go @@ -10,15 +10,34 @@ import ( "time" "github.com/databricks/cli/libs/auth" + "github.com/databricks/cli/libs/auth/storage" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/databrickscfg/profile" "github.com/databricks/cli/libs/env" "github.com/databricks/databricks-sdk-go/credentials/u2m" + "github.com/databricks/databricks-sdk-go/credentials/u2m/cache" "github.com/databricks/databricks-sdk-go/httpclient/fixtures" "github.com/stretchr/testify/assert" "golang.org/x/oauth2" ) +// upgradeHintTokenCache returns a notFoundHint-wrapped ErrNotFound on +// Lookup, mirroring what storage.notFoundHintCache produces in +// production when ~/.databricks/token-cache.json has entries and the +// resolver picked secure mode by default. Used by TestToken_loadToken +// to verify that auth token surfaces the upgrade-specific hint instead +// of dropping it for the SDK-compat constant string. +type upgradeHintTokenCache struct{} + +func (upgradeHintTokenCache) Store(string, *oauth2.Token) error { return nil } +func (upgradeHintTokenCache) Lookup(string) (*oauth2.Token, error) { + return nil, storage.NewNotFoundHint( + "stored credentials from older CLI versions are no longer used; run `databricks auth login` to sign in again, or set DATABRICKS_AUTH_STORAGE=plaintext to keep using the file cache", + ) +} + +var _ cache.TokenCache = upgradeHintTokenCache{} + type failOnCallTransport struct{} func (failOnCallTransport) RoundTrip(*http.Request) (*http.Response, error) { @@ -408,6 +427,35 @@ func TestToken_loadToken(t *testing.T) { "Try logging in again with `databricks auth login --host https://nonexistent.cloud.databricks.com` before retrying. " + "If this fails, please report this issue to the Databricks CLI maintainers at https://github.com/databricks/cli/issues/new", }, + { + // Regression test: when notFoundHintCache wraps ErrNotFound + // with the upgrade copy (post-upgrade default-secure user + // with a populated token-cache.json), `auth token` must + // surface that hint instead of dropping it for the SDK-compat + // constant string. The combined message keeps the + // "OAuth is not configured for this host" substring older + // SDK versions look for and skips the generic "Try logging + // in again ... If this fails, please report this issue" + // trailer, which would mislead users into reporting expected + // post-upgrade behavior. + name: "ErrNotFound carrying upgrade hint surfaces it", + args: loadTokenArgs{ + authArguments: &auth.AuthArguments{}, + profileName: "", + args: []string{"nonexistent.cloud.databricks.com"}, + tokenTimeout: 1 * time.Hour, + profiler: profiler, + tokenCache: upgradeHintTokenCache{}, + persistentAuthOpts: []u2m.PersistentAuthOption{ + u2m.WithTokenCache(upgradeHintTokenCache{}), + u2m.WithOAuthEndpointSupplier(&MockApiClient{}), + }, + }, + wantErr: "cache: databricks OAuth is not configured for this host. " + + "stored credentials from older CLI versions are no longer used; " + + "run `databricks auth login` to sign in again, " + + "or set DATABRICKS_AUTH_STORAGE=plaintext to keep using the file cache", + }, { name: "errors with clear message for non-host non-profile positional arg", args: loadTokenArgs{ diff --git a/libs/auth/storage/cache.go b/libs/auth/storage/cache.go index d3a5fa85d29..c3d1fc43dcf 100644 --- a/libs/auth/storage/cache.go +++ b/libs/auth/storage/cache.go @@ -16,17 +16,19 @@ import ( // so unit tests can inject stubs without hitting the real OS keyring or // filesystem. Production code uses defaultCacheFactories(). type cacheFactories struct { - newFile func(context.Context) (cache.TokenCache, error) - newKeyring func() cache.TokenCache - probeKeyring func() error + newFile func(context.Context) (cache.TokenCache, error) + newKeyring func() cache.TokenCache + probeKeyring func() error + probeKeyringRead func() error } // defaultCacheFactories returns the production factory set. func defaultCacheFactories() cacheFactories { return cacheFactories{ - newFile: func(ctx context.Context) (cache.TokenCache, error) { return NewFileTokenCache(ctx) }, - newKeyring: NewKeyringCache, - probeKeyring: ProbeKeyring, + newFile: func(ctx context.Context) (cache.TokenCache, error) { return NewFileTokenCache(ctx) }, + newKeyring: NewKeyringCache, + probeKeyring: ProbeKeyring, + probeKeyringRead: ProbeKeyringRead, } } @@ -37,11 +39,22 @@ func defaultCacheFactories() cacheFactories { // override is usually the command-level flag value. Pass "" when the command // has no flag; precedence then falls through to env -> config -> default. // +// When the resolver returns (mode=Secure, source=Default) and the OS +// keyring is definitively unreachable (a non-timeout probe error), reads +// fall back to the plaintext file cache so post-upgrade users with legacy +// token-cache.json entries are not stranded. Unlike the login path, this +// fallback does not persist auth_storage = plaintext to [__settings__]; +// pinning happens only on successful login. +// // Every CLI code path that calls u2m.NewPersistentAuth must route the result // through u2m.WithTokenCache, otherwise the SDK defaults to the file cache // and splits the user's tokens across two backends. func ResolveCache(ctx context.Context, override StorageMode) (cache.TokenCache, StorageMode, error) { - return resolveCacheWith(ctx, override, defaultCacheFactories()) + inner, mode, err := resolveCacheForReadWith(ctx, override, defaultCacheFactories()) + if err != nil { + return nil, "", err + } + return withNotFoundHint(ctx, inner, mode), mode, nil } // ResolveCacheForLogin resolves the cache like ResolveCache with extra rules @@ -59,15 +72,15 @@ func ResolveCache(ctx context.Context, override StorageMode) (cache.TokenCache, // The timeout is ambiguous (locked vs hung); a misdiagnosis fails // the final Store rather than silently downgrading to plaintext. // -// Rules 1 and 2 are dormant today: the resolver default is plaintext, so -// (mode=Secure, explicit=false) is unreachable. They activate when the -// default flips to secure at GA. -// // Login-specific. Read paths (auth token, bundle commands) keep the original // keyring error so they don't silently mint plaintext copies of tokens that // were stored in the keyring on another machine. func ResolveCacheForLogin(ctx context.Context, override StorageMode) (cache.TokenCache, StorageMode, error) { - return resolveCacheForLoginWith(ctx, override, defaultCacheFactories()) + inner, mode, err := resolveCacheForLoginWith(ctx, override, defaultCacheFactories()) + if err != nil { + return nil, "", err + } + return withNotFoundHint(ctx, inner, mode), mode, nil } // WrapForOAuthArgument wraps tokenCache so SDK-side writes (Challenge, refresh) @@ -85,8 +98,11 @@ func WrapForOAuthArgument(tokenCache cache.TokenCache, mode StorageMode, arg u2m return NewDualWritingTokenCache(tokenCache, arg) } -// resolveCacheWith is the pure form of ResolveCache. It takes the factory -// set as a parameter so tests can inject stubs. +// resolveCacheWith is the pure form of ResolveCache without the read-path +// fallback. Takes the factory set as a parameter so tests can inject stubs. +// Used directly by ResolveCacheForLogin (which has its own fallback rules) +// and indirectly by ResolveCache (which adds the read-path fallback in +// resolveCacheForReadWith). func resolveCacheWith(ctx context.Context, override StorageMode, f cacheFactories) (cache.TokenCache, StorageMode, error) { mode, err := ResolveStorageMode(ctx, override) if err != nil { @@ -106,6 +122,61 @@ func resolveCacheWith(ctx context.Context, override StorageMode, f cacheFactorie } } +// resolveCacheForReadWith is the pure form of ResolveCache. It applies the +// read-path fallback: when mode is secure-from-default and the keyring +// probes as definitively unavailable, return the file cache instead. +// Timeouts keep the keyring (could be transient). +func resolveCacheForReadWith(ctx context.Context, override StorageMode, f cacheFactories) (cache.TokenCache, StorageMode, error) { + mode, source, err := ResolveStorageModeWithSource(ctx, override) + if err != nil { + return nil, "", err + } + return applyReadFallback(ctx, mode, source.Explicit(), f) +} + +// applyReadFallback realizes the read-path fallback. Mirrors +// applyLoginFallback but: +// +// - Uses a read-only probe (ProbeKeyringRead) so calls do not write to +// the keyring on every CLI invocation. +// - Does not persist auth_storage = plaintext. Pinning happens only on +// successful login, where the write-probe gives us stronger evidence +// that the keyring is truly unavailable on this machine. +// +// Explicit secure is honored: callers who asked for secure get the keyring +// cache even if the probe fails, so the actual Lookup error surfaces the +// unreachability instead of silently using a different backend. +func applyReadFallback(ctx context.Context, mode StorageMode, explicit bool, f cacheFactories) (cache.TokenCache, StorageMode, error) { + switch mode { + case StorageModePlaintext: + c, err := f.newFile(ctx) + if err != nil { + return nil, "", fmt.Errorf("open file token cache: %w", err) + } + return c, mode, nil + case StorageModeSecure: + if explicit { + return f.newKeyring(), mode, nil + } + if probeErr := f.probeKeyringRead(); probeErr != nil { + var timeoutErr *TimeoutError + if errors.As(probeErr, &timeoutErr) { + log.Debugf(ctx, "keyring read probe timed out (%v); staying on keyring", probeErr) + return f.newKeyring(), mode, nil + } + log.Debugf(ctx, "secure storage unavailable on read path (%v), using file cache", probeErr) + fileCache, fileErr := f.newFile(ctx) + if fileErr != nil { + return nil, "", fmt.Errorf("open file token cache: %w", fileErr) + } + return fileCache, StorageModePlaintext, nil + } + return f.newKeyring(), mode, nil + default: + return nil, "", fmt.Errorf("unsupported storage mode %q", string(mode)) + } +} + // resolveCacheForLoginWith is the pure form of ResolveCacheForLogin. It takes // the factory set as a parameter so tests can inject stubs. func resolveCacheForLoginWith(ctx context.Context, override StorageMode, f cacheFactories) (cache.TokenCache, StorageMode, error) { @@ -120,12 +191,6 @@ func resolveCacheForLoginWith(ctx context.Context, override StorageMode, f cache // resolved mode and whether the user explicitly asked for it. Split out so // tests can drive the (mode, explicit) input space directly without depending // on whatever the resolver's default mode happens to be at any point in time. -// -// Pin-on-success across modes (locking in the first working behavior to -// insulate users from keyring flakiness) is intentionally not implemented -// here. It lands at GA alongside the default flip; pinning before the -// flip would freeze every default user into plaintext and make the flip a -// no-op for them. func applyLoginFallback(ctx context.Context, mode StorageMode, explicit bool, f cacheFactories) (cache.TokenCache, StorageMode, error) { switch mode { case StorageModePlaintext: @@ -153,9 +218,7 @@ func applyLoginFallback(ctx context.Context, mode StorageMode, explicit bool, f if fileErr != nil { return nil, "", fmt.Errorf("open file token cache: %w", fileErr) } - if err := persistPlaintextFallback(ctx); err != nil { - log.Debugf(ctx, "persisting auth_storage fallback failed: %v", err) - } + persistPlaintextFallback(ctx) return fileCache, StorageModePlaintext, nil } return f.newKeyring(), mode, nil @@ -168,12 +231,46 @@ func applyLoginFallback(ctx context.Context, mode StorageMode, explicit bool, f // in .databrickscfg so subsequent commands skip the (slow/blocking) keyring // probe and route straight to the file cache. // -// Only called on the (mode=Secure, explicit=false) probe-failure branch. That -// branch is unreachable today (resolver default is plaintext), so this is -// dormant infrastructure: it activates when the default flips to secure -// at GA and lets default-on-broken-keyring users avoid a 3s probe on every -// command. -func persistPlaintextFallback(ctx context.Context) error { +// Only called on the (mode=Secure, explicit=false) probe-failure branch. +// Best-effort: persistence failures are logged at debug and never block +// login. +func persistPlaintextFallback(ctx context.Context) { configPath := env.Get(ctx, "DATABRICKS_CONFIG_FILE") - return databrickscfg.SetConfiguredAuthStorage(ctx, string(StorageModePlaintext), configPath) + if err := databrickscfg.SetConfiguredAuthStorage(ctx, string(StorageModePlaintext), configPath); err != nil { + log.Debugf(ctx, "persist auth_storage=plaintext fallback failed: %v", err) + } +} + +// PinSecureMode persists auth_storage = secure to [__settings__] when the +// user is currently on the secure-from-default path. Once pinned, subsequent +// invocations see source=Config (explicit), so applyLoginFallback returns an +// error on a transient keyring probe failure instead of silently demoting +// the user to plaintext. +// +// No-op when mode is not secure or when the user already chose a mode +// explicitly via override, env var, or config. override must be the same +// value the caller passed to ResolveCacheForLogin so the source check sees +// the caller's intent rather than re-resolving without it. +// +// Persistence failures are logged at warn: they do not block login, but +// the user should know the pin did not happen, since a later transient +// keyring failure could then silently route a default-secure user to +// plaintext. Concurrent logins racing this write is benign because both +// write the same value. +func PinSecureMode(ctx context.Context, mode, override StorageMode) { + if mode != StorageModeSecure { + return + } + _, source, err := ResolveStorageModeWithSource(ctx, override) + if err != nil { + log.Debugf(ctx, "pin secure mode: resolve: %v", err) + return + } + if source.Explicit() { + return + } + configPath := env.Get(ctx, "DATABRICKS_CONFIG_FILE") + if err := databrickscfg.SetConfiguredAuthStorage(ctx, string(StorageModeSecure), configPath); err != nil { + log.Warnf(ctx, "could not persist auth_storage=secure to %s: %v. Future commands may need DATABRICKS_AUTH_STORAGE=secure to keep using the OS keyring.", configPath, err) + } } diff --git a/libs/auth/storage/cache_test.go b/libs/auth/storage/cache_test.go index db059299dc3..52044eef986 100644 --- a/libs/auth/storage/cache_test.go +++ b/libs/auth/storage/cache_test.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "io/fs" "os" "path/filepath" "testing" @@ -27,9 +28,10 @@ func (stubCache) Lookup(string) (*oauth2.Token, error) { return nil, cache.ErrNo func fakeFactories(t *testing.T) cacheFactories { t.Helper() return cacheFactories{ - newFile: func(context.Context) (cache.TokenCache, error) { return stubCache{source: "file"}, nil }, - newKeyring: func() cache.TokenCache { return stubCache{source: "keyring"} }, - probeKeyring: func() error { return nil }, + newFile: func(context.Context) (cache.TokenCache, error) { return stubCache{source: "file"}, nil }, + newKeyring: func() cache.TokenCache { return stubCache{source: "keyring"} }, + probeKeyring: func() error { return nil }, + probeKeyringRead: func() error { return nil }, } } @@ -41,15 +43,15 @@ func hermetic(t *testing.T) { t.Setenv("DATABRICKS_CONFIG_FILE", filepath.Join(t.TempDir(), "databrickscfg")) } -func TestResolveCache_DefaultsToPlaintextFile(t *testing.T) { +func TestResolveCache_DefaultsToSecureKeyring(t *testing.T) { hermetic(t) ctx := t.Context() got, mode, err := resolveCacheWith(ctx, "", fakeFactories(t)) require.NoError(t, err) - assert.Equal(t, StorageModePlaintext, mode) - assert.Equal(t, "file", got.(stubCache).source) + assert.Equal(t, StorageModeSecure, mode) + assert.Equal(t, "keyring", got.(stubCache).source) } func TestResolveCache_OverrideSecureUsesKeyring(t *testing.T) { @@ -110,9 +112,10 @@ func TestResolveCache_FileFactoryErrorPropagates(t *testing.T) { ctx := t.Context() boom := errors.New("disk full") factories := cacheFactories{ - newFile: func(context.Context) (cache.TokenCache, error) { return nil, boom }, - newKeyring: func() cache.TokenCache { return stubCache{source: "keyring"} }, - probeKeyring: func() error { return nil }, + newFile: func(context.Context) (cache.TokenCache, error) { return nil, boom }, + newKeyring: func() cache.TokenCache { return stubCache{source: "keyring"} }, + probeKeyring: func() error { return nil }, + probeKeyringRead: func() error { return nil }, } _, _, err := resolveCacheWith(ctx, StorageModePlaintext, factories) @@ -121,6 +124,113 @@ func TestResolveCache_FileFactoryErrorPropagates(t *testing.T) { assert.ErrorIs(t, err, boom) } +// applyReadFallback mirrors applyLoginFallback's logic on the read path: +// keyring is probed read-only, definitive failures fall through to the file +// cache so legacy plaintext tokens stay reachable, timeouts stay on the +// keyring, and explicit-secure is honored even when the probe fails. + +func TestApplyReadFallback_PlaintextSkipsProbe(t *testing.T) { + hermetic(t) + ctx := t.Context() + probed := false + f := fakeFactories(t) + f.probeKeyringRead = func() error { + probed = true + return nil + } + + got, mode, err := applyReadFallback(ctx, StorageModePlaintext, false, f) + + require.NoError(t, err) + assert.Equal(t, StorageModePlaintext, mode) + assert.Equal(t, "file", got.(stubCache).source) + assert.False(t, probed, "probe must not run when mode is already plaintext") +} + +func TestApplyReadFallback_ExplicitSecureSkipsProbe(t *testing.T) { + hermetic(t) + ctx := t.Context() + probed := false + f := fakeFactories(t) + f.probeKeyringRead = func() error { + probed = true + return errors.New("unreachable") + } + + got, mode, err := applyReadFallback(ctx, StorageModeSecure, true, f) + + require.NoError(t, err) + assert.Equal(t, StorageModeSecure, mode) + assert.Equal(t, "keyring", got.(stubCache).source) + assert.False(t, probed, "probe must not run when user is explicit about secure mode") +} + +func TestApplyReadFallback_DefaultSecure_ProbeOK_UsesKeyring(t *testing.T) { + hermetic(t) + ctx := t.Context() + + got, mode, err := applyReadFallback(ctx, StorageModeSecure, false, fakeFactories(t)) + + require.NoError(t, err) + assert.Equal(t, StorageModeSecure, mode) + assert.Equal(t, "keyring", got.(stubCache).source) +} + +func TestApplyReadFallback_DefaultSecure_ProbeFail_FallsBack(t *testing.T) { + hermetic(t) + ctx := t.Context() + configPath := env.Get(ctx, "DATABRICKS_CONFIG_FILE") + + f := fakeFactories(t) + f.probeKeyringRead = func() error { return errors.New("no keyring") } + + got, mode, err := applyReadFallback(ctx, StorageModeSecure, false, f) + + require.NoError(t, err) + assert.Equal(t, StorageModePlaintext, mode) + assert.Equal(t, "file", got.(stubCache).source) + + // Read-path fallback must NOT pin: pinning is reserved for login, + // where the write-probe gives stronger evidence of unavailability. + persisted, gerr := databrickscfg.GetConfiguredAuthStorage(ctx, configPath) + require.NoError(t, gerr) + assert.Equal(t, "", persisted, "read-path fallback must not persist auth_storage") +} + +// A timeout could mean a locked keyring that will work once the user unlocks +// it. Stay on the keyring so the actual Lookup surfaces the real outcome +// rather than silently routing reads to the file cache. +func TestApplyReadFallback_DefaultSecure_ProbeTimeout_StaysOnKeyring(t *testing.T) { + cases := []struct { + name string + probeErr error + }{ + {"bare TimeoutError", &TimeoutError{Op: "get"}}, + {"wrapped TimeoutError", fmt.Errorf("get: %w", &TimeoutError{Op: "get"})}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + hermetic(t) + ctx := t.Context() + configPath := env.Get(ctx, "DATABRICKS_CONFIG_FILE") + + f := fakeFactories(t) + f.probeKeyringRead = func() error { return tc.probeErr } + + got, mode, err := applyReadFallback(ctx, StorageModeSecure, false, f) + + require.NoError(t, err) + assert.Equal(t, StorageModeSecure, mode) + assert.Equal(t, "keyring", got.(stubCache).source) + + persisted, gerr := databrickscfg.GetConfiguredAuthStorage(ctx, configPath) + require.NoError(t, gerr) + assert.Equal(t, "", persisted, "probe timeout must not persist anything") + }) + } +} + func TestResolveCacheForLogin_PlaintextSkipsProbe(t *testing.T) { hermetic(t) ctx := t.Context() @@ -272,6 +382,104 @@ func TestApplyLoginFallback_ProbeTimeout_StaysOnKeyring(t *testing.T) { } } +func TestPinSecureMode(t *testing.T) { + cases := []struct { + name string + mode StorageMode + override StorageMode + envValue string + configBody string + wantWritten string + }{ + { + name: "secure from default persists secure", + mode: StorageModeSecure, + wantWritten: "secure", + }, + { + name: "plaintext mode is a no-op", + mode: StorageModePlaintext, + wantWritten: "", + }, + { + name: "secure from env is a no-op", + mode: StorageModeSecure, + envValue: "secure", + wantWritten: "", + }, + { + name: "secure from config is a no-op (already pinned)", + mode: StorageModeSecure, + configBody: "[__settings__]\nauth_storage = secure\n", + wantWritten: "secure", + }, + { + // The override signal is per-invocation, so persisting it to + // config would silently turn an ephemeral choice into a + // persistent one. Honor the caller's explicit override by + // no-op'ing the pin. + name: "secure from override is a no-op", + mode: StorageModeSecure, + override: StorageModeSecure, + wantWritten: "", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + hermetic(t) + ctx := t.Context() + configPath := env.Get(ctx, "DATABRICKS_CONFIG_FILE") + if tc.configBody != "" { + require.NoError(t, os.WriteFile(configPath, []byte(tc.configBody), 0o600)) + } + if tc.envValue != "" { + ctx = env.Set(ctx, EnvVar, tc.envValue) + } + + PinSecureMode(ctx, tc.mode, tc.override) + + got, err := databrickscfg.GetConfiguredAuthStorage(ctx, configPath) + require.NoError(t, err) + assert.Equal(t, tc.wantWritten, got) + }) + } +} + +func TestPinSecureMode_IsIdempotent(t *testing.T) { + hermetic(t) + ctx := t.Context() + configPath := env.Get(ctx, "DATABRICKS_CONFIG_FILE") + + PinSecureMode(ctx, StorageModeSecure, StorageModeUnknown) + first, err := databrickscfg.GetConfiguredAuthStorage(ctx, configPath) + require.NoError(t, err) + require.Equal(t, "secure", first) + + // Second call should see source=Config and skip the write. + PinSecureMode(ctx, StorageModeSecure, StorageModeUnknown) + second, err := databrickscfg.GetConfiguredAuthStorage(ctx, configPath) + require.NoError(t, err) + assert.Equal(t, "secure", second) +} + +func TestPinSecureMode_PersistFailureIsSwallowed(t *testing.T) { + hermetic(t) + ctx := t.Context() + // Point DATABRICKS_CONFIG_FILE at a path whose parent does not exist. + // loadOrCreateConfigFile does not mkdir, so the underlying os.OpenFile + // fails and SetConfiguredAuthStorage returns an error. + configPath := filepath.Join(t.TempDir(), "no-such-dir", ".databrickscfg") + t.Setenv("DATABRICKS_CONFIG_FILE", configPath) + + // Must not panic or block; failure surfaces in the warn log. + PinSecureMode(ctx, StorageModeSecure, StorageModeUnknown) + + // The persist failure must not have produced any file. + _, err := os.Stat(configPath) + assert.ErrorIs(t, err, fs.ErrNotExist, "no file should have been written") +} + func TestWrapForOAuthArgument(t *testing.T) { const ( host = "https://example.com" diff --git a/libs/auth/storage/keyring.go b/libs/auth/storage/keyring.go index 8ce87525b35..e0099ebe6bd 100644 --- a/libs/auth/storage/keyring.go +++ b/libs/auth/storage/keyring.go @@ -92,6 +92,9 @@ func NewKeyringCache() cache.TokenCache { // cycle within the standard timeout. *TimeoutError means the keyring // was unresponsive (locked or hung, indistinguishable here); other // errors are definitive failures. +// +// Used by the login path, where we want to validate both read and write +// capability before committing to the keyring backend. func ProbeKeyring() error { return probeWithBackend(zalandoBackend{}, defaultKeyringTimeout) } @@ -113,6 +116,35 @@ func probeWithBackend(backend keyringBackend, timeout time.Duration) error { return nil } +// ProbeKeyringRead returns nil if the OS keyring accepted a Get for a +// non-existent account within the standard timeout (i.e. the backend is +// reachable and responded with keyring.ErrNotFound). *TimeoutError means +// the keyring was unresponsive; other errors are definitive failures. +// +// Used by the read path so probing does not write to the keyring. A +// successful probe is indistinguishable from the user not having an +// entry for that probe account; we treat both as "reachable". +func ProbeKeyringRead() error { + return probeReadWithBackend(zalandoBackend{}, defaultKeyringTimeout) +} + +func probeReadWithBackend(backend keyringBackend, timeout time.Duration) error { + c := &keyringCache{ + backend: backend, + timeout: timeout, + keyringSvcName: keyringServiceName, + } + account := keyringProbeAccountPrefix + uuid.NewString() + err := c.withTimeout("get", func() error { + _, gerr := c.backend.Get(c.keyringSvcName, account) + return gerr + }) + if errors.Is(err, keyring.ErrNotFound) { + return nil + } + return err +} + // Store stores t under key. Nil t deletes the entry; deleting a missing // entry is not an error. func (k *keyringCache) Store(key string, t *oauth2.Token) error { @@ -149,7 +181,7 @@ func (k *keyringCache) Lookup(key string) (*oauth2.Token, error) { return nil, cache.ErrNotFound } if err != nil { - return nil, err + return nil, wrapKeyringUnreachable(err) } var entry keyringEntry @@ -159,6 +191,24 @@ func (k *keyringCache) Lookup(key string) (*oauth2.Token, error) { return entry.Token, nil } +// wrapKeyringUnreachable wraps a non-ErrNotFound keyring error with +// actionable guidance for users whose system has no usable keyring +// backend (Linux without a Secret Service / D-Bus session bus, headless +// containers, certain SSH sessions). Surfaces on the read path, where +// the resolver does not silently fall back to plaintext: a missing +// token might actually be reachable from the keyring on another machine, +// so we surface the unreachability instead of minting a fresh plaintext +// copy. +// +// ErrNotFound passes through unchanged because a clean miss is not an +// availability problem. +func wrapKeyringUnreachable(err error) error { + if err == nil || errors.Is(err, keyring.ErrNotFound) { + return err + } + return fmt.Errorf("OS keyring unreachable: %w (set DATABRICKS_AUTH_STORAGE=plaintext or run `databricks auth login` to use file-based token storage)", err) +} + // Compile-time confirmation that keyringCache satisfies the SDK interface. var _ cache.TokenCache = (*keyringCache)(nil) diff --git a/libs/auth/storage/keyring_test.go b/libs/auth/storage/keyring_test.go index f2b933ee42b..868e3ef5d3c 100644 --- a/libs/auth/storage/keyring_test.go +++ b/libs/auth/storage/keyring_test.go @@ -137,6 +137,25 @@ func TestKeyringCache_Lookup_PropagatesOtherErrors(t *testing.T) { _, err := c.Lookup("my-profile") require.Error(t, err) assert.ErrorIs(t, err, boom) + // The non-ErrNotFound path wraps the backend error with actionable + // guidance so users on systems without a usable keyring backend + // (e.g. headless Linux) know what to do. + assert.Contains(t, err.Error(), "OS keyring unreachable") + assert.Contains(t, err.Error(), "DATABRICKS_AUTH_STORAGE=plaintext") + assert.Contains(t, err.Error(), "databricks auth login") +} + +// ErrNotFound has to pass through unwrapped because callers branch on it +// (cache.ErrNotFound is the "no token, please log in" signal). Wrapping it +// with the unreachability hint would mislead the user. +func TestKeyringCache_Lookup_NotFoundIsNotWrapped(t *testing.T) { + backend := newFakeBackend() + c := newTestCache(backend) + + _, err := c.Lookup("nope") + require.Error(t, err) + assert.ErrorIs(t, err, cache.ErrNotFound) + assert.NotContains(t, err.Error(), "OS keyring unreachable") } func TestKeyringCache_Lookup_CorruptedJSONReturnsError(t *testing.T) { @@ -277,3 +296,59 @@ func TestProbeKeyring(t *testing.T) { }) } } + +func TestProbeKeyringRead(t *testing.T) { + boom := errors.New("backend boom") + cases := []struct { + name string + getErr error + getBlock bool + timeout time.Duration + wantErr error + wantTimeout bool + }{ + { + // keyring.ErrNotFound is the success signal: the backend + // responded that no entry exists for our probe account, + // which means it is reachable. + name: "ErrNotFound counts as reachable", + getErr: keyring.ErrNotFound, + timeout: 100 * time.Millisecond, + }, + { + name: "other backend error propagates", + getErr: boom, + timeout: 100 * time.Millisecond, + wantErr: boom, + }, + { + name: "get times out", + getBlock: true, + timeout: 50 * time.Millisecond, + wantTimeout: true, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + backend := newFakeBackend() + backend.getErr = tc.getErr + backend.getBlock = tc.getBlock + + err := probeReadWithBackend(backend, tc.timeout) + + switch { + case tc.wantErr != nil: + require.Error(t, err) + assert.ErrorIs(t, err, tc.wantErr) + case tc.wantTimeout: + require.Error(t, err) + var timeoutErr *TimeoutError + assert.ErrorAs(t, err, &timeoutErr) + default: + require.NoError(t, err) + assert.Empty(t, backend.items, "read probe must not write to the keyring") + } + }) + } +} diff --git a/libs/auth/storage/mode.go b/libs/auth/storage/mode.go index 6dd3c6e5dff..60c285f51ad 100644 --- a/libs/auth/storage/mode.go +++ b/libs/auth/storage/mode.go @@ -1,9 +1,10 @@ // Package storage selects and constructs the CLI's U2M token storage backend. // -// Two modes are supported. Plaintext writes to ~/.databricks/token-cache.json -// with host-key dual-write for older Go SDK versions (v0.61-v0.103); it is the -// resolver default. Secure writes to the OS-native keyring under the profile -// cache key only; it is opt-in pre-GA and slated to become the default at GA. +// Two modes are supported. Secure writes to the OS-native keyring under the +// profile cache key only; it is the resolver default. Plaintext writes to +// ~/.databricks/token-cache.json with host-key dual-write for older Go SDK +// versions (v0.61-v0.103); it is the opt-in fallback for environments where +// the OS keyring is not available. package storage import ( @@ -26,12 +27,15 @@ const ( // StorageModePlaintext writes tokens to ~/.databricks/token-cache.json // and mirrors each token under the legacy host-based cache key for - // older Go SDK versions (v0.61-v0.103). This is the resolver default. + // older Go SDK versions (v0.61-v0.103). Opt-in via DATABRICKS_AUTH_STORAGE + // or [__settings__].auth_storage for environments where the OS keyring + // is not available. StorageModePlaintext StorageMode = "plaintext" // StorageModeSecure writes tokens to the OS-native secure store // (macOS Keychain, Windows Credential Manager, Linux Secret Service) // under the profile cache key only. No host-key entry is written. + // This is the resolver default. StorageModeSecure StorageMode = "secure" ) @@ -101,7 +105,7 @@ func ParseMode(raw string) StorageMode { // 1. override (typically from a command-level flag such as --secure-storage). // 2. DATABRICKS_AUTH_STORAGE env var. // 3. [__settings__].auth_storage in .databrickscfg. -// 4. StorageModePlaintext. +// 4. StorageModeSecure. // // StorageModeUnknown as override means "no flag set; fall through." The // override is trusted to be a valid StorageMode: callers that parse user @@ -138,7 +142,7 @@ func ResolveStorageModeWithSource(ctx context.Context, override StorageMode) (St return mode, StorageSourceConfig, err } - return StorageModePlaintext, StorageSourceDefault, nil + return StorageModeSecure, StorageSourceDefault, nil } func parseFromSource(raw, source string) (StorageMode, error) { diff --git a/libs/auth/storage/mode_test.go b/libs/auth/storage/mode_test.go index 4c6cf0e1614..431cc903801 100644 --- a/libs/auth/storage/mode_test.go +++ b/libs/auth/storage/mode_test.go @@ -41,7 +41,7 @@ func TestResolveStorageMode(t *testing.T) { }{ { name: "default when nothing is set", - want: StorageModePlaintext, + want: StorageModeSecure, }, { name: "override wins over env and config", @@ -63,8 +63,8 @@ func TestResolveStorageMode(t *testing.T) { }, { name: "config sets mode when env and override unset", - configBody: "[__settings__]\nauth_storage = secure\n", - want: StorageModeSecure, + configBody: "[__settings__]\nauth_storage = plaintext\n", + want: StorageModePlaintext, }, { name: "env value is case-insensitive and trimmed", @@ -141,19 +141,19 @@ func TestResolveStorageModeWithSource(t *testing.T) { }{ { name: "default", - wantMode: StorageModePlaintext, + wantMode: StorageModeSecure, wantSource: StorageSourceDefault, }, { name: "override", - override: StorageModeSecure, - wantMode: StorageModeSecure, + override: StorageModePlaintext, + wantMode: StorageModePlaintext, wantSource: StorageSourceOverride, }, { name: "env", - envValue: "secure", - wantMode: StorageModeSecure, + envValue: "plaintext", + wantMode: StorageModePlaintext, wantSource: StorageSourceEnvVar, }, { diff --git a/libs/auth/storage/not_found_hint.go b/libs/auth/storage/not_found_hint.go new file mode 100644 index 00000000000..d86615535b5 --- /dev/null +++ b/libs/auth/storage/not_found_hint.go @@ -0,0 +1,120 @@ +package storage + +import ( + "context" + "encoding/json" + "errors" + "os" + "path/filepath" + + "github.com/databricks/cli/libs/env" + "github.com/databricks/databricks-sdk-go/credentials/u2m/cache" + "golang.org/x/oauth2" +) + +// notFoundHintCache wraps a TokenCache so Lookup returns ErrNotFound with +// a hint pointing the user at `databricks auth login`. When mode is secure +// and the legacy file-backed cache has entries, the hint uses the upgrade- +// specific copy so users who logged in with an older CLI version know why +// their cached credentials are no longer being read. +// +// errors.Is(err, cache.ErrNotFound) continues to return true because the +// wrap uses %w; the SDK's branches on ErrNotFound still fire. +// +// Store is delegated unchanged; only Lookup needs the message polish. +type notFoundHintCache struct { + inner cache.TokenCache + mode StorageMode + legacyCachePath string +} + +func (c *notFoundHintCache) Store(key string, t *oauth2.Token) error { + return c.inner.Store(key, t) +} + +func (c *notFoundHintCache) Lookup(key string) (*oauth2.Token, error) { + tok, err := c.inner.Lookup(key) + if err == nil || !errors.Is(err, cache.ErrNotFound) { + return tok, err + } + if c.mode == StorageModeSecure && legacyCacheHasTokens(c.legacyCachePath) { + return nil, ¬FoundHint{msg: "stored credentials from older CLI versions are no longer used; run `databricks auth login` to sign in again, or set DATABRICKS_AUTH_STORAGE=plaintext to keep using the file cache"} + } + return nil, ¬FoundHint{msg: "no cached credentials; run `databricks auth login` to sign in"} +} + +// notFoundHint replaces cache.ErrNotFound's terse "token not found" string +// with an actionable message while still satisfying errors.Is(err, +// cache.ErrNotFound). The SDK's loadToken wraps every cache error with +// "cache: %w", and fmt.Errorf("...: %w", ErrNotFound) would tack the +// original "token not found" onto the end of our hint, producing +// "cache: : token not found". A custom type lets us own the +// rendered message while still unwrapping to ErrNotFound for callers +// that branch on it. +type notFoundHint struct { + msg string +} + +func (e *notFoundHint) Error() string { return e.msg } +func (e *notFoundHint) Unwrap() error { return cache.ErrNotFound } + +// HintForNotFound extracts the actionable hint message from an error +// chain produced by notFoundHintCache. Returns the empty string if the +// chain does not contain a notFoundHint (e.g. an unwrapped +// cache.ErrNotFound from a plain TokenCache). +// +// Used by call sites like `auth token` that rewrite the SDK error for +// backwards-compatibility (the "databricks OAuth is not configured for +// this host" substring is load-bearing for older SDK fall-through +// logic) but want to surface the actionable hint to the user instead of +// dropping it. +func HintForNotFound(err error) string { + var hint *notFoundHint + if errors.As(err, &hint) { + return hint.msg + } + return "" +} + +// NewNotFoundHint returns an error that renders as msg but unwraps to +// cache.ErrNotFound, mirroring what notFoundHintCache produces in +// production. Exported so tests in other packages (e.g. cmd/auth) can +// construct a hint-wrapped error without going through the full +// resolver setup. +func NewNotFoundHint(msg string) error { + return ¬FoundHint{msg: msg} +} + +// withNotFoundHint wraps inner so ErrNotFound from Lookup carries an +// actionable hint. The legacy file path is resolved up front (where ctx +// is available) so Lookup can do its check without needing a context. +// +// Resolution failures for the home directory are not fatal: an empty +// legacyCachePath simply disables the upgrade-specific message, which +// falls back to the generic "run auth login" hint. +func withNotFoundHint(ctx context.Context, inner cache.TokenCache, mode StorageMode) cache.TokenCache { + var legacyCachePath string + if home, err := env.UserHomeDir(ctx); err == nil { + legacyCachePath = filepath.Join(home, tokenCacheFilePath) + } + return ¬FoundHintCache{inner: inner, mode: mode, legacyCachePath: legacyCachePath} +} + +// legacyCacheHasTokens reports whether the file at path is a valid token +// cache with at least one entry. Best-effort and read-only: any I/O or +// parse error returns false so we never claim "you have legacy tokens" +// when we cannot actually tell. +func legacyCacheHasTokens(path string) bool { + if path == "" { + return false + } + raw, err := os.ReadFile(path) + if err != nil { + return false + } + var f tokenCacheFile + if err := json.Unmarshal(raw, &f); err != nil { + return false + } + return len(f.Tokens) > 0 +} diff --git a/libs/auth/storage/not_found_hint_test.go b/libs/auth/storage/not_found_hint_test.go new file mode 100644 index 00000000000..b262aeb2d9f --- /dev/null +++ b/libs/auth/storage/not_found_hint_test.go @@ -0,0 +1,180 @@ +package storage + +import ( + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/databricks/databricks-sdk-go/credentials/u2m/cache" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "golang.org/x/oauth2" +) + +// missingCache always returns ErrNotFound on Lookup. Lets us drive the +// wrapper without going through the real file or keyring cache. +type missingCache struct{} + +func (missingCache) Store(string, *oauth2.Token) error { return nil } +func (missingCache) Lookup(string) (*oauth2.Token, error) { return nil, cache.ErrNotFound } + +// foundCache always returns a token. Used to confirm the wrapper passes +// successful lookups through unchanged. +type foundCache struct{ tok *oauth2.Token } + +func (c foundCache) Store(string, *oauth2.Token) error { return nil } +func (c foundCache) Lookup(string) (*oauth2.Token, error) { return c.tok, nil } + +// boomCache returns a non-ErrNotFound error. The wrapper must not add a +// "run auth login" hint here; the error is about something else. +type boomCache struct{ err error } + +func (c boomCache) Store(string, *oauth2.Token) error { return nil } +func (c boomCache) Lookup(string) (*oauth2.Token, error) { return nil, c.err } + +func writeLegacyCache(t *testing.T, path string, hasEntries bool) { + t.Helper() + require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o700)) + tokens := map[string]*oauth2.Token{} + if hasEntries { + tokens["my-profile"] = &oauth2.Token{AccessToken: "abc"} + } + body, err := json.Marshal(tokenCacheFile{Version: tokenCacheVersion, Tokens: tokens}) + require.NoError(t, err) + require.NoError(t, os.WriteFile(path, body, 0o600)) +} + +func TestNotFoundHintCache_SecureWithLegacyEntries_UsesUpgradeMessage(t *testing.T) { + tmp := t.TempDir() + legacyPath := filepath.Join(tmp, tokenCacheFilePath) + writeLegacyCache(t, legacyPath, true) + + c := ¬FoundHintCache{inner: missingCache{}, mode: StorageModeSecure, legacyCachePath: legacyPath} + _, err := c.Lookup("anything") + require.Error(t, err) + assert.ErrorIs(t, err, cache.ErrNotFound) + assert.Contains(t, err.Error(), "stored credentials from older CLI versions") + assert.Contains(t, err.Error(), "databricks auth login") + assert.Contains(t, err.Error(), "DATABRICKS_AUTH_STORAGE=plaintext") +} + +func TestNotFoundHintCache_SecureWithEmptyLegacyFile_UsesGenericMessage(t *testing.T) { + tmp := t.TempDir() + legacyPath := filepath.Join(tmp, tokenCacheFilePath) + writeLegacyCache(t, legacyPath, false) + + c := ¬FoundHintCache{inner: missingCache{}, mode: StorageModeSecure, legacyCachePath: legacyPath} + _, err := c.Lookup("anything") + require.Error(t, err) + assert.ErrorIs(t, err, cache.ErrNotFound) + assert.Contains(t, err.Error(), "no cached credentials") + assert.NotContains(t, err.Error(), "stored credentials from older CLI versions") +} + +func TestNotFoundHintCache_SecureNoLegacyFile_UsesGenericMessage(t *testing.T) { + c := ¬FoundHintCache{inner: missingCache{}, mode: StorageModeSecure, legacyCachePath: filepath.Join(t.TempDir(), "missing.json")} + _, err := c.Lookup("anything") + require.Error(t, err) + assert.ErrorIs(t, err, cache.ErrNotFound) + assert.Contains(t, err.Error(), "no cached credentials") +} + +func TestNotFoundHintCache_Plaintext_AlwaysGenericMessage(t *testing.T) { + tmp := t.TempDir() + legacyPath := filepath.Join(tmp, tokenCacheFilePath) + writeLegacyCache(t, legacyPath, true) + + // Even with a populated legacy file present, plaintext mode reads from + // that same file, so the upgrade copy would be misleading. + c := ¬FoundHintCache{inner: missingCache{}, mode: StorageModePlaintext, legacyCachePath: legacyPath} + _, err := c.Lookup("anything") + require.Error(t, err) + assert.ErrorIs(t, err, cache.ErrNotFound) + assert.Contains(t, err.Error(), "no cached credentials") + assert.NotContains(t, err.Error(), "stored credentials from older CLI versions") +} + +func TestNotFoundHintCache_NonErrNotFound_PassesThrough(t *testing.T) { + boom := errors.New("backend blew up") + c := ¬FoundHintCache{inner: boomCache{err: boom}, mode: StorageModeSecure, legacyCachePath: ""} + _, err := c.Lookup("anything") + require.Error(t, err) + assert.ErrorIs(t, err, boom) + assert.NotContains(t, err.Error(), "no cached credentials") +} + +func TestNotFoundHintCache_SuccessfulLookupUnchanged(t *testing.T) { + tok := &oauth2.Token{AccessToken: "abc"} + c := ¬FoundHintCache{inner: foundCache{tok: tok}, mode: StorageModeSecure, legacyCachePath: ""} + got, err := c.Lookup("anything") + require.NoError(t, err) + assert.Equal(t, tok, got) +} + +func TestNotFoundHintCache_StoreIsDelegated(t *testing.T) { + c := ¬FoundHintCache{inner: missingCache{}, mode: StorageModeSecure, legacyCachePath: ""} + require.NoError(t, c.Store("k", &oauth2.Token{AccessToken: "abc"})) +} + +func TestHintForNotFound(t *testing.T) { + t.Run("nil returns empty", func(t *testing.T) { + assert.Empty(t, HintForNotFound(nil)) + }) + + t.Run("plain ErrNotFound returns empty", func(t *testing.T) { + // An unwrapped cache.ErrNotFound carries no hint, so the caller + // (e.g. `auth token`) falls back to its default error path. + assert.Empty(t, HintForNotFound(cache.ErrNotFound)) + }) + + t.Run("unrelated error returns empty", func(t *testing.T) { + assert.Empty(t, HintForNotFound(errors.New("something else"))) + }) + + t.Run("notFoundHint returns the hint message", func(t *testing.T) { + h := ¬FoundHint{msg: "do the thing"} + assert.Equal(t, "do the thing", HintForNotFound(h)) + }) + + t.Run("notFoundHint behind an fmt.Errorf wrap returns the hint", func(t *testing.T) { + // The SDK wraps every cache error with `cache: %w`, so the hint + // is one Unwrap away when it surfaces in callers. errors.As must + // still find it. + h := ¬FoundHint{msg: "do the thing"} + wrapped := fmt.Errorf("cache: %w", h) + assert.Equal(t, "do the thing", HintForNotFound(wrapped)) + }) +} + +func TestLegacyCacheHasTokens(t *testing.T) { + tmp := t.TempDir() + + t.Run("empty path returns false", func(t *testing.T) { + assert.False(t, legacyCacheHasTokens("")) + }) + + t.Run("missing file returns false", func(t *testing.T) { + assert.False(t, legacyCacheHasTokens(filepath.Join(tmp, "missing.json"))) + }) + + t.Run("garbage file returns false", func(t *testing.T) { + p := filepath.Join(tmp, "garbage.json") + require.NoError(t, os.WriteFile(p, []byte("not json"), 0o600)) + assert.False(t, legacyCacheHasTokens(p)) + }) + + t.Run("empty token map returns false", func(t *testing.T) { + p := filepath.Join(tmp, "empty.json") + writeLegacyCache(t, p, false) + assert.False(t, legacyCacheHasTokens(p)) + }) + + t.Run("populated token map returns true", func(t *testing.T) { + p := filepath.Join(tmp, "populated.json") + writeLegacyCache(t, p, true) + assert.True(t, legacyCacheHasTokens(p)) + }) +}