diff --git a/pkg/clouds/pulumi/gcp/compute_proc.go b/pkg/clouds/pulumi/gcp/compute_proc.go index e42ee8fb..109c85c3 100644 --- a/pkg/clouds/pulumi/gcp/compute_proc.go +++ b/pkg/clouds/pulumi/gcp/compute_proc.go @@ -251,8 +251,16 @@ func createCloudsqlProxy(ctx *sdk.Context, params appendParams) (*CloudSQLProxy, // This ensures service accounts are unique per environment (e.g., telegram-bot--on-sidecarcsql--production vs telegram-bot--on-sidecarcsql--staging) baseProxyName := fmt.Sprintf("%s-%s-sidecarcsql", params.stack.Name, params.postgresName) cloudsqlProxyName := kubernetes.SanitizeK8sName(params.input.ToResName(baseProxyName)) - // Sanitize namespace name as well - sanitizedNamespace := kubernetes.SanitizeK8sName(params.input.StackParams.StackName) + // The CloudSQL proxy emits a Kubernetes Secret (proxy credentials) that must live in + // the same namespace as the consuming pod for it to be mountable. For custom stacks + // (parentEnv != stackEnv) the pod namespace is `-` per + // kubernetes.GenerateNamespaceName, so derive that here. + parentEnv := "" + if params.provisionParams.ParentStack != nil { + parentEnv = params.provisionParams.ParentStack.ParentEnv + } + derivedNamespace := kubernetes.GenerateNamespaceName(params.input.StackParams.StackName, params.input.StackParams.Environment, parentEnv) + sanitizedNamespace := kubernetes.SanitizeK8sName(derivedNamespace) cloudsqlProxy, err := NewCloudsqlProxy(ctx, CloudSQLProxyArgs{ Name: cloudsqlProxyName, DBInstance: PostgresDBInstanceArgs{ @@ -345,7 +353,14 @@ func createUserForDatabase(ctx *sdk.Context, userName, dbName string, params app } // Sanitize names to comply with Kubernetes RFC 1123 requirements (no underscores) cloudsqlProxyName := kubernetes.SanitizeK8sName(util.TrimStringMiddle(fmt.Sprintf("%s-%s-initcsql", userName, params.postgresName), 60, "-")) - namespace := kubernetes.SanitizeK8sName(params.input.StackParams.StackName) + // Init job + ad-hoc CloudSQL proxy must live in the same namespace as the consuming + // pod so the proxy's credential Secret is mountable. For custom stacks the pod is + // in `-` per kubernetes.GenerateNamespaceName. + parentEnv := "" + if params.provisionParams.ParentStack != nil { + parentEnv = params.provisionParams.ParentStack.ParentEnv + } + namespace := kubernetes.SanitizeK8sName(kubernetes.GenerateNamespaceName(params.input.StackParams.StackName, params.input.StackParams.Environment, parentEnv)) cloudsqlProxy, err := NewCloudsqlProxy(ctx, CloudSQLProxyArgs{ Name: cloudsqlProxyName, DBInstance: dbInstanceArgs, diff --git a/pkg/clouds/pulumi/kubernetes/compute_proc_mongodb.go b/pkg/clouds/pulumi/kubernetes/compute_proc_mongodb.go index ef5fb06e..ff4003dd 100644 --- a/pkg/clouds/pulumi/kubernetes/compute_proc_mongodb.go +++ b/pkg/clouds/pulumi/kubernetes/compute_proc_mongodb.go @@ -129,7 +129,15 @@ func createMongodbUserForDatabase(ctx *sdk.Context, userName, dbName string, par } ctx.Export(passwordName, password.Result) - namespace := params.input.StackParams.StackName + // The init job must run in the same namespace as the consuming pod so it can be + // observed and cleaned up alongside the workload. For custom stacks (parentEnv != stackEnv) + // the pod lives in `-` per GenerateNamespaceName, so derive the same + // namespace here. Standard stacks keep the existing `` namespace. + parentEnv := "" + if params.provisionParams.ParentStack != nil { + parentEnv = params.provisionParams.ParentStack.ParentEnv + } + namespace := GenerateNamespaceName(params.input.StackParams.StackName, params.input.StackParams.Environment, parentEnv) params.collector.AddPreProcessor(&SimpleContainerArgs{}, func(c any) error { _, err = NewMongodbInitDbUserJob(ctx, userName, InitDbUserJobArgs{ diff --git a/pkg/clouds/pulumi/kubernetes/compute_proc_postgres.go b/pkg/clouds/pulumi/kubernetes/compute_proc_postgres.go index bda9a6b3..05228965 100644 --- a/pkg/clouds/pulumi/kubernetes/compute_proc_postgres.go +++ b/pkg/clouds/pulumi/kubernetes/compute_proc_postgres.go @@ -210,7 +210,15 @@ func createPostgresUserForDatabase(ctx *sdk.Context, userName, dbName string, pa return nil, errors.Wrapf(err, "failed to parse postgres url for database %q", dbName) } - namespace := params.input.StackParams.StackName + // The init job must run in the same namespace as the consuming pod so it can be + // observed and cleaned up alongside the workload. For custom stacks (parentEnv != stackEnv) + // the pod lives in `-` per GenerateNamespaceName, so derive the same + // namespace here. Standard stacks keep the existing `` namespace. + parentEnv := "" + if params.provisionParams.ParentStack != nil { + parentEnv = params.provisionParams.ParentStack.ParentEnv + } + namespace := GenerateNamespaceName(params.input.StackParams.StackName, params.input.StackParams.Environment, parentEnv) params.collector.AddPreProcessor(&SimpleContainerArgs{}, func(c any) error { _, err = NewPostgresInitDbUserJob(ctx, userName, InitDbUserJobArgs{ diff --git a/pkg/clouds/pulumi/kubernetes/deployment.go b/pkg/clouds/pulumi/kubernetes/deployment.go index 76b763ee..6d1d0487 100644 --- a/pkg/clouds/pulumi/kubernetes/deployment.go +++ b/pkg/clouds/pulumi/kubernetes/deployment.go @@ -63,8 +63,13 @@ func DeploySimpleContainer(ctx *sdk.Context, args Args, opts ...sdk.ResourceOpti parentEnv = args.Params.ParentStack.ParentEnv } - // Determine namespace - always use stack name as namespace (service name) - namespace := lo.If(args.Namespace == "", stackName).Else(args.Namespace) + // Derive namespace via GenerateNamespaceName: standard stacks keep the stackName-based + // namespace; custom stacks (parentEnv != stackEnv) get a stackEnv-suffixed namespace so + // sibling sub-envs no longer share a physical namespace. See the helper's docstring for + // migration semantics. The shared parent namespace stays intact through the per-stack + // migration thanks to RetainOnDelete on the namespace resource. + baseNamespace := lo.If(args.Namespace == "", stackName).Else(args.Namespace) + namespace := GenerateNamespaceName(baseNamespace, stackEnv, parentEnv) // Generate deployment name with environment suffix for custom stacks baseDeploymentName := lo.If(args.DeploymentName == "", stackName).Else(args.DeploymentName) diff --git a/pkg/clouds/pulumi/kubernetes/helpers.go b/pkg/clouds/pulumi/kubernetes/helpers.go index fd0f272a..0bef2a7b 100644 --- a/pkg/clouds/pulumi/kubernetes/helpers.go +++ b/pkg/clouds/pulumi/kubernetes/helpers.go @@ -45,7 +45,10 @@ func sanitizeK8sName(name string) string { } func ensureNamespace(ctx *sdk.Context, input api.ResourceInput, params pApi.ProvisionParams, namespace string) (*corev1.Namespace, error) { - opts := []sdk.ResourceOption{sdk.Provider(params.Provider)} + // RetainOnDelete: see the rationale at simple_container.go's NewNamespace call — + // helm operator stacks share namespaces across sibling stacks the same way client + // stacks do, so the destroy-cascade hazard is identical here. + opts := []sdk.ResourceOption{sdk.Provider(params.Provider), sdk.RetainOnDelete(true)} sanitizedNamespace := sanitizeK8sName(namespace) return corev1.NewNamespace(ctx, fmt.Sprintf("create-ns-%s-%s", sanitizedNamespace, input.ToResName(input.Descriptor.Name)), &corev1.NamespaceArgs{ Metadata: &metav1.ObjectMetaArgs{ diff --git a/pkg/clouds/pulumi/kubernetes/naming.go b/pkg/clouds/pulumi/kubernetes/naming.go index c57bead1..161e5fb7 100644 --- a/pkg/clouds/pulumi/kubernetes/naming.go +++ b/pkg/clouds/pulumi/kubernetes/naming.go @@ -71,6 +71,36 @@ func isCustomStack(stackEnv, parentEnv string) bool { return parentEnv != "" && parentEnv != stackEnv } +// GenerateNamespaceName derives the physical k8s namespace name for a stack. +// Standard stacks (parentEnv unset, or parentEnv == stackEnv) keep baseNamespace +// (typically the stackName). Custom stacks (parentEnv set and differing from +// stackEnv, e.g. parentEnv=production with stackEnv=tenant-a/tenant-b/...) get +// baseNamespace suffixed with stackEnv, mirroring the per-stackEnv suffix every +// other resource type (Deployment, Service, Secret, ConfigMap, HPA, VPA, +// ImagePullSecret) already gets via generateResourceName. +// +// The result is sanitized to comply with Kubernetes RFC 1123 (lowercase +// alphanumeric and `-`, ≤63 chars with FNV-1a truncation hash) so callers can +// pass it directly into `metadata.namespace` without an extra sanitization +// step. Sanitization is idempotent — callers that pre-sanitize their inputs +// see no behavioural change. +// +// Without this isolation, sibling sub-env stacks share one physical namespace, +// and any `pulumi destroy` on a sub-env cascade-deletes every sibling's resources +// via the k8s namespace delete API. Migrating an existing custom stack to its +// dedicated namespace is automatic on the next `pulumi up`: Pulumi sees the +// namespace metadata.Name change and Replaces the namespace plus its +// namespace-scoped resources. The namespace is created with RetainOnDelete (see +// NewSimpleContainer), so the parent's shared namespace is left in place — the +// parent stack's resources continue running through the migration. +func GenerateNamespaceName(baseNamespace, stackEnv, parentEnv string) string { + name := baseNamespace + if isCustomStack(stackEnv, parentEnv) { + name = fmt.Sprintf("%s-%s", baseNamespace, stackEnv) + } + return SanitizeK8sName(name) +} + // GenerateCaddyDeploymentName creates the Caddy deployment name with environment suffix // Caddy deployments always include the environment suffix for backwards compatibility // This is exported so it can be used by both kubernetes and gcp packages for consistency @@ -87,8 +117,8 @@ func GenerateCaddyDeploymentName(stackEnv string) string { // // Caddy is provisioned by the parent infra stack, so its deployment name is keyed on // parentEnv (e.g. caddy-production). For sub-env client stacks where parentEnv differs -// from stackEnv (e.g. parentEnv=production, stackEnv=gl-pay), passing stackEnv would -// produce caddy-gl-pay — which doesn't exist — and the patch would fail silently. +// from stackEnv (e.g. parentEnv=production, stackEnv=tenant-a), passing stackEnv would +// produce caddy-tenant-a — which doesn't exist — and the patch would fail silently. // For single-env stacks (parentEnv empty or equal to stackEnv) this falls back to stackEnv. // // Note: this is the call site asymmetric to GenerateCaddyDeploymentName, which is used diff --git a/pkg/clouds/pulumi/kubernetes/naming_test.go b/pkg/clouds/pulumi/kubernetes/naming_test.go index 108a984b..77438c4a 100644 --- a/pkg/clouds/pulumi/kubernetes/naming_test.go +++ b/pkg/clouds/pulumi/kubernetes/naming_test.go @@ -455,7 +455,7 @@ func TestCaddyDeploymentNameForChild(t *testing.T) { }, { name: "sub-env stack targets parent's caddy", - stackEnv: "gl-pay", + stackEnv: "tenant-a", parentEnv: "production", expected: "caddy-production", }, @@ -533,6 +533,121 @@ func TestIsCustomStack(t *testing.T) { } } +// TestGenerateNamespaceName covers the namespace derivation that gives custom stacks +// (parentEnv != stackEnv) their own physical k8s namespace, while leaving standard +// stacks on the stackName-based namespace. Includes regression cases for the +// shared-namespace outage class where production parentEnv plus several sibling +// sub-env stacks (tenant-a/tenant-b/tenant-c/preview-test/...) all collided in one +// namespace, so destroying any sibling cascade-deleted the rest. +func TestGenerateNamespaceName(t *testing.T) { + tests := []struct { + name string + baseNamespace string + stackEnv string + parentEnv string + expected string + }{ + { + name: "standard stack - no parentEnv", + baseNamespace: "myapp", + stackEnv: "staging", + parentEnv: "", + expected: "myapp", + }, + { + name: "self-reference - parentEnv equals stackEnv", + baseNamespace: "myapp", + stackEnv: "production", + parentEnv: "production", + expected: "myapp", + }, + { + name: "custom stack - sub-env under production", + baseNamespace: "myapp", + stackEnv: "tenant-a", + parentEnv: "production", + expected: "myapp-tenant-a", + }, + { + // Realistic stackNames often contain underscores; the namespace must be + // sanitized to RFC 1123 so callers can pass the result directly into + // metadata.namespace without their own sanitization step. + name: "underscores in stackName get normalized", + baseNamespace: "my_app", + stackEnv: "tenant-a", + parentEnv: "production", + expected: "my-app-tenant-a", + }, + { + name: "uppercase gets lowercased", + baseNamespace: "MyApp", + stackEnv: "TenantA", + parentEnv: "production", + expected: "myapp-tenanta", + }, + { + name: "custom stack - PR preview under staging", + baseNamespace: "api", + stackEnv: "staging-pr-123", + parentEnv: "staging", + expected: "api-staging-pr-123", + }, + { + name: "custom stack - throwaway test sibling", + baseNamespace: "myapp", + stackEnv: "preview-test", + parentEnv: "production", + expected: "myapp-preview-test", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := GenerateNamespaceName(tt.baseNamespace, tt.stackEnv, tt.parentEnv) + if got != tt.expected { + t.Errorf("GenerateNamespaceName(%q, %q, %q) = %q, expected %q", + tt.baseNamespace, tt.stackEnv, tt.parentEnv, got, tt.expected) + } + }) + } +} + +// TestGenerateNamespaceName_SiblingsAreUnique is the direct regression test for the +// shared-namespace outage: every sub-env stack under one stackName must resolve to +// a distinct namespace, while the parent (parentEnv == stackEnv) keeps its existing +// namespace. This mirrors the production scenario where one parent env hosted +// multiple tenant sub-envs plus a throwaway test sub-env, all sharing one namespace. +func TestGenerateNamespaceName_SiblingsAreUnique(t *testing.T) { + baseNamespace := "myapp" + parentEnv := "production" + + siblings := []struct { + stackEnv string + expectedNamespace string + }{ + {"production", "myapp"}, + {"tenant-a", "myapp-tenant-a"}, + {"tenant-b", "myapp-tenant-b"}, + {"tenant-c", "myapp-tenant-c"}, + {"tenant-d", "myapp-tenant-d"}, + {"preview-test", "myapp-preview-test"}, + } + + seen := make(map[string]string) + for _, s := range siblings { + got := GenerateNamespaceName(baseNamespace, s.stackEnv, parentEnv) + if got != s.expectedNamespace { + t.Errorf("stackEnv=%q: got namespace %q, expected %q", + s.stackEnv, got, s.expectedNamespace) + } + if prior, dup := seen[got]; dup { + t.Errorf("stackEnv=%q produced namespace %q already used by stackEnv=%q — "+ + "siblings must not share a namespace", s.stackEnv, got, prior) + } + seen[got] = s.stackEnv + } +} + // TestComplexScenarios tests real-world deployment scenarios func TestComplexScenarios(t *testing.T) { t.Run("multiple preview environments with unique namespaces", func(t *testing.T) { diff --git a/pkg/clouds/pulumi/kubernetes/simple_container.go b/pkg/clouds/pulumi/kubernetes/simple_container.go index c8aa47b1..74d35796 100644 --- a/pkg/clouds/pulumi/kubernetes/simple_container.go +++ b/pkg/clouds/pulumi/kubernetes/simple_container.go @@ -216,7 +216,25 @@ func NewSimpleContainer(ctx *sdk.Context, args *SimpleContainerArgs, opts ...sdk // Sanitize namespace name to comply with Kubernetes RFC 1123 requirements sanitizedNamespace := sanitizeK8sName(args.Namespace) // Use deployment name as Pulumi resource name to ensure uniqueness across environments - // while keeping the actual K8s namespace name as specified by the user + // while keeping the actual K8s namespace name as specified by the user. + // + // RetainOnDelete: in legacy deploys, sub-env client stacks (e.g. parentEnv=production + // with stackEnv=tenant-a/tenant-b/...) shared one physical K8s namespace because the + // namespace metadata.Name was derived from stackName, not from stackEnv. Each stack + // tracked its own Pulumi Namespace resource with a unique URN, but they all referenced + // the same physical k8s namespace. Without RetainOnDelete, destroying any single + // sub-env stack would cascade-delete the shared namespace and wipe every sibling + // stack's resources (Deployments, Services, Secrets) — a real production outage when + // a throwaway sub-env destroy took down all live siblings. + // + // GenerateNamespaceName now isolates custom stacks per-stackEnv, but RetainOnDelete + // remains load-bearing for the migration step: when a pre-existing custom stack + // first runs `pulumi up` after this version, Pulumi Replaces the namespace, and the + // old shared namespace must NOT be deleted because the parent stack still lives + // there. Post-migration, RetainOnDelete continues to defend against any case where + // multiple stacks legitimately share a namespace (helm operators, explicit + // `Namespace` overrides). Empty namespaces left after the last referencing stack + // is destroyed must be cleaned up by hand. namespaceResourceName := fmt.Sprintf("%s-ns", sanitizedDeployment) namespace, err := corev1.NewNamespace(ctx, namespaceResourceName, &corev1.NamespaceArgs{ Metadata: &metav1.ObjectMetaArgs{ @@ -224,7 +242,7 @@ func NewSimpleContainer(ctx *sdk.Context, args *SimpleContainerArgs, opts ...sdk Labels: sdk.ToStringMap(appLabels), Annotations: sdk.ToStringMap(appAnnotations), }, - }, opts...) + }, append(opts, sdk.RetainOnDelete(true))...) if err != nil { return nil, err }