diff --git a/pkg/clouds/pulumi/kubernetes/caddy.go b/pkg/clouds/pulumi/kubernetes/caddy.go index 857b6493..32a5bc5c 100644 --- a/pkg/clouds/pulumi/kubernetes/caddy.go +++ b/pkg/clouds/pulumi/kubernetes/caddy.go @@ -90,17 +90,50 @@ func DeployCaddyService(ctx *sdk.Context, caddy CaddyDeployment, input api.Resou } defaultCaddyFileEntryStart := `http:// {` + // Default catch-all serves a hard 503 page from /etc/caddy/pages/503.html + // instead of `file_server` over the whole pages dir (which used to serve + // index.html with status 200 for any unknown Host — invisible to monitoring + // when every backend was gone). + // + // Rationale for 503: + // - When all Services with `simple-container.com/caddyfile-entry` for a + // given Host vanish (e.g. cascade-deletion from a namespace Replace gone + // wrong), the request now gets HTTP 503 + Retry-After. CDNs fail over, + // uptime checks alert, oncall sees it. + // + // Why file_server (not respond with inlined HTML): + // - Symmetric with the existing `handle_bucket_error` / `handle_server_error` + // snippets in embed/caddy/Caddyfile, which serve {404,500,502}.html the + // same way for per-Service error fallbacks. One pattern for every status + // page in this codebase. + // - file_server emits Content-Type automatically from the file extension, + // so no explicit `header Content-Type` needed. + // - Operators can override the 503 body by mounting a different ConfigMap + // at /etc/caddy/pages/503.html without touching SC api code. + // + // Wrapped in `handle { ... }` so the directives below apply only to the + // 503 path and nothing else can short-circuit (e.g. `import hsts` redir + // firing before the response). We also intentionally do NOT `import hsts` + // here — sending an HSTS header from a catch-all that answers any Host is + // meaningless, and the HTTP→HTTPS redirect would only route the request + // into a TLS handshake failure (Caddy has no cert for an unknown SNI), + // which is invisible to HTTP-layer monitoring. defaultCaddyFileEntry := ` import gzip - import handle_static - root * /etc/caddy/pages - file_server + handle { + root * /etc/caddy/pages + rewrite * /503.html + header Cache-Control "no-store" + header Retry-After "60" + file_server { + status 503 + } + } ` - // if caddy must respect SSL connections only + // Still computed because it's threaded into per-stack Caddyfile entries + // elsewhere in this function; intentionally NOT applied to the catch-all + // default block above (see comment on `import hsts` omission). useSSL := caddy.UseSSL == nil || *caddy.UseSSL - if useSSL { - defaultCaddyFileEntry += "\nimport hsts" - } serviceAccountName := input.ToResName(fmt.Sprintf("%s-caddy-sa", input.Descriptor.Name)) serviceAccount, err := NewSimpleServiceAccount(ctx, serviceAccountName, &SimpleServiceAccountArgs{ @@ -176,27 +209,75 @@ func DeployCaddyService(ctx *sdk.Context, caddy CaddyDeployment, input api.Resou return envVars }(), Command: sdk.ToStringArray([]string{"bash", "-c", ` - set -xe; + # set -e (exit on error) + pipefail (any pipe component fail = fail). + # Notably we do NOT enable -x here: tracing every command would dump + # the raw caddyfile-entry annotation body to stdout for every Service + # on every pod restart, which lands in cluster logs (GCP/Datadog/ELK). + # SC-generated annotations don't contain secrets, but consumer-side + # misuse (eg. basicauth credentials in Headers or LbConfig.ExtraHelpers + # that templated into the annotation) could leak via -x. 
The trade-off + # is debuggability — for live troubleshooting, re-enable -x by + # overriding the init-container command in the cluster. + set -eo pipefail; cp -f /etc/caddy/Caddyfile /tmp/Caddyfile; - + # Inject custom Caddyfile prefix at the top (e.g., GCS storage configuration) if [ -n "$CADDYFILE_PREFIX" ]; then echo "$CADDYFILE_PREFIX" >> /tmp/Caddyfile echo "" >> /tmp/Caddyfile fi - - # Get all services with Simple Container annotations across all namespaces - services=$(kubectl get services --all-namespaces -o jsonpath='{range .items[?(@.metadata.annotations.simple-container\.com/caddyfile-entry)]}{.metadata.namespace}{" "}{.metadata.name}{"\n"}{end}') + + # List Services carrying the caddyfile-entry annotation. We also pull + # creationTimestamp so we can dedup by site-address with the newest + # Service winning — during a Pulumi Replace of a namespace (or Service), + # the old and new Services transiently coexist and both carry the same + # annotation; without dedup that produced two "http:// { ... }" + # blocks and Caddy aborted with "ambiguous site definition". + # kubectl and sort are split into separate assignments so a kubectl + # failure surfaces unambiguously even without pipefail (originally + # they were piped; pipefail was added in response to a review catch + # and we kept the structural split so future readers do not need + # to know about pipefail to reason about failure modes here). + # pipefail is kept on as belt-and-suspenders for the later + # printf-to-sort pipe and the printf-to-while-read pipeline below. + # If either listing step fails the init-container exits non-zero + # and K8s reschedules — preferable to a Caddyfile with only the + # default 503 block, which would mean a complete loss of routing + # for the entire cluster. + raw_services=$(kubectl get services --all-namespaces -o jsonpath='{range .items[?(@.metadata.annotations.simple-container\.com/caddyfile-entry)]}{.metadata.creationTimestamp}{" "}{.metadata.namespace}{" "}{.metadata.name}{"\n"}{end}') + services=$(printf '%s' "$raw_services" | sort -r) echo "$DEFAULT_ENTRY_START" >> /tmp/Caddyfile if [ "$USE_PREFIXES" == "false" ]; then echo "$DEFAULT_ENTRY" >> /tmp/Caddyfile echo "}" >> /tmp/Caddyfile fi + # Dedup state: first non-blank, non-comment line of each annotation is + # the site address (e.g. "http://support-payhey.pay.space {") or the + # "handle_path /*" matcher for prefix routing. Whitespace is + # trimmed both sides so an indentation difference can't pass through as + # a distinct key. Already-seen keys are skipped — most-recently-created + # Service wins via sort -r. 
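+                        # Worked example (illustrative values): if two Services both carry an
+                        # annotation whose first meaningful line is
+                        #   http://support-payhey.pay.space {
+                        # the newer Service (sorted first by sort -r on creationTimestamp) writes
+                        # that trimmed line into $seen; the older one then matches grep -qFx and
+                        # is skipped, so the assembled Caddyfile ends up with exactly one site
+                        # block for that address.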
+ seen=$(mktemp) + trap 'rm -f "$seen"' EXIT # Process each service that has Caddyfile entry annotation - echo "$services" | while read ns service; do + printf '%s\n' "$services" | while read ts ns service; do if [ -n "$ns" ] && [ -n "$service" ]; then + entry=$(kubectl get service -n "$ns" "$service" -o jsonpath='{.metadata.annotations.simple-container\.com/caddyfile-entry}' 2>/dev/null || true) + if [ -z "$entry" ]; then + continue + fi + key=$(printf '%s\n' "$entry" | awk ' + /^[[:space:]]*$/ { next } + /^[[:space:]]*#/ { next } + { sub(/^[[:space:]]+/, ""); sub(/[[:space:]]+$/, ""); print; exit } + ') + if [ -n "$key" ] && grep -qFx -- "$key" "$seen" 2>/dev/null; then + echo "Skipping duplicate caddyfile-entry '$key' from $ns/$service (older Service)" + continue + fi + [ -n "$key" ] && printf '%s\n' "$key" >> "$seen" echo "Processing service: $service in namespace: $ns" - kubectl get service -n $ns $service -o jsonpath='{.metadata.annotations.simple-container\.com/caddyfile-entry}' >> /tmp/Caddyfile || true; + printf '%s\n' "$entry" >> /tmp/Caddyfile echo "" >> /tmp/Caddyfile fi done diff --git a/pkg/clouds/pulumi/kubernetes/embed/caddy/pages/503.html b/pkg/clouds/pulumi/kubernetes/embed/caddy/pages/503.html new file mode 100644 index 00000000..85412ecd --- /dev/null +++ b/pkg/clouds/pulumi/kubernetes/embed/caddy/pages/503.html @@ -0,0 +1,17 @@ + +503 Service Unavailable + + +
+<body>
+  <h1>503 Service Unavailable</h1>
+  <p>No backend route is configured for this host.</p>
+  <p>
+    If you are an operator, verify the Service has the
+    simple-container.com/caddyfile-entry annotation and that
+    Caddy has been rolled.
+  </p>
+</body>
+</html>
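The caddy.go comment above notes that operators can override the 503 body by mounting their own ConfigMap over /etc/caddy/pages/503.html. A minimal Pulumi Go sketch of creating such a ConfigMap — this is not part of the patch; the resource name, import paths/SDK versions, and the volume wiring into the Caddy Deployment are assumptions:

    package example

    import (
        corev1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/core/v1"
        metav1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/meta/v1"
        sdk "github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )

    // newCustom503Page publishes a replacement 503 page as a ConfigMap.
    // Mounting it over /etc/caddy/pages/503.html on the Caddy Deployment
    // (volume + volumeMount) is repo-specific and omitted from this sketch.
    func newCustom503Page(ctx *sdk.Context, namespace string) (*corev1.ConfigMap, error) {
        return corev1.NewConfigMap(ctx, "caddy-503-page", &corev1.ConfigMapArgs{
            Metadata: &metav1.ObjectMetaArgs{
                Name:      sdk.String("caddy-503-page"),
                Namespace: sdk.String(namespace),
            },
            Data: sdk.StringMap{
                "503.html": sdk.String("<html><body><h1>503 Service Unavailable</h1></body></html>"),
            },
        })
    }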
diff --git a/pkg/clouds/pulumi/kubernetes/embed/caddy/pages/index.html b/pkg/clouds/pulumi/kubernetes/embed/caddy/pages/index.html
deleted file mode 100644
index cb8f0acc..00000000
--- a/pkg/clouds/pulumi/kubernetes/embed/caddy/pages/index.html
+++ /dev/null
@@ -1,12 +0,0 @@
-<html>
-<head>
-  <title>Default page</title>
-</head>
-<body>
-  <h1>Default page</h1>
-</body>
\ No newline at end of file diff --git a/pkg/clouds/pulumi/kubernetes/helpers.go b/pkg/clouds/pulumi/kubernetes/helpers.go index 0bef2a7b..a2e62f1c 100644 --- a/pkg/clouds/pulumi/kubernetes/helpers.go +++ b/pkg/clouds/pulumi/kubernetes/helpers.go @@ -45,10 +45,16 @@ func sanitizeK8sName(name string) string { } func ensureNamespace(ctx *sdk.Context, input api.ResourceInput, params pApi.ProvisionParams, namespace string) (*corev1.Namespace, error) { - // RetainOnDelete: see the rationale at simple_container.go's NewNamespace call — - // helm operator stacks share namespaces across sibling stacks the same way client - // stacks do, so the destroy-cascade hazard is identical here. - opts := []sdk.ResourceOption{sdk.Provider(params.Provider), sdk.RetainOnDelete(true)} + // RetainOnDelete + IgnoreChanges("metadata.name"): see the long rationale + // at simple_container.go's NewNamespace call. Helm operator stacks share + // namespaces across sibling stacks the same way client stacks do, so both + // the destroy-cascade hazard and the migration-time Replace cascade + // hazard apply identically here. + opts := []sdk.ResourceOption{ + sdk.Provider(params.Provider), + sdk.RetainOnDelete(true), + sdk.IgnoreChanges([]string{"metadata.name"}), + } sanitizedNamespace := sanitizeK8sName(namespace) return corev1.NewNamespace(ctx, fmt.Sprintf("create-ns-%s-%s", sanitizedNamespace, input.ToResName(input.Descriptor.Name)), &corev1.NamespaceArgs{ Metadata: &metav1.ObjectMetaArgs{ diff --git a/pkg/clouds/pulumi/kubernetes/simple_container.go b/pkg/clouds/pulumi/kubernetes/simple_container.go index 74d35796..e5d503a8 100644 --- a/pkg/clouds/pulumi/kubernetes/simple_container.go +++ b/pkg/clouds/pulumi/kubernetes/simple_container.go @@ -218,23 +218,46 @@ func NewSimpleContainer(ctx *sdk.Context, args *SimpleContainerArgs, opts ...sdk // Use deployment name as Pulumi resource name to ensure uniqueness across environments // while keeping the actual K8s namespace name as specified by the user. // - // RetainOnDelete: in legacy deploys, sub-env client stacks (e.g. parentEnv=production - // with stackEnv=tenant-a/tenant-b/...) shared one physical K8s namespace because the - // namespace metadata.Name was derived from stackName, not from stackEnv. Each stack - // tracked its own Pulumi Namespace resource with a unique URN, but they all referenced - // the same physical k8s namespace. Without RetainOnDelete, destroying any single - // sub-env stack would cascade-delete the shared namespace and wipe every sibling - // stack's resources (Deployments, Services, Secrets) — a real production outage when - // a throwaway sub-env destroy took down all live siblings. + // Namespace-handling has two protections against the destroy/Replace cascade + // hazard discovered in pre-PR-230 deploys (see PR #230 and the 2026-05-10 + // PAY-SPACE + 2026-05-12 fulldiveVR outages): // - // GenerateNamespaceName now isolates custom stacks per-stackEnv, but RetainOnDelete - // remains load-bearing for the migration step: when a pre-existing custom stack - // first runs `pulumi up` after this version, Pulumi Replaces the namespace, and the - // old shared namespace must NOT be deleted because the parent stack still lives - // there. Post-migration, RetainOnDelete continues to defend against any case where - // multiple stacks legitimately share a namespace (helm operators, explicit - // `Namespace` overrides). Empty namespaces left after the last referencing stack - // is destroyed must be cleaned up by hand. + // 1. RetainOnDelete(true). 
In legacy deploys, sub-env client stacks + // (parentEnv= with stackEnv=tenant-a/tenant-b/...) shared one + // physical K8s namespace because metadata.Name was derived from + // stackName, not stackEnv. Each stack tracked its own Pulumi Namespace + // resource at a unique URN, but they all pointed at the same physical + // namespace. Destroying any single sub-env stack would cascade-delete + // the shared namespace and wipe every sibling. RetainOnDelete keeps + // Pulumi from issuing the k8s DELETE on destroy. + // + // 2. IgnoreChanges("metadata.name"). PR #230 changed GenerateNamespaceName + // to isolate custom stacks (stackName-stackEnv) instead of sharing the + // parent's namespace. That works for fresh deploys, but for any consumer + // whose Pulumi state predates #230, the next `pulumi up` saw a diff + // between state's metadata.Name="" and program's + // metadata.Name="-", and scheduled a Replace. + // Replace = create-new + delete-old, and `RetainOnDelete` on the new + // resource is non-retroactive — Pulumi reads delete-time options from + // the OLD resource's state, which predates the flag. The k8s DELETE on + // the shared namespace went through and cascade-killed the parent + // stack's running resources. + // + // IgnoreChanges("metadata.name") suppresses the diff entirely. No + // Replace is scheduled, no delete fires. The resource state retains + // whatever metadata.Name it had (new for fresh deploys, legacy shared + // for migrated consumers). Other resources (Service, Deployment, …) + // that reference namespace.Metadata.Name().Elem() follow whichever + // name is in effect — fresh deploys land in the isolated namespace, + // migrated consumers continue using the shared one. Combined with + // RetainOnDelete this keeps both modes safe. + // + // Consumers who want to migrate an existing custom stack to the + // isolated namespace name opt in by running + // pulumi stack export | jq 'del(... namespace urn ...)' | pulumi stack import + // (forget the namespace resource — k8s namespace itself stays put), + // then the next pulumi up registers a fresh Namespace at the isolated + // name. Documented in the PR description. namespaceResourceName := fmt.Sprintf("%s-ns", sanitizedDeployment) namespace, err := corev1.NewNamespace(ctx, namespaceResourceName, &corev1.NamespaceArgs{ Metadata: &metav1.ObjectMetaArgs{ @@ -242,7 +265,7 @@ func NewSimpleContainer(ctx *sdk.Context, args *SimpleContainerArgs, opts ...sdk Labels: sdk.ToStringMap(appLabels), Annotations: sdk.ToStringMap(appAnnotations), }, - }, append(opts, sdk.RetainOnDelete(true))...) + }, append(opts, sdk.RetainOnDelete(true), sdk.IgnoreChanges([]string{"metadata.name"}))...) if err != nil { return nil, err } @@ -613,20 +636,27 @@ func NewSimpleContainer(ctx *sdk.Context, args *SimpleContainerArgs, opts ...sdk serviceAnnotations := lo.Assign(appAnnotations) var caddyfileEntry string + var caddyfileEntryAnnotation sdk.StringInput if args.GenerateCaddyfileEntry && mainPort != nil { + // The unsubstituted template — used for both the initial sync render + // (sc.CaddyfileEntry static export, change-hash signal) and the + // deferred re-render inside ApplyT below (live-namespace annotation + // on the Service). Single source of truth so any template tweak + // updates both paths. 
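+            // For orientation (illustrative values, not from any real stack): with
+            // proto=https, domain=api.example.com, service=api-svc, namespace=prod,
+            // port=8080, the Domain branch below renders roughly
+            //   https://api.example.com { reverse_proxy http://api-svc.prod.svc.cluster.local:8080 { ... } }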
+ var caddyfileEntryTemplate string if args.Domain != "" { - caddyfileEntry = ` + caddyfileEntryTemplate = ` ${proto}://${domain} { reverse_proxy http://${service}.${namespace}.svc.cluster.local:${port} { header_down Server nginx ${addHeaders} import handle_server_error ${extraHelpers} } - ${imports} + ${imports} } ` } else if args.Prefix != "" { - caddyfileEntry = ` + caddyfileEntryTemplate = ` handle_path /${prefix}* {${additionalProxyConfig} reverse_proxy http://${service}.${namespace}.svc.cluster.local:${port} { header_down Server nginx ${addHeaders} @@ -660,9 +690,51 @@ ${proto}://${domain} { } else { placeholdersMap["additionalProxyConfig"] = "" } + // Apply placeholders synchronously so the static representation + // (used for sc.CaddyfileEntry change-hash + log lines) is populated. + // `namespace` here is sanitizedNamespace — for fresh deploys that + // matches the live k8s namespace, but for migrated stacks with + // IgnoreChanges("metadata.name") suppressing the rename the live + // namespace stays at the legacy value. The annotation that lands + // on the Service is computed from the live namespace Output below. + caddyfileEntry = caddyfileEntryTemplate if err := placeholders.New().Apply(&caddyfileEntry, placeholders.WithData(placeholdersMap)); err != nil { return nil, errors.Wrapf(err, "failed to apply placeholders on caddyfile entry template") } + + // Build the actual annotation as an Output that resolves namespace + // from the live Namespace resource's metadata.name. On migrated + // stacks this is the legacy shared name (because of IgnoreChanges), + // which is also where the Service is created, so reverse_proxy + // http://${service}.${namespace}.svc.cluster.local resolves to + // real cluster DNS. On fresh deploys it equals sanitizedNamespace + // so the byte output matches the legacy code path. + // + // Render failures inside ApplyT are returned as errors (not silently + // fallen back to the statically-rendered template) — falling back + // would re-introduce the migrated-stack 502 bug this PR is fixing. + staticEntry := caddyfileEntry + caddyfileEntryAnnotation = namespace.Metadata.Name().ApplyT(func(nsPtr *string) (string, error) { + liveNS := sanitizedNamespace + if nsPtr != nil && *nsPtr != "" { + liveNS = *nsPtr + } + if liveNS == sanitizedNamespace { + // Fresh deploy or no migration: static template is correct verbatim. + return staticEntry, nil + } + // Migrated stack: re-render with the live (legacy) namespace. + localMap := make(placeholders.MapData, len(placeholdersMap)) + for k, v := range placeholdersMap { + localMap[k] = v + } + localMap["namespace"] = liveNS + rendered := caddyfileEntryTemplate + if err := placeholders.New().Apply(&rendered, placeholders.WithData(localMap)); err != nil { + return "", errors.Wrapf(err, "failed to re-render caddyfile entry for live namespace %q", liveNS) + } + return rendered, nil + }).(sdk.StringOutput) serviceAnnotations[AnnotationCaddyfileEntry] = caddyfileEntry } @@ -675,6 +747,18 @@ ${proto}://${domain} { }) } } + // Build the Pulumi-input annotation map. The caddyfile-entry value, if + // any, is an Output that resolves the namespace placeholder against the + // live Namespace resource (so IgnoreChanges'd migrated stacks point at + // the legacy shared namespace, fresh deploys point at the per-stackEnv + // namespace). Everything else is a static string. 
+ serviceAnnotationsInput := sdk.StringMap{} + for k, v := range serviceAnnotations { + serviceAnnotationsInput[k] = sdk.String(v) + } + if caddyfileEntryAnnotation != nil { + serviceAnnotationsInput[AnnotationCaddyfileEntry] = caddyfileEntryAnnotation + } var service *corev1.Service if len(lo.FromPtr(args.IngressContainer).Ports) > 0 { service, err = corev1.NewService(ctx, sanitizedService, &corev1.ServiceArgs{ @@ -682,7 +766,7 @@ ${proto}://${domain} { Name: sdk.String(sanitizedService), Namespace: namespace.Metadata.Name().Elem(), Labels: sdk.ToStringMap(appLabels), - Annotations: sdk.ToStringMap(serviceAnnotations), + Annotations: serviceAnnotationsInput, }, Spec: &corev1.ServiceSpecArgs{ Selector: sdk.ToStringMap(appLabels), @@ -701,16 +785,25 @@ ${proto}://${domain} { if mainPort == nil { return nil, errors.Errorf("cannot provision ingress when no main port is specified") } - ingressAnnotations := lo.Assign(serviceAnnotations) + // Mirror the Service-side annotation map (Pulumi-input with the + // live-namespace caddyfile-entry Output) and overlay the + // Ingress-only ssl-redirect tweak. + ingressAnnotationsInput := sdk.StringMap{} + for k, v := range serviceAnnotations { + ingressAnnotationsInput[k] = sdk.String(v) + } + if caddyfileEntryAnnotation != nil { + ingressAnnotationsInput[AnnotationCaddyfileEntry] = caddyfileEntryAnnotation + } if args.UseSSL { - ingressAnnotations["ingress.kubernetes.io/ssl-redirect"] = "false" // do not need ssl redirect from kube + ingressAnnotationsInput["ingress.kubernetes.io/ssl-redirect"] = sdk.String("false") // do not need ssl redirect from kube } _, err = networkv1.NewIngress(ctx, sanitizedService, &networkv1.IngressArgs{ Metadata: &metav1.ObjectMetaArgs{ Name: sdk.String(sanitizedService), Namespace: namespace.Metadata.Name().Elem(), Labels: sdk.ToStringMap(appLabels), - Annotations: sdk.ToStringMap(ingressAnnotations), + Annotations: ingressAnnotationsInput, }, Spec: &networkv1.IngressSpecArgs{ Rules: networkv1.IngressRuleArray{ @@ -797,9 +890,13 @@ ${proto}://${domain} { return nil, err } - // Create VPA if enabled + // Create VPA if enabled. Pass the live namespace name (Pulumi Output) + // rather than the program-computed sanitizedNamespace string, so the + // VPA lands in the same namespace as its target Deployment on migrated + // stacks (where IgnoreChanges("metadata.name") keeps the namespace at + // the legacy shared value). 
if args.VPA != nil && args.VPA.Enabled { - if err := createVPA(ctx, args, baseResourceName, sanitizedNamespace, appLabels, appAnnotations, opts...); err != nil { + if err := createVPA(ctx, args, baseResourceName, namespace.Metadata.Name().Elem(), appLabels, appAnnotations, opts...); err != nil { return nil, errors.Wrapf(err, "failed to create VPA for deployment %s", baseResourceName) } } @@ -843,7 +940,7 @@ ${proto}://${domain} { return sc, nil } -func createVPA(ctx *sdk.Context, args *SimpleContainerArgs, deploymentName, namespace string, labels, annotations map[string]string, opts ...sdk.ResourceOption) error { +func createVPA(ctx *sdk.Context, args *SimpleContainerArgs, deploymentName string, namespace sdk.StringInput, labels, annotations map[string]string, opts ...sdk.ResourceOption) error { vpaName := fmt.Sprintf("%s-vpa", deploymentName) // Build VPA spec content @@ -938,7 +1035,7 @@ func createVPA(ctx *sdk.Context, args *SimpleContainerArgs, deploymentName, name Kind: sdk.String("VerticalPodAutoscaler"), Metadata: &metav1.ObjectMetaArgs{ Name: sdk.String(vpaName), - Namespace: sdk.String(namespace), + Namespace: namespace.ToStringOutput(), // ObjectMetaArgs.Namespace expects a StringPtrInput; StringOutput satisfies it, a bare StringInput does not Labels: sdk.ToStringMap(vpaLabels), Annotations: sdk.ToStringMap(vpaAnnotations), },
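Several of the changes above — the caddyfile-entry annotation on the Service and the namespace argument passed to createVPA — rely on resolving the namespace from the live Namespace resource's Output instead of the program-computed string. A minimal standalone sketch of that ApplyT pattern; identifiers (buildEntry, programNS, svcName) and import paths are illustrative assumptions, not code from this repo:

    package example

    import (
        "fmt"

        corev1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/core/v1"
        sdk "github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )

    // buildEntry renders a reverse_proxy target from the live namespace name,
    // preferring what the cluster reports and falling back to the
    // program-computed name only when the Output is empty. A render error is
    // returned through the Output rather than silently swallowed.
    func buildEntry(ns *corev1.Namespace, programNS, svcName string, port int) sdk.StringOutput {
        return ns.Metadata.Name().ApplyT(func(live *string) (string, error) {
            name := programNS
            if live != nil && *live != "" {
                name = *live
            }
            return fmt.Sprintf("reverse_proxy http://%s.%s.svc.cluster.local:%d", svcName, name, port), nil
        }).(sdk.StringOutput)
    }

On a migrated stack (IgnoreChanges holding the namespace at its legacy name) the rendered target points at the namespace the Service actually lives in; on a fresh deploy the two names coincide and the result is byte-identical to the static render.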