diff --git a/.github/workflows/deploy-backend.yml b/.github/workflows/deploy-backend.yml index 36bdf1386..05baf7bd0 100644 --- a/.github/workflows/deploy-backend.yml +++ b/.github/workflows/deploy-backend.yml @@ -110,6 +110,10 @@ env: # Sonarcloud - do not allow direct usage of untrusted data run-name: Deploy Backend - ${{ inputs.environment }} ${{ inputs.sub_environment }} +concurrency: + group: deploy-backend-${{ github.repository }}-${{ inputs.environment }}-${{ (inputs.environment == 'preprod' || inputs.environment == 'prod') && 'shared-trigger' || inputs.sub_environment }} + cancel-in-progress: false + jobs: deploy-lambda-images: name: Deploy ${{ matrix.lambda_name }} image @@ -247,6 +251,10 @@ jobs: working-directory: infrastructure/instance run: make init + - name: Set Terraform workspace + working-directory: infrastructure/instance + run: make workspace + - name: Terraform Plan # Ignore cancellations to prevent Terraform from being killed while it holds a state lock # A stuck process can still be killed with the force-cancel API operation @@ -293,6 +301,10 @@ jobs: working-directory: infrastructure/instance run: make init + - name: Set Terraform workspace + working-directory: infrastructure/instance + run: make workspace + - name: Terraform Apply # Ignore cancellations to prevent Terraform from being killed while it holds a state lock # A stuck process can still be killed with the force-cancel API operation @@ -302,6 +314,47 @@ jobs: make apply-ci echo "ID_SYNC_QUEUE_ARN=$(make -s output name=id_sync_queue_arn)" >> $GITHUB_ENV + - name: Terraform Init Event Source Mappings + if: ${{ !failure() }} + working-directory: infrastructure/event_source_mappings + run: make init + + - name: Terraform Format Check Event Source Mappings + if: ${{ !failure() }} + working-directory: infrastructure/event_source_mappings + run: make fmt-check + + - name: Terraform Validate Event Source Mappings + if: ${{ !failure() }} + working-directory: 
infrastructure/event_source_mappings + run: make validate + + - name: Adopt Existing Event Source Mappings + if: ${{ !failure() }} + working-directory: infrastructure/event_source_mappings + env: + ALLOW_EVENT_SOURCE_MAPPING_ADOPTION: "true" + run: make adopt + + - name: Terraform Plan Event Source Mappings + if: ${{ !failure() }} + working-directory: infrastructure/event_source_mappings + run: make plan-ci + + - name: Save Event Source Mapping Terraform Plan + if: ${{ !failure() }} + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a + with: + name: ${{ env.ENVIRONMENT }}-${{ env.SUB_ENVIRONMENT }}-event-source-mappings-tfplan + path: infrastructure/event_source_mappings/tfplan + + - name: Terraform Apply Event Source Mappings + if: ${{ !failure() }} + working-directory: infrastructure/event_source_mappings + run: | + make apply-ci + make verify + - name: Install poetry if: ${{ inputs.environment == 'dev' && inputs.create_mns_subscription }} run: pip install poetry==2.1.4 diff --git a/.github/workflows/migrate-event-source-mappings.yml b/.github/workflows/migrate-event-source-mappings.yml new file mode 100644 index 000000000..bda868457 --- /dev/null +++ b/.github/workflows/migrate-event-source-mappings.yml @@ -0,0 +1,111 @@ +name: Migrate Event Source Mappings + +on: + workflow_dispatch: + inputs: + apigee_environment: + type: choice + description: Select the Apigee proxy environment for dev state buckets + options: + - internal-dev + - internal-qa + - ref + default: internal-dev + environment: + type: choice + description: Select the AWS backend environment + options: + - dev + - preprod + - prod + sub_environment: + type: string + description: Set the sub-environment name, e.g. 
internal-dev, int-blue, blue + required: true + confirm_event_source_mapping_migration: + type: boolean + description: Confirm this is the controlled one-time migration for the selected environment + required: true + default: false + +env: + APIGEE_ENVIRONMENT: ${{ inputs.apigee_environment }} + ENVIRONMENT: ${{ inputs.environment }} + SUB_ENVIRONMENT: ${{ inputs.sub_environment }} + +run-name: Migrate Event Source Mappings - ${{ inputs.environment }} ${{ inputs.sub_environment }} + +concurrency: + group: deploy-backend-${{ github.repository }}-${{ inputs.environment }}-${{ (inputs.environment == 'preprod' || inputs.environment == 'prod') && 'shared-trigger' || inputs.sub_environment }} + cancel-in-progress: false + +jobs: + migrate-event-source-mappings: + permissions: + id-token: write + contents: read + runs-on: ubuntu-latest + environment: + name: ${{ inputs.environment }} + steps: + - name: Confirm controlled migration + run: | + set -euo pipefail + + if [ "${CONFIRM_EVENT_SOURCE_MAPPING_MIGRATION}" != "true" ]; then + echo "This workflow is only for the controlled one-time event source mapping migration." + echo "Set confirm_event_source_mapping_migration to true to continue." 
+ exit 1 + fi + env: + CONFIRM_EVENT_SOURCE_MAPPING_MIGRATION: ${{ inputs.confirm_event_source_mapping_migration }} + + - name: Checkout + uses: actions/checkout@0c366fd6a839edf440554fa01a7085ccba70ac98 + + - name: Connect to AWS + uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37 + with: + aws-region: eu-west-2 + role-to-assume: arn:aws:iam::${{ vars.AWS_ACCOUNT_ID }}:role/auto-ops + role-session-name: github-actions + + - uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 + with: + terraform_version: "1.12.2" + + - name: Terraform Init + working-directory: infrastructure/event_source_mappings + run: make init + + - name: Adopt Existing Event Source Mappings + working-directory: infrastructure/event_source_mappings + env: + ALLOW_EVENT_SOURCE_MAPPING_ADOPTION: "true" + run: make adopt + + - name: Terraform Format Check + working-directory: infrastructure/event_source_mappings + run: make fmt-check + + - name: Terraform Validate + working-directory: infrastructure/event_source_mappings + run: make validate + + - name: Terraform Plan + working-directory: infrastructure/event_source_mappings + run: make plan-ci + + - name: Save Terraform Plan + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a + with: + name: ${{ env.ENVIRONMENT }}-${{ env.SUB_ENVIRONMENT }}-event-source-mappings-migration-tfplan + path: infrastructure/event_source_mappings/tfplan + + - name: Terraform Apply + working-directory: infrastructure/event_source_mappings + run: make apply-ci + + - name: Verify Event Source Mappings + working-directory: infrastructure/event_source_mappings + run: make verify diff --git a/.github/workflows/pr-teardown.yml b/.github/workflows/pr-teardown.yml index 9d0d29ca2..b4a2330d2 100644 --- a/.github/workflows/pr-teardown.yml +++ b/.github/workflows/pr-teardown.yml @@ -92,6 +92,12 @@ jobs: echo "Unsubscribing SQS to MNS for notifications..." 
make unsubscribe + - name: Destroy Lambda event source mappings + working-directory: infrastructure/event_source_mappings + run: | + make init apigee_environment=$APIGEE_ENVIRONMENT environment=$BACKEND_ENVIRONMENT sub_environment=$BACKEND_SUB_ENVIRONMENT + make destroy apigee_environment=$APIGEE_ENVIRONMENT environment=$BACKEND_ENVIRONMENT sub_environment=$BACKEND_SUB_ENVIRONMENT + - name: Terraform Destroy working-directory: infrastructure/instance run: | diff --git a/.github/workflows/quality-checks.yml b/.github/workflows/quality-checks.yml index 5566bb2b3..7a208235c 100644 --- a/.github/workflows/quality-checks.yml +++ b/.github/workflows/quality-checks.yml @@ -73,6 +73,18 @@ jobs: - name: Check formatting run: terraform fmt -check -recursive + - name: Validate event source mappings + working-directory: infrastructure/event_source_mappings + run: | + terraform init -backend=false -input=false + terraform validate + + - name: Install ShellCheck + run: sudo apt-get update && sudo apt-get install -y shellcheck + + - name: ShellCheck event source mapping adoption script + run: shellcheck utilities/scripts/adopt_event_source_mappings.sh + testcoverage_and_sonarcloud: name: Test Coverage and SonarCloud runs-on: ubuntu-latest diff --git a/infrastructure/event_source_mappings/.terraform.lock.hcl b/infrastructure/event_source_mappings/.terraform.lock.hcl new file mode 100644 index 000000000..c31874d86 --- /dev/null +++ b/infrastructure/event_source_mappings/.terraform.lock.hcl @@ -0,0 +1,25 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. 
+ +provider "registry.terraform.io/hashicorp/aws" { + version = "6.42.0" + constraints = "~> 6.0" + hashes = [ + "h1:B00CO2gJ6fSyfUGhi+siRqNoUG9jI7PD+3r1dHWv3OI=", + "zh:0dd774a97eaa4371a60e13b5dc56800d4fb1c48d50e79049f75fc4fe26705ff5", + "zh:237d652d8ec028f7bedce1ce056ffe42e2e120d2a4a47fe45b97263cc5948e7e", + "zh:367d9e4b816e5f857956887b8f0770aaa5bec47c4b1e2c7bf924e39192d580d6", + "zh:4194addb5b34bb803fab031a80d84e9d16cac2308df9a498d51e2214c7893900", + "zh:5bcc36226fa5d8a3c37e41ac8cfd2b1eb73e7ae96f8418f6776dcaa16a987198", + "zh:61d0632d4cc7973b779b90c1d3bb2d05a1cd4d7030bb4645831e67cf735ecaa0", + "zh:7c7efaf9e4bb662ba3e8a714abafe7107bdcd1bf2cd0867510ea32762debc11d", + "zh:7d7f8ffe00d4a90184efa454a107bcc46d81f21245baea9678dcfa2fa77ac1be", + "zh:7e534c454cdeafe9cc225bea53397883bb96c54da6ea3421fd53dbc9dfc1bb90", + "zh:99564c99a0672b2a8b666ab2040f36a7c6f372fa79ac43a6657a54734e46fff8", + "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", + "zh:a6b1bb6178798882508f5512222a9433fc21e63cdae83a755650c938e6141d32", + "zh:a87592d6ff3d46ee83756f4f84503b2fe4ffc9c1d5dc9f8d4afccb5e126ae538", + "zh:daa56fb74c5c9d26b327c40b486beac71cdb9a932c7c4e4561f4401cd0d16a4a", + "zh:f35ab043d7121f3a194f42f5b0e0d59fe62d94488acea07e3783405fa5838785", + ] +} diff --git a/infrastructure/event_source_mappings/Makefile b/infrastructure/event_source_mappings/Makefile new file mode 100644 index 000000000..a4812f2f6 --- /dev/null +++ b/infrastructure/event_source_mappings/Makefile @@ -0,0 +1,76 @@ +-include .env + +apigee_environment ?= $(APIGEE_ENVIRONMENT) +environment ?= $(ENVIRONMENT) +sub_environment ?= $(SUB_ENVIRONMENT) +sub_environment_dir := $(if $(findstring pr-,$(sub_environment)),pr,$(sub_environment)) +tf_var_file := ../instance/environments/$(environment)/$(sub_environment_dir)/variables.tfvars +has_sub_environment_scope = $(shell sed -n 's/^[[:space:]]*has_sub_environment_scope[[:space:]]*=[[:space:]]*//p' "$(tf_var_file)" 2>/dev/null | sed 's/[[:space:]]*\#.*$$//' | 
tr -d '[:space:]' | sed -n '1p') +targets_requiring_tf_var_scope := init workspace adopt validate plan plan-ci apply apply-ci verify ensure-destroy-allowed destroy output +requested_targets := $(if $(MAKECMDGOALS),$(MAKECMDGOALS),init) +ifneq ($(filter $(targets_requiring_tf_var_scope),$(requested_targets)),) +ifeq ($(has_sub_environment_scope),) +$(error has_sub_environment_scope not found in $(tf_var_file)) +endif +ifneq ($(filter true false,$(has_sub_environment_scope)),$(has_sub_environment_scope)) +$(error has_sub_environment_scope in $(tf_var_file) must be true or false, got '$(has_sub_environment_scope)') +endif +endif +workspace_name = $(if $(filter false,$(has_sub_environment_scope)),$(environment),$(sub_environment)) +allow_shared_scope_destroy ?= $(ALLOW_SHARED_SCOPE_DESTROY) + +tf_cmd = AWS_PROFILE=$(AWS_PROFILE) terraform + +bucket_name = $(if $(filter dev,$(environment)),immunisation-$(apigee_environment),immunisation-$(environment))-terraform-state-files + +tf_state = -backend-config="bucket=$(bucket_name)" + +tf_vars = \ + -var="sub_environment=$(sub_environment)" \ + -var-file="$(tf_var_file)" + +init: + $(tf_cmd) init $(tf_state) -upgrade + +workspace: + $(tf_cmd) workspace select -or-create $(workspace_name) && echo "Switched to workspace/environment: $(workspace_name)" + +adopt: workspace + ENVIRONMENT='$(environment)' SUB_ENVIRONMENT='$(sub_environment)' RESOURCE_SCOPE='$(workspace_name)' bash ../../utilities/scripts/adopt_event_source_mappings.sh $(tf_vars) + +fmt-check: + $(tf_cmd) fmt -check + +validate: workspace + $(tf_cmd) validate + +plan: workspace + $(tf_cmd) plan $(tf_vars) + +plan-ci: workspace + $(tf_cmd) plan $(tf_vars) -out=tfplan -input=false + +apply: workspace + $(tf_cmd) apply $(tf_vars) --auto-approve + +apply-ci: workspace + $(tf_cmd) apply $(tf_vars) -input=false tfplan + +verify: + ENVIRONMENT='$(environment)' SUB_ENVIRONMENT='$(sub_environment)' RESOURCE_SCOPE='$(workspace_name)' EVENT_SOURCE_MAPPING_ACTION=verify bash 
../../utilities/scripts/adopt_event_source_mappings.sh
+
+ensure-destroy-allowed:
+	@if [ "$(has_sub_environment_scope)" = "false" ] && [ "$(allow_shared_scope_destroy)" != "true" ]; then \
+		echo "Refusing to destroy shared event source mappings in workspace $(workspace_name). Set ALLOW_SHARED_SCOPE_DESTROY=true for controlled teardown."; \
+		exit 1; \
+	fi
+
+destroy: ensure-destroy-allowed workspace # guard runs first so a refused destroy never selects or creates a workspace
+	$(tf_cmd) destroy $(tf_vars) -auto-approve
+	$(tf_cmd) workspace select default
+	$(tf_cmd) workspace delete $(workspace_name)
+
+output:
+	$(tf_cmd) output -raw $(name)
+
+.PHONY : init workspace adopt fmt-check validate plan plan-ci apply apply-ci verify ensure-destroy-allowed destroy output
diff --git a/infrastructure/event_source_mappings/main.tf b/infrastructure/event_source_mappings/main.tf
new file mode 100644
index 000000000..59c319714
--- /dev/null
+++ b/infrastructure/event_source_mappings/main.tf
@@ -0,0 +1,78 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 6"
+    }
+  }
+  backend "s3" {
+    region       = "eu-west-2"
+    key          = "event-source-mappings/state"
+    use_lockfile = true
+  }
+  required_version = ">= 1.10.0" # use_lockfile (S3-native state locking) is only accepted from Terraform 1.10
+}
+
+provider "aws" {
+  region = var.aws_region
+  default_tags {
+    tags = {
+      Project     = var.project_name
+      Environment = local.resource_scope
+      Service     = var.service
+    }
+  }
+}
+
+locals {
+  resource_scope      = var.has_sub_environment_scope ?
var.sub_environment : var.environment + short_prefix = "${var.project_short_name}-${var.sub_environment}" + events_table_name = "imms-${local.resource_scope}-imms-events" + id_sync_queue_name = "imms-${local.resource_scope}-id-sync-queue" + delta_lambda_name = "${local.short_prefix}-delta-lambda" + delta_dlq_name = "${local.short_prefix}-delta-dlq" + id_sync_lambda_name = "${local.short_prefix}-id-sync-lambda" +} + +data "aws_dynamodb_table" "events" { + name = local.events_table_name +} + +data "aws_sqs_queue" "delta_dlq" { + name = local.delta_dlq_name +} + +data "aws_sqs_queue" "id_sync" { + name = local.id_sync_queue_name +} + +data "aws_lambda_function" "delta" { + function_name = local.delta_lambda_name +} + +data "aws_lambda_function" "id_sync" { + function_name = local.id_sync_lambda_name +} + +resource "aws_lambda_event_source_mapping" "delta_trigger" { + event_source_arn = data.aws_dynamodb_table.events.stream_arn + function_name = data.aws_lambda_function.delta.function_name + starting_position = "TRIM_HORIZON" + + destination_config { + on_failure { + destination_arn = data.aws_sqs_queue.delta_dlq.arn + } + } + + maximum_retry_attempts = 0 +} + +resource "aws_lambda_event_source_mapping" "id_sync_sqs_trigger" { + event_source_arn = data.aws_sqs_queue.id_sync.arn + function_name = data.aws_lambda_function.id_sync.arn + + batch_size = 10 + maximum_batching_window_in_seconds = 5 + function_response_types = ["ReportBatchItemFailures"] +} diff --git a/infrastructure/event_source_mappings/outputs.tf b/infrastructure/event_source_mappings/outputs.tf new file mode 100644 index 000000000..6d0d83d32 --- /dev/null +++ b/infrastructure/event_source_mappings/outputs.tf @@ -0,0 +1,34 @@ +output "id_sync_queue_arn" { + description = "The ARN of the ID Sync (MNS NHS Number change) SQS queue" + value = data.aws_sqs_queue.id_sync.arn +} + +output "delta_trigger_uuid" { + description = "The UUID of the Delta Lambda event source mapping" + value = 
aws_lambda_event_source_mapping.delta_trigger.id +} + +output "delta_trigger_function_arn" { + description = "The ARN of the Delta Lambda targeted by the event source mapping" + value = data.aws_lambda_function.delta.arn +} + +output "delta_trigger_state" { + description = "The current state of the Delta Lambda event source mapping" + value = aws_lambda_event_source_mapping.delta_trigger.state +} + +output "id_sync_sqs_trigger_uuid" { + description = "The UUID of the ID Sync SQS event source mapping" + value = aws_lambda_event_source_mapping.id_sync_sqs_trigger.id +} + +output "id_sync_sqs_trigger_function_arn" { + description = "The ARN of the ID Sync Lambda targeted by the event source mapping" + value = data.aws_lambda_function.id_sync.arn +} + +output "id_sync_sqs_trigger_state" { + description = "The current state of the ID Sync SQS event source mapping" + value = aws_lambda_event_source_mapping.id_sync_sqs_trigger.state +} diff --git a/infrastructure/event_source_mappings/variables.tf b/infrastructure/event_source_mappings/variables.tf new file mode 100644 index 000000000..f6c233df2 --- /dev/null +++ b/infrastructure/event_source_mappings/variables.tf @@ -0,0 +1,97 @@ +variable "environment" { + type = string + description = "Environment (AWS Account) name - dev, preprod or prod" +} + +variable "sub_environment" { + type = string + description = "Sub-environment name, e.g. internal-dev, int-blue, blue" +} + +variable "has_sub_environment_scope" { + description = "True if resources are scoped to the sub-environment. False for blue/green shared resources." 
+ type = bool + default = false +} + +variable "project_name" { + type = string + default = "immunisation" +} + +variable "project_short_name" { + type = string + default = "imms" +} + +variable "service" { + type = string + default = "fhir-api" +} + +variable "aws_region" { + type = string + default = "eu-west-2" + + validation { + condition = var.aws_region == "eu-west-2" + error_message = "AWS Region must be set to eu-west-2." + } +} + +variable "immunisation_account_id" { + type = string + description = "Immunisation AWS Account ID" +} + +variable "dspp_core_account_id" { + type = string + description = "DSPP Core AWS Account ID" +} + +variable "mns_account_id" { + type = string + description = "MNS AWS account ID - trusted source for MNS notifications" + default = "631615744739" +} + +variable "pds_environment" { + type = string + default = "int" +} + +variable "mns_environment" { + type = string + default = "int" +} + +variable "error_alarm_notifications_enabled" { + default = true + description = "Switch to enable error alarm notifications to Slack" + type = bool +} + +variable "create_mesh_processor" { + type = bool + default = false +} + +variable "mesh_no_invocation_period_seconds" { + type = number + default = 300 +} + +variable "dspp_submission_s3_bucket_name" { + type = string + default = "nhsd-dspp-core-ref-s3-submission-upload" +} + +variable "dspp_submission_kms_key_alias" { + type = string + default = "nhsd-dspp-core-ref-s3-submission-upload-key" +} + +variable "dynamodb_point_in_time_recovery_enabled" { + type = bool + default = false +} diff --git a/infrastructure/instance/README.md b/infrastructure/instance/README.md index 8f3804925..8ef99ab91 100644 --- a/infrastructure/instance/README.md +++ b/infrastructure/instance/README.md @@ -33,3 +33,81 @@ Note: If you switch environment configuration in .env ensure that you run `make If you want to apply Terraform to a workspace created by a PR you can set the above SUB_ENVIRONMENT to the `PR-number` 
and ENVIRONMENT set to `dev`. E.g. `pr-57`. You can use this to test out changes when tests fail in CI. + +## Lambda Trigger Handoff + +The `delta_trigger` and `id_sync_sqs_trigger` event source mappings are managed from `../event_source_mappings` so the main instance plan does not rewrite shared backend state. The deploy workflow applies the main instance first, safely adopts any existing trigger mappings into the dedicated trigger workspace, then plans and applies trigger changes from that workspace. + +### First Cutover + +The normal backend deploy performs idempotent adoption before it plans trigger changes. Use the `Migrate Event Source Mappings` workflow when you want to perform the handoff separately from a full backend deploy. Select the target `environment` and `sub_environment`, then set `confirm_event_source_mapping_migration` to `true`. The migration workflow imports existing mappings, runs `terraform validate`, saves a dedicated trigger `tfplan` artifact, applies that saved plan, and verifies the final Lambda targets. + +Before starting, check for duplicate or stale mappings. 
Replace the variable values with the shared scope and target sub-environment: + +```bash +RESOURCE_SCOPE=preprod +SUB_ENVIRONMENT=int-blue +COUNTERPART_SUB_ENVIRONMENT=int-green + +EVENTS_STREAM_ARN="$(aws dynamodb describe-table \ + --table-name "imms-${RESOURCE_SCOPE}-imms-events" \ + --query 'Table.LatestStreamArn' \ + --output text)" + +ID_SYNC_QUEUE_URL="$(aws sqs get-queue-url \ + --queue-name "imms-${RESOURCE_SCOPE}-id-sync-queue" \ + --query 'QueueUrl' \ + --output text)" + +ID_SYNC_QUEUE_ARN="$(aws sqs get-queue-attributes \ + --queue-url "${ID_SYNC_QUEUE_URL}" \ + --attribute-names QueueArn \ + --query 'Attributes.QueueArn' \ + --output text)" + +aws lambda list-event-source-mappings \ + --event-source-arn "${EVENTS_STREAM_ARN}" \ + --function-name "imms-${SUB_ENVIRONMENT}-delta-lambda" + +aws lambda list-event-source-mappings \ + --event-source-arn "${EVENTS_STREAM_ARN}" \ + --function-name "imms-${COUNTERPART_SUB_ENVIRONMENT}-delta-lambda" + +aws lambda list-event-source-mappings \ + --event-source-arn "${ID_SYNC_QUEUE_ARN}" \ + --function-name "imms-${SUB_ENVIRONMENT}-id-sync-lambda" + +aws lambda list-event-source-mappings \ + --event-source-arn "${ID_SYNC_QUEUE_ARN}" \ + --function-name "imms-${COUNTERPART_SUB_ENVIRONMENT}-id-sync-lambda" +``` + +### Rollback + +If the cutover applies cleanly but the target sub-environment must be rolled back, rerun the migration workflow with the previous active sub-environment selected. The workflow should update the managed mappings back to the previous Lambda targets through a saved trigger plan. 
Verify the final UUIDs, Lambda ARNs, and states with:
+
+```bash
+cd infrastructure/event_source_mappings
+make init
+make workspace
+terraform output delta_trigger_uuid
+terraform output delta_trigger_function_arn
+terraform output delta_trigger_state
+terraform output id_sync_sqs_trigger_uuid
+terraform output id_sync_sqs_trigger_function_arn
+terraform output id_sync_sqs_trigger_state
+make verify
+```
+
+### Failed Apply Recovery
+
+If the migration fails after import but before apply, rerun the same migration workflow for the same environment and sub-environment. The import step is idempotent for resources already in state and does not delete AWS mappings.
+
+If verification fails, inspect live AWS mappings before retrying:
+
+```bash
+aws lambda get-event-source-mapping --uuid "<mapping-uuid>"
+aws lambda list-event-source-mappings --event-source-arn "<event-source-arn>"
+```
+
+Do not run `make destroy` for shared blue/green trigger workspaces unless this is a controlled teardown. Shared-scope destroys require `ALLOW_SHARED_SCOPE_DESTROY=true`.
diff --git a/infrastructure/instance/delta.tf b/infrastructure/instance/delta.tf index 32238d42c..681b99b02 100644 --- a/infrastructure/instance/delta.tf +++ b/infrastructure/instance/delta.tf @@ -70,20 +70,6 @@ resource "aws_lambda_function" "delta_sync_lambda" { ] } - -resource "aws_lambda_event_source_mapping" "delta_trigger" { - event_source_arn = aws_dynamodb_table.events-dynamodb-table.stream_arn - function_name = aws_lambda_function.delta_sync_lambda.function_name - starting_position = "TRIM_HORIZON" - destination_config { - on_failure { - destination_arn = aws_sqs_queue.dlq.arn - } - } - maximum_retry_attempts = 0 -} - - resource "aws_sqs_queue" "dlq" { name = "${local.short_prefix}-${local.dlq_name}" } diff --git a/infrastructure/instance/id_sync_lambda.tf b/infrastructure/instance/id_sync_lambda.tf index 9e70f37f5..33fdb57af 100644 --- a/infrastructure/instance/id_sync_lambda.tf +++ b/infrastructure/instance/id_sync_lambda.tf @@ -258,20 +258,3 @@ resource "aws_cloudwatch_metric_alarm" "id_sync_error_alarm" { alarm_actions = [data.aws_sns_topic.imms_system_alert_errors.arn] treat_missing_data = "notBreaching" } - - - -# delete config_lambda_notification / new_s3_invoke_permission - not required; duplicate - -# NEW -resource "aws_lambda_event_source_mapping" "id_sync_sqs_trigger" { - event_source_arn = aws_sqs_queue.id_sync_queue.arn - function_name = aws_lambda_function.id_sync_lambda.arn - - # Optional: Configure batch size and other settings - batch_size = 10 - maximum_batching_window_in_seconds = 5 - - # Optional: Configure error handling - function_response_types = ["ReportBatchItemFailures"] -} diff --git a/infrastructure/instance/temp.tf b/infrastructure/instance/temp.tf index a17db4eb9..e8dc2d840 100644 --- a/infrastructure/instance/temp.tf +++ b/infrastructure/instance/temp.tf @@ -259,6 +259,22 @@ removed { } } +removed { + from = aws_lambda_event_source_mapping.delta_trigger + + lifecycle { + destroy = false + } +} + +removed { + from = 
aws_lambda_event_source_mapping.id_sync_sqs_trigger
+
+  lifecycle {
+    destroy = false
+  }
+}
+
 removed {
   from = aws_ecr_repository.redis_sync_lambda_repository
 
diff --git a/utilities/scripts/adopt_event_source_mappings.sh b/utilities/scripts/adopt_event_source_mappings.sh
new file mode 100644
index 000000000..1ac07f0f5
--- /dev/null
+++ b/utilities/scripts/adopt_event_source_mappings.sh
@@ -0,0 +1,291 @@
+#!/usr/bin/env bash
+#
+# Adopts (imports) or verifies the Lambda event source mappings managed from
+# infrastructure/event_source_mappings.
+#
+# Environment variables:
+#   ENVIRONMENT, SUB_ENVIRONMENT, RESOURCE_SCOPE   required
+#   EVENT_SOURCE_MAPPING_ACTION                    "adopt" (default) or "verify"
+#   ALLOW_EVENT_SOURCE_MAPPING_ADOPTION            must be "true" for "adopt"
+# Positional arguments are passed through to "terraform import".
+
+set -euo pipefail
+
+# Bash clears errexit inside command substitutions by default, so a failed AWS
+# lookup or an "exit 1" raised in a nested $(...) would be swallowed and read
+# as "no mapping found". inherit_errexit (bash 4.4+) keeps those failures fatal.
+if ! shopt -s inherit_errexit 2>/dev/null; then
+  echo "bash 4.4 or newer is required (inherit_errexit is unavailable)." >&2
+  exit 1
+fi
+
+environment="${ENVIRONMENT:-${environment:-}}"
+sub_environment="${SUB_ENVIRONMENT:-${sub_environment:-}}"
+resource_scope="${RESOURCE_SCOPE:-${resource_scope:-}}"
+action="${EVENT_SOURCE_MAPPING_ACTION:-adopt}"
+
+# Exit with an error unless the named setting has a non-empty value.
+require_value() {
+  local name="$1"
+  local value="$2"
+
+  if [[ -z "${value}" ]]; then
+    echo "${name} must be set."
+    exit 1
+  fi
+}
+
+require_value "ENVIRONMENT" "${environment}"
+require_value "SUB_ENVIRONMENT" "${sub_environment}"
+require_value "RESOURCE_SCOPE" "${resource_scope}"
+
+# Refuse to run adoption unless ALLOW_EVENT_SOURCE_MAPPING_ADOPTION=true is set.
+require_controlled_adoption() {
+  if [[ "${ALLOW_EVENT_SOURCE_MAPPING_ADOPTION:-}" != "true" ]]; then
+    echo "ALLOW_EVENT_SOURCE_MAPPING_ADOPTION=true must be set for the controlled event source mapping migration."
+    exit 1
+  fi
+}
+
+# Print the mappings in a list-event-source-mappings response to stderr, so
+# stdout stays free for the UUID that callers capture.
+log_mappings() {
+  local mappings_json="$1"
+  local event_source_arn="$2"
+  local function_name="$3"
+
+  if jq -e '.EventSourceMappings | length == 0' <<<"${mappings_json}" >/dev/null; then
+    echo "No event source mappings found for ${function_name} on ${event_source_arn}."
>&2 + return 0 + fi + + echo "Event source mappings found for ${function_name} on ${event_source_arn}:" >&2 + jq -r \ + '.EventSourceMappings[] + | " UUID=\(.UUID) State=\(.State) FunctionArn=\(.FunctionArn // "unknown")"' \ + <<<"${mappings_json}" >&2 +} + +lookup_mapping_uuid() { + local event_source_arn="$1" + local function_name="$2" + local mappings_json + local active_mapping_count + local mapping_uuid + + mappings_json="$(aws lambda list-event-source-mappings \ + --event-source-arn "${event_source_arn}" \ + --function-name "${function_name}" \ + --output json)" + + log_mappings "${mappings_json}" "${event_source_arn}" "${function_name}" + + active_mapping_count="$(jq '[.EventSourceMappings[]? | select(.State != "Deleting")] | length' <<<"${mappings_json}")" + + if ((active_mapping_count > 1)); then + echo "Ambiguous event source mappings for ${function_name} on ${event_source_arn}; refusing to continue." >&2 + exit 1 + fi + + if ((active_mapping_count == 0)); then + return 0 + fi + + mapping_uuid="$(jq -r '.EventSourceMappings[] | select(.State != "Deleting") | .UUID' <<<"${mappings_json}")" + printf '%s' "${mapping_uuid}" +} + +counterpart_for_sub_environment() { + local name="$1" + + case "${name}" in + blue) + printf 'green' + ;; + green) + printf 'blue' + ;; + *-blue) + printf '%s-green' "${name%-blue}" + ;; + *-green) + printf '%s-blue' "${name%-green}" + ;; + *) + return 1 + ;; + esac +} + +state_has_resource() { + local address="$1" + + terraform state show "${address}" >/dev/null 2>&1 +} + +resolve_mapping_uuid() { + local address="$1" + local event_source_arn="$2" + local target_function_name="$3" + local counterpart_function_name="${4:-}" + local target_mapping_uuid="" + local counterpart_mapping_uuid="" + local mapping_uuid="" + local resource_in_state="false" + + if state_has_resource "${address}"; then + resource_in_state="true" + fi + + target_mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${target_function_name}")" + + if [[ -n 
"${counterpart_function_name}" ]]; then + counterpart_mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${counterpart_function_name}")" + fi + + if [[ -n "${target_mapping_uuid}" && -n "${counterpart_mapping_uuid}" ]]; then + echo "Both target and counterpart mappings exist for ${address}; refusing to continue." >&2 + echo "Target UUID: ${target_mapping_uuid}" >&2 + echo "Counterpart UUID: ${counterpart_mapping_uuid}" >&2 + exit 1 + fi + + if [[ "${resource_in_state}" == "true" ]]; then + echo "${address} is already managed in this workspace." >&2 + return 0 + fi + + if [[ -n "${counterpart_mapping_uuid}" ]]; then + mapping_uuid="${counterpart_mapping_uuid}" + else + mapping_uuid="${target_mapping_uuid}" + fi + + if [[ -z "${mapping_uuid}" ]]; then + echo "No existing event source mapping found for ${address}; Terraform will create it." >&2 + return 0 + fi + + printf '%s' "${mapping_uuid}" +} + +import_mapping() { + local address="$1" + local mapping_uuid="$2" + + shift 2 + + if [[ -z "${mapping_uuid}" ]]; then + return 0 + fi + + terraform import -input=false "$@" "${address}" "${mapping_uuid}" >/dev/null + echo "Imported ${address} into workspace ${resource_scope} using ${mapping_uuid}." +} + +verify_mapping() { + local address="$1" + local event_source_arn="$2" + local target_function_name="$3" + local counterpart_function_name="${4:-}" + local target_mapping_uuid="" + local counterpart_mapping_uuid="" + + target_mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${target_function_name}")" + + if [[ -z "${target_mapping_uuid}" ]]; then + echo "No final event source mapping found for ${address} targeting ${target_function_name}." 
+ exit 1 + fi + + if [[ -n "${counterpart_function_name}" ]]; then + counterpart_mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${counterpart_function_name}")" + + if [[ -n "${counterpart_mapping_uuid}" ]]; then + echo "A stale counterpart mapping remains for ${address}: ${counterpart_mapping_uuid} targets ${counterpart_function_name}." + exit 1 + fi + fi + + echo "Verified ${address} targets ${target_function_name} with UUID ${target_mapping_uuid}." +} + +events_table_name="imms-${resource_scope}-imms-events" +delta_event_source_arn="$(aws dynamodb describe-table \ + --table-name "${events_table_name}" \ + --query 'Table.LatestStreamArn' \ + --output text)" + +if [[ -z "${delta_event_source_arn}" || "${delta_event_source_arn}" == "None" ]]; then + echo "Unable to resolve the DynamoDB stream ARN for ${events_table_name}." + exit 1 +fi + +id_sync_queue_name="imms-${resource_scope}-id-sync-queue" +id_sync_queue_url="$(aws sqs get-queue-url \ + --queue-name "${id_sync_queue_name}" \ + --query 'QueueUrl' \ + --output text)" +id_sync_queue_arn="$(aws sqs get-queue-attributes \ + --queue-url "${id_sync_queue_url}" \ + --attribute-names QueueArn \ + --query 'Attributes.QueueArn' \ + --output text)" + +target_delta_function="imms-${sub_environment}-delta-lambda" +target_id_sync_function="imms-${sub_environment}-id-sync-lambda" +counterpart_delta_function="" +counterpart_id_sync_function="" + +if [[ "${resource_scope}" != "${sub_environment}" ]] && counterpart_sub_environment="$(counterpart_for_sub_environment "${sub_environment}")"; then + counterpart_delta_function="imms-${counterpart_sub_environment}-delta-lambda" + counterpart_id_sync_function="imms-${counterpart_sub_environment}-id-sync-lambda" +fi + +case "${action}" in + adopt) + delta_mapping_uuid="" + id_sync_mapping_uuid="" + + require_controlled_adoption + + delta_mapping_uuid="$(resolve_mapping_uuid \ + "aws_lambda_event_source_mapping.delta_trigger" \ + "${delta_event_source_arn}" \ + 
"${target_delta_function}" \ + "${counterpart_delta_function}")" + + id_sync_mapping_uuid="$(resolve_mapping_uuid \ + "aws_lambda_event_source_mapping.id_sync_sqs_trigger" \ + "${id_sync_queue_arn}" \ + "${target_id_sync_function}" \ + "${counterpart_id_sync_function}")" + + import_mapping \ + "aws_lambda_event_source_mapping.delta_trigger" \ + "${delta_mapping_uuid}" \ + "$@" + + import_mapping \ + "aws_lambda_event_source_mapping.id_sync_sqs_trigger" \ + "${id_sync_mapping_uuid}" \ + "$@" + ;; + verify) + verify_mapping \ + "aws_lambda_event_source_mapping.delta_trigger" \ + "${delta_event_source_arn}" \ + "${target_delta_function}" \ + "${counterpart_delta_function}" + + verify_mapping \ + "aws_lambda_event_source_mapping.id_sync_sqs_trigger" \ + "${id_sync_queue_arn}" \ + "${target_id_sync_function}" \ + "${counterpart_id_sync_function}" + ;; + *) + echo "Unsupported EVENT_SOURCE_MAPPING_ACTION: ${action}" + exit 1 + ;; +esac