From 4f72ae9fd8f77bcb63c765ed83348327e62661c4 Mon Sep 17 00:00:00 2001 From: Thomas-Boyle Date: Wed, 22 Apr 2026 11:17:42 +0100 Subject: [PATCH 1/6] Enhance blue/green deployment workflow for Lambda triggers - Added steps to set the Terraform workspace and manage shared Lambda triggers during blue/green deployments in the deploy-backend.yml workflow. - Introduced a new script, manage_blue_green_event_source_mappings.sh, to handle the preparation and cleanup of event source mappings for Lambda functions. - Updated README.md to document the new blue/green Lambda trigger handoff process, removing manual steps from the deployment flow. --- .github/workflows/deploy-backend.yml | 17 +++ infrastructure/instance/README.md | 6 + ...manage_blue_green_event_source_mappings.sh | 139 ++++++++++++++++++ 3 files changed, 162 insertions(+) create mode 100644 utilities/scripts/manage_blue_green_event_source_mappings.sh diff --git a/.github/workflows/deploy-backend.yml b/.github/workflows/deploy-backend.yml index c50a04e29b..79f41ed8e6 100644 --- a/.github/workflows/deploy-backend.yml +++ b/.github/workflows/deploy-backend.yml @@ -247,6 +247,14 @@ jobs: working-directory: infrastructure/instance run: make init + - name: Set Terraform workspace + working-directory: infrastructure/instance + run: make workspace + + - name: Adopt shared Lambda triggers for blue/green deploys + working-directory: infrastructure/instance + run: bash ../../utilities/scripts/manage_blue_green_event_source_mappings.sh prepare-state + - name: Terraform Plan # Ignore cancellations to prevent Terraform from being killed while it holds a state lock # A stuck process can still be killed with the force-cancel API operation @@ -293,6 +301,15 @@ jobs: working-directory: infrastructure/instance run: make init + - name: Set Terraform workspace + working-directory: infrastructure/instance + run: make workspace + + - name: Remove stale Lambda triggers for blue/green deploys + if: ${{ !failure() }} + working-directory: 
infrastructure/instance + run: bash ../../utilities/scripts/manage_blue_green_event_source_mappings.sh cleanup-stale + - name: Terraform Apply # Ignore cancellations to prevent Terraform from being killed while it holds a state lock # A stuck process can still be killed with the force-cancel API operation diff --git a/infrastructure/instance/README.md b/infrastructure/instance/README.md index 8f38049252..6f5e0e9b59 100644 --- a/infrastructure/instance/README.md +++ b/infrastructure/instance/README.md @@ -33,3 +33,9 @@ Note: If you switch environment configuration in .env ensure that you run `make If you want to apply Terraform to a workspace created by a PR you can set the above SUB_ENVIRONMENT to the `PR-number` and ENVIRONMENT set to `dev`. E.g. `pr-57`. You can use this to test out changes when tests fail in CI. + +## Blue/Green Lambda Trigger Handoff + +For split sub-environments such as `int-blue`/`int-green` and `prod-blue`/`prod-green`, the deploy workflow now reimports the shared `delta_trigger` and `id_sync_sqs_trigger` resources into the target Terraform workspace before planning. On apply, it also deletes any stale trigger that still points at the target side's old dedicated Lambda function. + +This removes the release-time `Disable delta` and `Disable ID sync` steps from the repository-managed deployment flow. The remaining operational follow-up is outside this repository: update the Jira Smart Checklist release templates to remove those manual checklist items once the automated flow has been rolled out. 
diff --git a/utilities/scripts/manage_blue_green_event_source_mappings.sh b/utilities/scripts/manage_blue_green_event_source_mappings.sh new file mode 100644 index 0000000000..4af78e227f --- /dev/null +++ b/utilities/scripts/manage_blue_green_event_source_mappings.sh @@ -0,0 +1,139 @@ +#!/usr/bin/env bash + +set -euo pipefail + +mode="${1:-prepare-state}" +sub_environment="${SUB_ENVIRONMENT:-${sub_environment:-}}" + +if [[ -z "${sub_environment}" ]]; then + echo "SUB_ENVIRONMENT must be set." + exit 1 +fi + +if [[ ! "${sub_environment}" =~ -(blue|green)$ ]]; then + echo "Skipping Lambda trigger handoff for ${sub_environment}." + exit 0 +fi + +current_colour="${BASH_REMATCH[1]}" +counterpart_colour="blue" +if [[ "${current_colour}" == "blue" ]]; then + counterpart_colour="green" +fi + +counterpart_sub_environment="${sub_environment%-${current_colour}}-${counterpart_colour}" +current_workspace="$(terraform workspace show)" + +if [[ "${current_workspace}" != "${sub_environment}" ]]; then + echo "Terraform workspace ${current_workspace} does not match SUB_ENVIRONMENT ${sub_environment}." 
+ exit 1 +fi + +lookup_mapping_uuid() { + local event_source_arn="$1" + local function_name="$2" + local mapping_uuid + + mapping_uuid="$(aws lambda list-event-source-mappings \ + --event-source-arn "${event_source_arn}" \ + --function-name "${function_name}" \ + --query 'EventSourceMappings[0].UUID' \ + --output text)" + + if [[ "${mapping_uuid}" == "None" ]]; then + return 0 + fi + + printf '%s' "${mapping_uuid}" +} + +resolve_event_source_arns() { + id_sync_queue_arn="$(terraform output -raw id_sync_queue_arn)" + events_table_name="$(terraform output -raw dynamodb_table_name)" + delta_event_source_arn="$(aws dynamodb describe-table \ + --table-name "${events_table_name}" \ + --query 'Table.LatestStreamArn' \ + --output text)" + + if [[ -z "${delta_event_source_arn}" || "${delta_event_source_arn}" == "None" ]]; then + echo "Unable to resolve the DynamoDB stream ARN for ${events_table_name}." + exit 1 + fi +} + +prepare_state() { + local address="$1" + local event_source_arn="$2" + local counterpart_function_name="$3" + local target_function_name="$4" + local mapping_uuid="" + + mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${counterpart_function_name}")" + if [[ -z "${mapping_uuid}" ]]; then + mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${target_function_name}")" + fi + + if [[ -z "${mapping_uuid}" ]]; then + echo "Unable to find an event source mapping for ${address}." + exit 1 + fi + + terraform state rm "${address}" >/dev/null 2>&1 || true + terraform import "${address}" "${mapping_uuid}" >/dev/null + + echo "Imported ${address} into workspace ${sub_environment} using ${mapping_uuid}." 
+} + +cleanup_stale_mapping() { + local event_source_arn="$1" + local counterpart_function_name="$2" + local target_function_name="$3" + local counterpart_uuid="" + local target_uuid="" + + counterpart_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${counterpart_function_name}")" + target_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${target_function_name}")" + + if [[ -z "${target_uuid}" || "${target_uuid}" == "${counterpart_uuid}" ]]; then + return 0 + fi + + aws lambda delete-event-source-mapping --uuid "${target_uuid}" >/dev/null + echo "Deleted stale event source mapping ${target_uuid} for ${target_function_name}." +} + +resolve_event_source_arns + +target_delta_function="imms-${sub_environment}-delta-lambda" +counterpart_delta_function="imms-${counterpart_sub_environment}-delta-lambda" +target_id_sync_function="imms-${sub_environment}-id-sync-lambda" +counterpart_id_sync_function="imms-${counterpart_sub_environment}-id-sync-lambda" + +case "${mode}" in + prepare-state) + prepare_state \ + "aws_lambda_event_source_mapping.delta_trigger" \ + "${delta_event_source_arn}" \ + "${counterpart_delta_function}" \ + "${target_delta_function}" + prepare_state \ + "aws_lambda_event_source_mapping.id_sync_sqs_trigger" \ + "${id_sync_queue_arn}" \ + "${counterpart_id_sync_function}" \ + "${target_id_sync_function}" + ;; + cleanup-stale) + cleanup_stale_mapping \ + "${delta_event_source_arn}" \ + "${counterpart_delta_function}" \ + "${target_delta_function}" + cleanup_stale_mapping \ + "${id_sync_queue_arn}" \ + "${counterpart_id_sync_function}" \ + "${target_id_sync_function}" + ;; + *) + echo "Unsupported mode: ${mode}. Use prepare-state or cleanup-stale." 
+ exit 1 + ;; +esac From 0174644f6b195543196cab8d9a34a07577d27e89 Mon Sep 17 00:00:00 2001 From: Thomas-Boyle Date: Mon, 27 Apr 2026 09:29:14 +0100 Subject: [PATCH 2/6] Refactor Lambda event source mappings and update deployment workflows - Removed the management of shared Lambda triggers for blue/green deployments from the deploy-backend.yml workflow. - Introduced a new workflow step to apply Lambda event source mappings from a dedicated directory. - Added a script to adopt existing event source mappings for Lambda functions, streamlining the deployment process. - Updated the pr-teardown.yml workflow to include the destruction of Lambda event source mappings. - Created new Terraform configuration files for managing event source mappings, including variables and outputs. - Updated README.md to reflect changes in the Lambda trigger management process. --- .github/workflows/deploy-backend.yml | 17 +-- .github/workflows/pr-teardown.yml | 6 + .../event_source_mappings/.terraform.lock.hcl | 25 +++ infrastructure/event_source_mappings/Makefile | 46 ++++++ infrastructure/event_source_mappings/main.tf | 78 ++++++++++ .../event_source_mappings/outputs.tf | 4 + .../event_source_mappings/variables.tf | 97 ++++++++++++ infrastructure/instance/README.md | 6 +- infrastructure/instance/delta.tf | 14 -- infrastructure/instance/id_sync_lambda.tf | 17 --- infrastructure/instance/temp.tf | 16 ++ .../scripts/adopt_event_source_mappings.sh | 142 ++++++++++++++++++ ...manage_blue_green_event_source_mappings.sh | 139 ----------------- 13 files changed, 424 insertions(+), 183 deletions(-) create mode 100644 infrastructure/event_source_mappings/.terraform.lock.hcl create mode 100644 infrastructure/event_source_mappings/Makefile create mode 100644 infrastructure/event_source_mappings/main.tf create mode 100644 infrastructure/event_source_mappings/outputs.tf create mode 100644 infrastructure/event_source_mappings/variables.tf create mode 100644 
utilities/scripts/adopt_event_source_mappings.sh delete mode 100644 utilities/scripts/manage_blue_green_event_source_mappings.sh diff --git a/.github/workflows/deploy-backend.yml b/.github/workflows/deploy-backend.yml index 79f41ed8e6..e3dc59ce79 100644 --- a/.github/workflows/deploy-backend.yml +++ b/.github/workflows/deploy-backend.yml @@ -251,10 +251,6 @@ jobs: working-directory: infrastructure/instance run: make workspace - - name: Adopt shared Lambda triggers for blue/green deploys - working-directory: infrastructure/instance - run: bash ../../utilities/scripts/manage_blue_green_event_source_mappings.sh prepare-state - - name: Terraform Plan # Ignore cancellations to prevent Terraform from being killed while it holds a state lock # A stuck process can still be killed with the force-cancel API operation @@ -305,11 +301,6 @@ jobs: working-directory: infrastructure/instance run: make workspace - - name: Remove stale Lambda triggers for blue/green deploys - if: ${{ !failure() }} - working-directory: infrastructure/instance - run: bash ../../utilities/scripts/manage_blue_green_event_source_mappings.sh cleanup-stale - - name: Terraform Apply # Ignore cancellations to prevent Terraform from being killed while it holds a state lock # A stuck process can still be killed with the force-cancel API operation @@ -319,6 +310,14 @@ jobs: make apply-ci echo "ID_SYNC_QUEUE_ARN=$(make -s output name=id_sync_queue_arn)" >> $GITHUB_ENV + - name: Apply Lambda event source mappings + if: ${{ !failure() }} + working-directory: infrastructure/event_source_mappings + run: | + make init + make adopt + make apply + - name: Install poetry if: ${{ inputs.environment == 'dev' && inputs.create_mns_subscription }} run: pip install poetry==2.1.4 diff --git a/.github/workflows/pr-teardown.yml b/.github/workflows/pr-teardown.yml index 56b9794e44..e40aa9eb30 100644 --- a/.github/workflows/pr-teardown.yml +++ b/.github/workflows/pr-teardown.yml @@ -92,6 +92,12 @@ jobs: echo "Unsubscribing SQS to 
MNS for notifications..." make unsubscribe + - name: Destroy Lambda event source mappings + working-directory: infrastructure/event_source_mappings + run: | + make init apigee_environment=$APIGEE_ENVIRONMENT environment=$BACKEND_ENVIRONMENT sub_environment=$BACKEND_SUB_ENVIRONMENT + make destroy apigee_environment=$APIGEE_ENVIRONMENT environment=$BACKEND_ENVIRONMENT sub_environment=$BACKEND_SUB_ENVIRONMENT + - name: Terraform Destroy working-directory: infrastructure/instance run: | diff --git a/infrastructure/event_source_mappings/.terraform.lock.hcl b/infrastructure/event_source_mappings/.terraform.lock.hcl new file mode 100644 index 0000000000..c31874d864 --- /dev/null +++ b/infrastructure/event_source_mappings/.terraform.lock.hcl @@ -0,0 +1,25 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. + +provider "registry.terraform.io/hashicorp/aws" { + version = "6.42.0" + constraints = "~> 6.0" + hashes = [ + "h1:B00CO2gJ6fSyfUGhi+siRqNoUG9jI7PD+3r1dHWv3OI=", + "zh:0dd774a97eaa4371a60e13b5dc56800d4fb1c48d50e79049f75fc4fe26705ff5", + "zh:237d652d8ec028f7bedce1ce056ffe42e2e120d2a4a47fe45b97263cc5948e7e", + "zh:367d9e4b816e5f857956887b8f0770aaa5bec47c4b1e2c7bf924e39192d580d6", + "zh:4194addb5b34bb803fab031a80d84e9d16cac2308df9a498d51e2214c7893900", + "zh:5bcc36226fa5d8a3c37e41ac8cfd2b1eb73e7ae96f8418f6776dcaa16a987198", + "zh:61d0632d4cc7973b779b90c1d3bb2d05a1cd4d7030bb4645831e67cf735ecaa0", + "zh:7c7efaf9e4bb662ba3e8a714abafe7107bdcd1bf2cd0867510ea32762debc11d", + "zh:7d7f8ffe00d4a90184efa454a107bcc46d81f21245baea9678dcfa2fa77ac1be", + "zh:7e534c454cdeafe9cc225bea53397883bb96c54da6ea3421fd53dbc9dfc1bb90", + "zh:99564c99a0672b2a8b666ab2040f36a7c6f372fa79ac43a6657a54734e46fff8", + "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", + "zh:a6b1bb6178798882508f5512222a9433fc21e63cdae83a755650c938e6141d32", + "zh:a87592d6ff3d46ee83756f4f84503b2fe4ffc9c1d5dc9f8d4afccb5e126ae538", + 
"zh:daa56fb74c5c9d26b327c40b486beac71cdb9a932c7c4e4561f4401cd0d16a4a", + "zh:f35ab043d7121f3a194f42f5b0e0d59fe62d94488acea07e3783405fa5838785", + ] +} diff --git a/infrastructure/event_source_mappings/Makefile b/infrastructure/event_source_mappings/Makefile new file mode 100644 index 0000000000..012cf9d866 --- /dev/null +++ b/infrastructure/event_source_mappings/Makefile @@ -0,0 +1,46 @@ +-include .env + +apigee_environment ?= $(APIGEE_ENVIRONMENT) +environment ?= $(ENVIRONMENT) +sub_environment ?= $(SUB_ENVIRONMENT) +sub_environment_dir := $(if $(findstring pr-,$(sub_environment)),pr,$(sub_environment)) +tf_var_file := ../instance/environments/$(environment)/$(sub_environment_dir)/variables.tfvars +has_sub_environment_scope = $(shell awk -F= '/^has_sub_environment_scope/ { gsub(/[[:space:]]/, "", $$2); print $$2 }' "$(tf_var_file)") +workspace_name = $(if $(filter false,$(has_sub_environment_scope)),$(environment),$(sub_environment)) + +tf_cmd = AWS_PROFILE=$(AWS_PROFILE) terraform + +bucket_name = $(if $(filter dev,$(environment)),immunisation-$(apigee_environment),immunisation-$(environment))-terraform-state-files + +tf_state = \ + -backend-config="bucket=$(bucket_name)" \ + -backend-config="key=event-source-mappings/state" + +tf_vars = \ + -var="sub_environment=$(sub_environment)" \ + -var-file="$(tf_var_file)" + +init: + $(tf_cmd) init $(tf_state) -upgrade + +workspace: + $(tf_cmd) workspace select -or-create $(workspace_name) && echo "Switched to workspace/environment: $(workspace_name)" + +adopt: workspace + ENVIRONMENT='$(environment)' SUB_ENVIRONMENT='$(sub_environment)' RESOURCE_SCOPE='$(workspace_name)' bash ../../utilities/scripts/adopt_event_source_mappings.sh $(tf_vars) + +plan: workspace + $(tf_cmd) plan $(tf_vars) + +apply: workspace + $(tf_cmd) apply $(tf_vars) --auto-approve + +destroy: workspace + $(tf_cmd) destroy $(tf_vars) -auto-approve + $(tf_cmd) workspace select default + $(tf_cmd) workspace delete $(workspace_name) + +output: + $(tf_cmd) 
output -raw $(name) + +.PHONY : init workspace adopt plan apply destroy output diff --git a/infrastructure/event_source_mappings/main.tf b/infrastructure/event_source_mappings/main.tf new file mode 100644 index 0000000000..59c3197144 --- /dev/null +++ b/infrastructure/event_source_mappings/main.tf @@ -0,0 +1,78 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 6" + } + } + backend "s3" { + region = "eu-west-2" + key = "event-source-mappings/state" + use_lockfile = true + } + required_version = ">= 1.5.0" +} + +provider "aws" { + region = var.aws_region + default_tags { + tags = { + Project = var.project_name + Environment = local.resource_scope + Service = var.service + } + } +} + +locals { + resource_scope = var.has_sub_environment_scope ? var.sub_environment : var.environment + short_prefix = "${var.project_short_name}-${var.sub_environment}" + events_table_name = "imms-${local.resource_scope}-imms-events" + id_sync_queue_name = "imms-${local.resource_scope}-id-sync-queue" + delta_lambda_name = "${local.short_prefix}-delta-lambda" + delta_dlq_name = "${local.short_prefix}-delta-dlq" + id_sync_lambda_name = "${local.short_prefix}-id-sync-lambda" +} + +data "aws_dynamodb_table" "events" { + name = local.events_table_name +} + +data "aws_sqs_queue" "delta_dlq" { + name = local.delta_dlq_name +} + +data "aws_sqs_queue" "id_sync" { + name = local.id_sync_queue_name +} + +data "aws_lambda_function" "delta" { + function_name = local.delta_lambda_name +} + +data "aws_lambda_function" "id_sync" { + function_name = local.id_sync_lambda_name +} + +resource "aws_lambda_event_source_mapping" "delta_trigger" { + event_source_arn = data.aws_dynamodb_table.events.stream_arn + function_name = data.aws_lambda_function.delta.function_name + starting_position = "TRIM_HORIZON" + + destination_config { + on_failure { + destination_arn = data.aws_sqs_queue.delta_dlq.arn + } + } + + maximum_retry_attempts = 0 +} + +resource 
"aws_lambda_event_source_mapping" "id_sync_sqs_trigger" { + event_source_arn = data.aws_sqs_queue.id_sync.arn + function_name = data.aws_lambda_function.id_sync.arn + + batch_size = 10 + maximum_batching_window_in_seconds = 5 + function_response_types = ["ReportBatchItemFailures"] +} diff --git a/infrastructure/event_source_mappings/outputs.tf b/infrastructure/event_source_mappings/outputs.tf new file mode 100644 index 0000000000..db152cc4b9 --- /dev/null +++ b/infrastructure/event_source_mappings/outputs.tf @@ -0,0 +1,4 @@ +output "id_sync_queue_arn" { + description = "The ARN of the ID Sync (MNS NHS Number change) SQS queue" + value = data.aws_sqs_queue.id_sync.arn +} diff --git a/infrastructure/event_source_mappings/variables.tf b/infrastructure/event_source_mappings/variables.tf new file mode 100644 index 0000000000..f6c233df24 --- /dev/null +++ b/infrastructure/event_source_mappings/variables.tf @@ -0,0 +1,97 @@ +variable "environment" { + type = string + description = "Environment (AWS Account) name - dev, preprod or prod" +} + +variable "sub_environment" { + type = string + description = "Sub-environment name, e.g. internal-dev, int-blue, blue" +} + +variable "has_sub_environment_scope" { + description = "True if resources are scoped to the sub-environment. False for blue/green shared resources." + type = bool + default = false +} + +variable "project_name" { + type = string + default = "immunisation" +} + +variable "project_short_name" { + type = string + default = "imms" +} + +variable "service" { + type = string + default = "fhir-api" +} + +variable "aws_region" { + type = string + default = "eu-west-2" + + validation { + condition = var.aws_region == "eu-west-2" + error_message = "AWS Region must be set to eu-west-2." 
+ } +} + +variable "immunisation_account_id" { + type = string + description = "Immunisation AWS Account ID" +} + +variable "dspp_core_account_id" { + type = string + description = "DSPP Core AWS Account ID" +} + +variable "mns_account_id" { + type = string + description = "MNS AWS account ID - trusted source for MNS notifications" + default = "631615744739" +} + +variable "pds_environment" { + type = string + default = "int" +} + +variable "mns_environment" { + type = string + default = "int" +} + +variable "error_alarm_notifications_enabled" { + default = true + description = "Switch to enable error alarm notifications to Slack" + type = bool +} + +variable "create_mesh_processor" { + type = bool + default = false +} + +variable "mesh_no_invocation_period_seconds" { + type = number + default = 300 +} + +variable "dspp_submission_s3_bucket_name" { + type = string + default = "nhsd-dspp-core-ref-s3-submission-upload" +} + +variable "dspp_submission_kms_key_alias" { + type = string + default = "nhsd-dspp-core-ref-s3-submission-upload-key" +} + +variable "dynamodb_point_in_time_recovery_enabled" { + type = bool + default = false +} diff --git a/infrastructure/instance/README.md b/infrastructure/instance/README.md index 6f5e0e9b59..0f7cbdec15 100644 --- a/infrastructure/instance/README.md +++ b/infrastructure/instance/README.md @@ -34,8 +34,6 @@ Note: If you switch environment configuration in .env ensure that you run `make If you want to apply Terraform to a workspace created by a PR you can set the above SUB_ENVIRONMENT to the `PR-number` and ENVIRONMENT set to `dev`. E.g. `pr-57`. You can use this to test out changes when tests fail in CI. -## Blue/Green Lambda Trigger Handoff +## Lambda Trigger Handoff -For split sub-environments such as `int-blue`/`int-green` and `prod-blue`/`prod-green`, the deploy workflow now reimports the shared `delta_trigger` and `id_sync_sqs_trigger` resources into the target Terraform workspace before planning. 
On apply, it also deletes any stale trigger that still points at the target side's old dedicated Lambda function. - -This removes the release-time `Disable delta` and `Disable ID sync` steps from the repository-managed deployment flow. The remaining operational follow-up is outside this repository: update the Jira Smart Checklist release templates to remove those manual checklist items once the automated flow has been rolled out. +The `delta_trigger` and `id_sync_sqs_trigger` event source mappings are managed from `../event_source_mappings` so the main instance plan does not rewrite shared backend state. The deploy workflow applies the main instance first, then adopts or updates the trigger mappings from the dedicated trigger workspace. diff --git a/infrastructure/instance/delta.tf b/infrastructure/instance/delta.tf index 32238d42c0..681b99b028 100644 --- a/infrastructure/instance/delta.tf +++ b/infrastructure/instance/delta.tf @@ -70,20 +70,6 @@ resource "aws_lambda_function" "delta_sync_lambda" { ] } - -resource "aws_lambda_event_source_mapping" "delta_trigger" { - event_source_arn = aws_dynamodb_table.events-dynamodb-table.stream_arn - function_name = aws_lambda_function.delta_sync_lambda.function_name - starting_position = "TRIM_HORIZON" - destination_config { - on_failure { - destination_arn = aws_sqs_queue.dlq.arn - } - } - maximum_retry_attempts = 0 -} - - resource "aws_sqs_queue" "dlq" { name = "${local.short_prefix}-${local.dlq_name}" } diff --git a/infrastructure/instance/id_sync_lambda.tf b/infrastructure/instance/id_sync_lambda.tf index 9e70f37f53..33fdb57afc 100644 --- a/infrastructure/instance/id_sync_lambda.tf +++ b/infrastructure/instance/id_sync_lambda.tf @@ -258,20 +258,3 @@ resource "aws_cloudwatch_metric_alarm" "id_sync_error_alarm" { alarm_actions = [data.aws_sns_topic.imms_system_alert_errors.arn] treat_missing_data = "notBreaching" } - - - -# delete config_lambda_notification / new_s3_invoke_permission - not required; duplicate - -# NEW 
-resource "aws_lambda_event_source_mapping" "id_sync_sqs_trigger" { - event_source_arn = aws_sqs_queue.id_sync_queue.arn - function_name = aws_lambda_function.id_sync_lambda.arn - - # Optional: Configure batch size and other settings - batch_size = 10 - maximum_batching_window_in_seconds = 5 - - # Optional: Configure error handling - function_response_types = ["ReportBatchItemFailures"] -} diff --git a/infrastructure/instance/temp.tf b/infrastructure/instance/temp.tf index a17db4eb9f..e8dc2d840a 100644 --- a/infrastructure/instance/temp.tf +++ b/infrastructure/instance/temp.tf @@ -259,6 +259,22 @@ removed { } } +removed { + from = aws_lambda_event_source_mapping.delta_trigger + + lifecycle { + destroy = false + } +} + +removed { + from = aws_lambda_event_source_mapping.id_sync_sqs_trigger + + lifecycle { + destroy = false + } +} + removed { from = aws_ecr_repository.redis_sync_lambda_repository diff --git a/utilities/scripts/adopt_event_source_mappings.sh b/utilities/scripts/adopt_event_source_mappings.sh new file mode 100644 index 0000000000..bee5cb0248 --- /dev/null +++ b/utilities/scripts/adopt_event_source_mappings.sh @@ -0,0 +1,142 @@ +#!/usr/bin/env bash + +set -euo pipefail + +environment="${ENVIRONMENT:-${environment:-}}" +sub_environment="${SUB_ENVIRONMENT:-${sub_environment:-}}" +resource_scope="${RESOURCE_SCOPE:-${resource_scope:-}}" + +if [[ -z "${environment}" ]]; then + echo "ENVIRONMENT must be set." + exit 1 +fi + +if [[ -z "${sub_environment}" ]]; then + echo "SUB_ENVIRONMENT must be set." + exit 1 +fi + +if [[ -z "${resource_scope}" ]]; then + echo "RESOURCE_SCOPE must be set." 
+ exit 1 +fi + +lookup_mapping_uuid() { + local event_source_arn="$1" + local function_name="$2" + local mapping_uuid + + mapping_uuid="$(aws lambda list-event-source-mappings \ + --event-source-arn "${event_source_arn}" \ + --function-name "${function_name}" \ + --query 'EventSourceMappings[0].UUID' \ + --output text)" + + if [[ "${mapping_uuid}" == "None" ]]; then + return 0 + fi + + printf '%s' "${mapping_uuid}" +} + +counterpart_for_sub_environment() { + local name="$1" + + case "${name}" in + blue) + printf 'green' + ;; + green) + printf 'blue' + ;; + *-blue) + printf '%s-green' "${name%-blue}" + ;; + *-green) + printf '%s-blue' "${name%-green}" + ;; + *) + return 1 + ;; + esac +} + +state_has_resource() { + local address="$1" + + terraform state show "${address}" >/dev/null 2>&1 +} + +adopt_mapping() { + local address="$1" + local event_source_arn="$2" + local target_function_name="$3" + local counterpart_function_name="${4:-}" + local mapping_uuid="" + + shift 4 + + if state_has_resource "${address}"; then + echo "${address} is already managed in this workspace." + return 0 + fi + + mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${target_function_name}")" + if [[ -z "${mapping_uuid}" && -n "${counterpart_function_name}" ]]; then + mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${counterpart_function_name}")" + fi + + if [[ -z "${mapping_uuid}" ]]; then + echo "No existing event source mapping found for ${address}; Terraform will create it." + return 0 + fi + + terraform import -input=false "$@" "${address}" "${mapping_uuid}" >/dev/null + echo "Imported ${address} into workspace ${resource_scope} using ${mapping_uuid}." 
+} + +events_table_name="imms-${resource_scope}-imms-events" +delta_event_source_arn="$(aws dynamodb describe-table \ + --table-name "${events_table_name}" \ + --query 'Table.LatestStreamArn' \ + --output text)" + +if [[ -z "${delta_event_source_arn}" || "${delta_event_source_arn}" == "None" ]]; then + echo "Unable to resolve the DynamoDB stream ARN for ${events_table_name}." + exit 1 +fi + +id_sync_queue_name="imms-${resource_scope}-id-sync-queue" +id_sync_queue_url="$(aws sqs get-queue-url \ + --queue-name "${id_sync_queue_name}" \ + --query 'QueueUrl' \ + --output text)" +id_sync_queue_arn="$(aws sqs get-queue-attributes \ + --queue-url "${id_sync_queue_url}" \ + --attribute-names QueueArn \ + --query 'Attributes.QueueArn' \ + --output text)" + +target_delta_function="imms-${sub_environment}-delta-lambda" +target_id_sync_function="imms-${sub_environment}-id-sync-lambda" +counterpart_delta_function="" +counterpart_id_sync_function="" + +if [[ "${resource_scope}" != "${sub_environment}" ]] && counterpart_sub_environment="$(counterpart_for_sub_environment "${sub_environment}")"; then + counterpart_delta_function="imms-${counterpart_sub_environment}-delta-lambda" + counterpart_id_sync_function="imms-${counterpart_sub_environment}-id-sync-lambda" +fi + +adopt_mapping \ + "aws_lambda_event_source_mapping.delta_trigger" \ + "${delta_event_source_arn}" \ + "${target_delta_function}" \ + "${counterpart_delta_function}" \ + "$@" + +adopt_mapping \ + "aws_lambda_event_source_mapping.id_sync_sqs_trigger" \ + "${id_sync_queue_arn}" \ + "${target_id_sync_function}" \ + "${counterpart_id_sync_function}" \ + "$@" diff --git a/utilities/scripts/manage_blue_green_event_source_mappings.sh b/utilities/scripts/manage_blue_green_event_source_mappings.sh deleted file mode 100644 index 4af78e227f..0000000000 --- a/utilities/scripts/manage_blue_green_event_source_mappings.sh +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/env bash - -set -euo pipefail - -mode="${1:-prepare-state}" 
-sub_environment="${SUB_ENVIRONMENT:-${sub_environment:-}}" - -if [[ -z "${sub_environment}" ]]; then - echo "SUB_ENVIRONMENT must be set." - exit 1 -fi - -if [[ ! "${sub_environment}" =~ -(blue|green)$ ]]; then - echo "Skipping Lambda trigger handoff for ${sub_environment}." - exit 0 -fi - -current_colour="${BASH_REMATCH[1]}" -counterpart_colour="blue" -if [[ "${current_colour}" == "blue" ]]; then - counterpart_colour="green" -fi - -counterpart_sub_environment="${sub_environment%-${current_colour}}-${counterpart_colour}" -current_workspace="$(terraform workspace show)" - -if [[ "${current_workspace}" != "${sub_environment}" ]]; then - echo "Terraform workspace ${current_workspace} does not match SUB_ENVIRONMENT ${sub_environment}." - exit 1 -fi - -lookup_mapping_uuid() { - local event_source_arn="$1" - local function_name="$2" - local mapping_uuid - - mapping_uuid="$(aws lambda list-event-source-mappings \ - --event-source-arn "${event_source_arn}" \ - --function-name "${function_name}" \ - --query 'EventSourceMappings[0].UUID' \ - --output text)" - - if [[ "${mapping_uuid}" == "None" ]]; then - return 0 - fi - - printf '%s' "${mapping_uuid}" -} - -resolve_event_source_arns() { - id_sync_queue_arn="$(terraform output -raw id_sync_queue_arn)" - events_table_name="$(terraform output -raw dynamodb_table_name)" - delta_event_source_arn="$(aws dynamodb describe-table \ - --table-name "${events_table_name}" \ - --query 'Table.LatestStreamArn' \ - --output text)" - - if [[ -z "${delta_event_source_arn}" || "${delta_event_source_arn}" == "None" ]]; then - echo "Unable to resolve the DynamoDB stream ARN for ${events_table_name}." 
- exit 1 - fi -} - -prepare_state() { - local address="$1" - local event_source_arn="$2" - local counterpart_function_name="$3" - local target_function_name="$4" - local mapping_uuid="" - - mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${counterpart_function_name}")" - if [[ -z "${mapping_uuid}" ]]; then - mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${target_function_name}")" - fi - - if [[ -z "${mapping_uuid}" ]]; then - echo "Unable to find an event source mapping for ${address}." - exit 1 - fi - - terraform state rm "${address}" >/dev/null 2>&1 || true - terraform import "${address}" "${mapping_uuid}" >/dev/null - - echo "Imported ${address} into workspace ${sub_environment} using ${mapping_uuid}." -} - -cleanup_stale_mapping() { - local event_source_arn="$1" - local counterpart_function_name="$2" - local target_function_name="$3" - local counterpart_uuid="" - local target_uuid="" - - counterpart_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${counterpart_function_name}")" - target_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${target_function_name}")" - - if [[ -z "${target_uuid}" || "${target_uuid}" == "${counterpart_uuid}" ]]; then - return 0 - fi - - aws lambda delete-event-source-mapping --uuid "${target_uuid}" >/dev/null - echo "Deleted stale event source mapping ${target_uuid} for ${target_function_name}." 
-} - -resolve_event_source_arns - -target_delta_function="imms-${sub_environment}-delta-lambda" -counterpart_delta_function="imms-${counterpart_sub_environment}-delta-lambda" -target_id_sync_function="imms-${sub_environment}-id-sync-lambda" -counterpart_id_sync_function="imms-${counterpart_sub_environment}-id-sync-lambda" - -case "${mode}" in - prepare-state) - prepare_state \ - "aws_lambda_event_source_mapping.delta_trigger" \ - "${delta_event_source_arn}" \ - "${counterpart_delta_function}" \ - "${target_delta_function}" - prepare_state \ - "aws_lambda_event_source_mapping.id_sync_sqs_trigger" \ - "${id_sync_queue_arn}" \ - "${counterpart_id_sync_function}" \ - "${target_id_sync_function}" - ;; - cleanup-stale) - cleanup_stale_mapping \ - "${delta_event_source_arn}" \ - "${counterpart_delta_function}" \ - "${target_delta_function}" - cleanup_stale_mapping \ - "${id_sync_queue_arn}" \ - "${counterpart_id_sync_function}" \ - "${target_id_sync_function}" - ;; - *) - echo "Unsupported mode: ${mode}. Use prepare-state or cleanup-stale." - exit 1 - ;; -esac From 7cc77a6f2826992490b996bf47bdfdaba662010a Mon Sep 17 00:00:00 2001 From: Thomas-Boyle Date: Mon, 27 Apr 2026 13:10:16 +0100 Subject: [PATCH 3/6] Refactor environment variable validation in adopt_event_source_mappings.sh - Introduced a new function, require_value, to streamline the validation of required environment variables. - Replaced individual checks for ENVIRONMENT, SUB_ENVIRONMENT, and RESOURCE_SCOPE with calls to the new function for improved code clarity and maintainability. 
--- .../scripts/adopt_event_source_mappings.sh | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/utilities/scripts/adopt_event_source_mappings.sh b/utilities/scripts/adopt_event_source_mappings.sh index bee5cb0248..3bfe85316f 100644 --- a/utilities/scripts/adopt_event_source_mappings.sh +++ b/utilities/scripts/adopt_event_source_mappings.sh @@ -6,20 +6,19 @@ environment="${ENVIRONMENT:-${environment:-}}" sub_environment="${SUB_ENVIRONMENT:-${sub_environment:-}}" resource_scope="${RESOURCE_SCOPE:-${resource_scope:-}}" -if [[ -z "${environment}" ]]; then - echo "ENVIRONMENT must be set." - exit 1 -fi +require_value() { + local name="$1" + local value="$2" -if [[ -z "${sub_environment}" ]]; then - echo "SUB_ENVIRONMENT must be set." - exit 1 -fi + if [[ -z "${value}" ]]; then + echo "${name} must be set." + exit 1 + fi +} -if [[ -z "${resource_scope}" ]]; then - echo "RESOURCE_SCOPE must be set." - exit 1 -fi +require_value "ENVIRONMENT" "${environment}" +require_value "SUB_ENVIRONMENT" "${sub_environment}" +require_value "RESOURCE_SCOPE" "${resource_scope}" lookup_mapping_uuid() { local event_source_arn="$1" From c807b6821b9780840f0fb618a1588baf37e6d683 Mon Sep 17 00:00:00 2001 From: Thomas-Boyle Date: Fri, 1 May 2026 12:41:45 +0100 Subject: [PATCH 4/6] Add delete_mapping function to manage event source mappings in adopt_event_source_mappings.sh - Introduced a new delete_mapping function to handle the deletion of AWS Lambda event source mappings, including a timeout mechanism for deletion confirmation. - Updated adopt_mapping function to utilize the new delete_mapping function, improving the logic for handling target and counterpart mapping UUIDs. - Enhanced code clarity and maintainability by restructuring the mapping lookup and deletion process. 
--- .../scripts/adopt_event_source_mappings.sh | 35 +++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/utilities/scripts/adopt_event_source_mappings.sh b/utilities/scripts/adopt_event_source_mappings.sh index 3bfe85316f..ebbfb31f47 100644 --- a/utilities/scripts/adopt_event_source_mappings.sh +++ b/utilities/scripts/adopt_event_source_mappings.sh @@ -66,11 +66,30 @@ state_has_resource() { terraform state show "${address}" >/dev/null 2>&1 } +delete_mapping() { + local mapping_uuid="$1" + + aws lambda delete-event-source-mapping --uuid "${mapping_uuid}" >/dev/null + + for _ in {1..30}; do + if ! aws lambda get-event-source-mapping --uuid "${mapping_uuid}" >/dev/null 2>&1; then + return 0 + fi + + sleep 2 + done + + echo "Timed out waiting for event source mapping ${mapping_uuid} to be deleted." + exit 1 +} + adopt_mapping() { local address="$1" local event_source_arn="$2" local target_function_name="$3" local counterpart_function_name="${4:-}" + local target_mapping_uuid="" + local counterpart_mapping_uuid="" local mapping_uuid="" shift 4 @@ -80,9 +99,19 @@ adopt_mapping() { return 0 fi - mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${target_function_name}")" - if [[ -z "${mapping_uuid}" && -n "${counterpart_function_name}" ]]; then - mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${counterpart_function_name}")" + target_mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${target_function_name}")" + + if [[ -n "${counterpart_function_name}" ]]; then + counterpart_mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${counterpart_function_name}")" + fi + + if [[ -n "${counterpart_mapping_uuid}" ]]; then + if [[ -n "${target_mapping_uuid}" ]]; then + delete_mapping "${target_mapping_uuid}" + fi + mapping_uuid="${counterpart_mapping_uuid}" + else + mapping_uuid="${target_mapping_uuid}" fi if [[ -z "${mapping_uuid}" ]]; then From 6042e07321d4ab36a46e9bbac958b782966a300b Mon Sep 17 00:00:00 2001 From: 
Thomas-Boyle Date: Tue, 5 May 2026 13:51:54 +0100 Subject: [PATCH 5/6] Enhance event source mapping workflows and Terraform management - Added a new workflow for migrating event source mappings, allowing controlled one-time migrations for specific environments. - Updated the deploy-backend.yml workflow to include concurrency settings and additional steps for Terraform initialization, formatting, validation, and applying event source mappings. - Refactored the Makefile to introduce new commands for formatting checks, validation, and applying Terraform plans. - Enhanced the adopt_event_source_mappings.sh script to support verification of event source mappings and improved logging for existing mappings. - Updated README.md to document the new migration process and rollback procedures for event source mappings. --- .github/workflows/deploy-backend.yml | 38 +++- .../migrate-event-source-mappings.yml | 111 +++++++++++ infrastructure/event_source_mappings/Makefile | 26 ++- .../event_source_mappings/outputs.tf | 30 +++ infrastructure/instance/README.md | 76 +++++++- .../scripts/adopt_event_source_mappings.sh | 174 ++++++++++++++---- 6 files changed, 408 insertions(+), 47 deletions(-) create mode 100644 .github/workflows/migrate-event-source-mappings.yml diff --git a/.github/workflows/deploy-backend.yml b/.github/workflows/deploy-backend.yml index e3dc59ce79..dd086c47b3 100644 --- a/.github/workflows/deploy-backend.yml +++ b/.github/workflows/deploy-backend.yml @@ -110,6 +110,10 @@ env: # Sonarcloud - do not allow direct usage of untrusted data run-name: Deploy Backend - ${{ inputs.environment }} ${{ inputs.sub_environment }} +concurrency: + group: deploy-backend-${{ github.repository }}-${{ inputs.environment }}-${{ (inputs.environment == 'preprod' || inputs.environment == 'prod') && 'shared-trigger' || inputs.sub_environment }} + cancel-in-progress: false + jobs: deploy-lambda-images: name: Deploy ${{ matrix.lambda_name }} image @@ -310,13 +314,39 @@ jobs: make apply-ci 
echo "ID_SYNC_QUEUE_ARN=$(make -s output name=id_sync_queue_arn)" >> $GITHUB_ENV - - name: Apply Lambda event source mappings + - name: Terraform Init Event Source Mappings + if: ${{ !failure() }} + working-directory: infrastructure/event_source_mappings + run: make init + + - name: Terraform Format Check Event Source Mappings + if: ${{ !failure() }} + working-directory: infrastructure/event_source_mappings + run: make fmt-check + + - name: Terraform Validate Event Source Mappings + if: ${{ !failure() }} + working-directory: infrastructure/event_source_mappings + run: make validate + + - name: Terraform Plan Event Source Mappings + if: ${{ !failure() }} + working-directory: infrastructure/event_source_mappings + run: make plan-ci + + - name: Save Event Source Mapping Terraform Plan + if: ${{ !failure() }} + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a + with: + name: ${{ env.ENVIRONMENT }}-${{ env.SUB_ENVIRONMENT }}-event-source-mappings-tfplan + path: infrastructure/event_source_mappings/tfplan + + - name: Terraform Apply Event Source Mappings if: ${{ !failure() }} working-directory: infrastructure/event_source_mappings run: | - make init - make adopt - make apply + make apply-ci + make verify - name: Install poetry if: ${{ inputs.environment == 'dev' && inputs.create_mns_subscription }} diff --git a/.github/workflows/migrate-event-source-mappings.yml b/.github/workflows/migrate-event-source-mappings.yml new file mode 100644 index 0000000000..bda8684576 --- /dev/null +++ b/.github/workflows/migrate-event-source-mappings.yml @@ -0,0 +1,111 @@ +name: Migrate Event Source Mappings + +on: + workflow_dispatch: + inputs: + apigee_environment: + type: choice + description: Select the Apigee proxy environment for dev state buckets + options: + - internal-dev + - internal-qa + - ref + default: internal-dev + environment: + type: choice + description: Select the AWS backend environment + options: + - dev + - preprod + - prod + sub_environment: + 
type: string + description: Set the sub-environment name, e.g. internal-dev, int-blue, blue + required: true + confirm_event_source_mapping_migration: + type: boolean + description: Confirm this is the controlled one-time migration for the selected environment + required: true + default: false + +env: + APIGEE_ENVIRONMENT: ${{ inputs.apigee_environment }} + ENVIRONMENT: ${{ inputs.environment }} + SUB_ENVIRONMENT: ${{ inputs.sub_environment }} + +run-name: Migrate Event Source Mappings - ${{ inputs.environment }} ${{ inputs.sub_environment }} + +concurrency: + group: deploy-backend-${{ github.repository }}-${{ inputs.environment }}-${{ (inputs.environment == 'preprod' || inputs.environment == 'prod') && 'shared-trigger' || inputs.sub_environment }} + cancel-in-progress: false + +jobs: + migrate-event-source-mappings: + permissions: + id-token: write + contents: read + runs-on: ubuntu-latest + environment: + name: ${{ inputs.environment }} + steps: + - name: Confirm controlled migration + run: | + set -euo pipefail + + if [ "${CONFIRM_EVENT_SOURCE_MAPPING_MIGRATION}" != "true" ]; then + echo "This workflow is only for the controlled one-time event source mapping migration." + echo "Set confirm_event_source_mapping_migration to true to continue." 
+ exit 1 + fi + env: + CONFIRM_EVENT_SOURCE_MAPPING_MIGRATION: ${{ inputs.confirm_event_source_mapping_migration }} + + - name: Checkout + uses: actions/checkout@0c366fd6a839edf440554fa01a7085ccba70ac98 + + - name: Connect to AWS + uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37 + with: + aws-region: eu-west-2 + role-to-assume: arn:aws:iam::${{ vars.AWS_ACCOUNT_ID }}:role/auto-ops + role-session-name: github-actions + + - uses: hashicorp/setup-terraform@5e8dbf3c6d9deaf4193ca7a8fb23f2ac83bb6c85 + with: + terraform_version: "1.12.2" + + - name: Terraform Init + working-directory: infrastructure/event_source_mappings + run: make init + + - name: Adopt Existing Event Source Mappings + working-directory: infrastructure/event_source_mappings + env: + ALLOW_EVENT_SOURCE_MAPPING_ADOPTION: "true" + run: make adopt + + - name: Terraform Format Check + working-directory: infrastructure/event_source_mappings + run: make fmt-check + + - name: Terraform Validate + working-directory: infrastructure/event_source_mappings + run: make validate + + - name: Terraform Plan + working-directory: infrastructure/event_source_mappings + run: make plan-ci + + - name: Save Terraform Plan + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a + with: + name: ${{ env.ENVIRONMENT }}-${{ env.SUB_ENVIRONMENT }}-event-source-mappings-migration-tfplan + path: infrastructure/event_source_mappings/tfplan + + - name: Terraform Apply + working-directory: infrastructure/event_source_mappings + run: make apply-ci + + - name: Verify Event Source Mappings + working-directory: infrastructure/event_source_mappings + run: make verify diff --git a/infrastructure/event_source_mappings/Makefile b/infrastructure/event_source_mappings/Makefile index 012cf9d866..411b4baca6 100644 --- a/infrastructure/event_source_mappings/Makefile +++ b/infrastructure/event_source_mappings/Makefile @@ -7,6 +7,7 @@ sub_environment_dir := $(if $(findstring 
pr-,$(sub_environment)),pr,$(sub_enviro tf_var_file := ../instance/environments/$(environment)/$(sub_environment_dir)/variables.tfvars has_sub_environment_scope = $(shell awk -F= '/^has_sub_environment_scope/ { gsub(/[[:space:]]/, "", $$2); print $$2 }' "$(tf_var_file)") workspace_name = $(if $(filter false,$(has_sub_environment_scope)),$(environment),$(sub_environment)) +allow_shared_scope_destroy ?= $(ALLOW_SHARED_SCOPE_DESTROY) tf_cmd = AWS_PROFILE=$(AWS_PROFILE) terraform @@ -29,13 +30,34 @@ workspace: adopt: workspace ENVIRONMENT='$(environment)' SUB_ENVIRONMENT='$(sub_environment)' RESOURCE_SCOPE='$(workspace_name)' bash ../../utilities/scripts/adopt_event_source_mappings.sh $(tf_vars) +fmt-check: + $(tf_cmd) fmt -check + +validate: workspace + $(tf_cmd) validate + plan: workspace $(tf_cmd) plan $(tf_vars) +plan-ci: workspace + $(tf_cmd) plan $(tf_vars) -out=tfplan -input=false + apply: workspace $(tf_cmd) apply $(tf_vars) --auto-approve -destroy: workspace +apply-ci: workspace + $(tf_cmd) apply $(tf_vars) -input=false tfplan + +verify: + ENVIRONMENT='$(environment)' SUB_ENVIRONMENT='$(sub_environment)' RESOURCE_SCOPE='$(workspace_name)' EVENT_SOURCE_MAPPING_ACTION=verify bash ../../utilities/scripts/adopt_event_source_mappings.sh + +ensure-destroy-allowed: + @if [ "$(has_sub_environment_scope)" = "false" ] && [ "$(allow_shared_scope_destroy)" != "true" ]; then \ + echo "Refusing to destroy shared event source mappings in workspace $(workspace_name). 
Set ALLOW_SHARED_SCOPE_DESTROY=true for controlled teardown."; \ + exit 1; \ + fi + +destroy: workspace ensure-destroy-allowed $(tf_cmd) destroy $(tf_vars) -auto-approve $(tf_cmd) workspace select default $(tf_cmd) workspace delete $(workspace_name) @@ -43,4 +65,4 @@ destroy: workspace output: $(tf_cmd) output -raw $(name) -.PHONY : init workspace adopt plan apply destroy output +.PHONY : init workspace adopt fmt-check validate plan plan-ci apply apply-ci verify ensure-destroy-allowed destroy output diff --git a/infrastructure/event_source_mappings/outputs.tf b/infrastructure/event_source_mappings/outputs.tf index db152cc4b9..6d0d83d328 100644 --- a/infrastructure/event_source_mappings/outputs.tf +++ b/infrastructure/event_source_mappings/outputs.tf @@ -2,3 +2,33 @@ output "id_sync_queue_arn" { description = "The ARN of the ID Sync (MNS NHS Number change) SQS queue" value = data.aws_sqs_queue.id_sync.arn } + +output "delta_trigger_uuid" { + description = "The UUID of the Delta Lambda event source mapping" + value = aws_lambda_event_source_mapping.delta_trigger.id +} + +output "delta_trigger_function_arn" { + description = "The ARN of the Delta Lambda targeted by the event source mapping" + value = data.aws_lambda_function.delta.arn +} + +output "delta_trigger_state" { + description = "The current state of the Delta Lambda event source mapping" + value = aws_lambda_event_source_mapping.delta_trigger.state +} + +output "id_sync_sqs_trigger_uuid" { + description = "The UUID of the ID Sync SQS event source mapping" + value = aws_lambda_event_source_mapping.id_sync_sqs_trigger.id +} + +output "id_sync_sqs_trigger_function_arn" { + description = "The ARN of the ID Sync Lambda targeted by the event source mapping" + value = data.aws_lambda_function.id_sync.arn +} + +output "id_sync_sqs_trigger_state" { + description = "The current state of the ID Sync SQS event source mapping" + value = aws_lambda_event_source_mapping.id_sync_sqs_trigger.state +} diff --git 
a/infrastructure/instance/README.md b/infrastructure/instance/README.md index 0f7cbdec15..a3299cccf8 100644 --- a/infrastructure/instance/README.md +++ b/infrastructure/instance/README.md @@ -36,4 +36,78 @@ E.g. `pr-57`. You can use this to test out changes when tests fail in CI. ## Lambda Trigger Handoff -The `delta_trigger` and `id_sync_sqs_trigger` event source mappings are managed from `../event_source_mappings` so the main instance plan does not rewrite shared backend state. The deploy workflow applies the main instance first, then adopts or updates the trigger mappings from the dedicated trigger workspace. +The `delta_trigger` and `id_sync_sqs_trigger` event source mappings are managed from `../event_source_mappings` so the main instance plan does not rewrite shared backend state. The deploy workflow applies the main instance first, then updates the trigger mappings from the dedicated trigger workspace. Existing mappings are imported only through the controlled migration workflow. + +### First Cutover + +Use the `Migrate Event Source Mappings` workflow once per environment before relying on the normal backend deploy for trigger changes. Select the target `environment` and `sub_environment`, then set `confirm_event_source_mapping_migration` to `true`. The migration workflow imports existing mappings, runs `terraform validate`, saves a dedicated trigger `tfplan` artifact, applies that saved plan, and verifies the final Lambda targets. + +Before starting, check for duplicate or stale mappings. 
Replace the variable values with the shared scope and target sub-environment: + +```bash +RESOURCE_SCOPE=preprod +SUB_ENVIRONMENT=int-blue +COUNTERPART_SUB_ENVIRONMENT=int-green + +EVENTS_STREAM_ARN="$(aws dynamodb describe-table \ + --table-name "imms-${RESOURCE_SCOPE}-imms-events" \ + --query 'Table.LatestStreamArn' \ + --output text)" + +ID_SYNC_QUEUE_URL="$(aws sqs get-queue-url \ + --queue-name "imms-${RESOURCE_SCOPE}-id-sync-queue" \ + --query 'QueueUrl' \ + --output text)" + +ID_SYNC_QUEUE_ARN="$(aws sqs get-queue-attributes \ + --queue-url "${ID_SYNC_QUEUE_URL}" \ + --attribute-names QueueArn \ + --query 'Attributes.QueueArn' \ + --output text)" + +aws lambda list-event-source-mappings \ + --event-source-arn "${EVENTS_STREAM_ARN}" \ + --function-name "imms-${SUB_ENVIRONMENT}-delta-lambda" + +aws lambda list-event-source-mappings \ + --event-source-arn "${EVENTS_STREAM_ARN}" \ + --function-name "imms-${COUNTERPART_SUB_ENVIRONMENT}-delta-lambda" + +aws lambda list-event-source-mappings \ + --event-source-arn "${ID_SYNC_QUEUE_ARN}" \ + --function-name "imms-${SUB_ENVIRONMENT}-id-sync-lambda" + +aws lambda list-event-source-mappings \ + --event-source-arn "${ID_SYNC_QUEUE_ARN}" \ + --function-name "imms-${COUNTERPART_SUB_ENVIRONMENT}-id-sync-lambda" +``` + +### Rollback + +If the cutover applies cleanly but the target sub-environment must be rolled back, rerun the migration workflow with the previous active sub-environment selected. The workflow should update the managed mappings back to the previous Lambda targets through a saved trigger plan. 
Verify the final UUIDs, Lambda ARNs, and states with: + +```bash +cd infrastructure/event_source_mappings +make init +make workspace +terraform output delta_trigger_uuid +terraform output delta_trigger_function_arn +terraform output delta_trigger_state +terraform output id_sync_sqs_trigger_uuid +terraform output id_sync_sqs_trigger_function_arn +terraform output id_sync_sqs_trigger_state +make verify +``` + +### Failed Apply Recovery + +If the migration fails after import but before apply, rerun the same migration workflow for the same environment and sub-environment. The import step is idempotent for resources already in state and does not delete AWS mappings. + +If verification fails, inspect live AWS mappings before retrying: + +```bash +aws lambda get-event-source-mapping --uuid "<mapping-uuid>" +aws lambda list-event-source-mappings --event-source-arn "<event-source-arn>" +``` + +Do not run `make destroy` for shared blue/green trigger workspaces unless this is a controlled teardown. Shared-scope destroys require `ALLOW_SHARED_SCOPE_DESTROY=true`. diff --git a/utilities/scripts/adopt_event_source_mappings.sh b/utilities/scripts/adopt_event_source_mappings.sh index ebbfb31f47..68f439435d 100644 --- a/utilities/scripts/adopt_event_source_mappings.sh +++ b/utilities/scripts/adopt_event_source_mappings.sh @@ -5,6 +5,7 @@ set -euo pipefail environment="${ENVIRONMENT:-${environment:-}}" sub_environment="${SUB_ENVIRONMENT:-${sub_environment:-}}" resource_scope="${RESOURCE_SCOPE:-${resource_scope:-}}" +action="${EVENT_SOURCE_MAPPING_ACTION:-adopt}" require_value() { local name="$1" @@ -20,21 +21,56 @@ require_value "ENVIRONMENT" "${environment}" require_value "SUB_ENVIRONMENT" "${sub_environment}" require_value "RESOURCE_SCOPE" "${resource_scope}" +require_controlled_adoption() { + if [[ "${ALLOW_EVENT_SOURCE_MAPPING_ADOPTION:-}" != "true" ]]; then + echo "ALLOW_EVENT_SOURCE_MAPPING_ADOPTION=true must be set for the controlled event source mapping migration."
+ exit 1 + fi +} + +log_mappings() { + local mappings_json="$1" + local event_source_arn="$2" + local function_name="$3" + + if jq -e '.EventSourceMappings | length == 0' <<<"${mappings_json}" >/dev/null; then + echo "No event source mappings found for ${function_name} on ${event_source_arn}." >&2 + return 0 + fi + + echo "Event source mappings found for ${function_name} on ${event_source_arn}:" >&2 + jq -r \ + '.EventSourceMappings[] + | " UUID=\(.UUID) State=\(.State) FunctionArn=\(.FunctionArn // "unknown")"' \ + <<<"${mappings_json}" >&2 +} + lookup_mapping_uuid() { local event_source_arn="$1" local function_name="$2" + local mappings_json + local active_mapping_count local mapping_uuid - mapping_uuid="$(aws lambda list-event-source-mappings \ + mappings_json="$(aws lambda list-event-source-mappings \ --event-source-arn "${event_source_arn}" \ --function-name "${function_name}" \ - --query 'EventSourceMappings[0].UUID' \ - --output text)" + --output json)" + + log_mappings "${mappings_json}" "${event_source_arn}" "${function_name}" + + active_mapping_count="$(jq '[.EventSourceMappings[]? | select(.State != "Deleting")] | length' <<<"${mappings_json}")" + + if ((active_mapping_count > 1)); then + echo "Ambiguous event source mappings for ${function_name} on ${event_source_arn}; refusing to continue." >&2 + exit 1 + fi - if [[ "${mapping_uuid}" == "None" ]]; then + if ((active_mapping_count == 0)); then return 0 fi + mapping_uuid="$(jq -r '.EventSourceMappings[] | select(.State != "Deleting") | .UUID' <<<"${mappings_json}")" printf '%s' "${mapping_uuid}" } @@ -66,24 +102,7 @@ state_has_resource() { terraform state show "${address}" >/dev/null 2>&1 } -delete_mapping() { - local mapping_uuid="$1" - - aws lambda delete-event-source-mapping --uuid "${mapping_uuid}" >/dev/null - - for _ in {1..30}; do - if ! 
aws lambda get-event-source-mapping --uuid "${mapping_uuid}" >/dev/null 2>&1; then - return 0 - fi - - sleep 2 - done - - echo "Timed out waiting for event source mapping ${mapping_uuid} to be deleted." - exit 1 -} - -adopt_mapping() { +resolve_mapping_uuid() { local address="$1" local event_source_arn="$2" local target_function_name="$3" @@ -92,10 +111,8 @@ adopt_mapping() { local counterpart_mapping_uuid="" local mapping_uuid="" - shift 4 - if state_has_resource "${address}"; then - echo "${address} is already managed in this workspace." + echo "${address} is already managed in this workspace." >&2 return 0 fi @@ -107,7 +124,10 @@ adopt_mapping() { if [[ -n "${counterpart_mapping_uuid}" ]]; then if [[ -n "${target_mapping_uuid}" ]]; then - delete_mapping "${target_mapping_uuid}" + echo "Both target and counterpart mappings exist for ${address}; refusing to delete a live mapping during adoption." >&2 + echo "Target UUID: ${target_mapping_uuid}" >&2 + echo "Counterpart UUID: ${counterpart_mapping_uuid}" >&2 + exit 1 fi mapping_uuid="${counterpart_mapping_uuid}" else @@ -115,7 +135,20 @@ adopt_mapping() { fi if [[ -z "${mapping_uuid}" ]]; then - echo "No existing event source mapping found for ${address}; Terraform will create it." + echo "No existing event source mapping found for ${address}; Terraform will create it." >&2 + return 0 + fi + + printf '%s' "${mapping_uuid}" +} + +import_mapping() { + local address="$1" + local mapping_uuid="$2" + + shift 2 + + if [[ -z "${mapping_uuid}" ]]; then return 0 fi @@ -123,6 +156,33 @@ adopt_mapping() { echo "Imported ${address} into workspace ${resource_scope} using ${mapping_uuid}." 
} +verify_mapping() { + local address="$1" + local event_source_arn="$2" + local target_function_name="$3" + local counterpart_function_name="${4:-}" + local target_mapping_uuid="" + local counterpart_mapping_uuid="" + + target_mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${target_function_name}")" + + if [[ -z "${target_mapping_uuid}" ]]; then + echo "No final event source mapping found for ${address} targeting ${target_function_name}." + exit 1 + fi + + if [[ -n "${counterpart_function_name}" ]]; then + counterpart_mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${counterpart_function_name}")" + + if [[ -n "${counterpart_mapping_uuid}" ]]; then + echo "A stale counterpart mapping remains for ${address}: ${counterpart_mapping_uuid} targets ${counterpart_function_name}." + exit 1 + fi + fi + + echo "Verified ${address} targets ${target_function_name} with UUID ${target_mapping_uuid}." +} + events_table_name="imms-${resource_scope}-imms-events" delta_event_source_arn="$(aws dynamodb describe-table \ --table-name "${events_table_name}" \ @@ -155,16 +215,50 @@ if [[ "${resource_scope}" != "${sub_environment}" ]] && counterpart_sub_environm counterpart_id_sync_function="imms-${counterpart_sub_environment}-id-sync-lambda" fi -adopt_mapping \ - "aws_lambda_event_source_mapping.delta_trigger" \ - "${delta_event_source_arn}" \ - "${target_delta_function}" \ - "${counterpart_delta_function}" \ - "$@" - -adopt_mapping \ - "aws_lambda_event_source_mapping.id_sync_sqs_trigger" \ - "${id_sync_queue_arn}" \ - "${target_id_sync_function}" \ - "${counterpart_id_sync_function}" \ - "$@" +case "${action}" in + adopt) + delta_mapping_uuid="" + id_sync_mapping_uuid="" + + require_controlled_adoption + + delta_mapping_uuid="$(resolve_mapping_uuid \ + "aws_lambda_event_source_mapping.delta_trigger" \ + "${delta_event_source_arn}" \ + "${target_delta_function}" \ + "${counterpart_delta_function}")" + + id_sync_mapping_uuid="$(resolve_mapping_uuid \ + 
"aws_lambda_event_source_mapping.id_sync_sqs_trigger" \ + "${id_sync_queue_arn}" \ + "${target_id_sync_function}" \ + "${counterpart_id_sync_function}")" + + import_mapping \ + "aws_lambda_event_source_mapping.delta_trigger" \ + "${delta_mapping_uuid}" \ + "$@" + + import_mapping \ + "aws_lambda_event_source_mapping.id_sync_sqs_trigger" \ + "${id_sync_mapping_uuid}" \ + "$@" + ;; + verify) + verify_mapping \ + "aws_lambda_event_source_mapping.delta_trigger" \ + "${delta_event_source_arn}" \ + "${target_delta_function}" \ + "${counterpart_delta_function}" + + verify_mapping \ + "aws_lambda_event_source_mapping.id_sync_sqs_trigger" \ + "${id_sync_queue_arn}" \ + "${target_id_sync_function}" \ + "${counterpart_id_sync_function}" + ;; + *) + echo "Unsupported EVENT_SOURCE_MAPPING_ACTION: ${action}" + exit 1 + ;; +esac From 229b6ce77c1e0f6c1b3c52f59031bd3a97d826cb Mon Sep 17 00:00:00 2001 From: Thomas-Boyle Date: Thu, 7 May 2026 15:11:33 +0100 Subject: [PATCH 6/6] Enhance event source mapping workflows and validation processes - Added a step to adopt existing event source mappings in the deploy-backend workflow. - Introduced validation for event source mappings in the quality-checks workflow. - Updated the Makefile to improve handling of Terraform variable scope. - Enhanced the adopt_event_source_mappings.sh script for better resource management and logging. - Revised README.md to clarify the event source mapping adoption process. 
--- .github/workflows/deploy-backend.yml | 7 ++++++ .github/workflows/quality-checks.yml | 9 ++++++++ infrastructure/event_source_mappings/Makefile | 13 +++++++---- infrastructure/instance/README.md | 4 ++-- .../scripts/adopt_event_source_mappings.sh | 22 ++++++++++++------- 5 files changed, 41 insertions(+), 14 deletions(-) diff --git a/.github/workflows/deploy-backend.yml b/.github/workflows/deploy-backend.yml index 88e3ed3a6e..05baf7bd02 100644 --- a/.github/workflows/deploy-backend.yml +++ b/.github/workflows/deploy-backend.yml @@ -329,6 +329,13 @@ jobs: working-directory: infrastructure/event_source_mappings run: make validate + - name: Adopt Existing Event Source Mappings + if: ${{ !failure() }} + working-directory: infrastructure/event_source_mappings + env: + ALLOW_EVENT_SOURCE_MAPPING_ADOPTION: "true" + run: make adopt + - name: Terraform Plan Event Source Mappings if: ${{ !failure() }} working-directory: infrastructure/event_source_mappings diff --git a/.github/workflows/quality-checks.yml b/.github/workflows/quality-checks.yml index 5566bb2b32..12f79d77c7 100644 --- a/.github/workflows/quality-checks.yml +++ b/.github/workflows/quality-checks.yml @@ -73,6 +73,15 @@ jobs: - name: Check formatting run: terraform fmt -check -recursive + - name: Validate event source mappings + working-directory: infrastructure/event_source_mappings + run: | + terraform init -backend=false -input=false + terraform validate + + - name: ShellCheck event source mapping adoption script + run: shellcheck utilities/scripts/adopt_event_source_mappings.sh + testcoverage_and_sonarcloud: name: Test Coverage and SonarCloud runs-on: ubuntu-latest diff --git a/infrastructure/event_source_mappings/Makefile b/infrastructure/event_source_mappings/Makefile index 411b4baca6..f0eb69d0d3 100644 --- a/infrastructure/event_source_mappings/Makefile +++ b/infrastructure/event_source_mappings/Makefile @@ -5,7 +5,14 @@ environment ?= $(ENVIRONMENT) sub_environment ?= $(SUB_ENVIRONMENT) 
sub_environment_dir := $(if $(findstring pr-,$(sub_environment)),pr,$(sub_environment)) tf_var_file := ../instance/environments/$(environment)/$(sub_environment_dir)/variables.tfvars -has_sub_environment_scope = $(shell awk -F= '/^has_sub_environment_scope/ { gsub(/[[:space:]]/, "", $$2); print $$2 }' "$(tf_var_file)") +has_sub_environment_scope = $(shell awk -F= '/^[[:space:]]*has_sub_environment_scope[[:space:]]*=/ { gsub(/[[:space:]]/, "", $$2); print $$2 }' "$(tf_var_file)" 2>/dev/null) +targets_requiring_tf_var_scope := init workspace adopt validate plan plan-ci apply apply-ci verify ensure-destroy-allowed destroy output +requested_targets := $(if $(MAKECMDGOALS),$(MAKECMDGOALS),init) +ifneq ($(filter $(targets_requiring_tf_var_scope),$(requested_targets)),) +ifeq ($(has_sub_environment_scope),) +$(error has_sub_environment_scope not found in $(tf_var_file)) +endif +endif workspace_name = $(if $(filter false,$(has_sub_environment_scope)),$(environment),$(sub_environment)) allow_shared_scope_destroy ?= $(ALLOW_SHARED_SCOPE_DESTROY) @@ -13,9 +20,7 @@ tf_cmd = AWS_PROFILE=$(AWS_PROFILE) terraform bucket_name = $(if $(filter dev,$(environment)),immunisation-$(apigee_environment),immunisation-$(environment))-terraform-state-files -tf_state = \ - -backend-config="bucket=$(bucket_name)" \ - -backend-config="key=event-source-mappings/state" +tf_state = -backend-config="bucket=$(bucket_name)" tf_vars = \ -var="sub_environment=$(sub_environment)" \ diff --git a/infrastructure/instance/README.md b/infrastructure/instance/README.md index a3299cccf8..8ef99ab910 100644 --- a/infrastructure/instance/README.md +++ b/infrastructure/instance/README.md @@ -36,11 +36,11 @@ E.g. `pr-57`. You can use this to test out changes when tests fail in CI. ## Lambda Trigger Handoff -The `delta_trigger` and `id_sync_sqs_trigger` event source mappings are managed from `../event_source_mappings` so the main instance plan does not rewrite shared backend state. 
The deploy workflow applies the main instance first, then updates the trigger mappings from the dedicated trigger workspace. Existing mappings are imported only through the controlled migration workflow. +The `delta_trigger` and `id_sync_sqs_trigger` event source mappings are managed from `../event_source_mappings` so the main instance plan does not rewrite shared backend state. The deploy workflow applies the main instance first, safely adopts any existing trigger mappings into the dedicated trigger workspace, then plans and applies trigger changes from that workspace. ### First Cutover -Use the `Migrate Event Source Mappings` workflow once per environment before relying on the normal backend deploy for trigger changes. Select the target `environment` and `sub_environment`, then set `confirm_event_source_mapping_migration` to `true`. The migration workflow imports existing mappings, runs `terraform validate`, saves a dedicated trigger `tfplan` artifact, applies that saved plan, and verifies the final Lambda targets. +The normal backend deploy performs idempotent adoption before it plans trigger changes. Use the `Migrate Event Source Mappings` workflow when you want to perform the handoff separately from a full backend deploy. Select the target `environment` and `sub_environment`, then set `confirm_event_source_mapping_migration` to `true`. The migration workflow imports existing mappings, runs `terraform validate`, saves a dedicated trigger `tfplan` artifact, applies that saved plan, and verifies the final Lambda targets. Before starting, check for duplicate or stale mappings. 
Replace the variable values with the shared scope and target sub-environment: diff --git a/utilities/scripts/adopt_event_source_mappings.sh b/utilities/scripts/adopt_event_source_mappings.sh index 68f439435d..1ac07f0f5a 100644 --- a/utilities/scripts/adopt_event_source_mappings.sh +++ b/utilities/scripts/adopt_event_source_mappings.sh @@ -110,10 +110,10 @@ resolve_mapping_uuid() { local target_mapping_uuid="" local counterpart_mapping_uuid="" local mapping_uuid="" + local resource_in_state="false" if state_has_resource "${address}"; then - echo "${address} is already managed in this workspace." >&2 - return 0 + resource_in_state="true" fi target_mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${target_function_name}")" @@ -122,13 +122,19 @@ resolve_mapping_uuid() { counterpart_mapping_uuid="$(lookup_mapping_uuid "${event_source_arn}" "${counterpart_function_name}")" fi + if [[ -n "${target_mapping_uuid}" && -n "${counterpart_mapping_uuid}" ]]; then + echo "Both target and counterpart mappings exist for ${address}; refusing to continue." >&2 + echo "Target UUID: ${target_mapping_uuid}" >&2 + echo "Counterpart UUID: ${counterpart_mapping_uuid}" >&2 + exit 1 + fi + + if [[ "${resource_in_state}" == "true" ]]; then + echo "${address} is already managed in this workspace." >&2 + return 0 + fi + if [[ -n "${counterpart_mapping_uuid}" ]]; then - if [[ -n "${target_mapping_uuid}" ]]; then - echo "Both target and counterpart mappings exist for ${address}; refusing to delete a live mapping during adoption." >&2 - echo "Target UUID: ${target_mapping_uuid}" >&2 - echo "Counterpart UUID: ${counterpart_mapping_uuid}" >&2 - exit 1 - fi mapping_uuid="${counterpart_mapping_uuid}" else mapping_uuid="${target_mapping_uuid}"