diff --git a/infra/live/global_vars.hcl b/infra/live/global_vars.hcl index a863909..1f9e675 100644 --- a/infra/live/global_vars.hcl +++ b/infra/live/global_vars.hcl @@ -10,7 +10,8 @@ locals { "application-autoscaling:*", "cloudwatch:*", "sqs:*", - "cloudfront:*" + "cloudfront:*", + "xray:*" ] } diff --git a/infra/live/prod/environment_vars.hcl b/infra/live/prod/environment_vars.hcl index dfaeeca..679e89f 100644 --- a/infra/live/prod/environment_vars.hcl +++ b/infra/live/prod/environment_vars.hcl @@ -6,4 +6,5 @@ locals { inputs = { log_retention_days = local.log_retention_days deploy_branches = local.deploy_branches + otel_sample_rate = 0.1 # 10% of traces sampled } diff --git a/infra/modules/aws/_shared/lambda/data.tf b/infra/modules/aws/_shared/lambda/data.tf index 1ef311c..1b2b71e 100644 --- a/infra/modules/aws/_shared/lambda/data.tf +++ b/infra/modules/aws/_shared/lambda/data.tf @@ -1,3 +1,14 @@ +data "aws_iam_policy_document" "lambda_xray" { + statement { + effect = "Allow" + actions = [ + "xray:PutTraceSegments", + "xray:PutTelemetryRecords", + ] + resources = ["*"] + } +} + data "aws_s3_bucket" "code_bucket" { bucket = var.code_bucket } diff --git a/infra/modules/aws/_shared/lambda/main.tf b/infra/modules/aws/_shared/lambda/main.tf index 52dd042..c59f5ed 100644 --- a/infra/modules/aws/_shared/lambda/main.tf +++ b/infra/modules/aws/_shared/lambda/main.tf @@ -30,12 +30,21 @@ resource "aws_s3_object" "bootstrap_lambda_zip" { content_type = "application/zip" } -resource "aws_lambda_function" "lambda" { - function_name = local.lambda_name - role = aws_iam_role.iam_for_lambda.arn - handler = local.lambda_handler - runtime = local.lambda_runtime +resource "aws_iam_policy" "lambda_xray" { + name = "${local.lambda_name}-xray" + policy = data.aws_iam_policy_document.lambda_xray.json +} + +resource "aws_iam_role_policy_attachment" "lambda_xray" { + role = aws_iam_role.iam_for_lambda.name + policy_arn = aws_iam_policy.lambda_xray.arn +} +resource "aws_lambda_function" "lambda" { + function_name = local.lambda_name + role = aws_iam_role.iam_for_lambda.arn + handler = local.lambda_handler + runtime = local.lambda_runtime reserved_concurrent_executions = local.pc_reserved_count s3_bucket = data.aws_s3_bucket.code_bucket.bucket @@ -44,8 +53,15 @@ resource "aws_lambda_function" "lambda" { # publish ONE immutable version so we can create an alias publish = true + tracing_config { + mode = "Active" + } + environment { - variables = var.environment_variables + variables = merge(var.environment_variables, { + OTEL_TRACES_SAMPLER = "parentbased_traceidratio" + OTEL_TRACES_SAMPLER_ARG = tostring(var.otel_sample_rate) + }) } # tags for identifying the code deploy app and its deployment config. Used in CI/CD pipelines. diff --git a/infra/modules/aws/_shared/lambda/variables.tf b/infra/modules/aws/_shared/lambda/variables.tf index 83ed6dd..9612c53 100644 --- a/infra/modules/aws/_shared/lambda/variables.tf +++ b/infra/modules/aws/_shared/lambda/variables.tf @@ -1,4 +1,10 @@ ### start of static vars set in root.hcl ### +variable "otel_sample_rate" { + type = number + description = "OpenTelemetry trace sampling rate — 0.0 = no traces, 1.0 = 100% of traces sampled" + default = 1.0 # 100% +} + variable "project_name" { type = string description = "Project name used in naming resources" diff --git a/infra/modules/aws/api/main.tf b/infra/modules/aws/api/main.tf index d8c3e00..4714b87 100644 --- a/infra/modules/aws/api/main.tf +++ b/infra/modules/aws/api/main.tf @@ -1,9 +1,10 @@ module "lambda_api" { source = "../_shared/lambda" - project_name = var.project_name - environment = var.environment - code_bucket = var.code_bucket + project_name = var.project_name + environment = var.environment + code_bucket = var.code_bucket + otel_sample_rate = var.otel_sample_rate lambda_name = local.lambda_name diff --git a/infra/modules/aws/api/variables.tf b/infra/modules/aws/api/variables.tf index c34d4f9..5542827 100644 --- a/infra/modules/aws/api/variables.tf +++ b/infra/modules/aws/api/variables.tf @@ -13,6 +13,12 @@ variable "code_bucket" { type = string description = "Bucket where deployable code artifacts are uploaded" } + +variable "otel_sample_rate" { + type = number + description = "OpenTelemetry trace sampling rate — 0.0 = no traces, 1.0 = 100% of traces sampled" + default = 1.0 # 100% +} ### end of static vars set in root.hcl ### variable "deployment_config" { diff --git a/infra/modules/aws/lambda_worker/main.tf b/infra/modules/aws/lambda_worker/main.tf index ce45336..7a42357 100644 --- a/infra/modules/aws/lambda_worker/main.tf +++ b/infra/modules/aws/lambda_worker/main.tf @@ -1,9 +1,10 @@ module "lambda_worker" { source = "../_shared/lambda" - project_name = var.project_name - environment = var.environment - code_bucket = var.code_bucket + project_name = var.project_name + environment = var.environment + code_bucket = var.code_bucket + otel_sample_rate = var.otel_sample_rate lambda_name = local.lambda_name diff --git a/infra/modules/aws/lambda_worker/variables.tf b/infra/modules/aws/lambda_worker/variables.tf index ba56938..957a700 100644 --- a/infra/modules/aws/lambda_worker/variables.tf +++ b/infra/modules/aws/lambda_worker/variables.tf @@ -13,6 +13,12 @@ variable "code_bucket" { type = string description = "Bucket where deployable code artifacts are uploaded" } + +variable "otel_sample_rate" { + type = number + description = "OpenTelemetry trace sampling rate — 0.0 = no traces, 1.0 = 100% of traces sampled" + default = 1.0 # 100% +} ### end of static vars set in root.hcl ### variable "sqs_queue_name" {