From fad47a9806c42b837f67cab3017a87a11775fb4e Mon Sep 17 00:00:00 2001 From: Nilakanta Mallick Date: Mon, 30 Mar 2026 19:11:39 +0530 Subject: [PATCH 1/2] Expand RDS PostgreSQL and ElastiCache monitoring guides --- docs/instrument/infra/aws/elasticache.md | 398 +++++++++++++++----- docs/instrument/infra/aws/rds.md | 451 +++++++++++++++++++---- 2 files changed, 685 insertions(+), 164 deletions(-) diff --git a/docs/instrument/infra/aws/elasticache.md b/docs/instrument/infra/aws/elasticache.md index 2cb1f12..083a27c 100644 --- a/docs/instrument/infra/aws/elasticache.md +++ b/docs/instrument/infra/aws/elasticache.md @@ -4,169 +4,399 @@ id: collecting-aws-elasticache-telemetry title: AWS ElastiCache Monitoring with OpenTelemetry - Redis & Memcached Metrics sidebar_label: AWS ElastiCache description: - Stream ElastiCache Redis and Memcached metrics via CloudWatch. Monitor - cache hit rates, evictions, memory usage, and latency with - OpenTelemetry. + Monitor AWS ElastiCache Redis and Memcached with OpenTelemetry and + CloudWatch Metrics Stream. Track cache hit rates, evictions, memory, + latency, and connected clients in base14 Scout. 
keywords: - [ - aws elasticache monitoring, - elasticache redis monitoring, - cloudwatch metrics stream, - aws cache monitoring, - elasticache observability, - ] + - aws elasticache monitoring + - elasticache redis monitoring + - elasticache metrics + - elasticache redis metrics + - elasticache observability + - cloudwatch metrics stream + - aws cache monitoring + - elasticache redis observability + - monitor elasticache + - elasticache cloudwatch metrics head: - - script - type: application/ld+json - | - {"@context":"https://schema.org","@type":"FAQPage","mainEntity":[{"@type":"Question","name":"How do I monitor AWS ElastiCache with OpenTelemetry?","acceptedAnswer":{"@type":"Answer","text":"Use CloudWatch Metrics Stream to collect ElastiCache metrics with 2-3 minute latency, and optionally add the OpenTelemetry Redis receiver for detailed cache-specific metrics. Both can be sent to base14 Scout."}},{"@type":"Question","name":"Should I use CloudWatch Metrics Stream or Prometheus exporters for ElastiCache?","acceptedAnswer":{"@type":"Answer","text":"CloudWatch Metrics Stream is recommended over Prometheus exporters for ElastiCache. It provides faster delivery (2-3 min vs 5+ min), lower cost, better scalability, and automatic metric discovery."}},{"@type":"Question","name":"What ElastiCache metrics can I collect with OpenTelemetry?","acceptedAnswer":{"@type":"Answer","text":"CloudWatch Metrics Stream delivers CPU utilization, memory usage, cache hits/misses, network I/O, and evictions. 
The optional Redis receiver adds detailed metrics like command latency, connected clients, and keyspace stats."}},{"@type":"Question","name":"How do I filter ElastiCache metrics in CloudWatch Metrics Stream?","acceptedAnswer":{"@type":"Answer","text":"When configuring your CloudWatch Metrics Stream, select specific namespaces and choose only AWS/ElastiCache instead of all namespaces to reduce costs and data volume."}},{"@type":"Question","name":"Can I monitor both ElastiCache Redis and Memcached with OpenTelemetry?","acceptedAnswer":{"@type":"Answer","text":"Yes. CloudWatch Metrics Stream supports both Redis and Memcached ElastiCache engines. For Redis, you can also add the OpenTelemetry Redis receiver for deeper cache-level metrics."}}]} + {"@context":"https://schema.org","@type":"FAQPage","mainEntity":[{"@type":"Question","name":"How do I monitor AWS ElastiCache with OpenTelemetry?","acceptedAnswer":{"@type":"Answer","text":"Use CloudWatch Metrics Stream to collect ElastiCache infrastructure metrics (CPU, memory, network) with 2-3 minute latency, and add the OpenTelemetry Redis receiver for cache-specific metrics like command latency, keyspace hits, and connected clients. Both feed into base14 Scout."}},{"@type":"Question","name":"What ElastiCache metrics does CloudWatch collect?","acceptedAnswer":{"@type":"Answer","text":"CloudWatch collects CPUUtilization, EngineCPUUtilization, FreeableMemory, NetworkBytesIn/Out, CurrConnections, NewConnections, CacheHits, CacheMisses, Evictions, ReplicationLag, and BytesUsedForCache for ElastiCache Redis and Memcached."}},{"@type":"Question","name":"Should I use CloudWatch Metrics Stream or the Redis receiver for ElastiCache?","acceptedAnswer":{"@type":"Answer","text":"Use both. CloudWatch provides host-level metrics (CPU, memory, network). The OTel Redis receiver adds cache internals like per-command latency, keyspace statistics, and memory fragmentation ratio. 
Together they give complete visibility."}},{"@type":"Question","name":"How do I monitor ElastiCache Redis slow commands?","acceptedAnswer":{"@type":"Answer","text":"Enable Redis slow log in your ElastiCache parameter group by setting slowlog-log-slower-than to a threshold in microseconds (e.g., 10000 for 10ms). Forward slow logs via CloudWatch Logs to your OTel Collector for analysis."}},{"@type":"Question","name":"What is a good cache hit rate for ElastiCache Redis?","acceptedAnswer":{"@type":"Answer","text":"A healthy Redis cache hit rate is above 95%. Below 90% indicates that a significant portion of requests are missing the cache and hitting the backend database, which defeats the purpose of caching. Monitor CacheHits / (CacheHits + CacheMisses) to track this ratio."}},{"@type":"Question","name":"How do I set up alerts for ElastiCache?","acceptedAnswer":{"@type":"Answer","text":"Route ElastiCache metrics through CloudWatch Metrics Stream to base14 Scout, then alert on: cache hit rate below 90%, evictions above zero (sustained), memory usage above 80%, CPU above 70%, replication lag above 5 seconds, and current connections approaching the max."}},{"@type":"Question","name":"Can I monitor both ElastiCache Redis and Memcached with OpenTelemetry?","acceptedAnswer":{"@type":"Answer","text":"Yes. CloudWatch Metrics Stream supports both engines. For Redis, add the OTel Redis receiver for deeper cache-level metrics. For Memcached, the OTel Memcached receiver collects hit rates, evictions, and connection counts."}}]} --- ## Overview -This guide will walk you through collecting rich telemetry data from your -ElastiCache caches using CloudWatch Metrics Stream. We recommend using -CloudWatch Metrics Stream over Prometheus exporters as it provides faster metric -delivery (2-3 minute latency) and is more efficient for AWS services. +This guide covers monitoring AWS ElastiCache (Redis and Memcached) +using OpenTelemetry and CloudWatch Metrics Stream. 
You'll collect +infrastructure metrics from CloudWatch, cache-specific metrics from +the Redis receiver, and slow logs — all flowing into base14 Scout. -## Collecting ElastiCache Metrics +## What You'll Monitor -For collecting ElastiCache metrics, we recommend using **CloudWatch Metrics -Stream** instead of Prometheus exporters. CloudWatch Metrics Stream provides: +ElastiCache monitoring combines CloudWatch metrics with optional +Redis receiver metrics for complete visibility: -- **Faster delivery**: 2-3 minute latency vs 5+ minutes with polling -- **Lower cost**: No need to run dedicated exporters -- **Better scalability**: Native AWS service integration -- **Automatic metric discovery**: No need to manually configure metric lists +**CloudWatch Metrics Stream (infrastructure + cache basics):** -### Step 1: Set up CloudWatch Metrics Stream +| Metric | What it tells you | +| ------ | ----------------- | +| `CPUUtilization` | Instance CPU usage (%) | +| `EngineCPUUtilization` | Redis/Memcached engine CPU (%) — more relevant than host CPU | +| `FreeableMemory` | Available RAM (bytes) | +| `BytesUsedForCache` | Memory used by the cache engine | +| `CacheHits` / `CacheMisses` | Cache effectiveness | +| `Evictions` | Keys removed due to memory pressure | +| `CurrConnections` / `NewConnections` | Client connection counts | +| `NetworkBytesIn` / `NetworkBytesOut` | Network throughput | +| `ReplicationLag` | Replica delay (seconds, Redis only) | +| `SaveInProgress` | Whether a background save is running (Redis) | +| `CurrItems` | Number of items in the cache | -Follow our comprehensive -[CloudWatch Metrics Stream guide](cloudwatch-metrics-stream.md) to set up the -infrastructure. 
+**OTel Redis receiver (cache internals, Redis only):** -### Step 2: Configure ElastiCache metrics filtering +| Metric | What it tells you | +| ------ | ----------------- | +| `redis.memory.used` | Actual memory consumed by Redis | +| `redis.maxmemory` | Configured memory limit | +| `redis.connected_clients` | Currently connected client count | +| `redis.keyspace.hits` / `redis.keyspace.misses` | Per-keyspace hit/miss rates | +| `redis.keys.expired` | Keys expired by TTL | +| `redis.keys.evicted` | Keys evicted under memory pressure | +| `redis.uptime` | Time since last restart (seconds) | +| `redis.memory.fragmentation_ratio` | Memory fragmentation (> 1.5 is a concern) | +| `redis.commands.processed` | Total commands processed | +| `redis.connections.received` | Total connections received since start | -When configuring your CloudWatch Metrics Stream in **Step 3** of the setup -guide, make sure to: +## Prerequisites -1. **Select specific namespaces** instead of "All namespaces" -2. **Choose only AWS/ElastiCache** from the namespace list -3. 
This ensures you only collect ElastiCache metrics, reducing costs and data - volume +| Requirement | Minimum | Recommended | +| ----------- | ------- | ----------- | +| ElastiCache | Redis 6.x or Memcached 1.6 | Redis 7.x | +| OTel Collector Contrib | 0.90.0 | latest | +| base14 Scout | Any | - | +| AWS permissions | CloudWatch, Kinesis Firehose, S3 | - | -### Step 3: Create OTEL Collector config for Redis metrics (Optional) +Before starting: -If you're using Redis and need detailed cache-specific metrics, create -`elasticache-metrics-collection-config.yaml`: +- ElastiCache cluster must be accessible from the host running the + OTel Collector (same VPC) +- For the Redis receiver: AUTH token if encryption in transit is + enabled +- CloudWatch Metrics Stream infrastructure set up (see Step 1) -```yaml +## Step 1: Set up CloudWatch Metrics Stream + +Follow our comprehensive +[CloudWatch Metrics Stream guide](cloudwatch-metrics-stream.md) to +set up the streaming infrastructure (S3 bucket, Kinesis Firehose, +Metrics Stream). + +When configuring the Metrics Stream: + +1. Select **specific namespaces** instead of "All namespaces" +2. Choose **AWS/ElastiCache** from the namespace list +3. 
This ensures you only collect ElastiCache metrics, reducing costs + and data volume + +## Step 2: Configure the OTel Collector for Redis metrics + +For Redis clusters, add the Redis receiver for cache-internal metrics +that CloudWatch doesn't expose: + +```yaml showLineNumbers title="elasticache-redis-config.yaml" receivers: redis: endpoint: ${env:REDIS_ENDPOINT} collection_interval: 60s - password: ${env:REDIS_PASSWORD} - # transport: tcp - # tls: - # insecure: false - # ca_file: /etc/ssl/certs/ca-certificates.crt - # cert_file: /etc/ssl/certs/redis.crt - # key_file: /etc/ssl/certs/redis.key + password: ${env:REDIS_AUTH_TOKEN} + tls: + insecure: false + ca_file: /etc/ssl/certs/ca-certificates.crt metrics: redis.maxmemory: enabled: true - redis.cmd.latency: - enabled: true redis.connected_clients: enabled: true redis.uptime: enabled: true redis.memory.used: enabled: true + redis.memory.fragmentation_ratio: + enabled: true redis.keys.expired: enabled: true + redis.keys.evicted: + enabled: true redis.keyspace.hits: enabled: true redis.keyspace.misses: enabled: true + redis.commands.processed: + enabled: true + redis.connections.received: + enabled: true + +processors: + resource: + attributes: + - key: environment + value: ${env:ENVIRONMENT} + action: upsert + - key: service.name + value: ${env:SERVICE_NAME} + action: upsert + - key: cloud.provider + value: aws + action: upsert + + batch: + timeout: 10s + send_batch_size: 1024 exporters: - otlp: - endpoint: ":4317" + otlphttp/b14: + endpoint: ${env:OTEL_EXPORTER_OTLP_ENDPOINT} tls: - insecure: true + insecure_skip_verify: true service: pipelines: - metrics/elasticache: + metrics: receivers: [redis] - exporters: [otlp] + processors: [batch, resource] + exporters: [otlphttp/b14] ``` -> **Note**: CloudWatch Metrics Stream will automatically deliver AWS/ElastiCache -> metrics (CPU utilization, memory usage, cache hits/misses, network I/O, etc.), -> while the Redis receiver collects detailed cache-specific metrics if 
needed.

+### Environment variables

-## Collecting Elasticache Logs

+```bash showLineNumbers title=".env"
+REDIS_ENDPOINT=your-cluster.xxxxx.ng.0001.use1.cache.amazonaws.com:6379
+REDIS_AUTH_TOKEN=your_auth_token
+ENVIRONMENT=production
+SERVICE_NAME=elasticache-redis
+OTEL_EXPORTER_OTLP_ENDPOINT=https://<your-tenant>.base14.io
+```

-The log collection of Elasticache Cluster requires specifying the list of log
-group names.From the AWS CloudWatch console , please find the log group(s)
-relevant to the integration.

+> **Note**: CloudWatch Metrics Stream delivers the infrastructure
+> metrics (CPU, memory, connections, evictions) automatically. The
+> Redis receiver above adds cache internals like keyspace hit rates,
+> memory fragmentation, and connection details. For Memcached
+> clusters, use the
+> [Memcached receiver](../../component/memcached.md) instead.

-### Create the Collector config file

+## Step 3: Collect ElastiCache logs

-```yaml
+ElastiCache Redis supports two log types through CloudWatch:
+
+- **Slow log** — commands exceeding a latency threshold
+- **Engine log** — connection events, failovers, configuration changes
+
+Configure the CloudWatch Logs receiver:
+
+```yaml showLineNumbers title="elasticache-logs-config.yaml"
 receivers:
-  awscloudwatch/elasticache_logs:
-    region: us-east-1
+  awscloudwatch/elasticache:
+    region: ${env:AWS_REGION}
     logs:
       poll_interval: 1m
       groups:
         named:
-          # replace with your Elasticache's log group name
-          /aws/elasticache/:
+          # Replace with your ElastiCache cluster ID
+          /aws/elasticache/cluster/${env:CLUSTER_ID}/slow-log:
+          /aws/elasticache/cluster/${env:CLUSTER_ID}/engine-log:

 processors:
-  attributes/add_source_elasticache:
+  attributes/add_source:
     actions:
       - key: source
         value: "elasticache"
         action: insert
+      - key: cloud.provider
+        value: "aws"
+        action: insert
+
   batch:
     send_batch_size: 10000
     send_batch_max_size: 11000
     timeout: 10s

 exporters:
-  otlp:
-    endpoint: ":4317"
+  otlphttp/b14:
+    endpoint: ${env:OTEL_EXPORTER_OTLP_ENDPOINT}
     tls:
-      insecure: false
+      insecure_skip_verify: true

 service:
   pipelines:
     logs/elasticache:
-      receivers: [awscloudwatch/elasticache_logs]
-      processors: [attributes/add_source_elasticache, batch]
-      exporters: [otlp]
+      receivers: [awscloudwatch/elasticache]
+      processors: [attributes/add_source, batch]
+      exporters: [otlphttp/b14]
 ```

-After deploying these changes, generate some traffic to your elasticache cluster
-and check in Scout to see your elasticache's metrics and logs.

----

+### Enable slow log in ElastiCache
+
+In your ElastiCache parameter group, set:
+
+```text
+slowlog-log-slower-than = 10000   # Log commands over 10ms (microseconds)
+slowlog-max-len = 128             # Keep last 128 slow commands
+```
+
+Then in the ElastiCache console, enable **Log delivery** for both
+slow log and engine log, targeting CloudWatch Logs.
+
+## Step 4: Verify the setup
+
+Start the Collector and check for metrics:
+
+```bash showLineNumbers
+# Test Redis connectivity from the Collector host
+redis-cli -h ${REDIS_ENDPOINT%:*} -p 6379 \
+  --tls --cacert /etc/ssl/certs/ca-certificates.crt \
+  -a ${REDIS_AUTH_TOKEN} ping
+```
+
+Check Scout for both CloudWatch metrics (prefixed `aws.elasticache.*`)
+and Redis metrics (prefixed `redis.*`).
+ +## Key alerts to configure + +| Metric | Warning | Critical | Why | +| ------ | ------- | -------- | --- | +| Cache hit rate | < 90% | < 80% | Low hit rate means cache isn't effective — requests hit the database instead | +| `Evictions` | > 0 (sustained) | > 100/min | Evictions mean memory pressure is forcing useful data out | +| `EngineCPUUtilization` | > 65% | > 80% | Redis is single-threaded — high CPU means commands are queuing | +| `BytesUsedForCache` | > 80% of max | > 90% of max | Approaching memory limit triggers aggressive eviction | +| `CurrConnections` | > 80% of max | > 90% of max | Connection exhaustion causes application errors | +| `ReplicationLag` | > 5s | > 30s | High lag means replicas serve stale data | +| `redis.memory.fragmentation_ratio` | > 1.5 | > 2.0 | High fragmentation wastes memory — consider a restart | +| Slow log entries | > 10/min | > 50/min | Frequent slow commands indicate saturation — check slow log | + +**Cache hit rate formula:** +`CacheHits / (CacheHits + CacheMisses) * 100` + +> **Why EngineCPUUtilization, not CPUUtilization?** ElastiCache Redis +> is single-threaded. `CPUUtilization` shows total host CPU across +> all cores, which can look low even when the Redis engine core is +> saturated. `EngineCPUUtilization` shows the single-core usage that +> actually matters. + +## Troubleshooting + +### Redis receiver shows no metrics + +**Cause**: Collector can't reach the ElastiCache cluster. + +**Fix**: + +1. ElastiCache is VPC-only — the Collector must run in the same VPC + or a peered VPC +2. Check the security group allows inbound on port 6379 from the + Collector's security group +3. If encryption in transit is enabled, the Redis receiver must use + TLS (`tls.insecure: false` with a CA cert) +4. Test connectivity: + `redis-cli -h -p 6379 --tls -a ping` + +### CloudWatch metrics not appearing + +**Cause**: Metrics Stream not configured for the AWS/ElastiCache +namespace. + +**Fix**: + +1. 
In CloudWatch > Metrics > Streams, verify the stream is active +2. Check that the namespace filter includes `AWS/ElastiCache` +3. Verify Kinesis Firehose delivery is succeeding +4. Allow 5-10 minutes for initial metrics to flow + +### High evictions but low memory usage + +**Cause**: The `maxmemory-policy` is set to a volatile policy +(like `volatile-lru`) and keys without TTLs are filling memory, +while keys with TTLs get evicted. + +**Fix**: + +1. Check the eviction policy: + `redis-cli CONFIG GET maxmemory-policy` +2. If using `volatile-lru`, consider switching to `allkeys-lru` +3. Review key TTL distribution — sample keys and check their TTLs + to identify keys without expiration + +### Cache hit rate dropping + +**Cause**: Application pattern change, insufficient memory, or key +expiration settings. + +**Fix**: + +1. Check if evictions are increasing (memory pressure pushing out + useful keys) +2. Review whether application code is requesting keys that were + never cached +3. Compare `CurrItems` trend — a sudden drop suggests mass + expiration +4. Consider increasing node size or adding shards + +## FAQ + +**How do I monitor ElastiCache Redis slow commands?** + +Enable the slow log in your ElastiCache parameter group by setting +`slowlog-log-slower-than` to a threshold in microseconds (10000 = +10ms). Enable log delivery to CloudWatch Logs, then forward to +Scout via the CloudWatch Logs receiver. + +**What is a good cache hit rate?** + +Above 95% is healthy. Below 90% means a significant portion of +requests miss the cache and hit the backend database. Track the +ratio over time — a gradual decline often indicates growing data +volume without proportional cache capacity. + +**Can I monitor Memcached clusters with this setup?** + +Yes. CloudWatch Metrics Stream covers Memcached infrastructure +metrics. 
For cache-specific metrics, the OTel Collector has a +[Memcached receiver](../../component/memcached.md) that collects +hit rates, evictions, connection counts, and memory usage — the +Memcached equivalent of the Redis receiver above. + +**Should I monitor ElastiCache Serverless differently?** + +ElastiCache Serverless uses the same CloudWatch metrics namespace +(`AWS/ElastiCache`) but adds metrics like +`ElastiCacheProcessingUnits` for capacity tracking. The CloudWatch +Metrics Stream setup is identical — just include the +`AWS/ElastiCache` namespace. + +**How do I monitor multiple ElastiCache clusters?** + +Add multiple Redis receiver blocks with distinct names: + +```yaml +receivers: + redis/sessions: + endpoint: sessions-cluster.xxxxx.cache.amazonaws.com:6379 + redis/cache: + endpoint: cache-cluster.xxxxx.cache.amazonaws.com:6379 +``` -With this setup, your AWS Elasticache cluster becomes fully observable through -Scout. You'll gain real-time visibility into performance metrics and logs -without any changes to your application code. +Then include both in the pipeline: +`receivers: [redis/sessions, redis/cache]`. 
## Related Guides -- [CloudWatch Metrics Stream Setup](./cloudwatch-metrics-stream.md) - Set up AWS - metrics streaming -- [ELB Monitoring](./elb.md) - Monitor AWS Application Load Balancers -- [RDS Monitoring](./rds.md) - Monitor AWS RDS databases -- [Redis Monitoring](../../component/redis.md) - Self-hosted Redis monitoring - guide +- [CloudWatch Metrics Stream Setup](./cloudwatch-metrics-stream.md) — + Configure AWS metrics streaming +- [Redis Monitoring](../../component/redis.md) — Self-hosted Redis + monitoring with OpenTelemetry +- [Memcached Monitoring](../../component/memcached.md) — Self-hosted + Memcached monitoring +- [RDS Monitoring](./rds.md) — Monitor AWS RDS databases +- [ELB Monitoring](./elb.md) — Monitor AWS Application Load Balancers - [OTel Collector Configuration](../../collector-setup/otel-collector-config.md) - \- Advanced collector configuration + — Collector setup basics diff --git a/docs/instrument/infra/aws/rds.md b/docs/instrument/infra/aws/rds.md index 83c0979..f097efd 100644 --- a/docs/instrument/infra/aws/rds.md +++ b/docs/instrument/infra/aws/rds.md @@ -1,74 +1,137 @@ --- date: 2025-04-26 id: collecting-aws-rds-postgres-telemetry -title: AWS RDS PostgreSQL Monitoring - Metrics, Logs & Query Performance +title: AWS RDS PostgreSQL Monitoring with OpenTelemetry - Metrics, Logs & Alerts sidebar_label: AWS RDS description: - Stream AWS RDS PostgreSQL metrics via CloudWatch. Monitor connections, - replication lag, IOPS, and query performance with OpenTelemetry and - base14 Scout. + Monitor AWS RDS PostgreSQL with OpenTelemetry and CloudWatch Metrics + Stream. Collect connections, replication lag, IOPS, query performance, + and Performance Insights data in base14 Scout. 
keywords: - [ - aws rds monitoring, - rds postgresql monitoring, - cloudwatch metrics stream, - aws database monitoring, - rds observability, - ] + - aws rds monitoring + - rds postgresql monitoring + - postgresql rds metrics + - aws rds postgres monitoring + - rds postgres observability + - cloudwatch metrics stream + - aws database monitoring + - rds performance insights + - aws rds postgresql observability + - rds postgres dashboard head: - - script - type: application/ld+json - | - {"@context":"https://schema.org","@type":"FAQPage","mainEntity":[{"@type":"Question","name":"How do I monitor AWS RDS PostgreSQL with OpenTelemetry?","acceptedAnswer":{"@type":"Answer","text":"Use CloudWatch Metrics Stream for infrastructure metrics (CPU, memory, disk I/O) and the OpenTelemetry PostgreSQL receiver for database-specific metrics like connections, query performance, and locks. Both feed into base14 Scout."}},{"@type":"Question","name":"What RDS metrics does CloudWatch Metrics Stream collect?","acceptedAnswer":{"@type":"Answer","text":"CloudWatch Metrics Stream automatically delivers AWS/RDS metrics including CPU utilization, memory usage, disk I/O, read/write latency, database connections, and replication lag with 2-3 minute latency."}},{"@type":"Question","name":"Do I need both CloudWatch Metrics Stream and the PostgreSQL receiver for RDS?","acceptedAnswer":{"@type":"Answer","text":"CloudWatch provides infrastructure-level RDS metrics while the PostgreSQL receiver collects database-specific metrics like locks, deadlocks, and sequential scans. Using both gives complete visibility in base14 Scout."}},{"@type":"Question","name":"How do I collect RDS PostgreSQL logs with OpenTelemetry?","acceptedAnswer":{"@type":"Answer","text":"Use the AWS CloudWatch Logs receiver in the OpenTelemetry Collector, specifying your RDS log group names. 
The collector polls CloudWatch Logs and forwards them to base14 Scout."}},{"@type":"Question","name":"How do I filter RDS metrics in CloudWatch Metrics Stream?","acceptedAnswer":{"@type":"Answer","text":"When configuring the Metrics Stream, select specific namespaces and choose only AWS/RDS instead of all namespaces to collect only RDS metrics, reducing costs and data volume."}}]} + {"@context":"https://schema.org","@type":"FAQPage","mainEntity":[{"@type":"Question","name":"How do I monitor AWS RDS PostgreSQL with OpenTelemetry?","acceptedAnswer":{"@type":"Answer","text":"Use CloudWatch Metrics Stream for infrastructure metrics (CPU, memory, disk I/O, connections) and the OpenTelemetry PostgreSQL receiver for database-specific metrics like locks, deadlocks, and sequential scans. Both feed into a single observability platform like base14 Scout."}},{"@type":"Question","name":"What RDS metrics does CloudWatch Metrics Stream collect?","acceptedAnswer":{"@type":"Answer","text":"CloudWatch Metrics Stream delivers AWS/RDS metrics including CPUUtilization, FreeableMemory, ReadIOPS, WriteIOPS, ReadLatency, WriteLatency, DatabaseConnections, ReplicaLag, FreeStorageSpace, and DiskQueueDepth with 2-3 minute latency."}},{"@type":"Question","name":"Do I need both CloudWatch Metrics Stream and the PostgreSQL receiver?","acceptedAnswer":{"@type":"Answer","text":"Yes. CloudWatch provides infrastructure-level RDS metrics (CPU, memory, IOPS) while the PostgreSQL receiver collects database-specific metrics like locks, deadlocks, sequential scans, and tuple operations. Using both gives complete visibility."}},{"@type":"Question","name":"How do I collect RDS PostgreSQL logs with OpenTelemetry?","acceptedAnswer":{"@type":"Answer","text":"Use the AWS CloudWatch Logs receiver in the OpenTelemetry Collector, specifying your RDS log group names. 
The collector polls CloudWatch Logs and forwards them to your observability backend."}},{"@type":"Question","name":"How do I monitor RDS PostgreSQL query performance?","acceptedAnswer":{"@type":"Answer","text":"Enable Performance Insights on your RDS instance and publish the metrics to CloudWatch. Performance Insights provides per-query statistics including wait events, top SQL by load, and active session history. For deeper query monitoring, use the PostgreSQL pg_stat_statements extension with the OTel PostgreSQL receiver."}},{"@type":"Question","name":"What is the difference between CloudWatch metrics and Enhanced Monitoring for RDS?","acceptedAnswer":{"@type":"Answer","text":"CloudWatch metrics are collected at 1-minute intervals and cover instance-level stats like CPU, memory, and IOPS. Enhanced Monitoring provides OS-level metrics at up to 1-second granularity, including per-process CPU, memory usage, and file system details. Enhanced Monitoring is useful for diagnosing issues that 1-minute CloudWatch intervals miss."}},{"@type":"Question","name":"How do I set up alerts for RDS PostgreSQL?","acceptedAnswer":{"@type":"Answer","text":"Route RDS metrics through CloudWatch Metrics Stream to base14 Scout, then configure alerts in Scout on key thresholds: CPU above 80%, connections above 80% of max, replication lag exceeding your SLA, storage below 20% free, and read/write latency spikes."}}]} --- ## Overview -This guide will walk you through collecting rich telemetry data from your RDS -postgres instance using CloudWatch Metrics Stream. We recommend using CloudWatch -Metrics Stream over Prometheus exporters as it provides faster metric delivery -(2-3 minute latency) and is more efficient for AWS services. +This guide covers monitoring AWS RDS PostgreSQL instances using +OpenTelemetry and CloudWatch Metrics Stream. 
You'll collect +infrastructure metrics from CloudWatch, database-specific metrics from +the PostgreSQL receiver, and logs from CloudWatch Logs — all flowing +into base14 Scout for unified visibility. -## Collecting RDS Postgres Metrics +## What You'll Monitor -For collecting RDS metrics, we recommend using **CloudWatch Metrics Stream** -instead of Prometheus exporters. CloudWatch Metrics Stream provides: +RDS PostgreSQL monitoring combines two metric sources that together +provide complete visibility: -- **Faster delivery**: 2-3 minute latency vs 5+ minutes with polling -- **Lower cost**: No need to run dedicated exporters -- **Better scalability**: Native AWS service integration -- **Automatic metric discovery**: No need to manually configure metric lists +**CloudWatch Metrics Stream (infrastructure):** -### Step 1: Set up CloudWatch Metrics Stream +| Metric | What it tells you | +| ------ | ----------------- | +| `CPUUtilization` | Instance CPU usage (%) | +| `FreeableMemory` | Available RAM (bytes) | +| `FreeStorageSpace` | Remaining disk space (bytes) | +| `ReadIOPS` / `WriteIOPS` | Disk read/write operations per second | +| `ReadLatency` / `WriteLatency` | Average time per disk I/O operation | +| `DatabaseConnections` | Active database connections | +| `ReplicaLag` | Replication delay for read replicas (seconds) | +| `DiskQueueDepth` | Number of I/O requests waiting | +| `NetworkReceiveThroughput` / `NetworkTransmitThroughput` | Network bytes in/out | +| `SwapUsage` | Swap space used (bytes) | +| `BurstBalance` | Remaining I/O burst credits (gp2/gp3) | + +**OTel PostgreSQL receiver (database internals):** + +| Metric | What it tells you | +| ------ | ----------------- | +| `postgresql.backends` | Active connections per database | +| `postgresql.commits` / `postgresql.rollbacks` | Transaction rates | +| `postgresql.database.locks` | Active locks by type | +| `postgresql.deadlocks` | Deadlock count | +| `postgresql.sequential_scans` / `postgresql.index.scans` | 
Scan type distribution | +| `postgresql.rows` | Rows affected by operations | +| `postgresql.table.size` / `postgresql.index.size` | Storage per table/index | +| `postgresql.table.vacuum.count` | Vacuum frequency | +| `postgresql.blks_hit` / `postgresql.blks_read` | Buffer cache hit ratio | +| `postgresql.replication.data_delay` | Replication byte lag | +| `postgresql.tup_inserted` / `postgresql.tup_updated` / `postgresql.tup_deleted` | Tuple operations | + +## Prerequisites + +| Requirement | Minimum | Recommended | +| ----------- | ------- | ----------- | +| RDS PostgreSQL | 11 | 14+ | +| OTel Collector Contrib | 0.90.0 | latest | +| base14 Scout | Any | - | +| AWS permissions | CloudWatch, Kinesis Firehose, S3 | - | + +Before starting: + +- RDS instance must be accessible from the host running the OTel + Collector (same VPC or VPC peering) +- A monitoring user with `pg_monitor` role for the PostgreSQL receiver +- CloudWatch Metrics Stream infrastructure set up (see Step 1) + +## Step 1: Set up CloudWatch Metrics Stream Follow our comprehensive -[CloudWatch Metrics Stream guide](cloudwatch-metrics-stream.md) to set up the -infrastructure. +[CloudWatch Metrics Stream guide](cloudwatch-metrics-stream.md) to set +up the streaming infrastructure (S3 bucket, Kinesis Firehose, Metrics +Stream). -### Step 2: Configure RDS metrics filtering +When configuring the Metrics Stream: -When configuring your CloudWatch Metrics Stream in **Step 3** of the setup -guide, make sure to: +1. Select **specific namespaces** instead of "All namespaces" +2. Choose **AWS/RDS** from the namespace list +3. This ensures you only collect RDS metrics, reducing costs and data + volume -1. **Select specific namespaces** instead of "All namespaces" -2. **Choose only AWS/RDS** from the namespace list -3. 
This ensures you only collect RDS metrics, reducing costs and data volume

+## Step 2: Create a monitoring user on RDS

-### Step 3: Create OTEL Collector config for PostgreSQL metrics
+Connect to your RDS PostgreSQL instance and create a dedicated
+monitoring user:

-For database-specific metrics (like connection counts, query performance),
-create `postgres-metrics-collection-config.yaml`:
+```sql
+CREATE USER otel_monitor WITH PASSWORD '<strong-password>';
+GRANT pg_monitor TO otel_monitor;
+```

-```yaml
+The `pg_monitor` role provides read-only access to all statistics
+views needed for monitoring. No write permissions required.
+
+For RDS instances, ensure the security group allows connections from
+the Collector host on port 5432.
+
+## Step 3: Configure the OTel Collector for PostgreSQL metrics
+
+Create `rds-postgres-config.yaml` with both the PostgreSQL receiver
+and the CloudWatch metrics pipeline:
+
+```yaml showLineNumbers title="rds-postgres-config.yaml"
 receivers:
   postgresql:
-    endpoint: ${env:POSTGRESQL_ENDPOINT}
+    endpoint: ${env:RDS_ENDPOINT}
     collection_interval: 10s
-    username: ${env:POSTGRESQL_USERNAME}
-    password: ${env:POSTGRESQL_PASSWORD}
-    databases: ["pgtestdb"]
+    username: ${env:RDS_MONITOR_USER}
+    password: ${env:RDS_MONITOR_PASSWORD}
+    databases: ["${env:RDS_DATABASE}"]
     tls:
       insecure_skip_verify: true
+
     metrics:
       postgresql.database.locks:
         enabled: true
@@ -76,84 +139,312 @@ receivers:
         enabled: true
       postgresql.sequential_scans:
         enabled: true
+      postgresql.index.scans:
+        enabled: true
+      postgresql.backends:
+        enabled: true
+      postgresql.commits:
+        enabled: true
+      postgresql.rollbacks:
+        enabled: true
+      postgresql.db_size:
+        enabled: true
+      postgresql.table.count:
+        enabled: true
+      postgresql.table.size:
+        enabled: true
+      postgresql.index.size:
+        enabled: true
+      postgresql.table.vacuum.count:
+        enabled: true
+      postgresql.rows:
+        enabled: true
+      postgresql.blks_hit:
+        enabled: true
+      postgresql.blks_read:
+        enabled: true
+      postgresql.tup_inserted:
+        enabled: true
+ postgresql.tup_updated: + enabled: true + postgresql.tup_deleted: + enabled: true + postgresql.tup_fetched: + enabled: true + postgresql.replication.data_delay: + enabled: true + +processors: + resource: + attributes: + - key: environment + value: ${env:ENVIRONMENT} + action: upsert + - key: service.name + value: ${env:SERVICE_NAME} + action: upsert + - key: cloud.provider + value: aws + action: upsert + + batch: + timeout: 10s + send_batch_size: 1024 exporters: - otlp: - endpoint: ":4317" + otlphttp/b14: + endpoint: ${env:OTEL_EXPORTER_OTLP_ENDPOINT} tls: - insecure: true + insecure_skip_verify: true service: pipelines: - metrics/postgresql: + metrics: receivers: [postgresql] - exporters: [otlp] + processors: [batch, resource] + exporters: [otlphttp/b14] ``` -> **Note**: CloudWatch Metrics Stream will automatically deliver AWS/RDS metrics -> (CPU, memory, disk I/O, etc.), while the PostgreSQL receiver collects -> database-specific metrics. +### Environment variables -## Collecting RDS Logs +```bash showLineNumbers title=".env" +RDS_ENDPOINT=your-rds-instance.xxxxx.us-east-1.rds.amazonaws.com:5432 +RDS_MONITOR_USER=otel_monitor +RDS_MONITOR_PASSWORD=your_password +RDS_DATABASE=your_database +ENVIRONMENT=production +SERVICE_NAME=rds-postgres +OTEL_EXPORTER_OTLP_ENDPOINT=https://.base14.io +``` -The log collection of RDS instance requires specifying the list of log group -names. From the AWS CloudWatch console, please find the log group(s) relevant to -the integration. +> **Note**: CloudWatch Metrics Stream delivers the infrastructure +> metrics (CPU, memory, IOPS) automatically. The PostgreSQL receiver +> above collects the database-internal metrics. Together they give +> you the full picture. -### Create the Collector config file +## Step 4: Collect RDS PostgreSQL logs -```yaml +RDS PostgreSQL publishes logs to CloudWatch Log Groups. 
Use the +CloudWatch Logs receiver to forward them: + +```yaml showLineNumbers title="rds-postgres-logs-config.yaml" receivers: - awscloudwatch/rds_postgres_logs: - region: us-east-1 + awscloudwatchlogs/rds_postgres: + region: ${env:AWS_REGION} logs: poll_interval: 1m groups: named: - # replace with your RDS log group name - /aws/rds/: + # Replace with your RDS log group name + /aws/rds/instance/${env:RDS_INSTANCE_ID}/postgresql: processors: - attributes/add_source_postgres: + attributes/add_source: actions: - key: source value: "rds_postgres" action: insert + - key: cloud.provider + value: "aws" + action: insert + batch: send_batch_size: 10000 send_batch_max_size: 11000 timeout: 10s exporters: - otlp: - endpoint: ":4317" + otlphttp/b14: + endpoint: ${env:OTEL_EXPORTER_OTLP_ENDPOINT} tls: - insecure: false + insecure_skip_verify: true service: pipelines: - logs/postgres: - receivers: [awscloudwatch/rds_postgres_logs] - processors: [attributes/add_source_postgres, batch] - exporters: [otlp] + logs/rds: + receivers: [awscloudwatchlogs/rds_postgres] + processors: [attributes/add_source, batch] + exporters: [otlphttp/b14] ``` -After deploying these changes, generate some traffic to your database and check -the Postgres section in Scout to see your databases's metrics and logs. 
+### Enable recommended RDS log types ---- +In the RDS console under **Configuration > Log exports**, enable: + +- **PostgreSQL log** — query errors, connection events, autovacuum +- **Upgrade log** — major version upgrade details + +For query-level logging, set these RDS parameter group values: + +```text +log_statement = 'ddl' +log_min_duration_statement = 1000 # Log queries over 1 second +log_connections = on +log_disconnections = on +``` + +## Step 5: Enable Performance Insights (optional) + +RDS Performance Insights provides query-level monitoring that +CloudWatch and the PostgreSQL receiver don't cover: + +- **Top SQL by load** — which queries consume the most CPU and I/O +- **Wait events** — what queries are waiting on (CPU, I/O, lock, + network) +- **Active session history** — per-second breakdown of database load + +To enable: + +1. In the RDS console, modify your instance +2. Under **Performance Insights**, enable it +3. Choose a retention period (free tier: 7 days, paid: up to 2 years) +4. Optionally publish Performance Insights metrics to CloudWatch + +Performance Insights data flows through CloudWatch Metrics Stream +alongside your other RDS metrics. + +For deeper query-level monitoring beyond Performance Insights, see +[PostgreSQL Advanced Monitoring](../../component/postgres-advanced.md) +which covers `pg_stat_statements` and per-table I/O. + +## Verify the setup + +Start the Collector and check for metrics within 60 seconds: + +```bash showLineNumbers +# Test PostgreSQL connectivity from the Collector host +psql -h ${RDS_ENDPOINT%:*} -p 5432 -U otel_monitor \ + -d ${RDS_DATABASE} -c "SELECT version();" +``` + +```sql showLineNumbers +-- Verify monitoring permissions +SELECT * FROM pg_stat_database WHERE datname = 'your_database'; +SELECT * FROM pg_stat_user_tables LIMIT 5; +``` + +Check Scout for both CloudWatch metrics (prefixed `aws.rds.*`) and +PostgreSQL metrics (prefixed `postgresql.*`). 
+ +## Key alerts to configure + +Once metrics are flowing, set up alerts on these thresholds: + +| Metric | Warning | Critical | Why | +| ------ | ------- | -------- | --- | +| `CPUUtilization` | > 70% | > 85% | Sustained high CPU degrades query performance | +| `DatabaseConnections` | > 80% of max | > 90% of max | Connection exhaustion causes application errors | +| `FreeStorageSpace` | < 20% | < 10% | Running out of storage crashes the instance | +| `ReplicaLag` | > 10s | > 60s | High lag means read replicas serve stale data | +| `ReadLatency` / `WriteLatency` | > 10ms | > 20ms | I/O latency spikes indicate storage bottlenecks | +| `DiskQueueDepth` | > 10 | > 20 | Deep queue means I/O is saturated | +| `postgresql.deadlocks` | > 0 | > 5/min | Deadlocks indicate application-level locking issues | +| Buffer hit ratio | < 95% | < 90% | Low hit ratio means too many disk reads | + +Buffer hit ratio: calculate as +`blks_hit / (blks_hit + blks_read) * 100`. + +## Troubleshooting + +### PostgreSQL receiver shows no metrics + +**Cause**: Collector can't reach the RDS instance. + +**Fix**: + +1. Verify the RDS instance security group allows inbound on port 5432 + from the Collector's IP or security group +2. Confirm the RDS instance is not in a private subnet without a route + to the Collector +3. Test connectivity: `psql -h -U otel_monitor -d ` +4. Check the monitoring user has `pg_monitor` role: + `SELECT rolname FROM pg_roles WHERE pg_has_role('otel_monitor', oid, 'member');` + +### CloudWatch metrics not appearing + +**Cause**: Metrics Stream not configured for the AWS/RDS namespace. + +**Fix**: + +1. In CloudWatch > Metrics > Streams, verify the stream is active +2. Check that the namespace filter includes `AWS/RDS` +3. Verify Kinesis Firehose delivery is succeeding (check the S3 + error bucket) +4. 
Allow 5-10 minutes for initial metrics to flow + +### Replication lag metrics showing zero + +**Cause**: No read replicas configured, or the instance is a replica +(not the primary). + +**Fix**: + +1. `ReplicaLag` is only populated on read replica instances +2. `postgresql.replication.data_delay` requires at least one replica + connected to the primary +3. On the primary, check: `SELECT * FROM pg_stat_replication;` + +### High connection count but low CPU + +**Cause**: Idle connections consuming connection slots. + +**Fix**: + +1. Check for idle connections: + `SELECT count(*) FROM pg_stat_activity WHERE state = 'idle';` +2. Consider connection pooling (PgBouncer or RDS Proxy) +3. Set `idle_in_transaction_session_timeout` in the parameter group + +## FAQ + +**How do I monitor RDS PostgreSQL query performance?** + +Enable Performance Insights on the RDS instance for top SQL by load +and wait event analysis. For per-query statistics, enable +`pg_stat_statements` and use the +[PostgreSQL Advanced guide](../../component/postgres-advanced.md). + +**What's the difference between CloudWatch and Enhanced Monitoring?** + +CloudWatch metrics are collected at 1-minute intervals and cover +instance-level stats. Enhanced Monitoring provides OS-level metrics at +up to 1-second granularity (per-process CPU, memory, file system). +Enable Enhanced Monitoring when you need to diagnose issues that +1-minute intervals miss. + +**Can I monitor multiple RDS instances with one Collector?** + +Yes. Add multiple PostgreSQL receiver blocks with distinct names: + +```yaml +receivers: + postgresql/primary: + endpoint: primary.xxxxx.rds.amazonaws.com:5432 + postgresql/replica: + endpoint: replica.xxxxx.rds.amazonaws.com:5432 +``` + +Then include both in the pipeline: +`receivers: [postgresql/primary, postgresql/replica]`. + +**How do I filter which CloudWatch metrics are streamed?** -With this setup, your RDS instance becomes fully observable through Scout. 
-You'll gain real-time visibility into performance metrics and logs without any -changes to your application code. +When configuring the Metrics Stream, select specific namespaces and +choose only `AWS/RDS` instead of all namespaces. This reduces costs +and data volume. ## Related Guides -- [CloudWatch Metrics Stream Setup](./cloudwatch-metrics-stream.md) - Configure - AWS metrics streaming -- [ELB Monitoring](./elb.md) - Monitor AWS Application Load Balancers -- [ElastiCache Monitoring](./elasticache.md) - Monitor Redis and Memcached -- [AWS ECS/Fargate Setup](../../collector-setup/ecs-setup.md) - Deploy Scout - Collector on AWS ECS -- [Monitor PostgreSQL Component](../../component/postgres.md) - Direct - PostgreSQL monitoring +- [CloudWatch Metrics Stream Setup](./cloudwatch-metrics-stream.md) — + Configure AWS metrics streaming +- [PostgreSQL Basic Monitoring](../../component/postgres.md) — Direct + PostgreSQL monitoring with the OTel receiver +- [PostgreSQL Advanced Monitoring](../../component/postgres-advanced.md) + — Query statistics, per-table I/O, replication details +- [pgX Deep PostgreSQL Analysis](https://base14.io/scout/pgx) — + Correlate query performance with application traces +- [ELB Monitoring](./elb.md) — Monitor AWS Application Load Balancers +- [ElastiCache Monitoring](./elasticache.md) — Monitor Redis and + Memcached on AWS +- [AWS ECS/Fargate Setup](../../collector-setup/ecs-setup.md) — Deploy + the Collector on AWS ECS From db38201296472e516d6ae1dae9fce4a30bb330bf Mon Sep 17 00:00:00 2001 From: nimishgj Date: Tue, 31 Mar 2026 11:54:08 +0530 Subject: [PATCH 2/2] remove rds performance insights step --- docs/instrument/infra/aws/rds.md | 47 ++++++-------------------------- 1 file changed, 9 insertions(+), 38 deletions(-) diff --git a/docs/instrument/infra/aws/rds.md b/docs/instrument/infra/aws/rds.md index f097efd..5491f3e 100644 --- a/docs/instrument/infra/aws/rds.md +++ b/docs/instrument/infra/aws/rds.md @@ -5,8 +5,8 @@ title: AWS RDS 
PostgreSQL Monitoring with OpenTelemetry - Metrics, Logs & Alerts sidebar_label: AWS RDS description: Monitor AWS RDS PostgreSQL with OpenTelemetry and CloudWatch Metrics - Stream. Collect connections, replication lag, IOPS, query performance, - and Performance Insights data in base14 Scout. + Stream. Collect connections, replication lag, IOPS, and query + performance data in base14 Scout. keywords: - aws rds monitoring - rds postgresql monitoring @@ -15,14 +15,13 @@ keywords: - rds postgres observability - cloudwatch metrics stream - aws database monitoring - - rds performance insights - aws rds postgresql observability - rds postgres dashboard head: - - script - type: application/ld+json - | - {"@context":"https://schema.org","@type":"FAQPage","mainEntity":[{"@type":"Question","name":"How do I monitor AWS RDS PostgreSQL with OpenTelemetry?","acceptedAnswer":{"@type":"Answer","text":"Use CloudWatch Metrics Stream for infrastructure metrics (CPU, memory, disk I/O, connections) and the OpenTelemetry PostgreSQL receiver for database-specific metrics like locks, deadlocks, and sequential scans. Both feed into a single observability platform like base14 Scout."}},{"@type":"Question","name":"What RDS metrics does CloudWatch Metrics Stream collect?","acceptedAnswer":{"@type":"Answer","text":"CloudWatch Metrics Stream delivers AWS/RDS metrics including CPUUtilization, FreeableMemory, ReadIOPS, WriteIOPS, ReadLatency, WriteLatency, DatabaseConnections, ReplicaLag, FreeStorageSpace, and DiskQueueDepth with 2-3 minute latency."}},{"@type":"Question","name":"Do I need both CloudWatch Metrics Stream and the PostgreSQL receiver?","acceptedAnswer":{"@type":"Answer","text":"Yes. CloudWatch provides infrastructure-level RDS metrics (CPU, memory, IOPS) while the PostgreSQL receiver collects database-specific metrics like locks, deadlocks, sequential scans, and tuple operations. 
Using both gives complete visibility."}},{"@type":"Question","name":"How do I collect RDS PostgreSQL logs with OpenTelemetry?","acceptedAnswer":{"@type":"Answer","text":"Use the AWS CloudWatch Logs receiver in the OpenTelemetry Collector, specifying your RDS log group names. The collector polls CloudWatch Logs and forwards them to your observability backend."}},{"@type":"Question","name":"How do I monitor RDS PostgreSQL query performance?","acceptedAnswer":{"@type":"Answer","text":"Enable Performance Insights on your RDS instance and publish the metrics to CloudWatch. Performance Insights provides per-query statistics including wait events, top SQL by load, and active session history. For deeper query monitoring, use the PostgreSQL pg_stat_statements extension with the OTel PostgreSQL receiver."}},{"@type":"Question","name":"What is the difference between CloudWatch metrics and Enhanced Monitoring for RDS?","acceptedAnswer":{"@type":"Answer","text":"CloudWatch metrics are collected at 1-minute intervals and cover instance-level stats like CPU, memory, and IOPS. Enhanced Monitoring provides OS-level metrics at up to 1-second granularity, including per-process CPU, memory usage, and file system details. 
Enhanced Monitoring is useful for diagnosing issues that 1-minute CloudWatch intervals miss."}},{"@type":"Question","name":"How do I set up alerts for RDS PostgreSQL?","acceptedAnswer":{"@type":"Answer","text":"Route RDS metrics through CloudWatch Metrics Stream to base14 Scout, then configure alerts in Scout on key thresholds: CPU above 80%, connections above 80% of max, replication lag exceeding your SLA, storage below 20% free, and read/write latency spikes."}}]} + {"@context":"https://schema.org","@type":"FAQPage","mainEntity":[{"@type":"Question","name":"How do I monitor AWS RDS PostgreSQL with OpenTelemetry?","acceptedAnswer":{"@type":"Answer","text":"Use CloudWatch Metrics Stream for infrastructure metrics (CPU, memory, disk I/O, connections) and the OpenTelemetry PostgreSQL receiver for database-specific metrics like locks, deadlocks, and sequential scans. Both feed into a single observability platform like base14 Scout."}},{"@type":"Question","name":"What RDS metrics does CloudWatch Metrics Stream collect?","acceptedAnswer":{"@type":"Answer","text":"CloudWatch Metrics Stream delivers AWS/RDS metrics including CPUUtilization, FreeableMemory, ReadIOPS, WriteIOPS, ReadLatency, WriteLatency, DatabaseConnections, ReplicaLag, FreeStorageSpace, and DiskQueueDepth with 2-3 minute latency."}},{"@type":"Question","name":"Do I need both CloudWatch Metrics Stream and the PostgreSQL receiver?","acceptedAnswer":{"@type":"Answer","text":"Yes. CloudWatch provides infrastructure-level RDS metrics (CPU, memory, IOPS) while the PostgreSQL receiver collects database-specific metrics like locks, deadlocks, sequential scans, and tuple operations. Using both gives complete visibility."}},{"@type":"Question","name":"How do I collect RDS PostgreSQL logs with OpenTelemetry?","acceptedAnswer":{"@type":"Answer","text":"Use the AWS CloudWatch Logs receiver in the OpenTelemetry Collector, specifying your RDS log group names. 
The collector polls CloudWatch Logs and forwards them to your observability backend."}},{"@type":"Question","name":"How do I monitor RDS PostgreSQL query performance?","acceptedAnswer":{"@type":"Answer","text":"Enable the PostgreSQL pg_stat_statements extension and use the OTel PostgreSQL receiver to collect per-query statistics including execution counts, total time, and rows returned."}},{"@type":"Question","name":"What is the difference between CloudWatch metrics and Enhanced Monitoring for RDS?","acceptedAnswer":{"@type":"Answer","text":"CloudWatch metrics are collected at 1-minute intervals and cover instance-level stats like CPU, memory, and IOPS. Enhanced Monitoring provides OS-level metrics at up to 1-second granularity, including per-process CPU, memory usage, and file system details. Enhanced Monitoring is useful for diagnosing issues that 1-minute CloudWatch intervals miss."}},{"@type":"Question","name":"How do I set up alerts for RDS PostgreSQL?","acceptedAnswer":{"@type":"Answer","text":"Route RDS metrics through CloudWatch Metrics Stream to base14 Scout, then configure alerts in Scout on key thresholds: CPU above 80%, connections above 80% of max, replication lag exceeding your SLA, storage below 20% free, and read/write latency spikes."}}]} --- ## Overview @@ -93,12 +92,9 @@ Follow our comprehensive up the streaming infrastructure (S3 bucket, Kinesis Firehose, Metrics Stream). -When configuring the Metrics Stream: - -1. Select **specific namespaces** instead of "All namespaces" -2. Choose **AWS/RDS** from the namespace list -3. This ensures you only collect RDS metrics, reducing costs and data - volume +When configuring the Metrics Stream, select the **AWS/RDS** namespace +instead of "All namespaces" to only collect RDS metrics and reduce +costs. 
## Step 2: Create a monitoring user on RDS @@ -283,30 +279,6 @@ log_connections = on log_disconnections = on ``` -## Step 5: Enable Performance Insights (optional) - -RDS Performance Insights provides query-level monitoring that -CloudWatch and the PostgreSQL receiver don't cover: - -- **Top SQL by load** — which queries consume the most CPU and I/O -- **Wait events** — what queries are waiting on (CPU, I/O, lock, - network) -- **Active session history** — per-second breakdown of database load - -To enable: - -1. In the RDS console, modify your instance -2. Under **Performance Insights**, enable it -3. Choose a retention period (free tier: 7 days, paid: up to 2 years) -4. Optionally publish Performance Insights metrics to CloudWatch - -Performance Insights data flows through CloudWatch Metrics Stream -alongside your other RDS metrics. - -For deeper query-level monitoring beyond Performance Insights, see -[PostgreSQL Advanced Monitoring](../../component/postgres-advanced.md) -which covers `pg_stat_statements` and per-table I/O. - ## Verify the setup Start the Collector and check for metrics within 60 seconds: @@ -399,10 +371,9 @@ Buffer hit ratio: calculate as **How do I monitor RDS PostgreSQL query performance?** -Enable Performance Insights on the RDS instance for top SQL by load -and wait event analysis. For per-query statistics, enable -`pg_stat_statements` and use the -[PostgreSQL Advanced guide](../../component/postgres-advanced.md). +Enable `pg_stat_statements` for per-query statistics and use the +[PostgreSQL Advanced guide](../../component/postgres-advanced.md) +for detailed query-level monitoring. **What's the difference between CloudWatch and Enhanced Monitoring?**