diff --git a/.circleci/config.yml b/.circleci/config.yml index 508120e67..965b27394 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,103 +1,33 @@ --- -version: 2.1 +# Prometheus has switched to GitHub action. +# Circle CI is not disabled repository-wise so that previous pull requests +# continue working. +# This file does not generate any CircleCI workflow. -orbs: - prometheus: prometheus/prometheus@0.17.1 +version: 2.1 executors: - # This must match .promu.yml. golang: docker: - - image: cimg/go:1.20 + - image: busybox jobs: - test: + noopjob: executor: golang steps: - - prometheus/setup_environment - - run: make - - prometheus/store_artifact: - file: postgres_exporter - - integration: - docker: - - image: cimg/go:1.20 - - image: << parameters.postgres_image >> - environment: - POSTGRES_DB: circle_test - POSTGRES_USER: postgres - POSTGRES_PASSWORD: test - - parameters: - postgres_image: - type: string - - environment: - DATA_SOURCE_NAME: 'postgresql://postgres:test@localhost:5432/circle_test?sslmode=disable' - GOOPTS: '-v -tags integration' - - steps: - - checkout - - setup_remote_docker - - run: docker version - - run: make build - - run: make test + - run: + command: "true" workflows: version: 2 - postgres_exporter: + prometheus: jobs: - - test: - filters: - tags: - only: /.*/ - - integration: - matrix: - parameters: - postgres_image: - - circleci/postgres:10 - - circleci/postgres:11 - - circleci/postgres:12 - - circleci/postgres:13 - - cimg/postgres:14.1 - - cimg/postgres:15.1 - - prometheus/build: - name: build - parallelism: 3 - promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386" - filters: - tags: - ignore: /^v.*/ - branches: - ignore: /^(main|master|release-.*|.*build-all.*)$/ - - prometheus/build: - name: build_all - parallelism: 12 - filters: - branches: - only: /^(main|master|release-.*|.*build-all.*)$/ - tags: - only: /^v.*/ - - prometheus/publish_master: - context: org-context - docker_hub_organization: prometheuscommunity - quay_io_organization: prometheuscommunity - requires: - - test - - build_all - filters: - branches: - only: master - - prometheus/publish_release: - context: org-context - docker_hub_organization: prometheuscommunity - quay_io_organization: prometheuscommunity - requires: - - test - - build_all - filters: - tags: - only: /^v.*/ - branches: - ignore: /.*/ + - noopjob + triggers: + - schedule: + cron: "0 0 30 2 *" + filters: + branches: + only: + - main diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..9d759a627 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,147 @@ +--- +name: CI +on: + pull_request: + push: + +jobs: + test_go: + name: Go tests + runs-on: ubuntu-latest + container: + # Whenever the Go version is updated here, .promu.yml + # should also be updated. + image: quay.io/prometheus/golang-builder:1.25-base + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7 + - uses: ./.github/promci/actions/setup_environment + - run: make test + - run: make GO_ONLY=1 SKIP_GOLANGCI_LINT=1 + integration_tests: + name: Integration tests + runs-on: ubuntu-latest + strategy: + matrix: + # Define Postgres versions to test against. + postgres_version: + - 13.22 + - 14.19 + - 15.14 + - 16.10 + - 17.6 + - 18.1 + services: + postgres: + image: postgres:${{ matrix.postgres_version }} + env: + POSTGRES_DB: circle_test + POSTGRES_USER: postgres + POSTGRES_PASSWORD: test + # options: >- + # --health-cmd="pg_isready -U postgres -d circle_test" + # --health-interval=10s + # --health-timeout=5s + # --health-retries=5 + container: + # Whenever the Go version is updated here, .promu.yml + # should also be updated. + image: quay.io/prometheus/golang-builder:1.25-base + env: + DATA_SOURCE_NAME: 'postgresql://postgres:test@postgres:5432/circle_test?sslmode=disable' + GOOPTS: '-v -tags integration' + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7 + - uses: ./.github/promci/actions/setup_environment + - run: make build + - run: make test + + + build: + name: Build Prometheus for common architectures + runs-on: ubuntu-latest + if: | + !(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')) + && + !(github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-')) + && + !(github.event_name == 'push' && github.event.ref == 'refs/heads/main') + && + !(github.event_name == 'push' && github.event.ref == 'refs/heads/master') + strategy: + matrix: + thread: [ 0, 1, 2 ] + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7 + - uses: ./.github/promci/actions/build + with: + promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386" + parallelism: 3 + thread: ${{ matrix.thread }} + + build_all: + name: Build Prometheus for all architectures + runs-on: ubuntu-latest + if: | + (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')) + || + (github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-')) + || + (github.event_name == 'push' && github.event.ref == 'refs/heads/main') + || + (github.event_name == 'push' && github.event.ref == 'refs/heads/master') + strategy: + matrix: + thread: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] + + # Whenever the Go version is updated here, .promu.yml + # should also be updated. + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7 + - uses: ./.github/promci/actions/build + with: + parallelism: 12 + thread: ${{ matrix.thread }} + + publish_main: + # https://github.com/prometheus/promci/blob/52c7012f5f0070d7281b8db4a119e21341d43c91/actions/publish_main/action.yml + name: Publish main branch artifacts + runs-on: ubuntu-latest + needs: [test_go, build_all] + if: | + (github.event_name == 'push' && github.event.ref == 'refs/heads/main') + || + (github.event_name == 'push' && github.event.ref == 'refs/heads/master') + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7 + - uses: ./.github/promci/actions/publish_main + with: + docker_hub_organization: prometheuscommunity + docker_hub_login: ${{ secrets.docker_hub_login }} + docker_hub_password: ${{ secrets.docker_hub_password }} + quay_io_organization: prometheuscommunity + quay_io_login: ${{ secrets.quay_io_login }} + quay_io_password: ${{ secrets.quay_io_password }} + + publish_release: + name: Publish release artefacts + runs-on: ubuntu-latest + needs: [test_go, build_all] + if: | + (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')) + steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7 + - uses: ./.github/promci/actions/publish_release + with: + docker_hub_organization: prometheuscommunity + docker_hub_login: ${{ secrets.docker_hub_login }} + docker_hub_password: ${{ secrets.docker_hub_password }} + quay_io_organization: prometheuscommunity + quay_io_login: ${{ secrets.quay_io_login }} + quay_io_password: ${{ secrets.quay_io_password }} + github_token: ${{ secrets.PROMBOT_GITHUB_TOKEN }} diff --git a/.github/workflows/container_description.yml b/.github/workflows/container_description.yml new file mode 100644 index 000000000..d7b879f9c --- /dev/null +++ b/.github/workflows/container_description.yml @@ -0,0 +1,61 @@ +--- +name: Push README to Docker Hub +on: + push: + paths: + - "README.md" + - "README-containers.md" + - ".github/workflows/container_description.yml" + branches: [ main, master ] + +permissions: + contents: read + +jobs: + PushDockerHubReadme: + runs-on: ubuntu-latest + name: Push README to Docker Hub + if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. + steps: + - name: git checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - name: Set docker hub repo name + run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV + - name: Push README to Dockerhub + uses: christian-korneck/update-container-description-action@d36005551adeaba9698d8d67a296bd16fa91f8e8 # v1 + env: + DOCKER_USER: ${{ secrets.DOCKER_HUB_LOGIN }} + DOCKER_PASS: ${{ secrets.DOCKER_HUB_PASSWORD }} + with: + destination_container_repo: ${{ env.DOCKER_REPO_NAME }} + provider: dockerhub + short_description: ${{ env.DOCKER_REPO_NAME }} + # Empty string results in README-containers.md being pushed if it + # exists. Otherwise, README.md is pushed. + readme_file: '' + + PushQuayIoReadme: + runs-on: ubuntu-latest + name: Push README to quay.io + if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks. + steps: + - name: git checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - name: Set quay.io org name + run: echo "DOCKER_REPO=$(echo quay.io/${GITHUB_REPOSITORY_OWNER} | tr -d '-')" >> $GITHUB_ENV + - name: Set quay.io repo name + run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV + - name: Push README to quay.io + uses: christian-korneck/update-container-description-action@d36005551adeaba9698d8d67a296bd16fa91f8e8 # v1 + env: + DOCKER_APIKEY: ${{ secrets.QUAY_IO_API_TOKEN }} + with: + destination_container_repo: ${{ env.DOCKER_REPO_NAME }} + provider: quay + # Empty string results in README-containers.md being pushed if it + # exists. Otherwise, README.md is pushed. + readme_file: '' diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index 8ace97bde..dc8ffd02d 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -3,6 +3,7 @@ name: golangci-lint on: push: + branches: [main, master, 'release-*'] paths: - "go.sum" - "go.mod" @@ -10,23 +11,36 @@ on: - "scripts/errcheck_excludes.txt" - ".github/workflows/golangci-lint.yml" - ".golangci.yml" + tags: ['v*'] pull_request: +permissions: # added using https://github.com/step-security/secure-repo + contents: read + jobs: golangci: + permissions: + contents: read # for actions/checkout to fetch code + pull-requests: read # for golangci/golangci-lint-action to fetch pull requests name: lint runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 - - name: install Go - uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # v3.5.0 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - name: Install Go + uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0 with: - go-version: 1.20.x + go-version: 1.26.x - name: Install snmp_exporter/generator dependencies run: sudo apt-get update && sudo apt-get -y install libsnmp-dev if: github.repository == 'prometheus/snmp_exporter' + - name: Get golangci-lint version + id: golangci-lint-version + run: echo "version=$(make print-golangci-lint-version)" >> $GITHUB_OUTPUT - name: Lint - uses: golangci/golangci-lint-action@3a919529898de77ec3da873e3063ca4b10e7f5cc # v3.7.0 + uses: golangci/golangci-lint-action@1e7e51e771db61008b38414a730f564565cf7c20 # v9.2.0 with: - version: v1.54.2 + args: --verbose + version: ${{ steps.golangci-lint-version.outputs.version }} diff --git a/.golangci.yml b/.golangci.yml index 96487c898..4b58b08b6 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,23 +1,36 @@ ---- +version: "2" linters: enable: - - misspell - - revive - -issues: - exclude-rules: - - path: _test.go - linters: - - errcheck - -linters-settings: - errcheck: - exclude-functions: - # Never check for logger errors. - - (github.com/go-kit/log.Logger).Log - revive: + - misspell + - revive + settings: + errcheck: + exclude-functions: + - (github.com/go-kit/log.Logger).Log + revive: + rules: + - name: unused-parameter + severity: warning + disabled: true + exclusions: + generated: lax + presets: + - comments + - common-false-positives + - legacy + - std-error-handling rules: - # https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md#unused-parameter - - name: unused-parameter - severity: warning - disabled: true + - linters: + - errcheck + path: _test.go + paths: + - third_party$ + - builtin$ + - examples$ +formatters: + exclusions: + generated: lax + paths: + - third_party$ + - builtin$ + - examples$ diff --git a/.promu.yml b/.promu.yml index 55c73712e..c9bb51e78 100644 --- a/.promu.yml +++ b/.promu.yml @@ -1,13 +1,12 @@ go: # This must match .circle/config.yml. - version: 1.20 + version: 1.25 repository: path: github.com/prometheus-community/postgres_exporter build: binaries: - name: postgres_exporter path: ./cmd/postgres_exporter - flags: -a -tags 'netgo static_build' ldflags: | -X github.com/prometheus/common/version.Version={{.Version}} -X github.com/prometheus/common/version.Revision={{.Revision}} diff --git a/.yamllint b/.yamllint index 955a5a627..b329f464f 100644 --- a/.yamllint +++ b/.yamllint @@ -1,5 +1,8 @@ --- extends: default +ignore: | + **/node_modules + web/api/v1/testdata/openapi_*_golden.yaml rules: braces: diff --git a/CHANGELOG.md b/CHANGELOG.md index f55bde533..8cc3b9c47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,95 @@ +## main / (unreleased) + +* [CHANGE] ... +* [FEATURE] ... +* [ENHANCEMENT] ... +* [BUGFIX] ... + +## 0.19.1 / 2026-02-25 + +* [CHANGE] Filter and warn about duplicates in pg_stat_statements (#998) by @Nudin in https://github.com/prometheus-community/postgres_exporter/pull/1259 +* [ENHANCEMENT] perf/reliability: Optimize pg_stat_statements queries that uses too many temp files by @eugercek in https://github.com/prometheus-community/postgres_exporter/pull/1267 +* [BUGFIX] fix: ignore setting `google_dataplex.max_messages` by @cristiangreco in https://github.com/prometheus-community/postgres_exporter/pull/1261 +* [BUGFIX] wal: Fix collector NULL SUM(size) by @dannotripp in https://github.com/prometheus-community/postgres_exporter/pull/1265 + +## 0.19.0 / 2026-02-03 + +* [CHANGE] Reorganize code to remove the main package by @cristiangreco in https://github.com/prometheus-community/postgres_exporter/pull/1238 +* [FEATURE] Allow setting `limit` in `pg_stat_statements` collector by @cristiangreco in https://github.com/prometheus-community/postgres_exporter/pull/1205 +* [FEATURE] Allow excluding databases or users names in `stat_statements` collector by @cristiangreco in https://github.com/prometheus-community/postgres_exporter/pull/1232 +* [ENHANCEMENT] Enable pprof endpoints for profiling by @SuperQ in https://github.com/prometheus-community/postgres_exporter/pull/1212 +* [ENHANCEMENT] Document `stat_checkpointer` collector option by @fabiorueda in https://github.com/prometheus-community/postgres_exporter/pull/1226 +* [ENHANCEMENT] Ensure collection of stats succeed in a maximum duration to avoid exhausting PG connections by @pierresouchay in https://github.com/prometheus-community/postgres_exporter/pull/1229 +* [BUGFIX] Fix NULL handling on `long_running_transactions` collector by @sysadmind in https://github.com/prometheus-community/postgres_exporter/pull/1230 +* [BUGFIX] Do not crash on bad `pg_settings` value by @sysadmind in https://github.com/prometheus-community/postgres_exporter/pull/1252 + +## 0.18.1 / 2025-09-29 + +* [BUGFIX] Fix swapped `flushedLsn` and `receiveStartTli` for `wal_receiver` collector by @spantaleev in https://github.com/prometheus-community/postgres_exporter/pull/1198 +* [BUGFIX] Fix superfluous semicolon breaking query in `process_idle` by @sysadmind in https://github.com/prometheus-community/postgres_exporter/pull/1197 and https://github.com/prometheus-community/postgres_exporter/pull/1201 + +## 0.18.0 / 2025-09-25 + +* [FEATURE] Add `stat_progress_vacuum` collector by @ianbibby in https://github.com/prometheus-community/postgres_exporter/pull/1141 +* [FEATURE] Add `buffercache_summary` collector by @sfc-gh-pnuttall in https://github.com/prometheus-community/postgres_exporter/pull/1165 +* [FEATURE] `stat_statements`: export query itself together with `queryId` by @Delorien84 in https://github.com/prometheus-community/postgres_exporter/pull/940 +* [ENHANCEMENT] Improve error handling for `Server.Scrape` by @BoweFlex in https://github.com/prometheus-community/postgres_exporter/pull/1158 +* [ENHANCEMENT] `stat_user_tables`: record table-only size bytes in addition to the total size bytes by @Sticksman Sticksman in https://github.com/prometheus-community/postgres_exporter/pull/1149 +* [ENHANCEMENT] (chore) Fix a typo and use `slices.Contains` by @cristiangreco in https://github.com/prometheus-community/postgres_exporter/pull/1176 +* [ENHANCEMENT] Update mixin to latest changes from `grafana/postgres_exporter` by @cristiangreco, @gaantunes, @v-zhuravlev and @mshahzeb in https://github.com/prometheus-community/postgres_exporter/pull/1179 +* [ENHANCEMENT] Exclude the metrics fetching session's data from pg_stat_activity by @kmoppel in https://github.com/prometheus-community/postgres_exporter/pull/1185 +* [BUGFIX] Ensure database connections are always closed by @cristiangreco and @dehaansa in https://github.com/prometheus-community/postgres_exporter/pull/1177 +* [BUGFIX] Fix superfluous semicolon breaking query in `process_idle` by @sysadmind in https://github.com/prometheus-community/postgres_exporter/pull/1197 + +## 0.17.1 / 2025-02-26 + +* [BUGFIX] Fix: Handle incoming labels with invalid UTF-8 #1131 + +## 0.17.0 / 2025-02-16 + +## What's Changed +* [ENHANCEMENT] Add Postgres 17 for CI test by @khiemdoan in https://github.com/prometheus-community/postgres_exporter/pull/1105 +* [ENHANCEMENT] Add wait/backend to pg_stat_activity by @fgalind1 in https://github.com/prometheus-community/postgres_exporter/pull/1106 +* [ENHANCEMENT] Export last replay age in replication collector by @bitfehler in https://github.com/prometheus-community/postgres_exporter/pull/1085 +* [BUGFIX] Fix pg_long_running_transactions time by @jyothikirant-sayukth in https://github.com/prometheus-community/postgres_exporter/pull/1092 +* [BUGFIX] Fix to replace dashes with underscore in the metric names by @aagarwalla-fx in https://github.com/prometheus-community/postgres_exporter/pull/1103 +* [BIGFIX] Checkpoint related columns in PG 17 have been moved from pg_stat_bgwriter to pg_stat_checkpointer by @n-rodriguez in https://github.com/prometheus-community/postgres_exporter/pull/1072 +* [BUGFIX] Fix pg_stat_statements for PG17 by @NevermindZ4 in https://github.com/prometheus-community/postgres_exporter/pull/1114 +* [BUGFIX] Handle pg_replication_slots on pg<13 by @michael-todorovic in https://github.com/prometheus-community/postgres_exporter/pull/1098 +* [BUGFIX] Fix missing dsn sanitization for logging by @sysadmind in https://github.com/prometheus-community/postgres_exporter/pull/1104 + +## New Contributors +* @jyothikirant-sayukth made their first contribution in https://github.com/prometheus-community/postgres_exporter/pull/1092 +* @aagarwalla-fx made their first contribution in https://github.com/prometheus-community/postgres_exporter/pull/1103 +* @NevermindZ4 made their first contribution in https://github.com/prometheus-community/postgres_exporter/pull/1114 +* @michael-todorovic made their first contribution in https://github.com/prometheus-community/postgres_exporter/pull/1098 +* @fgalind1 made their first contribution in https://github.com/prometheus-community/postgres_exporter/pull/1106 + +**Full Changelog**: https://github.com/prometheus-community/postgres_exporter/compare/v0.16.0...v0.17.0 + +## 0.16.0 / 2024-11-10 + +BREAKING CHANGES: + +The logging system has been replaced with log/slog from the stdlib. This change is being made across the prometheus ecosystem. The logging output has changed, but the messages and levels remain the same. The `ts` label for the timestamp has bewen replaced with `time`, the accuracy is less, and the timezone is not forced to UTC. The `caller` field has been replaced by the `source` field, which now includes the full path to the source file. The `level` field now exposes the log level in capital letters. + +* [CHANGE] Replace logging system #1073 +* [ENHANCEMENT] Add save_wal_size and wal_status to replication_slot collector #1027 +* [ENHANCEMENT] Add roles collector and connection limit metrics to database collector #997 +* [ENHANCEMENT] Excluded databases log messgae is now info level #1003 +* [ENHANCEMENT] Add active_time to stat_database collector #961 +* [ENHANCEMENT] Add slot_type label to replication_slot collector #960 +* [BUGFIX] Fix walreceiver collectore when no repmgr #1086 +* [BUGFIX] Remove logging errors on replicas #1048 +* [BUGFIX] Fix active_time query on postgres>=14 #1045 + +## 0.15.0 / 2023-10-27 + +* [ENHANCEMENT] Add 1kB and 2kB units #915 +* [BUGFIX] Add error log when probe collector creation fails #918 +* [BUGFIX] Fix test build failures on 32-bit arch #919 +* [BUGFIX] Adjust collector to use separate connection per scrape #936 + ## 0.14.0 / 2023-09-11 * [CHANGE] Add `state` label to pg_process_idle_seconds #862 diff --git a/Makefile.common b/Makefile.common index 062a28185..cce3ef1d1 100644 --- a/Makefile.common +++ b/Makefile.common @@ -1,4 +1,4 @@ -# Copyright 2018 The Prometheus Authors +# Copyright The Prometheus Authors # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -49,23 +49,24 @@ endif GOTEST := $(GO) test GOTEST_DIR := ifneq ($(CIRCLE_JOB),) -ifneq ($(shell command -v gotestsum > /dev/null),) +ifneq ($(shell command -v gotestsum 2> /dev/null),) GOTEST_DIR := test-results GOTEST := gotestsum --junitfile $(GOTEST_DIR)/unit-tests.xml -- endif endif -PROMU_VERSION ?= 0.15.0 +PROMU_VERSION ?= 0.18.0 PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz SKIP_GOLANGCI_LINT := GOLANGCI_LINT := GOLANGCI_LINT_OPTS ?= -GOLANGCI_LINT_VERSION ?= v1.54.2 -# golangci-lint only supports linux, darwin and windows platforms on i386/amd64. +GOLANGCI_LINT_VERSION ?= v2.10.1 +GOLANGCI_FMT_OPTS ?= +# golangci-lint only supports linux, darwin and windows platforms on i386/amd64/arm64. # windows isn't included here because of the path separator being different. ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin)) - ifeq ($(GOHOSTARCH),$(filter $(GOHOSTARCH),amd64 i386)) + ifeq ($(GOHOSTARCH),$(filter $(GOHOSTARCH),amd64 i386 arm64)) # If we're in CI and there is an Actions file, that means the linter # is being run in Actions, so we don't need to run it here. ifneq (,$(SKIP_GOLANGCI_LINT)) @@ -81,11 +82,50 @@ endif PREFIX ?= $(shell pwd) BIN_DIR ?= $(shell pwd) DOCKER_IMAGE_TAG ?= $(subst /,-,$(shell git rev-parse --abbrev-ref HEAD)) -DOCKERFILE_PATH ?= ./Dockerfile DOCKERBUILD_CONTEXT ?= ./ DOCKER_REPO ?= prom +# Check if deprecated DOCKERFILE_PATH is set +ifdef DOCKERFILE_PATH +$(error DOCKERFILE_PATH is deprecated. Use DOCKERFILE_VARIANTS ?= $(DOCKERFILE_PATH) in the Makefile) +endif + DOCKER_ARCHS ?= amd64 +DOCKERFILE_VARIANTS ?= Dockerfile $(wildcard Dockerfile.*) + +# Function to extract variant from Dockerfile label. +# Returns the variant name from io.prometheus.image.variant label, or "default" if not found. +define dockerfile_variant +$(strip $(or $(shell sed -n 's/.*io\.prometheus\.image\.variant="\([^"]*\)".*/\1/p' $(1)),default)) +endef + +# Check for duplicate variant names (including default for Dockerfiles without labels). +DOCKERFILE_VARIANT_NAMES := $(foreach df,$(DOCKERFILE_VARIANTS),$(call dockerfile_variant,$(df))) +DOCKERFILE_VARIANT_NAMES_SORTED := $(sort $(DOCKERFILE_VARIANT_NAMES)) +ifneq ($(words $(DOCKERFILE_VARIANT_NAMES)),$(words $(DOCKERFILE_VARIANT_NAMES_SORTED))) +$(error Duplicate variant names found. Each Dockerfile must have a unique io.prometheus.image.variant label, and only one can be without a label (default)) +endif + +# Build variant:dockerfile pairs for shell iteration. +DOCKERFILE_VARIANTS_WITH_NAMES := $(foreach df,$(DOCKERFILE_VARIANTS),$(call dockerfile_variant,$(df)):$(df)) + +# Shell helper to check whether a dockerfile/arch pair is excluded. +define dockerfile_arch_is_excluded +case " $(DOCKERFILE_ARCH_EXCLUSIONS) " in \ + *" $$dockerfile:$(1) "*) true ;; \ + *) false ;; \ +esac +endef + +# Shell helper to check whether a registry/arch pair is excluded. +# Extracts registry from DOCKER_REPO (e.g., quay.io/prometheus -> quay.io) +define registry_arch_is_excluded +registry=$$(echo "$(DOCKER_REPO)" | cut -d'/' -f1); \ +case " $(DOCKER_REGISTRY_ARCH_EXCLUSIONS) " in \ + *" $$registry:$(1) "*) true ;; \ + *) false ;; \ +esac +endef BUILD_DOCKER_ARCHS = $(addprefix common-docker-,$(DOCKER_ARCHS)) PUBLISH_DOCKER_ARCHS = $(addprefix common-docker-publish-,$(DOCKER_ARCHS)) @@ -111,7 +151,7 @@ common-all: precheck style check_license lint yamllint unused build test .PHONY: common-style common-style: @echo ">> checking code style" - @fmtRes=$$($(GOFMT) -d $$(find . -path ./vendor -prune -o -name '*.go' -print)); \ + @fmtRes=$$($(GOFMT) -d $$(git ls-files '*.go' ':!:vendor/*' || find . -path ./vendor -prune -o -name '*.go' -print)); \ if [ -n "$${fmtRes}" ]; then \ echo "gofmt checking failed!"; echo "$${fmtRes}"; echo; \ echo "Please ensure you are using $$($(GO) version) for formatting code."; \ @@ -121,13 +161,19 @@ common-style: .PHONY: common-check_license common-check_license: @echo ">> checking license header" - @licRes=$$(for file in $$(find . -type f -iname '*.go' ! -path './vendor/*') ; do \ + @licRes=$$(for file in $$(git ls-files '*.go' ':!:vendor/*' || find . -path ./vendor -prune -o -type f -iname '*.go' -print) ; do \ awk 'NR<=3' $$file | grep -Eq "(Copyright|generated|GENERATED)" || echo $$file; \ done); \ if [ -n "$${licRes}" ]; then \ echo "license header checking failed:"; echo "$${licRes}"; \ exit 1; \ fi + @echo ">> checking for copyright years 2026 or later" + @futureYearRes=$$(git grep -E 'Copyright (202[6-9]|20[3-9][0-9])' -- '*.go' ':!:vendor/*' || true); \ + if [ -n "$${futureYearRes}" ]; then \ + echo "Files with copyright year 2026 or later found (should use 'Copyright The Prometheus Authors'):"; echo "$${futureYearRes}"; \ + exit 1; \ + fi .PHONY: common-deps common-deps: @@ -138,7 +184,7 @@ common-deps: update-go-deps: @echo ">> updating Go dependencies" @for m in $$($(GO) list -mod=readonly -m -f '{{ if and (not .Indirect) (not .Main)}}{{.Path}}{{end}}' all); do \ - $(GO) get -d $$m; \ + $(GO) get $$m; \ done $(GO) mod tidy @@ -156,9 +202,13 @@ $(GOTEST_DIR): @mkdir -p $@ .PHONY: common-format -common-format: +common-format: $(GOLANGCI_LINT) @echo ">> formatting code" $(GO) fmt $(pkgs) +ifdef GOLANGCI_LINT + @echo ">> formatting code with golangci-lint" + $(GOLANGCI_LINT) fmt $(GOLANGCI_FMT_OPTS) +endif .PHONY: common-vet common-vet: @@ -169,16 +219,20 @@ common-vet: common-lint: $(GOLANGCI_LINT) ifdef GOLANGCI_LINT @echo ">> running golangci-lint" -# 'go list' needs to be executed before staticcheck to prepopulate the modules cache. -# Otherwise staticcheck might fail randomly for some reason not yet explained. - $(GO) list -e -compiled -test=true -export=false -deps=true -find=false -tags= -- ./... > /dev/null $(GOLANGCI_LINT) run $(GOLANGCI_LINT_OPTS) $(pkgs) endif +.PHONY: common-lint-fix +common-lint-fix: $(GOLANGCI_LINT) +ifdef GOLANGCI_LINT + @echo ">> running golangci-lint fix" + $(GOLANGCI_LINT) run --fix $(GOLANGCI_LINT_OPTS) $(pkgs) +endif + .PHONY: common-yamllint common-yamllint: @echo ">> running yamllint on all YAML files in the repository" -ifeq (, $(shell command -v yamllint > /dev/null)) +ifeq (, $(shell command -v yamllint 2> /dev/null)) @echo "yamllint not installed so skipping" else yamllint . @@ -204,31 +258,201 @@ common-tarball: promu @echo ">> building release tarball" $(PROMU) tarball --prefix $(PREFIX) $(BIN_DIR) +.PHONY: common-docker-repo-name +common-docker-repo-name: + @echo "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)" + .PHONY: common-docker $(BUILD_DOCKER_ARCHS) common-docker: $(BUILD_DOCKER_ARCHS) $(BUILD_DOCKER_ARCHS): common-docker-%: - docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" \ - -f $(DOCKERFILE_PATH) \ - --build-arg ARCH="$*" \ - --build-arg OS="linux" \ - $(DOCKERBUILD_CONTEXT) + @for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \ + dockerfile=$${variant#*:}; \ + variant_name=$${variant%%:*}; \ + if $(call dockerfile_arch_is_excluded,$*); then \ + echo "Skipping $$variant_name variant for linux-$* (excluded by DOCKERFILE_ARCH_EXCLUSIONS)"; \ + continue; \ + fi; \ + distroless_arch="$*"; \ + if [ "$*" = "armv7" ]; then \ + distroless_arch="arm"; \ + fi; \ + if [ "$$dockerfile" = "Dockerfile" ]; then \ + echo "Building default variant ($$variant_name) for linux-$* using $$dockerfile"; \ + docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" \ + -f $$dockerfile \ + --build-arg ARCH="$*" \ + --build-arg OS="linux" \ + --build-arg DISTROLESS_ARCH="$$distroless_arch" \ + $(DOCKERBUILD_CONTEXT); \ + if [ "$$variant_name" != "default" ]; then \ + echo "Tagging default variant with $$variant_name suffix"; \ + docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" \ + "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \ + fi; \ + else \ + echo "Building $$variant_name variant for linux-$* using $$dockerfile"; \ + docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" \ + -f $$dockerfile \ + --build-arg ARCH="$*" \ + --build-arg OS="linux" \ + --build-arg DISTROLESS_ARCH="$$distroless_arch" \ + $(DOCKERBUILD_CONTEXT); \ + fi; \ + done .PHONY: common-docker-publish $(PUBLISH_DOCKER_ARCHS) common-docker-publish: $(PUBLISH_DOCKER_ARCHS) $(PUBLISH_DOCKER_ARCHS): common-docker-publish-%: - docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" + @for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \ + dockerfile=$${variant#*:}; \ + variant_name=$${variant%%:*}; \ + if $(call dockerfile_arch_is_excluded,$*); then \ + echo "Skipping push for $$variant_name variant on linux-$* (excluded by DOCKERFILE_ARCH_EXCLUSIONS)"; \ + continue; \ + fi; \ + if $(call registry_arch_is_excluded,$*); then \ + echo "Skipping push for $$variant_name variant on linux-$* to $(DOCKER_REPO) (excluded by DOCKER_REGISTRY_ARCH_EXCLUSIONS)"; \ + continue; \ + fi; \ + if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \ + echo "Pushing $$variant_name variant for linux-$*"; \ + docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \ + fi; \ + if [ "$$dockerfile" = "Dockerfile" ]; then \ + echo "Pushing default variant ($$variant_name) for linux-$*"; \ + docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)"; \ + fi; \ + if [ "$(DOCKER_IMAGE_TAG)" = "latest" ]; then \ + if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \ + echo "Pushing $$variant_name variant version tags for linux-$*"; \ + docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \ + fi; \ + if [ "$$dockerfile" = "Dockerfile" ]; then \ + echo "Pushing default variant version tag for linux-$*"; \ + docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)"; \ + fi; \ + fi; \ + done DOCKER_MAJOR_VERSION_TAG = $(firstword $(subst ., ,$(shell cat VERSION))) .PHONY: common-docker-tag-latest $(TAG_DOCKER_ARCHS) common-docker-tag-latest: $(TAG_DOCKER_ARCHS) $(TAG_DOCKER_ARCHS): common-docker-tag-latest-%: - docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest" - docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)" + @for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \ + dockerfile=$${variant#*:}; \ + variant_name=$${variant%%:*}; \ + if $(call dockerfile_arch_is_excluded,$*); then \ + echo "Skipping tag for $$variant_name variant on linux-$* (excluded by DOCKERFILE_ARCH_EXCLUSIONS)"; \ + continue; \ + fi; \ + if $(call registry_arch_is_excluded,$*); then \ + echo "Skipping tag for $$variant_name variant on linux-$* for $(DOCKER_REPO) (excluded by DOCKER_REGISTRY_ARCH_EXCLUSIONS)"; \ + continue; \ + fi; \ + if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \ + echo "Tagging $$variant_name variant for linux-$* as latest"; \ + docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest-$$variant_name"; \ + docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \ + fi; \ + if [ "$$dockerfile" = "Dockerfile" ]; then \ + echo "Tagging default variant ($$variant_name) for linux-$* as latest"; \ + docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest"; \ + docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(SANITIZED_DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:v$(DOCKER_MAJOR_VERSION_TAG)"; \ + fi; \ + done .PHONY: common-docker-manifest common-docker-manifest: - DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):$(SANITIZED_DOCKER_IMAGE_TAG)) - DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)" + @for variant in $(DOCKERFILE_VARIANTS_WITH_NAMES); do \ + dockerfile=$${variant#*:}; \ + variant_name=$${variant%%:*}; \ + if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \ + echo "Creating manifest for $$variant_name variant"; \ + refs=""; \ + for arch in $(DOCKER_ARCHS); do \ + if $(call dockerfile_arch_is_excluded,$$arch); then \ + echo " Skipping $$arch for $$variant_name (excluded by DOCKERFILE_ARCH_EXCLUSIONS)"; \ + continue; \ + fi; \ + if $(call registry_arch_is_excluded,$$arch); then \ + echo " Skipping $$arch for $$variant_name on $(DOCKER_REPO) (excluded by DOCKER_REGISTRY_ARCH_EXCLUSIONS)"; \ + continue; \ + fi; \ + refs="$$refs $(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$$arch:$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \ + done; \ + if [ -z "$$refs" ]; then \ + echo "Skipping manifest for $$variant_name variant (no supported architectures)"; \ + continue; \ + fi; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name" $$refs; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)-$$variant_name"; \ + fi; \ + if [ "$$dockerfile" = "Dockerfile" ]; then \ + echo "Creating default variant ($$variant_name) manifest"; \ + refs=""; \ + for arch in $(DOCKER_ARCHS); do \ + if $(call dockerfile_arch_is_excluded,$$arch); then \ + echo " Skipping $$arch for default variant (excluded by DOCKERFILE_ARCH_EXCLUSIONS)"; \ + continue; \ + fi; \ + if $(call registry_arch_is_excluded,$$arch); then \ + echo " Skipping $$arch for default variant on $(DOCKER_REPO) (excluded by DOCKER_REGISTRY_ARCH_EXCLUSIONS)"; \ + continue; \ + fi; \ + refs="$$refs $(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$$arch:$(SANITIZED_DOCKER_IMAGE_TAG)"; \ + done; \ + if [ -z "$$refs" ]; then \ + echo "Skipping default variant manifest (no supported architectures)"; \ + continue; \ + fi; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)" $$refs; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(SANITIZED_DOCKER_IMAGE_TAG)"; \ + fi; \ + if [ "$(DOCKER_IMAGE_TAG)" = "latest" ]; then \ + if [ "$$dockerfile" != "Dockerfile" ] || [ "$$variant_name" != "default" ]; then \ + echo "Creating manifest for $$variant_name variant version tag"; \ + refs=""; \ + for arch in $(DOCKER_ARCHS); do \ + if $(call dockerfile_arch_is_excluded,$$arch); then \ + echo " Skipping $$arch for $$variant_name version tag (excluded by DOCKERFILE_ARCH_EXCLUSIONS)"; \ + continue; \ + fi; \ + if $(call registry_arch_is_excluded,$$arch); then \ + echo " Skipping $$arch for $$variant_name version tag on $(DOCKER_REPO) (excluded by DOCKER_REGISTRY_ARCH_EXCLUSIONS)"; \ + continue; \ + fi; \ + refs="$$refs $(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$$arch:v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \ + done; \ + if [ -z "$$refs" ]; then \ + echo "Skipping version-tag manifest for $$variant_name variant (no supported architectures)"; \ + continue; \ + fi; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name" $$refs; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)-$$variant_name"; \ + fi; \ + if [ "$$dockerfile" = "Dockerfile" ]; then \ + echo "Creating default variant version tag manifest"; \ + refs=""; \ + for arch in $(DOCKER_ARCHS); do \ + if $(call dockerfile_arch_is_excluded,$$arch); then \ + echo " Skipping $$arch for default variant version tag (excluded by DOCKERFILE_ARCH_EXCLUSIONS)"; \ + continue; \ + fi; \ + if $(call registry_arch_is_excluded,$$arch); then \ + echo " Skipping $$arch for default variant version tag on $(DOCKER_REPO) (excluded by DOCKER_REGISTRY_ARCH_EXCLUSIONS)"; \ + continue; \ + fi; \ + refs="$$refs $(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$$arch:v$(DOCKER_MAJOR_VERSION_TAG)"; \ + done; \ + if [ -z "$$refs" ]; then \ + echo "Skipping default variant version-tag manifest (no supported architectures)"; \ + continue; \ + fi; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)" $$refs; \ + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):v$(DOCKER_MAJOR_VERSION_TAG)"; \ + fi; \ + fi; \ + done .PHONY: promu promu: $(PROMU) @@ -240,8 +464,8 @@ $(PROMU): cp $(PROMU_TMP)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM)/promu $(FIRST_GOPATH)/bin/promu rm -r $(PROMU_TMP) -.PHONY: proto -proto: +.PHONY: common-proto +common-proto: @echo ">> generating code from proto files" @./scripts/genproto.sh @@ -253,6 +477,10 @@ $(GOLANGCI_LINT): | sh -s -- -b $(FIRST_GOPATH)/bin $(GOLANGCI_LINT_VERSION) endif +.PHONY: common-print-golangci-lint-version +common-print-golangci-lint-version: + @echo $(GOLANGCI_LINT_VERSION) + .PHONY: precheck precheck:: @@ -267,3 +495,9 @@ $(1)_precheck: exit 1; \ fi endef + +govulncheck: install-govulncheck + govulncheck ./... + +install-govulncheck: + command -v govulncheck > /dev/null || go install golang.org/x/vuln/cmd/govulncheck@latest diff --git a/README.md b/README.md index 3b7730bc8..d0cdf5947 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Prometheus exporter for PostgreSQL server metrics. -CI Tested PostgreSQL versions: `10`, `11`, `12`, `13`, `14`, `15` +CI Tested PostgreSQL versions: `13`, `14`, `15`, `16`, `17`, `18`. ## Quick Start This package is available for Docker: @@ -17,10 +17,29 @@ docker run --net=host -it --rm -e POSTGRES_PASSWORD=password postgres # Connect to it docker run \ --net=host \ - -e DATA_SOURCE_NAME="postgresql://postgres:password@localhost:5432/postgres?sslmode=disable" \ + -e DATA_SOURCE_URI="localhost:5432/postgres?sslmode=disable" \ + -e DATA_SOURCE_USER=postgres \ + -e DATA_SOURCE_PASS=password \ quay.io/prometheuscommunity/postgres-exporter ``` +Test with: +```bash +curl "http://localhost:9187/metrics" +``` + +Example Prometheus config: +```yaml +scrape_configs: + - job_name: postgres + static_configs: + - targets: ["127.0.0.1:9187"] # Replace IP with the hostname of the docker container if you're running the container in a separate network +``` + +Now use the DATA_SOURCE_PASS_FILE with a mounted file containing the password to prevent having the password in an environment variable. + +The container process runs with uid/gid 65534 (important for file permissions). + ## Multi-Target Support (BETA) **This Feature is in beta and may require changes in future releases. Feedback is welcome.** @@ -122,12 +141,33 @@ This will build the docker image as `prometheuscommunity/postgres_exporter:${bra * `[no-]collector.stat_bgwriter` Enable the `stat_bgwriter` collector (default: enabled). +* `[no-]collector.stat_checkpointer` + Enable the `stat_checkpointer` collector (default: disabled). + * `[no-]collector.stat_database` Enable the `stat_database` collector (default: enabled). +* `[no-]collector.stat_progress_vacuum` + Enable the `stat_progress_vacuum` collector (default: enabled). + * `[no-]collector.stat_statements` Enable the `stat_statements` collector (default: disabled). +* `[no-]collector.stat_statements.include_query` + Enable selecting statement query together with queryId. (default: disabled) + +* `--collector.stat_statements.query_length` + Maximum length of the statement text. Default is 120. + +* `--collector.stat_statements.limit` + Maximum number of statements to return. Default is 100. + +* `--collector.stat_statements.exclude_databases` + Comma-separated list of database names to exclude from `pg_stat_statements` metrics. Default is none. + +* `--collector.stat_statements.exclude_users` + Comma-separated list of user names to exclude from `pg_stat_statements` metrics. Default is none. + * `[no-]collector.stat_user_tables` Enable the `stat_user_tables` collector (default: enabled). @@ -208,7 +248,7 @@ The following environment variables configure the exporter: * `DATA_SOURCE_URI` an alternative to `DATA_SOURCE_NAME` which exclusively accepts the hostname without a username and password component. For example, `my_pg_hostname` or - `my_pg_hostname?sslmode=disable`. + `my_pg_hostname:5432/postgres?sslmode=disable`. * `DATA_SOURCE_URI_FILE` The same as above but reads the URI from a file. @@ -227,6 +267,14 @@ The following environment variables configure the exporter: * `DATA_SOURCE_PASS_FILE` The same as above but reads the password from a file. +* `PG_EXPORTER_COLLECTION_TIMEOUT` + Timeout duration to use when collecting the statistics, default to `1m`. + When the timeout is reached, the database connection will be dropped. + It avoids connections stacking when the database answers too slowly + (for instance if the database creates/drop a huge table and locks the tables) + and will avoid exhausting the pool of connections of the database. + Value of `0` or less than `1ms` is considered invalid and will report an error. + * `PG_EXPORTER_WEB_TELEMETRY_PATH` Path under which to expose metrics. Default is `/metrics`. diff --git a/VERSION b/VERSION index a803cc227..41915c799 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.14.0 +0.19.1 diff --git a/cmd/postgres_exporter/main.go b/cmd/postgres_exporter/main.go index 7d424b3d3..0c1bd003e 100644 --- a/cmd/postgres_exporter/main.go +++ b/cmd/postgres_exporter/main.go @@ -1,4 +1,4 @@ -// Copyright 2021 The Prometheus Authors +// Copyright The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -16,18 +16,19 @@ package main import ( "fmt" "net/http" + _ "net/http/pprof" "os" "strings" "github.com/alecthomas/kingpin/v2" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus-community/postgres_exporter/collector" "github.com/prometheus-community/postgres_exporter/config" + "github.com/prometheus-community/postgres_exporter/exporter" "github.com/prometheus/client_golang/prometheus" + versioncollector "github.com/prometheus/client_golang/prometheus/collectors/version" "github.com/prometheus/client_golang/prometheus/promhttp" - "github.com/prometheus/common/promlog" - "github.com/prometheus/common/promlog/flag" + "github.com/prometheus/common/promslog" + "github.com/prometheus/common/promslog/flag" "github.com/prometheus/common/version" "github.com/prometheus/exporter-toolkit/web" "github.com/prometheus/exporter-toolkit/web/kingpinflag" @@ -50,79 +51,69 @@ var ( excludeDatabases = kingpin.Flag("exclude-databases", "A list of databases to remove when autoDiscoverDatabases is enabled (DEPRECATED)").Default("").Envar("PG_EXPORTER_EXCLUDE_DATABASES").String() includeDatabases = kingpin.Flag("include-databases", "A list of databases to include when autoDiscoverDatabases is enabled (DEPRECATED)").Default("").Envar("PG_EXPORTER_INCLUDE_DATABASES").String() metricPrefix = kingpin.Flag("metric-prefix", "A metric prefix can be used to have non-default (not \"pg\") prefixes for each of the metrics").Default("pg").Envar("PG_EXPORTER_METRIC_PREFIX").String() - logger = log.NewNopLogger() + collectionTimeout = kingpin.Flag("collection-timeout", "Timeout for collecting the statistics when the database is slow").Default("1m").Envar("PG_EXPORTER_COLLECTION_TIMEOUT").String() + logger = promslog.NewNopLogger() ) -// Metric name parts. -const ( - // Namespace for all metrics. - namespace = "pg" - // Subsystems. - exporter = "exporter" - // The name of the exporter. - exporterName = "postgres_exporter" - // Metric label used for static string data thats handy to send to Prometheus - // e.g. version - staticLabelName = "static" - // Metric label used for server identification. - serverLabelName = "server" -) +// The name of the exporter. +const exporterName = "postgres_exporter" func main() { kingpin.Version(version.Print(exporterName)) - promlogConfig := &promlog.Config{} - flag.AddFlags(kingpin.CommandLine, promlogConfig) + promslogConfig := &promslog.Config{} + flag.AddFlags(kingpin.CommandLine, promslogConfig) kingpin.HelpFlag.Short('h') kingpin.Parse() - logger = promlog.New(promlogConfig) + logger = promslog.New(promslogConfig) if *onlyDumpMaps { - dumpMaps() + exporter.DumpMaps() return } if err := c.ReloadConfig(*configFile, logger); err != nil { // This is not fatal, but it means that auth must be provided for every dsn. - level.Warn(logger).Log("msg", "Error loading config", "err", err) + logger.Warn("Error loading config", "err", err) } - dsns, err := getDataSources() + dsns, err := exporter.GetDataSources() if err != nil { - level.Error(logger).Log("msg", "Failed reading data sources", "err", err.Error()) + logger.Error("Failed reading data sources", "err", err.Error()) os.Exit(1) } excludedDatabases := strings.Split(*excludeDatabases, ",") - logger.Log("msg", "Excluded databases", "databases", fmt.Sprintf("%v", excludedDatabases)) + logger.Info("Excluded databases", "databases", fmt.Sprintf("%v", excludedDatabases)) if *queriesPath != "" { - level.Warn(logger).Log("msg", "The extended queries.yaml config is DEPRECATED", "file", *queriesPath) + logger.Warn("The extended queries.yaml config is DEPRECATED", "file", *queriesPath) } if *autoDiscoverDatabases || *excludeDatabases != "" || *includeDatabases != "" { - level.Warn(logger).Log("msg", "Scraping additional databases via auto discovery is DEPRECATED") + logger.Warn("Scraping additional databases via auto discovery is DEPRECATED") } if *constantLabelsList != "" { - level.Warn(logger).Log("msg", "Constant labels on all metrics is DEPRECATED") + logger.Warn("Constant labels on all metrics is DEPRECATED") } - opts := []ExporterOpt{ - DisableDefaultMetrics(*disableDefaultMetrics), - DisableSettingsMetrics(*disableSettingsMetrics), - AutoDiscoverDatabases(*autoDiscoverDatabases), - WithUserQueriesPath(*queriesPath), - WithConstantLabels(*constantLabelsList), - ExcludeDatabases(excludedDatabases), - IncludeDatabases(*includeDatabases), + opts := []exporter.ExporterOpt{ + exporter.DisableDefaultMetrics(*disableDefaultMetrics), + exporter.DisableSettingsMetrics(*disableSettingsMetrics), + exporter.AutoDiscoverDatabases(*autoDiscoverDatabases), + exporter.WithUserQueriesPath(*queriesPath), + exporter.WithConstantLabels(*constantLabelsList), + exporter.ExcludeDatabases(excludedDatabases), + exporter.IncludeDatabases(*includeDatabases), + exporter.WithMetricPrefix(*metricPrefix), } - exporter := NewExporter(dsns, opts...) + exporter := exporter.NewExporter(dsns, logger, opts...) defer func() { - exporter.servers.Close() + exporter.CloseServers() }() - prometheus.MustRegister(version.NewCollector(exporterName)) + prometheus.MustRegister(versioncollector.NewCollector(exporterName)) prometheus.MustRegister(exporter) @@ -137,9 +128,9 @@ func main() { excludedDatabases, dsn, []string{}, - ) + collector.WithCollectionTimeout(*collectionTimeout)) if err != nil { - level.Warn(logger).Log("msg", "Failed to create PostgresCollector", "err", err.Error()) + logger.Warn("Failed to create PostgresCollector", "err", err.Error()) } else { prometheus.MustRegister(pe) } @@ -160,7 +151,7 @@ func main() { } landingPage, err := web.NewLandingPage(landingConfig) if err != nil { - level.Error(logger).Log("err", err) + logger.Error("error creating landing page", "err", err) os.Exit(1) } http.Handle("/", landingPage) @@ -170,7 +161,7 @@ func main() { srv := &http.Server{} if err := web.ListenAndServe(srv, webConfig, logger); err != nil { - level.Error(logger).Log("msg", "Error running HTTP server", "err", err) + logger.Error("Error running HTTP server", "err", err) os.Exit(1) } } diff --git a/cmd/postgres_exporter/probe.go b/cmd/postgres_exporter/probe.go index 827314a7a..66219c07d 100644 --- a/cmd/postgres_exporter/probe.go +++ b/cmd/postgres_exporter/probe.go @@ -15,17 +15,17 @@ package main import ( "fmt" + "log/slog" "net/http" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus-community/postgres_exporter/collector" "github.com/prometheus-community/postgres_exporter/config" + "github.com/prometheus-community/postgres_exporter/exporter" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" ) -func handleProbe(logger log.Logger, excludeDatabases []string) http.HandlerFunc { +func handleProbe(logger *slog.Logger, excludeDatabases []string) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { ctx := r.Context() conf := c.GetConfig() @@ -38,7 +38,7 @@ func handleProbe(logger log.Logger, excludeDatabases []string) http.HandlerFunc var authModule config.AuthModule authModuleName := params.Get("auth_module") if authModuleName == "" { - level.Info(logger).Log("msg", "no auth_module specified, using default") + logger.Info("no auth_module specified, using default") } else { var ok bool authModule, ok = conf.AuthModules[authModuleName] @@ -54,37 +54,39 @@ func handleProbe(logger log.Logger, excludeDatabases []string) http.HandlerFunc dsn, err := authModule.ConfigureTarget(target) if err != nil { - level.Error(logger).Log("msg", "failed to configure target", "err", err) + logger.Error("failed to configure target", "err", err) http.Error(w, fmt.Sprintf("could not configure dsn for target: %v", err), http.StatusBadRequest) return } // TODO(@sysadmind): Timeout - tl := log.With(logger, "target", target) + tl := logger.With("target", target) registry := prometheus.NewRegistry() - opts := []ExporterOpt{ - DisableDefaultMetrics(*disableDefaultMetrics), - DisableSettingsMetrics(*disableSettingsMetrics), - AutoDiscoverDatabases(*autoDiscoverDatabases), - WithUserQueriesPath(*queriesPath), - WithConstantLabels(*constantLabelsList), - ExcludeDatabases(excludeDatabases), - IncludeDatabases(*includeDatabases), + opts := []exporter.ExporterOpt{ + exporter.DisableDefaultMetrics(*disableDefaultMetrics), + exporter.DisableSettingsMetrics(*disableSettingsMetrics), + exporter.AutoDiscoverDatabases(*autoDiscoverDatabases), + exporter.WithUserQueriesPath(*queriesPath), + exporter.WithConstantLabels(*constantLabelsList), + exporter.ExcludeDatabases(excludeDatabases), + exporter.IncludeDatabases(*includeDatabases), + exporter.WithMetricPrefix(*metricPrefix), } dsns := []string{dsn.GetConnectionString()} - exporter := NewExporter(dsns, opts...) + exporter := exporter.NewExporter(dsns, logger, opts...) defer func() { - exporter.servers.Close() + exporter.CloseServers() }() registry.MustRegister(exporter) // Run the probe pc, err := collector.NewProbeCollector(tl, excludeDatabases, registry, dsn) if err != nil { + logger.Error("Error creating probe collector", "err", err) http.Error(w, err.Error(), http.StatusInternalServerError) return } diff --git a/collector/collector.go b/collector/collector.go index c643bd91e..130f5313b 100644 --- a/collector/collector.go +++ b/collector/collector.go @@ -15,14 +15,14 @@ package collector import ( "context" + "database/sql" "errors" "fmt" + "log/slog" "sync" "time" "github.com/alecthomas/kingpin/v2" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" ) @@ -38,8 +38,9 @@ const ( // Namespace for all metrics. namespace = "pg" - defaultEnabled = true - defaultDisabled = false + collectorFlagPrefix = "collector." + defaultEnabled = true + defaultDisabled = false ) var ( @@ -62,7 +63,7 @@ type Collector interface { } type collectorConfig struct { - logger log.Logger + logger *slog.Logger excludeDatabases []string } @@ -75,7 +76,7 @@ func registerCollector(name string, isDefaultEnabled bool, createFunc func(colle } // Create flag for this collector - flagName := fmt.Sprintf("collector.%s", name) + flagName := collectorFlagPrefix + name flagHelp := fmt.Sprintf("Enable the %s collector (default: %s).", name, helpDefaultState) defaultValue := fmt.Sprintf("%v", isDefaultEnabled) @@ -89,15 +90,16 @@ func registerCollector(name string, isDefaultEnabled bool, createFunc func(colle // PostgresCollector implements the prometheus.Collector interface. type PostgresCollector struct { Collectors map[string]Collector - logger log.Logger + logger *slog.Logger - instance *instance + instance *instance + CollectionTimeout time.Duration } type Option func(*PostgresCollector) error // NewPostgresCollector creates a new PostgresCollector. -func NewPostgresCollector(logger log.Logger, excludeDatabases []string, dsn string, filters []string, options ...Option) (*PostgresCollector, error) { +func NewPostgresCollector(logger *slog.Logger, excludeDatabases []string, dsn string, filters []string, options ...Option) (*PostgresCollector, error) { p := &PostgresCollector{ logger: logger, } @@ -131,7 +133,7 @@ func NewPostgresCollector(logger log.Logger, excludeDatabases []string, dsn stri collectors[key] = collector } else { collector, err := factories[key](collectorConfig{ - logger: log.With(logger, "collector", key), + logger: logger.With("collector", key), excludeDatabases: excludeDatabases, }) if err != nil { @@ -157,6 +159,20 @@ func NewPostgresCollector(logger log.Logger, excludeDatabases []string, dsn stri return p, nil } +func WithCollectionTimeout(s string) Option { + return func(e *PostgresCollector) error { + duration, err := time.ParseDuration(s) + if err != nil { + return err + } + if duration < 1*time.Millisecond { + return errors.New("timeout must be greater than 1ms") + } + e.CollectionTimeout = duration + return nil + } +} + // Describe implements the prometheus.Collector interface. func (p PostgresCollector) Describe(ch chan<- *prometheus.Desc) { ch <- scrapeDurationDesc @@ -165,28 +181,40 @@ func (p PostgresCollector) Describe(ch chan<- *prometheus.Desc) { // Collect implements the prometheus.Collector interface. func (p PostgresCollector) Collect(ch chan<- prometheus.Metric) { - ctx := context.TODO() + // copy the instance so that concurrent scrapes have independent instances + inst := p.instance.copy() // Set up the database connection for the collector. - err := p.instance.setup() + err := inst.setup() + defer inst.Close() if err != nil { - level.Error(p.logger).Log("msg", "Error opening connection to database", "err", err) + p.logger.Error("Error opening connection to database", "err", err) return } - defer p.instance.Close() + p.collectFromConnection(inst, ch) +} + +func (p PostgresCollector) collectFromConnection(inst *instance, ch chan<- prometheus.Metric) { + // Eventually, connect this to the http scraping context + ctx, cancel := context.WithTimeout(context.Background(), p.CollectionTimeout) + defer cancel() wg := sync.WaitGroup{} wg.Add(len(p.Collectors)) for name, c := range p.Collectors { go func(name string, c Collector) { - execute(ctx, name, c, p.instance, ch, p.logger) + execute(ctx, name, c, inst, ch, p.logger) wg.Done() }(name, c) } wg.Wait() } -func execute(ctx context.Context, name string, c Collector, instance *instance, ch chan<- prometheus.Metric, logger log.Logger) { +func (p *PostgresCollector) Close() error { + return p.instance.Close() +} + +func execute(ctx context.Context, name string, c Collector, instance *instance, ch chan<- prometheus.Metric, logger *slog.Logger) { begin := time.Now() err := c.Update(ctx, instance, ch) duration := time.Since(begin) @@ -194,13 +222,13 @@ func execute(ctx context.Context, name string, c Collector, instance *instance, if err != nil { if IsNoDataError(err) { - level.Debug(logger).Log("msg", "collector returned no data", "name", name, "duration_seconds", duration.Seconds(), "err", err) + logger.Debug("collector returned no data", "name", name, "duration_seconds", duration.Seconds(), "err", err) } else { - level.Error(logger).Log("msg", "collector failed", "name", name, "duration_seconds", duration.Seconds(), "err", err) + logger.Error("collector failed", "name", name, "duration_seconds", duration.Seconds(), "err", err) } success = 0 } else { - level.Debug(logger).Log("msg", "collector succeeded", "name", name, "duration_seconds", duration.Seconds()) + logger.Debug("collector succeeded", "name", name, "duration_seconds", duration.Seconds()) success = 1 } ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, duration.Seconds(), name) @@ -225,3 +253,11 @@ var ErrNoData = errors.New("collector returned no data") func IsNoDataError(err error) bool { return err == ErrNoData } + +func Int32(m sql.NullInt32) float64 { + mM := 0.0 + if m.Valid { + mM = float64(m.Int32) + } + return mM +} diff --git a/collector/collector_test.go b/collector/collector_test.go index 18101f00e..984b2c0ff 100644 --- a/collector/collector_test.go +++ b/collector/collector_test.go @@ -14,9 +14,13 @@ package collector import ( "strings" + "testing" + "time" + "github.com/DATA-DOG/go-sqlmock" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/promslog" ) type labelMap map[string]string @@ -48,15 +52,84 @@ func readMetric(m prometheus.Metric) MetricResult { func sanitizeQuery(q string) string { q = strings.Join(strings.Fields(q), " ") - q = strings.Replace(q, "(", "\\(", -1) - q = strings.Replace(q, "?", "\\?", -1) - q = strings.Replace(q, ")", "\\)", -1) - q = strings.Replace(q, "[", "\\[", -1) - q = strings.Replace(q, "]", "\\]", -1) - q = strings.Replace(q, "{", "\\{", -1) - q = strings.Replace(q, "}", "\\}", -1) - q = strings.Replace(q, "*", "\\*", -1) - q = strings.Replace(q, "^", "\\^", -1) - q = strings.Replace(q, "$", "\\$", -1) + q = strings.ReplaceAll(q, "(", "\\(") + q = strings.ReplaceAll(q, "?", "\\?") + q = strings.ReplaceAll(q, ")", "\\)") + q = strings.ReplaceAll(q, "[", "\\[") + q = strings.ReplaceAll(q, "]", "\\]") + q = strings.ReplaceAll(q, "{", "\\{") + q = strings.ReplaceAll(q, "}", "\\}") + q = strings.ReplaceAll(q, "*", "\\*") + q = strings.ReplaceAll(q, "^", "\\^") + q = strings.ReplaceAll(q, "$", "\\$") return q } + +// We ensure that when the database respond after a long time +// The collection process still occurs in a predictable manner +// Will avoid accumulation of queries on a completely frozen DB +func TestWithConnectionTimeout(t *testing.T) { + + timeoutForQuery := time.Duration(100 * time.Millisecond) + + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db} + + columns := []string{"pg_roles.rolname", "pg_roles.rolconnlimit"} + rows := sqlmock.NewRows(columns).AddRow("role1", 2) + mock.ExpectQuery(pgRolesConnectionLimitsQuery). + WillDelayFor(30 * time.Second). + WillReturnRows(rows) + + log_config := promslog.Config{} + + logger := promslog.New(&log_config) + + c, err := NewPostgresCollector(logger, []string{}, "postgresql://local", []string{}, WithCollectionTimeout(timeoutForQuery.String())) + if err != nil { + t.Fatalf("error creating NewPostgresCollector: %s", err) + } + collector_config := collectorConfig{ + logger: logger, + excludeDatabases: []string{}, + } + + collector, err := NewPGRolesCollector(collector_config) + if err != nil { + t.Fatalf("error creating collector: %s", err) + } + c.Collectors["test"] = collector + c.instance = inst + + ch := make(chan prometheus.Metric) + defer close(ch) + + go func() { + for { + <-ch + time.Sleep(1 * time.Millisecond) + } + }() + + startTime := time.Now() + c.collectFromConnection(inst, ch) + elapsed := time.Since(startTime) + + if elapsed <= timeoutForQuery { + t.Errorf("elapsed time was %v, should be bigger than timeout=%v", elapsed, timeoutForQuery) + } + + // Ensure we took more than timeout, but not too much + if elapsed >= timeoutForQuery+500*time.Millisecond { + t.Errorf("elapsed time was %v, should not be much bigger than timeout=%v", elapsed, timeoutForQuery) + } + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} diff --git a/collector/instance.go b/collector/instance.go index 87eb0591c..a365697d6 100644 --- a/collector/instance.go +++ b/collector/instance.go @@ -43,6 +43,13 @@ func newInstance(dsn string) (*instance, error) { return i, nil } +// copy returns a copy of the instance. +func (i *instance) copy() *instance { + return &instance{ + dsn: i.dsn, + } +} + func (i *instance) setup() error { db, err := sql.Open("postgres", i.dsn) if err != nil { diff --git a/collector/pg_buffercache_summary.go b/collector/pg_buffercache_summary.go new file mode 100644 index 000000000..8b0e0f007 --- /dev/null +++ b/collector/pg_buffercache_summary.go @@ -0,0 +1,135 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "context" + "database/sql" + "log/slog" + + "github.com/blang/semver/v4" + "github.com/prometheus/client_golang/prometheus" +) + +const buffercacheSummarySubsystem = "buffercache_summary" + +func init() { + registerCollector(buffercacheSummarySubsystem, defaultDisabled, NewBuffercacheSummaryCollector) +} + +// BuffercacheSummaryCollector collects stats from pg_buffercache: https://www.postgresql.org/docs/current/pgbuffercache.html. +// +// It depends on the extension being loaded with +// +// create extension pg_buffercache; +// +// It does not take locks, see the PG docs above. +type BuffercacheSummaryCollector struct { + log *slog.Logger +} + +func NewBuffercacheSummaryCollector(config collectorConfig) (Collector, error) { + return &BuffercacheSummaryCollector{ + log: config.logger, + }, nil +} + +var ( + buffersUsedDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, buffercacheSummarySubsystem, "buffers_used"), + "Number of used shared buffers", + []string{}, + prometheus.Labels{}, + ) + buffersUnusedDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, buffercacheSummarySubsystem, "buffers_unused"), + "Number of unused shared buffers", + []string{}, + prometheus.Labels{}, + ) + buffersDirtyDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, buffercacheSummarySubsystem, "buffers_dirty"), + "Number of dirty shared buffers", + []string{}, + prometheus.Labels{}, + ) + buffersPinnedDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, buffercacheSummarySubsystem, "buffers_pinned"), + "Number of pinned shared buffers", + []string{}, + prometheus.Labels{}, + ) + usageCountAvgDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, buffercacheSummarySubsystem, "usagecount_avg"), + "Average usage count of used shared buffers", + []string{}, + prometheus.Labels{}, + ) + + buffercacheQuery = ` + SELECT + buffers_used, + buffers_unused, + buffers_dirty, + buffers_pinned, + usagecount_avg + FROM + pg_buffercache_summary() + ` +) + +// Update implements Collector +// It is called by the Prometheus registry when collecting metrics. +func (c BuffercacheSummaryCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + // pg_buffercache_summary is only in v16, and we don't need support for earlier currently. + if !instance.version.GE(semver.MustParse("16.0.0")) { + return nil + } + db := instance.getDB() + rows, err := db.QueryContext(ctx, buffercacheQuery) + if err != nil { + return err + } + defer rows.Close() + + var used, unused, dirty, pinned sql.NullInt32 + var usagecountAvg sql.NullFloat64 + + for rows.Next() { + if err := rows.Scan( + &used, + &unused, + &dirty, + &pinned, + &usagecountAvg, + ); err != nil { + return err + } + + usagecountAvgMetric := 0.0 + if usagecountAvg.Valid { + usagecountAvgMetric = usagecountAvg.Float64 + } + ch <- prometheus.MustNewConstMetric( + usageCountAvgDesc, + prometheus.GaugeValue, + usagecountAvgMetric) + ch <- prometheus.MustNewConstMetric(buffersUsedDesc, prometheus.GaugeValue, Int32(used)) + ch <- prometheus.MustNewConstMetric(buffersUnusedDesc, prometheus.GaugeValue, Int32(unused)) + ch <- prometheus.MustNewConstMetric(buffersDirtyDesc, prometheus.GaugeValue, Int32(dirty)) + ch <- prometheus.MustNewConstMetric(buffersPinnedDesc, prometheus.GaugeValue, Int32(pinned)) + } + + return rows.Err() +} diff --git a/collector/pg_buffercache_summary_test.go b/collector/pg_buffercache_summary_test.go new file mode 100644 index 000000000..86d91751f --- /dev/null +++ b/collector/pg_buffercache_summary_test.go @@ -0,0 +1,73 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package collector + +import ( + "context" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/blang/semver/v4" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/smartystreets/goconvey/convey" +) + +func TestBuffercacheSummaryCollector(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("16.0.0")} + + columns := []string{ + "buffers_used", + "buffers_unused", + "buffers_dirty", + "buffers_pinned", + "usagecount_avg"} + + rows := sqlmock.NewRows(columns).AddRow(123, 456, 789, 234, 56.6778) + + mock.ExpectQuery(sanitizeQuery(buffercacheQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := BuffercacheSummaryCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{}, metricType: dto.MetricType_GAUGE, value: 56.6778}, + {labels: labelMap{}, metricType: dto.MetricType_GAUGE, value: 123}, + {labels: labelMap{}, metricType: dto.MetricType_GAUGE, value: 456}, + {labels: labelMap{}, metricType: dto.MetricType_GAUGE, value: 789}, + {labels: labelMap{}, metricType: dto.MetricType_GAUGE, value: 234}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} diff --git a/collector/pg_database.go b/collector/pg_database.go index d2c4b206a..7f98748f4 100644 --- a/collector/pg_database.go +++ b/collector/pg_database.go @@ -16,8 +16,9 @@ package collector import ( "context" "database/sql" + "log/slog" + "slices" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" ) @@ -28,7 +29,7 @@ func init() { } type PGDatabaseCollector struct { - log log.Logger + log *slog.Logger excludedDatabases []string } @@ -53,12 +54,21 @@ var ( "Disk space used by the database", []string{"datname"}, nil, ) + pgDatabaseConnectionLimitsDesc = prometheus.NewDesc( + prometheus.BuildFQName( + namespace, + databaseSubsystem, + "connection_limit", + ), + "Connection limit set for the database", + []string{"datname"}, nil, + ) - pgDatabaseQuery = "SELECT pg_database.datname FROM pg_database;" + pgDatabaseQuery = "SELECT pg_database.datname, pg_database.datconnlimit FROM pg_database;" pgDatabaseSizeQuery = "SELECT pg_database_size($1)" ) -// Update implements Collector and exposes database size. +// Update implements Collector and exposes database size and connection limits. // It is called by the Prometheus registry when collecting metrics. // The list of databases is retrieved from pg_database and filtered // by the excludeDatabase config parameter. The tradeoff here is that @@ -81,21 +91,32 @@ func (c PGDatabaseCollector) Update(ctx context.Context, instance *instance, ch for rows.Next() { var datname sql.NullString - if err := rows.Scan(&datname); err != nil { + var connLimit sql.NullInt64 + if err := rows.Scan(&datname, &connLimit); err != nil { return err } if !datname.Valid { continue } + database := datname.String // Ignore excluded databases // Filtering is done here instead of in the query to avoid // a complicated NOT IN query with a variable number of parameters - if sliceContains(c.excludedDatabases, datname.String) { + if slices.Contains(c.excludedDatabases, database) { continue } - databases = append(databases, datname.String) + databases = append(databases, database) + + connLimitMetric := 0.0 + if connLimit.Valid { + connLimitMetric = float64(connLimit.Int64) + } + ch <- prometheus.MustNewConstMetric( + pgDatabaseConnectionLimitsDesc, + prometheus.GaugeValue, connLimitMetric, database, + ) } // Query the size of the databases @@ -114,15 +135,7 @@ func (c PGDatabaseCollector) Update(ctx context.Context, instance *instance, ch pgDatabaseSizeDesc, prometheus.GaugeValue, sizeMetric, datname, ) - } - return rows.Err() -} -func sliceContains(slice []string, s string) bool { - for _, item := range slice { - if item == s { - return true - } } - return false + return rows.Err() } diff --git a/collector/pg_database_test.go b/collector/pg_database_test.go index b5052c5d1..fe94166e9 100644 --- a/collector/pg_database_test.go +++ b/collector/pg_database_test.go @@ -31,8 +31,8 @@ func TestPGDatabaseCollector(t *testing.T) { inst := &instance{db: db} - mock.ExpectQuery(sanitizeQuery(pgDatabaseQuery)).WillReturnRows(sqlmock.NewRows([]string{"datname"}). - AddRow("postgres")) + mock.ExpectQuery(sanitizeQuery(pgDatabaseQuery)).WillReturnRows(sqlmock.NewRows([]string{"datname", "datconnlimit"}). + AddRow("postgres", 15)) mock.ExpectQuery(sanitizeQuery(pgDatabaseSizeQuery)).WithArgs("postgres").WillReturnRows(sqlmock.NewRows([]string{"pg_database_size"}). AddRow(1024)) @@ -47,6 +47,7 @@ func TestPGDatabaseCollector(t *testing.T) { }() expected := []MetricResult{ + {labels: labelMap{"datname": "postgres"}, value: 15, metricType: dto.MetricType_GAUGE}, {labels: labelMap{"datname": "postgres"}, value: 1024, metricType: dto.MetricType_GAUGE}, } convey.Convey("Metrics comparison", t, func() { @@ -71,8 +72,8 @@ func TestPGDatabaseCollectorNullMetric(t *testing.T) { inst := &instance{db: db} - mock.ExpectQuery(sanitizeQuery(pgDatabaseQuery)).WillReturnRows(sqlmock.NewRows([]string{"datname"}). - AddRow("postgres")) + mock.ExpectQuery(sanitizeQuery(pgDatabaseQuery)).WillReturnRows(sqlmock.NewRows([]string{"datname", "datconnlimit"}). + AddRow("postgres", nil)) mock.ExpectQuery(sanitizeQuery(pgDatabaseSizeQuery)).WithArgs("postgres").WillReturnRows(sqlmock.NewRows([]string{"pg_database_size"}). AddRow(nil)) @@ -88,6 +89,7 @@ func TestPGDatabaseCollectorNullMetric(t *testing.T) { expected := []MetricResult{ {labels: labelMap{"datname": "postgres"}, value: 0, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{"datname": "postgres"}, value: 0, metricType: dto.MetricType_GAUGE}, } convey.Convey("Metrics comparison", t, func() { for _, expect := range expected { diff --git a/collector/pg_database_wraparound.go b/collector/pg_database_wraparound.go index d46270637..d170821b5 100644 --- a/collector/pg_database_wraparound.go +++ b/collector/pg_database_wraparound.go @@ -16,9 +16,8 @@ package collector import ( "context" "database/sql" + "log/slog" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" ) @@ -29,7 +28,7 @@ func init() { } type PGDatabaseWraparoundCollector struct { - log log.Logger + log *slog.Logger } func NewPGDatabaseWraparoundCollector(config collectorConfig) (Collector, error) { @@ -81,15 +80,15 @@ func (c *PGDatabaseWraparoundCollector) Update(ctx context.Context, instance *in } if !datname.Valid { - level.Debug(c.log).Log("msg", "Skipping database with NULL name") + c.log.Debug("Skipping database with NULL name") continue } if !ageDatfrozenxid.Valid { - level.Debug(c.log).Log("msg", "Skipping stat emission with NULL age_datfrozenxid") + c.log.Debug("Skipping stat emission with NULL age_datfrozenxid") continue } if !ageDatminmxid.Valid { - level.Debug(c.log).Log("msg", "Skipping stat emission with NULL age_datminmxid") + c.log.Debug("Skipping stat emission with NULL age_datminmxid") continue } diff --git a/collector/pg_locks.go b/collector/pg_locks.go index d2c77ccd5..add3e6d42 100644 --- a/collector/pg_locks.go +++ b/collector/pg_locks.go @@ -16,8 +16,8 @@ package collector import ( "context" "database/sql" + "log/slog" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" ) @@ -28,7 +28,7 @@ func init() { } type PGLocksCollector struct { - log log.Logger + log *slog.Logger } func NewPGLocksCollector(config collectorConfig) (Collector, error) { diff --git a/collector/pg_long_running_transactions.go b/collector/pg_long_running_transactions.go index ffd89d5f0..ac1eaa70e 100644 --- a/collector/pg_long_running_transactions.go +++ b/collector/pg_long_running_transactions.go @@ -15,8 +15,9 @@ package collector import ( "context" + "database/sql" + "log/slog" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" ) @@ -27,7 +28,7 @@ func init() { } type PGLongRunningTransactionsCollector struct { - log log.Logger + log *slog.Logger } func NewPGLongRunningTransactionsCollector(config collectorConfig) (Collector, error) { @@ -51,10 +52,13 @@ var ( longRunningTransactionsQuery = ` SELECT - COUNT(*) as transactions, - MAX(EXTRACT(EPOCH FROM clock_timestamp())) AS oldest_timestamp_seconds - FROM pg_catalog.pg_stat_activity - WHERE state is distinct from 'idle' AND query not like 'autovacuum:%' + COUNT(*) as transactions, + MAX(EXTRACT(EPOCH FROM clock_timestamp() - pg_stat_activity.xact_start)) AS oldest_timestamp_seconds +FROM pg_catalog.pg_stat_activity +WHERE state IS DISTINCT FROM 'idle' +AND query NOT LIKE 'autovacuum:%' +AND pg_stat_activity.xact_start IS NOT NULL +AND pid <> pg_backend_pid(); ` ) @@ -69,12 +73,20 @@ func (PGLongRunningTransactionsCollector) Update(ctx context.Context, instance * defer rows.Close() for rows.Next() { - var transactions, ageInSeconds float64 + var transactions float64 + var ageInSeconds sql.NullFloat64 if err := rows.Scan(&transactions, &ageInSeconds); err != nil { return err } + // If there are no long running transactions, ageInSeconds will be NULL + // so we set it to 0 + age := 0.0 + if ageInSeconds.Valid { + age = ageInSeconds.Float64 + } + ch <- prometheus.MustNewConstMetric( longRunningTransactionsCount, prometheus.GaugeValue, @@ -83,7 +95,7 @@ func (PGLongRunningTransactionsCollector) Update(ctx context.Context, instance * ch <- prometheus.MustNewConstMetric( longRunningTransactionsAgeInSeconds, prometheus.GaugeValue, - ageInSeconds, + age, ) } if err := rows.Err(); err != nil { diff --git a/collector/pg_long_running_transactions_test.go b/collector/pg_long_running_transactions_test.go index eedda7c65..ba7bc6a52 100644 --- a/collector/pg_long_running_transactions_test.go +++ b/collector/pg_long_running_transactions_test.go @@ -61,3 +61,44 @@ func TestPGLongRunningTransactionsCollector(t *testing.T) { t.Errorf("there were unfulfilled exceptions: %s", err) } } + +func TestPGLongRunningTransactionsCollectorNull(t *testing.T) { + // Test when no long running transactions are present + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + inst := &instance{db: db} + columns := []string{ + "transactions", + "age_in_seconds", + } + rows := sqlmock.NewRows(columns). + AddRow(0, nil) + + mock.ExpectQuery(sanitizeQuery(longRunningTransactionsQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGLongRunningTransactionsCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGLongRunningTransactionsCollector.Update: %s", err) + } + }() + expected := []MetricResult{ + {labels: labelMap{}, value: 0, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{}, value: 0, metricType: dto.MetricType_GAUGE}, + } + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} diff --git a/collector/pg_process_idle.go b/collector/pg_process_idle.go index c401ab56f..3cb08f114 100644 --- a/collector/pg_process_idle.go +++ b/collector/pg_process_idle.go @@ -16,8 +16,8 @@ package collector import ( "context" "database/sql" + "log/slog" - "github.com/go-kit/log" "github.com/lib/pq" "github.com/prometheus/client_golang/prometheus" ) @@ -28,7 +28,7 @@ func init() { } type PGProcessIdleCollector struct { - log log.Logger + log *slog.Logger } const processIdleSubsystem = "process_idle" @@ -56,6 +56,7 @@ func (PGProcessIdleCollector) Update(ctx context.Context, instance *instance, ch COUNT(*) AS process_idle_seconds_count FROM pg_stat_activity WHERE state ~ '^idle' + AND pid <> pg_backend_pid() GROUP BY state, application_name ), buckets AS ( @@ -72,6 +73,7 @@ func (PGProcessIdleCollector) Update(ctx context.Context, instance *instance, ch FROM pg_stat_activity, UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le + WHERE pid <> pg_backend_pid() GROUP BY state, application_name, le ORDER BY state, application_name, le ) diff --git a/collector/pg_replication.go b/collector/pg_replication.go index 6067cc9b1..7f8b2fbd7 100644 --- a/collector/pg_replication.go +++ b/collector/pg_replication.go @@ -51,6 +51,15 @@ var ( "Indicates if the server is a replica", []string{}, nil, ) + pgReplicationLastReplay = prometheus.NewDesc( + prometheus.BuildFQName( + namespace, + replicationSubsystem, + "last_replay_seconds", + ), + "Age of last replay in seconds", + []string{}, nil, + ) pgReplicationQuery = `SELECT CASE @@ -61,7 +70,8 @@ var ( CASE WHEN pg_is_in_recovery() THEN 1 ELSE 0 - END as is_replica` + END as is_replica, + GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) as last_replay` ) func (c *PGReplicationCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { @@ -72,7 +82,8 @@ func (c *PGReplicationCollector) Update(ctx context.Context, instance *instance, var lag float64 var isReplica int64 - err := row.Scan(&lag, &isReplica) + var replayAge float64 + err := row.Scan(&lag, &isReplica, &replayAge) if err != nil { return err } @@ -84,5 +95,9 @@ func (c *PGReplicationCollector) Update(ctx context.Context, instance *instance, pgReplicationIsReplica, prometheus.GaugeValue, float64(isReplica), ) + ch <- prometheus.MustNewConstMetric( + pgReplicationLastReplay, + prometheus.GaugeValue, replayAge, + ) return nil } diff --git a/collector/pg_replication_slot.go b/collector/pg_replication_slot.go index c625fd4d5..e6c9773eb 100644 --- a/collector/pg_replication_slot.go +++ b/collector/pg_replication_slot.go @@ -16,8 +16,9 @@ package collector import ( "context" "database/sql" + "log/slog" - "github.com/go-kit/log" + "github.com/blang/semver/v4" "github.com/prometheus/client_golang/prometheus" ) @@ -28,7 +29,7 @@ func init() { } type PGReplicationSlotCollector struct { - log log.Logger + log *slog.Logger } func NewPGReplicationSlotCollector(config collectorConfig) (Collector, error) { @@ -43,7 +44,7 @@ var ( "slot_current_wal_lsn", ), "current wal lsn value", - []string{"slot_name"}, nil, + []string{"slot_name", "slot_type"}, nil, ) pgReplicationSlotCurrentFlushDesc = prometheus.NewDesc( prometheus.BuildFQName( @@ -52,7 +53,7 @@ var ( "slot_confirmed_flush_lsn", ), "last lsn confirmed flushed to the replication slot", - []string{"slot_name"}, nil, + []string{"slot_name", "slot_type"}, nil, ) pgReplicationSlotIsActiveDesc = prometheus.NewDesc( prometheus.BuildFQName( @@ -61,25 +62,62 @@ var ( "slot_is_active", ), "whether the replication slot is active or not", - []string{"slot_name"}, nil, + []string{"slot_name", "slot_type"}, nil, + ) + pgReplicationSlotSafeWal = prometheus.NewDesc( + prometheus.BuildFQName( + namespace, + replicationSlotSubsystem, + "safe_wal_size_bytes", + ), + "number of bytes that can be written to WAL such that this slot is not in danger of getting in state lost", + []string{"slot_name", "slot_type"}, nil, + ) + pgReplicationSlotWalStatus = prometheus.NewDesc( + prometheus.BuildFQName( + namespace, + replicationSlotSubsystem, + "wal_status", + ), + "availability of WAL files claimed by this slot", + []string{"slot_name", "slot_type", "wal_status"}, nil, ) - pgReplicationSlotQuery = `SELECT slot_name, - CASE WHEN pg_is_in_recovery() THEN + slot_type, + CASE WHEN pg_is_in_recovery() THEN pg_last_wal_receive_lsn() - '0/0' - ELSE - pg_current_wal_lsn() - '0/0' + ELSE + pg_current_wal_lsn() - '0/0' END AS current_wal_lsn, - COALESCE(confirmed_flush_lsn, '0/0') - '0/0', + COALESCE(confirmed_flush_lsn, '0/0') - '0/0' AS confirmed_flush_lsn, active FROM pg_replication_slots;` + pgReplicationSlotNewQuery = `SELECT + slot_name, + slot_type, + CASE WHEN pg_is_in_recovery() THEN + pg_last_wal_receive_lsn() - '0/0' + ELSE + pg_current_wal_lsn() - '0/0' + END AS current_wal_lsn, + COALESCE(confirmed_flush_lsn, '0/0') - '0/0' AS confirmed_flush_lsn, + active, + safe_wal_size, + wal_status + FROM pg_replication_slots;` ) func (PGReplicationSlotCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + query := pgReplicationSlotQuery + abovePG13 := instance.version.GTE(semver.MustParse("13.0.0")) + if abovePG13 { + query = pgReplicationSlotNewQuery + } + db := instance.getDB() rows, err := db.QueryContext(ctx, - pgReplicationSlotQuery) + query) if err != nil { return err } @@ -87,10 +125,28 @@ func (PGReplicationSlotCollector) Update(ctx context.Context, instance *instance for rows.Next() { var slotName sql.NullString + var slotType sql.NullString var walLSN sql.NullFloat64 var flushLSN sql.NullFloat64 var isActive sql.NullBool - if err := rows.Scan(&slotName, &walLSN, &flushLSN, &isActive); err != nil { + var safeWalSize sql.NullInt64 + var walStatus sql.NullString + + r := []any{ + &slotName, + &slotType, + &walLSN, + &flushLSN, + &isActive, + } + + if abovePG13 { + r = append(r, &safeWalSize) + r = append(r, &walStatus) + } + + err := rows.Scan(r...) + if err != nil { return err } @@ -102,6 +158,10 @@ func (PGReplicationSlotCollector) Update(ctx context.Context, instance *instance if slotName.Valid { slotNameLabel = slotName.String } + slotTypeLabel := "unknown" + if slotType.Valid { + slotTypeLabel = slotType.String + } var walLSNMetric float64 if walLSN.Valid { @@ -109,7 +169,7 @@ func (PGReplicationSlotCollector) Update(ctx context.Context, instance *instance } ch <- prometheus.MustNewConstMetric( pgReplicationSlotCurrentWalDesc, - prometheus.GaugeValue, walLSNMetric, slotNameLabel, + prometheus.GaugeValue, walLSNMetric, slotNameLabel, slotTypeLabel, ) if isActive.Valid && isActive.Bool { var flushLSNMetric float64 @@ -118,13 +178,27 @@ func (PGReplicationSlotCollector) Update(ctx context.Context, instance *instance } ch <- prometheus.MustNewConstMetric( pgReplicationSlotCurrentFlushDesc, - prometheus.GaugeValue, flushLSNMetric, slotNameLabel, + prometheus.GaugeValue, flushLSNMetric, slotNameLabel, slotTypeLabel, ) } ch <- prometheus.MustNewConstMetric( pgReplicationSlotIsActiveDesc, - prometheus.GaugeValue, isActiveValue, slotNameLabel, + prometheus.GaugeValue, isActiveValue, slotNameLabel, slotTypeLabel, ) + + if safeWalSize.Valid { + ch <- prometheus.MustNewConstMetric( + pgReplicationSlotSafeWal, + prometheus.GaugeValue, float64(safeWalSize.Int64), slotNameLabel, slotTypeLabel, + ) + } + + if walStatus.Valid { + ch <- prometheus.MustNewConstMetric( + pgReplicationSlotWalStatus, + prometheus.GaugeValue, 1, slotNameLabel, slotTypeLabel, walStatus.String, + ) + } } return rows.Err() } diff --git a/collector/pg_replication_slot_test.go b/collector/pg_replication_slot_test.go index 7e91ea261..981b5db62 100644 --- a/collector/pg_replication_slot_test.go +++ b/collector/pg_replication_slot_test.go @@ -17,6 +17,7 @@ import ( "testing" "github.com/DATA-DOG/go-sqlmock" + "github.com/blang/semver/v4" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/smartystreets/goconvey/convey" @@ -29,12 +30,12 @@ func TestPgReplicationSlotCollectorActive(t *testing.T) { } defer db.Close() - inst := &instance{db: db} + inst := &instance{db: db, version: semver.MustParse("13.3.7")} - columns := []string{"slot_name", "current_wal_lsn", "confirmed_flush_lsn", "active"} + columns := []string{"slot_name", "slot_type", "current_wal_lsn", "confirmed_flush_lsn", "active", "safe_wal_size", "wal_status"} rows := sqlmock.NewRows(columns). - AddRow("test_slot", 5, 3, true) - mock.ExpectQuery(sanitizeQuery(pgReplicationSlotQuery)).WillReturnRows(rows) + AddRow("test_slot", "physical", 5, 3, true, 323906992, "reserved") + mock.ExpectQuery(sanitizeQuery(pgReplicationSlotNewQuery)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -47,9 +48,11 @@ func TestPgReplicationSlotCollectorActive(t *testing.T) { }() expected := []MetricResult{ - {labels: labelMap{"slot_name": "test_slot"}, value: 5, metricType: dto.MetricType_GAUGE}, - {labels: labelMap{"slot_name": "test_slot"}, value: 3, metricType: dto.MetricType_GAUGE}, - {labels: labelMap{"slot_name": "test_slot"}, value: 1, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{"slot_name": "test_slot", "slot_type": "physical"}, value: 5, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{"slot_name": "test_slot", "slot_type": "physical"}, value: 3, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{"slot_name": "test_slot", "slot_type": "physical"}, value: 1, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{"slot_name": "test_slot", "slot_type": "physical"}, value: 323906992, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{"slot_name": "test_slot", "slot_type": "physical", "wal_status": "reserved"}, value: 1, metricType: dto.MetricType_GAUGE}, } convey.Convey("Metrics comparison", t, func() { @@ -70,12 +73,12 @@ func TestPgReplicationSlotCollectorInActive(t *testing.T) { } defer db.Close() - inst := &instance{db: db} + inst := &instance{db: db, version: semver.MustParse("13.3.7")} - columns := []string{"slot_name", "current_wal_lsn", "confirmed_flush_lsn", "active"} + columns := []string{"slot_name", "slot_type", "current_wal_lsn", "confirmed_flush_lsn", "active", "safe_wal_size", "wal_status"} rows := sqlmock.NewRows(columns). - AddRow("test_slot", 6, 12, false) - mock.ExpectQuery(sanitizeQuery(pgReplicationSlotQuery)).WillReturnRows(rows) + AddRow("test_slot", "physical", 6, 12, false, -4000, "extended") + mock.ExpectQuery(sanitizeQuery(pgReplicationSlotNewQuery)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -88,8 +91,10 @@ func TestPgReplicationSlotCollectorInActive(t *testing.T) { }() expected := []MetricResult{ - {labels: labelMap{"slot_name": "test_slot"}, value: 6, metricType: dto.MetricType_GAUGE}, - {labels: labelMap{"slot_name": "test_slot"}, value: 0, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{"slot_name": "test_slot", "slot_type": "physical"}, value: 6, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{"slot_name": "test_slot", "slot_type": "physical"}, value: 0, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{"slot_name": "test_slot", "slot_type": "physical"}, value: -4000, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{"slot_name": "test_slot", "slot_type": "physical", "wal_status": "extended"}, value: 1, metricType: dto.MetricType_GAUGE}, } convey.Convey("Metrics comparison", t, func() { @@ -111,12 +116,12 @@ func TestPgReplicationSlotCollectorActiveNil(t *testing.T) { } defer db.Close() - inst := &instance{db: db} + inst := &instance{db: db, version: semver.MustParse("13.3.7")} - columns := []string{"slot_name", "current_wal_lsn", "confirmed_flush_lsn", "active"} + columns := []string{"slot_name", "slot_type", "current_wal_lsn", "confirmed_flush_lsn", "active", "safe_wal_size", "wal_status"} rows := sqlmock.NewRows(columns). - AddRow("test_slot", 6, 12, nil) - mock.ExpectQuery(sanitizeQuery(pgReplicationSlotQuery)).WillReturnRows(rows) + AddRow("test_slot", "physical", 6, 12, nil, nil, "lost") + mock.ExpectQuery(sanitizeQuery(pgReplicationSlotNewQuery)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -129,8 +134,9 @@ func TestPgReplicationSlotCollectorActiveNil(t *testing.T) { }() expected := []MetricResult{ - {labels: labelMap{"slot_name": "test_slot"}, value: 6, metricType: dto.MetricType_GAUGE}, - {labels: labelMap{"slot_name": "test_slot"}, value: 0, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{"slot_name": "test_slot", "slot_type": "physical"}, value: 6, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{"slot_name": "test_slot", "slot_type": "physical"}, value: 0, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{"slot_name": "test_slot", "slot_type": "physical", "wal_status": "lost"}, value: 1, metricType: dto.MetricType_GAUGE}, } convey.Convey("Metrics comparison", t, func() { @@ -151,12 +157,12 @@ func TestPgReplicationSlotCollectorTestNilValues(t *testing.T) { } defer db.Close() - inst := &instance{db: db} + inst := &instance{db: db, version: semver.MustParse("13.3.7")} - columns := []string{"slot_name", "current_wal_lsn", "confirmed_flush_lsn", "active"} + columns := []string{"slot_name", "slot_type", "current_wal_lsn", "confirmed_flush_lsn", "active", "safe_wal_size", "wal_status"} rows := sqlmock.NewRows(columns). - AddRow(nil, nil, nil, true) - mock.ExpectQuery(sanitizeQuery(pgReplicationSlotQuery)).WillReturnRows(rows) + AddRow(nil, nil, nil, nil, true, nil, nil) + mock.ExpectQuery(sanitizeQuery(pgReplicationSlotNewQuery)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -169,9 +175,9 @@ func TestPgReplicationSlotCollectorTestNilValues(t *testing.T) { }() expected := []MetricResult{ - {labels: labelMap{"slot_name": "unknown"}, value: 0, metricType: dto.MetricType_GAUGE}, - {labels: labelMap{"slot_name": "unknown"}, value: 0, metricType: dto.MetricType_GAUGE}, - {labels: labelMap{"slot_name": "unknown"}, value: 1, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{"slot_name": "unknown", "slot_type": "unknown"}, value: 0, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{"slot_name": "unknown", "slot_type": "unknown"}, value: 0, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{"slot_name": "unknown", "slot_type": "unknown"}, value: 1, metricType: dto.MetricType_GAUGE}, } convey.Convey("Metrics comparison", t, func() { diff --git a/collector/pg_replication_test.go b/collector/pg_replication_test.go index b6df698e3..a48e9fd69 100644 --- a/collector/pg_replication_test.go +++ b/collector/pg_replication_test.go @@ -31,9 +31,9 @@ func TestPgReplicationCollector(t *testing.T) { inst := &instance{db: db} - columns := []string{"lag", "is_replica"} + columns := []string{"lag", "is_replica", "last_replay"} rows := sqlmock.NewRows(columns). - AddRow(1000, 1) + AddRow(1000, 1, 3) mock.ExpectQuery(sanitizeQuery(pgReplicationQuery)).WillReturnRows(rows) ch := make(chan prometheus.Metric) @@ -49,6 +49,7 @@ func TestPgReplicationCollector(t *testing.T) { expected := []MetricResult{ {labels: labelMap{}, value: 1000, metricType: dto.MetricType_GAUGE}, {labels: labelMap{}, value: 1, metricType: dto.MetricType_GAUGE}, + {labels: labelMap{}, value: 3, metricType: dto.MetricType_GAUGE}, } convey.Convey("Metrics comparison", t, func() { diff --git a/collector/pg_roles.go b/collector/pg_roles.go new file mode 100644 index 000000000..626dbb44f --- /dev/null +++ b/collector/pg_roles.go @@ -0,0 +1,91 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "context" + "database/sql" + "log/slog" + + "github.com/prometheus/client_golang/prometheus" +) + +const rolesSubsystem = "roles" + +func init() { + registerCollector(rolesSubsystem, defaultEnabled, NewPGRolesCollector) +} + +type PGRolesCollector struct { + log *slog.Logger +} + +func NewPGRolesCollector(config collectorConfig) (Collector, error) { + return &PGRolesCollector{ + log: config.logger, + }, nil +} + +var ( + pgRolesConnectionLimitsDesc = prometheus.NewDesc( + prometheus.BuildFQName( + namespace, + rolesSubsystem, + "connection_limit", + ), + "Connection limit set for the role", + []string{"rolname"}, nil, + ) + + pgRolesConnectionLimitsQuery = "SELECT pg_roles.rolname, pg_roles.rolconnlimit FROM pg_roles" +) + +// Update implements Collector and exposes roles connection limits. +// It is called by the Prometheus registry when collecting metrics. +func (c PGRolesCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + db := instance.getDB() + // Query the list of databases + rows, err := db.QueryContext(ctx, + pgRolesConnectionLimitsQuery, + ) + if err != nil { + return err + } + defer rows.Close() + + for rows.Next() { + var rolname sql.NullString + var connLimit sql.NullInt64 + if err := rows.Scan(&rolname, &connLimit); err != nil { + return err + } + + if !rolname.Valid { + continue + } + rolnameLabel := rolname.String + + if !connLimit.Valid { + continue + } + connLimitMetric := float64(connLimit.Int64) + + ch <- prometheus.MustNewConstMetric( + pgRolesConnectionLimitsDesc, + prometheus.GaugeValue, connLimitMetric, rolnameLabel, + ) + } + + return rows.Err() +} diff --git a/collector/pg_roles_test.go b/collector/pg_roles_test.go new file mode 100644 index 000000000..182a120f9 --- /dev/null +++ b/collector/pg_roles_test.go @@ -0,0 +1,58 @@ +// Copyright 2023 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package collector + +import ( + "context" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/smartystreets/goconvey/convey" +) + +func TestPGRolesCollector(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db} + + mock.ExpectQuery(sanitizeQuery(pgRolesConnectionLimitsQuery)).WillReturnRows(sqlmock.NewRows([]string{"rolname", "rolconnlimit"}). + AddRow("postgres", 15)) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGRolesCollector{} + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGRolesCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"rolname": "postgres"}, value: 15, metricType: dto.MetricType_GAUGE}, + } + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} diff --git a/collector/pg_stat_activity_autovacuum.go b/collector/pg_stat_activity_autovacuum.go index 6cf8cdcec..f08029d18 100644 --- a/collector/pg_stat_activity_autovacuum.go +++ b/collector/pg_stat_activity_autovacuum.go @@ -15,8 +15,8 @@ package collector import ( "context" + "log/slog" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" ) @@ -27,7 +27,7 @@ func init() { } type PGStatActivityAutovacuumCollector struct { - log log.Logger + log *slog.Logger } func NewPGStatActivityAutovacuumCollector(config collectorConfig) (Collector, error) { diff --git a/collector/pg_stat_bgwriter.go b/collector/pg_stat_bgwriter.go index ec446d58c..6e3bd09cb 100644 --- a/collector/pg_stat_bgwriter.go +++ b/collector/pg_stat_bgwriter.go @@ -17,6 +17,7 @@ import ( "context" "database/sql" + "github.com/blang/semver/v4" "github.com/prometheus/client_golang/prometheus" ) @@ -101,7 +102,7 @@ var ( prometheus.Labels{}, ) - statBGWriterQuery = `SELECT + statBGWriterQueryBefore17 = `SELECT checkpoints_timed ,checkpoints_req ,checkpoint_write_time @@ -114,121 +115,177 @@ var ( ,buffers_alloc ,stats_reset FROM pg_stat_bgwriter;` + + statBGWriterQueryAfter17 = `SELECT + buffers_clean + ,maxwritten_clean + ,buffers_alloc + ,stats_reset + FROM pg_stat_bgwriter;` ) func (PGStatBGWriterCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { - db := instance.getDB() - row := db.QueryRowContext(ctx, - statBGWriterQuery) + if instance.version.GE(semver.MustParse("17.0.0")) { + db := instance.getDB() + row := db.QueryRowContext(ctx, statBGWriterQueryAfter17) - var cpt, cpr, bcp, bc, mwc, bb, bbf, ba sql.NullInt64 - var cpwt, cpst sql.NullFloat64 - var sr sql.NullTime + var bc, mwc, ba sql.NullInt64 + var sr sql.NullTime - err := row.Scan(&cpt, &cpr, &cpwt, &cpst, &bcp, &bc, &mwc, &bb, &bbf, &ba, &sr) - if err != nil { - return err - } + err := row.Scan(&bc, &mwc, &ba, &sr) + if err != nil { + return err + } - cptMetric := 0.0 - if cpt.Valid { - cptMetric = float64(cpt.Int64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterCheckpointsTimedDesc, - prometheus.CounterValue, - cptMetric, - ) - cprMetric := 0.0 - if cpr.Valid { - cprMetric = float64(cpr.Int64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterCheckpointsReqDesc, - prometheus.CounterValue, - cprMetric, - ) - cpwtMetric := 0.0 - if cpwt.Valid { - cpwtMetric = float64(cpwt.Float64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterCheckpointsReqTimeDesc, - prometheus.CounterValue, - cpwtMetric, - ) - cpstMetric := 0.0 - if cpst.Valid { - cpstMetric = float64(cpst.Float64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterCheckpointsSyncTimeDesc, - prometheus.CounterValue, - cpstMetric, - ) - bcpMetric := 0.0 - if bcp.Valid { - bcpMetric = float64(bcp.Int64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterBuffersCheckpointDesc, - prometheus.CounterValue, - bcpMetric, - ) - bcMetric := 0.0 - if bc.Valid { - bcMetric = float64(bc.Int64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterBuffersCleanDesc, - prometheus.CounterValue, - bcMetric, - ) - mwcMetric := 0.0 - if mwc.Valid { - mwcMetric = float64(mwc.Int64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterMaxwrittenCleanDesc, - prometheus.CounterValue, - mwcMetric, - ) - bbMetric := 0.0 - if bb.Valid { - bbMetric = float64(bb.Int64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterBuffersBackendDesc, - prometheus.CounterValue, - bbMetric, - ) - bbfMetric := 0.0 - if bbf.Valid { - bbfMetric = float64(bbf.Int64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterBuffersBackendFsyncDesc, - prometheus.CounterValue, - bbfMetric, - ) - baMetric := 0.0 - if ba.Valid { - baMetric = float64(ba.Int64) - } - ch <- prometheus.MustNewConstMetric( - statBGWriterBuffersAllocDesc, - prometheus.CounterValue, - baMetric, - ) - srMetric := 0.0 - if sr.Valid { - srMetric = float64(sr.Time.Unix()) + bcMetric := 0.0 + if bc.Valid { + bcMetric = float64(bc.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterBuffersCleanDesc, + prometheus.CounterValue, + bcMetric, + ) + mwcMetric := 0.0 + if mwc.Valid { + mwcMetric = float64(mwc.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterMaxwrittenCleanDesc, + prometheus.CounterValue, + mwcMetric, + ) + baMetric := 0.0 + if ba.Valid { + baMetric = float64(ba.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterBuffersAllocDesc, + prometheus.CounterValue, + baMetric, + ) + srMetric := 0.0 + if sr.Valid { + srMetric = float64(sr.Time.Unix()) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterStatsResetDesc, + prometheus.CounterValue, + srMetric, + ) + } else { + db := instance.getDB() + row := db.QueryRowContext(ctx, statBGWriterQueryBefore17) + + var cpt, cpr, bcp, bc, mwc, bb, bbf, ba sql.NullInt64 + var cpwt, cpst sql.NullFloat64 + var sr sql.NullTime + + err := row.Scan(&cpt, &cpr, &cpwt, &cpst, &bcp, &bc, &mwc, &bb, &bbf, &ba, &sr) + if err != nil { + return err + } + + cptMetric := 0.0 + if cpt.Valid { + cptMetric = float64(cpt.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterCheckpointsTimedDesc, + prometheus.CounterValue, + cptMetric, + ) + cprMetric := 0.0 + if cpr.Valid { + cprMetric = float64(cpr.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterCheckpointsReqDesc, + prometheus.CounterValue, + cprMetric, + ) + cpwtMetric := 0.0 + if cpwt.Valid { + cpwtMetric = float64(cpwt.Float64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterCheckpointsReqTimeDesc, + prometheus.CounterValue, + cpwtMetric, + ) + cpstMetric := 0.0 + if cpst.Valid { + cpstMetric = float64(cpst.Float64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterCheckpointsSyncTimeDesc, + prometheus.CounterValue, + cpstMetric, + ) + bcpMetric := 0.0 + if bcp.Valid { + bcpMetric = float64(bcp.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterBuffersCheckpointDesc, + prometheus.CounterValue, + bcpMetric, + ) + bcMetric := 0.0 + if bc.Valid { + bcMetric = float64(bc.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterBuffersCleanDesc, + prometheus.CounterValue, + bcMetric, + ) + mwcMetric := 0.0 + if mwc.Valid { + mwcMetric = float64(mwc.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterMaxwrittenCleanDesc, + prometheus.CounterValue, + mwcMetric, + ) + bbMetric := 0.0 + if bb.Valid { + bbMetric = float64(bb.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterBuffersBackendDesc, + prometheus.CounterValue, + bbMetric, + ) + bbfMetric := 0.0 + if bbf.Valid { + bbfMetric = float64(bbf.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterBuffersBackendFsyncDesc, + prometheus.CounterValue, + bbfMetric, + ) + baMetric := 0.0 + if ba.Valid { + baMetric = float64(ba.Int64) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterBuffersAllocDesc, + prometheus.CounterValue, + baMetric, + ) + srMetric := 0.0 + if sr.Valid { + srMetric = float64(sr.Time.Unix()) + } + ch <- prometheus.MustNewConstMetric( + statBGWriterStatsResetDesc, + prometheus.CounterValue, + srMetric, + ) } - ch <- prometheus.MustNewConstMetric( - statBGWriterStatsResetDesc, - prometheus.CounterValue, - srMetric, - ) return nil } diff --git a/collector/pg_stat_bgwriter_test.go b/collector/pg_stat_bgwriter_test.go index 1c2cf98de..6fde2fb6a 100644 --- a/collector/pg_stat_bgwriter_test.go +++ b/collector/pg_stat_bgwriter_test.go @@ -52,7 +52,7 @@ func TestPGStatBGWriterCollector(t *testing.T) { rows := sqlmock.NewRows(columns). AddRow(354, 4945, 289097744, 1242257, int64(3275602074), 89320867, 450139, 2034563757, 0, int64(2725688749), srT) - mock.ExpectQuery(sanitizeQuery(statBGWriterQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statBGWriterQueryBefore17)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -113,7 +113,7 @@ func TestPGStatBGWriterCollectorNullValues(t *testing.T) { rows := sqlmock.NewRows(columns). AddRow(nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil) - mock.ExpectQuery(sanitizeQuery(statBGWriterQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statBGWriterQueryBefore17)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { diff --git a/collector/pg_stat_checkpointer.go b/collector/pg_stat_checkpointer.go new file mode 100644 index 000000000..31e9c5d62 --- /dev/null +++ b/collector/pg_stat_checkpointer.go @@ -0,0 +1,231 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "context" + "database/sql" + "log/slog" + + "github.com/blang/semver/v4" + "github.com/prometheus/client_golang/prometheus" +) + +const statCheckpointerSubsystem = "stat_checkpointer" + +func init() { + // WARNING: + // Disabled by default because this set of metrics is only available from Postgres 17 + registerCollector(statCheckpointerSubsystem, defaultDisabled, NewPGStatCheckpointerCollector) +} + +type PGStatCheckpointerCollector struct { + log *slog.Logger +} + +func NewPGStatCheckpointerCollector(config collectorConfig) (Collector, error) { + return &PGStatCheckpointerCollector{log: config.logger}, nil +} + +var ( + statCheckpointerNumTimedDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "num_timed_total"), + "Number of scheduled checkpoints due to timeout", + []string{}, + prometheus.Labels{}, + ) + statCheckpointerNumRequestedDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "num_requested_total"), + "Number of requested checkpoints that have been performed", + []string{}, + prometheus.Labels{}, + ) + statCheckpointerRestartpointsTimedDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "restartpoints_timed_total"), + "Number of scheduled restartpoints due to timeout or after a failed attempt to perform it", + []string{}, + prometheus.Labels{}, + ) + statCheckpointerRestartpointsReqDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "restartpoints_req_total"), + "Number of requested restartpoints", + []string{}, + prometheus.Labels{}, + ) + statCheckpointerRestartpointsDoneDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "restartpoints_done_total"), + "Number of restartpoints that have been performed", + []string{}, + prometheus.Labels{}, + ) + statCheckpointerWriteTimeDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "write_time_total"), + "Total amount of time that has been spent in the portion of processing checkpoints and restartpoints where files are written to disk, in milliseconds", + []string{}, + prometheus.Labels{}, + ) + statCheckpointerSyncTimeDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "sync_time_total"), + "Total amount of time that has been spent in the portion of processing checkpoints and restartpoints where files are synchronized to disk, in milliseconds", + []string{}, + prometheus.Labels{}, + ) + statCheckpointerBuffersWrittenDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "buffers_written_total"), + "Number of buffers written during checkpoints and restartpoints", + []string{}, + prometheus.Labels{}, + ) + statCheckpointerStatsResetDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statCheckpointerSubsystem, "stats_reset_total"), + "Time at which these statistics were last reset", + []string{}, + prometheus.Labels{}, + ) + + statCheckpointerQuery = `SELECT + num_timed + ,num_requested + ,restartpoints_timed + ,restartpoints_req + ,restartpoints_done + ,write_time + ,sync_time + ,buffers_written + ,stats_reset + FROM pg_stat_checkpointer;` +) + +func (c PGStatCheckpointerCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + db := instance.getDB() + + before17 := instance.version.LT(semver.MustParse("17.0.0")) + if before17 { + c.log.Warn("pg_stat_checkpointer collector is not available on PostgreSQL < 17.0.0, skipping") + return nil + } + + row := db.QueryRowContext(ctx, statCheckpointerQuery) + + // num_timed = nt = bigint + // num_requested = nr = bigint + // restartpoints_timed = rpt = bigint + // restartpoints_req = rpr = bigint + // restartpoints_done = rpd = bigint + // write_time = wt = double precision + // sync_time = st = double precision + // buffers_written = bw = bigint + // stats_reset = sr = timestamp + + var nt, nr, rpt, rpr, rpd, bw sql.NullInt64 + var wt, st sql.NullFloat64 + var sr sql.NullTime + + err := row.Scan(&nt, &nr, &rpt, &rpr, &rpd, &wt, &st, &bw, &sr) + if err != nil { + return err + } + + ntMetric := 0.0 + if nt.Valid { + ntMetric = float64(nt.Int64) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerNumTimedDesc, + prometheus.CounterValue, + ntMetric, + ) + + nrMetric := 0.0 + if nr.Valid { + nrMetric = float64(nr.Int64) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerNumRequestedDesc, + prometheus.CounterValue, + nrMetric, + ) + + rptMetric := 0.0 + if rpt.Valid { + rptMetric = float64(rpt.Int64) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerRestartpointsTimedDesc, + prometheus.CounterValue, + rptMetric, + ) + + rprMetric := 0.0 + if rpr.Valid { + rprMetric = float64(rpr.Int64) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerRestartpointsReqDesc, + prometheus.CounterValue, + rprMetric, + ) + + rpdMetric := 0.0 + if rpd.Valid { + rpdMetric = float64(rpd.Int64) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerRestartpointsDoneDesc, + prometheus.CounterValue, + rpdMetric, + ) + + wtMetric := 0.0 + if wt.Valid { + wtMetric = float64(wt.Float64) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerWriteTimeDesc, + prometheus.CounterValue, + wtMetric, + ) + + stMetric := 0.0 + if st.Valid { + stMetric = float64(st.Float64) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerSyncTimeDesc, + prometheus.CounterValue, + stMetric, + ) + + bwMetric := 0.0 + if bw.Valid { + bwMetric = float64(bw.Int64) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerBuffersWrittenDesc, + prometheus.CounterValue, + bwMetric, + ) + + srMetric := 0.0 + if sr.Valid { + srMetric = float64(sr.Time.Unix()) + } + ch <- prometheus.MustNewConstMetric( + statCheckpointerStatsResetDesc, + prometheus.CounterValue, + srMetric, + ) + + return nil +} diff --git a/collector/pg_stat_checkpointer_test.go b/collector/pg_stat_checkpointer_test.go new file mode 100644 index 000000000..9a8dd7f21 --- /dev/null +++ b/collector/pg_stat_checkpointer_test.go @@ -0,0 +1,144 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package collector + +import ( + "context" + "testing" + "time" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/blang/semver/v4" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/smartystreets/goconvey/convey" +) + +func TestPGStatCheckpointerCollector(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("17.0.0")} + + columns := []string{ + "num_timed", + "num_requested", + "restartpoints_timed", + "restartpoints_req", + "restartpoints_done", + "write_time", + "sync_time", + "buffers_written", + "stats_reset"} + + srT, err := time.Parse("2006-01-02 15:04:05.00000-07", "2023-05-25 17:10:42.81132-07") + if err != nil { + t.Fatalf("Error parsing time: %s", err) + } + + rows := sqlmock.NewRows(columns). + AddRow(354, 4945, 289097744, 1242257, int64(3275602074), 89320867, 450139, 2034563757, srT) + mock.ExpectQuery(sanitizeQuery(statCheckpointerQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatCheckpointerCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatCheckpointerCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 354}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 4945}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 289097744}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 1242257}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 3275602074}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 89320867}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 450139}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 2034563757}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 1685059842}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatCheckpointerCollectorNullValues(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("17.0.0")} + + columns := []string{ + "num_timed", + "num_requested", + "restartpoints_timed", + "restartpoints_req", + "restartpoints_done", + "write_time", + "sync_time", + "buffers_written", + "stats_reset"} + + rows := sqlmock.NewRows(columns). + AddRow(nil, nil, nil, nil, nil, nil, nil, nil, nil) + mock.ExpectQuery(sanitizeQuery(statCheckpointerQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatCheckpointerCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatCheckpointerCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{}, metricType: dto.MetricType_COUNTER, value: 0}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} diff --git a/collector/pg_stat_database.go b/collector/pg_stat_database.go index 328afee2c..b9210740f 100644 --- a/collector/pg_stat_database.go +++ b/collector/pg_stat_database.go @@ -16,9 +16,11 @@ package collector import ( "context" "database/sql" + "fmt" + "log/slog" + "strings" - "github.com/go-kit/log" - "github.com/go-kit/log/level" + "github.com/blang/semver/v4" "github.com/prometheus/client_golang/prometheus" ) @@ -29,7 +31,7 @@ func init() { } type PGStatDatabaseCollector struct { - log log.Logger + log *slog.Logger } func NewPGStatDatabaseCollector(config collectorConfig) (Collector, error) { @@ -206,36 +208,53 @@ var ( []string{"datid", "datname"}, prometheus.Labels{}, ) - - statDatabaseQuery = ` - SELECT - datid - ,datname - ,numbackends - ,xact_commit - ,xact_rollback - ,blks_read - ,blks_hit - ,tup_returned - ,tup_fetched - ,tup_inserted - ,tup_updated - ,tup_deleted - ,conflicts - ,temp_files - ,temp_bytes - ,deadlocks - ,blk_read_time - ,blk_write_time - ,stats_reset - FROM pg_stat_database; - ` + statDatabaseActiveTime = prometheus.NewDesc(prometheus.BuildFQName( + namespace, + statDatabaseSubsystem, + "active_time_seconds_total", + ), + "Time spent executing SQL statements in this database, in seconds", + []string{"datid", "datname"}, + prometheus.Labels{}, + ) ) +func statDatabaseQuery(columns []string) string { + return fmt.Sprintf("SELECT %s FROM pg_stat_database;", strings.Join(columns, ",")) +} + func (c *PGStatDatabaseCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { db := instance.getDB() + + columns := []string{ + "datid", + "datname", + "numbackends", + "xact_commit", + "xact_rollback", + "blks_read", + "blks_hit", + "tup_returned", + "tup_fetched", + "tup_inserted", + "tup_updated", + "tup_deleted", + "conflicts", + "temp_files", + "temp_bytes", + "deadlocks", + "blk_read_time", + "blk_write_time", + "stats_reset", + } + + activeTimeAvail := instance.version.GTE(semver.MustParse("14.0.0")) + if activeTimeAvail { + columns = append(columns, "active_time") + } + rows, err := db.QueryContext(ctx, - statDatabaseQuery, + statDatabaseQuery(columns), ) if err != nil { return err @@ -244,10 +263,10 @@ func (c *PGStatDatabaseCollector) Update(ctx context.Context, instance *instance for rows.Next() { var datid, datname sql.NullString - var numBackends, xactCommit, xactRollback, blksRead, blksHit, tupReturned, tupFetched, tupInserted, tupUpdated, tupDeleted, conflicts, tempFiles, tempBytes, deadlocks, blkReadTime, blkWriteTime sql.NullFloat64 + var numBackends, xactCommit, xactRollback, blksRead, blksHit, tupReturned, tupFetched, tupInserted, tupUpdated, tupDeleted, conflicts, tempFiles, tempBytes, deadlocks, blkReadTime, blkWriteTime, activeTime sql.NullFloat64 var statsReset sql.NullTime - err := rows.Scan( + r := []any{ &datid, &datname, &numBackends, @@ -267,87 +286,97 @@ func (c *PGStatDatabaseCollector) Update(ctx context.Context, instance *instance &blkReadTime, &blkWriteTime, &statsReset, - ) + } + + if activeTimeAvail { + r = append(r, &activeTime) + } + + err := rows.Scan(r...) if err != nil { return err } if !datid.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no datid") + c.log.Debug("Skipping collecting metric because it has no datid") continue } if !datname.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no datname") + c.log.Debug("Skipping collecting metric because it has no datname") continue } if !numBackends.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no numbackends") + c.log.Debug("Skipping collecting metric because it has no numbackends") continue } if !xactCommit.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no xact_commit") + c.log.Debug("Skipping collecting metric because it has no xact_commit") continue } if !xactRollback.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no xact_rollback") + c.log.Debug("Skipping collecting metric because it has no xact_rollback") continue } if !blksRead.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no blks_read") + c.log.Debug("Skipping collecting metric because it has no blks_read") continue } if !blksHit.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no blks_hit") + c.log.Debug("Skipping collecting metric because it has no blks_hit") continue } if !tupReturned.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no tup_returned") + c.log.Debug("Skipping collecting metric because it has no tup_returned") continue } if !tupFetched.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no tup_fetched") + c.log.Debug("Skipping collecting metric because it has no tup_fetched") continue } if !tupInserted.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no tup_inserted") + c.log.Debug("Skipping collecting metric because it has no tup_inserted") continue } if !tupUpdated.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no tup_updated") + c.log.Debug("Skipping collecting metric because it has no tup_updated") continue } if !tupDeleted.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no tup_deleted") + c.log.Debug("Skipping collecting metric because it has no tup_deleted") continue } if !conflicts.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no conflicts") + c.log.Debug("Skipping collecting metric because it has no conflicts") continue } if !tempFiles.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no temp_files") + c.log.Debug("Skipping collecting metric because it has no temp_files") continue } if !tempBytes.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no temp_bytes") + c.log.Debug("Skipping collecting metric because it has no temp_bytes") continue } if !deadlocks.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no deadlocks") + c.log.Debug("Skipping collecting metric because it has no deadlocks") continue } if !blkReadTime.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no blk_read_time") + c.log.Debug("Skipping collecting metric because it has no blk_read_time") continue } if !blkWriteTime.Valid { - level.Debug(c.log).Log("msg", "Skipping collecting metric because it has no blk_write_time") + c.log.Debug("Skipping collecting metric because it has no blk_write_time") + continue + } + if activeTimeAvail && !activeTime.Valid { + c.log.Debug("Skipping collecting metric because it has no active_time") continue } statsResetMetric := 0.0 if !statsReset.Valid { - level.Debug(c.log).Log("msg", "No metric for stats_reset, will collect 0 instead") + c.log.Debug("No metric for stats_reset, will collect 0 instead") } if statsReset.Valid { statsResetMetric = float64(statsReset.Time.Unix()) @@ -473,6 +502,15 @@ func (c *PGStatDatabaseCollector) Update(ctx context.Context, instance *instance statsResetMetric, labels..., ) + + if activeTimeAvail { + ch <- prometheus.MustNewConstMetric( + statDatabaseActiveTime, + prometheus.CounterValue, + activeTime.Float64/1000.0, + labels..., + ) + } } return nil } diff --git a/collector/pg_stat_database_test.go b/collector/pg_stat_database_test.go index fe1b17066..e6194ca2e 100644 --- a/collector/pg_stat_database_test.go +++ b/collector/pg_stat_database_test.go @@ -18,9 +18,10 @@ import ( "time" "github.com/DATA-DOG/go-sqlmock" - "github.com/go-kit/log" + "github.com/blang/semver/v4" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/promslog" "github.com/smartystreets/goconvey/convey" ) @@ -31,7 +32,7 @@ func TestPGStatDatabaseCollector(t *testing.T) { } defer db.Close() - inst := &instance{db: db} + inst := &instance{db: db, version: semver.MustParse("14.0.0")} columns := []string{ "datid", @@ -53,6 +54,7 @@ func TestPGStatDatabaseCollector(t *testing.T) { "blk_read_time", "blk_write_time", "stats_reset", + "active_time", } srT, err := time.Parse("2006-01-02 15:04:05.00000-07", "2023-05-25 17:10:42.81132-07") @@ -80,15 +82,17 @@ func TestPGStatDatabaseCollector(t *testing.T) { 925, 16, 823, - srT) + srT, + 33, + ) - mock.ExpectQuery(sanitizeQuery(statDatabaseQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statDatabaseQuery(columns))).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { defer close(ch) c := PGStatDatabaseCollector{ - log: log.With(log.NewNopLogger(), "collector", "pg_stat_database"), + log: promslog.NewNopLogger().With("collector", "pg_stat_database"), } if err := c.Update(context.Background(), inst, ch); err != nil { @@ -114,6 +118,7 @@ func TestPGStatDatabaseCollector(t *testing.T) { {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 16}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 823}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 1685059842}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 0.033}, } convey.Convey("Metrics comparison", t, func() { @@ -138,7 +143,7 @@ func TestPGStatDatabaseCollectorNullValues(t *testing.T) { if err != nil { t.Fatalf("Error parsing time: %s", err) } - inst := &instance{db: db} + inst := &instance{db: db, version: semver.MustParse("14.0.0")} columns := []string{ "datid", @@ -160,6 +165,7 @@ func TestPGStatDatabaseCollectorNullValues(t *testing.T) { "blk_read_time", "blk_write_time", "stats_reset", + "active_time", } rows := sqlmock.NewRows(columns). @@ -182,7 +188,9 @@ func TestPGStatDatabaseCollectorNullValues(t *testing.T) { 925, 16, 823, - srT). + srT, + 32, + ). AddRow( "pid", "postgres", @@ -202,14 +210,16 @@ func TestPGStatDatabaseCollectorNullValues(t *testing.T) { 925, 16, 823, - srT) - mock.ExpectQuery(sanitizeQuery(statDatabaseQuery)).WillReturnRows(rows) + srT, + 32, + ) + mock.ExpectQuery(sanitizeQuery(statDatabaseQuery(columns))).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { defer close(ch) c := PGStatDatabaseCollector{ - log: log.With(log.NewNopLogger(), "collector", "pg_stat_database"), + log: promslog.NewNopLogger().With("collector", "pg_stat_database"), } if err := c.Update(context.Background(), inst, ch); err != nil { @@ -235,6 +245,7 @@ func TestPGStatDatabaseCollectorNullValues(t *testing.T) { {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 16}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 823}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 1685059842}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 0.032}, } convey.Convey("Metrics comparison", t, func() { @@ -254,7 +265,7 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { } defer db.Close() - inst := &instance{db: db} + inst := &instance{db: db, version: semver.MustParse("14.0.0")} columns := []string{ "datid", @@ -276,6 +287,7 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { "blk_read_time", "blk_write_time", "stats_reset", + "active_time", } srT, err := time.Parse("2006-01-02 15:04:05.00000-07", "2023-05-25 17:10:42.81132-07") @@ -303,7 +315,9 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { 925, 16, 823, - srT). + srT, + 14, + ). AddRow( nil, nil, @@ -324,6 +338,7 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { nil, nil, nil, + nil, ). AddRow( "pid", @@ -344,14 +359,16 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { 926, 17, 824, - srT) - mock.ExpectQuery(sanitizeQuery(statDatabaseQuery)).WillReturnRows(rows) + srT, + 15, + ) + mock.ExpectQuery(sanitizeQuery(statDatabaseQuery(columns))).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { defer close(ch) c := PGStatDatabaseCollector{ - log: log.With(log.NewNopLogger(), "collector", "pg_stat_database"), + log: promslog.NewNopLogger().With("collector", "pg_stat_database"), } if err := c.Update(context.Background(), inst, ch); err != nil { @@ -377,6 +394,8 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 16}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 823}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 1685059842}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 0.014}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_GAUGE, value: 355}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 4946}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 289097745}, @@ -394,6 +413,7 @@ func TestPGStatDatabaseCollectorRowLeakTest(t *testing.T) { {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 17}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 824}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 1685059842}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 0.015}, } convey.Convey("Metrics comparison", t, func() { @@ -414,7 +434,7 @@ func TestPGStatDatabaseCollectorTestNilStatReset(t *testing.T) { } defer db.Close() - inst := &instance{db: db} + inst := &instance{db: db, version: semver.MustParse("14.0.0")} columns := []string{ "datid", @@ -436,6 +456,7 @@ func TestPGStatDatabaseCollectorTestNilStatReset(t *testing.T) { "blk_read_time", "blk_write_time", "stats_reset", + "active_time", } rows := sqlmock.NewRows(columns). @@ -458,15 +479,17 @@ func TestPGStatDatabaseCollectorTestNilStatReset(t *testing.T) { 925, 16, 823, - nil) + nil, + 7, + ) - mock.ExpectQuery(sanitizeQuery(statDatabaseQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(statDatabaseQuery(columns))).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { defer close(ch) c := PGStatDatabaseCollector{ - log: log.With(log.NewNopLogger(), "collector", "pg_stat_database"), + log: promslog.NewNopLogger().With("collector", "pg_stat_database"), } if err := c.Update(context.Background(), inst, ch); err != nil { @@ -492,6 +515,7 @@ func TestPGStatDatabaseCollectorTestNilStatReset(t *testing.T) { {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 16}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 823}, {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"datid": "pid", "datname": "postgres"}, metricType: dto.MetricType_COUNTER, value: 0.007}, } convey.Convey("Metrics comparison", t, func() { diff --git a/collector/pg_stat_progress_vacuum.go b/collector/pg_stat_progress_vacuum.go new file mode 100644 index 000000000..f8083a49f --- /dev/null +++ b/collector/pg_stat_progress_vacuum.go @@ -0,0 +1,222 @@ +// Copyright 2025 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "context" + "database/sql" + "log/slog" + + "github.com/prometheus/client_golang/prometheus" +) + +const progressVacuumSubsystem = "stat_progress_vacuum" + +func init() { + registerCollector(progressVacuumSubsystem, defaultEnabled, NewPGStatProgressVacuumCollector) +} + +type PGStatProgressVacuumCollector struct { + log *slog.Logger +} + +func NewPGStatProgressVacuumCollector(config collectorConfig) (Collector, error) { + return &PGStatProgressVacuumCollector{log: config.logger}, nil +} + +var vacuumPhases = []string{ + "initializing", + "scanning heap", + "vacuuming indexes", + "vacuuming heap", + "cleaning up indexes", + "truncating heap", + "performing final cleanup", +} + +var ( + statProgressVacuumPhase = prometheus.NewDesc( + prometheus.BuildFQName(namespace, progressVacuumSubsystem, "phase"), + "Current vacuum phase (1 = active, 0 = inactive). Label 'phase' is human-readable.", + []string{"datname", "relname", "phase"}, + nil, + ) + + statProgressVacuumHeapBlksTotal = prometheus.NewDesc( + prometheus.BuildFQName(namespace, progressVacuumSubsystem, "heap_blks"), + "Total number of heap blocks in the table being vacuumed.", + []string{"datname", "relname"}, + nil, + ) + + statProgressVacuumHeapBlksScanned = prometheus.NewDesc( + prometheus.BuildFQName(namespace, progressVacuumSubsystem, "heap_blks_scanned"), + "Number of heap blocks scanned so far.", + []string{"datname", "relname"}, + nil, + ) + + statProgressVacuumHeapBlksVacuumed = prometheus.NewDesc( + prometheus.BuildFQName(namespace, progressVacuumSubsystem, "heap_blks_vacuumed"), + "Number of heap blocks vacuumed so far.", + []string{"datname", "relname"}, + nil, + ) + + statProgressVacuumIndexVacuumCount = prometheus.NewDesc( + prometheus.BuildFQName(namespace, progressVacuumSubsystem, "index_vacuums"), + "Number of completed index vacuum cycles.", + []string{"datname", "relname"}, + nil, + ) + + statProgressVacuumMaxDeadTuples = prometheus.NewDesc( + prometheus.BuildFQName(namespace, progressVacuumSubsystem, "max_dead_tuples"), + "Maximum number of dead tuples that can be stored before cleanup is performed.", + []string{"datname", "relname"}, + nil, + ) + + statProgressVacuumNumDeadTuples = prometheus.NewDesc( + prometheus.BuildFQName(namespace, progressVacuumSubsystem, "num_dead_tuples"), + "Current number of dead tuples found so far.", + []string{"datname", "relname"}, + nil, + ) + + // This is the view definition of pg_stat_progress_vacuum, albeit without the conversion + // of "phase" to a human-readable string. We will prefer the numeric representation. + statProgressVacuumQuery = `SELECT + d.datname, + s.relid::regclass::text AS relname, + s.param1 AS phase, + s.param2 AS heap_blks_total, + s.param3 AS heap_blks_scanned, + s.param4 AS heap_blks_vacuumed, + s.param5 AS index_vacuum_count, + s.param6 AS max_dead_tuples, + s.param7 AS num_dead_tuples + FROM + pg_stat_get_progress_info('VACUUM'::text) + s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) + LEFT JOIN + pg_database d ON s.datid = d.oid` +) + +func (c *PGStatProgressVacuumCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + db := instance.getDB() + rows, err := db.QueryContext(ctx, + statProgressVacuumQuery) + + if err != nil { + return err + } + defer rows.Close() + + for rows.Next() { + var ( + datname sql.NullString + relname sql.NullString + phase sql.NullInt64 + heapBlksTotal sql.NullInt64 + heapBlksScanned sql.NullInt64 + heapBlksVacuumed sql.NullInt64 + indexVacuumCount sql.NullInt64 + maxDeadTuples sql.NullInt64 + numDeadTuples sql.NullInt64 + ) + + if err := rows.Scan( + &datname, + &relname, + &phase, + &heapBlksTotal, + &heapBlksScanned, + &heapBlksVacuumed, + &indexVacuumCount, + &maxDeadTuples, + &numDeadTuples, + ); err != nil { + return err + } + + datnameLabel := "unknown" + if datname.Valid { + datnameLabel = datname.String + } + relnameLabel := "unknown" + if relname.Valid { + relnameLabel = relname.String + } + + labels := []string{datnameLabel, relnameLabel} + + var phaseMetric *float64 + if phase.Valid { + v := float64(phase.Int64) + phaseMetric = &v + } + + for i, label := range vacuumPhases { + v := 0.0 + // Only the current phase should be 1.0. + if phaseMetric != nil && float64(i) == *phaseMetric { + v = 1.0 + } + labelsCopy := append(labels, label) + ch <- prometheus.MustNewConstMetric(statProgressVacuumPhase, prometheus.GaugeValue, v, labelsCopy...) + } + + heapTotal := 0.0 + if heapBlksTotal.Valid { + heapTotal = float64(heapBlksTotal.Int64) + } + ch <- prometheus.MustNewConstMetric(statProgressVacuumHeapBlksTotal, prometheus.GaugeValue, heapTotal, labels...) + + heapScanned := 0.0 + if heapBlksScanned.Valid { + heapScanned = float64(heapBlksScanned.Int64) + } + ch <- prometheus.MustNewConstMetric(statProgressVacuumHeapBlksScanned, prometheus.GaugeValue, heapScanned, labels...) + + heapVacuumed := 0.0 + if heapBlksVacuumed.Valid { + heapVacuumed = float64(heapBlksVacuumed.Int64) + } + ch <- prometheus.MustNewConstMetric(statProgressVacuumHeapBlksVacuumed, prometheus.GaugeValue, heapVacuumed, labels...) + + indexCount := 0.0 + if indexVacuumCount.Valid { + indexCount = float64(indexVacuumCount.Int64) + } + ch <- prometheus.MustNewConstMetric(statProgressVacuumIndexVacuumCount, prometheus.GaugeValue, indexCount, labels...) + + maxDead := 0.0 + if maxDeadTuples.Valid { + maxDead = float64(maxDeadTuples.Int64) + } + ch <- prometheus.MustNewConstMetric(statProgressVacuumMaxDeadTuples, prometheus.GaugeValue, maxDead, labels...) + + numDead := 0.0 + if numDeadTuples.Valid { + numDead = float64(numDeadTuples.Int64) + } + ch <- prometheus.MustNewConstMetric(statProgressVacuumNumDeadTuples, prometheus.GaugeValue, numDead, labels...) + } + + if err := rows.Err(); err != nil { + return err + } + return nil +} diff --git a/collector/pg_stat_progress_vacuum_test.go b/collector/pg_stat_progress_vacuum_test.go new file mode 100644 index 000000000..80572feb8 --- /dev/null +++ b/collector/pg_stat_progress_vacuum_test.go @@ -0,0 +1,135 @@ +// Copyright 2025 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package collector + +import ( + "context" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/smartystreets/goconvey/convey" +) + +func TestPGStatProgressVacuumCollector(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db} + + columns := []string{ + "datname", "relname", "phase", "heap_blks_total", "heap_blks_scanned", + "heap_blks_vacuumed", "index_vacuum_count", "max_dead_tuples", "num_dead_tuples", + } + + rows := sqlmock.NewRows(columns).AddRow( + "postgres", "a_table", 3, 3000, 400, 200, 2, 500, 123) + + mock.ExpectQuery(sanitizeQuery(statProgressVacuumQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatProgressVacuumCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatProgressVacuumCollector.Update; %+v", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"datname": "postgres", "relname": "a_table", "phase": "initializing"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "a_table", "phase": "scanning heap"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "a_table", "phase": "vacuuming indexes"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "a_table", "phase": "vacuuming heap"}, metricType: dto.MetricType_GAUGE, value: 1}, + {labels: labelMap{"datname": "postgres", "relname": "a_table", "phase": "cleaning up indexes"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "a_table", "phase": "truncating heap"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "a_table", "phase": "performing final cleanup"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "a_table"}, metricType: dto.MetricType_GAUGE, value: 3000}, + {labels: labelMap{"datname": "postgres", "relname": "a_table"}, metricType: dto.MetricType_GAUGE, value: 400}, + {labels: labelMap{"datname": "postgres", "relname": "a_table"}, metricType: dto.MetricType_GAUGE, value: 200}, + {labels: labelMap{"datname": "postgres", "relname": "a_table"}, metricType: dto.MetricType_GAUGE, value: 2}, + {labels: labelMap{"datname": "postgres", "relname": "a_table"}, metricType: dto.MetricType_GAUGE, value: 500}, + {labels: labelMap{"datname": "postgres", "relname": "a_table"}, metricType: dto.MetricType_GAUGE, value: 123}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(m, convey.ShouldResemble, expect) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("There were unfulfilled exceptions: %+v", err) + } +} + +func TestPGStatProgressVacuumCollectorNullValues(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db} + + columns := []string{ + "datname", "relname", "phase", "heap_blks_total", "heap_blks_scanned", + "heap_blks_vacuumed", "index_vacuum_count", "max_dead_tuples", "num_dead_tuples", + } + + rows := sqlmock.NewRows(columns).AddRow( + "postgres", nil, nil, nil, nil, nil, nil, nil, nil) + + mock.ExpectQuery(sanitizeQuery(statProgressVacuumQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatProgressVacuumCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatProgressVacuumCollector.Update; %+v", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"datname": "postgres", "relname": "unknown", "phase": "initializing"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown", "phase": "scanning heap"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown", "phase": "vacuuming indexes"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown", "phase": "vacuuming heap"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown", "phase": "cleaning up indexes"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown", "phase": "truncating heap"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown", "phase": "performing final cleanup"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "relname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("There were unfulfilled exceptions: %+v", err) + } +} diff --git a/collector/pg_stat_statements.go b/collector/pg_stat_statements.go index c03e78b92..5c5c036d1 100644 --- a/collector/pg_stat_statements.go +++ b/collector/pg_stat_statements.go @@ -16,31 +16,101 @@ package collector import ( "context" "database/sql" + "fmt" + "log/slog" + "strings" + "github.com/alecthomas/kingpin/v2" "github.com/blang/semver/v4" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" ) -const statStatementsSubsystem = "stat_statements" +const ( + statStatementsSubsystem = "stat_statements" + defaultStatementLimit = "100" +) + +var ( + includeQueryFlag *bool = nil + statementLengthFlag *uint = nil + statementLimitFlag *uint = nil + excludedDatabasesFlag *string = nil + excludedUsersFlag *string = nil +) func init() { // WARNING: // Disabled by default because this set of metrics can be quite expensive on a busy server // Every unique query will cause a new timeseries to be created registerCollector(statStatementsSubsystem, defaultDisabled, NewPGStatStatementsCollector) + + includeQueryFlag = kingpin.Flag( + fmt.Sprint(collectorFlagPrefix, statStatementsSubsystem, ".include_query"), + "Enable selecting statement query together with queryId. (default: disabled)"). + Default(fmt.Sprintf("%v", defaultDisabled)). + Bool() + statementLengthFlag = kingpin.Flag( + fmt.Sprint(collectorFlagPrefix, statStatementsSubsystem, ".query_length"), + "Maximum length of the statement text."). + Default("120"). + Uint() + statementLimitFlag = kingpin.Flag( + fmt.Sprint(collectorFlagPrefix, statStatementsSubsystem, ".limit"), + "Maximum number of statements to return."). + Default(defaultStatementLimit). + Uint() + excludedDatabasesFlag = kingpin.Flag( + fmt.Sprint(collectorFlagPrefix, statStatementsSubsystem, ".exclude_databases"), + "Comma-separated list of database names to exclude. (default: none)"). + Default(""). + String() + excludedUsersFlag = kingpin.Flag( + fmt.Sprint(collectorFlagPrefix, statStatementsSubsystem, ".exclude_users"), + "Comma-separated list of user names to exclude. (default: none)"). + Default(""). + String() } type PGStatStatementsCollector struct { - log log.Logger + log *slog.Logger + includeQueryStatement bool + statementLength uint + statementLimit uint + excludedDatabases []string + excludedUsers []string } func NewPGStatStatementsCollector(config collectorConfig) (Collector, error) { - return &PGStatStatementsCollector{log: config.logger}, nil + var excludedDatabases []string + if *excludedDatabasesFlag != "" { + for db := range strings.SplitSeq(*excludedDatabasesFlag, ",") { + if trimmed := strings.TrimSpace(db); trimmed != "" { + excludedDatabases = append(excludedDatabases, trimmed) + } + } + } + + var excludedUsers []string + if *excludedUsersFlag != "" { + for user := range strings.SplitSeq(*excludedUsersFlag, ",") { + if trimmed := strings.TrimSpace(user); trimmed != "" { + excludedUsers = append(excludedUsers, trimmed) + } + } + } + + return &PGStatStatementsCollector{ + log: config.logger, + includeQueryStatement: *includeQueryFlag, + statementLength: *statementLengthFlag, + statementLimit: *statementLimitFlag, + excludedDatabases: excludedDatabases, + excludedUsers: excludedUsers, + }, nil } var ( - statSTatementsCallsTotal = prometheus.NewDesc( + statStatementsCallsTotal = prometheus.NewDesc( prometheus.BuildFQName(namespace, statStatementsSubsystem, "calls_total"), "Number of times executed", []string{"user", "datname", "queryid"}, @@ -71,10 +141,24 @@ var ( prometheus.Labels{}, ) + statStatementsQuery = prometheus.NewDesc( + prometheus.BuildFQName(namespace, statStatementsSubsystem, "query_id"), + "SQL Query to queryid mapping", + []string{"queryid", "query"}, + prometheus.Labels{}, + ) +) + +const ( + pgStatStatementQuerySelect = `LEFT(pg_stat_statements.query, %d) as query,` + pgStatStatementExcludeDatabases = `AND pg_database.datname NOT IN (%s) ` + pgStatStatementExcludeUsers = `AND pg_get_userbyid(userid) NOT IN (%s) ` + pgStatStatementsQuery = `SELECT pg_get_userbyid(userid) as user, pg_database.datname, pg_stat_statements.queryid, + %s pg_stat_statements.calls as calls_total, pg_stat_statements.total_time / 1000.0 as seconds_total, pg_stat_statements.rows as rows_total, @@ -89,50 +173,101 @@ var ( WITHIN GROUP (ORDER BY total_time) FROM pg_stat_statements ) + %s %s ORDER BY seconds_total DESC - LIMIT 100;` + LIMIT %s;` - pgStatStatementsNewQuery = `SELECT + pgStatStatementsQuery_PG13 = `SELECT pg_get_userbyid(userid) as user, pg_database.datname, pg_stat_statements.queryid, + %s pg_stat_statements.calls as calls_total, pg_stat_statements.total_exec_time / 1000.0 as seconds_total, pg_stat_statements.rows as rows_total, pg_stat_statements.blk_read_time / 1000.0 as block_read_seconds_total, pg_stat_statements.blk_write_time / 1000.0 as block_write_seconds_total - FROM pg_stat_statements + FROM pg_stat_statements(%t) JOIN pg_database ON pg_database.oid = pg_stat_statements.dbid WHERE total_exec_time > ( SELECT percentile_cont(0.1) WITHIN GROUP (ORDER BY total_exec_time) - FROM pg_stat_statements + FROM pg_stat_statements(false) + ) + %s %s + ORDER BY seconds_total DESC + LIMIT %s;` + + pgStatStatementsQuery_PG17 = `SELECT + pg_get_userbyid(userid) as user, + pg_database.datname, + pg_stat_statements.queryid, + %s + pg_stat_statements.calls as calls_total, + pg_stat_statements.total_exec_time / 1000.0 as seconds_total, + pg_stat_statements.rows as rows_total, + pg_stat_statements.shared_blk_read_time / 1000.0 as block_read_seconds_total, + pg_stat_statements.shared_blk_write_time / 1000.0 as block_write_seconds_total + FROM pg_stat_statements(%t) + JOIN pg_database + ON pg_database.oid = pg_stat_statements.dbid + WHERE + total_exec_time > ( + SELECT percentile_cont(0.1) + WITHIN GROUP (ORDER BY total_exec_time) + FROM pg_stat_statements(false) ) + %s %s ORDER BY seconds_total DESC - LIMIT 100;` + LIMIT %s;` ) -func (PGStatStatementsCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { - query := pgStatStatementsQuery - if instance.version.GE(semver.MustParse("13.0.0")) { - query = pgStatStatementsNewQuery +func (c PGStatStatementsCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error { + querySelect := "" + if c.includeQueryStatement { + querySelect = fmt.Sprintf(pgStatStatementQuerySelect, c.statementLength) + } + databaseFilter := c.buildExclusionClause(c.excludedDatabases, pgStatStatementExcludeDatabases) + userFilter := c.buildExclusionClause(c.excludedUsers, pgStatStatementExcludeUsers) + statementLimit := defaultStatementLimit + if c.statementLimit > 0 { + statementLimit = fmt.Sprintf("%d", c.statementLimit) + } + + var query string + switch { + case instance.version.GE(semver.MustParse("17.0.0")): + query = fmt.Sprintf(pgStatStatementsQuery_PG17, querySelect, c.includeQueryStatement, databaseFilter, userFilter, statementLimit) + case instance.version.GE(semver.MustParse("13.0.0")): + query = fmt.Sprintf(pgStatStatementsQuery_PG13, querySelect, c.includeQueryStatement, databaseFilter, userFilter, statementLimit) + default: + query = fmt.Sprintf(pgStatStatementsQuery, querySelect, databaseFilter, userFilter, statementLimit) } db := instance.getDB() rows, err := db.QueryContext(ctx, query) + presentQueryIds := make(map[string]struct{}) + + seen := make(map[string]struct{}) // to track duplicates by (user, datname, queryid) + if err != nil { return err } defer rows.Close() for rows.Next() { - var user, datname, queryid sql.NullString + var user, datname, queryid, statement sql.NullString var callsTotal, rowsTotal sql.NullInt64 var secondsTotal, blockReadSecondsTotal, blockWriteSecondsTotal sql.NullFloat64 - - if err := rows.Scan(&user, &datname, &queryid, &callsTotal, &secondsTotal, &rowsTotal, &blockReadSecondsTotal, &blockWriteSecondsTotal); err != nil { + var columns []any + if c.includeQueryStatement { + columns = []any{&user, &datname, &queryid, &statement, &callsTotal, &secondsTotal, &rowsTotal, &blockReadSecondsTotal, &blockWriteSecondsTotal} + } else { + columns = []any{&user, &datname, &queryid, &callsTotal, &secondsTotal, &rowsTotal, &blockReadSecondsTotal, &blockWriteSecondsTotal} + } + if err := rows.Scan(columns...); err != nil { return err } @@ -149,12 +284,20 @@ func (PGStatStatementsCollector) Update(ctx context.Context, instance *instance, queryidLabel = queryid.String } + key := fmt.Sprintf("%s|%s|%s", userLabel, datnameLabel, queryidLabel) + _, ok := seen[key] + if ok { + c.log.Warn("Duplicate found", "user", userLabel, "datname", datnameLabel, "queryid", queryidLabel) + continue + } + seen[key] = struct{}{} + callsTotalMetric := 0.0 if callsTotal.Valid { callsTotalMetric = float64(callsTotal.Int64) } ch <- prometheus.MustNewConstMetric( - statSTatementsCallsTotal, + statStatementsCallsTotal, prometheus.CounterValue, callsTotalMetric, userLabel, datnameLabel, queryidLabel, @@ -203,9 +346,41 @@ func (PGStatStatementsCollector) Update(ctx context.Context, instance *instance, blockWriteSecondsTotalMetric, userLabel, datnameLabel, queryidLabel, ) + + if c.includeQueryStatement { + _, ok := presentQueryIds[queryidLabel] + if !ok { + presentQueryIds[queryidLabel] = struct{}{} + + queryLabel := "unknown" + if statement.Valid { + queryLabel = statement.String + } + + ch <- prometheus.MustNewConstMetric( + statStatementsQuery, + prometheus.CounterValue, + 1, + queryidLabel, queryLabel, + ) + } + } } if err := rows.Err(); err != nil { return err } return nil } + +func (c PGStatStatementsCollector) buildExclusionClause(identifiers []string, clauseTemplate string) string { + if len(identifiers) == 0 { + return "" + } + + escaped := make([]string, 0, len(identifiers)) + for _, identifier := range identifiers { + escaped = append(escaped, fmt.Sprintf("'%s'", strings.ReplaceAll(identifier, "'", "''"))) + } + + return fmt.Sprintf(clauseTemplate, strings.Join(escaped, ", ")) +} diff --git a/collector/pg_stat_statements_test.go b/collector/pg_stat_statements_test.go index 08aba34c2..00171c83c 100644 --- a/collector/pg_stat_statements_test.go +++ b/collector/pg_stat_statements_test.go @@ -14,6 +14,7 @@ package collector import ( "context" + "fmt" "testing" "github.com/DATA-DOG/go-sqlmock" @@ -23,7 +24,7 @@ import ( "github.com/smartystreets/goconvey/convey" ) -func TestPGStateStatementsCollector(t *testing.T) { +func TestPGStatStatementsCollector(t *testing.T) { db, mock, err := sqlmock.New() if err != nil { t.Fatalf("Error opening a stub db connection: %s", err) @@ -35,7 +36,7 @@ func TestPGStateStatementsCollector(t *testing.T) { columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} rows := sqlmock.NewRows(columns). AddRow("postgres", "postgres", 1500, 5, 0.4, 100, 0.1, 0.2) - mock.ExpectQuery(sanitizeQuery(pgStatStatementsQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery, "", "", "", defaultStatementLimit))).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -66,7 +67,95 @@ func TestPGStateStatementsCollector(t *testing.T) { } } -func TestPGStateStatementsCollectorNull(t *testing.T) { +func TestPGStatStatementsCollectorWithStatement(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("12.0.0")} + + columns := []string{"user", "datname", "queryid", "LEFT(pg_stat_statements.query, 100) as query", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, "select 1 from foo", 5, 0.4, 100, 0.1, 0.2) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery, fmt.Sprintf(pgStatStatementQuerySelect, 100), "", "", defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{includeQueryStatement: true, statementLength: 100} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + {labels: labelMap{"queryid": "1500", "query": "select 1 from foo"}, metricType: dto.MetricType_COUNTER, value: 1}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollectorWithStatementAndLimit(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("12.0.0")} + + columns := []string{"user", "datname", "queryid", "LEFT(pg_stat_statements.query, 100) as query", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, "select 1 from foo", 5, 0.4, 100, 0.1, 0.2) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery, fmt.Sprintf(pgStatStatementQuerySelect, 100), "", "", "10"))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{includeQueryStatement: true, statementLength: 100, statementLimit: 10} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + {labels: labelMap{"queryid": "1500", "query": "select 1 from foo"}, metricType: dto.MetricType_COUNTER, value: 1}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollectorNull(t *testing.T) { db, mock, err := sqlmock.New() if err != nil { t.Fatalf("Error opening a stub db connection: %s", err) @@ -78,7 +167,7 @@ func TestPGStateStatementsCollectorNull(t *testing.T) { columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} rows := sqlmock.NewRows(columns). AddRow(nil, nil, nil, nil, nil, nil, nil, nil) - mock.ExpectQuery(sanitizeQuery(pgStatStatementsNewQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG13, "", false, "", "", defaultStatementLimit))).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -109,7 +198,95 @@ func TestPGStateStatementsCollectorNull(t *testing.T) { } } -func TestPGStateStatementsCollectorNewPG(t *testing.T) { +func TestPGStatStatementsCollectorNullWithStatement(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("13.3.7")} + + columns := []string{"user", "datname", "queryid", "LEFT(pg_stat_statements.query, 200) as query", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow(nil, nil, nil, nil, nil, nil, nil, nil, nil) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG13, fmt.Sprintf(pgStatStatementQuerySelect, 200), true, "", "", defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{includeQueryStatement: true, statementLength: 200} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"queryid": "unknown", "query": "unknown"}, metricType: dto.MetricType_COUNTER, value: 1}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollectorNullWithStatementAndLimit(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("13.3.7")} + + columns := []string{"user", "datname", "queryid", "LEFT(pg_stat_statements.query, 200) as query", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow(nil, nil, nil, nil, nil, nil, nil, nil, nil) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG13, fmt.Sprintf(pgStatStatementQuerySelect, 200), true, "", "", "10"))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{includeQueryStatement: true, statementLength: 200, statementLimit: 10} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"user": "unknown", "datname": "unknown", "queryid": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"queryid": "unknown", "query": "unknown"}, metricType: dto.MetricType_COUNTER, value: 1}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollector_PG13(t *testing.T) { db, mock, err := sqlmock.New() if err != nil { t.Fatalf("Error opening a stub db connection: %s", err) @@ -121,7 +298,7 @@ func TestPGStateStatementsCollectorNewPG(t *testing.T) { columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} rows := sqlmock.NewRows(columns). AddRow("postgres", "postgres", 1500, 5, 0.4, 100, 0.1, 0.2) - mock.ExpectQuery(sanitizeQuery(pgStatStatementsNewQuery)).WillReturnRows(rows) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG13, "", false, "", "", defaultStatementLimit))).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -151,3 +328,463 @@ func TestPGStateStatementsCollectorNewPG(t *testing.T) { t.Errorf("there were unfulfilled exceptions: %s", err) } } + +func TestPGStatStatementsCollector_PG13_WithStatement(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("13.3.7")} + + columns := []string{"user", "datname", "queryid", "LEFT(pg_stat_statements.query, 300) as query", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, "select 1 from foo", 5, 0.4, 100, 0.1, 0.2) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG13, fmt.Sprintf(pgStatStatementQuerySelect, 300), true, "", "", defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{includeQueryStatement: true, statementLength: 300} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + {labels: labelMap{"queryid": "1500", "query": "select 1 from foo"}, metricType: dto.MetricType_COUNTER, value: 1}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollector_PG13_WithStatementAndLimit(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("13.3.7")} + + columns := []string{"user", "datname", "queryid", "LEFT(pg_stat_statements.query, 300) as query", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, "select 1 from foo", 5, 0.4, 100, 0.1, 0.2) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG13, fmt.Sprintf(pgStatStatementQuerySelect, 300), true, "", "", "10"))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{includeQueryStatement: true, statementLength: 300, statementLimit: 10} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + {labels: labelMap{"queryid": "1500", "query": "select 1 from foo"}, metricType: dto.MetricType_COUNTER, value: 1}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollector_PG17(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("17.0.0")} + + columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, 5, 0.4, 100, 0.1, 0.2) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG17, "", false, "", "", defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollector_PG17_WithStatement(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("17.0.0")} + + columns := []string{"user", "datname", "queryid", "LEFT(pg_stat_statements.query, 300) as query", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, "select 1 from foo", 5, 0.4, 100, 0.1, 0.2) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG17, fmt.Sprintf(pgStatStatementQuerySelect, 300), true, "", "", defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{includeQueryStatement: true, statementLength: 300} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + {labels: labelMap{"queryid": "1500", "query": "select 1 from foo"}, metricType: dto.MetricType_COUNTER, value: 1}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollector_PG17_WithStatementAndLimit(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("17.0.0")} + + columns := []string{"user", "datname", "queryid", "LEFT(pg_stat_statements.query, 300) as query", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, "select 1 from foo", 5, 0.4, 100, 0.1, 0.2) + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG17, fmt.Sprintf(pgStatStatementQuerySelect, 300), true, "", "", "10"))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{includeQueryStatement: true, statementLength: 300, statementLimit: 10} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + {labels: labelMap{"queryid": "1500", "query": "select 1 from foo"}, metricType: dto.MetricType_COUNTER, value: 1}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollectorWithExcludedDatabases(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("12.0.0")} + + excludedDatabases := []string{"rdsadmin", "cloudsqladmin"} + + columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, 5, 0.4, 100, 0.1, 0.2) + + // Expected query should include database exclusion filters + expectedFilter := " AND pg_database.datname NOT IN ('rdsadmin', 'cloudsqladmin')" + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery, "", expectedFilter, "", defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{excludedDatabases: excludedDatabases} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + } + + convey.Convey("Metrics comparison with excluded databases", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollectorWithExcludedDatabasesSQLEscaping(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("13.0.0")} + + // exclude a database name with a single quote (SQL injection test) + excludedDatabases := []string{"test'db"} + + columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, 5, 0.4, 100, 0.1, 0.2) + + expectedFilter := " AND pg_database.datname NOT IN ('test''db')" + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG13, "", false, expectedFilter, "", defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{excludedDatabases: excludedDatabases} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + } + + convey.Convey("Metrics comparison with SQL escaping", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollectorWithExcludedUsers(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("12.0.0")} + + excludedUsers := []string{"monitoring", "readonly"} + + columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, 5, 0.4, 100, 0.1, 0.2) + + expectedFilter := " AND pg_get_userbyid(userid) NOT IN ('monitoring', 'readonly')" + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery, "", "", expectedFilter, defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{excludedUsers: excludedUsers} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + } + + convey.Convey("Metrics comparison with excluded users", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollectorWithExcludedUsersSQLEscaping(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("13.0.0")} + + // exclude a user name with a single quote (SQL injection test) + excludedUsers := []string{"test'user"} + + columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, 5, 0.4, 100, 0.1, 0.2) + + expectedFilter := " AND pg_get_userbyid(userid) NOT IN ('test''user')" + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG13, "", false, "", expectedFilter, defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{excludedUsers: excludedUsers} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + } + + convey.Convey("Metrics comparison with SQL escaping for users", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} + +func TestPGStatStatementsCollectorWithExcludedDatabasesAndUsers(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db, version: semver.MustParse("17.0.0")} + + // exclude both databases and users + excludedDatabases := []string{"rdsadmin", "cloudsqladmin"} + excludedUsers := []string{"monitoring", "readonly"} + + columns := []string{"user", "datname", "queryid", "calls_total", "seconds_total", "rows_total", "block_read_seconds_total", "block_write_seconds_total"} + rows := sqlmock.NewRows(columns). + AddRow("postgres", "postgres", 1500, 5, 0.4, 100, 0.1, 0.2) + + expectedDbFilter := " AND pg_database.datname NOT IN ('rdsadmin', 'cloudsqladmin')" + expectedUserFilter := " AND pg_get_userbyid(userid) NOT IN ('monitoring', 'readonly')" + mock.ExpectQuery(sanitizeQuery(fmt.Sprintf(pgStatStatementsQuery_PG17, "", false, expectedDbFilter, expectedUserFilter, defaultStatementLimit))).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGStatStatementsCollector{excludedDatabases: excludedDatabases, excludedUsers: excludedUsers} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGStatStatementsCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 5}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.4}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 100}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.1}, + {labels: labelMap{"user": "postgres", "datname": "postgres", "queryid": "1500"}, metricType: dto.MetricType_COUNTER, value: 0.2}, + } + + convey.Convey("Metrics comparison with excluded databases and users", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} diff --git a/collector/pg_stat_user_tables.go b/collector/pg_stat_user_tables.go index af3822ca8..ad8bcace7 100644 --- a/collector/pg_stat_user_tables.go +++ b/collector/pg_stat_user_tables.go @@ -16,8 +16,8 @@ package collector import ( "context" "database/sql" + "log/slog" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" ) @@ -28,7 +28,7 @@ func init() { } type PGStatUserTablesCollector struct { - log log.Logger + log *slog.Logger } func NewPGStatUserTablesCollector(config collectorConfig) (Collector, error) { @@ -150,9 +150,15 @@ var ( []string{"datname", "schemaname", "relname"}, prometheus.Labels{}, ) - statUserTablesTotalSize = prometheus.NewDesc( - prometheus.BuildFQName(namespace, userTableSubsystem, "size_bytes"), - "Total disk space used by this table, in bytes, including all indexes and TOAST data", + statUserIndexSize = prometheus.NewDesc( + prometheus.BuildFQName(namespace, userTableSubsystem, "index_size_bytes"), + "Total disk space used by this index, in bytes", + []string{"datname", "schemaname", "relname"}, + prometheus.Labels{}, + ) + statUserTableSize = prometheus.NewDesc( + prometheus.BuildFQName(namespace, userTableSubsystem, "table_size_bytes"), + "Total disk space used by this table, in bytes", []string{"datname", "schemaname", "relname"}, prometheus.Labels{}, ) @@ -180,7 +186,8 @@ var ( autovacuum_count, analyze_count, autoanalyze_count, - pg_total_relation_size(relid) as total_size + pg_indexes_size(relid) as indexes_size, + pg_table_size(relid) as table_size FROM pg_stat_user_tables` ) @@ -198,10 +205,10 @@ func (c *PGStatUserTablesCollector) Update(ctx context.Context, instance *instan for rows.Next() { var datname, schemaname, relname sql.NullString var seqScan, seqTupRead, idxScan, idxTupFetch, nTupIns, nTupUpd, nTupDel, nTupHotUpd, nLiveTup, nDeadTup, - nModSinceAnalyze, vacuumCount, autovacuumCount, analyzeCount, autoanalyzeCount, totalSize sql.NullInt64 + nModSinceAnalyze, vacuumCount, autovacuumCount, analyzeCount, autoanalyzeCount, indexSize, tableSize sql.NullInt64 var lastVacuum, lastAutovacuum, lastAnalyze, lastAutoanalyze sql.NullTime - if err := rows.Scan(&datname, &schemaname, &relname, &seqScan, &seqTupRead, &idxScan, &idxTupFetch, &nTupIns, &nTupUpd, &nTupDel, &nTupHotUpd, &nLiveTup, &nDeadTup, &nModSinceAnalyze, &lastVacuum, &lastAutovacuum, &lastAnalyze, &lastAutoanalyze, &vacuumCount, &autovacuumCount, &analyzeCount, &autoanalyzeCount, &totalSize); err != nil { + if err := rows.Scan(&datname, &schemaname, &relname, &seqScan, &seqTupRead, &idxScan, &idxTupFetch, &nTupIns, &nTupUpd, &nTupDel, &nTupHotUpd, &nLiveTup, &nDeadTup, &nModSinceAnalyze, &lastVacuum, &lastAutovacuum, &lastAnalyze, &lastAutoanalyze, &vacuumCount, &autovacuumCount, &analyzeCount, &autoanalyzeCount, &indexSize, &tableSize); err != nil { return err } @@ -427,14 +434,25 @@ func (c *PGStatUserTablesCollector) Update(ctx context.Context, instance *instan datnameLabel, schemanameLabel, relnameLabel, ) - totalSizeMetric := 0.0 - if totalSize.Valid { - totalSizeMetric = float64(totalSize.Int64) + indexSizeMetric := 0.0 + if indexSize.Valid { + indexSizeMetric = float64(indexSize.Int64) + } + ch <- prometheus.MustNewConstMetric( + statUserIndexSize, + prometheus.GaugeValue, + indexSizeMetric, + datnameLabel, schemanameLabel, relnameLabel, + ) + + tableSizeMetric := 0.0 + if tableSize.Valid { + tableSizeMetric = float64(tableSize.Int64) } ch <- prometheus.MustNewConstMetric( - statUserTablesTotalSize, + statUserTableSize, prometheus.GaugeValue, - totalSizeMetric, + tableSizeMetric, datnameLabel, schemanameLabel, relnameLabel, ) } diff --git a/collector/pg_stat_user_tables_test.go b/collector/pg_stat_user_tables_test.go index 5e82335c3..4649bdbc5 100644 --- a/collector/pg_stat_user_tables_test.go +++ b/collector/pg_stat_user_tables_test.go @@ -72,7 +72,8 @@ func TestPGStatUserTablesCollector(t *testing.T) { "autovacuum_count", "analyze_count", "autoanalyze_count", - "total_size"} + "index_size", + "table_size"} rows := sqlmock.NewRows(columns). AddRow("postgres", "public", @@ -96,7 +97,8 @@ func TestPGStatUserTablesCollector(t *testing.T) { 12, 13, 14, - 15) + 15, + 16) mock.ExpectQuery(sanitizeQuery(statUserTablesQuery)).WillReturnRows(rows) ch := make(chan prometheus.Metric) go func() { @@ -128,6 +130,8 @@ func TestPGStatUserTablesCollector(t *testing.T) { {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: dto.MetricType_COUNTER, value: 12}, {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: dto.MetricType_COUNTER, value: 13}, {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: dto.MetricType_COUNTER, value: 14}, + {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: dto.MetricType_GAUGE, value: 15}, + {labels: labelMap{"datname": "postgres", "schemaname": "public", "relname": "a_table"}, metricType: dto.MetricType_GAUGE, value: 16}, } convey.Convey("Metrics comparison", t, func() { @@ -173,7 +177,8 @@ func TestPGStatUserTablesCollectorNullValues(t *testing.T) { "autovacuum_count", "analyze_count", "autoanalyze_count", - "total_size"} + "index_size", + "table_size"} rows := sqlmock.NewRows(columns). AddRow("postgres", nil, @@ -197,6 +202,7 @@ func TestPGStatUserTablesCollectorNullValues(t *testing.T) { nil, nil, nil, + nil, nil) mock.ExpectQuery(sanitizeQuery(statUserTablesQuery)).WillReturnRows(rows) ch := make(chan prometheus.Metric) @@ -229,6 +235,8 @@ func TestPGStatUserTablesCollectorNullValues(t *testing.T) { {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_COUNTER, value: 0}, + {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, + {labels: labelMap{"datname": "postgres", "schemaname": "unknown", "relname": "unknown"}, metricType: dto.MetricType_GAUGE, value: 0}, } convey.Convey("Metrics comparison", t, func() { diff --git a/collector/pg_stat_walreceiver.go b/collector/pg_stat_walreceiver.go index db533ab55..e6cc13652 100644 --- a/collector/pg_stat_walreceiver.go +++ b/collector/pg_stat_walreceiver.go @@ -16,9 +16,8 @@ import ( "context" "database/sql" "fmt" + "log/slog" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" ) @@ -27,7 +26,7 @@ func init() { } type PGStatWalReceiverCollector struct { - log log.Logger + log *slog.Logger } const statWalReceiverSubsystem = "stat_wal_receiver" @@ -109,7 +108,7 @@ var ( status, (receive_start_lsn- '0/0') %% (2^52)::bigint as receive_start_lsn, %s -receive_start_tli, + receive_start_tli, received_tli, extract(epoch from last_msg_send_time) as last_msg_send_time, extract(epoch from last_msg_receipt_time) as last_msg_receipt_time, @@ -148,7 +147,7 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta var lastMsgSendTime, lastMsgReceiptTime, latestEndTime sql.NullFloat64 if hasFlushedLSN { - if err := rows.Scan(&upstreamHost, &slotName, &status, &receiveStartLsn, &receiveStartTli, &flushedLsn, &receivedTli, &lastMsgSendTime, &lastMsgReceiptTime, &latestEndLsn, &latestEndTime, &upstreamNode); err != nil { + if err := rows.Scan(&upstreamHost, &slotName, &status, &receiveStartLsn, &flushedLsn, &receiveStartTli, &receivedTli, &lastMsgSendTime, &lastMsgReceiptTime, &latestEndLsn, &latestEndTime, &upstreamNode); err != nil { return err } } else { @@ -157,55 +156,51 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta } } if !upstreamHost.Valid { - level.Debug(c.log).Log("msg", "Skipping wal receiver stats because upstream host is null") + c.log.Debug("Skipping wal receiver stats because upstream host is null") continue } if !slotName.Valid { - level.Debug(c.log).Log("msg", "Skipping wal receiver stats because slotname host is null") + c.log.Debug("Skipping wal receiver stats because slotname host is null") continue } if !status.Valid { - level.Debug(c.log).Log("msg", "Skipping wal receiver stats because status is null") + c.log.Debug("Skipping wal receiver stats because status is null") continue } labels := []string{upstreamHost.String, slotName.String, status.String} if !receiveStartLsn.Valid { - level.Debug(c.log).Log("msg", "Skipping wal receiver stats because receive_start_lsn is null") + c.log.Debug("Skipping wal receiver stats because receive_start_lsn is null") continue } if !receiveStartTli.Valid { - level.Debug(c.log).Log("msg", "Skipping wal receiver stats because receive_start_tli is null") + c.log.Debug("Skipping wal receiver stats because receive_start_tli is null") continue } if hasFlushedLSN && !flushedLsn.Valid { - level.Debug(c.log).Log("msg", "Skipping wal receiver stats because flushed_lsn is null") + c.log.Debug("Skipping wal receiver stats because flushed_lsn is null") continue } if !receivedTli.Valid { - level.Debug(c.log).Log("msg", "Skipping wal receiver stats because received_tli is null") + c.log.Debug("Skipping wal receiver stats because received_tli is null") continue } if !lastMsgSendTime.Valid { - level.Debug(c.log).Log("msg", "Skipping wal receiver stats because last_msg_send_time is null") + c.log.Debug("Skipping wal receiver stats because last_msg_send_time is null") continue } if !lastMsgReceiptTime.Valid { - level.Debug(c.log).Log("msg", "Skipping wal receiver stats because last_msg_receipt_time is null") + c.log.Debug("Skipping wal receiver stats because last_msg_receipt_time is null") continue } if !latestEndLsn.Valid { - level.Debug(c.log).Log("msg", "Skipping wal receiver stats because latest_end_lsn is null") + c.log.Debug("Skipping wal receiver stats because latest_end_lsn is null") continue } if !latestEndTime.Valid { - level.Debug(c.log).Log("msg", "Skipping wal receiver stats because latest_end_time is null") - continue - } - if !upstreamNode.Valid { - level.Debug(c.log).Log("msg", "Skipping wal receiver stats because upstream_node is null") + c.log.Debug("Skipping wal receiver stats because latest_end_time is null") continue } ch <- prometheus.MustNewConstMetric( @@ -214,12 +209,6 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta float64(receiveStartLsn.Int64), labels...) - ch <- prometheus.MustNewConstMetric( - statWalReceiverReceiveStartTli, - prometheus.GaugeValue, - float64(receiveStartTli.Int64), - labels...) - if hasFlushedLSN { ch <- prometheus.MustNewConstMetric( statWalReceiverFlushedLSN, @@ -228,6 +217,12 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta labels...) } + ch <- prometheus.MustNewConstMetric( + statWalReceiverReceiveStartTli, + prometheus.GaugeValue, + float64(receiveStartTli.Int64), + labels...) + ch <- prometheus.MustNewConstMetric( statWalReceiverReceivedTli, prometheus.GaugeValue, @@ -258,11 +253,15 @@ func (c *PGStatWalReceiverCollector) Update(ctx context.Context, instance *insta latestEndTime.Float64, labels...) - ch <- prometheus.MustNewConstMetric( - statWalReceiverUpstreamNode, - prometheus.GaugeValue, - float64(upstreamNode.Int64), - labels...) + if !upstreamNode.Valid { + c.log.Debug("Skipping wal receiver stats upstream_node because it is null") + } else { + ch <- prometheus.MustNewConstMetric( + statWalReceiverUpstreamNode, + prometheus.GaugeValue, + float64(upstreamNode.Int64), + labels...) + } } if err := rows.Err(); err != nil { return err diff --git a/collector/pg_stat_walreceiver_test.go b/collector/pg_stat_walreceiver_test.go index 3e2418b25..13351255d 100644 --- a/collector/pg_stat_walreceiver_test.go +++ b/collector/pg_stat_walreceiver_test.go @@ -50,8 +50,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) { "slot_name", "status", "receive_start_lsn", - "receive_start_tli", "flushed_lsn", + "receive_start_tli", "received_tli", "last_msg_send_time", "last_msg_receipt_time", @@ -64,13 +64,13 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) { "foo", "bar", "stopping", - 1200668684563608, + int64(1200668684563608), + int64(1200668684563609), 1687321285, - 1200668684563609, 1687321280, 1687321275, 1687321276, - 1200668684563610, + int64(1200668684563610), 1687321277, 5, ) @@ -88,8 +88,8 @@ func TestPGStatWalReceiverCollectorWithFlushedLSN(t *testing.T) { }() expected := []MetricResult{ {labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563608, metricType: dto.MetricType_COUNTER}, - {labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321285, metricType: dto.MetricType_GAUGE}, {labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1200668684563609, metricType: dto.MetricType_COUNTER}, + {labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321285, metricType: dto.MetricType_GAUGE}, {labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321280, metricType: dto.MetricType_GAUGE}, {labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321275, metricType: dto.MetricType_COUNTER}, {labels: labelMap{"upstream_host": "foo", "slot_name": "bar", "status": "stopping"}, value: 1687321276, metricType: dto.MetricType_COUNTER}, @@ -143,12 +143,12 @@ func TestPGStatWalReceiverCollectorWithNoFlushedLSN(t *testing.T) { "foo", "bar", "starting", - 1200668684563608, + int64(1200668684563608), 1687321285, 1687321280, 1687321275, 1687321276, - 1200668684563610, + int64(1200668684563610), 1687321277, 5, ) diff --git a/collector/pg_statio_user_indexes.go b/collector/pg_statio_user_indexes.go index b5516338d..c53f52185 100644 --- a/collector/pg_statio_user_indexes.go +++ b/collector/pg_statio_user_indexes.go @@ -15,8 +15,8 @@ package collector import ( "context" "database/sql" + "log/slog" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" ) @@ -25,7 +25,7 @@ func init() { } type PGStatioUserIndexesCollector struct { - log log.Logger + log *slog.Logger } const statioUserIndexesSubsystem = "statio_user_indexes" diff --git a/collector/pg_statio_user_tables.go b/collector/pg_statio_user_tables.go index 4315fda0a..48f6438f2 100644 --- a/collector/pg_statio_user_tables.go +++ b/collector/pg_statio_user_tables.go @@ -16,8 +16,8 @@ package collector import ( "context" "database/sql" + "log/slog" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" ) @@ -28,7 +28,7 @@ func init() { } type PGStatIOUserTablesCollector struct { - log log.Logger + log *slog.Logger } func NewPGStatIOUserTablesCollector(config collectorConfig) (Collector, error) { diff --git a/collector/pg_wal.go b/collector/pg_wal.go index afa8fcef6..b76b7118b 100644 --- a/collector/pg_wal.go +++ b/collector/pg_wal.go @@ -15,6 +15,7 @@ package collector import ( "context" + "database/sql" "github.com/prometheus/client_golang/prometheus" ) @@ -67,7 +68,7 @@ func (c PGWALCollector) Update(ctx context.Context, instance *instance, ch chan< ) var segments uint64 - var size uint64 + var size sql.NullInt64 err := row.Scan(&segments, &size) if err != nil { return err @@ -76,9 +77,11 @@ func (c PGWALCollector) Update(ctx context.Context, instance *instance, ch chan< pgWALSegments, prometheus.GaugeValue, float64(segments), ) - ch <- prometheus.MustNewConstMetric( - pgWALSize, - prometheus.GaugeValue, float64(size), - ) + if size.Valid { + ch <- prometheus.MustNewConstMetric( + pgWALSize, + prometheus.GaugeValue, float64(size.Int64), + ) + } return nil } diff --git a/collector/pg_wal_test.go b/collector/pg_wal_test.go index 745105a13..8109953ef 100644 --- a/collector/pg_wal_test.go +++ b/collector/pg_wal_test.go @@ -61,3 +61,42 @@ func TestPgWALCollector(t *testing.T) { t.Errorf("there were unfulfilled exceptions: %s", err) } } + +func TestPgWALCollectorZeroSegments(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("Error opening a stub db connection: %s", err) + } + defer db.Close() + + inst := &instance{db: db} + + columns := []string{"segments", "size"} + rows := sqlmock.NewRows(columns). + AddRow(0, nil) + mock.ExpectQuery(sanitizeQuery(pgWALQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { + defer close(ch) + c := PGWALCollector{} + + if err := c.Update(context.Background(), inst, ch); err != nil { + t.Errorf("Error calling PGWALCollector.Update: %s", err) + } + }() + + expected := []MetricResult{ + {labels: labelMap{}, value: 0, metricType: dto.MetricType_GAUGE}, + } + + convey.Convey("Metrics comparison (zero segments)", t, func() { + for _, expect := range expected { + m := readMetric(<-ch) + convey.So(expect, convey.ShouldResemble, m) + } + }) + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("there were unfulfilled exceptions: %s", err) + } +} diff --git a/collector/pg_xlog_location.go b/collector/pg_xlog_location.go index 237204f7d..5f091471f 100644 --- a/collector/pg_xlog_location.go +++ b/collector/pg_xlog_location.go @@ -15,10 +15,9 @@ package collector import ( "context" + "log/slog" "github.com/blang/semver/v4" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" ) @@ -29,7 +28,7 @@ func init() { } type PGXlogLocationCollector struct { - log log.Logger + log *slog.Logger } func NewPGXlogLocationCollector(config collectorConfig) (Collector, error) { @@ -59,7 +58,7 @@ func (c PGXlogLocationCollector) Update(ctx context.Context, instance *instance, // https://wiki.postgresql.org/wiki/New_in_postgres_10#Renaming_of_.22xlog.22_to_.22wal.22_Globally_.28and_location.2Flsn.29 after10 := instance.version.Compare(semver.MustParse("10.0.0")) if after10 >= 0 { - level.Warn(c.log).Log("msg", "xlog_location collector is not available on PostgreSQL >= 10.0.0, skipping") + c.log.Warn("xlog_location collector is not available on PostgreSQL >= 10.0.0, skipping") return nil } diff --git a/collector/probe.go b/collector/probe.go index 4c0f0419b..e40d6fee1 100644 --- a/collector/probe.go +++ b/collector/probe.go @@ -15,10 +15,9 @@ package collector import ( "context" + "log/slog" "sync" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus-community/postgres_exporter/config" "github.com/prometheus/client_golang/prometheus" ) @@ -26,11 +25,11 @@ import ( type ProbeCollector struct { registry *prometheus.Registry collectors map[string]Collector - logger log.Logger + logger *slog.Logger instance *instance } -func NewProbeCollector(logger log.Logger, excludeDatabases []string, registry *prometheus.Registry, dsn config.DSN) (*ProbeCollector, error) { +func NewProbeCollector(logger *slog.Logger, excludeDatabases []string, registry *prometheus.Registry, dsn config.DSN) (*ProbeCollector, error) { collectors := make(map[string]Collector) initiatedCollectorsMtx.Lock() defer initiatedCollectorsMtx.Unlock() @@ -47,7 +46,7 @@ func NewProbeCollector(logger log.Logger, excludeDatabases []string, registry *p } else { collector, err := factories[key]( collectorConfig{ - logger: log.With(logger, "collector", key), + logger: logger.With("collector", key), excludeDatabases: excludeDatabases, }) if err != nil { @@ -77,11 +76,11 @@ func (pc *ProbeCollector) Describe(ch chan<- *prometheus.Desc) { func (pc *ProbeCollector) Collect(ch chan<- prometheus.Metric) { // Set up the database connection for the collector. err := pc.instance.setup() + defer pc.instance.Close() if err != nil { - level.Error(pc.logger).Log("msg", "Error opening connection to database", "err", err) + pc.logger.Error("Error opening connection to database", "err", err) return } - defer pc.instance.Close() wg := sync.WaitGroup{} wg.Add(len(pc.collectors)) diff --git a/config/config.go b/config/config.go index f67969725..52c66513a 100644 --- a/config/config.go +++ b/config/config.go @@ -15,10 +15,10 @@ package config import ( "fmt" + "log/slog" "os" "sync" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "gopkg.in/yaml.v3" @@ -65,7 +65,7 @@ func (ch *Handler) GetConfig() *Config { return ch.Config } -func (ch *Handler) ReloadConfig(f string, logger log.Logger) error { +func (ch *Handler) ReloadConfig(f string, logger *slog.Logger) error { config := &Config{} var err error defer func() { @@ -79,14 +79,14 @@ func (ch *Handler) ReloadConfig(f string, logger log.Logger) error { yamlReader, err := os.Open(f) if err != nil { - return fmt.Errorf("Error opening config file %q: %s", f, err) + return fmt.Errorf("error opening config file %q: %s", f, err) } defer yamlReader.Close() decoder := yaml.NewDecoder(yamlReader) decoder.KnownFields(true) if err = decoder.Decode(config); err != nil { - return fmt.Errorf("Error parsing config file %q: %s", f, err) + return fmt.Errorf("error parsing config file %q: %s", f, err) } ch.Lock() diff --git a/config/config_test.go b/config/config_test.go index d5d23d3ba..fa59c9b40 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -24,7 +24,7 @@ func TestLoadConfig(t *testing.T) { err := ch.ReloadConfig("testdata/config-good.yaml", nil) if err != nil { - t.Errorf("Error loading config: %s", err) + t.Errorf("error loading config: %s", err) } } @@ -39,11 +39,11 @@ func TestLoadBadConfigs(t *testing.T) { }{ { input: "testdata/config-bad-auth-module.yaml", - want: "Error parsing config file \"testdata/config-bad-auth-module.yaml\": yaml: unmarshal errors:\n line 3: field pretendauth not found in type config.AuthModule", + want: "error parsing config file \"testdata/config-bad-auth-module.yaml\": yaml: unmarshal errors:\n line 3: field pretendauth not found in type config.AuthModule", }, { input: "testdata/config-bad-extra-field.yaml", - want: "Error parsing config file \"testdata/config-bad-extra-field.yaml\": yaml: unmarshal errors:\n line 8: field doesNotExist not found in type config.AuthModule", + want: "error parsing config file \"testdata/config-bad-extra-field.yaml\": yaml: unmarshal errors:\n line 8: field doesNotExist not found in type config.AuthModule", }, } diff --git a/cmd/postgres_exporter/datasource.go b/exporter/datasource.go similarity index 85% rename from cmd/postgres_exporter/datasource.go rename to exporter/datasource.go index 0b8cef04a..9ae3410fe 100644 --- a/cmd/postgres_exporter/datasource.go +++ b/exporter/datasource.go @@ -11,16 +11,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package exporter import ( "fmt" "net/url" "os" "regexp" + "slices" "strings" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" ) @@ -39,19 +39,19 @@ func (e *Exporter) discoverDatabaseDSNs() []string { var err error dsnURI, err = url.Parse(dsn) if err != nil { - level.Error(logger).Log("msg", "Unable to parse DSN as URI", "dsn", loggableDSN(dsn), "err", err) + e.logger.Error("Unable to parse DSN as URI", "dsn", loggableDSN(dsn), "err", err) continue } } else if connstringRe.MatchString(dsn) { dsnConnstring = dsn } else { - level.Error(logger).Log("msg", "Unable to parse DSN as either URI or connstring", "dsn", loggableDSN(dsn)) + e.logger.Error("Unable to parse DSN as either URI or connstring", "dsn", loggableDSN(dsn)) continue } server, err := e.servers.GetServer(dsn) if err != nil { - level.Error(logger).Log("msg", "Error opening connection to database", "dsn", loggableDSN(dsn), "err", err) + e.logger.Error("Error opening connection to database", "dsn", loggableDSN(dsn), "err", err) continue } dsns[dsn] = struct{}{} @@ -61,15 +61,15 @@ func (e *Exporter) discoverDatabaseDSNs() []string { databaseNames, err := queryDatabases(server) if err != nil { - level.Error(logger).Log("msg", "Error querying databases", "dsn", loggableDSN(dsn), "err", err) + e.logger.Error("Error querying databases", "dsn", loggableDSN(dsn), "err", err) continue } for _, databaseName := range databaseNames { - if contains(e.excludeDatabases, databaseName) { + if slices.Contains(e.excludeDatabases, databaseName) { continue } - if len(e.includeDatabases) != 0 && !contains(e.includeDatabases, databaseName) { + if len(e.includeDatabases) != 0 && !slices.Contains(e.includeDatabases, databaseName) { continue } @@ -109,7 +109,7 @@ func (e *Exporter) scrapeDSN(ch chan<- prometheus.Metric, dsn string) error { // Check if map versions need to be updated if err := e.checkMapVersions(ch, server); err != nil { - level.Warn(logger).Log("msg", "Proceeding with outdated query maps, as the Postgres version could not be determined", "err", err) + e.logger.Warn("Proceeding with outdated query maps, as the Postgres version could not be determined", "err", err) } return server.Scrape(ch, e.disableSettingsMetrics) @@ -119,7 +119,7 @@ func (e *Exporter) scrapeDSN(ch chan<- prometheus.Metric, dsn string) error { // DATA_SOURCE_NAME always wins so we do not break older versions // reading secrets from files wins over secrets in environment variables // DATA_SOURCE_NAME > DATA_SOURCE_{USER|PASS}_FILE > DATA_SOURCE_{USER|PASS} -func getDataSources() ([]string, error) { +func GetDataSources() ([]string, error) { var dsn = os.Getenv("DATA_SOURCE_NAME") if len(dsn) != 0 { return strings.Split(dsn, ","), nil diff --git a/cmd/postgres_exporter/namespace.go b/exporter/namespace.go similarity index 92% rename from cmd/postgres_exporter/namespace.go rename to exporter/namespace.go index 41674007d..5f6bd8edc 100644 --- a/cmd/postgres_exporter/namespace.go +++ b/exporter/namespace.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package exporter import ( "database/sql" @@ -20,7 +20,6 @@ import ( "time" "github.com/blang/semver/v4" - "github.com/go-kit/log/level" "github.com/lib/pq" "github.com/prometheus/client_golang/prometheus" ) @@ -130,7 +129,7 @@ func queryNamespaceMapping(server *Server, namespace string, mapping MetricMapNa nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Missing column: ", namespace, columnName+"_sum"))) continue } - sum, ok := dbToFloat64(columnData[idx]) + sum, ok := dbToFloat64(columnData[idx], server.logger) if !ok { nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unexpected error parsing column: ", namespace, columnName+"_sum", columnData[idx]))) continue @@ -141,7 +140,7 @@ func queryNamespaceMapping(server *Server, namespace string, mapping MetricMapNa nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Missing column: ", namespace, columnName+"_count"))) continue } - count, ok := dbToUint64(columnData[idx]) + count, ok := dbToUint64(columnData[idx], server.logger) if !ok { nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unexpected error parsing column: ", namespace, columnName+"_count", columnData[idx]))) continue @@ -153,7 +152,7 @@ func queryNamespaceMapping(server *Server, namespace string, mapping MetricMapNa labels..., ) } else { - value, ok := dbToFloat64(columnData[idx]) + value, ok := dbToFloat64(columnData[idx], server.logger) if !ok { nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unexpected error parsing column: ", namespace, columnName, columnData[idx]))) continue @@ -168,7 +167,7 @@ func queryNamespaceMapping(server *Server, namespace string, mapping MetricMapNa // Its not an error to fail here, since the values are // unexpected anyway. - value, ok := dbToFloat64(columnData[idx]) + value, ok := dbToFloat64(columnData[idx], server.logger) if !ok { nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unparseable column type - discarding: ", namespace, columnName, err))) continue @@ -190,10 +189,10 @@ func queryNamespaceMappings(ch chan<- prometheus.Metric, server *Server) map[str scrapeStart := time.Now() for namespace, mapping := range server.metricMap { - level.Debug(logger).Log("msg", "Querying namespace", "namespace", namespace) + server.logger.Debug("Querying namespace", "namespace", namespace) if mapping.master && !server.master { - level.Debug(logger).Log("msg", "Query skipped...") + server.logger.Debug("Query skipped...") continue } @@ -202,7 +201,7 @@ func queryNamespaceMappings(ch chan<- prometheus.Metric, server *Server) map[str serVersion, _ := semver.Parse(server.lastMapVersion.String()) runServerRange, _ := semver.ParseRange(server.runonserver) if !runServerRange(serVersion) { - level.Debug(logger).Log("msg", "Query skipped for this database version", "version", server.lastMapVersion.String(), "target_version", server.runonserver) + server.logger.Debug("Query skipped for this database version", "version", server.lastMapVersion.String(), "target_version", server.runonserver) continue } } @@ -233,12 +232,12 @@ func queryNamespaceMappings(ch chan<- prometheus.Metric, server *Server) map[str // Serious error - a namespace disappeared if err != nil { namespaceErrors[namespace] = err - level.Info(logger).Log("err", err) + server.logger.Info("error finding namespace", "err", err) } // Non-serious errors - likely version or parsing problems. if len(nonFatalErrors) > 0 { for _, err := range nonFatalErrors { - level.Info(logger).Log("err", err) + server.logger.Info("error querying namespace", "err", err) } } diff --git a/cmd/postgres_exporter/pg_setting.go b/exporter/pg_setting.go similarity index 74% rename from cmd/postgres_exporter/pg_setting.go rename to exporter/pg_setting.go index 30b60ca1c..770536c4b 100644 --- a/cmd/postgres_exporter/pg_setting.go +++ b/exporter/pg_setting.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package exporter import ( "fmt" @@ -19,7 +19,6 @@ import ( "strconv" "strings" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" ) @@ -32,13 +31,17 @@ var ( // Query the pg_settings view containing runtime variables func querySettings(ch chan<- prometheus.Metric, server *Server) error { - level.Debug(logger).Log("msg", "Querying pg_setting view", "server", server) + server.logger.Debug("Querying pg_setting view", "server", server) // pg_settings docs: https://www.postgresql.org/docs/current/static/view-pg-settings.html // // NOTE: If you add more vartypes here, you must update the supported // types in normaliseUnit() below - query := "SELECT name, setting, COALESCE(unit, ''), short_desc, vartype FROM pg_settings WHERE vartype IN ('bool', 'integer', 'real') AND name != 'sync_commit_cancel_wait';" + // + // Settings intentionally ignored due to invalid format: + // - `sync_commit_cancel_wait`, specific to Azure Postgres, see https://github.com/prometheus-community/postgres_exporter/issues/523 + // - `google_dataplex.max_messages`, specific to Google Cloud SQL, see https://github.com/prometheus-community/postgres_exporter/issues/1240 + query := "SELECT name, setting, COALESCE(unit, ''), short_desc, vartype FROM pg_settings WHERE vartype IN ('bool', 'integer', 'real') AND name NOT IN ('sync_commit_cancel_wait', 'google_dataplex.max_messages');" rows, err := server.db.Query(query) if err != nil { @@ -52,8 +55,15 @@ func querySettings(ch chan<- prometheus.Metric, server *Server) error { if err != nil { return fmt.Errorf("Error retrieving rows on %q: %s %v", server, namespace, err) } - - ch <- s.metric(server.labels) + metric, err := s.metric(server.labels) + if err != nil { + // Log the error and continue + // This could be due to a bad value in the setting + // but we should not fail the entire scrape or panic + server.logger.Warn("Error normalising unit for setting", "setting", s.name, "value", s.setting, "unit", s.unit, "error", err) + continue + } + ch <- metric } return nil @@ -65,10 +75,10 @@ type pgSetting struct { name, setting, unit, shortDesc, vartype string } -func (s *pgSetting) metric(labels prometheus.Labels) prometheus.Metric { +func (s *pgSetting) metric(labels prometheus.Labels) (prometheus.Metric, error) { var ( err error - name = strings.Replace(s.name, ".", "_", -1) + name = strings.ReplaceAll(strings.ReplaceAll(s.name, ".", "_"), "-", "_") unit = s.unit // nolint: ineffassign shortDesc = fmt.Sprintf("Server Parameter: %s", s.name) subsystem = "settings" @@ -82,9 +92,7 @@ func (s *pgSetting) metric(labels prometheus.Labels) prometheus.Metric { } case "integer", "real": if val, unit, err = s.normaliseUnit(); err != nil { - // Panic, since we should recognise all units - // and don't want to silently exlude metrics - panic(err) + return nil, err } if len(unit) > 0 { @@ -92,12 +100,11 @@ func (s *pgSetting) metric(labels prometheus.Labels) prometheus.Metric { shortDesc = fmt.Sprintf("%s [Units converted to %s.]", shortDesc, unit) } default: - // Panic because we got a type we didn't ask for - panic(fmt.Sprintf("Unsupported vartype %q", s.vartype)) + return nil, fmt.Errorf("pgsetting: unsupported vartype %q", s.vartype) } desc := newDesc(subsystem, name, shortDesc, labels) - return prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, val) + return prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, val), nil } // Removes units from any of the setting values. @@ -129,10 +136,10 @@ func (s *pgSetting) normaliseUnit() (val float64, unit string, err error) { return case "ms", "s", "min", "h", "d": unit = "seconds" - case "B", "kB", "MB", "GB", "TB", "4kB", "8kB", "16kB", "32kB", "64kB", "16MB", "32MB", "64MB": + case "B", "kB", "MB", "GB", "TB", "1kB", "2kB", "4kB", "8kB", "16kB", "32kB", "64kB", "16MB", "32MB", "64MB": unit = "bytes" default: - err = fmt.Errorf("Unknown unit for runtime variable: %q", s.unit) + err = fmt.Errorf("unknown unit for runtime variable: %q", s.unit) return } @@ -158,6 +165,10 @@ func (s *pgSetting) normaliseUnit() (val float64, unit string, err error) { val *= math.Pow(2, 30) case "TB": val *= math.Pow(2, 40) + case "1kB": + val *= math.Pow(2, 10) + case "2kB": + val *= math.Pow(2, 11) case "4kB": val *= math.Pow(2, 12) case "8kB": diff --git a/cmd/postgres_exporter/pg_setting_test.go b/exporter/pg_setting_test.go similarity index 87% rename from cmd/postgres_exporter/pg_setting_test.go rename to exporter/pg_setting_test.go index 1b1168c7e..20fa65c07 100644 --- a/cmd/postgres_exporter/pg_setting_test.go +++ b/exporter/pg_setting_test.go @@ -12,9 +12,8 @@ // limitations under the License. //go:build !integration -// +build !integration -package main +package exporter import ( "github.com/prometheus/client_golang/prometheus" @@ -40,7 +39,7 @@ var fixtures = []fixture{ unit: "seconds", err: "", }, - d: `Desc{fqName: "pg_settings_seconds_fixture_metric_seconds", help: "Server Parameter: seconds_fixture_metric [Units converted to seconds.]", constLabels: {}, variableLabels: []}`, + d: `Desc{fqName: "pg_settings_seconds_fixture_metric_seconds", help: "Server Parameter: seconds_fixture_metric [Units converted to seconds.]", constLabels: {}, variableLabels: {}}`, v: 5, }, { @@ -56,7 +55,7 @@ var fixtures = []fixture{ unit: "seconds", err: "", }, - d: `Desc{fqName: "pg_settings_milliseconds_fixture_metric_seconds", help: "Server Parameter: milliseconds_fixture_metric [Units converted to seconds.]", constLabels: {}, variableLabels: []}`, + d: `Desc{fqName: "pg_settings_milliseconds_fixture_metric_seconds", help: "Server Parameter: milliseconds_fixture_metric [Units converted to seconds.]", constLabels: {}, variableLabels: {}}`, v: 5, }, { @@ -72,7 +71,7 @@ var fixtures = []fixture{ unit: "bytes", err: "", }, - d: `Desc{fqName: "pg_settings_eight_kb_fixture_metric_bytes", help: "Server Parameter: eight_kb_fixture_metric [Units converted to bytes.]", constLabels: {}, variableLabels: []}`, + d: `Desc{fqName: "pg_settings_eight_kb_fixture_metric_bytes", help: "Server Parameter: eight_kb_fixture_metric [Units converted to bytes.]", constLabels: {}, variableLabels: {}}`, v: 139264, }, { @@ -88,7 +87,7 @@ var fixtures = []fixture{ unit: "bytes", err: "", }, - d: `Desc{fqName: "pg_settings_16_kb_real_fixture_metric_bytes", help: "Server Parameter: 16_kb_real_fixture_metric [Units converted to bytes.]", constLabels: {}, variableLabels: []}`, + d: `Desc{fqName: "pg_settings_16_kb_real_fixture_metric_bytes", help: "Server Parameter: 16_kb_real_fixture_metric [Units converted to bytes.]", constLabels: {}, variableLabels: {}}`, v: 49152, }, { @@ -104,7 +103,7 @@ var fixtures = []fixture{ unit: "bytes", err: "", }, - d: `Desc{fqName: "pg_settings_16_mb_real_fixture_metric_bytes", help: "Server Parameter: 16_mb_real_fixture_metric [Units converted to bytes.]", constLabels: {}, variableLabels: []}`, + d: `Desc{fqName: "pg_settings_16_mb_real_fixture_metric_bytes", help: "Server Parameter: 16_mb_real_fixture_metric [Units converted to bytes.]", constLabels: {}, variableLabels: {}}`, v: 5.0331648e+07, }, { @@ -120,7 +119,7 @@ var fixtures = []fixture{ unit: "bytes", err: "", }, - d: `Desc{fqName: "pg_settings_32_mb_real_fixture_metric_bytes", help: "Server Parameter: 32_mb_real_fixture_metric [Units converted to bytes.]", constLabels: {}, variableLabels: []}`, + d: `Desc{fqName: "pg_settings_32_mb_real_fixture_metric_bytes", help: "Server Parameter: 32_mb_real_fixture_metric [Units converted to bytes.]", constLabels: {}, variableLabels: {}}`, v: 1.00663296e+08, }, { @@ -136,7 +135,7 @@ var fixtures = []fixture{ unit: "bytes", err: "", }, - d: `Desc{fqName: "pg_settings_64_mb_real_fixture_metric_bytes", help: "Server Parameter: 64_mb_real_fixture_metric [Units converted to bytes.]", constLabels: {}, variableLabels: []}`, + d: `Desc{fqName: "pg_settings_64_mb_real_fixture_metric_bytes", help: "Server Parameter: 64_mb_real_fixture_metric [Units converted to bytes.]", constLabels: {}, variableLabels: {}}`, v: 2.01326592e+08, }, { @@ -152,7 +151,7 @@ var fixtures = []fixture{ unit: "", err: "", }, - d: `Desc{fqName: "pg_settings_bool_on_fixture_metric", help: "Server Parameter: bool_on_fixture_metric", constLabels: {}, variableLabels: []}`, + d: `Desc{fqName: "pg_settings_bool_on_fixture_metric", help: "Server Parameter: bool_on_fixture_metric", constLabels: {}, variableLabels: {}}`, v: 1, }, { @@ -168,7 +167,7 @@ var fixtures = []fixture{ unit: "", err: "", }, - d: `Desc{fqName: "pg_settings_bool_off_fixture_metric", help: "Server Parameter: bool_off_fixture_metric", constLabels: {}, variableLabels: []}`, + d: `Desc{fqName: "pg_settings_bool_off_fixture_metric", help: "Server Parameter: bool_off_fixture_metric", constLabels: {}, variableLabels: {}}`, v: 0, }, { @@ -184,7 +183,7 @@ var fixtures = []fixture{ unit: "seconds", err: "", }, - d: `Desc{fqName: "pg_settings_special_minus_one_value_seconds", help: "Server Parameter: special_minus_one_value [Units converted to seconds.]", constLabels: {}, variableLabels: []}`, + d: `Desc{fqName: "pg_settings_special_minus_one_value_seconds", help: "Server Parameter: special_minus_one_value [Units converted to seconds.]", constLabels: {}, variableLabels: {}}`, v: -1, }, { @@ -200,7 +199,7 @@ var fixtures = []fixture{ unit: "", err: "", }, - d: `Desc{fqName: "pg_settings_rds_rds_superuser_reserved_connections", help: "Server Parameter: rds.rds_superuser_reserved_connections", constLabels: {}, variableLabels: []}`, + d: `Desc{fqName: "pg_settings_rds_rds_superuser_reserved_connections", help: "Server Parameter: rds.rds_superuser_reserved_connections", constLabels: {}, variableLabels: {}}`, v: 2, }, { @@ -214,7 +213,7 @@ var fixtures = []fixture{ n: normalised{ val: 10, unit: "", - err: `Unknown unit for runtime variable: "nonexistent"`, + err: `unknown unit for runtime variable: "nonexistent"`, }, }, } @@ -238,17 +237,15 @@ func (s *PgSettingSuite) TestNormaliseUnit(c *C) { } func (s *PgSettingSuite) TestMetric(c *C) { - defer func() { - if r := recover(); r != nil { - if r.(error).Error() != `Unknown unit for runtime variable: "nonexistent"` { - panic(r) - } - } - }() - for _, f := range fixtures { + if f.n.err != "" { + continue + } d := &dto.Metric{} - m := f.p.metric(prometheus.Labels{}) + m, err := f.p.metric(prometheus.Labels{}) + if err != nil { + c.Fatalf("Error creating metric: %v", err) + } m.Write(d) // nolint: errcheck c.Check(m.Desc().String(), Equals, f.d) diff --git a/cmd/postgres_exporter/postgres_exporter.go b/exporter/postgres_exporter.go similarity index 90% rename from cmd/postgres_exporter/postgres_exporter.go rename to exporter/postgres_exporter.go index fa34eecc5..720a4667f 100644 --- a/cmd/postgres_exporter/postgres_exporter.go +++ b/exporter/postgres_exporter.go @@ -11,13 +11,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package exporter import ( "crypto/sha256" "database/sql" "errors" "fmt" + "log/slog" "math" "os" "regexp" @@ -25,10 +26,22 @@ import ( "time" "github.com/blang/semver/v4" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" ) +// Metric name parts. +const ( + // Namespace for all metrics. + namespace = "pg" + // Subsystems. + exporter = "exporter" + // Metric label used for static string data thats handy to send to Prometheus + // e.g. version + staticLabelName = "static" + // Metric label used for server identification. + serverLabelName = "server" +) + // ColumnUsage should be one of several enum values which describe how a // queried row is to be converted to a Prometheus metric. type ColumnUsage int @@ -143,7 +156,7 @@ func (e *ErrorConnectToServer) Error() string { } // TODO: revisit this with the semver system -func dumpMaps() { +func DumpMaps() { // TODO: make this function part of the exporter for name, cmap := range builtinMetricMaps { query, ok := queryOverrides[name] @@ -252,6 +265,9 @@ var builtinMetricMaps = map[string]intermediateMetricMap{ "state": {LABEL, "connection state", nil, semver.MustParseRange(">=9.2.0")}, "usename": {LABEL, "connection usename", nil, nil}, "application_name": {LABEL, "connection application_name", nil, nil}, + "backend_type": {LABEL, "connection backend_type", nil, nil}, + "wait_event_type": {LABEL, "connection wait_event_type", nil, nil}, + "wait_event": {LABEL, "connection wait_event", nil, nil}, "count": {GAUGE, "number of connections in this state", nil, nil}, "max_tx_duration": {GAUGE, "max duration in seconds any active transaction has been running", nil, nil}, }, @@ -261,13 +277,13 @@ var builtinMetricMaps = map[string]intermediateMetricMap{ } // Turn the MetricMap column mapping into a prometheus descriptor mapping. -func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metricMaps map[string]intermediateMetricMap) map[string]MetricMapNamespace { +func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metricMaps map[string]intermediateMetricMap, logger *slog.Logger, metricPrefix string) map[string]MetricMapNamespace { var metricMap = make(map[string]MetricMapNamespace) for namespace, intermediateMappings := range metricMaps { thisMap := make(map[string]MetricMap) - namespace = strings.Replace(namespace, "pg", *metricPrefix, 1) + namespace = strings.Replace(namespace, "pg", metricPrefix, 1) // Get the constant labels var variableLabels []string @@ -284,7 +300,7 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri if !columnMapping.supportedVersions(pgVersion) { // It's very useful to be able to see what columns are being // rejected. - level.Debug(logger).Log("msg", "Column is being forced to discard due to version incompatibility", "column", columnName) + logger.Debug("Column is being forced to discard due to version incompatibility", "column", columnName) thisMap[columnName] = MetricMap{ discard: true, conversion: func(_ interface{}) (float64, bool) { @@ -310,7 +326,7 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri vtype: prometheus.CounterValue, desc: prometheus.NewDesc(fmt.Sprintf("%s_%s", namespace, columnName), columnMapping.description, variableLabels, serverLabels), conversion: func(in interface{}) (float64, bool) { - return dbToFloat64(in) + return dbToFloat64(in, logger) }, } case GAUGE: @@ -318,7 +334,7 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri vtype: prometheus.GaugeValue, desc: prometheus.NewDesc(fmt.Sprintf("%s_%s", namespace, columnName), columnMapping.description, variableLabels, serverLabels), conversion: func(in interface{}) (float64, bool) { - return dbToFloat64(in) + return dbToFloat64(in, logger) }, } case HISTOGRAM: @@ -327,7 +343,7 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri vtype: prometheus.UntypedValue, desc: prometheus.NewDesc(fmt.Sprintf("%s_%s", namespace, columnName), columnMapping.description, variableLabels, serverLabels), conversion: func(in interface{}) (float64, bool) { - return dbToFloat64(in) + return dbToFloat64(in, logger) }, } thisMap[columnName+"_bucket"] = MetricMap{ @@ -371,7 +387,7 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri case string: durationString = t default: - level.Error(logger).Log("msg", "Duration conversion metric was not a string") + logger.Error("Duration conversion metric was not a string") return math.NaN(), false } @@ -381,7 +397,7 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri d, err := time.ParseDuration(durationString) if err != nil { - level.Error(logger).Log("msg", "Failed converting result to metric", "column", columnName, "in", in, "err", err) + logger.Error("Failed converting result to metric", "column", columnName, "in", in, "err", err) return math.NaN(), false } return float64(d / time.Millisecond), true @@ -423,6 +439,9 @@ type Exporter struct { // servers are used to allow re-using the DB connection between scrapes. // servers contains metrics map and query overrides. servers *Servers + + logger *slog.Logger + metricPrefix string } // ExporterOpt configures Exporter. @@ -475,11 +494,18 @@ func WithUserQueriesPath(p string) ExporterOpt { // WithConstantLabels configures constant labels. func WithConstantLabels(s string) ExporterOpt { return func(e *Exporter) { - e.constantLabels = parseConstLabels(s) + e.constantLabels = parseConstLabels(s, e.logger) + } +} + +// WithMetricPrefix configures metric prefix. +func WithMetricPrefix(prefix string) ExporterOpt { + return func(e *Exporter) { + e.metricPrefix = prefix } } -func parseConstLabels(s string) prometheus.Labels { +func parseConstLabels(s string, logger *slog.Logger) prometheus.Labels { labels := make(prometheus.Labels) s = strings.TrimSpace(s) @@ -491,7 +517,7 @@ func parseConstLabels(s string) prometheus.Labels { for _, p := range parts { keyValue := strings.Split(strings.TrimSpace(p), "=") if len(keyValue) != 2 { - level.Error(logger).Log(`Wrong constant labels format, should be "key=value"`, "input", p) + logger.Error(`Wrong constant labels format, should be "key=value"`, "input", p) continue } key := strings.TrimSpace(keyValue[0]) @@ -506,10 +532,11 @@ func parseConstLabels(s string) prometheus.Labels { } // NewExporter returns a new PostgreSQL exporter for the provided DSN. -func NewExporter(dsn []string, opts ...ExporterOpt) *Exporter { +func NewExporter(dsn []string, logger *slog.Logger, opts ...ExporterOpt) *Exporter { e := &Exporter{ dsn: dsn, builtinMetricMaps: builtinMetricMaps, + logger: logger, } for _, opt := range opts { @@ -517,7 +544,7 @@ func NewExporter(dsn []string, opts ...ExporterOpt) *Exporter { } e.setupInternalMetrics() - e.servers = NewServers(ServerWithLabels(e.constantLabels)) + e.servers = NewServers(ServerWithLabels(e.constantLabels), ServerWithLogger(e.logger)) return e } @@ -574,6 +601,10 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) { e.userQueriesError.Collect(ch) } +func (e *Exporter) CloseServers() { + e.servers.Close() +} + func newDesc(subsystem, name, help string, labels prometheus.Labels) *prometheus.Desc { return prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, name), @@ -581,8 +612,8 @@ func newDesc(subsystem, name, help string, labels prometheus.Labels) *prometheus ) } -func checkPostgresVersion(db *sql.DB, server string) (semver.Version, string, error) { - level.Debug(logger).Log("msg", "Querying PostgreSQL version", "server", server) +func checkPostgresVersion(db *sql.DB, server string, logger *slog.Logger) (semver.Version, string, error) { + logger.Debug("Querying PostgreSQL version", "server", server) versionRow := db.QueryRow("SELECT version();") var versionString string err := versionRow.Scan(&versionString) @@ -599,24 +630,24 @@ func checkPostgresVersion(db *sql.DB, server string) (semver.Version, string, er // Check and update the exporters query maps if the version has changed. func (e *Exporter) checkMapVersions(ch chan<- prometheus.Metric, server *Server) error { - semanticVersion, versionString, err := checkPostgresVersion(server.db, server.String()) + semanticVersion, versionString, err := checkPostgresVersion(server.db, server.String(), e.logger) if err != nil { return fmt.Errorf("Error fetching version string on %q: %v", server, err) } if !e.disableDefaultMetrics && semanticVersion.LT(lowestSupportedVersion) { - level.Warn(logger).Log("msg", "PostgreSQL version is lower than our lowest supported version", "server", server, "version", semanticVersion, "lowest_supported_version", lowestSupportedVersion) + e.logger.Warn("PostgreSQL version is lower than our lowest supported version", "server", server, "version", semanticVersion, "lowest_supported_version", lowestSupportedVersion) } // Check if semantic version changed and recalculate maps if needed. if semanticVersion.NE(server.lastMapVersion) || server.metricMap == nil { - level.Info(logger).Log("msg", "Semantic version changed", "server", server, "from", server.lastMapVersion, "to", semanticVersion) + e.logger.Info("Semantic version changed", "server", server, "from", server.lastMapVersion, "to", semanticVersion) server.mappingMtx.Lock() // Get Default Metrics only for master database if !e.disableDefaultMetrics && server.master { - server.metricMap = makeDescMap(semanticVersion, server.labels, e.builtinMetricMaps) - server.queryOverrides = makeQueryOverrideMap(semanticVersion, queryOverrides) + server.metricMap = makeDescMap(semanticVersion, server.labels, e.builtinMetricMaps, server.logger, e.metricPrefix) + server.queryOverrides = makeQueryOverrideMap(semanticVersion, queryOverrides, server.logger) } else { server.metricMap = make(map[string]MetricMapNamespace) server.queryOverrides = make(map[string]string) @@ -631,13 +662,13 @@ func (e *Exporter) checkMapVersions(ch chan<- prometheus.Metric, server *Server) // Calculate the hashsum of the useQueries userQueriesData, err := os.ReadFile(e.userQueriesPath) if err != nil { - level.Error(logger).Log("msg", "Failed to reload user queries", "path", e.userQueriesPath, "err", err) + e.logger.Error("Failed to reload user queries", "path", e.userQueriesPath, "err", err) e.userQueriesError.WithLabelValues(e.userQueriesPath, "").Set(1) } else { hashsumStr := fmt.Sprintf("%x", sha256.Sum256(userQueriesData)) - if err := addQueries(userQueriesData, semanticVersion, server); err != nil { - level.Error(logger).Log("msg", "Failed to reload user queries", "path", e.userQueriesPath, "err", err) + if err := addQueries(userQueriesData, semanticVersion, server, e.metricPrefix); err != nil { + e.logger.Error("Failed to reload user queries", "path", e.userQueriesPath, "err", err) e.userQueriesError.WithLabelValues(e.userQueriesPath, hashsumStr).Set(1) } else { // Mark user queries as successfully loaded @@ -679,7 +710,7 @@ func (e *Exporter) scrape(ch chan<- prometheus.Metric) { if err := e.scrapeDSN(ch, dsn); err != nil { errorsCount++ - level.Error(logger).Log("err", err) + e.logger.Error("error scraping dsn", "err", err, "dsn", loggableDSN(dsn)) if _, ok := err.(*ErrorConnectToServer); ok { connectionErrorsCount++ diff --git a/cmd/postgres_exporter/postgres_exporter_integration_test.go b/exporter/postgres_exporter_integration_test.go similarity index 92% rename from cmd/postgres_exporter/postgres_exporter_integration_test.go rename to exporter/postgres_exporter_integration_test.go index 7043c1e88..a65dbdaab 100644 --- a/cmd/postgres_exporter/postgres_exporter_integration_test.go +++ b/exporter/postgres_exporter_integration_test.go @@ -15,9 +15,8 @@ // a lot of additional work to keep the external docker environment they require // working. //go:build integration -// +build integration -package main +package exporter import ( "fmt" @@ -27,6 +26,7 @@ import ( _ "github.com/lib/pq" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" . "gopkg.in/check.v1" ) @@ -43,7 +43,7 @@ func (s *IntegrationSuite) SetUpSuite(c *C) { dsn := os.Getenv("DATA_SOURCE_NAME") c.Assert(dsn, Not(Equals), "") - exporter := NewExporter(strings.Split(dsn, ",")) + exporter := NewExporter(strings.Split(dsn, ","), promslog.NewNopLogger()) c.Assert(exporter, NotNil) // Assign the exporter to the suite s.e = exporter @@ -100,12 +100,12 @@ func (s *IntegrationSuite) TestInvalidDsnDoesntCrash(c *C) { }() // Send a bad DSN - exporter := NewExporter([]string{"invalid dsn"}) + exporter := NewExporter([]string{"invalid dsn"}, promslog.NewNopLogger()) c.Assert(exporter, NotNil) exporter.scrape(ch) // Send a DSN to a non-listening port. - exporter = NewExporter([]string{"postgresql://nothing:nothing@127.0.0.1:1/nothing"}) + exporter = NewExporter([]string{"postgresql://nothing:nothing@127.0.0.1:1/nothing"}, promslog.NewNopLogger()) c.Assert(exporter, NotNil) exporter.scrape(ch) } @@ -123,7 +123,7 @@ func (s *IntegrationSuite) TestUnknownMetricParsingDoesntCrash(c *C) { dsn := os.Getenv("DATA_SOURCE_NAME") c.Assert(dsn, Not(Equals), "") - exporter := NewExporter(strings.Split(dsn, ",")) + exporter := NewExporter(strings.Split(dsn, ","), promslog.NewNopLogger()) c.Assert(exporter, NotNil) // Convert the default maps into a list of empty maps. @@ -156,6 +156,7 @@ func (s *IntegrationSuite) TestExtendQueriesDoesntCrash(c *C) { exporter := NewExporter( strings.Split(dsn, ","), + promslog.NewNopLogger(), WithUserQueriesPath("../user_queries_test.yaml"), ) c.Assert(exporter, NotNil) @@ -169,6 +170,7 @@ func (s *IntegrationSuite) TestAutoDiscoverDatabases(c *C) { exporter := NewExporter( strings.Split(dsn, ","), + promslog.NewNopLogger(), ) c.Assert(exporter, NotNil) diff --git a/cmd/postgres_exporter/postgres_exporter_test.go b/exporter/postgres_exporter_test.go similarity index 95% rename from cmd/postgres_exporter/postgres_exporter_test.go rename to exporter/postgres_exporter_test.go index 0f36febf4..6364fc4f9 100644 --- a/cmd/postgres_exporter/postgres_exporter_test.go +++ b/exporter/postgres_exporter_test.go @@ -12,9 +12,8 @@ // limitations under the License. //go:build !integration -// +build !integration -package main +package exporter import ( "math" @@ -25,6 +24,7 @@ import ( "github.com/blang/semver/v4" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" . "gopkg.in/check.v1" ) @@ -37,7 +37,6 @@ type FunctionalSuite struct { var _ = Suite(&FunctionalSuite{}) func (s *FunctionalSuite) SetUpSuite(c *C) { - } func (s *FunctionalSuite) TestSemanticVersionColumnDiscard(c *C) { @@ -54,7 +53,7 @@ func (s *FunctionalSuite) TestSemanticVersionColumnDiscard(c *C) { { // No metrics should be eliminated - resultMap := makeDescMap(semver.MustParse("0.0.1"), prometheus.Labels{}, testMetricMap) + resultMap := makeDescMap(semver.MustParse("0.0.1"), prometheus.Labels{}, testMetricMap, promslog.NewNopLogger(), "pg") c.Check( resultMap["test_namespace"].columnMappings["metric_which_stays"].discard, Equals, @@ -75,7 +74,7 @@ func (s *FunctionalSuite) TestSemanticVersionColumnDiscard(c *C) { testMetricMap["test_namespace"].columnMappings["metric_which_discards"] = discardableMetric // Discard metric should be discarded - resultMap := makeDescMap(semver.MustParse("0.0.1"), prometheus.Labels{}, testMetricMap) + resultMap := makeDescMap(semver.MustParse("0.0.1"), prometheus.Labels{}, testMetricMap, promslog.NewNopLogger(), "pg") c.Check( resultMap["test_namespace"].columnMappings["metric_which_stays"].discard, Equals, @@ -96,7 +95,7 @@ func (s *FunctionalSuite) TestSemanticVersionColumnDiscard(c *C) { testMetricMap["test_namespace"].columnMappings["metric_which_discards"] = discardableMetric // Discard metric should be discarded - resultMap := makeDescMap(semver.MustParse("0.0.2"), prometheus.Labels{}, testMetricMap) + resultMap := makeDescMap(semver.MustParse("0.0.2"), prometheus.Labels{}, testMetricMap, promslog.NewNopLogger(), "pg") c.Check( resultMap["test_namespace"].columnMappings["metric_which_stays"].discard, Equals, @@ -126,7 +125,7 @@ func (s *FunctionalSuite) TestEnvironmentSettingWithSecretsFiles(c *C) { var expected = "postgresql://custom_username$&+,%2F%3A;=%3F%40:custom_password$&+,%2F%3A;=%3F%40@localhost:5432/?sslmode=disable" - dsn, err := getDataSources() + dsn, err := GetDataSources() if err != nil { c.Errorf("Unexpected error reading datasources") } @@ -146,7 +145,7 @@ func (s *FunctionalSuite) TestEnvironmentSettingWithDns(c *C) { c.Assert(err, IsNil) defer UnsetEnvironment(c, "DATA_SOURCE_NAME") - dsn, err := getDataSources() + dsn, err := GetDataSources() if err != nil { c.Errorf("Unexpected error reading datasources") } @@ -174,7 +173,7 @@ func (s *FunctionalSuite) TestEnvironmentSettingWithDnsAndSecrets(c *C) { c.Assert(err, IsNil) defer UnsetEnvironment(c, "DATA_SOURCE_PASS") - dsn, err := getDataSources() + dsn, err := GetDataSources() if err != nil { c.Errorf("Unexpected error reading datasources") } @@ -290,7 +289,7 @@ func (s *FunctionalSuite) TestParseConstLabels(c *C) { } for _, cs := range cases { - labels := parseConstLabels(cs.s) + labels := parseConstLabels(cs.s, promslog.NewNopLogger()) if !reflect.DeepEqual(labels, cs.labels) { c.Fatalf("labels not equal (%v -> %v)", labels, cs.labels) } @@ -320,7 +319,6 @@ func (checker *isNaNChecker) Check(params []interface{}, names []string) (result // test boolean metric type gets converted to float func (s *FunctionalSuite) TestBooleanConversionToValueAndString(c *C) { - type TestCase struct { input interface{} expectedString string @@ -389,7 +387,7 @@ func (s *FunctionalSuite) TestBooleanConversionToValueAndString(c *C) { } for _, cs := range cases { - value, ok := dbToFloat64(cs.input) + value, ok := dbToFloat64(cs.input, promslog.NewNopLogger()) if math.IsNaN(cs.expectedValue) { c.Assert(value, IsNaN) } else { @@ -397,7 +395,7 @@ func (s *FunctionalSuite) TestBooleanConversionToValueAndString(c *C) { } c.Assert(ok, Equals, cs.expectedOK) - count, ok := dbToUint64(cs.input) + count, ok := dbToUint64(cs.input, promslog.NewNopLogger()) c.Assert(count, Equals, cs.expectedCount) c.Assert(ok, Equals, cs.expectedOK) @@ -410,7 +408,7 @@ func (s *FunctionalSuite) TestBooleanConversionToValueAndString(c *C) { func (s *FunctionalSuite) TestParseUserQueries(c *C) { userQueriesData, err := os.ReadFile("./tests/user_queries_ok.yaml") if err == nil { - metricMaps, newQueryOverrides, err := parseUserQueries(userQueriesData) + metricMaps, newQueryOverrides, err := parseUserQueries(userQueriesData, promslog.NewNopLogger()) c.Assert(err, Equals, nil) c.Assert(metricMaps, NotNil) c.Assert(newQueryOverrides, NotNil) diff --git a/cmd/postgres_exporter/queries.go b/exporter/queries.go similarity index 71% rename from cmd/postgres_exporter/queries.go rename to exporter/queries.go index fa0b5c272..b0f64f657 100644 --- a/cmd/postgres_exporter/queries.go +++ b/exporter/queries.go @@ -11,14 +11,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package exporter import ( "errors" "fmt" + "log/slog" "github.com/blang/semver/v4" - "github.com/go-kit/log/level" "gopkg.in/yaml.v2" ) @@ -51,9 +51,9 @@ var queryOverrides = map[string][]OverrideQuery{ semver.MustParseRange(">=10.0.0"), ` SELECT *, - (case pg_is_in_recovery() when 't' then null else pg_current_wal_lsn() end) AS pg_current_wal_lsn, - (case pg_is_in_recovery() when 't' then null else pg_wal_lsn_diff(pg_current_wal_lsn(), pg_lsn('0/0'))::float end) AS pg_current_wal_lsn_bytes, - (case pg_is_in_recovery() when 't' then null else pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn)::float end) AS pg_wal_lsn_diff + (case pg_is_in_recovery() when 't' then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end) AS pg_current_wal_lsn, + (case pg_is_in_recovery() when 't' then pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_lsn('0/0'))::float else pg_wal_lsn_diff(pg_current_wal_lsn(), pg_lsn('0/0'))::float end) AS pg_current_wal_lsn_bytes, + (case pg_is_in_recovery() when 't' then pg_wal_lsn_diff(pg_last_wal_receive_lsn(), replay_lsn)::float else pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn)::float end) AS pg_wal_lsn_diff FROM pg_stat_replication `, }, @@ -61,8 +61,8 @@ var queryOverrides = map[string][]OverrideQuery{ semver.MustParseRange(">=9.2.0 <10.0.0"), ` SELECT *, - (case pg_is_in_recovery() when 't' then null else pg_current_xlog_location() end) AS pg_current_xlog_location, - (case pg_is_in_recovery() when 't' then null else pg_xlog_location_diff(pg_current_xlog_location(), replay_location)::float end) AS pg_xlog_location_diff + (case pg_is_in_recovery() when 't' then pg_last_xlog_receive_location() else pg_current_xlog_location() end) AS pg_current_xlog_location, + (case pg_is_in_recovery() when 't' then pg_xlog_location_diff(pg_last_xlog_receive_location(), replay_location)::float else pg_xlog_location_diff(pg_current_xlog_location(), replay_location)::float end) AS pg_xlog_location_diff FROM pg_stat_replication `, }, @@ -70,7 +70,7 @@ var queryOverrides = map[string][]OverrideQuery{ semver.MustParseRange("<9.2.0"), ` SELECT *, - (case pg_is_in_recovery() when 't' then null else pg_current_xlog_location() end) AS pg_current_xlog_location + (case pg_is_in_recovery() when 't' then pg_last_xlog_receive_location() else pg_current_xlog_location() end) AS pg_current_xlog_location FROM pg_stat_replication `, }, @@ -80,14 +80,16 @@ var queryOverrides = map[string][]OverrideQuery{ { semver.MustParseRange(">=9.4.0 <10.0.0"), ` - SELECT slot_name, database, active, pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn) + SELECT slot_name, database, active, + (case pg_is_in_recovery() when 't' then pg_xlog_location_diff(pg_last_xlog_receive_location(), restart_lsn) else pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn) end) as pg_xlog_location_diff FROM pg_replication_slots `, }, { semver.MustParseRange(">=10.0.0"), ` - SELECT slot_name, database, active, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) + SELECT slot_name, database, active, + (case pg_is_in_recovery() when 't' then pg_wal_lsn_diff(pg_last_wal_receive_lsn(), restart_lsn) else pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) end) as pg_wal_lsn_diff FROM pg_replication_slots `, }, @@ -114,6 +116,9 @@ var queryOverrides = map[string][]OverrideQuery{ tmp.state, tmp2.usename, tmp2.application_name, + tmp2.backend_type, + tmp2.wait_event_type, + tmp2.wait_event, COALESCE(count,0) as count, COALESCE(max_tx_duration,0) as max_tx_duration FROM @@ -132,9 +137,14 @@ var queryOverrides = map[string][]OverrideQuery{ state, usename, application_name, + backend_type, + wait_event_type, + wait_event, count(*) AS count, MAX(EXTRACT(EPOCH FROM now() - xact_start))::float AS max_tx_duration - FROM pg_stat_activity GROUP BY datname,state,usename,application_name) AS tmp2 + FROM pg_stat_activity + WHERE pid <> pg_backend_pid() + GROUP BY datname,state,usename,application_name,backend_type,wait_event_type,wait_event) AS tmp2 ON tmp.state = tmp2.state AND pg_database.datname = tmp2.datname `, }, @@ -148,7 +158,9 @@ var queryOverrides = map[string][]OverrideQuery{ application_name, COALESCE(count(*),0) AS count, COALESCE(MAX(EXTRACT(EPOCH FROM now() - xact_start))::float,0) AS max_tx_duration - FROM pg_stat_activity GROUP BY datname,usename,application_name + FROM pg_stat_activity + WHERE procpid <> pg_backend_pid() + GROUP BY datname,usename,application_name `, }, }, @@ -156,7 +168,7 @@ var queryOverrides = map[string][]OverrideQuery{ // Convert the query override file to the version-specific query override file // for the exporter. -func makeQueryOverrideMap(pgVersion semver.Version, queryOverrides map[string][]OverrideQuery) map[string]string { +func makeQueryOverrideMap(pgVersion semver.Version, queryOverrides map[string][]OverrideQuery, logger *slog.Logger) map[string]string { resultMap := make(map[string]string) for name, overrideDef := range queryOverrides { // Find a matching semver. We make it an error to have overlapping @@ -170,7 +182,7 @@ func makeQueryOverrideMap(pgVersion semver.Version, queryOverrides map[string][] } } if !matched { - level.Warn(logger).Log("msg", "No query matched override, disabling metric space", "name", name) + logger.Warn("No query matched override, disabling metric space", "name", name) resultMap[name] = "" } } @@ -178,7 +190,7 @@ func makeQueryOverrideMap(pgVersion semver.Version, queryOverrides map[string][] return resultMap } -func parseUserQueries(content []byte) (map[string]intermediateMetricMap, map[string]string, error) { +func parseUserQueries(content []byte, logger *slog.Logger) (map[string]intermediateMetricMap, map[string]string, error) { var userQueries UserQueries err := yaml.Unmarshal(content, &userQueries) @@ -191,7 +203,7 @@ func parseUserQueries(content []byte) (map[string]intermediateMetricMap, map[str newQueryOverrides := make(map[string]string) for metric, specs := range userQueries { - level.Debug(logger).Log("msg", "New user metric namespace from YAML metric", "metric", metric, "cache_seconds", specs.CacheSeconds) + logger.Debug("New user metric namespace from YAML metric", "metric", metric, "cache_seconds", specs.CacheSeconds) newQueryOverrides[metric] = specs.Query metricMap, ok := metricMaps[metric] if !ok { @@ -231,21 +243,21 @@ func parseUserQueries(content []byte) (map[string]intermediateMetricMap, map[str // queries. // TODO: test code for all cu. // TODO: the YAML this supports is "non-standard" - we should move away from it. -func addQueries(content []byte, pgVersion semver.Version, server *Server) error { - metricMaps, newQueryOverrides, err := parseUserQueries(content) +func addQueries(content []byte, pgVersion semver.Version, server *Server, metricPrefix string) error { + metricMaps, newQueryOverrides, err := parseUserQueries(content, server.logger) if err != nil { return err } // Convert the loaded metric map into exporter representation - partialExporterMap := makeDescMap(pgVersion, server.labels, metricMaps) + partialExporterMap := makeDescMap(pgVersion, server.labels, metricMaps, server.logger, metricPrefix) // Merge the two maps (which are now quite flatteend) for k, v := range partialExporterMap { _, found := server.metricMap[k] if found { - level.Debug(logger).Log("msg", "Overriding metric from user YAML file", "metric", k) + server.logger.Debug("Overriding metric from user YAML file", "metric", k) } else { - level.Debug(logger).Log("msg", "Adding new metric from user YAML file", "metric", k) + server.logger.Debug("Adding new metric from user YAML file", "metric", k) } server.metricMap[k] = v } @@ -254,9 +266,9 @@ func addQueries(content []byte, pgVersion semver.Version, server *Server) error for k, v := range newQueryOverrides { _, found := server.queryOverrides[k] if found { - level.Debug(logger).Log("msg", "Overriding query override from user YAML file", "query_override", k) + server.logger.Debug("Overriding query override from user YAML file", "query_override", k) } else { - level.Debug(logger).Log("msg", "Adding new query override from user YAML file", "query_override", k) + server.logger.Debug("Adding new query override from user YAML file", "query_override", k) } server.queryOverrides[k] = v } diff --git a/cmd/postgres_exporter/server.go b/exporter/server.go similarity index 84% rename from cmd/postgres_exporter/server.go rename to exporter/server.go index bcfee6812..2aa2c6d67 100644 --- a/cmd/postgres_exporter/server.go +++ b/exporter/server.go @@ -11,17 +11,18 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package exporter import ( "database/sql" "fmt" + "log/slog" "sync" "time" "github.com/blang/semver/v4" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" ) // Server describes a connection to Postgres. @@ -43,6 +44,7 @@ type Server struct { // Currently cached metrics metricCache map[string]cachedMetrics cacheMtx sync.Mutex + logger *slog.Logger } // ServerOpt configures a server. @@ -57,6 +59,12 @@ func ServerWithLabels(labels prometheus.Labels) ServerOpt { } } +func ServerWithLogger(logger *slog.Logger) ServerOpt { + return func(s *Server) { + s.logger = logger + } +} + // NewServer establishes a new connection using DSN. func NewServer(dsn string, opts ...ServerOpt) (*Server, error) { fingerprint, err := parseFingerprint(dsn) @@ -71,8 +79,6 @@ func NewServer(dsn string, opts ...ServerOpt) (*Server, error) { db.SetMaxOpenConns(1) db.SetMaxIdleConns(1) - level.Info(logger).Log("msg", "Established new database connection", "fingerprint", fingerprint) - s := &Server{ db: db, master: false, @@ -80,12 +86,15 @@ func NewServer(dsn string, opts ...ServerOpt) (*Server, error) { serverLabelName: fingerprint, }, metricCache: make(map[string]cachedMetrics), + logger: promslog.NewNopLogger(), } for _, opt := range opts { opt(s) } + s.logger.Info("Established new database connection", "fingerprint", fingerprint) + return s, nil } @@ -98,7 +107,7 @@ func (s *Server) Close() error { func (s *Server) Ping() error { if err := s.db.Ping(); err != nil { if cerr := s.Close(); cerr != nil { - level.Error(logger).Log("msg", "Error while closing non-pinging DB connection", "server", s, "err", cerr) + s.logger.Error("Error while closing non-pinging DB connection", "server", s, "err", cerr) } return err } @@ -120,12 +129,17 @@ func (s *Server) Scrape(ch chan<- prometheus.Metric, disableSettingsMetrics bool if !disableSettingsMetrics && s.master { if err = querySettings(ch, s); err != nil { err = fmt.Errorf("error retrieving settings: %s", err) + return err } } errMap := queryNamespaceMappings(ch, s) - if len(errMap) > 0 { - err = fmt.Errorf("queryNamespaceMappings returned %d errors", len(errMap)) + if len(errMap) == 0 { + return nil + } + err = fmt.Errorf("queryNamespaceMappings errors encountered") + for namespace, errStr := range errMap { + err = fmt.Errorf("%s, namespace: %s error: %s", err, namespace, errStr) } return err @@ -169,6 +183,7 @@ func (s *Servers) GetServer(dsn string) (*Server, error) { s.servers[dsn] = server } if err = server.Ping(); err != nil { + server.Close() delete(s.servers, dsn) time.Sleep(time.Duration(errCount) * time.Second) continue @@ -184,7 +199,7 @@ func (s *Servers) Close() { defer s.m.Unlock() for _, server := range s.servers { if err := server.Close(); err != nil { - level.Error(logger).Log("msg", "Failed to close connection", "server", server, "err", err) + server.logger.Error("Failed to close connection", "server", server, "err", err) } } } diff --git a/cmd/postgres_exporter/tests/docker-postgres-replication/Dockerfile b/exporter/tests/docker-postgres-replication/Dockerfile similarity index 100% rename from cmd/postgres_exporter/tests/docker-postgres-replication/Dockerfile rename to exporter/tests/docker-postgres-replication/Dockerfile diff --git a/cmd/postgres_exporter/tests/docker-postgres-replication/Dockerfile.p2 b/exporter/tests/docker-postgres-replication/Dockerfile.p2 similarity index 100% rename from cmd/postgres_exporter/tests/docker-postgres-replication/Dockerfile.p2 rename to exporter/tests/docker-postgres-replication/Dockerfile.p2 diff --git a/cmd/postgres_exporter/tests/docker-postgres-replication/README.md b/exporter/tests/docker-postgres-replication/README.md similarity index 100% rename from cmd/postgres_exporter/tests/docker-postgres-replication/README.md rename to exporter/tests/docker-postgres-replication/README.md diff --git a/cmd/postgres_exporter/tests/docker-postgres-replication/docker-compose.yml b/exporter/tests/docker-postgres-replication/docker-compose.yml similarity index 100% rename from cmd/postgres_exporter/tests/docker-postgres-replication/docker-compose.yml rename to exporter/tests/docker-postgres-replication/docker-compose.yml diff --git a/cmd/postgres_exporter/tests/docker-postgres-replication/docker-entrypoint.sh b/exporter/tests/docker-postgres-replication/docker-entrypoint.sh similarity index 100% rename from cmd/postgres_exporter/tests/docker-postgres-replication/docker-entrypoint.sh rename to exporter/tests/docker-postgres-replication/docker-entrypoint.sh diff --git a/cmd/postgres_exporter/tests/docker-postgres-replication/setup-replication.sh b/exporter/tests/docker-postgres-replication/setup-replication.sh similarity index 100% rename from cmd/postgres_exporter/tests/docker-postgres-replication/setup-replication.sh rename to exporter/tests/docker-postgres-replication/setup-replication.sh diff --git a/cmd/postgres_exporter/tests/test-smoke b/exporter/tests/test-smoke similarity index 100% rename from cmd/postgres_exporter/tests/test-smoke rename to exporter/tests/test-smoke diff --git a/cmd/postgres_exporter/tests/user_queries_ok.yaml b/exporter/tests/user_queries_ok.yaml similarity index 100% rename from cmd/postgres_exporter/tests/user_queries_ok.yaml rename to exporter/tests/user_queries_ok.yaml diff --git a/cmd/postgres_exporter/tests/user_queries_test.yaml b/exporter/tests/user_queries_test.yaml similarity index 100% rename from cmd/postgres_exporter/tests/user_queries_test.yaml rename to exporter/tests/user_queries_test.yaml diff --git a/cmd/postgres_exporter/tests/username_file b/exporter/tests/username_file similarity index 100% rename from cmd/postgres_exporter/tests/username_file rename to exporter/tests/username_file diff --git a/cmd/postgres_exporter/tests/userpass_file b/exporter/tests/userpass_file similarity index 100% rename from cmd/postgres_exporter/tests/userpass_file rename to exporter/tests/userpass_file diff --git a/cmd/postgres_exporter/util.go b/exporter/util.go similarity index 88% rename from cmd/postgres_exporter/util.go rename to exporter/util.go index 3a125f1d3..895c2d07e 100644 --- a/cmd/postgres_exporter/util.go +++ b/exporter/util.go @@ -11,29 +11,20 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package exporter import ( "fmt" + "log/slog" "math" "net/url" "strconv" "strings" "time" - "github.com/go-kit/log/level" "github.com/lib/pq" ) -func contains(a []string, x string) bool { - for _, n := range a { - if x == n { - return true - } - } - return false -} - // convert a string to the corresponding ColumnUsage func stringToColumnUsage(s string) (ColumnUsage, error) { var u ColumnUsage @@ -69,7 +60,7 @@ func stringToColumnUsage(s string) (ColumnUsage, error) { // Convert database.sql types to float64s for Prometheus consumption. Null types are mapped to NaN. string and []byte // types are mapped as NaN and !ok -func dbToFloat64(t interface{}) (float64, bool) { +func dbToFloat64(t interface{}, logger *slog.Logger) (float64, bool) { switch v := t.(type) { case int64: return float64(v), true @@ -82,14 +73,14 @@ func dbToFloat64(t interface{}) (float64, bool) { strV := string(v) result, err := strconv.ParseFloat(strV, 64) if err != nil { - level.Info(logger).Log("msg", "Could not parse []byte", "err", err) + logger.Info("Could not parse []byte", "err", err) return math.NaN(), false } return result, true case string: result, err := strconv.ParseFloat(v, 64) if err != nil { - level.Info(logger).Log("msg", "Could not parse string", "err", err) + logger.Info("Could not parse string", "err", err) return math.NaN(), false } return result, true @@ -107,7 +98,7 @@ func dbToFloat64(t interface{}) (float64, bool) { // Convert database.sql types to uint64 for Prometheus consumption. Null types are mapped to 0. string and []byte // types are mapped as 0 and !ok -func dbToUint64(t interface{}) (uint64, bool) { +func dbToUint64(t interface{}, logger *slog.Logger) (uint64, bool) { switch v := t.(type) { case uint64: return v, true @@ -122,14 +113,14 @@ func dbToUint64(t interface{}) (uint64, bool) { strV := string(v) result, err := strconv.ParseUint(strV, 10, 64) if err != nil { - level.Info(logger).Log("msg", "Could not parse []byte", "err", err) + logger.Info("Could not parse []byte", "err", err) return 0, false } return result, true case string: result, err := strconv.ParseUint(v, 10, 64) if err != nil { - level.Info(logger).Log("msg", "Could not parse string", "err", err) + logger.Info("Could not parse string", "err", err) return 0, false } return result, true @@ -160,7 +151,7 @@ func dbToString(t interface{}) (string, bool) { // Try and convert to string return string(v), true case string: - return v, true + return strings.ToValidUTF8(v, "�"), true case bool: if v { return "true", true diff --git a/go.mod b/go.mod index 1e1d70c4e..c68d6d359 100644 --- a/go.mod +++ b/go.mod @@ -1,17 +1,16 @@ module github.com/prometheus-community/postgres_exporter -go 1.19 +go 1.24.0 require ( - github.com/DATA-DOG/go-sqlmock v1.5.0 - github.com/alecthomas/kingpin/v2 v2.3.2 + github.com/DATA-DOG/go-sqlmock v1.5.2 + github.com/alecthomas/kingpin/v2 v2.4.0 github.com/blang/semver/v4 v4.0.0 - github.com/go-kit/log v0.2.1 github.com/lib/pq v1.10.9 - github.com/prometheus/client_golang v1.16.0 - github.com/prometheus/client_model v0.4.0 - github.com/prometheus/common v0.44.0 - github.com/prometheus/exporter-toolkit v0.10.0 + github.com/prometheus/client_golang v1.23.2 + github.com/prometheus/client_model v0.6.2 + github.com/prometheus/common v0.67.5 + github.com/prometheus/exporter-toolkit v0.15.1 github.com/smartystreets/goconvey v1.8.1 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c gopkg.in/yaml.v2 v2.4.0 @@ -19,29 +18,32 @@ require ( ) require ( - github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 // indirect + github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b // indirect github.com/beorn7/perks v1.0.1 // indirect - github.com/cespare/xxhash/v2 v2.2.0 // indirect - github.com/coreos/go-systemd/v22 v22.5.0 // indirect - github.com/go-logfmt/logfmt v0.5.1 // indirect - github.com/golang/protobuf v1.5.3 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/coreos/go-systemd/v22 v22.6.0 // indirect + github.com/golang-jwt/jwt/v5 v5.3.0 // indirect + github.com/google/uuid v1.6.0 // indirect github.com/gopherjs/gopherjs v1.17.2 // indirect github.com/jpillora/backoff v1.0.0 // indirect github.com/jtolds/gls v4.20.0+incompatible // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect - github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect + github.com/mdlayher/socket v0.4.1 // indirect + github.com/mdlayher/vsock v1.2.1 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect - github.com/prometheus/procfs v0.10.1 // indirect + github.com/prometheus/procfs v0.16.1 // indirect github.com/rogpeppe/go-internal v1.10.0 // indirect github.com/smarty/assertions v1.15.0 // indirect github.com/xhit/go-str2duration/v2 v2.1.0 // indirect - golang.org/x/crypto v0.8.0 // indirect - golang.org/x/net v0.10.0 // indirect - golang.org/x/oauth2 v0.8.0 // indirect - golang.org/x/sync v0.2.0 // indirect - golang.org/x/sys v0.8.0 // indirect - golang.org/x/text v0.9.0 // indirect - google.golang.org/appengine v1.6.7 // indirect - google.golang.org/protobuf v1.30.0 // indirect + go.yaml.in/yaml/v2 v2.4.3 // indirect + golang.org/x/crypto v0.46.0 // indirect + golang.org/x/net v0.48.0 // indirect + golang.org/x/oauth2 v0.34.0 // indirect + golang.org/x/sync v0.19.0 // indirect + golang.org/x/sys v0.39.0 // indirect + golang.org/x/text v0.32.0 // indirect + golang.org/x/time v0.14.0 // indirect + google.golang.org/protobuf v1.36.11 // indirect ) diff --git a/go.sum b/go.sum index c6efce431..a40e54c06 100644 --- a/go.sum +++ b/go.sum @@ -1,38 +1,36 @@ -github.com/DATA-DOG/go-sqlmock v1.5.0 h1:Shsta01QNfFxHCfpW6YH2STWB0MudeXXEWMr20OEh60= -github.com/DATA-DOG/go-sqlmock v1.5.0/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= -github.com/alecthomas/kingpin/v2 v2.3.2 h1:H0aULhgmSzN8xQ3nX1uxtdlTHYoPLu5AhHxWrKI6ocU= -github.com/alecthomas/kingpin/v2 v2.3.2/go.mod h1:0gyi0zQnjuFk8xrkNKamJoyUo382HRL7ATRpFZCw6tE= -github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 h1:s6gZFSlWYmbqAuRjVTiNNhvNRfY2Wxp9nhfyel4rklc= -github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE= +github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU= +github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU= +github.com/alecthomas/kingpin/v2 v2.4.0 h1:f48lwail6p8zpO1bC4TxtqACaGqHYA22qkHjHpqDjYY= +github.com/alecthomas/kingpin/v2 v2.4.0/go.mod h1:0gyi0zQnjuFk8xrkNKamJoyUo382HRL7ATRpFZCw6tE= +github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b h1:mimo19zliBX/vSQ6PWWSL9lK8qwHozUj03+zLoEB8O0= +github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b/go.mod h1:fvzegU4vN3H1qMT+8wDmzjAcDONcgo2/SZ/TyfdUOFs= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= -github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= -github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/coreos/go-systemd/v22 v22.6.0 h1:aGVa/v8B7hpb0TKl0MWoAavPDmHvobFe5R5zn0bCJWo= +github.com/coreos/go-systemd/v22 v22.6.0/go.mod h1:iG+pp635Fo7ZmV/j14KUcmEyWF+0X7Lua8rrTWzYgWU= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU= -github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0= -github.com/go-logfmt/logfmt v0.5.1 h1:otpy5pqBCBZ1ng9RQ0dPu4PN7ba75Y/aA+UpowDyNVA= -github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= -github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= +github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g= github.com/gopherjs/gopherjs v1.17.2/go.mod h1:pRRIvn/QzFLrKfvEz3qUuEhtE/zLCWfreZ6J5gM2i+k= github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= +github.com/kisielk/sqlstruct v0.0.0-20201105191214-5f3e10d3ab46/go.mod h1:yyMNCyc/Ib3bDTKd379tNMpB/7/H5TjM2Y9QJ5THLbE= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -40,25 +38,31 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= -github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= -github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= +github.com/mdlayher/socket v0.4.1 h1:eM9y2/jlbs1M615oshPQOHZzj6R6wMT7bX5NPiQvn2U= +github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA= +github.com/mdlayher/vsock v1.2.1 h1:pC1mTJTvjo1r9n9fbm7S1j04rCgCzhCOS5DY0zqHlnQ= +github.com/mdlayher/vsock v1.2.1/go.mod h1:NRfCibel++DgeMD8z/hP+PPTjlNJsdPOmxcnENvE+SE= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.16.0 h1:yk/hx9hDbrGHovbci4BY+pRMfSuuat626eFsHb7tmT8= -github.com/prometheus/client_golang v1.16.0/go.mod h1:Zsulrv/L9oM40tJ7T815tM89lFEugiJ9HzIqaAx4LKc= -github.com/prometheus/client_model v0.4.0 h1:5lQXD3cAg1OXBf4Wq03gTrXHeaV0TQvGfUooCfx1yqY= -github.com/prometheus/client_model v0.4.0/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU= -github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY= -github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY= -github.com/prometheus/exporter-toolkit v0.10.0 h1:yOAzZTi4M22ZzVxD+fhy1URTuNRj/36uQJJ5S8IPza8= -github.com/prometheus/exporter-toolkit v0.10.0/go.mod h1:+sVFzuvV5JDyw+Ih6p3zFxZNVnKQa3x5qPmDSiPu4ZY= -github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg= -github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.67.5 h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4= +github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw= +github.com/prometheus/exporter-toolkit v0.15.1 h1:XrGGr/qWl8Gd+pqJqTkNLww9eG8vR/CoRk0FubOKfLE= +github.com/prometheus/exporter-toolkit v0.15.1/go.mod h1:P/NR9qFRGbCFgpklyhix9F6v6fFr/VQB/CVsrMDGKo4= +github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= +github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= @@ -67,41 +71,42 @@ github.com/smarty/assertions v1.15.0/go.mod h1:yABtdzeQs6l1brC900WlRNwj6ZR55d7B+ github.com/smartystreets/goconvey v1.8.1 h1:qGjIddxOk4grTu9JPOU31tVfq3cNdBlNa5sSznIX1xY= github.com/smartystreets/goconvey v1.8.1/go.mod h1:+/u4qLyY6x1jReYOp7GOM2FSt8aP9CzCZL03bI28W60= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8Ydu2Bstc= github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.8.0 h1:pd9TJtTueMTVQXzk8E2XESSMQDj/U7OUu0PqJqPXQjQ= -golang.org/x/crypto v0.8.0/go.mod h1:mRqEX+O9/h5TFCrQhkgjo2yKi0yYA+9ecGkdQoHrywE= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M= -golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= -golang.org/x/oauth2 v0.8.0 h1:6dkIjl3j3LtZ/O3sTgZTMsLKSftL/B8Zgq4huOIIUu8= -golang.org/x/oauth2 v0.8.0/go.mod h1:yr7u4HXZRm1R1kBWqr/xKNqewf0plRYoB7sla+BCIXE= -golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.2.0 h1:PUR+T4wwASmuSTYdKjYHI5TD22Wy5ogLU5qZCOLxBrI= -golang.org/x/sync v0.2.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= -golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= -golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= -google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= -google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= +go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= +golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU= +golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0= +golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU= +golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY= +golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= +golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= +golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= +golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= +golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= +golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/postgres_mixin/.lint b/postgres_mixin/.lint new file mode 100644 index 000000000..65d621f69 --- /dev/null +++ b/postgres_mixin/.lint @@ -0,0 +1,15 @@ +--- +exclusions: + panel-units-rule: + reason: Ignoring so far, need to address this in future + panel-title-description-rule: + reason: Ignoring so far, need to address this in future + panel-datasource-rule: + reason: "Loki datasource variable is being named as loki_datasource now while linter expects 'datasource'" + template-datasource-rule: + reason: "Based on new convention we are using variable names prometheus_datasource and loki_datasource where as linter expects 'datasource'" + alert-name-camelcase: + reason: QPS is a common acronym (Queries Per Second) and should be allowed + entries: + - alert: PostgreSQLQPS + \ No newline at end of file diff --git a/postgres_mixin/alerts/postgres.libsonnet b/postgres_mixin/alerts/postgres.libsonnet index 4b0275df1..e3c8ee278 100644 --- a/postgres_mixin/alerts/postgres.libsonnet +++ b/postgres_mixin/alerts/postgres.libsonnet @@ -7,16 +7,16 @@ { alert: 'PostgreSQLMaxConnectionsReached', annotations: { - description: '{{ $labels.instance }} is exceeding the currently configured maximum Postgres connection limit (current value: {{ $value }}s). Services may be degraded - please take immediate action (you probably need to increase max_connections in the Docker image and re-deploy.', - summary: '{{ $labels.instance }} has maxed out Postgres connections.', + description: '{{ $labels.instance }} is exceeding the currently configured maximum Postgres connection limit (current value: {{ $value }}s). Services may be degraded - please take immediate action (you probably need to increase max_connections in the Docker image and re-deploy).', + summary: 'Postgres connections count is over the maximum amount.', }, expr: ||| - sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_stat_activity_count{%(postgresExporterSelector)s}) >= - sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_settings_max_connections{%(postgresExporterSelector)s}) - - sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s}) - ||| % $._config, + sum by (%(agg)s) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s}) + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, 'for': '1m', labels: { severity: 'warning', @@ -26,17 +26,17 @@ alert: 'PostgreSQLHighConnections', annotations: { description: '{{ $labels.instance }} is exceeding 80% of the currently configured maximum Postgres connection limit (current value: {{ $value }}s). Please check utilization graphs and confirm if this is normal service growth, abuse or an otherwise temporary condition or if new resources need to be provisioned (or the limits increased, which is mostly likely).', - summary: '{{ $labels.instance }} is over 80% of max Postgres connections.', + summary: 'Postgres connections count is over 80% of maximum amount.', }, expr: ||| - sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_stat_activity_count{%(postgresExporterSelector)s}) > ( - sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_settings_max_connections{%(postgresExporterSelector)s}) - - sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s}) + sum by (%(agg)s) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s}) ) * 0.8 - ||| % $._config, + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, 'for': '10m', labels: { severity: 'warning', @@ -46,7 +46,7 @@ alert: 'PostgreSQLDown', annotations: { description: '{{ $labels.instance }} is rejecting query requests from the exporter, and thus probably not allowing DNS requests to work either. User services should not be effected provided at least 1 node is still alive.', - summary: 'PostgreSQL is not processing queries: {{ $labels.instance }}', + summary: 'PostgreSQL is not processing queries.', }, expr: 'pg_up{%(postgresExporterSelector)s} != 1' % $._config, 'for': '1m', @@ -58,15 +58,15 @@ alert: 'PostgreSQLSlowQueries', annotations: { description: 'PostgreSQL high number of slow queries {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }} ', - summary: 'PostgreSQL high number of slow on {{ $labels.cluster }} for database {{ $labels.datname }} ', + summary: 'PostgreSQL high number of slow queries.', }, expr: ||| - avg by (datname) ( + avg by (datname, %(agg)s) ( rate ( - pg_stat_activity_max_tx_duration{datname!~"template.*",%(postgresExporterSelector)s}[2m] + pg_stat_activity_max_tx_duration{%(dbNameFilter)s, %(postgresExporterSelector)s}[2m] ) ) > 2 * 60 - ||| % $._config, + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, 'for': '2m', labels: { severity: 'warning', @@ -76,19 +76,19 @@ alert: 'PostgreSQLQPS', annotations: { description: 'PostgreSQL high number of queries per second on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', - summary: 'PostgreSQL high number of queries per second {{ $labels.cluster }} for database {{ $labels.datname }}', + summary: 'PostgreSQL high number of queries per second.', }, expr: ||| - avg by (datname) ( + avg by (datname, %(agg)s) ( irate( - pg_stat_database_xact_commit{datname!~"template.*",%(postgresExporterSelector)s}[5m] + pg_stat_database_xact_commit{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m] ) + irate( - pg_stat_database_xact_rollback{datname!~"template.*",%(postgresExporterSelector)s}[5m] + pg_stat_database_xact_rollback{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m] ) ) > 10000 - ||| % $._config, + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, 'for': '5m', labels: { severity: 'warning', @@ -98,28 +98,165 @@ alert: 'PostgreSQLCacheHitRatio', annotations: { description: 'PostgreSQL low on cache hit rate on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', - summary: 'PostgreSQL low cache hit rate on {{ $labels.cluster }} for database {{ $labels.datname }}', + summary: 'PostgreSQL low cache hit rate.', }, expr: ||| - avg by (datname) ( - rate(pg_stat_database_blks_hit{datname!~"template.*",%(postgresExporterSelector)s}[5m]) + avg by (datname, %(agg)s) ( + rate(pg_stat_database_blks_hit{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m]) / ( rate( - pg_stat_database_blks_hit{datname!~"template.*",%(postgresExporterSelector)s}[5m] + pg_stat_database_blks_hit{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m] ) + rate( - pg_stat_database_blks_read{datname!~"template.*",%(postgresExporterSelector)s}[5m] + pg_stat_database_blks_read{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m] ) ) ) < 0.98 + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresHasTooManyRollbacks', + annotations: { + description: 'PostgreSQL has too many rollbacks on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', + summary: 'PostgreSQL has too many rollbacks.', + }, + expr: ||| + avg without(pod, instance) + (rate(pg_stat_database_xact_rollback{%(dbNameFilter)s}[5m]) / + (rate(pg_stat_database_xact_commit{%(dbNameFilter)s}[5m]) + rate(pg_stat_database_xact_rollback{%(dbNameFilter)s}[5m]))) > 0.10 + ||| % $._config, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresHasHighDeadLocks', + annotations: { + description: 'PostgreSQL has too high deadlocks on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', + summary: 'PostgreSQL has high number of deadlocks.', + }, + expr: ||| + max without(pod, instance) (rate(pg_stat_database_deadlocks{%(dbNameFilter)s}[5m]) * 60) > 5 ||| % $._config, 'for': '5m', labels: { severity: 'warning', }, }, + { + alert: 'PostgresAcquiredTooManyLocks', + annotations: { + description: 'PostgreSQL has acquired too many locks on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}', + summary: 'PostgreSQL has high number of acquired locks.', + }, + expr: ||| + max by(datname, %(agg)s) ( + (pg_locks_count{%(dbNameFilter)s}) + / + on(%(aggWithoutServer)s) group_left(server) ( + pg_settings_max_locks_per_transaction{} * pg_settings_max_connections{} + ) + ) > 0.20 + ||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), aggWithoutServer: std.join(',', std.filter(function(x) x != "server", $._config.groupLabels + $._config.instanceLabels)) }, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresReplicationLaggingMore1Hour', + annotations: { + description: '{{ $labels.instance }} replication lag exceeds 1 hour. Check for network issues or load imbalances.', + summary: 'PostgreSQL replication lagging more than 1 hour.', + }, + expr: ||| + (pg_replication_lag{} > 3600) and on (%(agg)s) (pg_replication_is_replica{} == 1) + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresHasReplicationSlotUsed', + annotations: { + description: '{{ $labels.instance }} has replication slots that are not used, which might lead to replication lag or data inconsistency.', + summary: 'PostgreSQL has unused replication slots.', + }, + expr: 'pg_replication_slots_active{} == 0', + 'for': '30m', + labels: { + severity: 'critical', + }, + }, + { + alert: 'PostgresReplicationRoleChanged', + annotations: { + description: '{{ $labels.instance }} replication role has changed. Verify if this is expected or if it indicates a failover.', + summary: 'PostgreSQL replication role change detected.', + }, + expr: 'pg_replication_is_replica{} and changes(pg_replication_is_replica{}[1m]) > 0', + labels: { + severity: 'warning', + }, + }, + { + alert: 'PostgresHasExporterErrors', + annotations: { + description: '{{ $labels.instance }} exporter is experiencing errors. Verify exporter health and configuration.', + summary: 'PostgreSQL exporter errors detected.', + }, + expr: 'pg_exporter_last_scrape_error{} > 0', + 'for': '30m', + labels: { + severity: 'critical', + }, + }, + { + alert: 'PostgresTablesNotVaccumed', + annotations: { + description: '{{ $labels.instance }} tables have not been vacuumed recently within the last hour, which may lead to performance degradation.', + summary: 'PostgreSQL tables not vacuumed.', + }, + expr: ||| + group without(pod, instance)( + timestamp( + pg_stat_user_tables_n_dead_tup{} > + pg_stat_user_tables_n_live_tup{} + * on(%(agg)s) group_left pg_settings_autovacuum_vacuum_scale_factor{} + + on(%(agg)s) group_left pg_settings_autovacuum_vacuum_threshold{} + ) + < time() - 36000 + ) + ||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) }, + 'for': '30m', + labels: { + severity: 'critical', + }, + }, + { + alert: 'PostgresTooManyCheckpointsRequested', + annotations: { + description: '{{ $labels.instance }} is requesting too many checkpoints, which may lead to performance degradation.', + summary: 'PostgreSQL too many checkpoints requested.', + }, + expr: ||| + rate(pg_stat_bgwriter_checkpoints_timed_total{}[5m]) / + (rate(pg_stat_bgwriter_checkpoints_timed_total{}[5m]) + rate(pg_stat_bgwriter_checkpoints_req_total{}[5m])) + < 0.5 + |||, + 'for': '5m', + labels: { + severity: 'warning', + }, + }, ], }, ], diff --git a/postgres_mixin/config.libsonnet b/postgres_mixin/config.libsonnet index d7bd7ac1b..4ea3b7e52 100644 --- a/postgres_mixin/config.libsonnet +++ b/postgres_mixin/config.libsonnet @@ -1,5 +1,9 @@ { _config+:: { + dbNameFilter: 'datname!~"template.*"', postgresExporterSelector: '', + groupLabels: if self.enableMultiCluster then ['job', 'cluster'] else ['job'], + instanceLabels: ['instance', 'server'], + enableMultiCluster: false, }, } diff --git a/postgres_mixin/dashboards/postgres-overview.json b/postgres_mixin/dashboards/postgres-overview.json index 9bf41be6a..8baf6fbb3 100644 --- a/postgres_mixin/dashboards/postgres-overview.json +++ b/postgres_mixin/dashboards/postgres-overview.json @@ -3,81 +3,119 @@ "list": [ { "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] }, "description": "Performance metrics for Postgres", "editable": true, + "fiscalYearStartMonth": 0, "gnetId": 455, "graphTooltip": 0, - "id": 1, - "iteration": 1603191461722, + "id": 38, "links": [], + "liveNow": false, "panels": [ { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 2, + "w": 8, + "x": 0, + "y": 0 + }, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "span": 4, + "title": "PostgreSQL overview", + "titleSize": "h6", + "type": "row" + }, + { + + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "decimals": 0, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, - "w": 20, + "w": 4, "x": 0, "y": 0 }, - "hiddenSeries": false, - "id": 1, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "max": true, - "min": true, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, + "id": 11, "links": [], - "nullPointMode": "connected", + "maxDataPoints": 100, "options": { - "alertThreshold": true + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "textMode": "auto" }, - "percentage": false, - "pluginVersion": "7.2.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { - "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_fetched{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_xact_commit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) + sum(irate(pg_stat_database_xact_rollback{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -94,7 +132,6 @@ } ], "intervalFactor": 2, - "legendFormat": "fetched", "measurement": "postgresql", "policy": "default", "refId": "A", @@ -103,7 +140,7 @@ [ { "params": [ - "tup_fetched" + "xact_commit" ], "type": "field" }, @@ -119,7 +156,7 @@ } ] ], - "step": 120, + "step": 1800, "tags": [ { "key": "instance", @@ -127,11 +164,101 @@ "value": "/^$instance$/" } ] + } + ], + "title": "QPS", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 20, + "x": 4, + "y": 0 + }, + "id": 1, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "min" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.4.3", + "targets": [ { "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_returned{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_fetched{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -148,10 +275,10 @@ } ], "intervalFactor": 2, - "legendFormat": "returned", + "legendFormat": "fetched", "measurement": "postgresql", "policy": "default", - "refId": "B", + "refId": "A", "resultFormat": "time_series", "select": [ [ @@ -184,8 +311,11 @@ }, { "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_inserted{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_returned{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -202,10 +332,10 @@ } ], "intervalFactor": 2, - "legendFormat": "inserted", + "legendFormat": "returned", "measurement": "postgresql", "policy": "default", - "refId": "C", + "refId": "B", "resultFormat": "time_series", "select": [ [ @@ -238,8 +368,11 @@ }, { "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_updated{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_inserted{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -256,10 +389,10 @@ } ], "intervalFactor": 2, - "legendFormat": "updated", + "legendFormat": "inserted", "measurement": "postgresql", "policy": "default", - "refId": "D", + "refId": "C", "resultFormat": "time_series", "select": [ [ @@ -292,8 +425,11 @@ }, { "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_tup_deleted{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_updated{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -310,10 +446,10 @@ } ], "intervalFactor": 2, - "legendFormat": "deleted", + "legendFormat": "updated", "measurement": "postgresql", "policy": "default", - "refId": "E", + "refId": "D", "resultFormat": "time_series", "select": [ [ @@ -343,124 +479,14 @@ "value": "/^$instance$/" } ] - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Rows", - "tooltip": { - "msResolution": true, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "$datasource", - "decimals": 0, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 20, - "y": 0 - }, - "height": "55px", - "id": 11, - "interval": null, - "isNew": true, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": true, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { + "alias": "fetched", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(irate(pg_stat_database_xact_commit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) + sum(irate(pg_stat_database_xact_rollback{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(irate(pg_stat_database_tup_deleted{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", "format": "time_series", "groupBy": [ { @@ -477,15 +503,16 @@ } ], "intervalFactor": 2, + "legendFormat": "deleted", "measurement": "postgresql", "policy": "default", - "refId": "A", + "refId": "E", "resultFormat": "time_series", "select": [ [ { "params": [ - "xact_commit" + "tup_fetched" ], "type": "field" }, @@ -501,7 +528,7 @@ } ] ], - "step": 1800, + "step": 120, "tags": [ { "key": "instance", @@ -511,80 +538,100 @@ ] } ], - "thresholds": "", - "title": "QPS", - "transparent": true, - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" + "title": "Rows", + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "decimals": 1, - "editable": true, - "error": false, + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 7 }, - "hiddenSeries": false, "id": 2, - "isNew": true, - "legend": { - "alignAsTable": true, - "avg": true, - "current": false, - "hideZero": true, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "connected", "options": { - "alertThreshold": true + "legend": { + "calcs": [ + "mean", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "7.2.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_alloc{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_alloc_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -635,8 +682,11 @@ }, { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_backend_fsync{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_backend_fsync_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -687,8 +737,11 @@ }, { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_backend{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_backend_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -739,8 +792,11 @@ }, { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_clean{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_clean_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -791,8 +847,11 @@ }, { "alias": "Buffers Allocated", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "irate(pg_stat_bgwriter_buffers_checkpoint{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", + "expr": "irate(pg_stat_bgwriter_buffers_checkpoint_total{job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])", "format": "time_series", "groupBy": [ { @@ -842,104 +901,113 @@ ] } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Buffers", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "deadlocks" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-red", + "mode": "fixed" + } + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 7 }, - "hiddenSeries": false, "id": 3, - "isNew": true, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, "links": [], - "nullPointMode": "connected", "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "7.2.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { "alias": "conflicts", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(rate(pg_stat_database_deadlocks{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(pg_stat_database_deadlocks{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"})", "format": "time_series", "groupBy": [ { @@ -959,6 +1027,7 @@ "legendFormat": "deadlocks", "measurement": "postgresql", "policy": "default", + "range": true, "refId": "A", "resultFormat": "time_series", "select": [ @@ -990,8 +1059,12 @@ }, { "alias": "deadlocks", + "datasource": { + "uid": "$datasource" + }, "dsType": "prometheus", - "expr": "sum(rate(pg_stat_database_conflicts{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval]))", + "editorMode": "code", + "expr": "sum(pg_stat_database_conflicts{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"})", "format": "time_series", "groupBy": [ { @@ -1011,6 +1084,7 @@ "legendFormat": "conflicts", "measurement": "postgresql", "policy": "default", + "range": true, "refId": "B", "resultFormat": "time_series", "select": [ @@ -1041,204 +1115,187 @@ ] } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Conflicts/Deadlocks", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 14 }, - "hiddenSeries": false, "id": 12, - "isNew": true, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, "links": [], - "nullPointMode": "connected", "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": true, - "pluginVersion": "7.2.1", - "pointradius": 1, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { - "expr": "sum by (datname) (rate(pg_stat_database_blks_hit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) / (sum by (datname)(rate(pg_stat_database_blks_hit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) + sum by (datname)(rate(pg_stat_database_blks_read{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])))", + "datasource": { + "uid": "$datasource" + }, + "expr": "round(sum by (datname) (rate(pg_stat_database_blks_hit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) / (sum by (datname)(rate(pg_stat_database_blks_hit{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])) + sum by (datname)(rate(pg_stat_database_blks_read{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}[$__rate_interval])))*100,0.001)", "format": "time_series", - "intervalFactor": 2, "legendFormat": "{{datname}} - cache hit rate", "refId": "A", "step": 240 } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Cache hit ratio", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "editable": true, - "error": false, + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 12, "y": 14 }, - "hiddenSeries": false, "id": 13, - "isNew": true, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, "links": [], - "nullPointMode": "connected", "options": { - "alertThreshold": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } }, - "percentage": false, - "pluginVersion": "7.2.1", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "9.4.3", "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "pg_stat_database_numbackends{datname=~\"$db\",job=~\"$job\",instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 2, @@ -1247,51 +1304,13 @@ "step": 240 } ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, "title": "Number of active connections", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } + "type": "timeseries" } ], "refresh": false, - "schemaVersion": 26, + "revision": 1, + "schemaVersion": 38, "style": "dark", "tags": [ "postgres" @@ -1310,66 +1329,96 @@ "regex": "", "skipUrlSync": false, "type": "datasource" - }, + }, { "allValue": ".+", - "datasource": "$datasource", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "uid": "$datasource" + }, "definition": "label_values(pg_up, job)", "hide": 0, "includeAll": true, - "label": "job", + "label": "Job", "multi": true, "name": "job", "options": [], - "query": "label_values(pg_up, job)", - "refresh": 0, + "query": { + "query": "label_values(pg_up, job)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".+", - "datasource": "$datasource", - "definition": "", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "uid": "$datasource" + }, + "definition": "label_values(pg_up{job=~\"$job\"},instance)", "hide": 0, "includeAll": true, - "label": "instance", + "label": "Instance", "multi": true, "name": "instance", "options": [], - "query": "label_values(up{job=~\"$job\"},instance)", - "refresh": 1, + "query": { + "query": "label_values(pg_up{job=~\"$job\"},instance)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".+", - "datasource": "$datasource", - "definition": "label_values(pg_stat_database_tup_fetched{instance=~\"$instance\",datname!~\"template.*|postgres\"},datname)", + "datasource": { + "uid": "$datasource" + }, + "definition": "label_values(pg_stat_database_tup_fetched{job=~\"$job\",instance=~\"$instance\",datname!~\"template.*|postgres\"},datname)", "hide": 0, "includeAll": true, - "label": "db", + "label": "Database", "multi": false, "name": "db", "options": [], - "query": "label_values(pg_stat_database_tup_fetched{instance=~\"$instance\",datname!~\"template.*|postgres\"},datname)", - "refresh": 1, + "query": { + "query": "label_values(pg_stat_database_tup_fetched{job=~\"$job\",instance=~\"$instance\",datname!~\"template.*|postgres\"},datname)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, "regex": "", "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false @@ -1408,5 +1457,6 @@ "timezone": "browser", "title": "Postgres Overview", "uid": "wGgaPlciz", - "version": 5 -} + "version": 39, + "weekStart": "" +} \ No newline at end of file diff --git a/queries.yaml b/queries.yaml index 189ce0866..9e9ee167c 100644 --- a/queries.yaml +++ b/queries.yaml @@ -1,2 +1,2 @@ # Adding queries to this file is deprecated -# Example queries have been transformed into collectors. \ No newline at end of file +# Example queries have been transformed into collectors.