From 5e5a97dfd280b31c1cc2e6f04f5efd6b50a895b7 Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Sat, 28 Feb 2026 15:33:56 -0800 Subject: [PATCH] pg_buffercache: Add pg_buffercache_relation_stats() function This function returns an aggregation of buffer contents, grouped per relation file and fork (relfilenode, tablespace, database, fork number). This is often useful to understand which tables or indexes are currently in cache, and can show cache disruptions due to query activity when sampled over time. The existing pg_buffercache view can be used for this by grouping the result, but due to the number of buffer entries (one per page) this can be prohibitively expensive on large machines. Even with a small shared_buffers setting (128MB) the new function is 10x faster. Similar to the existing summary functions, this new function does not hold a lock whilst gathering its statistics. Author: Lukas Fittl Reviewed-by: Discussion: --- contrib/pg_buffercache/Makefile | 3 +- .../expected/pg_buffercache.out | 14 ++ contrib/pg_buffercache/meson.build | 1 + .../pg_buffercache--1.7--1.8.sql | 20 +++ contrib/pg_buffercache/pg_buffercache.control | 2 +- contrib/pg_buffercache/pg_buffercache_pages.c | 134 ++++++++++++++++++ contrib/pg_buffercache/sql/pg_buffercache.sql | 4 + doc/src/sgml/pgbuffercache.sgml | 130 +++++++++++++++++ src/tools/pgindent/typedefs.list | 2 + 9 files changed, 308 insertions(+), 2 deletions(-) create mode 100644 contrib/pg_buffercache/pg_buffercache--1.7--1.8.sql diff --git a/contrib/pg_buffercache/Makefile b/contrib/pg_buffercache/Makefile index 0e618f66aec6e..7fd5cdfc43d66 100644 --- a/contrib/pg_buffercache/Makefile +++ b/contrib/pg_buffercache/Makefile @@ -9,7 +9,8 @@ EXTENSION = pg_buffercache DATA = pg_buffercache--1.2.sql pg_buffercache--1.2--1.3.sql \ pg_buffercache--1.1--1.2.sql pg_buffercache--1.0--1.1.sql \ pg_buffercache--1.3--1.4.sql pg_buffercache--1.4--1.5.sql \ - pg_buffercache--1.5--1.6.sql pg_buffercache--1.6--1.7.sql + pg_buffercache--1.5--1.6.sql pg_buffercache--1.6--1.7.sql \ + 
pg_buffercache--1.7--1.8.sql PGFILEDESC = "pg_buffercache - monitoring of shared buffer cache in real-time" REGRESS = pg_buffercache pg_buffercache_numa diff --git a/contrib/pg_buffercache/expected/pg_buffercache.out b/contrib/pg_buffercache/expected/pg_buffercache.out index 886dea770f626..cb5507a0d9263 100644 --- a/contrib/pg_buffercache/expected/pg_buffercache.out +++ b/contrib/pg_buffercache/expected/pg_buffercache.out @@ -33,6 +33,12 @@ SELECT count(*) > 0 FROM pg_buffercache_usage_counts() WHERE buffers >= 0; t (1 row) +SELECT count(*) > 0 FROM pg_buffercache_relation_stats() WHERE buffers >= 0; + ?column? +---------- + t +(1 row) + -- Check that the functions / views can't be accessed by default. To avoid -- having to create a dedicated user, use the pg_database_owner pseudo-role. SET ROLE pg_database_owner; @@ -46,6 +52,8 @@ SELECT * FROM pg_buffercache_summary(); ERROR: permission denied for function pg_buffercache_summary SELECT * FROM pg_buffercache_usage_counts(); ERROR: permission denied for function pg_buffercache_usage_counts +SELECT * FROM pg_buffercache_relation_stats(); +ERROR: permission denied for function pg_buffercache_relation_stats RESET role; -- Check that pg_monitor is allowed to query view / function SET ROLE pg_monitor; @@ -73,6 +81,12 @@ SELECT count(*) > 0 FROM pg_buffercache_usage_counts(); t (1 row) +SELECT count(*) > 0 FROM pg_buffercache_relation_stats(); + ?column? 
+---------- + t +(1 row) + RESET role; ------ ---- Test pg_buffercache_evict* and pg_buffercache_mark_dirty* functions diff --git a/contrib/pg_buffercache/meson.build b/contrib/pg_buffercache/meson.build index e681205abb2d8..361628b8bea42 100644 --- a/contrib/pg_buffercache/meson.build +++ b/contrib/pg_buffercache/meson.build @@ -25,6 +25,7 @@ install_data( 'pg_buffercache--1.4--1.5.sql', 'pg_buffercache--1.5--1.6.sql', 'pg_buffercache--1.6--1.7.sql', + 'pg_buffercache--1.7--1.8.sql', 'pg_buffercache.control', kwargs: contrib_data_args, ) diff --git a/contrib/pg_buffercache/pg_buffercache--1.7--1.8.sql b/contrib/pg_buffercache/pg_buffercache--1.7--1.8.sql new file mode 100644 index 0000000000000..9619d1c3e85a6 --- /dev/null +++ b/contrib/pg_buffercache/pg_buffercache--1.7--1.8.sql @@ -0,0 +1,20 @@ +/* contrib/pg_buffercache/pg_buffercache--1.7--1.8.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pg_buffercache UPDATE TO '1.8'" to load this file. 
\quit + +CREATE FUNCTION pg_buffercache_relation_stats( + OUT relfilenode oid, /* relation file number taken from the buffer tag */ + OUT reltablespace oid, /* tablespace OID taken from the buffer tag */ + OUT reldatabase oid, /* database OID taken from the buffer tag */ + OUT relforknumber int2, /* fork number taken from the buffer tag */ + OUT buffers int4, /* valid buffers counted for this relation fork */ + OUT buffers_dirty int4, /* of those, buffers with the dirty flag set */ + OUT buffers_pinned int4, /* of those, buffers with a nonzero refcount */ + OUT usagecount_avg float8) /* sum of usage counts divided by buffers */ +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_buffercache_relation_stats' +LANGUAGE C PARALLEL SAFE; + +REVOKE ALL ON FUNCTION pg_buffercache_relation_stats() FROM PUBLIC; /* not callable by default ... */ +GRANT EXECUTE ON FUNCTION pg_buffercache_relation_stats() TO pg_monitor; /* ... except by pg_monitor members */ diff --git a/contrib/pg_buffercache/pg_buffercache.control b/contrib/pg_buffercache/pg_buffercache.control index 11499550945ee..d2fa8ba53ba9f 100644 --- a/contrib/pg_buffercache/pg_buffercache.control +++ b/contrib/pg_buffercache/pg_buffercache.control @@ -1,5 +1,5 @@ # pg_buffercache extension comment = 'examine the shared buffer cache' -default_version = '1.7' +default_version = '1.8' module_pathname = '$libdir/pg_buffercache' relocatable = true diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c index 89b8685524318..b16a421cb779c 100644 --- a/contrib/pg_buffercache/pg_buffercache_pages.c +++ b/contrib/pg_buffercache/pg_buffercache_pages.c @@ -15,6 +15,7 @@ #include "port/pg_numa.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" +#include "utils/hsearch.h" #include "utils/rel.h" @@ -22,6 +23,7 @@ #define NUM_BUFFERCACHE_PAGES_ELEM 9 #define NUM_BUFFERCACHE_SUMMARY_ELEM 5 #define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM 4 +#define NUM_BUFFERCACHE_RELATION_STATS_ELEM 8 #define NUM_BUFFERCACHE_EVICT_ELEM 2 #define NUM_BUFFERCACHE_EVICT_RELATION_ELEM 3 #define NUM_BUFFERCACHE_EVICT_ALL_ELEM 3 @@ -107,8 +109,33 @@ PG_FUNCTION_INFO_V1(pg_buffercache_evict_all); PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty); PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty_relation); PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty_all); +PG_FUNCTION_INFO_V1(pg_buffercache_relation_stats); +/* + * Hash key for 
pg_buffercache_relation_stats -- identifies one relation fork.
+ */
+typedef struct
+{
+	RelFileNumber relfilenumber;	/* BufTagGetRelNumber() of the buffer tag */
+	Oid			reltablespace;	/* tag.spcOid */
+	Oid			reldatabase;	/* tag.dbOid */
+	ForkNumber	forknum;		/* BufTagGetForkNum() of the buffer tag */
+} BufferRelStatsKey;
+
+/*
+ * Hash entry for pg_buffercache_relation_stats -- accumulates per-relation
+ * buffer statistics.
+ */
+typedef struct
+{
+	BufferRelStatsKey key;		/* must be first */
+	int32		buffers;		/* valid buffers seen for this key */
+	int32		buffers_dirty;	/* of those, buffers with BM_DIRTY set */
+	int32		buffers_pinned; /* of those, buffers with refcount > 0 */
+	int64		usagecount_total;	/* sum of usage counts, for the average */
+} BufferRelStatsEntry;
+
 /* Only need to touch memory once per backend process lifetime */
 static bool firstNumaTouch = true;
 
@@ -958,3 +985,110 @@ pg_buffercache_mark_dirty_all(PG_FUNCTION_ARGS)
 
 	PG_RETURN_DATUM(result);
 }
+
+/*
+ * pg_buffercache_relation_stats
+ *
+ * Returns one row per (relfilenumber, tablespace, database, fork) that has
+ * valid buffers, so monitoring can get per-relation totals without grouping
+ * the full pg_buffercache output.  Buffer headers are read without locks.
+ */
+Datum
+pg_buffercache_relation_stats(PG_FUNCTION_ARGS)
+{
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+	HTAB	   *relstats_hash;
+	HASHCTL		hash_ctl;
+	HASH_SEQ_STATUS hash_seq;
+	BufferRelStatsEntry *entry;
+	Datum		values[NUM_BUFFERCACHE_RELATION_STATS_ELEM];
+	bool		nulls[NUM_BUFFERCACHE_RELATION_STATS_ELEM] = {0};
+
+	InitMaterializedSRF(fcinfo, 0);
+
+	/* Create a hash table to aggregate stats by relation-fork */
+	hash_ctl.keysize = sizeof(BufferRelStatsKey);
+	hash_ctl.entrysize = sizeof(BufferRelStatsEntry);
+	hash_ctl.hcxt = CurrentMemoryContext;
+
+	relstats_hash = hash_create("pg_buffercache relation stats", 128,
+								&hash_ctl,
+								HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+
+	/* Single pass over all buffers */
+	for (int i = 0; i < NBuffers; i++)
+	{
+		BufferDesc *bufHdr;
+		uint64		buf_state;
+		BufferRelStatsKey key;
+		bool		found;
+
+		CHECK_FOR_INTERRUPTS();
+
+		/*
+		 * Read the buffer header without locking it, like the other summary
+		 * functions.  Locking would not give a meaningfully more consistent
+		 * result, since a buffer can change right after the lock is released.
+		 */
+		bufHdr = GetBufferDescriptor(i);
+		buf_state = pg_atomic_read_u64(&bufHdr->state);
+
+		/* Skip unused/invalid buffers */
+		if (!(buf_state & BM_VALID))
+			continue;
+
+		/* HASH_BLOBS hashes the raw key bytes, so zero any padding first */
+		memset(&key, 0, sizeof(key));
+		key.relfilenumber = BufTagGetRelNumber(&bufHdr->tag);
+		key.reltablespace = bufHdr->tag.spcOid;
+		key.reldatabase = bufHdr->tag.dbOid;
+		key.forknum = BufTagGetForkNum(&bufHdr->tag);
+
+		entry = (BufferRelStatsEntry *) hash_search(relstats_hash,
+													 &key,
+													 HASH_ENTER,
+													 &found);
+
+		if (!found)
+		{
+			entry->buffers = 0;
+			entry->buffers_dirty = 0;
+			entry->buffers_pinned = 0;
+			entry->usagecount_total = 0;
+		}
+
+		entry->buffers++;
+		entry->usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state);
+
+		if (buf_state & BM_DIRTY)
+			entry->buffers_dirty++;
+
+		if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
+			entry->buffers_pinned++;
+	}
+
+	/* Emit one row per hash entry */
+	hash_seq_init(&hash_seq, relstats_hash);
+	while ((entry = (BufferRelStatsEntry *) hash_seq_search(&hash_seq)) != NULL)
+	{
+		/* Entries are only created for a counted buffer: no division by 0 */
+		Assert(entry->buffers > 0);
+
+		values[0] = ObjectIdGetDatum(entry->key.relfilenumber);
+		values[1] = ObjectIdGetDatum(entry->key.reltablespace);
+		values[2] = ObjectIdGetDatum(entry->key.reldatabase);
+		values[3] = Int16GetDatum(entry->key.forknum);
+		values[4] = Int32GetDatum(entry->buffers);
+		values[5] = Int32GetDatum(entry->buffers_dirty);
+		values[6] = Int32GetDatum(entry->buffers_pinned);
+		values[7] = Float8GetDatum((double) entry->usagecount_total /
+								   entry->buffers);
+
+		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+							 values, nulls);
+	}
+
+	hash_destroy(relstats_hash);
+
+	return (Datum) 0;
+}
diff --git a/contrib/pg_buffercache/sql/pg_buffercache.sql b/contrib/pg_buffercache/sql/pg_buffercache.sql
index 127d604905ca0..ea5950855d2a7 100644
--- a/contrib/pg_buffercache/sql/pg_buffercache.sql
+++ 
b/contrib/pg_buffercache/sql/pg_buffercache.sql @@ -18,6 +18,8 @@ from pg_buffercache_summary(); SELECT count(*) > 0 FROM pg_buffercache_usage_counts() WHERE buffers >= 0; +SELECT count(*) > 0 FROM pg_buffercache_relation_stats() WHERE buffers >= 0; + -- Check that the functions / views can't be accessed by default. To avoid -- having to create a dedicated user, use the pg_database_owner pseudo-role. SET ROLE pg_database_owner; @@ -26,6 +28,7 @@ SELECT * FROM pg_buffercache_os_pages; SELECT * FROM pg_buffercache_pages() AS p (wrong int); SELECT * FROM pg_buffercache_summary(); SELECT * FROM pg_buffercache_usage_counts(); +SELECT * FROM pg_buffercache_relation_stats(); RESET role; -- Check that pg_monitor is allowed to query view / function @@ -34,6 +37,7 @@ SELECT count(*) > 0 FROM pg_buffercache; SELECT count(*) > 0 FROM pg_buffercache_os_pages; SELECT buffers_used + buffers_unused > 0 FROM pg_buffercache_summary(); SELECT count(*) > 0 FROM pg_buffercache_usage_counts(); +SELECT count(*) > 0 FROM pg_buffercache_relation_stats(); RESET role; diff --git a/doc/src/sgml/pgbuffercache.sgml b/doc/src/sgml/pgbuffercache.sgml index 1e9aee10275f2..921ba9b5306b9 100644 --- a/doc/src/sgml/pgbuffercache.sgml +++ b/doc/src/sgml/pgbuffercache.sgml @@ -31,6 +31,10 @@ pg_buffercache_usage_counts + + pg_buffercache_relation_stats + + pg_buffercache_evict @@ -63,6 +67,7 @@ pg_buffercache_numa views), the pg_buffercache_summary() function, the pg_buffercache_usage_counts() function, the + pg_buffercache_relation_stats() function, the pg_buffercache_evict() function, the pg_buffercache_evict_relation() function, the pg_buffercache_evict_all() function, the @@ -102,6 +107,12 @@ count. + + The pg_buffercache_relation_stats() function returns a + set of rows summarizing buffer cache usage aggregated by relation and fork + number. + + By default, use of the above functions is restricted to superusers and roles with privileges of the pg_monitor role. 
Access may be @@ -564,6 +575,125 @@ + + The <function>pg_buffercache_relation_stats()</function> Function + + + The definitions of the columns exposed by the function are shown in + . + + + + <function>pg_buffercache_relation_stats()</function> Output Columns + + + + + Column Type + + + Description + + + + + + + + relfilenode oid + (references pg_class.relfilenode) + + + Filenode number of the relation + + + + + + reltablespace oid + (references pg_tablespace.oid) + + + Tablespace OID of the relation + + + + + + reldatabase oid + (references pg_database.oid) + + + Database OID of the relation + + + + + + relforknumber smallint + + + Fork number within the relation; see + common/relpath.h + + + + + + buffers int4 + + + Number of buffers for the relation + + + + + + buffers_dirty int4 + + + Number of dirty buffers for the relation + + + + + + buffers_pinned int4 + + + Number of pinned buffers for the relation + + + + + + usagecount_avg float8 + + + Average usage count of the relation's buffers + + + + +
+ + + The pg_buffercache_relation_stats() function returns a set of rows summarizing the state of all valid shared buffers, aggregated by relation file (relfilenode, tablespace and database) and fork number. Unused buffers are not counted. Similar and more detailed information is provided by the pg_buffercache view, but pg_buffercache_relation_stats() is significantly cheaper. + + + + Like the pg_buffercache view, pg_buffercache_relation_stats() does not acquire buffer manager locks. Therefore concurrent activity can lead to minor inaccuracies in the result. + 
+ The <function>pg_buffercache_evict()</function> Function diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 77e3c04144e8a..19a87f702ee24 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -357,6 +357,8 @@ BufferHeapTupleTableSlot BufferLockMode BufferLookupEnt BufferManagerRelation +BufferRelStatsEntry +BufferRelStatsKey BufferStrategyControl BufferTag BufferUsage