From 1611911dc9aef2f87db1bcb68442c2dd483ae0ff Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Sat, 28 Feb 2026 15:33:56 -0800 Subject: [PATCH] pg_buffercache: Add pg_buffercache_relations() function This function returns an aggregation of buffer contents, grouped per relfilenode and fork. This is often useful to understand which tables or indexes are currently in cache, and can show cache disruptions due to query activity when sampled over time. The existing pg_buffercache view can be used for this by grouping the result, but passing a large number of buffer entries (one per page) back to the tuplestore and then aggregating them can be prohibitively expensive with large buffer counts. Even with a small shared_buffers setting (128MB), the new function is 10x faster. Similar to the existing summary functions, this new function does not hold buffer partition or buffer header locks whilst gathering its statistics. Author: Lukas Fittl Reviewed-by: Jakub Wartak Reviewed-by: Bertrand Drouvot Reviewed-by: Haibo Yan Reviewed-by: Masahiko Sawada Reviewed-by: Paul A Jungwirth Reviewed-by: Khoa Nguyen Discussion: https://www.postgresql.org/message-id/flat/CAP53Pkx0=ph0vG_M20yVAoK11yGSTZP=53-rZt36OCP4hBPaDQ@mail.gmail.com --- .../expected/pg_buffercache.out | 14 ++ .../pg_buffercache--1.6--1.7.sql | 16 +++ contrib/pg_buffercache/pg_buffercache_pages.c | 136 +++++++++++++++++- contrib/pg_buffercache/sql/pg_buffercache.sql | 4 + doc/src/sgml/pgbuffercache.sgml | 130 +++++++++++++++++ src/tools/pgindent/typedefs.list | 2 + 6 files changed, 301 insertions(+), 1 deletion(-) diff --git a/contrib/pg_buffercache/expected/pg_buffercache.out b/contrib/pg_buffercache/expected/pg_buffercache.out index 886dea770f626..452f6ca6f58fd 100644 --- a/contrib/pg_buffercache/expected/pg_buffercache.out +++ b/contrib/pg_buffercache/expected/pg_buffercache.out @@ -33,6 +33,12 @@ SELECT count(*) > 0 FROM pg_buffercache_usage_counts() WHERE buffers >= 0; t (1 row) +SELECT count(*) > 0 FROM 
pg_buffercache_relations() WHERE buffers >= 0; + ?column? +---------- + t +(1 row) + -- Check that the functions / views can't be accessed by default. To avoid -- having to create a dedicated user, use the pg_database_owner pseudo-role. SET ROLE pg_database_owner; @@ -46,6 +52,8 @@ SELECT * FROM pg_buffercache_summary(); ERROR: permission denied for function pg_buffercache_summary SELECT * FROM pg_buffercache_usage_counts(); ERROR: permission denied for function pg_buffercache_usage_counts +SELECT * FROM pg_buffercache_relations(); +ERROR: permission denied for function pg_buffercache_relations RESET role; -- Check that pg_monitor is allowed to query view / function SET ROLE pg_monitor; @@ -73,6 +81,12 @@ SELECT count(*) > 0 FROM pg_buffercache_usage_counts(); t (1 row) +SELECT count(*) > 0 FROM pg_buffercache_relations(); + ?column? +---------- + t +(1 row) + RESET role; ------ ---- Test pg_buffercache_evict* and pg_buffercache_mark_dirty* functions diff --git a/contrib/pg_buffercache/pg_buffercache--1.6--1.7.sql b/contrib/pg_buffercache/pg_buffercache--1.6--1.7.sql index 9a7bf66dab54b..99b8e37a81c75 100644 --- a/contrib/pg_buffercache/pg_buffercache--1.6--1.7.sql +++ b/contrib/pg_buffercache/pg_buffercache--1.6--1.7.sql @@ -54,3 +54,19 @@ CREATE FUNCTION pg_buffercache_mark_dirty_all( OUT buffers_skipped int4) AS 'MODULE_PATHNAME', 'pg_buffercache_mark_dirty_all' LANGUAGE C PARALLEL SAFE VOLATILE; + +CREATE FUNCTION pg_buffercache_relations( + OUT relfilenode oid, + OUT reltablespace oid, + OUT reldatabase oid, + OUT relforknumber int2, + OUT buffers int4, + OUT buffers_dirty int4, + OUT buffers_pinned int4, + OUT usagecount_avg float8) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_buffercache_relations' +LANGUAGE C PARALLEL SAFE; + +REVOKE ALL ON FUNCTION pg_buffercache_relations() FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pg_buffercache_relations() TO pg_monitor; diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c 
b/contrib/pg_buffercache/pg_buffercache_pages.c index db4d711cce7d6..6f2fc7a69afde 100644 --- a/contrib/pg_buffercache/pg_buffercache_pages.c +++ b/contrib/pg_buffercache/pg_buffercache_pages.c @@ -15,6 +15,7 @@ #include "port/pg_numa.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" +#include "common/hashfn.h" #include "utils/rel.h" #include "utils/tuplestore.h" @@ -23,6 +24,7 @@ #define NUM_BUFFERCACHE_PAGES_ELEM 9 #define NUM_BUFFERCACHE_SUMMARY_ELEM 5 #define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM 4 +#define NUM_BUFFERCACHE_RELATIONS_ELEM 8 #define NUM_BUFFERCACHE_EVICT_ELEM 2 #define NUM_BUFFERCACHE_EVICT_RELATION_ELEM 3 #define NUM_BUFFERCACHE_EVICT_ALL_ELEM 3 @@ -92,6 +94,29 @@ typedef struct BufferCacheOsPagesRec *record; } BufferCacheOsPagesContext; +/* + * Hash key for pg_buffercache_relations: one fork of one relation file. + */ +typedef struct +{ + RelFileLocator locator; + ForkNumber forknum; +} BufferRelStatsKey; + +/* + * Hash entry for pg_buffercache_relations: buffer statistics accumulated + * for one relation fork. 
+ */ +typedef struct +{ + BufferRelStatsKey key; + uint32 status; /* for simplehash */ + int32 buffers; /* valid buffers counted */ + int32 buffers_dirty; /* ... with BM_DIRTY set */ + int32 buffers_pinned; /* ... with refcount > 0 */ + int64 usagecount_total; /* sum of their usage counts */ +} BufferRelStatsEntry; + /* * Function returning data from the shared buffer cache - buffer number, @@ -108,7 +133,20 @@ PG_FUNCTION_INFO_V1(pg_buffercache_evict_all); PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty); PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty_relation); PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty_all); - +PG_FUNCTION_INFO_V1(pg_buffercache_relations); + +#define SH_PREFIX relstats +#define SH_ELEMENT_TYPE BufferRelStatsEntry +#define SH_KEY_TYPE BufferRelStatsKey +#define SH_KEY key +#define SH_HASH_KEY(tb, key) \ + hash_bytes((const unsigned char *) &(key), sizeof(BufferRelStatsKey)) +#define SH_EQUAL(tb, a, b) \ + (memcmp(&(a), &(b), sizeof(BufferRelStatsKey)) == 0) +#define SH_SCOPE static inline +#define SH_DECLARE +#define SH_DEFINE +#include "lib/simplehash.h" /* Only need to touch memory once per backend process lifetime */ static bool firstNumaTouch = true; @@ -961,3 +999,99 @@ pg_buffercache_mark_dirty_all(PG_FUNCTION_ARGS) PG_RETURN_DATUM(result); } + +/* + * pg_buffercache_relations + * + * Returns one row per relation fork with at least one valid shared buffer, + * giving its buffer, dirty-buffer and pinned-buffer counts and average usage + * count, so that callers need not aggregate per-buffer rows themselves. 
+ */ +Datum +pg_buffercache_relations(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + relstats_hash *relstats; + relstats_iterator iter; + BufferRelStatsEntry *entry; + Datum values[NUM_BUFFERCACHE_RELATIONS_ELEM]; + bool nulls[NUM_BUFFERCACHE_RELATIONS_ELEM] = {0}; + + InitMaterializedSRF(fcinfo, 0); + + /* Hash table accumulating statistics per relation fork */ + relstats = relstats_create(CurrentMemoryContext, 128, NULL); + + /* Visit every shared buffer exactly once */ + for (int i = 0; i < NBuffers; i++) + { + BufferDesc *bufHdr; + uint64 buf_state; + BufferRelStatsKey key = {0}; /* hashed and compared as raw bytes */ + bool found; + + CHECK_FOR_INTERRUPTS(); + + /* + * Read the buffer state without locking, as pg_buffercache_summary and + * pg_buffercache_usage_counts do. The tag below is read unlocked as + * well, so it may not match the sampled state; a lock would not help + * much, since a buffer can change right after the lock is released. + */ + bufHdr = GetBufferDescriptor(i); + buf_state = pg_atomic_read_u64(&bufHdr->state); + + /* Skip buffers that do not have BM_VALID set */ + if (!(buf_state & BM_VALID)) + continue; + + key.locator = BufTagGetRelFileLocator(&bufHdr->tag); + key.forknum = BufTagGetForkNum(&bufHdr->tag); + + entry = relstats_insert(relstats, key, &found); + + if (!found) + { + entry->buffers = 0; + entry->buffers_dirty = 0; + entry->buffers_pinned = 0; + entry->usagecount_total = 0; + } + + entry->buffers++; + entry->usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state); + + if (buf_state & BM_DIRTY) + entry->buffers_dirty++; + + if (BUF_STATE_GET_REFCOUNT(buf_state) > 0) + entry->buffers_pinned++; + } + + /* Output one result row for each hash table entry */ + relstats_start_iterate(relstats, &iter); + while ((entry = relstats_iterate(relstats, &iter)) != NULL) + { + CHECK_FOR_INTERRUPTS(); + + if (entry->buffers == 0) + continue; /* defensive: the loop above counts >= 1 buffer per entry */ + + values[0] = ObjectIdGetDatum(entry->key.locator.relNumber); + values[1] = ObjectIdGetDatum(entry->key.locator.spcOid); + values[2] = ObjectIdGetDatum(entry->key.locator.dbOid); 
+ values[3] = Int16GetDatum(entry->key.forknum); + values[4] = Int32GetDatum(entry->buffers); + values[5] = Int32GetDatum(entry->buffers_dirty); + values[6] = Int32GetDatum(entry->buffers_pinned); + values[7] = Float8GetDatum((double) entry->usagecount_total / + entry->buffers); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + + relstats_destroy(relstats); + + return (Datum) 0; +} diff --git a/contrib/pg_buffercache/sql/pg_buffercache.sql b/contrib/pg_buffercache/sql/pg_buffercache.sql index 127d604905ca0..b7f4d14afc2db 100644 --- a/contrib/pg_buffercache/sql/pg_buffercache.sql +++ b/contrib/pg_buffercache/sql/pg_buffercache.sql @@ -18,6 +18,8 @@ from pg_buffercache_summary(); SELECT count(*) > 0 FROM pg_buffercache_usage_counts() WHERE buffers >= 0; +SELECT count(*) > 0 FROM pg_buffercache_relations() WHERE buffers >= 0; + -- Check that the functions / views can't be accessed by default. To avoid -- having to create a dedicated user, use the pg_database_owner pseudo-role. 
SET ROLE pg_database_owner; @@ -26,6 +28,7 @@ SELECT * FROM pg_buffercache_os_pages; SELECT * FROM pg_buffercache_pages() AS p (wrong int); SELECT * FROM pg_buffercache_summary(); SELECT * FROM pg_buffercache_usage_counts(); +SELECT * FROM pg_buffercache_relations(); RESET role; -- Check that pg_monitor is allowed to query view / function @@ -34,6 +37,7 @@ SELECT count(*) > 0 FROM pg_buffercache; SELECT count(*) > 0 FROM pg_buffercache_os_pages; SELECT buffers_used + buffers_unused > 0 FROM pg_buffercache_summary(); SELECT count(*) > 0 FROM pg_buffercache_usage_counts(); +SELECT count(*) > 0 FROM pg_buffercache_relations(); RESET role; diff --git a/doc/src/sgml/pgbuffercache.sgml b/doc/src/sgml/pgbuffercache.sgml index 1e9aee10275f2..5c50a130c4f1c 100644 --- a/doc/src/sgml/pgbuffercache.sgml +++ b/doc/src/sgml/pgbuffercache.sgml @@ -31,6 +31,10 @@ pg_buffercache_usage_counts + + pg_buffercache_relations + + pg_buffercache_evict @@ -63,6 +67,7 @@ pg_buffercache_numa views), the pg_buffercache_summary() function, the pg_buffercache_usage_counts() function, the + pg_buffercache_relations() function, the pg_buffercache_evict() function, the pg_buffercache_evict_relation() function, the pg_buffercache_evict_all() function, the @@ -102,6 +107,12 @@ count. + + The pg_buffercache_relations() function returns a + set of rows summarizing buffer cache usage aggregated by relation and fork + number. + + By default, use of the above functions is restricted to superusers and roles with privileges of the pg_monitor role. Access may be @@ -564,6 +575,125 @@ + + The <function>pg_buffercache_relations()</function> Function + + + The definitions of the columns exposed by the function are shown in + . 
+ + + + <function>pg_buffercache_relations()</function> Output Columns + + + + + Column Type + + + Description + + + + + + + + relfilenode oid + (references pg_class.relfilenode) + + + Filenode number of the relation + + + + + + reltablespace oid + (references pg_tablespace.oid) + + + Tablespace OID of the relation + + + + + + reldatabase oid + (references pg_database.oid) + + + Database OID of the relation + + + + + + relforknumber smallint + + + Fork number within the relation; see + common/relpath.h + + + + + + buffers int4 + + + Number of buffers for the relation + + + + + + buffers_dirty int4 + + + Number of dirty buffers for the relation + + + + + + buffers_pinned int4 + + + Number of pinned buffers for the relation + + + + + + usagecount_avg float8 + + + Average usage count of the relation's buffers + + + + +
+ + + The pg_buffercache_relations() function returns a + set of rows summarizing the state of all shared buffers, aggregated by + relation and fork number. Similar and more detailed information is + provided by the pg_buffercache view, but + pg_buffercache_relations() is significantly + cheaper. + + + + Like the pg_buffercache view, + pg_buffercache_relations() does not acquire buffer + manager locks. Therefore concurrent activity can lead to minor inaccuracies + in the result. + +
+ The <function>pg_buffercache_evict()</function> Function diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 8df23840e57c1..4ae3ef9103bf8 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -362,6 +362,8 @@ BufferHeapTupleTableSlot BufferLockMode BufferLookupEnt BufferManagerRelation +BufferRelStatsEntry +BufferRelStatsKey BufferStrategyControl BufferTag BufferUsage