1515
1616#include <unistd.h>
1717
18+ #include "access/xact.h"
1819#include "executor/instrument.h"
1920#include "utils/memutils.h"
20- #include "utils/resowner.h"
2121
2222WalUsage pgWalUsage ;
2323Instrumentation instr_top ;
2424InstrStackState instr_stack = {0 , 0 , NULL , & instr_top };
2525
26+ /* List of active QueryInstrumentation entries, for abort cleanup */
27+ static dlist_head active_query_instrumentations = DLIST_STATIC_INIT (active_query_instrumentations );
28+
2629void
2730InstrStackGrow (void )
2831{
@@ -98,7 +101,7 @@ InstrStop(Instrumentation *instr)
98101 * Stops instrumentation, finalizes the stack entry and accumulates to its parent.
99102 *
100103 * Note that this intentionally allows passing a stack that is not the current
101- * top, as can happen with PG_FINALLY, or resource owners , which don't have a
104+ * top, as can happen with PG_FINALLY or abort cleanup , which don't have a
102105 * guaranteed cleanup order.
103106 *
104107 * We are careful here to achieve two goals:
@@ -140,34 +143,16 @@ InstrStopFinalize(Instrumentation *instr)
140143/* Query instrumentation handling */
141144
142145/*
143- * Use ResourceOwner mechanism to correctly reset instr_stack on abort.
146+ * Finalize a single QueryInstrumentation entry during abort.
147+ *
148+ * Accumulates buffer/WAL data from unfinalized children and triggers,
149+ * resets the instrumentation stack, and removes the entry from the
150+ * active list. We do NOT pfree any memory here since portal/executor
151+ * context cleanup will handle that shortly after.
144152 */
145- static void ResOwnerReleaseInstrumentation (Datum res );
146- static const ResourceOwnerDesc instrumentation_resowner_desc =
147- {
148- .name = "instrumentation" ,
149- .release_phase = RESOURCE_RELEASE_AFTER_LOCKS ,
150- .release_priority = RELEASE_PRIO_INSTRUMENTATION ,
151- .ReleaseResource = ResOwnerReleaseInstrumentation ,
152- .DebugPrint = NULL , /* default message is fine */
153- };
154-
155- static inline void
156- ResourceOwnerRememberInstrumentation (ResourceOwner owner , QueryInstrumentation * qinstr )
157- {
158- ResourceOwnerRemember (owner , PointerGetDatum (qinstr ), & instrumentation_resowner_desc );
159- }
160-
161- static inline void
162- ResourceOwnerForgetInstrumentation (ResourceOwner owner , QueryInstrumentation * qinstr )
163- {
164- ResourceOwnerForget (owner , PointerGetDatum (qinstr ), & instrumentation_resowner_desc );
165- }
166-
167153static void
168- ResOwnerReleaseInstrumentation ( Datum res )
154+ InstrAbortQueryInstrumentation ( QueryInstrumentation * qinstr )
169155{
170- QueryInstrumentation * qinstr = (QueryInstrumentation * ) DatumGetPointer (res );
171156 dlist_mutable_iter iter ;
172157
173158 /* Accumulate data from all unfinalized child node entries. */
@@ -176,12 +161,6 @@ ResOwnerReleaseInstrumentation(Datum res)
176161 NodeInstrumentation * child = dlist_container (NodeInstrumentation , unfinalized_node , iter .cur );
177162
178163 InstrAccumStack (& qinstr -> instr , & child -> instr );
179-
180- /*
181- * Free NodeInstrumentation now, since InstrFinalizeNode won't be
182- * called
183- */
184- pfree (child );
185164 }
186165
187166 /* Accumulate data from any active trigger instrumentation entries. */
@@ -190,37 +169,66 @@ ResOwnerReleaseInstrumentation(Datum res)
190169 TriggerInstrumentation * tginstr = dlist_container (TriggerInstrumentation , unfinalized_trigger , iter .cur );
191170
192171 InstrAccumStack (& qinstr -> instr , & tginstr -> instr );
193-
194- /*
195- * We can't pfree tginstr here, since its part of a bigger allocation - we'll instead
196- * let transaction end deal with clean up, and instead just remove the list entry.
197- */
198- dlist_delete (& tginstr -> unfinalized_trigger );
199172 }
200173
201174 /* Ensure the stack is reset as expected, and we accumulate to the parent */
202175 InstrStopFinalize (& qinstr -> instr );
176+ }
177+
178+ /*
179+ * Transaction abort cleanup for instrumentation.
180+ *
181+ * Called from AbortTransaction before AtAbort_Portals, so that we can
182+ * recover buffer/WAL statistics from active instrumentation entries before
183+ * the executor memory contexts are destroyed.
184+ */
185+ void
186+ AtAbort_Instrumentation (void )
187+ {
188+ dlist_mutable_iter iter ;
189+
190+ dlist_foreach_modify (iter , & active_query_instrumentations )
191+ {
192+ QueryInstrumentation * qinstr = dlist_container (QueryInstrumentation , active_node , iter .cur );
193+
194+ InstrAbortQueryInstrumentation (qinstr );
195+ }
196+
197+ /* Reset the list — memory will be freed by context cleanup */
198+ dlist_init (& active_query_instrumentations );
199+ }
200+
201+ /*
202+ * Subtransaction abort cleanup for instrumentation.
203+ *
204+ * Only finalizes entries created at or below the given nesting level.
205+ */
206+ void
207+ AtSubAbort_Instrumentation (int nestLevel )
208+ {
209+ dlist_mutable_iter iter ;
210+
211+ dlist_foreach_modify (iter , & active_query_instrumentations )
212+ {
213+ QueryInstrumentation * qinstr = dlist_container (QueryInstrumentation , active_node , iter .cur );
214+
215+ if (qinstr -> nestingLevel < nestLevel )
216+ continue ;
203217
204- /* Free QueryInstrumentation now, since InstrStop won't be called */
205- pfree (qinstr );
218+ InstrAbortQueryInstrumentation (qinstr );
219+ dlist_delete (& qinstr -> active_node );
220+ }
206221}
207222
208223QueryInstrumentation *
209224InstrQueryAlloc (int instrument_options )
210225{
211226 QueryInstrumentation * instr ;
212227
213- /*
214- * If needed, allocate in TopMemoryContext so that the Instrumentation
215- * survives transaction abort — ResourceOwner release needs to access
216- * it.
217- */
218- if ((instrument_options & (INSTRUMENT_BUFFERS | INSTRUMENT_WAL )) != 0 )
219- instr = MemoryContextAllocZero (TopMemoryContext , sizeof (QueryInstrumentation ));
220- else
221- instr = palloc0 (sizeof (QueryInstrumentation ));
228+ instr = palloc0 (sizeof (QueryInstrumentation ));
222229
223230 InstrInitOptions (& instr -> instr , instrument_options );
231+ dlist_node_init (& instr -> active_node );
224232 dlist_init (& instr -> unfinalized_children );
225233 dlist_init (& instr -> unfinalized_triggers );
226234
@@ -232,58 +240,35 @@ InstrQueryStart(QueryInstrumentation *qinstr)
232240{
233241 InstrStart (& qinstr -> instr );
234242
235- if (qinstr -> instr .need_stack )
243+ if (qinstr -> instr .need_stack && dlist_node_is_detached ( & qinstr -> active_node ) )
236244 {
237- Assert (CurrentResourceOwner != NULL );
238- qinstr -> owner = CurrentResourceOwner ;
239-
240- ResourceOwnerEnlarge (qinstr -> owner );
241- ResourceOwnerRememberInstrumentation (qinstr -> owner , qinstr );
245+ qinstr -> nestingLevel = GetCurrentTransactionNestLevel ();
246+ dlist_push_head (& active_query_instrumentations , & qinstr -> active_node );
242247 }
243248}
244249
245250void
246251InstrQueryStop (QueryInstrumentation * qinstr )
247252{
248253 InstrStop (& qinstr -> instr );
249-
250- if (qinstr -> instr .need_stack )
251- {
252- Assert (qinstr -> owner != NULL );
253- ResourceOwnerForgetInstrumentation (qinstr -> owner , qinstr );
254- qinstr -> owner = NULL ;
255- }
256254}
257255
258256QueryInstrumentation *
259257InstrQueryStopFinalize (QueryInstrumentation * qinstr )
260258{
261- QueryInstrumentation * copy ;
262-
263259 InstrStopFinalize (& qinstr -> instr );
264260
265- if (!qinstr -> instr .need_stack )
266- return qinstr ;
267-
268- Assert (qinstr -> owner != NULL );
269- ResourceOwnerForgetInstrumentation (qinstr -> owner , qinstr );
270- qinstr -> owner = NULL ;
261+ if (qinstr -> instr .need_stack && !dlist_node_is_detached (& qinstr -> active_node ))
262+ dlist_delete_thoroughly (& qinstr -> active_node );
271263
272- /*
273- * Copy to the current memory context so the caller doesn't need to
274- * explicitly free the TopMemoryContext allocation.
275- */
276- copy = palloc (sizeof (QueryInstrumentation ));
277- memcpy (copy , qinstr , sizeof (QueryInstrumentation ));
278- pfree (qinstr );
279- return copy ;
264+ return qinstr ;
280265}
281266
282267/*
283268 * Register a child NodeInstrumentation entry for abort processing.
284269 *
285- * On abort, ResOwnerReleaseInstrumentation will walk the parent's list to
286- * recover buffer/WAL data from entries that were never finalized, in order for
270+ * On abort, AtAbort_Instrumentation will walk the parent's list to recover
271+ * buffer/WAL data from entries that were never finalized, in order for
287272 * aggregate totals to be accurate despite the query erroring out.
288273 */
289274void
@@ -340,17 +325,7 @@ InstrAllocNode(int instrument_options, bool async_mode)
340325{
341326 NodeInstrumentation * instr ;
342327
343- /*
344- * If needed, allocate in a context that supports stack-based
345- * instrumentation abort handling. We can utilize TopTransactionContext
346- * instead of TopMemoryContext here because nodes don't get used for
347- * utility commands that restart transactions, which would require a
348- * context that survives longer (EXPLAIN ANALYZE is fine).
349- */
350- if ((instrument_options & (INSTRUMENT_BUFFERS | INSTRUMENT_WAL )) != 0 )
351- instr = MemoryContextAlloc (TopTransactionContext , sizeof (NodeInstrumentation ));
352- else
353- instr = palloc (sizeof (NodeInstrumentation ));
328+ instr = palloc (sizeof (NodeInstrumentation ));
354329
355330 InstrInitNode (instr , instrument_options );
356331 instr -> async_mode = async_mode ;
@@ -422,26 +397,17 @@ InstrStopNode(NodeInstrumentation *instr, double nTuples)
422397NodeInstrumentation *
423398InstrFinalizeNode (NodeInstrumentation * instr , Instrumentation * parent )
424399{
425- NodeInstrumentation * dst ;
426-
427400 /* If we didn't use stack based instrumentation, nothing to be done */
428401 if (!instr -> instr .need_stack )
429402 return instr ;
430403
431- /* Copy into per-query memory context */
432- dst = palloc (sizeof (NodeInstrumentation ));
433- memcpy (dst , instr , sizeof (NodeInstrumentation ));
434-
435404 /* Accumulate node's buffer/WAL usage to the parent */
436- InstrAccumStack (parent , & dst -> instr );
405+ InstrAccumStack (parent , & instr -> instr );
437406
438- /* Unregister from query's unfinalized list before freeing */
439- if (instr -> instr .need_stack )
440- dlist_delete (& instr -> unfinalized_node );
441-
442- pfree (instr );
407+ /* Unregister from query's unfinalized list */
408+ dlist_delete (& instr -> unfinalized_node );
443409
444- return dst ;
410+ return instr ;
445411}
446412
447413/* Update tuple count */
@@ -630,18 +596,10 @@ TriggerInstrumentation *
630596InstrAllocTrigger (int n , int instrument_options )
631597{
632598 TriggerInstrumentation * tginstr ;
633-
634- /*
635- * If needed, allocate in TopTransactionContext so the memory survives
636- * transaction abort — ResOwnerReleaseInstrumentation needs to access it.
637- */
638- if ((instrument_options & (INSTRUMENT_BUFFERS | INSTRUMENT_WAL )) != 0 )
639- tginstr = MemoryContextAllocZero (TopTransactionContext ,
640- n * sizeof (TriggerInstrumentation ));
641- else
642- tginstr = palloc0 (n * sizeof (TriggerInstrumentation ));
643599 int i ;
644600
601+ tginstr = palloc0 (n * sizeof (TriggerInstrumentation ));
602+
645603 for (i = 0 ; i < n ; i ++ )
646604 InstrInitOptions (& tginstr [i ].instr , instrument_options );
647605
@@ -657,7 +615,7 @@ InstrStartTrigger(QueryInstrumentation *qinstr, TriggerInstrumentation *tginstr)
657615 * On first call, register with the parent QueryInstrumentation for abort
658616 * recovery. The trigger stays on the list for the query lifetime -- on
659617 * normal completion ExecFinalizeTriggerInstrumentation handles it, on
660- * abort ResOwnerReleaseInstrumentation does.
618+ * abort AtAbort_Instrumentation does.
661619 *
662620 * We detect first call by checking if the dlist_node is still in its
663621 * palloc0-zeroed state (prev == NULL).
0 commit comments