Skip to content

Commit 08c9eb5

Browse files
committed
Huh, maybe that's the better way to do it?
1 parent 52e7686 commit 08c9eb5

4 files changed

Lines changed: 93 additions & 128 deletions

File tree

src/backend/access/transam/xact.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "commands/tablecmds.h"
4141
#include "commands/trigger.h"
4242
#include "common/pg_prng.h"
43+
#include "executor/instrument.h"
4344
#include "executor/spi.h"
4445
#include "libpq/be-fsstubs.h"
4546
#include "libpq/pqsignal.h"
@@ -2937,6 +2938,7 @@ AbortTransaction(void)
29372938
* do abort processing
29382939
*/
29392940
AfterTriggerEndXact(false); /* 'false' means it's abort */
2941+
AtAbort_Instrumentation();
29402942
AtAbort_Portals();
29412943
smgrDoPendingSyncs(false, is_parallel_worker);
29422944
AtEOXact_LargeObject(false);
@@ -5338,6 +5340,7 @@ AbortSubTransaction(void)
53385340
if (s->curTransactionOwner)
53395341
{
53405342
AfterTriggerEndSubXact(false);
5343+
AtSubAbort_Instrumentation(s->nestingLevel);
53415344
AtSubAbort_Portals(s->subTransactionId,
53425345
s->parent->subTransactionId,
53435346
s->curTransactionOwner,

src/backend/executor/instrument.c

Lines changed: 74 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,17 @@
1515

1616
#include <unistd.h>
1717

18+
#include "access/xact.h"
1819
#include "executor/instrument.h"
1920
#include "utils/memutils.h"
20-
#include "utils/resowner.h"
2121

2222
WalUsage pgWalUsage;
2323
Instrumentation instr_top;
2424
InstrStackState instr_stack = {0, 0, NULL, &instr_top};
2525

26+
/* List of active QueryInstrumentation entries, for abort cleanup */
27+
static dlist_head active_query_instrumentations = DLIST_STATIC_INIT(active_query_instrumentations);
28+
2629
void
2730
InstrStackGrow(void)
2831
{
@@ -98,7 +101,7 @@ InstrStop(Instrumentation *instr)
98101
* Stops instrumentation, finalizes the stack entry and accumulates to its parent.
99102
*
100103
* Note that this intentionally allows passing a stack that is not the current
101-
* top, as can happen with PG_FINALLY, or resource owners, which don't have a
104+
* top, as can happen with PG_FINALLY or abort cleanup, which don't have a
102105
* guaranteed cleanup order.
103106
*
104107
* We are careful here to achieve two goals:
@@ -140,34 +143,16 @@ InstrStopFinalize(Instrumentation *instr)
140143
/* Query instrumentation handling */
141144

142145
/*
143-
* Use ResourceOwner mechanism to correctly reset instr_stack on abort.
146+
* Finalize a single QueryInstrumentation entry during abort.
147+
*
148+
* Accumulates buffer/WAL data from unfinalized children and triggers,
149+
* resets the instrumentation stack, and removes the entry from the
150+
* active list. We do NOT pfree any memory here since portal/executor
151+
* context cleanup will handle that shortly after.
144152
*/
145-
static void ResOwnerReleaseInstrumentation(Datum res);
146-
static const ResourceOwnerDesc instrumentation_resowner_desc =
147-
{
148-
.name = "instrumentation",
149-
.release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
150-
.release_priority = RELEASE_PRIO_INSTRUMENTATION,
151-
.ReleaseResource = ResOwnerReleaseInstrumentation,
152-
.DebugPrint = NULL, /* default message is fine */
153-
};
154-
155-
static inline void
156-
ResourceOwnerRememberInstrumentation(ResourceOwner owner, QueryInstrumentation *qinstr)
157-
{
158-
ResourceOwnerRemember(owner, PointerGetDatum(qinstr), &instrumentation_resowner_desc);
159-
}
160-
161-
static inline void
162-
ResourceOwnerForgetInstrumentation(ResourceOwner owner, QueryInstrumentation *qinstr)
163-
{
164-
ResourceOwnerForget(owner, PointerGetDatum(qinstr), &instrumentation_resowner_desc);
165-
}
166-
167153
static void
168-
ResOwnerReleaseInstrumentation(Datum res)
154+
InstrAbortQueryInstrumentation(QueryInstrumentation *qinstr)
169155
{
170-
QueryInstrumentation *qinstr = (QueryInstrumentation *) DatumGetPointer(res);
171156
dlist_mutable_iter iter;
172157

173158
/* Accumulate data from all unfinalized child node entries. */
@@ -176,12 +161,6 @@ ResOwnerReleaseInstrumentation(Datum res)
176161
NodeInstrumentation *child = dlist_container(NodeInstrumentation, unfinalized_node, iter.cur);
177162

178163
InstrAccumStack(&qinstr->instr, &child->instr);
179-
180-
/*
181-
* Free NodeInstrumentation now, since InstrFinalizeNode won't be
182-
* called
183-
*/
184-
pfree(child);
185164
}
186165

187166
/* Accumulate data from any active trigger instrumentation entries. */
@@ -190,37 +169,66 @@ ResOwnerReleaseInstrumentation(Datum res)
190169
TriggerInstrumentation *tginstr = dlist_container(TriggerInstrumentation, unfinalized_trigger, iter.cur);
191170

192171
InstrAccumStack(&qinstr->instr, &tginstr->instr);
193-
194-
/*
195-
* We can't pfree tginstr here, since its part of a bigger allocation - we'll instead
196-
* let transaction end deal with clean up, and instead just remove the list entry.
197-
*/
198-
dlist_delete(&tginstr->unfinalized_trigger);
199172
}
200173

201174
/* Ensure the stack is reset as expected, and we accumulate to the parent */
202175
InstrStopFinalize(&qinstr->instr);
176+
}
177+
178+
/*
179+
* Transaction abort cleanup for instrumentation.
180+
*
181+
* Called from AbortTransaction before AtAbort_Portals, so that we can
182+
* recover buffer/WAL statistics from active instrumentation entries before
183+
* the executor memory contexts are destroyed.
184+
*/
185+
void
186+
AtAbort_Instrumentation(void)
187+
{
188+
dlist_mutable_iter iter;
189+
190+
dlist_foreach_modify(iter, &active_query_instrumentations)
191+
{
192+
QueryInstrumentation *qinstr = dlist_container(QueryInstrumentation, active_node, iter.cur);
193+
194+
InstrAbortQueryInstrumentation(qinstr);
195+
}
196+
197+
/* Reset the list — memory will be freed by context cleanup */
198+
dlist_init(&active_query_instrumentations);
199+
}
200+
201+
/*
202+
* Subtransaction abort cleanup for instrumentation.
203+
*
204+
* Only finalizes entries created at or below the given nesting level.
205+
*/
206+
void
207+
AtSubAbort_Instrumentation(int nestLevel)
208+
{
209+
dlist_mutable_iter iter;
210+
211+
dlist_foreach_modify(iter, &active_query_instrumentations)
212+
{
213+
QueryInstrumentation *qinstr = dlist_container(QueryInstrumentation, active_node, iter.cur);
214+
215+
if (qinstr->nestingLevel < nestLevel)
216+
continue;
203217

204-
/* Free QueryInstrumentation now, since InstrStop won't be called */
205-
pfree(qinstr);
218+
InstrAbortQueryInstrumentation(qinstr);
219+
dlist_delete(&qinstr->active_node);
220+
}
206221
}
207222

208223
QueryInstrumentation *
209224
InstrQueryAlloc(int instrument_options)
210225
{
211226
QueryInstrumentation *instr;
212227

213-
/*
214-
* If needed, allocate in TopMemoryContext so that the Instrumentation
215-
* survives transaction abort — ResourceOwner release needs to access
216-
* it.
217-
*/
218-
if ((instrument_options & (INSTRUMENT_BUFFERS | INSTRUMENT_WAL)) != 0)
219-
instr = MemoryContextAllocZero(TopMemoryContext, sizeof(QueryInstrumentation));
220-
else
221-
instr = palloc0(sizeof(QueryInstrumentation));
228+
instr = palloc0(sizeof(QueryInstrumentation));
222229

223230
InstrInitOptions(&instr->instr, instrument_options);
231+
dlist_node_init(&instr->active_node);
224232
dlist_init(&instr->unfinalized_children);
225233
dlist_init(&instr->unfinalized_triggers);
226234

@@ -232,58 +240,35 @@ InstrQueryStart(QueryInstrumentation *qinstr)
232240
{
233241
InstrStart(&qinstr->instr);
234242

235-
if (qinstr->instr.need_stack)
243+
if (qinstr->instr.need_stack && dlist_node_is_detached(&qinstr->active_node))
236244
{
237-
Assert(CurrentResourceOwner != NULL);
238-
qinstr->owner = CurrentResourceOwner;
239-
240-
ResourceOwnerEnlarge(qinstr->owner);
241-
ResourceOwnerRememberInstrumentation(qinstr->owner, qinstr);
245+
qinstr->nestingLevel = GetCurrentTransactionNestLevel();
246+
dlist_push_head(&active_query_instrumentations, &qinstr->active_node);
242247
}
243248
}
244249

245250
void
246251
InstrQueryStop(QueryInstrumentation *qinstr)
247252
{
248253
InstrStop(&qinstr->instr);
249-
250-
if (qinstr->instr.need_stack)
251-
{
252-
Assert(qinstr->owner != NULL);
253-
ResourceOwnerForgetInstrumentation(qinstr->owner, qinstr);
254-
qinstr->owner = NULL;
255-
}
256254
}
257255

258256
QueryInstrumentation *
259257
InstrQueryStopFinalize(QueryInstrumentation *qinstr)
260258
{
261-
QueryInstrumentation *copy;
262-
263259
InstrStopFinalize(&qinstr->instr);
264260

265-
if (!qinstr->instr.need_stack)
266-
return qinstr;
267-
268-
Assert(qinstr->owner != NULL);
269-
ResourceOwnerForgetInstrumentation(qinstr->owner, qinstr);
270-
qinstr->owner = NULL;
261+
if (qinstr->instr.need_stack && !dlist_node_is_detached(&qinstr->active_node))
262+
dlist_delete_thoroughly(&qinstr->active_node);
271263

272-
/*
273-
* Copy to the current memory context so the caller doesn't need to
274-
* explicitly free the TopMemoryContext allocation.
275-
*/
276-
copy = palloc(sizeof(QueryInstrumentation));
277-
memcpy(copy, qinstr, sizeof(QueryInstrumentation));
278-
pfree(qinstr);
279-
return copy;
264+
return qinstr;
280265
}
281266

282267
/*
283268
* Register a child NodeInstrumentation entry for abort processing.
284269
*
285-
* On abort, ResOwnerReleaseInstrumentation will walk the parent's list to
286-
* recover buffer/WAL data from entries that were never finalized, in order for
270+
* On abort, AtAbort_Instrumentation will walk the parent's list to recover
271+
* buffer/WAL data from entries that were never finalized, in order for
287272
* aggregate totals to be accurate despite the query erroring out.
288273
*/
289274
void
@@ -340,17 +325,7 @@ InstrAllocNode(int instrument_options, bool async_mode)
340325
{
341326
NodeInstrumentation *instr;
342327

343-
/*
344-
* If needed, allocate in a context that supports stack-based
345-
* instrumentation abort handling. We can utilize TopTransactionContext
346-
* instead of TopMemoryContext here because nodes don't get used for
347-
* utility commands that restart transactions, which would require a
348-
* context that survives longer (EXPLAIN ANALYZE is fine).
349-
*/
350-
if ((instrument_options & (INSTRUMENT_BUFFERS | INSTRUMENT_WAL)) != 0)
351-
instr = MemoryContextAlloc(TopTransactionContext, sizeof(NodeInstrumentation));
352-
else
353-
instr = palloc(sizeof(NodeInstrumentation));
328+
instr = palloc(sizeof(NodeInstrumentation));
354329

355330
InstrInitNode(instr, instrument_options);
356331
instr->async_mode = async_mode;
@@ -422,26 +397,17 @@ InstrStopNode(NodeInstrumentation *instr, double nTuples)
422397
NodeInstrumentation *
423398
InstrFinalizeNode(NodeInstrumentation *instr, Instrumentation *parent)
424399
{
425-
NodeInstrumentation *dst;
426-
427400
/* If we didn't use stack based instrumentation, nothing to be done */
428401
if (!instr->instr.need_stack)
429402
return instr;
430403

431-
/* Copy into per-query memory context */
432-
dst = palloc(sizeof(NodeInstrumentation));
433-
memcpy(dst, instr, sizeof(NodeInstrumentation));
434-
435404
/* Accumulate node's buffer/WAL usage to the parent */
436-
InstrAccumStack(parent, &dst->instr);
405+
InstrAccumStack(parent, &instr->instr);
437406

438-
/* Unregister from query's unfinalized list before freeing */
439-
if (instr->instr.need_stack)
440-
dlist_delete(&instr->unfinalized_node);
441-
442-
pfree(instr);
407+
/* Unregister from query's unfinalized list */
408+
dlist_delete(&instr->unfinalized_node);
443409

444-
return dst;
410+
return instr;
445411
}
446412

447413
/* Update tuple count */
@@ -630,18 +596,10 @@ TriggerInstrumentation *
630596
InstrAllocTrigger(int n, int instrument_options)
631597
{
632598
TriggerInstrumentation *tginstr;
633-
634-
/*
635-
* If needed, allocate in TopTransactionContext so the memory survives
636-
* transaction abort — ResOwnerReleaseInstrumentation needs to access it.
637-
*/
638-
if ((instrument_options & (INSTRUMENT_BUFFERS | INSTRUMENT_WAL)) != 0)
639-
tginstr = MemoryContextAllocZero(TopTransactionContext,
640-
n * sizeof(TriggerInstrumentation));
641-
else
642-
tginstr = palloc0(n * sizeof(TriggerInstrumentation));
643599
int i;
644600

601+
tginstr = palloc0(n * sizeof(TriggerInstrumentation));
602+
645603
for (i = 0; i < n; i++)
646604
InstrInitOptions(&tginstr[i].instr, instrument_options);
647605

@@ -657,7 +615,7 @@ InstrStartTrigger(QueryInstrumentation *qinstr, TriggerInstrumentation *tginstr)
657615
* On first call, register with the parent QueryInstrumentation for abort
658616
* recovery. The trigger stays on the list for the query lifetime -- on
659617
* normal completion ExecFinalizeTriggerInstrumentation handles it, on
660-
* abort ResOwnerReleaseInstrumentation does.
618+
* abort AtAbort_Instrumentation does.
661619
*
662620
* We detect first call by checking if the dlist_node is still in its
663621
* palloc0-zeroed state (prev == NULL).

0 commit comments

Comments
 (0)