Improve memory management and performance of tuplestore.c
Here we make tuplestore.c use a generation.c memory context rather than allocating tuples into the CurrentMemoryContext, which is primarily the ExecutorState or PortalHoldContext memory context. Not having a dedicated context can cause the CurrentMemoryContext to become bloated when pfree'd chunks are not reused by future tuples. Using generation speeds up users of tuplestore.c, such as the Materialize, WindowAgg and CTE Scan executor nodes. The main reason for the speedup is that generation.c is more memory efficient than aset.c memory contexts. Specifically, generation does not round sizes up to the next power of 2. This not only saves memory, allowing more tuples to fit in work_mem, but also makes the memory usage more compact so that it fits on fewer cachelines. One benchmark showed up to a 22% performance increase in a query containing a Materialize node. Much higher gains are possible if the memory reduction prevents tuplestore.c from spilling to disk. This is especially true for WindowAgg nodes, where improvements of several thousand times are possible if the memory reductions made here prevent tuplestore from spilling to disk. Additionally, a generation.c memory context is much better suited for this job as it works well with FIFO palloc/pfree patterns, which is exactly how tuplestore.c uses it. Because of the way generation.c allocates memory, tuples consecutively stored in tuplestores are much more likely to be stored consecutively in memory. This allows the CPU's hardware prefetcher to work more efficiently, as the more predictable access pattern lets cachelines for the next tuple be loaded from RAM before the executor needs them. Using a dedicated memory context for storing tuples also allows us to clean up the memory used by the tuplestore more efficiently, as we can reset or delete the context rather than looping over all stored tuples and pfree'ing them one by one.
Also, remove a badly placed USEMEM call in readtup_heap(). The tuple wasn't being allocated in the Tuplestorestate's context, so there is no need to adjust the memory consumed by the tuplestore there.

Author: David Rowley
Reviewed-by: Matthias van de Meent, Dmitry Dolgov
Discussion: https://postgr.es/m/CAApHDvp5Py9g4Rjq7_inL3-MCK1Co2CRt_YWFwTU2zfQix0p4A@mail.gmail.com
This commit is contained in:
parent
53abb1e0eb
commit
590b045c37
@ -266,7 +266,14 @@ tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
|
||||
state->availMem = state->allowedMem;
|
||||
state->maxSpace = 0;
|
||||
state->myfile = NULL;
|
||||
state->context = CurrentMemoryContext;
|
||||
|
||||
/*
|
||||
* The palloc/pfree pattern for tuple memory is in a FIFO pattern. A
|
||||
* generation context is perfectly suited for this.
|
||||
*/
|
||||
state->context = GenerationContextCreate(CurrentMemoryContext,
|
||||
"tuplestore tuples",
|
||||
ALLOCSET_DEFAULT_SIZES);
|
||||
state->resowner = CurrentResourceOwner;
|
||||
|
||||
state->memtupdeleted = 0;
|
||||
@ -429,14 +436,38 @@ tuplestore_clear(Tuplestorestate *state)
|
||||
if (state->myfile)
|
||||
BufFileClose(state->myfile);
|
||||
state->myfile = NULL;
|
||||
if (state->memtuples)
|
||||
|
||||
#ifdef USE_ASSERT_CHECKING
|
||||
{
|
||||
int64 availMem = state->availMem;
|
||||
|
||||
/*
|
||||
* Below, we reset the memory context for storing tuples. To save
|
||||
* from having to always call GetMemoryChunkSpace() on all stored
|
||||
* tuples, we adjust the availMem to forget all the tuples and just
|
||||
* recall USEMEM for the space used by the memtuples array. Here we
|
||||
* just Assert that's correct and the memory tracking hasn't gone
|
||||
* wrong anywhere.
|
||||
*/
|
||||
for (i = state->memtupdeleted; i < state->memtupcount; i++)
|
||||
{
|
||||
FREEMEM(state, GetMemoryChunkSpace(state->memtuples[i]));
|
||||
pfree(state->memtuples[i]);
|
||||
}
|
||||
availMem += GetMemoryChunkSpace(state->memtuples[i]);
|
||||
|
||||
availMem += GetMemoryChunkSpace(state->memtuples);
|
||||
|
||||
Assert(availMem == state->allowedMem);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* clear the memory consumed by the memory tuples */
|
||||
MemoryContextReset(state->context);
|
||||
|
||||
/*
|
||||
* Zero the used memory and re-consume the space for the memtuples array.
|
||||
* This saves having to FREEMEM for each stored tuple.
|
||||
*/
|
||||
state->availMem = state->allowedMem;
|
||||
USEMEM(state, GetMemoryChunkSpace(state->memtuples));
|
||||
|
||||
state->status = TSS_INMEM;
|
||||
state->truncated = false;
|
||||
state->memtupdeleted = 0;
|
||||
@ -458,16 +489,11 @@ tuplestore_clear(Tuplestorestate *state)
|
||||
void
|
||||
tuplestore_end(Tuplestorestate *state)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (state->myfile)
|
||||
BufFileClose(state->myfile);
|
||||
if (state->memtuples)
|
||||
{
|
||||
for (i = state->memtupdeleted; i < state->memtupcount; i++)
|
||||
pfree(state->memtuples[i]);
|
||||
pfree(state->memtuples);
|
||||
}
|
||||
|
||||
MemoryContextDelete(state->context);
|
||||
pfree(state->memtuples);
|
||||
pfree(state->readptrs);
|
||||
pfree(state);
|
||||
}
|
||||
@ -1578,7 +1604,6 @@ readtup_heap(Tuplestorestate *state, unsigned int len)
|
||||
MinimalTuple tuple = (MinimalTuple) palloc(tuplen);
|
||||
char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
|
||||
|
||||
USEMEM(state, GetMemoryChunkSpace(tuple));
|
||||
/* read in the tuple proper */
|
||||
tuple->t_len = tuplen;
|
||||
BufFileReadExact(state->myfile, tupbody, tupbodylen);
|
||||
|
Loading…
x
Reference in New Issue
Block a user