diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index 3ba37f6e88..28b5a20ae7 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -76,7 +76,8 @@ do { \ static void toast_delete_datum(Relation rel, Datum value); static Datum toast_save_datum(Relation rel, Datum value, struct varlena *oldexternal, int options); -static bool toast_valueid_exists(Oid toastrelid, Oid valueid); +static bool toastrel_valueid_exists(Relation toastrel, Oid valueid); +static bool toastid_valueid_exists(Oid toastrelid, Oid valueid); static struct varlena *toast_fetch_datum(struct varlena * attr); static struct varlena *toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length); @@ -1342,7 +1343,34 @@ toast_save_datum(Relation rel, Datum value, /* Must copy to access aligned fields */ VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal); if (old_toast_pointer.va_toastrelid == rel->rd_toastoid) + { + /* This value came from the old toast table; reuse its OID */ toast_pointer.va_valueid = old_toast_pointer.va_valueid; + + /* + * There is a corner case here: the table rewrite might have + * to copy both live and recently-dead versions of a row, and + * those versions could easily reference the same toast value. + * When we copy the second or later version of such a row, + * reusing the OID will mean we select an OID that's already + * in the new toast table. Check for that, and if so, just + * fall through without writing the data again. + * + * While annoying and ugly-looking, this is a good thing + * because it ensures that we wind up with only one copy of + * the toast value when there is only one copy in the old + * toast table. Before we detected this case, we'd have made + * multiple copies, wasting space; and what's worse, the + * copies belonging to already-deleted heap tuples would not + * be reclaimed by VACUUM. + */ + if (toastrel_valueid_exists(toastrel, + toast_pointer.va_valueid)) + { + /* Match, so short-circuit the data storage loop below */ + data_todo = 0; + } + } } if (toast_pointer.va_valueid == InvalidOid) { @@ -1356,8 +1384,8 @@ toast_save_datum(Relation rel, Datum value, GetNewOidWithIndex(toastrel, RelationGetRelid(toastidx), (AttrNumber) 1); - } while (toast_valueid_exists(rel->rd_toastoid, - toast_pointer.va_valueid)); + } while (toastid_valueid_exists(rel->rd_toastoid, + toast_pointer.va_valueid)); } } @@ -1495,24 +1523,18 @@ toast_delete_datum(Relation rel, Datum value) /* ---------- - * toast_valueid_exists - + * toastrel_valueid_exists - * * Test whether a toast value with the given ID exists in the toast relation * ---------- */ static bool -toast_valueid_exists(Oid toastrelid, Oid valueid) +toastrel_valueid_exists(Relation toastrel, Oid valueid) { bool result = false; - Relation toastrel; ScanKeyData toastkey; SysScanDesc toastscan; - /* - * Open the toast relation - */ - toastrel = heap_open(toastrelid, AccessShareLock); - /* * Setup a scan key to find chunks with matching va_valueid */ @@ -1530,10 +1552,27 @@ toast_valueid_exists(Oid toastrelid, Oid valueid) if (systable_getnext(toastscan) != NULL) result = true; - /* - * End scan and close relations - */ systable_endscan(toastscan); + + return result; +} + +/* ---------- + * toastid_valueid_exists - + * + * As above, but work from toast rel's OID not an open relation + * ---------- + */ +static bool +toastid_valueid_exists(Oid toastrelid, Oid valueid) +{ + bool result; + Relation toastrel; + + toastrel = heap_open(toastrelid, AccessShareLock); + + result = toastrel_valueid_exists(toastrel, valueid); + heap_close(toastrel, AccessShareLock); return result; diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 997449ef29..9408f259a6 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -787,16 +787,19 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, * When doing swap by content, any toast pointers written into NewHeap * must use the old toast table's OID, because that's where the toast * data will eventually be found. Set this up by setting rd_toastoid. - * This also tells tuptoaster.c to preserve the toast value OIDs, - * which we want so as not to invalidate toast pointers in system - * catalog caches. + * This also tells toast_save_datum() to preserve the toast value + * OIDs, which we want so as not to invalidate toast pointers in + * system catalog caches, and to avoid making multiple copies of a + * single toast value. * * Note that we must hold NewHeap open until we are done writing data, * since the relcache will not guarantee to remember this setting once * the relation is closed. Also, this technique depends on the fact * that no one will try to read from the NewHeap until after we've * finished writing it and swapping the rels --- otherwise they could - * follow the toast pointers to the wrong place. + * follow the toast pointers to the wrong place. (It would actually + * work for values copied over from the old toast table, but not for + * any values that we toast which were previously not toasted.) */ NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid; } diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 0e6286ba20..d404c2adb5 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -159,7 +159,8 @@ typedef struct RelationData * have the existing toast table's OID, not the OID of the transient toast * table. If rd_toastoid isn't InvalidOid, it is the OID to place in * toast pointers inserted into this rel. (Note it's set on the new - * version of the main heap, not the toast table itself.) + * version of the main heap, not the toast table itself.) This also + * causes toast_save_datum() to try to preserve toast value OIDs. */ Oid rd_toastoid; /* Real TOAST table's OID, or InvalidOid */