Allow the use of a hash index on the subscriber during replication.
Commit 89e46da5e5 allowed using BTREE indexes that are neither PRIMARY KEY nor REPLICA IDENTITY on the subscriber during apply of update/delete. This patch extends that functionality to also allow HASH indexes. We explored supporting other index access methods as well but they don't have a fixed strategy for equality operation which is required by the current infrastructure in logical replication to scan the indexes. Author: Kuroda Hayato Reviewed-by: Peter Smith, Onder Kalaci, Amit Kapila Discussion: https://postgr.es/m/TYAPR01MB58669D7414E59664E17A5827F522A@TYAPR01MB5866.jpnprd01.prod.outlook.com
This commit is contained in:
parent
a5ea825f95
commit
edca342434
@ -134,9 +134,9 @@
|
|||||||
to replica identity <literal>FULL</literal>, which means the entire row becomes
|
to replica identity <literal>FULL</literal>, which means the entire row becomes
|
||||||
the key. When replica identity <literal>FULL</literal> is specified,
|
the key. When replica identity <literal>FULL</literal> is specified,
|
||||||
indexes can be used on the subscriber side for searching the rows. Candidate
|
indexes can be used on the subscriber side for searching the rows. Candidate
|
||||||
indexes must be btree, non-partial, and the leftmost index field must be a
|
indexes must be btree or hash, non-partial, and the leftmost index field must
|
||||||
column (not an expression) that references the published table column. These
|
be a column (not an expression) that references the published table column.
|
||||||
restrictions on the non-unique index properties adhere to some of the
|
These restrictions on the non-unique index properties adhere to some of the
|
||||||
restrictions that are enforced for primary keys. If there are no such
|
restrictions that are enforced for primary keys. If there are no such
|
||||||
suitable indexes, the search on the subscriber side can be very inefficient,
|
suitable indexes, the search on the subscriber side can be very inefficient,
|
||||||
therefore replica identity <literal>FULL</literal> should only be used as a
|
therefore replica identity <literal>FULL</literal> should only be used as a
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
#include "access/tableam.h"
|
#include "access/tableam.h"
|
||||||
#include "access/transam.h"
|
#include "access/transam.h"
|
||||||
#include "access/xact.h"
|
#include "access/xact.h"
|
||||||
|
#include "catalog/pg_am_d.h"
|
||||||
#include "commands/trigger.h"
|
#include "commands/trigger.h"
|
||||||
#include "executor/executor.h"
|
#include "executor/executor.h"
|
||||||
#include "executor/nodeModifyTable.h"
|
#include "executor/nodeModifyTable.h"
|
||||||
@ -41,6 +42,49 @@
|
|||||||
static bool tuples_equal(TupleTableSlot *slot1, TupleTableSlot *slot2,
|
static bool tuples_equal(TupleTableSlot *slot1, TupleTableSlot *slot2,
|
||||||
TypeCacheEntry **eq);
|
TypeCacheEntry **eq);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns the fixed strategy number, if any, of the equality operator for the
|
||||||
|
* given index access method, otherwise, InvalidStrategy.
|
||||||
|
*
|
||||||
|
* Currently, only Btree and Hash indexes are supported. The other index access
|
||||||
|
* methods don't have a fixed strategy for equality operation - instead, the
|
||||||
|
* support routines of each operator class interpret the strategy numbers
|
||||||
|
* according to the operator class's definition.
|
||||||
|
*/
|
||||||
|
StrategyNumber
|
||||||
|
get_equal_strategy_number_for_am(Oid am)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
switch (am)
|
||||||
|
{
|
||||||
|
case BTREE_AM_OID:
|
||||||
|
ret = BTEqualStrategyNumber;
|
||||||
|
break;
|
||||||
|
case HASH_AM_OID:
|
||||||
|
ret = HTEqualStrategyNumber;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
/* XXX: Only Btree and Hash indexes are supported */
|
||||||
|
ret = InvalidStrategy;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return the appropriate strategy number which corresponds to the equality
|
||||||
|
* operator.
|
||||||
|
*/
|
||||||
|
static StrategyNumber
|
||||||
|
get_equal_strategy_number(Oid opclass)
|
||||||
|
{
|
||||||
|
Oid am = get_opclass_method(opclass);
|
||||||
|
|
||||||
|
return get_equal_strategy_number_for_am(am);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Setup a ScanKey for a search in the relation 'rel' for a tuple 'key' that
|
* Setup a ScanKey for a search in the relation 'rel' for a tuple 'key' that
|
||||||
* is setup to match 'rel' (*NOT* idxrel!).
|
* is setup to match 'rel' (*NOT* idxrel!).
|
||||||
@ -77,6 +121,7 @@ build_replindex_scan_key(ScanKey skey, Relation rel, Relation idxrel,
|
|||||||
Oid opfamily;
|
Oid opfamily;
|
||||||
RegProcedure regop;
|
RegProcedure regop;
|
||||||
int table_attno = indkey->values[index_attoff];
|
int table_attno = indkey->values[index_attoff];
|
||||||
|
StrategyNumber eq_strategy;
|
||||||
|
|
||||||
if (!AttributeNumberIsValid(table_attno))
|
if (!AttributeNumberIsValid(table_attno))
|
||||||
{
|
{
|
||||||
@ -93,20 +138,22 @@ build_replindex_scan_key(ScanKey skey, Relation rel, Relation idxrel,
|
|||||||
*/
|
*/
|
||||||
optype = get_opclass_input_type(opclass->values[index_attoff]);
|
optype = get_opclass_input_type(opclass->values[index_attoff]);
|
||||||
opfamily = get_opclass_family(opclass->values[index_attoff]);
|
opfamily = get_opclass_family(opclass->values[index_attoff]);
|
||||||
|
eq_strategy = get_equal_strategy_number(opclass->values[index_attoff]);
|
||||||
|
|
||||||
operator = get_opfamily_member(opfamily, optype,
|
operator = get_opfamily_member(opfamily, optype,
|
||||||
optype,
|
optype,
|
||||||
BTEqualStrategyNumber);
|
eq_strategy);
|
||||||
|
|
||||||
if (!OidIsValid(operator))
|
if (!OidIsValid(operator))
|
||||||
elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
|
elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
|
||||||
BTEqualStrategyNumber, optype, optype, opfamily);
|
eq_strategy, optype, optype, opfamily);
|
||||||
|
|
||||||
regop = get_opcode(operator);
|
regop = get_opcode(operator);
|
||||||
|
|
||||||
/* Initialize the scankey. */
|
/* Initialize the scankey. */
|
||||||
ScanKeyInit(&skey[skey_attoff],
|
ScanKeyInit(&skey[skey_attoff],
|
||||||
index_attoff + 1,
|
index_attoff + 1,
|
||||||
BTEqualStrategyNumber,
|
eq_strategy,
|
||||||
regop,
|
regop,
|
||||||
searchslot->tts_values[table_attno - 1]);
|
searchslot->tts_values[table_attno - 1]);
|
||||||
|
|
||||||
|
@ -17,6 +17,9 @@
|
|||||||
|
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#ifdef USE_ASSERT_CHECKING
|
||||||
|
#include "access/amapi.h"
|
||||||
|
#endif
|
||||||
#include "access/genam.h"
|
#include "access/genam.h"
|
||||||
#include "access/table.h"
|
#include "access/table.h"
|
||||||
#include "catalog/namespace.h"
|
#include "catalog/namespace.h"
|
||||||
@ -779,7 +782,7 @@ RemoteRelContainsLeftMostColumnOnIdx(IndexInfo *indexInfo, AttrMap *attrmap)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns the oid of an index that can be used by the apply worker to scan
|
* Returns the oid of an index that can be used by the apply worker to scan
|
||||||
* the relation. The index must be btree, non-partial, and the leftmost
|
* the relation. The index must be btree or hash, non-partial, and the leftmost
|
||||||
* field must be a column (not an expression) that references the remote
|
* field must be a column (not an expression) that references the remote
|
||||||
* relation column. These limitations help to keep the index scan similar
|
* relation column. These limitations help to keep the index scan similar
|
||||||
* to PK/RI index scans.
|
* to PK/RI index scans.
|
||||||
@ -791,11 +794,11 @@ RemoteRelContainsLeftMostColumnOnIdx(IndexInfo *indexInfo, AttrMap *attrmap)
|
|||||||
* compare the tuples for non-PK/RI index scans. See
|
* compare the tuples for non-PK/RI index scans. See
|
||||||
* RelationFindReplTupleByIndex().
|
* RelationFindReplTupleByIndex().
|
||||||
*
|
*
|
||||||
* XXX: There are no fundamental problems for supporting non-btree indexes.
|
* XXX: See IsIndexUsableForReplicaIdentityFull() to know the challenges in
|
||||||
* We mostly need to relax the limitations in RelationFindReplTupleByIndex().
|
* supporting indexes other than btree and hash. For partial indexes, the
|
||||||
* For partial indexes, the required changes are likely to be larger. If
|
* required changes are likely to be larger. If none of the tuples satisfy
|
||||||
* none of the tuples satisfy the expression for the index scan, we fall-back
|
* the expression for the index scan, we fall-back to sequential execution,
|
||||||
* to sequential execution, which might not be a good idea in some cases.
|
* which might not be a good idea in some cases.
|
||||||
*
|
*
|
||||||
* We expect to call this function when REPLICA IDENTITY FULL is defined for
|
* We expect to call this function when REPLICA IDENTITY FULL is defined for
|
||||||
* the remote relation.
|
* the remote relation.
|
||||||
@ -834,15 +837,43 @@ FindUsableIndexForReplicaIdentityFull(Relation localrel, AttrMap *attrmap)
|
|||||||
/*
|
/*
|
||||||
* Returns true if the index is usable for replica identity full. For details,
|
* Returns true if the index is usable for replica identity full. For details,
|
||||||
* see FindUsableIndexForReplicaIdentityFull.
|
* see FindUsableIndexForReplicaIdentityFull.
|
||||||
|
*
|
||||||
|
* Currently, only Btree and Hash indexes can be returned as usable. This
|
||||||
|
* is due to following reasons:
|
||||||
|
*
|
||||||
|
* 1) Other index access methods don't have a fixed strategy for equality
|
||||||
|
* operation. Refer get_equal_strategy_number_for_am().
|
||||||
|
*
|
||||||
|
* 2) For indexes other than PK and REPLICA IDENTITY, we need to match the
|
||||||
|
* local and remote tuples. The equality routine tuples_equal() cannot accept
|
||||||
|
* a datatype (e.g. point or box) that does not have a default operator class
|
||||||
|
* for Btree or Hash.
|
||||||
|
*
|
||||||
|
* XXX: Note that BRIN and GIN indexes do not implement "amgettuple" which
|
||||||
|
* will be used later to fetch the tuples. See RelationFindReplTupleByIndex().
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
IsIndexUsableForReplicaIdentityFull(IndexInfo *indexInfo)
|
IsIndexUsableForReplicaIdentityFull(IndexInfo *indexInfo)
|
||||||
{
|
{
|
||||||
bool is_btree = (indexInfo->ii_Am == BTREE_AM_OID);
|
/* Ensure that the index access method has a valid equal strategy */
|
||||||
bool is_partial = (indexInfo->ii_Predicate != NIL);
|
if (get_equal_strategy_number_for_am(indexInfo->ii_Am) == InvalidStrategy)
|
||||||
bool is_only_on_expression = IsIndexOnlyOnExpression(indexInfo);
|
return false;
|
||||||
|
if (indexInfo->ii_Predicate != NIL)
|
||||||
|
return false;
|
||||||
|
if (IsIndexOnlyOnExpression(indexInfo))
|
||||||
|
return false;
|
||||||
|
|
||||||
return is_btree && !is_partial && !is_only_on_expression;
|
#ifdef USE_ASSERT_CHECKING
|
||||||
|
{
|
||||||
|
IndexAmRoutine *amroutine;
|
||||||
|
|
||||||
|
/* The given index access method must implement amgettuple. */
|
||||||
|
amroutine = GetIndexAmRoutineByAmId(indexInfo->ii_Am, false);
|
||||||
|
Assert(amroutine->amgettuple != NULL);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
22
src/backend/utils/cache/lsyscache.c
vendored
22
src/backend/utils/cache/lsyscache.c
vendored
@ -1255,6 +1255,28 @@ get_opclass_opfamily_and_input_type(Oid opclass, Oid *opfamily, Oid *opcintype)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* get_opclass_method
|
||||||
|
*
|
||||||
|
* Returns the OID of the index access method the opclass belongs to.
|
||||||
|
*/
|
||||||
|
Oid
|
||||||
|
get_opclass_method(Oid opclass)
|
||||||
|
{
|
||||||
|
HeapTuple tp;
|
||||||
|
Form_pg_opclass cla_tup;
|
||||||
|
Oid result;
|
||||||
|
|
||||||
|
tp = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass));
|
||||||
|
if (!HeapTupleIsValid(tp))
|
||||||
|
elog(ERROR, "cache lookup failed for opclass %u", opclass);
|
||||||
|
cla_tup = (Form_pg_opclass) GETSTRUCT(tp);
|
||||||
|
|
||||||
|
result = cla_tup->opcmethod;
|
||||||
|
ReleaseSysCache(tp);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/* ---------- OPERATOR CACHE ---------- */
|
/* ---------- OPERATOR CACHE ---------- */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -646,6 +646,7 @@ extern void check_exclusion_constraint(Relation heap, Relation index,
|
|||||||
/*
|
/*
|
||||||
* prototypes from functions in execReplication.c
|
* prototypes from functions in execReplication.c
|
||||||
*/
|
*/
|
||||||
|
extern StrategyNumber get_equal_strategy_number_for_am(Oid am);
|
||||||
extern bool RelationFindReplTupleByIndex(Relation rel, Oid idxoid,
|
extern bool RelationFindReplTupleByIndex(Relation rel, Oid idxoid,
|
||||||
LockTupleMode lockmode,
|
LockTupleMode lockmode,
|
||||||
TupleTableSlot *searchslot,
|
TupleTableSlot *searchslot,
|
||||||
|
@ -106,6 +106,7 @@ extern Oid get_opclass_family(Oid opclass);
|
|||||||
extern Oid get_opclass_input_type(Oid opclass);
|
extern Oid get_opclass_input_type(Oid opclass);
|
||||||
extern bool get_opclass_opfamily_and_input_type(Oid opclass,
|
extern bool get_opclass_opfamily_and_input_type(Oid opclass,
|
||||||
Oid *opfamily, Oid *opcintype);
|
Oid *opfamily, Oid *opcintype);
|
||||||
|
extern Oid get_opclass_method(Oid opclass);
|
||||||
extern RegProcedure get_opcode(Oid opno);
|
extern RegProcedure get_opcode(Oid opno);
|
||||||
extern char *get_opname(Oid opno);
|
extern char *get_opname(Oid opno);
|
||||||
extern Oid get_op_rettype(Oid opno);
|
extern Oid get_op_rettype(Oid opno);
|
||||||
|
@ -478,6 +478,74 @@ $node_subscriber->safe_psql('postgres', "DROP TABLE test_replica_id_full");
|
|||||||
# data
|
# data
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Testcase start: Subscription can use hash index
|
||||||
|
#
|
||||||
|
|
||||||
|
# create tables on pub and sub
|
||||||
|
$node_publisher->safe_psql('postgres',
|
||||||
|
"CREATE TABLE test_replica_id_full (x int, y text)");
|
||||||
|
$node_publisher->safe_psql('postgres',
|
||||||
|
"ALTER TABLE test_replica_id_full REPLICA IDENTITY FULL");
|
||||||
|
$node_subscriber->safe_psql('postgres',
|
||||||
|
"CREATE TABLE test_replica_id_full (x int, y text)");
|
||||||
|
$node_subscriber->safe_psql('postgres',
|
||||||
|
"CREATE INDEX test_replica_id_full_idx ON test_replica_id_full USING HASH (x)");
|
||||||
|
|
||||||
|
# insert some initial data
|
||||||
|
$node_publisher->safe_psql('postgres',
|
||||||
|
"INSERT INTO test_replica_id_full SELECT i, (i%10)::text FROM generate_series(0,10) i"
|
||||||
|
);
|
||||||
|
|
||||||
|
# create pub/sub
|
||||||
|
$node_publisher->safe_psql('postgres',
|
||||||
|
"CREATE PUBLICATION tap_pub_rep_full FOR TABLE test_replica_id_full");
|
||||||
|
$node_subscriber->safe_psql('postgres',
|
||||||
|
"CREATE SUBSCRIPTION tap_sub_rep_full CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub_rep_full"
|
||||||
|
);
|
||||||
|
|
||||||
|
# wait for initial table synchronization to finish
|
||||||
|
$node_subscriber->wait_for_subscription_sync($node_publisher, $appname);
|
||||||
|
|
||||||
|
# delete 2 rows
|
||||||
|
$node_publisher->safe_psql('postgres',
|
||||||
|
"DELETE FROM test_replica_id_full WHERE x IN (5, 6)");
|
||||||
|
|
||||||
|
# update 2 rows
|
||||||
|
$node_publisher->safe_psql('postgres',
|
||||||
|
"UPDATE test_replica_id_full SET x = 100, y = '200' WHERE x IN (1, 2)");
|
||||||
|
|
||||||
|
# wait until the index is used on the subscriber
|
||||||
|
$node_publisher->wait_for_catchup($appname);
|
||||||
|
$node_subscriber->poll_query_until('postgres',
|
||||||
|
q{select (idx_scan = 4) from pg_stat_all_indexes where indexrelname = 'test_replica_id_full_idx';}
|
||||||
|
)
|
||||||
|
or die
|
||||||
|
"Timed out while waiting for check subscriber tap_sub_rep_full deletes 2 rows and updates 2 rows via index";
|
||||||
|
|
||||||
|
# make sure that the subscriber has the correct data after the UPDATE
|
||||||
|
$result = $node_subscriber->safe_psql('postgres',
|
||||||
|
"select count(*) from test_replica_id_full WHERE (x = 100 and y = '200')"
|
||||||
|
);
|
||||||
|
is($result, qq(2),
|
||||||
|
'ensure subscriber has the correct data at the end of the test');
|
||||||
|
|
||||||
|
# make sure that the subscriber has the correct data after the first DELETE
|
||||||
|
$result = $node_subscriber->safe_psql('postgres',
|
||||||
|
"select count(*) from test_replica_id_full where x in (5, 6)");
|
||||||
|
is($result, qq(0),
|
||||||
|
'ensure subscriber has the correct data at the end of the test');
|
||||||
|
|
||||||
|
# cleanup pub
|
||||||
|
$node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_rep_full");
|
||||||
|
$node_publisher->safe_psql('postgres', "DROP TABLE test_replica_id_full");
|
||||||
|
# cleanup sub
|
||||||
|
$node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub_rep_full");
|
||||||
|
$node_subscriber->safe_psql('postgres', "DROP TABLE test_replica_id_full");
|
||||||
|
|
||||||
|
# Testcase end: Subscription can use hash index
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
$node_subscriber->stop('fast');
|
$node_subscriber->stop('fast');
|
||||||
$node_publisher->stop('fast');
|
$node_publisher->stop('fast');
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user