From 1ce34519c79e9eed1b6b8dd2f47ae95564e739d7 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Fri, 24 Jun 2022 08:37:26 +0530 Subject: [PATCH] Doc: clarify that tablesync ignores publish operation. This patch documents that the initial data synchronization (tablesync) for logical replication does not take into account the publication 'publish' parameter when copying the existing table data. Author: Peter Smith Reviewed-by: Shi yu, Euler Taveira, Robert Haas, Amit Kapila Discussion: https://postgr.es/m/CAHut+PtbfALjFpS2MkrvQ+wWQKByP7CNh9RtFta-r=BHEU3S3w@mail.gmail.com --- doc/src/sgml/logical-replication.sgml | 218 +++++++++++++++++++++- doc/src/sgml/ref/create_publication.sgml | 6 + doc/src/sgml/ref/create_subscription.sgml | 4 + 3 files changed, 225 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml index 145ea71d61..bdf1e7b727 100644 --- a/doc/src/sgml/logical-replication.sgml +++ b/doc/src/sgml/logical-replication.sgml @@ -117,9 +117,10 @@ Publications can choose to limit the changes they produce to any combination of INSERT, UPDATE, DELETE, and TRUNCATE, similar to how triggers are fired by - particular event types. By default, all operation types are replicated. - (Row filters have no effect for TRUNCATE. See - ). + particular event types. By default, all operation types are replicated. + These publication specifications apply only for DML operations; they do not affect the initial + data synchronization copy. (Row filters have no effect for + TRUNCATE. See ). @@ -317,6 +318,200 @@ + + + Examples + + + Create some test tables on the publisher. + +test_pub=# CREATE TABLE t1(a int, b text, PRIMARY KEY(a)); +CREATE TABLE +test_pub=# CREATE TABLE t2(c int, d text, PRIMARY KEY(c)); +CREATE TABLE +test_pub=# CREATE TABLE t3(e int, f text, PRIMARY KEY(e)); +CREATE TABLE + + + + Create the same tables on the subscriber. + +test_sub=# CREATE TABLE t1(a int, b text, PRIMARY KEY(a)); +CREATE TABLE +test_sub=# CREATE TABLE t2(c int, d text, PRIMARY KEY(c)); +CREATE TABLE +test_sub=# CREATE TABLE t3(e int, f text, PRIMARY KEY(e)); +CREATE TABLE + + + + Insert data to the tables at the publisher side. + +test_pub=# INSERT INTO t1 VALUES (1, 'one'), (2, 'two'), (3, 'three'); +INSERT 0 3 +test_pub=# INSERT INTO t2 VALUES (1, 'A'), (2, 'B'), (3, 'C'); +INSERT 0 3 +test_pub=# INSERT INTO t3 VALUES (1, 'i'), (2, 'ii'), (3, 'iii'); +INSERT 0 3 + + + + Create publications for the tables. The publications pub2 + and pub3a disallow some publish + operations. The publication pub3b has a row filter (see + ). + +test_pub=# CREATE PUBLICATION pub1 FOR TABLE t1; +CREATE PUBLICATION +test_pub=# CREATE PUBLICATION pub2 FOR TABLE t2 WITH (publish = 'truncate'); +CREATE PUBLICATION +test_pub=# CREATE PUBLICATION pub3a FOR TABLE t3 WITH (publish = 'truncate'); +CREATE PUBLICATION +test_pub=# CREATE PUBLICATION pub3b FOR TABLE t3 WHERE (e > 5); +CREATE PUBLICATION + + + + Create subscriptions for the publications. The subscription + sub3 subscribes to both pub3a and + pub3b. All subscriptions will copy initial data by default. + +test_sub=# CREATE SUBSCRIPTION sub1 +test_sub-# CONNECTION 'host=localhost dbname=test_pub application_name=sub1' +test_sub-# PUBLICATION pub1; +CREATE SUBSCRIPTION +test_sub=# CREATE SUBSCRIPTION sub2 +test_sub-# CONNECTION 'host=localhost dbname=test_pub application_name=sub2' +test_sub-# PUBLICATION pub2; +CREATE SUBSCRIPTION +test_sub=# CREATE SUBSCRIPTION sub3 +test_sub-# CONNECTION 'host=localhost dbname=test_pub application_name=sub3' +test_sub-# PUBLICATION pub3a, pub3b; +CREATE SUBSCRIPTION + + + + Observe that initial table data is copied, regardless of the + publish operation of the publication. + +test_sub=# SELECT * FROM t1; + a | b +---+------- + 1 | one + 2 | two + 3 | three +(3 rows) + +test_sub=# SELECT * FROM t2; + c | d +---+--- + 1 | A + 2 | B + 3 | C +(3 rows) + + + + Furthermore, because the initial data copy ignores the publish + operation, and because publication pub3a has no row filter, + it means the copied table t3 contains all rows even when + they do not match the row filter of publication pub3b. + +test_sub=# SELECT * FROM t3; + e | f +---+----- + 1 | i + 2 | ii + 3 | iii +(3 rows) + + + + Insert more data to the tables at the publisher side. + +test_pub=# INSERT INTO t1 VALUES (4, 'four'), (5, 'five'), (6, 'six'); +INSERT 0 3 +test_pub=# INSERT INTO t2 VALUES (4, 'D'), (5, 'E'), (6, 'F'); +INSERT 0 3 +test_pub=# INSERT INTO t3 VALUES (4, 'iv'), (5, 'v'), (6, 'vi'); +INSERT 0 3 + + + + Now the publisher side data looks like: + +test_pub=# SELECT * FROM t1; + a | b +---+------- + 1 | one + 2 | two + 3 | three + 4 | four + 5 | five + 6 | six +(6 rows) + +test_pub=# SELECT * FROM t2; + c | d +---+--- + 1 | A + 2 | B + 3 | C + 4 | D + 5 | E + 6 | F +(6 rows) + +test_pub=# SELECT * FROM t3; + e | f +---+----- + 1 | i + 2 | ii + 3 | iii + 4 | iv + 5 | v + 6 | vi +(6 rows) + + + + Observe that during normal replication the appropriate + publish operations are used. This means publications + pub2 and pub3a will not replicate the + INSERT. Also, publication pub3b will + only replicate data that matches the row filter of pub3b. + Now the subscriber side data looks like: + +test_sub=# SELECT * FROM t1; + a | b +---+------- + 1 | one + 2 | two + 3 | three + 4 | four + 5 | five + 6 | six +(6 rows) + +test_sub=# SELECT * FROM t2; + c | d +---+--- + 1 | A + 2 | B + 3 | C +(3 rows) + +test_sub=# SELECT * FROM t3; + e | f +---+----- + 1 | i + 2 | ii + 3 | iii + 6 | vi +(4 rows) + + + @@ -461,6 +656,16 @@ for details. + + + Because initial data synchronization does not take into account the + publish parameter when copying existing table data, + some rows may be copied that would not be replicated using DML. Refer to + , and see + for examples. + + + If the subscriber is in a release prior to 15, copy pre-existing data @@ -1095,6 +1300,13 @@ CONTEXT: processing remote data for replication origin "pg_16395" during "INSER replication of the table is given back to the main apply process where replication continues as normal. + + + The publication publish parameter only affects what + DML operations will be replicated. The initial data synchronization does + not take this parameter into account when copying the existing table data. + + diff --git a/doc/src/sgml/ref/create_publication.sgml b/doc/src/sgml/ref/create_publication.sgml index 1a828e8d2f..5790d76270 100644 --- a/doc/src/sgml/ref/create_publication.sgml +++ b/doc/src/sgml/ref/create_publication.sgml @@ -177,6 +177,12 @@ CREATE PUBLICATION name and so the default value for this option is 'insert, update, delete, truncate'. + + This parameter only affects DML operations. In particular, the initial + data synchronization (see ) + for logical replication does not take this parameter into account when + copying existing table data. + diff --git a/doc/src/sgml/ref/create_subscription.sgml b/doc/src/sgml/ref/create_subscription.sgml index 35b39c28da..34b3264b26 100644 --- a/doc/src/sgml/ref/create_subscription.sgml +++ b/doc/src/sgml/ref/create_subscription.sgml @@ -353,6 +353,10 @@ CREATE SUBSCRIPTION subscription_namepublish parameter when copying existing table data, some rows + may be copied that would not be replicated using DML. See + for examples.