Remove wal_sender_delay GUC, because it's no longer useful.

The latch infrastructure is now capable of detecting all cases where the walsender loop needs to wake up, so there is no reason to have an arbitrary timeout. Also, modify the walsender loop logic to follow the standard pattern of ResetLatch, test for work to do, WaitLatch. The previous coding was both hard to follow and buggy: it would sometimes busy-loop despite having nothing available to do, e.g., between receipt of a signal and the next time it was caught up with new WAL, and it also had interesting choices like deciding to update to WALSNDSTATE_STREAMING on the strength of information known to be obsolete.
2011-08-10 18:50:28 -04:00 · 2011-08-10 18:50:28 -04:00 · cff75130b5
parent 79b2ee20c8
commit cff75130b5
5 changed files with 74 additions and 108 deletions
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@ -2008,29 +2008,6 @@ SET ENABLE_SEQSCAN TO OFF;
       </para>
       </listitem>
      </varlistentry>
-      <varlistentry id="guc-wal-sender-delay" xreflabel="wal_sender_delay">
-       <term><varname>wal_sender_delay</varname> (<type>integer</type>)</term>
-       <indexterm>
-        <primary><varname>wal_sender_delay</> configuration parameter</primary>
-       </indexterm>
-       <listitem>
-       <para>
-        Specifies the delay between activity rounds for WAL sender processes.
-        In each round the WAL sender sends any WAL accumulated since the last
-        round to the standby server. It then sleeps for
-        <varname>wal_sender_delay</> milliseconds, and repeats. The sleep
-        is interrupted by transaction commit, so the effects of a committed
-        transaction are sent to standby servers as soon as the commit
-        happens, regardless of this setting. The default value is one second
-        (<literal>1s</>).
-        Note that on many systems, the effective resolution of sleep delays is
-        10 milliseconds; setting <varname>wal_sender_delay</> to a value that
-        is not a multiple of 10 might have the same results as setting it to
-        the next higher multiple of 10. This parameter can only be set in the
-        <filename>postgresql.conf</> file or on the server command line.
-       </para>
-       </listitem>
-      </varlistentry>

      <varlistentry id="guc-wal-keep-segments" xreflabel="wal_keep_segments">
       <term><varname>wal_keep_segments</varname> (<type>integer</type>)</term>
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@ -75,7 +75,6 @@ bool		am_cascading_walsender = false;	/* Am I cascading WAL to another standby ?

 /* User-settable parameters for walsender */
 int			max_wal_senders = 0;	/* the maximum number of concurrent walsenders */
-int			WalSndDelay = 1000; /* max sleep time between some actions */
 int			replication_timeout = 60 * 1000;	/* maximum time to send one
 												 * WAL data message */

@ -475,7 +474,7 @@ ProcessRepliesIfAny(void)
 {
 	unsigned char firstchar;
 	int			r;
-	int			received = false;
+	bool		received = false;

 	for (;;)
 	{
@ -709,6 +708,9 @@ WalSndLoop(void)
 	/* Loop forever, unless we get an error */
 	for (;;)
 	{
+		/* Clear any already-pending wakeups */
+		ResetLatch(&MyWalSnd->latch);
+
 		/*
 		 * Emergency bailout if postmaster has died.  This is to avoid the
 		 * necessity for manual cleanup of all postmaster children.
@ -727,63 +729,87 @@ WalSndLoop(void)
 		/* Normal exit from the walsender is here */
 		if (walsender_shutdown_requested)
 		{
-			/* Inform the standby that XLOG streaming was done */
+			/* Inform the standby that XLOG streaming is done */
 			pq_puttextmessage('C', "COPY 0");
 			pq_flush();

 			proc_exit(0);
 		}

+		/* Check for input from the client */
+		ProcessRepliesIfAny();
+
 		/*
 		 * If we don't have any pending data in the output buffer, try to send
-		 * some more.
+		 * some more.  If there is some, we don't bother to call XLogSend
+		 * again until we've flushed it ... but we'd better assume we are not
+		 * caught up.
 		 */
 		if (!pq_is_send_pending())
-		{
 			XLogSend(output_message, &caughtup);
+		else
+			caughtup = false;

-			/*
-			 * Even if we wrote all the WAL that was available when we started
-			 * sending, more might have arrived while we were sending this
-			 * batch. We had the latch set while sending, so we have not
-			 * received any signals from that time. Let's arm the latch again,
-			 * and after that check that we're still up-to-date.
-			 */
-			if (caughtup && !pq_is_send_pending())
-			{
-				ResetLatch(&MyWalSnd->latch);
-
-				XLogSend(output_message, &caughtup);
-			}
-		}
-
-		/* Flush pending output to the client */
+		/* Try to flush pending output to the client */
 		if (pq_flush_if_writable() != 0)
 			break;

+		/* If nothing remains to be sent right now ... */
+		if (caughtup && !pq_is_send_pending())
+		{
+			/*
+			 * If we're in catchup state, move to streaming.  This is an
+			 * important state change for users to know about, since before
+			 * this point data loss might occur if the primary dies and we
+			 * need to failover to the standby. The state change is also
+			 * important for synchronous replication, since commits that
+			 * started to wait at that point might wait for some time.
+			 */
+			if (MyWalSnd->state == WALSNDSTATE_CATCHUP)
+			{
+				ereport(DEBUG1,
+						(errmsg("standby \"%s\" has now caught up with primary",
+								application_name)));
+				WalSndSetState(WALSNDSTATE_STREAMING);
+			}
+
 			/*
 			 * When SIGUSR2 arrives, we send any outstanding logs up to the
 			 * shutdown checkpoint record (i.e., the latest record) and exit.
 			 * This may be a normal termination at shutdown, or a promotion,
 			 * the walsender is not sure which.
 			 */
-		if (walsender_ready_to_stop && !pq_is_send_pending())
+			if (walsender_ready_to_stop)
 			{
+				/* ... let's just be real sure we're caught up ... */
 				XLogSend(output_message, &caughtup);
-			ProcessRepliesIfAny();
 				if (caughtup && !pq_is_send_pending())
+				{
 					walsender_shutdown_requested = true;
+					continue;		/* don't want to wait more */
+				}
+			}
 		}

-		if ((caughtup || pq_is_send_pending()) &&
-			!got_SIGHUP &&
-			!walsender_shutdown_requested)
+		/*
+		 * We don't block if not caught up, unless there is unsent data
+		 * pending in which case we'd better block until the socket is
+		 * write-ready.  This test is only needed for the case where XLogSend
+		 * loaded a subset of the available data but then pq_flush_if_writable
+		 * flushed it all --- we should immediately try to send more.
+		 */
+		if (caughtup || pq_is_send_pending())
 		{
 			TimestampTz finish_time = 0;
-			long		sleeptime;
+			long		sleeptime = -1;
 			int			wakeEvents;

-			/* Reschedule replication timeout */
+			wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH |
+				WL_SOCKET_READABLE;
+			if (pq_is_send_pending())
+				wakeEvents |= WL_SOCKET_WRITEABLE;
+
+			/* Determine time until replication timeout */
 			if (replication_timeout > 0)
 			{
 				long		secs;
@ -794,27 +820,21 @@ WalSndLoop(void)
 				TimestampDifference(GetCurrentTimestamp(),
 									finish_time, &secs, &usecs);
 				sleeptime = secs * 1000 + usecs / 1000;
-				if (WalSndDelay < sleeptime)
-					sleeptime = WalSndDelay;
-			}
-			else
-			{
-				/*
-				 * XXX: Without timeout, we don't really need the periodic
-				 * wakeups anymore, WaitLatchOrSocket should reliably wake up
-				 * as soon as something interesting happens.
-				 */
-				sleeptime = WalSndDelay;
+				/* Avoid Assert in WaitLatchOrSocket if timeout is past */
+				if (sleeptime < 0)
+					sleeptime = 0;
+				wakeEvents |= WL_TIMEOUT;
 			}

-			/* Sleep */
-			wakeEvents  = WL_LATCH_SET | WL_SOCKET_READABLE | WL_TIMEOUT;
-			if (pq_is_send_pending())
-				wakeEvents |= WL_SOCKET_WRITEABLE;
+			/* Sleep until something happens or replication timeout */
 			WaitLatchOrSocket(&MyWalSnd->latch, wakeEvents,
 							  MyProcPort->sock, sleeptime);

-			/* Check for replication timeout */
+			/*
+			 * Check for replication timeout.  Note we ignore the corner case
+			 * possibility that the client replied just as we reached the
+			 * timeout ... he's supposed to reply *before* that.
+			 */
 			if (replication_timeout > 0 &&
 				GetCurrentTimestamp() >= finish_time)
 			{
@ -828,24 +848,6 @@ WalSndLoop(void)
 				break;
 			}
 		}
-
-		/*
-		 * If we're in catchup state, see if its time to move to streaming.
-		 * This is an important state change for users, since before this
-		 * point data loss might occur if the primary dies and we need to
-		 * failover to the standby. The state change is also important for
-		 * synchronous replication, since commits that started to wait at that
-		 * point might wait for some time.
-		 */
-		if (MyWalSnd->state == WALSNDSTATE_CATCHUP && caughtup)
-		{
-			ereport(DEBUG1,
-					(errmsg("standby \"%s\" has now caught up with primary",
-							application_name)));
-			WalSndSetState(WALSNDSTATE_STREAMING);
-		}
-
-		ProcessRepliesIfAny();
 	}

 	/*
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@ -1997,17 +1997,6 @@ static struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},

-	{
-		{"wal_sender_delay", PGC_SIGHUP, REPLICATION_SENDING,
-			gettext_noop("WAL sender sleep time between WAL replications."),
-			NULL,
-			GUC_UNIT_MS
-		},
-		&WalSndDelay,
-		1000, 1, 10000,
-		NULL, NULL, NULL
-	},
-
 	{
 		{"replication_timeout", PGC_SIGHUP, REPLICATION_SENDING,
 			gettext_noop("Sets the maximum time to wait for WAL replication."),
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@ -200,7 +200,6 @@

 #max_wal_senders = 0		# max number of walsender processes
 				# (change requires restart)
-#wal_sender_delay = 1s		# walsender cycle time, 1-10000 milliseconds
 #wal_keep_segments = 0		# in logfile segments, 16MB each; 0 disables
 #replication_timeout = 60s	# in milliseconds; 0 disables

--- a/src/include/replication/walsender.h
+++ b/src/include/replication/walsender.h
@ -98,7 +98,6 @@ extern volatile sig_atomic_t walsender_shutdown_requested;
 extern volatile sig_atomic_t walsender_ready_to_stop;

 /* user-settable parameters */
-extern int	WalSndDelay;
 extern int	max_wal_senders;
 extern int	replication_timeout;