Remove wal_sender_delay GUC, because it's no longer useful.

The latch infrastructure is now capable of detecting all cases where the
walsender loop needs to wake up, so there is no reason to have an arbitrary
timeout.

Also, modify the walsender loop logic to follow the standard pattern of
ResetLatch, test for work to do, WaitLatch.  The previous coding was both
hard to follow and buggy: it would sometimes busy-loop despite having
nothing available to do, e.g. between receipt of a signal and the next time
it was caught up with new WAL, and it also had interesting choices like
deciding to update to WALSNDSTATE_STREAMING on the strength of information
known to be obsolete.
This commit is contained in:
Tom Lane 2011-08-10 18:50:28 -04:00
parent 79b2ee20c8
commit cff75130b5
5 changed files with 74 additions and 108 deletions

View File

@ -2008,29 +2008,6 @@ SET ENABLE_SEQSCAN TO OFF;
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-wal-sender-delay" xreflabel="wal_sender_delay">
<term><varname>wal_sender_delay</varname> (<type>integer</type>)</term>
<indexterm>
<primary><varname>wal_sender_delay</> configuration parameter</primary>
</indexterm>
<listitem>
<para>
Specifies the delay between activity rounds for WAL sender processes.
In each round the WAL sender sends any WAL accumulated since the last
round to the standby server. It then sleeps for
<varname>wal_sender_delay</> milliseconds, and repeats. The sleep
is interrupted by transaction commit, so the effects of a committed
transaction are sent to standby servers as soon as the commit
happens, regardless of this setting. The default value is one second
(<literal>1s</>).
Note that on many systems, the effective resolution of sleep delays is
10 milliseconds; setting <varname>wal_sender_delay</> to a value that
is not a multiple of 10 might have the same results as setting it to
the next higher multiple of 10. This parameter can only be set in the
<filename>postgresql.conf</> file or on the server command line.
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-wal-keep-segments" xreflabel="wal_keep_segments">
<term><varname>wal_keep_segments</varname> (<type>integer</type>)</term>

View File

@ -75,7 +75,6 @@ bool am_cascading_walsender = false; /* Am I cascading WAL to another standby ?
/* User-settable parameters for walsender */
int max_wal_senders = 0; /* the maximum number of concurrent walsenders */
int WalSndDelay = 1000; /* max sleep time between some actions */
int replication_timeout = 60 * 1000; /* maximum time to send one
* WAL data message */
@ -475,7 +474,7 @@ ProcessRepliesIfAny(void)
{
unsigned char firstchar;
int r;
int received = false;
bool received = false;
for (;;)
{
@ -709,6 +708,9 @@ WalSndLoop(void)
/* Loop forever, unless we get an error */
for (;;)
{
/* Clear any already-pending wakeups */
ResetLatch(&MyWalSnd->latch);
/*
* Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children.
@ -727,63 +729,87 @@ WalSndLoop(void)
/* Normal exit from the walsender is here */
if (walsender_shutdown_requested)
{
/* Inform the standby that XLOG streaming was done */
/* Inform the standby that XLOG streaming is done */
pq_puttextmessage('C', "COPY 0");
pq_flush();
proc_exit(0);
}
/* Check for input from the client */
ProcessRepliesIfAny();
/*
* If we don't have any pending data in the output buffer, try to send
* some more.
* some more. If there is some, we don't bother to call XLogSend
* again until we've flushed it ... but we'd better assume we are not
* caught up.
*/
if (!pq_is_send_pending())
{
XLogSend(output_message, &caughtup);
else
caughtup = false;
/*
* Even if we wrote all the WAL that was available when we started
* sending, more might have arrived while we were sending this
* batch. We had the latch set while sending, so we have not
* received any signals from that time. Let's arm the latch again,
* and after that check that we're still up-to-date.
*/
if (caughtup && !pq_is_send_pending())
{
ResetLatch(&MyWalSnd->latch);
XLogSend(output_message, &caughtup);
}
}
/* Flush pending output to the client */
/* Try to flush pending output to the client */
if (pq_flush_if_writable() != 0)
break;
/* If nothing remains to be sent right now ... */
if (caughtup && !pq_is_send_pending())
{
/*
* If we're in catchup state, move to streaming. This is an
* important state change for users to know about, since before
* this point data loss might occur if the primary dies and we
* need to failover to the standby. The state change is also
* important for synchronous replication, since commits that
* started to wait at that point might wait for some time.
*/
if (MyWalSnd->state == WALSNDSTATE_CATCHUP)
{
ereport(DEBUG1,
(errmsg("standby \"%s\" has now caught up with primary",
application_name)));
WalSndSetState(WALSNDSTATE_STREAMING);
}
/*
* When SIGUSR2 arrives, we send any outstanding logs up to the
* shutdown checkpoint record (i.e., the latest record) and exit.
* This may be a normal termination at shutdown, or a promotion,
* the walsender is not sure which.
*/
if (walsender_ready_to_stop && !pq_is_send_pending())
if (walsender_ready_to_stop)
{
/* ... let's just be real sure we're caught up ... */
XLogSend(output_message, &caughtup);
ProcessRepliesIfAny();
if (caughtup && !pq_is_send_pending())
{
walsender_shutdown_requested = true;
continue; /* don't want to wait more */
}
}
}
if ((caughtup || pq_is_send_pending()) &&
!got_SIGHUP &&
!walsender_shutdown_requested)
/*
* We don't block if not caught up, unless there is unsent data
* pending in which case we'd better block until the socket is
* write-ready. This test is only needed for the case where XLogSend
* loaded a subset of the available data but then pq_flush_if_writable
* flushed it all --- we should immediately try to send more.
*/
if (caughtup || pq_is_send_pending())
{
TimestampTz finish_time = 0;
long sleeptime;
long sleeptime = -1;
int wakeEvents;
/* Reschedule replication timeout */
wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH |
WL_SOCKET_READABLE;
if (pq_is_send_pending())
wakeEvents |= WL_SOCKET_WRITEABLE;
/* Determine time until replication timeout */
if (replication_timeout > 0)
{
long secs;
@ -794,27 +820,21 @@ WalSndLoop(void)
TimestampDifference(GetCurrentTimestamp(),
finish_time, &secs, &usecs);
sleeptime = secs * 1000 + usecs / 1000;
if (WalSndDelay < sleeptime)
sleeptime = WalSndDelay;
}
else
{
/*
* XXX: Without timeout, we don't really need the periodic
* wakeups anymore, WaitLatchOrSocket should reliably wake up
* as soon as something interesting happens.
*/
sleeptime = WalSndDelay;
/* Avoid Assert in WaitLatchOrSocket if timeout is past */
if (sleeptime < 0)
sleeptime = 0;
wakeEvents |= WL_TIMEOUT;
}
/* Sleep */
wakeEvents = WL_LATCH_SET | WL_SOCKET_READABLE | WL_TIMEOUT;
if (pq_is_send_pending())
wakeEvents |= WL_SOCKET_WRITEABLE;
/* Sleep until something happens or replication timeout */
WaitLatchOrSocket(&MyWalSnd->latch, wakeEvents,
MyProcPort->sock, sleeptime);
/* Check for replication timeout */
/*
* Check for replication timeout. Note we ignore the corner case
* possibility that the client replied just as we reached the
* timeout ... he's supposed to reply *before* that.
*/
if (replication_timeout > 0 &&
GetCurrentTimestamp() >= finish_time)
{
@ -828,24 +848,6 @@ WalSndLoop(void)
break;
}
}
/*
* If we're in catchup state, see if its time to move to streaming.
* This is an important state change for users, since before this
* point data loss might occur if the primary dies and we need to
* failover to the standby. The state change is also important for
* synchronous replication, since commits that started to wait at that
* point might wait for some time.
*/
if (MyWalSnd->state == WALSNDSTATE_CATCHUP && caughtup)
{
ereport(DEBUG1,
(errmsg("standby \"%s\" has now caught up with primary",
application_name)));
WalSndSetState(WALSNDSTATE_STREAMING);
}
ProcessRepliesIfAny();
}
/*

View File

@ -1997,17 +1997,6 @@ static struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
{
{"wal_sender_delay", PGC_SIGHUP, REPLICATION_SENDING,
gettext_noop("WAL sender sleep time between WAL replications."),
NULL,
GUC_UNIT_MS
},
&WalSndDelay,
1000, 1, 10000,
NULL, NULL, NULL
},
{
{"replication_timeout", PGC_SIGHUP, REPLICATION_SENDING,
gettext_noop("Sets the maximum time to wait for WAL replication."),

View File

@ -200,7 +200,6 @@
#max_wal_senders = 0 # max number of walsender processes
# (change requires restart)
#wal_sender_delay = 1s # walsender cycle time, 1-10000 milliseconds
#wal_keep_segments = 0 # in logfile segments, 16MB each; 0 disables
#replication_timeout = 60s # in milliseconds; 0 disables

View File

@ -98,7 +98,6 @@ extern volatile sig_atomic_t walsender_shutdown_requested;
extern volatile sig_atomic_t walsender_ready_to_stop;
/* user-settable parameters */
extern int WalSndDelay;
extern int max_wal_senders;
extern int replication_timeout;