Change the way latency is calculated with pgbench --rate option.

The reported latency values now include the "schedule lag" time, that is, the time between the transaction's scheduled start time and the time it actually started. This relates better to a model where requests arrive at a certain rate, and we are interested in the response time to the end user or application, rather than the response time of the database itself. Also, when --rate is used, include the schedule lag time in the log output. The --rate option is new in 9.4, so backpatch to 9.4. It seems better to make this change in 9.4, while we're still in the beta period, than ship a 9.4 version that calculates the values differently than 9.5.
2014-09-11 12:40:01 +03:00 · 2014-09-11 12:40:01 +03:00 · 02e3bcc661
commit 02e3bcc661
parent 75717ce8f0
2 changed files with 147 additions and 78 deletions
--- a/contrib/pgbench/pgbench.c
+++ b/contrib/pgbench/pgbench.c
@ -210,10 +210,10 @@ typedef struct
 								 * sent */
 	int			sleeping;		/* 1 indicates that the client is napping */
 	bool		throttling;		/* whether nap is for throttling */
-	int64		until;			/* napping until (usec) */
 	Variable   *variables;		/* array of variable definitions */
 	int			nvariables;
-	instr_time	txn_begin;		/* used for measuring transaction latencies */
+	int64		txn_scheduled;	/* scheduled start time of transaction (usec) */
+	instr_time	txn_begin;		/* used for measuring schedule lag times */
 	instr_time	stmt_begin;		/* used for measuring statement latencies */
 	int64		txn_latencies;	/* cumulated latencies */
 	int64		txn_sqlats;		/* cumulated square latencies */
@ -284,12 +284,17 @@ typedef struct

 	long		start_time;		/* when does the interval start */
 	int			cnt;			/* number of transactions */
-	double		min_duration;	/* min/max durations */
-	double		max_duration;
-	double		sum;			/* sum(duration), sum(duration^2) - for
-								 * estimates */
-	double		sum2;

+	double		min_latency;	/* min/max latencies */
+	double		max_latency;
+	double		sum_latency;	/* sum(latency), sum(latency^2) - for
+								 * estimates */
+	double		sum2_latency;
+
+	double		min_lag;
+	double		max_lag;
+	double		sum_lag;		/* sum(lag) */
+	double		sum2_lag;		/* sum(lag*lag) */
 } AggVals;

 static Command **sql_files[MAX_FILES];	/* SQL script files */
@ -968,12 +973,18 @@ agg_vals_init(AggVals *aggs, instr_time start)
 {
 	/* basic counters */
 	aggs->cnt = 0;				/* number of transactions */
-	aggs->sum = 0;				/* SUM(duration) */
-	aggs->sum2 = 0;				/* SUM(duration*duration) */
+	aggs->sum_latency = 0;		/* SUM(latency) */
+	aggs->sum2_latency = 0;				/* SUM(latency*latency) */

 	/* min and max transaction duration */
-	aggs->min_duration = 0;
-	aggs->max_duration = 0;
+	aggs->min_latency = 0;
+	aggs->max_latency = 0;
+
+	/* schedule lag counters */
+	aggs->sum_lag = 0;
+	aggs->sum2_lag = 0;
+	aggs->min_lag = 0;
+	aggs->max_lag = 0;

 	/* start of the current interval */
 	aggs->start_time = INSTR_TIME_GET_DOUBLE(start);
@ -1016,7 +1027,7 @@ top:

 		thread->throttle_trigger += wait;

-		st->until = thread->throttle_trigger;
+		st->txn_scheduled = thread->throttle_trigger;
 		st->sleeping = 1;
 		st->throttling = true;
 		st->is_throttled = true;
@ -1032,13 +1043,13 @@ top:

 		INSTR_TIME_SET_CURRENT(now);
 		now_us = INSTR_TIME_GET_MICROSEC(now);
-		if (st->until <= now_us)
+		if (st->txn_scheduled <= now_us)
 		{
 			st->sleeping = 0;	/* Done sleeping, go ahead with next command */
 			if (st->throttling)
 			{
 				/* Measure lag of throttled transaction relative to target */
-				int64		lag = now_us - st->until;
+				int64		lag = now_us - st->txn_scheduled;

 				thread->throttle_lag += lag;
 				if (lag > thread->throttle_lag_max)
@ -1052,6 +1063,11 @@ top:

 	if (st->listen)
 	{							/* are we receiver? */
+		instr_time	now;
+		bool		now_valid = false;
+
+		INSTR_TIME_SET_ZERO(now); /* initialize to keep compiler quiet */
+
 		if (commands[st->state]->type == SQL_COMMAND)
 		{
 			if (debug)
@ -1071,10 +1087,13 @@ top:
 		 */
 		if (is_latencies)
 		{
-			instr_time	now;
 			int			cnum = commands[st->state]->command_num;

-			INSTR_TIME_SET_CURRENT(now);
+			if (!now_valid)
+			{
+				INSTR_TIME_SET_CURRENT(now);
+				now_valid = true;
+			}
 			INSTR_TIME_ACCUM_DIFF(thread->exec_elapsed[cnum],
 								  now, st->stmt_begin);
 			thread->exec_count[cnum]++;
@ -1083,12 +1102,16 @@ top:
 		/* transaction finished: record latency under progress or throttling */
 		if ((progress || throttle_delay) && commands[st->state + 1] == NULL)
 		{
-			instr_time	diff;
 			int64		latency;

-			INSTR_TIME_SET_CURRENT(diff);
-			INSTR_TIME_SUBTRACT(diff, st->txn_begin);
-			latency = INSTR_TIME_GET_MICROSEC(diff);
+			if (!now_valid)
+			{
+				INSTR_TIME_SET_CURRENT(now);
+				now_valid = true;
+			}
+
+			latency = INSTR_TIME_GET_MICROSEC(now) - st->txn_scheduled;
+
 			st->txn_latencies += latency;

 			/*
@ -1106,9 +1129,8 @@ top:
 		 */
 		if (logfile && commands[st->state + 1] == NULL)
 		{
-			instr_time	now;
-			instr_time	diff;
-			double		usec;
+			double		lag;
+			double		latency;

 			/*
 			 * write the log entry if this row belongs to the random sample,
@ -1117,10 +1139,13 @@ top:
 			if (sample_rate == 0.0 ||
 				pg_erand48(thread->random_state) <= sample_rate)
 			{
-				INSTR_TIME_SET_CURRENT(now);
-				diff = now;
-				INSTR_TIME_SUBTRACT(diff, st->txn_begin);
-				usec = (double) INSTR_TIME_GET_MICROSEC(diff);
+				if (!now_valid)
+				{
+					INSTR_TIME_SET_CURRENT(now);
+					now_valid = true;
+				}
+				latency = (double) (INSTR_TIME_GET_MICROSEC(now) - st->txn_scheduled);
+				lag = (double) (INSTR_TIME_GET_MICROSEC(st->txn_begin) - st->txn_scheduled);

 				/* should we aggregate the results or not? */
 				if (agg_interval > 0)
@ -1132,15 +1157,27 @@ top:
 					if (agg->start_time + agg_interval >= INSTR_TIME_GET_DOUBLE(now))
 					{
 						agg->cnt += 1;
-						agg->sum += usec;
-						agg->sum2 += usec * usec;
+						agg->sum_latency += latency;
+						agg->sum2_latency += latency * latency;

 						/* first in this aggregation interval */
-						if ((agg->cnt == 1) || (usec < agg->min_duration))
-							agg->min_duration = usec;
+						if ((agg->cnt == 1) || (latency < agg->min_latency))
+							agg->min_latency = latency;

-						if ((agg->cnt == 1) || (usec > agg->max_duration))
-							agg->max_duration = usec;
+						if ((agg->cnt == 1) || (latency > agg->max_latency))
+							agg->max_latency = latency;
+
+						/* and the same for schedule lag */
+						if (throttle_delay)
+						{
+							agg->sum_lag += lag;
+							agg->sum2_lag += lag * lag;
+
+							if ((agg->cnt == 1) || (lag < agg->min_lag))
+								agg->min_lag = lag;
+							if ((agg->cnt == 1) || (lag > agg->max_lag))
+								agg->max_lag = lag;
+						}
 					}
 					else
 					{
@ -1156,23 +1193,34 @@ top:
 							 * ifdef in usage), so we don't need to handle
 							 * this in a special way (see below).
 							 */
-							fprintf(logfile, "%ld %d %.0f %.0f %.0f %.0f\n",
+							fprintf(logfile, "%ld %d %.0f %.0f %.0f %.0f",
 									agg->start_time,
 									agg->cnt,
-									agg->sum,
-									agg->sum2,
-									agg->min_duration,
-									agg->max_duration);
+									agg->sum_latency,
+									agg->sum2_latency,
+									agg->min_latency,
+									agg->max_latency);
+							if (throttle_delay)
+								fprintf(logfile, " %.0f %.0f %.0f %.0f",
+										agg->sum_lag,
+										agg->sum2_lag,
+										agg->min_lag,
+										agg->max_lag);
+							fputc('\n', logfile);

 							/* move to the next inteval */
 							agg->start_time = agg->start_time + agg_interval;

 							/* reset for "no transaction" intervals */
 							agg->cnt = 0;
-							agg->min_duration = 0;
-							agg->max_duration = 0;
-							agg->sum = 0;
-							agg->sum2 = 0;
+							agg->min_latency = 0;
+							agg->max_latency = 0;
+							agg->sum_latency = 0;
+							agg->sum2_latency = 0;
+							agg->min_lag = 0;
+							agg->max_lag = 0;
+							agg->sum_lag = 0;
+							agg->sum2_lag = 0;
 						}

 						/*
@ -1180,10 +1228,14 @@ top:
 						 * current)
 						 */
 						agg->cnt = 1;
-						agg->min_duration = usec;
-						agg->max_duration = usec;
-						agg->sum = usec;
-						agg->sum2 = usec * usec;
+						agg->min_latency = latency;
+						agg->max_latency = latency;
+						agg->sum_latency = latency;
+						agg->sum2_latency = latency * latency;
+						agg->min_lag = lag;
+						agg->max_lag = lag;
+						agg->sum_lag = lag;
+						agg->sum2_lag = lag * lag;
 					}
 				}
 				else
@ -1195,8 +1247,8 @@ top:
 					 * This is more than we really ought to know about
 					 * instr_time
 					 */
-					fprintf(logfile, "%d %d %.0f %d %ld %ld\n",
-							st->id, st->cnt, usec, st->use_file,
+					fprintf(logfile, "%d %d %.0f %d %ld %ld",
+							st->id, st->cnt, latency, st->use_file,
 							(long) now.tv_sec, (long) now.tv_usec);
 #else

@ -1204,9 +1256,12 @@ top:
 					 * On Windows, instr_time doesn't provide a timestamp
 					 * anyway
 					 */
-					fprintf(logfile, "%d %d %.0f %d 0 0\n",
+					fprintf(logfile, "%d %d %.0f %d 0 0",
 							st->id, st->cnt, usec, st->use_file);
 #endif
+					if (throttle_delay)
+						fprintf(logfile, " %.0f", lag);
+					fputc('\n', logfile);
 				}
 			}
 		}
@ -1295,8 +1350,17 @@ top:

 	/* Record transaction start time under logging, progress or throttling */
 	if ((logfile || progress || throttle_delay) && st->state == 0)
+	{
 		INSTR_TIME_SET_CURRENT(st->txn_begin);

+		/*
+		 * When not throttling, this is also the transaction's scheduled start
+		 * time.
+		 */
+		if (!throttle_delay)
+			st->txn_scheduled = INSTR_TIME_GET_MICROSEC(st->txn_begin);
+	}
+
 	/* Record statement start time if per-command latencies are requested */
 	if (is_latencies)
 		INSTR_TIME_SET_CURRENT(st->stmt_begin);
@ -1620,7 +1684,7 @@ top:
 				usec *= 1000000;

 			INSTR_TIME_SET_CURRENT(now);
-			st->until = INSTR_TIME_GET_MICROSEC(now) + usec;
+			st->txn_scheduled = INSTR_TIME_GET_MICROSEC(now) + usec;
 			st->sleeping = 1;

 			st->listen = 1;
@ -3309,7 +3373,7 @@ threadRun(void *arg)
 						now_usec = INSTR_TIME_GET_MICROSEC(now);
 					}

-					this_usec = st->until - now_usec;
+					this_usec = st->txn_scheduled - now_usec;
 					if (min_usec > this_usec)
 						min_usec = this_usec;
 				}
--- a/doc/src/sgml/pgbench.sgml
+++ b/doc/src/sgml/pgbench.sgml
@ -403,8 +403,10 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
        Show progress report every <literal>sec</> seconds.  The report
        includes the time since the beginning of the run, the tps since the
        last report, and the transaction latency average and standard
-        deviation since the last report.  Under throttling (<option>-R</>), it
-        also includes the average schedule lag time since the last report.
+        deviation since the last report.  Under throttling (<option>-R</>),
+        the latency is computed with respect to the transaction scheduled
+        start time, not the actual transaction beginning time, thus it also
+        includes the average schedule lag time.
       </para>
      </listitem>
     </varlistentry>
@ -440,29 +442,25 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
        possible for later ones to catch up again.
       </para>
       <para>
-        When throttling is active, the average and maximum transaction
-        schedule lag time are reported in ms.  This is the delay between
-        the original scheduled transaction time and the actual transaction
-        start times.  The schedule lag shows whether a transaction was
-        started on time or late.  Once a client starts running behind its
-        schedule, every following transaction can continue to be penalized
-        for schedule lag.  If faster transactions are able to catch up, it's
-        possible for them to get back on schedule again.  The lag measurement
-        of every transaction is shown when pgbench is run with debugging
-        output.
+        When throttling is active, the transaction latency reported at the
+        end of the run is calculated from the scheduled start times, so it
+        includes the time each transaction had to wait for the previous
+        transaction to finish. The wait time is called the schedule lag time,
+        and its average and maximum are also reported separately. The
+        transaction latency with respect to the actual transaction start time,
+        i.e. the time spent executing the transaction in the database, can be
+        computed by subtracting the schedule lag time from the reported
+        latency.
       </para>
+
       <para>
-        High rate limit schedule lag values, that is lag values that are large
-        compared to the actual transaction latency, indicate that something is
-        amiss in the throttling process.  High schedule lag can highlight a subtle
-        problem there even if the target rate limit is met in the end.  One
-        possible cause of schedule lag is insufficient pgbench threads to
-        handle all of the clients.  To improve that, consider reducing the
-        number of clients, increasing the number of threads in pgbench, or
-        running pgbench on a separate host.  Another possibility is that the
-        database is not keeping up with the load at some point.  When that
-        happens, you will have to reduce the expected transaction rate to
-        lower schedule lag.
+        A high schedule lag time is an indication that the system cannot
+        process transactions at the specified rate, with the chosen number of
+        clients and threads. When the average transaction execution time is
+        longer than the scheduled interval between each transaction, each
+        successive transaction will fall further behind, and the schedule lag
+        time will keep increasing the longer the test run is. When that
+        happens, you will have to reduce the specified transaction rate.
       </para>
      </listitem>
     </varlistentry>
@ -942,7 +940,7 @@ END;
   The format of the log is:

 <synopsis>
-<replaceable>client_id</> <replaceable>transaction_no</> <replaceable>time</> <replaceable>file_no</> <replaceable>time_epoch</> <replaceable>time_us</>
+<replaceable>client_id</> <replaceable>transaction_no</> <replaceable>time</> <replaceable>file_no</> <replaceable>time_epoch</> <replaceable>time_us</> [<replaceable>schedule_lag</replaceable>]
 </synopsis>

   where <replaceable>time</> is the total elapsed transaction time in microseconds,
@ -952,7 +950,10 @@ END;
   UNIX epoch format timestamp and an offset
   in microseconds (suitable for creating an ISO 8601
   timestamp with fractional seconds) showing when
-   the transaction completed.
+   the transaction completed. The last field, <replaceable>schedule_lag</>, is
+   the difference between the transaction's scheduled start time, and the
+   time it actually started, in microseconds. It is only present when the
+   <option>--rate</> option is used.
  </para>

  <para>
@ -978,7 +979,7 @@ END;
   With the <option>--aggregate-interval</option> option, the logs use a bit different format:

 <synopsis>
-<replaceable>interval_start</> <replaceable>num_of_transactions</> <replaceable>latency_sum</> <replaceable>latency_2_sum</> <replaceable>min_latency</> <replaceable>max_latency</>
+<replaceable>interval_start</> <replaceable>num_of_transactions</> <replaceable>latency_sum</> <replaceable>latency_2_sum</> <replaceable>min_latency</> <replaceable>max_latency</> [<replaceable>lag_sum</> <replaceable>lag_2_sum</> <replaceable>min_lag</> <replaceable>max_lag</>]
 </synopsis>

   where <replaceable>interval_start</> is the start of the interval (UNIX epoch
@ -989,7 +990,11 @@ END;
   <replaceable>latency_2_sum</> is a sum of 2nd powers of latencies. The last two
   fields are <replaceable>min_latency</> - a minimum latency within the interval, and
   <replaceable>max_latency</> - maximum latency within the interval. A transaction is
-   counted into the interval when it was committed.
+   counted into the interval when it was committed. The last four fields, 
+   <replaceable>lag_sum</>, <replaceable>lag_2_sum</>, <replaceable>min_lag</>, and <replaceable>max_lag</>, are only present if the --rate option is used.
+   They are calculated from the time each transaction had to wait for the
+   previous one to finish, i.e. the difference between each transaction's
+   scheduled start time and the time it actually started.
  </para>

  <para>