diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c index 2f7d80e940..c5c082c938 100644 --- a/contrib/pgbench/pgbench.c +++ b/contrib/pgbench/pgbench.c @@ -210,10 +210,10 @@ typedef struct * sent */ int sleeping; /* 1 indicates that the client is napping */ bool throttling; /* whether nap is for throttling */ - int64 until; /* napping until (usec) */ Variable *variables; /* array of variable definitions */ int nvariables; - instr_time txn_begin; /* used for measuring transaction latencies */ + int64 txn_scheduled; /* scheduled start time of transaction (usec) */ + instr_time txn_begin; /* used for measuring schedule lag times */ instr_time stmt_begin; /* used for measuring statement latencies */ int64 txn_latencies; /* cumulated latencies */ int64 txn_sqlats; /* cumulated square latencies */ @@ -284,12 +284,17 @@ typedef struct long start_time; /* when does the interval start */ int cnt; /* number of transactions */ - double min_duration; /* min/max durations */ - double max_duration; - double sum; /* sum(duration), sum(duration^2) - for - * estimates */ - double sum2; + double min_latency; /* min/max latencies */ + double max_latency; + double sum_latency; /* sum(latency), sum(latency^2) - for + * estimates */ + double sum2_latency; + + double min_lag; + double max_lag; + double sum_lag; /* sum(lag) */ + double sum2_lag; /* sum(lag*lag) */ } AggVals; static Command **sql_files[MAX_FILES]; /* SQL script files */ @@ -968,12 +973,18 @@ agg_vals_init(AggVals *aggs, instr_time start) { /* basic counters */ aggs->cnt = 0; /* number of transactions */ - aggs->sum = 0; /* SUM(duration) */ - aggs->sum2 = 0; /* SUM(duration*duration) */ + aggs->sum_latency = 0; /* SUM(latency) */ + aggs->sum2_latency = 0; /* SUM(latency*latency) */ /* min and max transaction duration */ - aggs->min_duration = 0; - aggs->max_duration = 0; + aggs->min_latency = 0; + aggs->max_latency = 0; + + /* schedule lag counters */ + aggs->sum_lag = 0; + aggs->sum2_lag = 0; + aggs->min_lag = 0; + aggs->max_lag = 0; /* start of the current interval */ aggs->start_time = INSTR_TIME_GET_DOUBLE(start); @@ -1016,7 +1027,7 @@ top: thread->throttle_trigger += wait; - st->until = thread->throttle_trigger; + st->txn_scheduled = thread->throttle_trigger; st->sleeping = 1; st->throttling = true; st->is_throttled = true; @@ -1032,13 +1043,13 @@ top: INSTR_TIME_SET_CURRENT(now); now_us = INSTR_TIME_GET_MICROSEC(now); - if (st->until <= now_us) + if (st->txn_scheduled <= now_us) { st->sleeping = 0; /* Done sleeping, go ahead with next command */ if (st->throttling) { /* Measure lag of throttled transaction relative to target */ - int64 lag = now_us - st->until; + int64 lag = now_us - st->txn_scheduled; thread->throttle_lag += lag; if (lag > thread->throttle_lag_max) @@ -1052,6 +1063,11 @@ top: if (st->listen) { /* are we receiver? */ + instr_time now; + bool now_valid = false; + + INSTR_TIME_SET_ZERO(now); /* initialize to keep compiler quiet */ + if (commands[st->state]->type == SQL_COMMAND) { if (debug) @@ -1071,10 +1087,13 @@ top: */ if (is_latencies) { - instr_time now; int cnum = commands[st->state]->command_num; - INSTR_TIME_SET_CURRENT(now); + if (!now_valid) + { + INSTR_TIME_SET_CURRENT(now); + now_valid = true; + } INSTR_TIME_ACCUM_DIFF(thread->exec_elapsed[cnum], now, st->stmt_begin); thread->exec_count[cnum]++; @@ -1083,12 +1102,16 @@ top: /* transaction finished: record latency under progress or throttling */ if ((progress || throttle_delay) && commands[st->state + 1] == NULL) { - instr_time diff; int64 latency; - INSTR_TIME_SET_CURRENT(diff); - INSTR_TIME_SUBTRACT(diff, st->txn_begin); - latency = INSTR_TIME_GET_MICROSEC(diff); + if (!now_valid) + { + INSTR_TIME_SET_CURRENT(now); + now_valid = true; + } + + latency = INSTR_TIME_GET_MICROSEC(now) - st->txn_scheduled; + st->txn_latencies += latency; /* @@ -1106,9 +1129,8 @@ top: */ if (logfile && commands[st->state + 1] == NULL) { - instr_time now; - instr_time diff; - double usec; + double lag; + double latency; /* * write the log entry if this row belongs to the random sample, @@ -1117,10 +1139,13 @@ top: if (sample_rate == 0.0 || pg_erand48(thread->random_state) <= sample_rate) { - INSTR_TIME_SET_CURRENT(now); - diff = now; - INSTR_TIME_SUBTRACT(diff, st->txn_begin); - usec = (double) INSTR_TIME_GET_MICROSEC(diff); + if (!now_valid) + { + INSTR_TIME_SET_CURRENT(now); + now_valid = true; + } + latency = (double) (INSTR_TIME_GET_MICROSEC(now) - st->txn_scheduled); + lag = (double) (INSTR_TIME_GET_MICROSEC(st->txn_begin) - st->txn_scheduled); /* should we aggregate the results or not? */ if (agg_interval > 0) @@ -1132,15 +1157,27 @@ top: if (agg->start_time + agg_interval >= INSTR_TIME_GET_DOUBLE(now)) { agg->cnt += 1; - agg->sum += usec; - agg->sum2 += usec * usec; + agg->sum_latency += latency; + agg->sum2_latency += latency * latency; /* first in this aggregation interval */ - if ((agg->cnt == 1) || (usec < agg->min_duration)) - agg->min_duration = usec; + if ((agg->cnt == 1) || (latency < agg->min_latency)) + agg->min_latency = latency; - if ((agg->cnt == 1) || (usec > agg->max_duration)) - agg->max_duration = usec; + if ((agg->cnt == 1) || (latency > agg->max_latency)) + agg->max_latency = latency; + + /* and the same for schedule lag */ + if (throttle_delay) + { + agg->sum_lag += lag; + agg->sum2_lag += lag * lag; + + if ((agg->cnt == 1) || (lag < agg->min_lag)) + agg->min_lag = lag; + if ((agg->cnt == 1) || (lag > agg->max_lag)) + agg->max_lag = lag; + } } else { @@ -1156,23 +1193,34 @@ top: * ifdef in usage), so we don't need to handle * this in a special way (see below). */ - fprintf(logfile, "%ld %d %.0f %.0f %.0f %.0f\n", + fprintf(logfile, "%ld %d %.0f %.0f %.0f %.0f", agg->start_time, agg->cnt, - agg->sum, - agg->sum2, - agg->min_duration, - agg->max_duration); + agg->sum_latency, + agg->sum2_latency, + agg->min_latency, + agg->max_latency); + if (throttle_delay) + fprintf(logfile, " %.0f %.0f %.0f %.0f", + agg->sum_lag, + agg->sum2_lag, + agg->min_lag, + agg->max_lag); + fputc('\n', logfile); /* move to the next inteval */ agg->start_time = agg->start_time + agg_interval; /* reset for "no transaction" intervals */ agg->cnt = 0; - agg->min_duration = 0; - agg->max_duration = 0; - agg->sum = 0; - agg->sum2 = 0; + agg->min_latency = 0; + agg->max_latency = 0; + agg->sum_latency = 0; + agg->sum2_latency = 0; + agg->min_lag = 0; + agg->max_lag = 0; + agg->sum_lag = 0; + agg->sum2_lag = 0; } /* @@ -1180,10 +1228,14 @@ top: * current) */ agg->cnt = 1; - agg->min_duration = usec; - agg->max_duration = usec; - agg->sum = usec; - agg->sum2 = usec * usec; + agg->min_latency = latency; + agg->max_latency = latency; + agg->sum_latency = latency; + agg->sum2_latency = latency * latency; + agg->min_lag = lag; + agg->max_lag = lag; + agg->sum_lag = lag; + agg->sum2_lag = lag * lag; } } else @@ -1195,8 +1247,8 @@ top: * This is more than we really ought to know about * instr_time */ - fprintf(logfile, "%d %d %.0f %d %ld %ld\n", - st->id, st->cnt, usec, st->use_file, + fprintf(logfile, "%d %d %.0f %d %ld %ld", + st->id, st->cnt, latency, st->use_file, (long) now.tv_sec, (long) now.tv_usec); #else @@ -1204,9 +1256,12 @@ top: * On Windows, instr_time doesn't provide a timestamp * anyway */ - fprintf(logfile, "%d %d %.0f %d 0 0\n", + fprintf(logfile, "%d %d %.0f %d 0 0", st->id, st->cnt, usec, st->use_file); #endif + if (throttle_delay) + fprintf(logfile, " %.0f", lag); + fputc('\n', logfile); } } } @@ -1295,8 +1350,17 @@ top: /* Record transaction start time under logging, progress or throttling */ if ((logfile || progress || throttle_delay) && st->state == 0) + { INSTR_TIME_SET_CURRENT(st->txn_begin); + /* + * When not throttling, this is also the transaction's scheduled start + * time. + */ + if (!throttle_delay) + st->txn_scheduled = INSTR_TIME_GET_MICROSEC(st->txn_begin); + } + /* Record statement start time if per-command latencies are requested */ if (is_latencies) INSTR_TIME_SET_CURRENT(st->stmt_begin); @@ -1620,7 +1684,7 @@ top: usec *= 1000000; INSTR_TIME_SET_CURRENT(now); - st->until = INSTR_TIME_GET_MICROSEC(now) + usec; + st->txn_scheduled = INSTR_TIME_GET_MICROSEC(now) + usec; st->sleeping = 1; st->listen = 1; @@ -3309,7 +3373,7 @@ threadRun(void *arg) now_usec = INSTR_TIME_GET_MICROSEC(now); } - this_usec = st->until - now_usec; + this_usec = st->txn_scheduled - now_usec; if (min_usec > this_usec) min_usec = this_usec; } diff --git a/doc/src/sgml/pgbench.sgml b/doc/src/sgml/pgbench.sgml index b479105a3e..c4e0cbd79a 100644 --- a/doc/src/sgml/pgbench.sgml +++ b/doc/src/sgml/pgbench.sgml @@ -403,8 +403,10 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</> Show progress report every <literal>sec</> seconds. The report includes the time since the beginning of the run, the tps since the last report, and the transaction latency average and standard - deviation since the last report. Under throttling (<option>-R</>), it - also includes the average schedule lag time since the last report. + deviation since the last report. Under throttling (<option>-R</>), + the latency is computed with respect to the transaction scheduled + start time, not the actual transaction beginning time, thus it also + includes the average schedule lag time. </para> </listitem> </varlistentry> @@ -440,29 +442,25 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</> possible for later ones to catch up again. </para> <para> - When throttling is active, the average and maximum transaction - schedule lag time are reported in ms. This is the delay between - the original scheduled transaction time and the actual transaction - start times. The schedule lag shows whether a transaction was - started on time or late. Once a client starts running behind its - schedule, every following transaction can continue to be penalized - for schedule lag. If faster transactions are able to catch up, it's - possible for them to get back on schedule again. The lag measurement - of every transaction is shown when pgbench is run with debugging - output. + When throttling is active, the transaction latency reported at the + end of the run is calculated from the scheduled start times, so it + includes the time each transaction had to wait for the previous + transaction to finish. The wait time is called the schedule lag time, + and its average and maximum are also reported separately. The + transaction latency with respect to the actual transaction start time, + i.e. the time spent executing the transaction in the database, can be + computed by subtracting the schedule lag time from the reported + latency. </para> + <para> - High rate limit schedule lag values, that is lag values that are large - compared to the actual transaction latency, indicate that something is - amiss in the throttling process. High schedule lag can highlight a subtle - problem there even if the target rate limit is met in the end. One - possible cause of schedule lag is insufficient pgbench threads to - handle all of the clients. To improve that, consider reducing the - number of clients, increasing the number of threads in pgbench, or - running pgbench on a separate host. Another possibility is that the - database is not keeping up with the load at some point. When that - happens, you will have to reduce the expected transaction rate to - lower schedule lag. + A high schedule lag time is an indication that the system cannot + process transactions at the specified rate, with the chosen number of + clients and threads. When the average transaction execution time is + longer than the scheduled interval between each transaction, each + successive transaction will fall further behind, and the schedule lag + time will keep increasing the longer the test run is. When that + happens, you will have to reduce the specified transaction rate. </para> </listitem> </varlistentry> @@ -942,7 +940,7 @@ END; The format of the log is: <synopsis> -<replaceable>client_id</> <replaceable>transaction_no</> <replaceable>time</> <replaceable>file_no</> <replaceable>time_epoch</> <replaceable>time_us</> +<replaceable>client_id</> <replaceable>transaction_no</> <replaceable>time</> <replaceable>file_no</> <replaceable>time_epoch</> <replaceable>time_us</> [<replaceable>schedule_lag</replaceable>] </synopsis> where <replaceable>time</> is the total elapsed transaction time in microseconds, @@ -952,7 +950,10 @@ END; UNIX epoch format timestamp and an offset in microseconds (suitable for creating an ISO 8601 timestamp with fractional seconds) showing when - the transaction completed. + the transaction completed. The last field, <replaceable>schedule_lag</>, is + the difference between the transaction's scheduled start time, and the + time it actually started, in microseconds. It is only present when the + <option>--rate</> option is used. </para> <para> @@ -978,7 +979,7 @@ END; With the <option>--aggregate-interval</option> option, the logs use a bit different format: <synopsis> -<replaceable>interval_start</> <replaceable>num_of_transactions</> <replaceable>latency_sum</> <replaceable>latency_2_sum</> <replaceable>min_latency</> <replaceable>max_latency</> +<replaceable>interval_start</> <replaceable>num_of_transactions</> <replaceable>latency_sum</> <replaceable>latency_2_sum</> <replaceable>min_latency</> <replaceable>max_latency</> [<replaceable>lag_sum</> <replaceable>lag_2_sum</> <replaceable>min_lag</> <replaceable>max_lag</>] </synopsis> where <replaceable>interval_start</> is the start of the interval (UNIX epoch @@ -989,7 +990,11 @@ END; <replaceable>latency_2_sum</> is a sum of 2nd powers of latencies. The last two fields are <replaceable>min_latency</> - a minimum latency within the interval, and <replaceable>max_latency</> - maximum latency within the interval. A transaction is - counted into the interval when it was committed. + counted into the interval when it was committed. The last four fields, + <replaceable>lag_sum</>, <replaceable>lag_2_sum</>, <replaceable>min_lag</>, and <replaceable>max_lag</>, are only present if the --rate option is used. + They are calculated from the time each transaction had to wait for the + previous one to finish, i.e. the difference between each transaction's + scheduled start time and the time it actually started. </para> <para>