[PATCH v12 3/3] pg_test_timing: Add --fast flag to test fast timing, report time source
Lukas Fittl <lukas@fittl.com>
From: Lukas Fittl <lukas@fittl.com>
To:
Date: 2025-07-27T15:48:48Z
Lists: pgsql-hackers
In passing also reduce the per-loop overhead caused by repeated divisions
in INSTR_TIME_GET_NANOSEC when the ticks variable has become very large,
instead diff first and then turn it into nanosecs.
---
src/bin/pg_test_timing/pg_test_timing.c | 59 +++++++++++++++++++------
src/include/portability/instr_time.h | 30 ++++++++-----
2 files changed, 65 insertions(+), 24 deletions(-)
diff --git a/src/bin/pg_test_timing/pg_test_timing.c b/src/bin/pg_test_timing/pg_test_timing.c
index a5621251afc..b77ef2063b6 100644
--- a/src/bin/pg_test_timing/pg_test_timing.c
+++ b/src/bin/pg_test_timing/pg_test_timing.c
@@ -16,6 +16,7 @@ static const char *progname;
static unsigned int test_duration = 3;
static double max_rprct = 99.99;
+static bool fast_timing = false;
/* record duration in powers of 2 nanoseconds */
static long long int histogram[32];
@@ -56,6 +57,7 @@ handle_args(int argc, char *argv[])
static struct option long_options[] = {
{"duration", required_argument, NULL, 'd'},
{"cutoff", required_argument, NULL, 'c'},
+ {"fast", no_argument, NULL, 'f'},
{NULL, 0, NULL, 0}
};
@@ -68,7 +70,7 @@ handle_args(int argc, char *argv[])
{
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
{
- printf(_("Usage: %s [-d DURATION] [-c CUTOFF]\n"), progname);
+ printf(_("Usage: %s [-d DURATION] [-c CUTOFF] [--fast]\n"), progname);
exit(0);
}
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
@@ -78,7 +80,7 @@ handle_args(int argc, char *argv[])
}
}
- while ((option = getopt_long(argc, argv, "d:c:",
+ while ((option = getopt_long(argc, argv, "d:c:f:",
long_options, &optindex)) != -1)
{
switch (option)
@@ -125,6 +127,10 @@ handle_args(int argc, char *argv[])
}
break;
+ case 'f':
+ fast_timing = true;
+ break;
+
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
progname);
@@ -155,11 +161,31 @@ test_timing(unsigned int duration)
uint64 total_time;
int64 time_elapsed = 0;
uint64 loop_count = 0;
- uint64 prev,
- cur;
instr_time start_time,
end_time,
- temp;
+ prev,
+ cur;
+ char *time_source = NULL;
+ bool fast_timing_used = false;
+
+ INSTR_TIME_INITIALIZE();
+
+#if !defined(WIN32) && defined(__x86_64__) && defined(__linux__)
+ if (fast_timing && has_rdtsc)
+ {
+ time_source = "RDTSC";
+ fast_timing_used = true;
+ }
+ else if (has_rdtscp)
+ time_source = "RDTSCP";
+ else
+ time_source = PG_INSTR_CLOCK_NAME;
+#else
+ time_source = PG_INSTR_CLOCK_NAME;
+#endif
+ if (fast_timing && !fast_timing_used)
+ printf(_("Warning: Fast timing requested, but not available - regular timing source will be used\n"));
+ printf(_("Time source: %s\n"), time_source);
/*
* Pre-zero the statistics data structures. They're already zero by
@@ -173,8 +199,11 @@ test_timing(unsigned int duration)
total_time = duration > 0 ? duration * INT64CONST(1000000000) : 0;
- INSTR_TIME_SET_CURRENT(start_time);
- cur = INSTR_TIME_GET_NANOSEC(start_time);
+ if (fast_timing)
+ INSTR_TIME_SET_CURRENT_FAST(start_time);
+ else
+ INSTR_TIME_SET_CURRENT(start_time);
+ cur = start_time;
while (time_elapsed < total_time)
{
@@ -182,9 +211,11 @@ test_timing(unsigned int duration)
bits;
prev = cur;
- INSTR_TIME_SET_CURRENT(temp);
- cur = INSTR_TIME_GET_NANOSEC(temp);
- diff = cur - prev;
+ if (fast_timing)
+ INSTR_TIME_SET_CURRENT_FAST(cur);
+ else
+ INSTR_TIME_SET_CURRENT(cur);
+ diff = INSTR_TIME_DIFF_NANOSEC(cur, prev);
/* Did time go backwards? */
if (unlikely(diff < 0))
@@ -217,11 +248,13 @@ test_timing(unsigned int duration)
largest_diff_count++;
loop_count++;
- INSTR_TIME_SUBTRACT(temp, start_time);
- time_elapsed = INSTR_TIME_GET_NANOSEC(temp);
+ time_elapsed = INSTR_TIME_DIFF_NANOSEC(cur, start_time);
}
- INSTR_TIME_SET_CURRENT(end_time);
+ if (fast_timing)
+ INSTR_TIME_SET_CURRENT_FAST(end_time);
+ else
+ INSTR_TIME_SET_CURRENT(end_time);
INSTR_TIME_SUBTRACT(end_time, start_time);
diff --git a/src/include/portability/instr_time.h b/src/include/portability/instr_time.h
index e2e339a0c4f..f02296f1026 100644
--- a/src/include/portability/instr_time.h
+++ b/src/include/portability/instr_time.h
@@ -112,10 +112,13 @@ extern int64 max_ticks_no_overflow;
*/
#if defined(__darwin__) && defined(CLOCK_MONOTONIC_RAW)
#define PG_INSTR_CLOCK CLOCK_MONOTONIC_RAW
+#define PG_INSTR_CLOCK_NAME "clock_gettime (CLOCK_MONOTONIC_RAW)"
#elif defined(CLOCK_MONOTONIC)
#define PG_INSTR_CLOCK CLOCK_MONOTONIC
+#define PG_INSTR_CLOCK_NAME "clock_gettime (CLOCK_MONOTONIC)"
#else
#define PG_INSTR_CLOCK CLOCK_REALTIME
+#define PG_INSTR_CLOCK_NAME "clock_gettime (CLOCK_REALTIME)"
#endif
#if defined(__x86_64__) && defined(__linux__)
@@ -174,7 +177,7 @@ pg_get_ticks(void)
}
static inline int64_t
-pg_ticks_to_ns(instr_time t)
+pg_ticks_to_ns(int64 ticks)
{
/*
* Would multiplication overflow? If so perform computation in two parts.
@@ -183,7 +186,7 @@ pg_ticks_to_ns(instr_time t)
*/
int64 ns = 0;
- if (unlikely(t.ticks > max_ticks_no_overflow))
+ if (unlikely(ticks > max_ticks_no_overflow))
{
/*
* Compute how often the maximum number of ticks fits completely into
@@ -192,7 +195,7 @@ pg_ticks_to_ns(instr_time t)
* value. In a 2nd step we adjust the number of elapsed ticks and
* convert the remaining ticks.
*/
- int64 count = t.ticks / max_ticks_no_overflow;
+ int64 count = ticks / max_ticks_no_overflow;
int64 max_ns = max_ticks_no_overflow * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION;
ns = max_ns * count;
@@ -201,11 +204,11 @@ pg_ticks_to_ns(instr_time t)
* Subtract the ticks that we now already accounted for, so that they
* don't get counted twice.
*/
- t.ticks -= count * max_ticks_no_overflow;
- Assert(t.ticks >= 0);
+ ticks -= count * max_ticks_no_overflow;
+ Assert(ticks >= 0);
}
- ns += t.ticks * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION;
+ ns += ticks * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION;
return ns;
}
@@ -226,14 +229,14 @@ pg_initialize_get_ticks()
#define INSTR_TIME_SET_CURRENT(t) \
((t) = pg_get_ticks())
-#define INSTR_TIME_GET_NANOSEC(t) \
- pg_ticks_to_ns(t)
-
+#define INSTR_TIME_TICKS_TO_NANOSEC(ticks) \
+ (pg_ticks_to_ns(ticks))
#else /* WIN32 */
/* Use QueryPerformanceCounter() */
+#define PG_INSTR_CLOCK_NAME "QueryPerformanceCounter"
/* helper for INSTR_TIME_SET_CURRENT / INSTR_TIME_SET_CURRENT_FAST */
static inline instr_time
@@ -265,8 +268,8 @@ GetTimerFrequency(void)
#define INSTR_TIME_SET_CURRENT(t) \
((t) = pg_query_performance_counter())
-#define INSTR_TIME_GET_NANOSEC(t) \
- ((int64) ((t).ticks * ((double) NS_PER_S / GetTimerFrequency())))
+#define INSTR_TIME_TICKS_TO_NANOSEC(ticks) \
+ ((int64) ((ticks) * ((double) NS_PER_S / GetTimerFrequency())))
#endif /* WIN32 */
@@ -285,9 +288,14 @@ GetTimerFrequency(void)
#define INSTR_TIME_SUBTRACT(x,y) \
((x).ticks -= (y).ticks)
+#define INSTR_TIME_DIFF_NANOSEC(x,y) \
+ (INSTR_TIME_TICKS_TO_NANOSEC((x).ticks - (y).ticks))
+
#define INSTR_TIME_ACCUM_DIFF(x,y,z) \
((x).ticks += (y).ticks - (z).ticks)
+#define INSTR_TIME_GET_NANOSEC(t) \
+ (INSTR_TIME_TICKS_TO_NANOSEC((t).ticks))
#define INSTR_TIME_GET_DOUBLE(t) \
((double) INSTR_TIME_GET_NANOSEC(t) / NS_PER_S)
--
2.47.3
--vtqqtrpooseurzip--