Thread

  1. [PATCH v12 3/3] pg_test_timing: Add --fast flag to test fast timing, report time source

    Lukas Fittl <lukas@fittl.com> — 2025-07-27T15:48:48Z

    In passing also reduce the per-loop overhead caused by repeated divisions
    in INSTR_TIME_GET_NANOSEC when the ticks variable has become very large,
    instead diff first and then turn it into nanosecs.
    ---
     src/bin/pg_test_timing/pg_test_timing.c | 59 +++++++++++++++++++------
     src/include/portability/instr_time.h    | 30 ++++++++-----
     2 files changed, 65 insertions(+), 24 deletions(-)
    
    diff --git a/src/bin/pg_test_timing/pg_test_timing.c b/src/bin/pg_test_timing/pg_test_timing.c
    index a5621251afc..b77ef2063b6 100644
    --- a/src/bin/pg_test_timing/pg_test_timing.c
    +++ b/src/bin/pg_test_timing/pg_test_timing.c
    @@ -16,6 +16,7 @@ static const char *progname;
     
     static unsigned int test_duration = 3;
     static double max_rprct = 99.99;
    +static bool fast_timing = false;
     
     /* record duration in powers of 2 nanoseconds */
     static long long int histogram[32];
    @@ -56,6 +57,7 @@ handle_args(int argc, char *argv[])
     	static struct option long_options[] = {
     		{"duration", required_argument, NULL, 'd'},
     		{"cutoff", required_argument, NULL, 'c'},
    +		{"fast", no_argument, NULL, 'f'},
     		{NULL, 0, NULL, 0}
     	};
     
    @@ -68,7 +70,7 @@ handle_args(int argc, char *argv[])
     	{
     		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
     		{
    -			printf(_("Usage: %s [-d DURATION] [-c CUTOFF]\n"), progname);
    +			printf(_("Usage: %s [-d DURATION] [-c CUTOFF] [--fast]\n"), progname);
     			exit(0);
     		}
     		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
    @@ -78,7 +80,7 @@ handle_args(int argc, char *argv[])
     		}
     	}
     
    -	while ((option = getopt_long(argc, argv, "d:c:",
    +	while ((option = getopt_long(argc, argv, "d:c:f:",
     								 long_options, &optindex)) != -1)
     	{
     		switch (option)
    @@ -125,6 +127,10 @@ handle_args(int argc, char *argv[])
     				}
     				break;
     
    +			case 'f':
    +				fast_timing = true;
    +				break;
    +
     			default:
     				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
     						progname);
    @@ -155,11 +161,31 @@ test_timing(unsigned int duration)
     	uint64		total_time;
     	int64		time_elapsed = 0;
     	uint64		loop_count = 0;
    -	uint64		prev,
    -				cur;
     	instr_time	start_time,
     				end_time,
    -				temp;
    +				prev,
    +				cur;
    +	char	   *time_source = NULL;
    +	bool		fast_timing_used = false;
    +
    +	INSTR_TIME_INITIALIZE();
    +
    +#if !defined(WIN32) && defined(__x86_64__) && defined(__linux__)
    +	if (fast_timing && has_rdtsc)
    +	{
    +		time_source = "RDTSC";
    +		fast_timing_used = true;
    +	}
    +	else if (has_rdtscp)
    +		time_source = "RDTSCP";
    +	else
    +		time_source = PG_INSTR_CLOCK_NAME;
    +#else
    +	time_source = PG_INSTR_CLOCK_NAME;
    +#endif
    +	if (fast_timing && !fast_timing_used)
    +		printf(_("Warning: Fast timing requested, but not available - regular timing source will be used\n"));
    +	printf(_("Time source: %s\n"), time_source);
     
     	/*
     	 * Pre-zero the statistics data structures.  They're already zero by
    @@ -173,8 +199,11 @@ test_timing(unsigned int duration)
     
     	total_time = duration > 0 ? duration * INT64CONST(1000000000) : 0;
     
    -	INSTR_TIME_SET_CURRENT(start_time);
    -	cur = INSTR_TIME_GET_NANOSEC(start_time);
    +	if (fast_timing)
    +		INSTR_TIME_SET_CURRENT_FAST(start_time);
    +	else
    +		INSTR_TIME_SET_CURRENT(start_time);
    +	cur = start_time;
     
     	while (time_elapsed < total_time)
     	{
    @@ -182,9 +211,11 @@ test_timing(unsigned int duration)
     					bits;
     
     		prev = cur;
    -		INSTR_TIME_SET_CURRENT(temp);
    -		cur = INSTR_TIME_GET_NANOSEC(temp);
    -		diff = cur - prev;
    +		if (fast_timing)
    +			INSTR_TIME_SET_CURRENT_FAST(cur);
    +		else
    +			INSTR_TIME_SET_CURRENT(cur);
    +		diff = INSTR_TIME_DIFF_NANOSEC(cur, prev);
     
     		/* Did time go backwards? */
     		if (unlikely(diff < 0))
    @@ -217,11 +248,13 @@ test_timing(unsigned int duration)
     			largest_diff_count++;
     
     		loop_count++;
    -		INSTR_TIME_SUBTRACT(temp, start_time);
    -		time_elapsed = INSTR_TIME_GET_NANOSEC(temp);
    +		time_elapsed = INSTR_TIME_DIFF_NANOSEC(cur, start_time);
     	}
     
    -	INSTR_TIME_SET_CURRENT(end_time);
    +	if (fast_timing)
    +		INSTR_TIME_SET_CURRENT_FAST(end_time);
    +	else
    +		INSTR_TIME_SET_CURRENT(end_time);
     
     	INSTR_TIME_SUBTRACT(end_time, start_time);
     
    diff --git a/src/include/portability/instr_time.h b/src/include/portability/instr_time.h
    index e2e339a0c4f..f02296f1026 100644
    --- a/src/include/portability/instr_time.h
    +++ b/src/include/portability/instr_time.h
    @@ -112,10 +112,13 @@ extern int64 max_ticks_no_overflow;
      */
     #if defined(__darwin__) && defined(CLOCK_MONOTONIC_RAW)
     #define PG_INSTR_CLOCK	CLOCK_MONOTONIC_RAW
    +#define PG_INSTR_CLOCK_NAME	"clock_gettime (CLOCK_MONOTONIC_RAW)"
     #elif defined(CLOCK_MONOTONIC)
     #define PG_INSTR_CLOCK	CLOCK_MONOTONIC
    +#define PG_INSTR_CLOCK_NAME	"clock_gettime (CLOCK_MONOTONIC)"
     #else
     #define PG_INSTR_CLOCK	CLOCK_REALTIME
    +#define PG_INSTR_CLOCK_NAME	"clock_gettime (CLOCK_REALTIME)"
     #endif
     
     #if defined(__x86_64__) && defined(__linux__)
    @@ -174,7 +177,7 @@ pg_get_ticks(void)
     }
     
     static inline int64_t
    -pg_ticks_to_ns(instr_time t)
    +pg_ticks_to_ns(int64 ticks)
     {
     	/*
     	 * Would multiplication overflow? If so perform computation in two parts.
    @@ -183,7 +186,7 @@ pg_ticks_to_ns(instr_time t)
     	 */
     	int64		ns = 0;
     
    -	if (unlikely(t.ticks > max_ticks_no_overflow))
    +	if (unlikely(ticks > max_ticks_no_overflow))
     	{
     		/*
     		 * Compute how often the maximum number of ticks fits completely into
    @@ -192,7 +195,7 @@ pg_ticks_to_ns(instr_time t)
     		 * value. In a 2nd step we adjust the number of elapsed ticks and
     		 * convert the remaining ticks.
     		 */
    -		int64		count = t.ticks / max_ticks_no_overflow;
    +		int64		count = ticks / max_ticks_no_overflow;
     		int64		max_ns = max_ticks_no_overflow * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION;
     
     		ns = max_ns * count;
    @@ -201,11 +204,11 @@ pg_ticks_to_ns(instr_time t)
     		 * Subtract the ticks that we now already accounted for, so that they
     		 * don't get counted twice.
     		 */
    -		t.ticks -= count * max_ticks_no_overflow;
    -		Assert(t.ticks >= 0);
    +		ticks -= count * max_ticks_no_overflow;
    +		Assert(ticks >= 0);
     	}
     
    -	ns += t.ticks * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION;
    +	ns += ticks * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION;
     	return ns;
     }
     
    @@ -226,14 +229,14 @@ pg_initialize_get_ticks()
     #define INSTR_TIME_SET_CURRENT(t) \
     	((t) = pg_get_ticks())
     
    -#define INSTR_TIME_GET_NANOSEC(t) \
    -	pg_ticks_to_ns(t)
    -
    +#define INSTR_TIME_TICKS_TO_NANOSEC(ticks) \
    +	(pg_ticks_to_ns(ticks))
     
     #else							/* WIN32 */
     
     
     /* Use QueryPerformanceCounter() */
    +#define PG_INSTR_CLOCK_NAME	"QueryPerformanceCounter"
     
     /* helper for INSTR_TIME_SET_CURRENT / INSTR_TIME_SET_CURRENT_FAST */
     static inline instr_time
    @@ -265,8 +268,8 @@ GetTimerFrequency(void)
     #define INSTR_TIME_SET_CURRENT(t) \
     	((t) = pg_query_performance_counter())
     
    -#define INSTR_TIME_GET_NANOSEC(t) \
    -	((int64) ((t).ticks * ((double) NS_PER_S / GetTimerFrequency())))
    +#define INSTR_TIME_TICKS_TO_NANOSEC(ticks) \
    +	((int64) ((ticks) * ((double) NS_PER_S / GetTimerFrequency())))
     
     #endif							/* WIN32 */
     
    @@ -285,9 +288,14 @@ GetTimerFrequency(void)
     #define INSTR_TIME_SUBTRACT(x,y) \
     	((x).ticks -= (y).ticks)
     
    +#define INSTR_TIME_DIFF_NANOSEC(x,y) \
    +	(INSTR_TIME_TICKS_TO_NANOSEC((x).ticks - (y).ticks))
    +
     #define INSTR_TIME_ACCUM_DIFF(x,y,z) \
     	((x).ticks += (y).ticks - (z).ticks)
     
    +#define INSTR_TIME_GET_NANOSEC(t) \
    +	(INSTR_TIME_TICKS_TO_NANOSEC((t).ticks))
     
     #define INSTR_TIME_GET_DOUBLE(t) \
     	((double) INSTR_TIME_GET_NANOSEC(t) / NS_PER_S)
    -- 
    2.47.3
    
    
    --vtqqtrpooseurzip--