regr_intercept.patch

text/x-patch
Filename: regr_intercept.patch
Type: text/x-patch
Part: 0
Message: Re: [PATCH] Fix overflow and underflow in regr_r2()
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
new file mode 100644
index cc00c10..262ea2b
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -4010,7 +4010,8 @@ float8_regr_intercept(PG_FUNCTION_ARGS)
 				Sx,
 				Sxx,
 				Sy,
-				Sxy;
+				Sxy,
+				dy;
 
 	transvalues = check_float8_array(transarray, "float8_regr_intercept", 8);
 	N = transvalues[0];
@@ -4029,7 +4030,41 @@ float8_regr_intercept(PG_FUNCTION_ARGS)
 	if (Sxx == 0)
 		PG_RETURN_NULL();
 
-	PG_RETURN_FLOAT8((Sy - Sx * Sxy / Sxx) / N);
+	/*
+	 * The intercept is given by (Sy - dy) / N, where dy = Sx * Sxy / Sxx.
+	 * However, when computing dy, the intermediate product Sx * Sxy might
+	 * underflow or overflow.  If so, we can recover by decomposing Sx, Sxy,
+	 * and Sxx into normalized mantissa and integer power-of-two components,
+	 * computing the corresponding components of dy, and then recomposing dy.
+	 * We avoid doing this if Sx, Sxy, or Sxx are infinite or NaN, since the
+	 * exponent returned by frexp() is unspecified in those cases (and the
+	 * final result would be the same in any case).
+	 */
+	dy = Sx * Sxy / Sxx;
+	if ((dy == 0 || isinf(dy)) &&
+		!(isinf(Sx) || isinf(Sxy) || isinf(Sxx) ||
+		  isnan(Sx) || isnan(Sxy) || isnan(Sxx)))
+	{
+		float8		m_Sx,
+					m_Sxy,
+					m_Sxx,
+					m_dy;
+		int			n_Sx,
+					n_Sxy,
+					n_Sxx,
+					n_dy;
+
+		m_Sx = frexp(Sx, &n_Sx);
+		m_Sxy = frexp(Sxy, &n_Sxy);
+		m_Sxx = frexp(Sxx, &n_Sxx);
+
+		m_dy = m_Sx * m_Sxy / m_Sxx;
+		n_dy = n_Sx + n_Sxy - n_Sxx;
+
+		dy = ldexp(m_dy, n_dy);
+	}
+
+	PG_RETURN_FLOAT8((Sy - dy) / N);
 }
 
 
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
new file mode 100644
index 1ccdf7d..9f7f44e
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -563,6 +563,12 @@ SELECT corr(1e100 + g * 1e95, 1e100 + g
     1 |       1
 (1 row)
 
+SELECT regr_intercept(y, x) FROM (VALUES (-1e150, 0), (2e150, 3e150)) v(x, y);
+ regr_intercept 
+----------------
+         1e+150
+(1 row)
+
 -- these examples pose definitional questions for NaN inputs,
 -- which we resolve by saying that an all-NaN input column is not all equal
 SELECT corr(g, 'NaN'), regr_r2(g, 'NaN') FROM generate_series(1, 30) g;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
new file mode 100644
index a310b39..0aef48c
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -159,6 +159,7 @@ SELECT corr(1e-100 + g * 1e-105, 1e-100
 SELECT corr(1e100 + g * 1e95, 1e100 + g * 1e95),
        regr_r2(1e100 + g * 1e95, 1e100 + g * 1e95)
   FROM generate_series(1, 2) g;
+SELECT regr_intercept(y, x) FROM (VALUES (-1e150, 0), (2e150, 3e150)) v(x, y);
 
 -- these examples pose definitional questions for NaN inputs,
 -- which we resolve by saying that an all-NaN input column is not all equal