regr_intercept.patch
text/x-patch
Filename: regr_intercept.patch
Type: text/x-patch
Part: 0
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
new file mode 100644
index cc00c10..262ea2b
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -4010,7 +4010,8 @@ float8_regr_intercept(PG_FUNCTION_ARGS)
Sx,
Sxx,
Sy,
- Sxy;
+ Sxy,
+ dy;
transvalues = check_float8_array(transarray, "float8_regr_intercept", 8);
N = transvalues[0];
@@ -4029,7 +4030,41 @@ float8_regr_intercept(PG_FUNCTION_ARGS)
if (Sxx == 0)
PG_RETURN_NULL();
- PG_RETURN_FLOAT8((Sy - Sx * Sxy / Sxx) / N);
+ /*
+ * The intercept is given by (Sy - dy) / N, where dy = Sx * Sxy / Sxx.
+ * However, when computing dy, the intermediate product Sx * Sxy might
+ * underflow or overflow. If so, we can recover by decomposing Sx, Sxy,
+ * and Sxx into normalized mantissa and integer power-of-two components,
+ * computing the corresponding components of dy, and then recomposing dy.
+ * We avoid doing this if Sx, Sxy, or Sxx are infinite or NaN, since the
+ * exponent returned by frexp() is unspecified in those cases (and the
+ * final result would be the same in any case).
+ */
+ dy = Sx * Sxy / Sxx;
+ if ((dy == 0 || isinf(dy)) &&
+ !(isinf(Sx) || isinf(Sxy) || isinf(Sxx) ||
+ isnan(Sx) || isnan(Sxy) || isnan(Sxx)))
+ {
+ float8 m_Sx,
+ m_Sxy,
+ m_Sxx,
+ m_dy;
+ int n_Sx,
+ n_Sxy,
+ n_Sxx,
+ n_dy;
+
+ m_Sx = frexp(Sx, &n_Sx);
+ m_Sxy = frexp(Sxy, &n_Sxy);
+ m_Sxx = frexp(Sxx, &n_Sxx);
+
+ m_dy = m_Sx * m_Sxy / m_Sxx;
+ n_dy = n_Sx + n_Sxy - n_Sxx;
+
+ dy = ldexp(m_dy, n_dy);
+ }
+
+ PG_RETURN_FLOAT8((Sy - dy) / N);
}
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
new file mode 100644
index 1ccdf7d..9f7f44e
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -563,6 +563,12 @@ SELECT corr(1e100 + g * 1e95, 1e100 + g
1 | 1
(1 row)
+SELECT regr_intercept(y, x) FROM (VALUES (-1e150, 0), (2e150, 3e150)) v(x, y);
+ regr_intercept
+----------------
+ 1e+150
+(1 row)
+
-- these examples pose definitional questions for NaN inputs,
-- which we resolve by saying that an all-NaN input column is not all equal
SELECT corr(g, 'NaN'), regr_r2(g, 'NaN') FROM generate_series(1, 30) g;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
new file mode 100644
index a310b39..0aef48c
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -159,6 +159,7 @@ SELECT corr(1e-100 + g * 1e-105, 1e-100
SELECT corr(1e100 + g * 1e95, 1e100 + g * 1e95),
regr_r2(1e100 + g * 1e95, 1e100 + g * 1e95)
FROM generate_series(1, 2) g;
+SELECT regr_intercept(y, x) FROM (VALUES (-1e150, 0), (2e150, 3e150)) v(x, y);
-- these examples pose definitional questions for NaN inputs,
-- which we resolve by saying that an all-NaN input column is not all equal