wip-detect-constant-input-exactly-2.patch

text/x-diff
Filename: wip-detect-constant-input-exactly-2.patch
Type: text/x-diff
Part: 0
Message: Re: BUG #19340: Wrong result from CORR() function
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
index 7b97d2be6ca..c2173f378bf 100644
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -3319,9 +3319,16 @@ float8_stddev_samp(PG_FUNCTION_ARGS)
  * As with the preceding aggregates, we use the Youngs-Cramer algorithm to
  * reduce rounding errors in the aggregate final functions.
  *
- * The transition datatype for all these aggregates is a 6-element array of
+ * The transition datatype for all these aggregates is an 8-element array of
  * float8, holding the values N, Sx=sum(X), Sxx=sum((X-Sx/N)^2), Sy=sum(Y),
- * Syy=sum((Y-Sy/N)^2), Sxy=sum((X-Sx/N)*(Y-Sy/N)) in that order.
+ * Syy=sum((Y-Sy/N)^2), Sxy=sum((X-Sx/N)*(Y-Sy/N)), commonX, and commonY
+ * in that order.
+ *
+ * commonX is defined as the common X value if all the X values were the
+ * same, else NaN; likewise for commonY.  This is useful for deciding
+ * whether corr() should return NULL.  Note that this representation cannot
+ * distinguish all-the-values-were-NaN from the-values-weren't-all-the-same,
+ * but that's okay because we want to return NaN for all-NaN input.
  *
  * Note that Y is the first argument to all these aggregates!
  *
@@ -3345,17 +3352,21 @@ float8_regr_accum(PG_FUNCTION_ARGS)
 				Sy,
 				Syy,
 				Sxy,
+				commonX,
+				commonY,
 				tmpX,
 				tmpY,
 				scale;
 
-	transvalues = check_float8_array(transarray, "float8_regr_accum", 6);
+	transvalues = check_float8_array(transarray, "float8_regr_accum", 8);
 	N = transvalues[0];
 	Sx = transvalues[1];
 	Sxx = transvalues[2];
 	Sy = transvalues[3];
 	Syy = transvalues[4];
 	Sxy = transvalues[5];
+	commonX = transvalues[6];
+	commonY = transvalues[7];
 
 	/*
 	 * Use the Youngs-Cramer algorithm to incorporate the new values into the
@@ -3373,6 +3384,16 @@ float8_regr_accum(PG_FUNCTION_ARGS)
 		Syy += tmpY * tmpY * scale;
 		Sxy += tmpX * tmpY * scale;
 
+		/*
+		 * Check to see if we have seen distinct inputs.  We can use a test
+		 * that's a bit cheaper than float8_ne() because if commonX is already
+		 * NaN, it does not matter whether the != test returns true or not.
+		 */
+		if (newvalX != commonX || isnan(newvalX))
+			commonX = get_float8_nan();
+		if (newvalY != commonY || isnan(newvalY))
+			commonY = get_float8_nan();
+
 		/*
 		 * Overflow check.  We only report an overflow error when finite
 		 * inputs lead to infinite results.  Note also that Sxx, Syy and Sxy
@@ -3410,6 +3431,8 @@ float8_regr_accum(PG_FUNCTION_ARGS)
 			Sxx = Sxy = get_float8_nan();
 		if (isnan(newvalY) || isinf(newvalY))
 			Syy = Sxy = get_float8_nan();
+		commonX = newvalX;
+		commonY = newvalY;
 	}
 
 	/*
@@ -3425,12 +3448,14 @@ float8_regr_accum(PG_FUNCTION_ARGS)
 		transvalues[3] = Sy;
 		transvalues[4] = Syy;
 		transvalues[5] = Sxy;
+		transvalues[6] = commonX;
+		transvalues[7] = commonY;
 
 		PG_RETURN_ARRAYTYPE_P(transarray);
 	}
 	else
 	{
-		Datum		transdatums[6];
+		Datum		transdatums[8];
 		ArrayType  *result;
 
 		transdatums[0] = Float8GetDatumFast(N);
@@ -3439,8 +3464,10 @@ float8_regr_accum(PG_FUNCTION_ARGS)
 		transdatums[3] = Float8GetDatumFast(Sy);
 		transdatums[4] = Float8GetDatumFast(Syy);
 		transdatums[5] = Float8GetDatumFast(Sxy);
+		transdatums[6] = Float8GetDatumFast(commonX);
+		transdatums[7] = Float8GetDatumFast(commonY);
 
-		result = construct_array_builtin(transdatums, 6, FLOAT8OID);
+		result = construct_array_builtin(transdatums, 8, FLOAT8OID);
 
 		PG_RETURN_ARRAYTYPE_P(result);
 	}
@@ -3733,24 +3760,27 @@ float8_corr(PG_FUNCTION_ARGS)
 	float8		N,
 				Sxx,
 				Syy,
-				Sxy;
+				Sxy,
+				commonX,
+				commonY;
 
-	transvalues = check_float8_array(transarray, "float8_corr", 6);
+	transvalues = check_float8_array(transarray, "float8_corr", 8);
 	N = transvalues[0];
 	Sxx = transvalues[2];
 	Syy = transvalues[4];
 	Sxy = transvalues[5];
+	commonX = transvalues[6];
+	commonY = transvalues[7];
 
 	/* if N is 0 we should return NULL */
 	if (N < 1.0)
 		PG_RETURN_NULL();
 
-	/* Note that Sxx and Syy are guaranteed to be non-negative */
-
 	/* per spec, return NULL for horizontal and vertical lines */
-	if (Sxx == 0 || Syy == 0)
+	if (!isnan(commonX) || !isnan(commonY))
 		PG_RETURN_NULL();
 
+	/* at this point, Sxx and Syy cannot be zero or negative */
 	PG_RETURN_FLOAT8(Sxy / sqrt(Sxx * Syy));
 }
 
diff --git a/src/include/catalog/pg_aggregate.dat b/src/include/catalog/pg_aggregate.dat
index 870769e8f14..d1342f7bb94 100644
--- a/src/include/catalog/pg_aggregate.dat
+++ b/src/include/catalog/pg_aggregate.dat
@@ -505,7 +505,7 @@
   aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
 { aggfnoid => 'corr', aggtransfn => 'float8_regr_accum',
   aggfinalfn => 'float8_corr', aggcombinefn => 'float8_regr_combine',
-  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0}' },
+  aggtranstype => '_float8', agginitval => '{0,0,0,0,0,0,0,0}' },
 
 # boolean-and and boolean-or
 { aggfnoid => 'bool_and', aggtransfn => 'booland_statefunc',