long_tests.diff.txt

text/plain

Filename: long_tests.diff.txt
Type: text/plain
Part: 1
Message: Re: Row pattern recognition
From e413583502bc7602ad1803fa3a8d74d265147002 Mon Sep 17 00:00:00 2001
From: Jacob Champion <champion.p@gmail.com>
Date: Mon, 23 Oct 2023 12:31:51 -0700
Subject: [PATCH 1/2] squash! Row pattern recognition patch (tests).

Add long-table tests.
---
 src/test/regress/expected/rpr.out | 68 +++++++++++++++++++++++++++++++
 src/test/regress/sql/rpr.sql      | 56 +++++++++++++++++++++++++
 2 files changed, 124 insertions(+)

diff --git a/src/test/regress/expected/rpr.out b/src/test/regress/expected/rpr.out
index 8f8254d3b2..806dee5ce6 100644
--- a/src/test/regress/expected/rpr.out
+++ b/src/test/regress/expected/rpr.out
@@ -635,6 +635,74 @@ DOWN AS price < PREV(price)
  company2 | 07-10-2023 |  1300 |             |            |      |      |      |                       |     0
 (20 rows)
 
+--
+-- Bigger datasets
+--
+CREATE TEMP TABLE long_stock (
+       company TEXT,
+       tdate DATE,
+       price INTEGER
+);
+INSERT INTO long_stock SELECT 'company1', DATE '2023-07-01' + i, 200 + 100 * sin(i)
+                         FROM generate_series(1, 1000) i;
+INSERT INTO long_stock SELECT 'company2', DATE '2023-07-01' + i, 300 + 200 * sin(i + 3)
+                         FROM generate_series(1, 1000) i;
+SELECT company, count(*), min(price), round(avg(price)) AS avg, max(price)
+  FROM long_stock GROUP BY company;
+ company  | count | min | avg | max 
+----------+-------+-----+-----+-----
+ company1 |  1000 | 100 | 200 | 300
+ company2 |  1000 | 100 | 300 | 500
+(2 rows)
+
+-- long test using PREV. Expect approximately 1000 / (2*pi) = 159 periods of the
+-- sinusoids to match.
+WITH q AS (
+  SELECT company, tdate, first_value(price) OVER w
+   FROM long_stock
+   WINDOW w AS (
+   PARTITION BY company
+   ORDER BY tdate
+   ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+   INITIAL
+   PATTERN (START UP+ DOWN+)
+   DEFINE
+    START AS TRUE,
+    UP AS price >= PREV(price),
+    DOWN AS price <= PREV(price)
+  )
+) SELECT company, count(first_value) AS matches
+   FROM q GROUP BY company;
+ company  | matches 
+----------+---------
+ company1 |     159
+ company2 |     159
+(2 rows)
+
+-- match everything, with multiple matching variables per row (stresses
+-- implementations susceptible to Cartesian explosion)
+WITH q AS (
+  SELECT company, tdate, first_value(price) OVER w
+   FROM long_stock
+   WINDOW w AS (
+   PARTITION BY company
+   ORDER BY tdate
+   ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+   INITIAL
+   PATTERN (A+ B+ C+)
+   DEFINE
+    A AS TRUE,
+    B AS TRUE,
+    C AS TRUE
+  )
+) SELECT company, count(first_value) AS matches
+   FROM q GROUP BY company;
+ company  | matches 
+----------+---------
+ company1 |       1
+ company2 |       1
+(2 rows)
+
 --
 -- Error cases
 --
diff --git a/src/test/regress/sql/rpr.sql b/src/test/regress/sql/rpr.sql
index 38309652f9..51c9245d3b 100644
--- a/src/test/regress/sql/rpr.sql
+++ b/src/test/regress/sql/rpr.sql
@@ -271,6 +271,62 @@ UP AS price > PREV(price),
 DOWN AS price < PREV(price)
 );
 
+--
+-- Bigger datasets
+--
+
+CREATE TEMP TABLE long_stock (
+       company TEXT,
+       tdate DATE,
+       price INTEGER
+);
+
+INSERT INTO long_stock SELECT 'company1', DATE '2023-07-01' + i, 200 + 100 * sin(i)
+                         FROM generate_series(1, 1000) i;
+INSERT INTO long_stock SELECT 'company2', DATE '2023-07-01' + i, 300 + 200 * sin(i + 3)
+                         FROM generate_series(1, 1000) i;
+
+SELECT company, count(*), min(price), round(avg(price)) AS avg, max(price)
+  FROM long_stock GROUP BY company;
+
+-- long test using PREV. Expect approximately 1000 / (2*pi) = 159 periods of the
+-- sinusoids to match.
+WITH q AS (
+  SELECT company, tdate, first_value(price) OVER w
+   FROM long_stock
+   WINDOW w AS (
+   PARTITION BY company
+   ORDER BY tdate
+   ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+   INITIAL
+   PATTERN (START UP+ DOWN+)
+   DEFINE
+    START AS TRUE,
+    UP AS price >= PREV(price),
+    DOWN AS price <= PREV(price)
+  )
+) SELECT company, count(first_value) AS matches
+   FROM q GROUP BY company;
+
+-- match everything, with multiple matching variables per row (stresses
+-- implementations susceptible to Cartesian explosion)
+WITH q AS (
+  SELECT company, tdate, first_value(price) OVER w
+   FROM long_stock
+   WINDOW w AS (
+   PARTITION BY company
+   ORDER BY tdate
+   ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
+   INITIAL
+   PATTERN (A+ B+ C+)
+   DEFINE
+    A AS TRUE,
+    B AS TRUE,
+    C AS TRUE
+  )
+) SELECT company, count(first_value) AS matches
+   FROM q GROUP BY company;
+
 --
 -- Error cases
 --
-- 
2.39.2