backwards_compatibility_test.sql

application/octet-stream

Filename: backwards_compatibility_test.sql
Type: application/octet-stream
Part: 2
Message: Re: Fwd: [PATCH] Add zstd compression for TOAST using extended header format
--
-- Backwards Compatibility Test for TOAST Compression
--
-- Tests that existing PGLZ/LZ4 compressed data can be read correctly
-- and that new ZSTD data works alongside legacy data.
--
-- This ensures smooth upgrades and mixed-method environments.
--

\timing on
\pset pager off
\set ON_ERROR_STOP on

DROP SCHEMA IF EXISTS compat_test CASCADE;
CREATE SCHEMA compat_test;
SET search_path TO compat_test;

\echo ''
\echo '============================================================'
\echo 'BACKWARDS COMPATIBILITY TEST FOR TOAST COMPRESSION'
\echo '============================================================'
\echo ''

-- Detect available compression methods
SELECT
    (enumvals @> '{lz4}')::int  AS has_lz4,
    (enumvals @> '{zstd}')::int AS has_zstd
FROM pg_settings
WHERE name = 'default_toast_compression'
\gset

\echo 'Available compression methods:'
\echo '  PGLZ: yes (built-in)'
\echo '  LZ4:' :has_lz4
\echo '  ZSTD:' :has_zstd
\echo ''

------------------------------------------------------------
-- TEST 1: Small Values (Inline Storage)
------------------------------------------------------------

\echo '============================================================'
\echo 'TEST 1: SMALL VALUES (INLINE STORAGE - NO TOAST)'
\echo '============================================================'
\echo ''

-- Small values should stay inline regardless of compression setting
CREATE TABLE small_pglz (id serial, data text COMPRESSION pglz);
CREATE TABLE small_lz4 (id serial, data text COMPRESSION lz4);
CREATE TABLE small_zstd (id serial, data text COMPRESSION zstd);

-- Insert small values (500 bytes - well below TOAST threshold)
INSERT INTO small_pglz (data) SELECT repeat('A', 500) FROM generate_series(1, 100);
INSERT INTO small_lz4 (data) SELECT repeat('B', 500) FROM generate_series(1, 100);
INSERT INTO small_zstd (data) SELECT repeat('C', 500) FROM generate_series(1, 100);

\echo '--- Small Values: Verify Inline Storage ---'
SELECT
    'PGLZ' as method,
    count(*) as rows,
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as toasted,
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NULL) as inline
FROM small_pglz
UNION ALL
SELECT
    'LZ4',
    count(*),
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL),
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NULL)
FROM small_lz4
UNION ALL
SELECT
    'ZSTD',
    count(*),
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL),
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NULL)
FROM small_zstd;

\echo ''
\echo 'Expected: All rows inline (toasted=0) for all methods'
\echo ''

------------------------------------------------------------
-- TEST 2: Medium Values (Inline Compressed)
------------------------------------------------------------

\echo '============================================================'
\echo 'TEST 2: MEDIUM VALUES (INLINE COMPRESSED)'
\echo '============================================================'
\echo ''

CREATE TABLE medium_pglz (id serial, data text COMPRESSION pglz);
CREATE TABLE medium_lz4 (id serial, data text COMPRESSION lz4);
CREATE TABLE medium_zstd (id serial, data text COMPRESSION zstd);

-- Insert compressible data ~5KB (should compress inline for PGLZ/LZ4)
INSERT INTO medium_pglz (data) SELECT repeat('PostgreSQL is awesome! ', 250) FROM generate_series(1, 100);
INSERT INTO medium_lz4 (data) SELECT repeat('PostgreSQL is awesome! ', 250) FROM generate_series(1, 100);
INSERT INTO medium_zstd (data) SELECT repeat('PostgreSQL is awesome! ', 250) FROM generate_series(1, 100);

\echo '--- Medium Values: Verify Compression Method ---'
SELECT
    'PGLZ' as declared_method,
    count(*) as rows,
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external,
    count(*) FILTER (WHERE pg_column_compression(data) = 'pglz') as pglz_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) = 'lz4') as lz4_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) = 'zstd') as zstd_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) IS NULL) as uncompressed_or_inline
FROM medium_pglz
UNION ALL
SELECT
    'LZ4',
    count(*),
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL),
    count(*) FILTER (WHERE pg_column_compression(data) = 'pglz'),
    count(*) FILTER (WHERE pg_column_compression(data) = 'lz4'),
    count(*) FILTER (WHERE pg_column_compression(data) = 'zstd'),
    count(*) FILTER (WHERE pg_column_compression(data) IS NULL)
FROM medium_lz4
UNION ALL
SELECT
    'ZSTD',
    count(*),
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL),
    count(*) FILTER (WHERE pg_column_compression(data) = 'pglz'),
    count(*) FILTER (WHERE pg_column_compression(data) = 'lz4'),
    count(*) FILTER (WHERE pg_column_compression(data) = 'zstd'),
    count(*) FILTER (WHERE pg_column_compression(data) IS NULL)
FROM medium_zstd;

\echo ''
\echo 'Expected: PGLZ/LZ4 inline compressed, ZSTD external'
\echo ''

------------------------------------------------------------
-- TEST 3: Large Values (External Storage)
------------------------------------------------------------

\echo '============================================================'
\echo 'TEST 3: LARGE VALUES (EXTERNAL TOAST STORAGE)'
\echo '============================================================'
\echo ''

CREATE TABLE large_pglz (id serial, data text COMPRESSION pglz);
CREATE TABLE large_lz4 (id serial, data text COMPRESSION lz4);
CREATE TABLE large_zstd (id serial, data text COMPRESSION zstd);

-- Insert large compressible data (100KB each)
INSERT INTO large_pglz (data) SELECT repeat('This is a large text value for external TOAST storage. ', 2000) FROM generate_series(1, 50);
INSERT INTO large_lz4 (data) SELECT repeat('This is a large text value for external TOAST storage. ', 2000) FROM generate_series(1, 50);
INSERT INTO large_zstd (data) SELECT repeat('This is a large text value for external TOAST storage. ', 2000) FROM generate_series(1, 50);

\echo '--- Large Values: Verify External Storage & Compression ---'
SELECT
    'PGLZ' as declared_method,
    count(*) as rows,
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external,
    count(*) FILTER (WHERE pg_column_compression(data) = 'pglz') as pglz_count,
    count(*) FILTER (WHERE pg_column_compression(data) = 'lz4') as lz4_count,
    count(*) FILTER (WHERE pg_column_compression(data) = 'zstd') as zstd_count,
    pg_size_pretty(pg_total_relation_size('large_pglz')) as storage
FROM large_pglz
UNION ALL
SELECT
    'LZ4',
    count(*),
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL),
    count(*) FILTER (WHERE pg_column_compression(data) = 'pglz'),
    count(*) FILTER (WHERE pg_column_compression(data) = 'lz4'),
    count(*) FILTER (WHERE pg_column_compression(data) = 'zstd'),
    pg_size_pretty(pg_total_relation_size('large_lz4'))
FROM large_lz4
UNION ALL
SELECT
    'ZSTD',
    count(*),
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL),
    count(*) FILTER (WHERE pg_column_compression(data) = 'pglz'),
    count(*) FILTER (WHERE pg_column_compression(data) = 'lz4'),
    count(*) FILTER (WHERE pg_column_compression(data) = 'zstd'),
    pg_size_pretty(pg_total_relation_size('large_zstd'))
FROM large_zstd;

\echo ''
\echo 'Expected: PGLZ/LZ4 inline (highly compressible data), ZSTD external (external-only by design)'
\echo ''

------------------------------------------------------------
-- TEST 4: Mixed Compression in Single Table
------------------------------------------------------------

\echo '============================================================'
\echo 'TEST 4: MIXED COMPRESSION METHODS (BACKWARDS COMPATIBILITY)'
\echo '============================================================'
\echo ''

-- Create table with default compression
CREATE TABLE mixed_data (
    id serial PRIMARY KEY,
    data text
);

-- Insert data with PGLZ (simulate old data)
SET default_toast_compression = pglz;
INSERT INTO mixed_data (data) SELECT repeat('Old PGLZ data ' || g, 10000) FROM generate_series(1, 20) g;

-- Insert data with LZ4 (simulate upgrade to LZ4)
SET default_toast_compression = lz4;
INSERT INTO mixed_data (data) SELECT repeat('Newer LZ4 data ' || g, 10000) FROM generate_series(21, 40) g;

-- Insert data with ZSTD (simulate latest upgrade)
SET default_toast_compression = zstd;
INSERT INTO mixed_data (data) SELECT repeat('Latest ZSTD data ' || g, 10000) FROM generate_series(41, 60) g;

ANALYZE mixed_data;

\echo '--- Mixed Table: Compression Method Distribution ---'
SELECT
    pg_column_compression(data) as compression_method,
    count(*) as row_count,
    pg_size_pretty(sum(pg_column_size(data))) as total_compressed_size
FROM mixed_data
GROUP BY pg_column_compression(data)
ORDER BY
    CASE pg_column_compression(data)
        WHEN 'pglz' THEN 1
        WHEN 'lz4' THEN 2
        WHEN 'zstd' THEN 3
        ELSE 4
    END;

\echo ''
\echo 'Expected: 20 PGLZ, 20 LZ4, 20 ZSTD rows coexisting'
\echo ''

------------------------------------------------------------
-- TEST 5: Read All Data (Decompress Verification)
------------------------------------------------------------

\echo '============================================================'
\echo 'TEST 5: DATA INTEGRITY (READ & DECOMPRESS ALL METHODS)'
\echo '============================================================'
\echo ''

\echo '--- Verify all data decompresses correctly ---'

-- Small values
SELECT 'Small PGLZ' as test, count(*) as rows, sum(length(data)) as total_bytes FROM small_pglz;
SELECT 'Small LZ4' as test, count(*) as rows, sum(length(data)) as total_bytes FROM small_lz4;
SELECT 'Small ZSTD' as test, count(*) as rows, sum(length(data)) as total_bytes FROM small_zstd;

-- Medium values
SELECT 'Medium PGLZ' as test, count(*) as rows, sum(length(data)) as total_bytes FROM medium_pglz;
SELECT 'Medium LZ4' as test, count(*) as rows, sum(length(data)) as total_bytes FROM medium_lz4;
SELECT 'Medium ZSTD' as test, count(*) as rows, sum(length(data)) as total_bytes FROM medium_zstd;

-- Large values
SELECT 'Large PGLZ' as test, count(*) as rows, sum(length(data)) as total_bytes FROM large_pglz;
SELECT 'Large LZ4' as test, count(*) as rows, sum(length(data)) as total_bytes FROM large_lz4;
SELECT 'Large ZSTD' as test, count(*) as rows, sum(length(data)) as total_bytes FROM large_zstd;

-- Mixed table
SELECT 'Mixed (All)' as test, count(*) as rows, sum(length(data)) as total_bytes FROM mixed_data;

\echo ''
\echo 'Expected: All data readable, correct byte counts'
\echo ''

------------------------------------------------------------
-- TEST 6: ALTER COLUMN Compression Method
------------------------------------------------------------

\echo '============================================================'
\echo 'TEST 6: ALTER COLUMN COMPRESSION (LAZY RECOMPRESSION)'
\echo '============================================================'
\echo ''

-- Create table with PGLZ
CREATE TABLE alter_test (id serial, data text COMPRESSION pglz);
INSERT INTO alter_test (data) SELECT repeat('Original PGLZ data ' || g, 5000) FROM generate_series(1, 20) g;

\echo '--- Before ALTER: Compression Method ---'
SELECT
    pg_column_compression(data) as method,
    count(*) as rows
FROM alter_test
GROUP BY pg_column_compression(data);

-- Change compression to ZSTD (existing data stays PGLZ until rewritten)
ALTER TABLE alter_test ALTER COLUMN data SET COMPRESSION zstd;

\echo ''
\echo '--- After ALTER (before UPDATE): Compression Method ---'
SELECT
    pg_column_compression(data) as method,
    count(*) as rows
FROM alter_test
GROUP BY pg_column_compression(data);

\echo ''
\echo 'Expected: Still PGLZ (lazy conversion - data not rewritten yet)'
\echo ''

-- Force rewrite with UPDATE (must actually modify data to trigger re-toast)
UPDATE alter_test SET data = data || '';

\echo ''
\echo '--- After UPDATE: Compression Method ---'
SELECT
    pg_column_compression(data) as method,
    count(*) as rows
FROM alter_test
GROUP BY pg_column_compression(data);

\echo ''
\echo 'Expected: Now ZSTD (data rewritten with new compression)'
\echo ''

------------------------------------------------------------
-- TEST 7: VACUUM FULL (Recompression Verification)
------------------------------------------------------------

\echo '============================================================'
\echo 'TEST 7: VACUUM FULL (VERIFY RECOMPRESSION)'
\echo '============================================================'
\echo ''

-- Create table with PGLZ, then change to ZSTD
-- Use less-compressible data (md5 hashes) so it goes external with PGLZ
CREATE TABLE vacuum_test (id serial, data text COMPRESSION pglz);
INSERT INTO vacuum_test (data)
SELECT string_agg(md5(g::text || s::text), '')
FROM generate_series(1, 10) s, generate_series(1, 3000) g
GROUP BY s;

\echo '--- Before changing compression ---'
SELECT
    pg_column_compression(data) as method,
    count(*) as rows
FROM vacuum_test
GROUP BY pg_column_compression(data);

-- Change to ZSTD
ALTER TABLE vacuum_test ALTER COLUMN data SET COMPRESSION zstd;

\echo ''
\echo '--- After ALTER (before VACUUM FULL) ---'
SELECT
    pg_column_compression(data) as method,
    count(*) as rows
FROM vacuum_test
GROUP BY pg_column_compression(data);

-- VACUUM FULL rewrites table with new compression
VACUUM FULL vacuum_test;

\echo ''
\echo '--- After VACUUM FULL ---'
SELECT
    pg_column_compression(data) as method,
    count(*) as rows
FROM vacuum_test
GROUP BY pg_column_compression(data);

\echo ''
\echo 'Expected: ZSTD (VACUUM FULL rewrites with new compression)'
\echo ''

------------------------------------------------------------
-- SUMMARY
------------------------------------------------------------

\echo '============================================================'
\echo 'BACKWARDS COMPATIBILITY TEST COMPLETE'
\echo '============================================================'
\echo ''
\echo ''
\echo 'All backwards compatibility scenarios validated!'
\echo ''
\echo 'To clean up: DROP SCHEMA compat_test CASCADE;'
\echo ''