benchmark_toast_compression.sql

application/octet-stream

Filename: benchmark_toast_compression.sql
Type: application/octet-stream
Part: 0
Message: Re: Fwd: [PATCH] Add zstd compression for TOAST using extended header format
--
-- Comprehensive TOAST Compression Benchmark: PGLZ vs LZ4 vs ZSTD
--
-- Measures compression ratios, speed, storage efficiency, and TOAST behavior
-- across diverse workloads. Data is generated inline per INSERT to ensure
-- each compression method processes identical fresh input.
--
-- Usage: psql -f benchmark_toast_compression.sql > results.txt
--

\timing on
\pset pager off
\pset format wrapped
\pset columns 120
\set ON_ERROR_STOP on

DROP SCHEMA IF EXISTS toast_bench CASCADE;
CREATE SCHEMA toast_bench;
SET search_path TO toast_bench;

\echo ''
\echo '============================================================'
\echo 'PostgreSQL TOAST Compression Benchmark'
\echo '============================================================'
\echo ''

-- Detect available compression methods
\set has_lz4 0
\set has_zstd 0
SELECT
    (enumvals @> '{lz4}')::int  AS has_lz4,
    (enumvals @> '{zstd}')::int AS has_zstd
FROM pg_settings
WHERE name = 'default_toast_compression'
\gset

\echo 'Compression methods available:'
\echo '  PGLZ: yes (built-in)'
\echo '  LZ4:' :has_lz4
\echo '  ZSTD:' :has_zstd
\echo ''

------------------------------------------------------------
-- Helper Functions for Data Generation
------------------------------------------------------------

CREATE OR REPLACE FUNCTION generate_product_json(product_id integer) RETURNS jsonb AS $$
BEGIN
    RETURN jsonb_build_object(
        'id', product_id,
        'name', 'Product Name ' || product_id,
        'description', 'This is a detailed product description for item ' || product_id ||
                      '. It includes features, specifications, and usage information. ' ||
                      'The product is designed for high-quality performance and reliability. ' ||
                      repeat('Additional details about quality, manufacturing, and warranty. ', 5),
        'price', (random() * 1000)::numeric(10,2),
        'category', ARRAY['Electronics', 'Computers', 'Laptops', 'Gaming'],
        'reviews', (
            SELECT jsonb_agg(
                jsonb_build_object(
                    'user', 'user_' || i,
                    'rating', 1 + floor(random() * 5)::int,
                    'comment', 'This is review number ' || i || ' for product ' || product_id || '. ' ||
                              repeat('Great product with excellent features. ', 3),
                    'date', '2025-' || lpad((1 + floor(random() * 12))::text, 2, '0') || '-' ||
                            lpad((1 + floor(random() * 28))::text, 2, '0')
                )
            )
            FROM generate_series(1, 10) i
        ),
        'specifications', jsonb_build_object(
            'weight', (random() * 10)::numeric(4,2) || ' kg',
            'dimensions', jsonb_build_object('length', 30 + random() * 20, 'width', 20 + random() * 10, 'height', 5 + random() * 5),
            'warranty', '2 years',
            'manufacturer', 'TechCorp Inc.',
            'model', 'TC-' || product_id || '-2025'
        ),
        'metadata', jsonb_build_object(
            'created_at', now() - (random() * 365 || ' days')::interval,
            'updated_at', now() - (random() * 30 || ' days')::interval,
            'views', floor(random() * 10000)::int,
            'sales', floor(random() * 1000)::int
        )
    );
END;
$$ LANGUAGE plpgsql;

-- Server/application logs
CREATE OR REPLACE FUNCTION generate_log_entries(num_entries integer) RETURNS text
LANGUAGE sql VOLATILE AS $$
    SELECT string_agg(
        format('%s [%s] %s-%s - Request id=%s user=user_%s endpoint=/api/v1/resource/%s method=%s duration=%sms status=%s message="Processing completed for operation type %s"',
               to_char(clock_timestamp() - (random() * 86400 || ' seconds')::interval, 'YYYY-MM-DD HH24:MI:SS.MS'),
               (ARRAY['DEBUG','INFO','WARN','ERROR','FATAL'])[1 + floor(random() * 5)::int],
               (ARRAY['AuthService','DataService','CacheService','APIGateway','DatabasePool'])[1 + floor(random() * 5)::int],
               floor(random() * 100)::int,
               md5(random()::text || clock_timestamp()::text || g::text),
               floor(random() * 1000)::int,
               floor(random() * 100)::int,
               (ARRAY['GET','POST','PUT','DELETE'])[1 + floor(random() * 4)::int],
               floor(random() * 5000)::int,
               (ARRAY[200, 201, 400, 404, 500])[1 + floor(random() * 5)::int],
               g)
        , E'\n' ORDER BY g)
    FROM generate_series(1, num_entries) AS g;
$$;

-- HTML/XML documents
CREATE OR REPLACE FUNCTION generate_html_document(page_num integer) RETURNS text AS $$
DECLARE
    result text := '<!DOCTYPE html><html><head><meta charset="UTF-8"><title>Document ' || page_num || '</title>';
BEGIN
    result := result || '<style>body{font-family:Arial;margin:20px;} .section{margin:10px;padding:10px;}</style></head><body>';
    result := result || '<h1>Document Title ' || page_num || '</h1>';
    FOR i IN 1..20 LOOP
        result := result || '<div class="section"><h2>Section ' || i || '</h2>';
        result := result || '<p>' || repeat('This is paragraph text for section ' || i || ' of document ' || page_num || '. ', 20) || '</p>';
        result := result || '<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li><li>Item 4</li></ul>';
        result := result || '</div>';
    END LOOP;
    result := result || '</body></html>';
    RETURN result;
END;
$$ LANGUAGE plpgsql;

-- Base64-encoded binary-like data
CREATE OR REPLACE FUNCTION generate_base64_data(size_kb integer) RETURNS text
LANGUAGE sql VOLATILE AS $$
    SELECT substr(
        string_agg(md5(random()::text || clock_timestamp()::text || g::text), '' ORDER BY g),
        1,
        size_kb * 1024)
    FROM generate_series(
        1,
        ceil((size_kb * 1024) / 32.0)::int) AS g;
$$;

-- CSV-like data
CREATE OR REPLACE FUNCTION generate_csv_data(num_rows integer) RETURNS text
LANGUAGE sql VOLATILE AS $$
    SELECT
        'id,timestamp,user_id,action,resource,duration_ms,status,ip_address,user_agent' || E'\n' ||
        string_agg(
            format('%s,%s,%s,%s,%s,%s,%s,%s,%s',
                   g,
                   to_char(clock_timestamp() - (random() * 86400 || ' seconds')::interval, 'YYYY-MM-DD HH24:MI:SS'),
                   floor(random() * 10000)::int,
                   (ARRAY['login','logout','create','read','update','delete'])[1 + floor(random() * 6)::int],
                   '/api/v1/resource/' || floor(random() * 100)::int,
                   floor(random() * 5000)::int,
                   (ARRAY['success','failure','timeout'])[1 + floor(random() * 3)::int],
                   (10 + floor(random() * 245))::int || '.' ||
                       floor(random() * 256)::int || '.' ||
                       floor(random() * 256)::int || '.' ||
                       floor(random() * 256)::int,
                   'Mozilla/5.0 (Platform) Browser/' || floor(random() * 100)::int),
            E'\n' ORDER BY g)
    FROM generate_series(1, num_rows) AS g;
$$;

------------------------------------------------------------
-- Create Benchmark Tables (one per compression method)
------------------------------------------------------------

\echo '--- Creating benchmark tables ---'

-- Product JSON data
CREATE TABLE bench_pglz_products (id serial PRIMARY KEY, data jsonb COMPRESSION pglz);
\if :has_lz4
CREATE TABLE bench_lz4_products (id serial PRIMARY KEY, data jsonb COMPRESSION lz4);
\endif
\if :has_zstd
CREATE TABLE bench_zstd_products (id serial PRIMARY KEY, data jsonb COMPRESSION zstd);
\endif

-- Log data
CREATE TABLE bench_pglz_logs (id serial PRIMARY KEY, data text COMPRESSION pglz);
\if :has_lz4
CREATE TABLE bench_lz4_logs (id serial PRIMARY KEY, data text COMPRESSION lz4);
\endif
\if :has_zstd
CREATE TABLE bench_zstd_logs (id serial PRIMARY KEY, data text COMPRESSION zstd);
\endif

-- HTML documents
CREATE TABLE bench_pglz_html (id serial PRIMARY KEY, data text COMPRESSION pglz);
\if :has_lz4
CREATE TABLE bench_lz4_html (id serial PRIMARY KEY, data text COMPRESSION lz4);
\endif
\if :has_zstd
CREATE TABLE bench_zstd_html (id serial PRIMARY KEY, data text COMPRESSION zstd);
\endif

-- Base64 data (low compressibility)
CREATE TABLE bench_pglz_base64 (id serial PRIMARY KEY, data text COMPRESSION pglz);
\if :has_lz4
CREATE TABLE bench_lz4_base64 (id serial PRIMARY KEY, data text COMPRESSION lz4);
\endif
\if :has_zstd
CREATE TABLE bench_zstd_base64 (id serial PRIMARY KEY, data text COMPRESSION zstd);
\endif

-- CSV data
CREATE TABLE bench_pglz_csv (id serial PRIMARY KEY, data text COMPRESSION pglz);
\if :has_lz4
CREATE TABLE bench_lz4_csv (id serial PRIMARY KEY, data text COMPRESSION lz4);
\endif
\if :has_zstd
CREATE TABLE bench_zstd_csv (id serial PRIMARY KEY, data text COMPRESSION zstd);
\endif

-- Highly repetitive data
CREATE TABLE bench_pglz_repetitive (id serial PRIMARY KEY, data text COMPRESSION pglz);
\if :has_lz4
CREATE TABLE bench_lz4_repetitive (id serial PRIMARY KEY, data text COMPRESSION lz4);
\endif
\if :has_zstd
CREATE TABLE bench_zstd_repetitive (id serial PRIMARY KEY, data text COMPRESSION zstd);
\endif

------------------------------------------------------------
-- BENCHMARK 1: Compression Speed (INSERT)
------------------------------------------------------------

\echo ''
\echo '============================================================'
\echo 'BENCHMARK 1: COMPRESSION SPEED (INSERT PERFORMANCE)'
\echo '============================================================'

\echo ''
\echo '--- 1a. Product JSON Data (500 rows, ~3KB each) ---'
\timing on
INSERT INTO bench_pglz_products (data) SELECT generate_product_json(i) FROM generate_series(1, 500) i;
\timing off

\echo ''
\if :has_lz4
\timing on
INSERT INTO bench_lz4_products (data) SELECT generate_product_json(i) FROM generate_series(1, 500) i;
\timing off
\endif

\echo ''
\if :has_zstd
\timing on
INSERT INTO bench_zstd_products (data) SELECT generate_product_json(i) FROM generate_series(1, 500) i;
\timing off
\endif

\echo ''
\echo '--- 1b. Log Data (200 rows, ~100KB each) ---'
\timing on
INSERT INTO bench_pglz_logs (data) SELECT generate_log_entries(1000) FROM generate_series(1, 200);
\timing off

\echo ''
\if :has_lz4
\timing on
INSERT INTO bench_lz4_logs (data) SELECT generate_log_entries(1000) FROM generate_series(1, 200);
\timing off
\endif

\echo ''
\if :has_zstd
\timing on
INSERT INTO bench_zstd_logs (data) SELECT generate_log_entries(1000) FROM generate_series(1, 200);
\timing off
\endif

\echo ''
\echo '--- 1c. HTML Documents (300 rows, ~50KB each) ---'
\timing on
INSERT INTO bench_pglz_html (data) SELECT generate_html_document(i) FROM generate_series(1, 300) i;
\timing off

\echo ''
\if :has_lz4
\timing on
INSERT INTO bench_lz4_html (data) SELECT generate_html_document(i) FROM generate_series(1, 300) i;
\timing off
\endif

\echo ''
\if :has_zstd
\timing on
INSERT INTO bench_zstd_html (data) SELECT generate_html_document(i) FROM generate_series(1, 300) i;
\timing off
\endif

\echo ''
\echo '--- 1d. Base64 Data (100 rows, 100KB each - low compressibility) ---'
\timing on
INSERT INTO bench_pglz_base64 (data) SELECT generate_base64_data(100) FROM generate_series(1, 100);
\timing off

\echo ''
\if :has_lz4
\timing on
INSERT INTO bench_lz4_base64 (data) SELECT generate_base64_data(100) FROM generate_series(1, 100);
\timing off
\endif

\echo ''
\if :has_zstd
\timing on
INSERT INTO bench_zstd_base64 (data) SELECT generate_base64_data(100) FROM generate_series(1, 100);
\timing off
\endif

\echo ''
\echo '--- 1e. CSV Data (400 rows, ~50KB each) ---'
\timing on
INSERT INTO bench_pglz_csv (data) SELECT generate_csv_data(500) FROM generate_series(1, 400);
\timing off

\echo ''
\if :has_lz4
\timing on
INSERT INTO bench_lz4_csv (data) SELECT generate_csv_data(500) FROM generate_series(1, 400);
\timing off
\endif

\echo ''
\if :has_zstd
\timing on
INSERT INTO bench_zstd_csv (data) SELECT generate_csv_data(500) FROM generate_series(1, 400);
\timing off
\endif

\echo ''
\echo '--- 1f. Highly Repetitive Data (1000 rows, ~90KB each) ---'
\timing on
INSERT INTO bench_pglz_repetitive (data) SELECT repeat('AAABBBCCCDDDEEEFFF', 5000) FROM generate_series(1, 1000);
\timing off

\echo ''
\if :has_lz4
\timing on
INSERT INTO bench_lz4_repetitive (data) SELECT repeat('AAABBBCCCDDDEEEFFF', 5000) FROM generate_series(1, 1000);
\timing off
\endif

\echo ''
\if :has_zstd
\timing on
INSERT INTO bench_zstd_repetitive (data) SELECT repeat('AAABBBCCCDDDEEEFFF', 5000) FROM generate_series(1, 1000);
\timing off
\endif

-- Force everything to disk
CHECKPOINT;

-- Ensure catalog stats (reltuples/relpages) are populated for later reporting.
\echo ''
\echo '--- Collecting statistics (ANALYZE) for storage reporting ---'
ANALYZE;

------------------------------------------------------------
-- BENCHMARK 2: Decompression Speed (SELECT FULL)
------------------------------------------------------------

\echo ''
\echo '============================================================'
\echo 'BENCHMARK 2: DECOMPRESSION SPEED (FULL ROW READS)'
\echo '============================================================'

\echo ''
\echo '--- 2a. Product JSON (full scan) ---'
\timing on
SELECT count(*), sum(jsonb_array_length(data->'reviews')) FROM bench_pglz_products;
\timing off

\echo ''
\if :has_lz4
\timing on
SELECT count(*), sum(jsonb_array_length(data->'reviews')) FROM bench_lz4_products;
\timing off
\endif

\echo ''
\if :has_zstd
\timing on
SELECT count(*), sum(jsonb_array_length(data->'reviews')) FROM bench_zstd_products;
\timing off
\endif

\echo ''
\echo '--- 2b. Log Data (full scan with length) ---'
\timing on
SELECT count(*), avg(length(data)), max(length(data)) FROM bench_pglz_logs;
\timing off

\echo ''
\if :has_lz4
\timing on
SELECT count(*), avg(length(data)), max(length(data)) FROM bench_lz4_logs;
\timing off
\endif

\echo ''
\if :has_zstd
\timing on
SELECT count(*), avg(length(data)), max(length(data)) FROM bench_zstd_logs;
\timing off
\endif

\echo ''
\echo '--- 2c. HTML Documents (full scan) ---'
\timing on
SELECT count(*), sum(length(data)) FROM bench_pglz_html;
\timing off

\echo ''
\if :has_lz4
\timing on
SELECT count(*), sum(length(data)) FROM bench_lz4_html;
\timing off
\endif

\echo ''
\if :has_zstd
\timing on
SELECT count(*), sum(length(data)) FROM bench_zstd_html;
\timing off
\endif

\echo ''
\echo '--- 2d. Repetitive Data (full scan) ---'
\timing on
SELECT count(*), sum(length(data)) FROM bench_pglz_repetitive;
\timing off

\echo ''
\if :has_lz4
\timing on
SELECT count(*), sum(length(data)) FROM bench_lz4_repetitive;
\timing off
\endif

\echo ''
\if :has_zstd
\timing on
SELECT count(*), sum(length(data)) FROM bench_zstd_repetitive;
\timing off
\endif

------------------------------------------------------------
-- BENCHMARK 3: Slice Access Performance
------------------------------------------------------------

\echo ''
\echo '============================================================'
\echo 'BENCHMARK 3: SLICE ACCESS (PARTIAL DECOMPRESSION)'
\echo '============================================================'

\echo ''
\echo '--- 3a. Log Data (first 500 chars) ---'
\timing on
SELECT count(*), sum(length(substring(data, 1, 500))) FROM bench_pglz_logs;
\timing off

\echo ''
\if :has_lz4
\timing on
SELECT count(*), sum(length(substring(data, 1, 500))) FROM bench_lz4_logs;
\timing off
\endif

\echo ''
\if :has_zstd
\timing on
SELECT count(*), sum(length(substring(data, 1, 500))) FROM bench_zstd_logs;
\timing off
\endif

\echo ''
\echo '--- 3b. HTML (first 2000 chars) ---'
\timing on
SELECT count(*), sum(length(substring(data, 1, 2000))) FROM bench_pglz_html;
\timing off

\echo ''
\if :has_lz4
\timing on
SELECT count(*), sum(length(substring(data, 1, 2000))) FROM bench_lz4_html;
\timing off
\endif

\echo ''
\if :has_zstd
\timing on
SELECT count(*), sum(length(substring(data, 1, 2000))) FROM bench_zstd_html;
\timing off
\endif

------------------------------------------------------------
-- BENCHMARK 4: Compression Ratios & Storage Metrics
------------------------------------------------------------

\echo ''
\echo '============================================================'
\echo 'BENCHMARK 4: COMPRESSION VERIFICATION & STORAGE ANALYSIS'
\echo '============================================================'

-- Create helper view for TOAST table analysis
CREATE OR REPLACE VIEW toast_table_info AS
SELECT
    c.relname AS main_table,
    t.relname AS toast_table,
    t.oid AS toast_oid,
    pg_relation_size(c.oid) AS main_table_size,
    pg_relation_size(t.oid) AS toast_table_size,
    pg_total_relation_size(c.oid) AS total_size,
    (SELECT count(*) FROM pg_class tc WHERE tc.reltoastrelid = c.oid AND tc.relname = t.relname) as has_toast,
    (SELECT reltuples::bigint FROM pg_class WHERE oid = t.oid) AS toast_row_count,
    (SELECT relpages FROM pg_class WHERE oid = t.oid) AS toast_pages
FROM pg_class c
JOIN pg_class t ON c.reltoastrelid = t.oid
WHERE c.relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'toast_bench')
ORDER BY c.relname;

\echo ''
\echo '--- 4a. Product JSON: Verify Compression Methods Used ---'
SELECT
    'PGLZ' as declared_method,
    count(*) as total_rows,
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
    count(*) FILTER (WHERE pg_column_compression(data::text) = 'pglz') as pglz_compressed,
    count(*) FILTER (WHERE pg_column_compression(data::text) = 'lz4') as lz4_compressed,
    count(*) FILTER (WHERE pg_column_compression(data::text) = 'zstd') as zstd_compressed,
    count(*) FILTER (WHERE pg_column_compression(data::text) IS NULL) as uncompressed,
    pg_size_pretty(pg_total_relation_size('bench_pglz_products')) as total_storage
FROM bench_pglz_products;

\if :has_lz4
SELECT
    'LZ4' as declared_method,
    count(*) as total_rows,
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
    count(*) FILTER (WHERE pg_column_compression(data::text) = 'pglz') as pglz_compressed,
    count(*) FILTER (WHERE pg_column_compression(data::text) = 'lz4') as lz4_compressed,
    count(*) FILTER (WHERE pg_column_compression(data::text) = 'zstd') as zstd_compressed,
    count(*) FILTER (WHERE pg_column_compression(data::text) IS NULL) as uncompressed,
    pg_size_pretty(pg_total_relation_size('bench_lz4_products')) as total_storage
FROM bench_lz4_products;
\endif

\if :has_zstd
SELECT
    'ZSTD' as declared_method,
    count(*) as total_rows,
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
    count(*) FILTER (WHERE pg_column_compression(data::text) = 'pglz') as pglz_compressed,
    count(*) FILTER (WHERE pg_column_compression(data::text) = 'lz4') as lz4_compressed,
    count(*) FILTER (WHERE pg_column_compression(data::text) = 'zstd') as zstd_compressed,
    count(*) FILTER (WHERE pg_column_compression(data::text) IS NULL) as uncompressed,
    pg_size_pretty(pg_total_relation_size('bench_zstd_products')) as total_storage
FROM bench_zstd_products;
\endif

\echo ''
\echo '--- 4b. Log Data: Verify Compression Methods Used ---'
SELECT
    'PGLZ' as declared_method,
    count(*) as total_rows,
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
    count(*) FILTER (WHERE pg_column_compression(data) = 'pglz') as pglz_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) = 'lz4') as lz4_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) = 'zstd') as zstd_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) IS NULL) as uncompressed,
    pg_size_pretty(avg(length(data))::bigint) as avg_uncompressed,
    pg_size_pretty(pg_total_relation_size('bench_pglz_logs')) as total_storage
FROM bench_pglz_logs;

\if :has_lz4
SELECT
    'LZ4' as declared_method,
    count(*) as total_rows,
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
    count(*) FILTER (WHERE pg_column_compression(data) = 'pglz') as pglz_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) = 'lz4') as lz4_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) = 'zstd') as zstd_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) IS NULL) as uncompressed,
    pg_size_pretty(avg(length(data))::bigint) as avg_uncompressed,
    pg_size_pretty(pg_total_relation_size('bench_lz4_logs')) as total_storage
FROM bench_lz4_logs;
\endif

\if :has_zstd
SELECT
    'ZSTD' as declared_method,
    count(*) as total_rows,
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
    count(*) FILTER (WHERE pg_column_compression(data) = 'pglz') as pglz_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) = 'lz4') as lz4_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) = 'zstd') as zstd_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) IS NULL) as uncompressed,
    pg_size_pretty(avg(length(data))::bigint) as avg_uncompressed,
    pg_size_pretty(pg_total_relation_size('bench_zstd_logs')) as total_storage
FROM bench_zstd_logs;
\endif

\echo ''
\echo '--- 4c. Base64 Data: Verify Compression Methods Used ---'
SELECT
    'PGLZ' as declared_method,
    count(*) as total_rows,
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
    count(*) FILTER (WHERE pg_column_compression(data) = 'pglz') as pglz_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) = 'lz4') as lz4_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) = 'zstd') as zstd_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) IS NULL) as uncompressed,
    pg_size_pretty(pg_total_relation_size('bench_pglz_base64')) as total_storage
FROM bench_pglz_base64;

\if :has_lz4
SELECT
    'LZ4' as declared_method,
    count(*) as total_rows,
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
    count(*) FILTER (WHERE pg_column_compression(data) = 'pglz') as pglz_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) = 'lz4') as lz4_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) = 'zstd') as zstd_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) IS NULL) as uncompressed,
    pg_size_pretty(pg_total_relation_size('bench_lz4_base64')) as total_storage
FROM bench_lz4_base64;
\endif

\if :has_zstd
SELECT
    'ZSTD' as declared_method,
    count(*) as total_rows,
    count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
    count(*) FILTER (WHERE pg_column_compression(data) = 'pglz') as pglz_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) = 'lz4') as lz4_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) = 'zstd') as zstd_compressed,
    count(*) FILTER (WHERE pg_column_compression(data) IS NULL) as uncompressed,
    pg_size_pretty(pg_total_relation_size('bench_zstd_base64')) as total_storage
FROM bench_zstd_base64;
\endif

------------------------------------------------------------
-- BENCHMARK 5: Overall Summary
------------------------------------------------------------

\echo ''
\echo '============================================================'
\echo 'FINAL SUMMARY: OVERALL STORAGE COMPARISON'
\echo '============================================================'

CREATE TEMP TABLE summary_stats (
    compression_method text,
    total_bytes bigint,
    total_toast_rows bigint,
    total_toast_blocks bigint
);

INSERT INTO summary_stats
SELECT
    'pglz' AS compression_method,
    pg_total_relation_size('bench_pglz_products') +
    pg_total_relation_size('bench_pglz_logs') +
    pg_total_relation_size('bench_pglz_html') +
    pg_total_relation_size('bench_pglz_base64') +
    pg_total_relation_size('bench_pglz_csv') +
    pg_total_relation_size('bench_pglz_repetitive') AS total_bytes,
    (SELECT sum(toast_row_count) FROM toast_table_info WHERE main_table LIKE 'bench_pglz%') AS total_toast_rows,
    (SELECT sum(toast_pages) FROM toast_table_info WHERE main_table LIKE 'bench_pglz%') AS total_toast_blocks;

\if :has_lz4
INSERT INTO summary_stats
SELECT
    'lz4',
    pg_total_relation_size('bench_lz4_products') +
    pg_total_relation_size('bench_lz4_logs') +
    pg_total_relation_size('bench_lz4_html') +
    pg_total_relation_size('bench_lz4_base64') +
    pg_total_relation_size('bench_lz4_csv') +
    pg_total_relation_size('bench_lz4_repetitive'),
    (SELECT sum(toast_row_count) FROM toast_table_info WHERE main_table LIKE 'bench_lz4%'),
    (SELECT sum(toast_pages) FROM toast_table_info WHERE main_table LIKE 'bench_lz4%');
\endif

\if :has_zstd
INSERT INTO summary_stats
SELECT
    'zstd',
    pg_total_relation_size('bench_zstd_products') +
    pg_total_relation_size('bench_zstd_logs') +
    pg_total_relation_size('bench_zstd_html') +
    pg_total_relation_size('bench_zstd_base64') +
    pg_total_relation_size('bench_zstd_csv') +
    pg_total_relation_size('bench_zstd_repetitive'),
    (SELECT sum(toast_row_count) FROM toast_table_info WHERE main_table LIKE 'bench_zstd%'),
    (SELECT sum(toast_pages) FROM toast_table_info WHERE main_table LIKE 'bench_zstd%');
\endif

\echo ''
SELECT
    compression_method,
    pg_size_pretty(total_bytes) AS total_storage,
    total_bytes,
    total_toast_rows,
    total_toast_blocks,
    pg_size_pretty(total_bytes / NULLIF(total_toast_rows, 0)) AS avg_bytes_per_toast_row,
    round(100.0 * total_bytes / NULLIF((SELECT max(total_bytes) FROM summary_stats), 0), 2) AS pct_of_largest,
    round(100.0 * (1 - total_bytes::numeric / NULLIF((SELECT max(total_bytes) FROM summary_stats), 0)), 2) AS space_saved_pct
FROM summary_stats
ORDER BY total_bytes;

\echo ''
\echo '--- Compression Ratio Summary (vs uncompressed) ---'
-- Estimate uncompressed size from one table
WITH uncompressed AS (
    SELECT sum(length(data)) as total FROM bench_pglz_logs
    UNION ALL SELECT sum(length(data)) FROM bench_pglz_html
    UNION ALL SELECT sum(length(data::text)) FROM bench_pglz_products
    UNION ALL SELECT sum(length(data)) FROM bench_pglz_base64
    UNION ALL SELECT sum(length(data)) FROM bench_pglz_csv
    UNION ALL SELECT sum(length(data)) FROM bench_pglz_repetitive
)
SELECT
    s.compression_method,
    pg_size_pretty((SELECT sum(total) FROM uncompressed)) AS estimated_uncompressed,
    pg_size_pretty(s.total_bytes) AS compressed_size,
    round((SELECT sum(total)::numeric FROM uncompressed) / NULLIF(s.total_bytes, 0), 2) AS compression_ratio
FROM summary_stats s
ORDER BY s.total_bytes;

\echo ''
\echo '============================================================'
\echo 'BENCHMARK COMPLETE'
\echo '============================================================'
\echo ''
\echo 'To clean up: DROP SCHEMA toast_bench CASCADE;'
\echo ''