benchmark_toast_compression.sql
application/octet-stream
Filename: benchmark_toast_compression.sql
Type: application/octet-stream
Part: 0
--
-- Comprehensive TOAST Compression Benchmark: PGLZ vs LZ4 vs ZSTD
--
-- Measures compression ratios, speed, storage efficiency, and TOAST behavior
-- across diverse workloads. Data is generated inline per INSERT to ensure
-- each compression method processes identical fresh input.
--
-- Usage: psql -f benchmark_toast_compression.sql > results.txt
--
\timing on
\pset pager off
\pset format wrapped
\pset columns 120
\set ON_ERROR_STOP on
DROP SCHEMA IF EXISTS toast_bench CASCADE;
CREATE SCHEMA toast_bench;
SET search_path TO toast_bench;
\echo ''
\echo '============================================================'
\echo 'PostgreSQL TOAST Compression Benchmark'
\echo '============================================================'
\echo ''
-- Detect available compression methods
\set has_lz4 0
\set has_zstd 0
SELECT
(enumvals @> '{lz4}')::int AS has_lz4,
(enumvals @> '{zstd}')::int AS has_zstd
FROM pg_settings
WHERE name = 'default_toast_compression'
\gset
\echo 'Compression methods available:'
\echo ' PGLZ: yes (built-in)'
\echo ' LZ4:' :has_lz4
\echo ' ZSTD:' :has_zstd
\echo ''
------------------------------------------------------------
-- Helper Functions for Data Generation
------------------------------------------------------------
CREATE OR REPLACE FUNCTION generate_product_json(product_id integer) RETURNS jsonb AS $$
BEGIN
RETURN jsonb_build_object(
'id', product_id,
'name', 'Product Name ' || product_id,
'description', 'This is a detailed product description for item ' || product_id ||
'. It includes features, specifications, and usage information. ' ||
'The product is designed for high-quality performance and reliability. ' ||
repeat('Additional details about quality, manufacturing, and warranty. ', 5),
'price', (random() * 1000)::numeric(10,2),
'category', ARRAY['Electronics', 'Computers', 'Laptops', 'Gaming'],
'reviews', (
SELECT jsonb_agg(
jsonb_build_object(
'user', 'user_' || i,
'rating', 1 + floor(random() * 5)::int,
'comment', 'This is review number ' || i || ' for product ' || product_id || '. ' ||
repeat('Great product with excellent features. ', 3),
'date', '2025-' || lpad((1 + floor(random() * 12))::text, 2, '0') || '-' ||
lpad((1 + floor(random() * 28))::text, 2, '0')
)
)
FROM generate_series(1, 10) i
),
'specifications', jsonb_build_object(
'weight', (random() * 10)::numeric(4,2) || ' kg',
'dimensions', jsonb_build_object('length', 30 + random() * 20, 'width', 20 + random() * 10, 'height', 5 + random() * 5),
'warranty', '2 years',
'manufacturer', 'TechCorp Inc.',
'model', 'TC-' || product_id || '-2025'
),
'metadata', jsonb_build_object(
'created_at', now() - (random() * 365 || ' days')::interval,
'updated_at', now() - (random() * 30 || ' days')::interval,
'views', floor(random() * 10000)::int,
'sales', floor(random() * 1000)::int
)
);
END;
$$ LANGUAGE plpgsql;
-- Server/application logs
CREATE OR REPLACE FUNCTION generate_log_entries(num_entries integer) RETURNS text
LANGUAGE sql VOLATILE AS $$
SELECT string_agg(
format('%s [%s] %s-%s - Request id=%s user=user_%s endpoint=/api/v1/resource/%s method=%s duration=%sms status=%s message="Processing completed for operation type %s"',
to_char(clock_timestamp() - (random() * 86400 || ' seconds')::interval, 'YYYY-MM-DD HH24:MI:SS.MS'),
(ARRAY['DEBUG','INFO','WARN','ERROR','FATAL'])[1 + floor(random() * 5)::int],
(ARRAY['AuthService','DataService','CacheService','APIGateway','DatabasePool'])[1 + floor(random() * 5)::int],
floor(random() * 100)::int,
md5(random()::text || clock_timestamp()::text || g::text),
floor(random() * 1000)::int,
floor(random() * 100)::int,
(ARRAY['GET','POST','PUT','DELETE'])[1 + floor(random() * 4)::int],
floor(random() * 5000)::int,
(ARRAY[200, 201, 400, 404, 500])[1 + floor(random() * 5)::int],
g)
, E'\n' ORDER BY g)
FROM generate_series(1, num_entries) AS g;
$$;
-- HTML/XML documents
CREATE OR REPLACE FUNCTION generate_html_document(page_num integer) RETURNS text AS $$
DECLARE
result text := '<!DOCTYPE html><html><head><meta charset="UTF-8"><title>Document ' || page_num || '</title>';
BEGIN
result := result || '<style>body{font-family:Arial;margin:20px;} .section{margin:10px;padding:10px;}</style></head><body>';
result := result || '<h1>Document Title ' || page_num || '</h1>';
FOR i IN 1..20 LOOP
result := result || '<div class="section"><h2>Section ' || i || '</h2>';
result := result || '<p>' || repeat('This is paragraph text for section ' || i || ' of document ' || page_num || '. ', 20) || '</p>';
result := result || '<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li><li>Item 4</li></ul>';
result := result || '</div>';
END LOOP;
result := result || '</body></html>';
RETURN result;
END;
$$ LANGUAGE plpgsql;
-- Base64-encoded binary-like data
CREATE OR REPLACE FUNCTION generate_base64_data(size_kb integer) RETURNS text
LANGUAGE sql VOLATILE AS $$
SELECT substr(
string_agg(md5(random()::text || clock_timestamp()::text || g::text), '' ORDER BY g),
1,
size_kb * 1024)
FROM generate_series(
1,
ceil((size_kb * 1024) / 32.0)::int) AS g;
$$;
-- CSV-like data
CREATE OR REPLACE FUNCTION generate_csv_data(num_rows integer) RETURNS text
LANGUAGE sql VOLATILE AS $$
SELECT
'id,timestamp,user_id,action,resource,duration_ms,status,ip_address,user_agent' || E'\n' ||
string_agg(
format('%s,%s,%s,%s,%s,%s,%s,%s,%s',
g,
to_char(clock_timestamp() - (random() * 86400 || ' seconds')::interval, 'YYYY-MM-DD HH24:MI:SS'),
floor(random() * 10000)::int,
(ARRAY['login','logout','create','read','update','delete'])[1 + floor(random() * 6)::int],
'/api/v1/resource/' || floor(random() * 100)::int,
floor(random() * 5000)::int,
(ARRAY['success','failure','timeout'])[1 + floor(random() * 3)::int],
(10 + floor(random() * 245))::int || '.' ||
floor(random() * 256)::int || '.' ||
floor(random() * 256)::int || '.' ||
floor(random() * 256)::int,
'Mozilla/5.0 (Platform) Browser/' || floor(random() * 100)::int),
E'\n' ORDER BY g)
FROM generate_series(1, num_rows) AS g;
$$;
------------------------------------------------------------
-- Create Benchmark Tables (one per compression method)
------------------------------------------------------------
\echo '--- Creating benchmark tables ---'
-- Product JSON data
CREATE TABLE bench_pglz_products (id serial PRIMARY KEY, data jsonb COMPRESSION pglz);
\if :has_lz4
CREATE TABLE bench_lz4_products (id serial PRIMARY KEY, data jsonb COMPRESSION lz4);
\endif
\if :has_zstd
CREATE TABLE bench_zstd_products (id serial PRIMARY KEY, data jsonb COMPRESSION zstd);
\endif
-- Log data
CREATE TABLE bench_pglz_logs (id serial PRIMARY KEY, data text COMPRESSION pglz);
\if :has_lz4
CREATE TABLE bench_lz4_logs (id serial PRIMARY KEY, data text COMPRESSION lz4);
\endif
\if :has_zstd
CREATE TABLE bench_zstd_logs (id serial PRIMARY KEY, data text COMPRESSION zstd);
\endif
-- HTML documents
CREATE TABLE bench_pglz_html (id serial PRIMARY KEY, data text COMPRESSION pglz);
\if :has_lz4
CREATE TABLE bench_lz4_html (id serial PRIMARY KEY, data text COMPRESSION lz4);
\endif
\if :has_zstd
CREATE TABLE bench_zstd_html (id serial PRIMARY KEY, data text COMPRESSION zstd);
\endif
-- Base64 data (low compressibility)
CREATE TABLE bench_pglz_base64 (id serial PRIMARY KEY, data text COMPRESSION pglz);
\if :has_lz4
CREATE TABLE bench_lz4_base64 (id serial PRIMARY KEY, data text COMPRESSION lz4);
\endif
\if :has_zstd
CREATE TABLE bench_zstd_base64 (id serial PRIMARY KEY, data text COMPRESSION zstd);
\endif
-- CSV data
CREATE TABLE bench_pglz_csv (id serial PRIMARY KEY, data text COMPRESSION pglz);
\if :has_lz4
CREATE TABLE bench_lz4_csv (id serial PRIMARY KEY, data text COMPRESSION lz4);
\endif
\if :has_zstd
CREATE TABLE bench_zstd_csv (id serial PRIMARY KEY, data text COMPRESSION zstd);
\endif
-- Highly repetitive data
CREATE TABLE bench_pglz_repetitive (id serial PRIMARY KEY, data text COMPRESSION pglz);
\if :has_lz4
CREATE TABLE bench_lz4_repetitive (id serial PRIMARY KEY, data text COMPRESSION lz4);
\endif
\if :has_zstd
CREATE TABLE bench_zstd_repetitive (id serial PRIMARY KEY, data text COMPRESSION zstd);
\endif
------------------------------------------------------------
-- BENCHMARK 1: Compression Speed (INSERT)
------------------------------------------------------------
\echo ''
\echo '============================================================'
\echo 'BENCHMARK 1: COMPRESSION SPEED (INSERT PERFORMANCE)'
\echo '============================================================'
\echo ''
\echo '--- 1a. Product JSON Data (500 rows, ~3KB each) ---'
\timing on
INSERT INTO bench_pglz_products (data) SELECT generate_product_json(i) FROM generate_series(1, 500) i;
\timing off
\echo ''
\if :has_lz4
\timing on
INSERT INTO bench_lz4_products (data) SELECT generate_product_json(i) FROM generate_series(1, 500) i;
\timing off
\endif
\echo ''
\if :has_zstd
\timing on
INSERT INTO bench_zstd_products (data) SELECT generate_product_json(i) FROM generate_series(1, 500) i;
\timing off
\endif
\echo ''
\echo '--- 1b. Log Data (200 rows, ~100KB each) ---'
\timing on
INSERT INTO bench_pglz_logs (data) SELECT generate_log_entries(1000) FROM generate_series(1, 200);
\timing off
\echo ''
\if :has_lz4
\timing on
INSERT INTO bench_lz4_logs (data) SELECT generate_log_entries(1000) FROM generate_series(1, 200);
\timing off
\endif
\echo ''
\if :has_zstd
\timing on
INSERT INTO bench_zstd_logs (data) SELECT generate_log_entries(1000) FROM generate_series(1, 200);
\timing off
\endif
\echo ''
\echo '--- 1c. HTML Documents (300 rows, ~50KB each) ---'
\timing on
INSERT INTO bench_pglz_html (data) SELECT generate_html_document(i) FROM generate_series(1, 300) i;
\timing off
\echo ''
\if :has_lz4
\timing on
INSERT INTO bench_lz4_html (data) SELECT generate_html_document(i) FROM generate_series(1, 300) i;
\timing off
\endif
\echo ''
\if :has_zstd
\timing on
INSERT INTO bench_zstd_html (data) SELECT generate_html_document(i) FROM generate_series(1, 300) i;
\timing off
\endif
\echo ''
\echo '--- 1d. Base64 Data (100 rows, 100KB each - low compressibility) ---'
\timing on
INSERT INTO bench_pglz_base64 (data) SELECT generate_base64_data(100) FROM generate_series(1, 100);
\timing off
\echo ''
\if :has_lz4
\timing on
INSERT INTO bench_lz4_base64 (data) SELECT generate_base64_data(100) FROM generate_series(1, 100);
\timing off
\endif
\echo ''
\if :has_zstd
\timing on
INSERT INTO bench_zstd_base64 (data) SELECT generate_base64_data(100) FROM generate_series(1, 100);
\timing off
\endif
\echo ''
\echo '--- 1e. CSV Data (400 rows, ~50KB each) ---'
\timing on
INSERT INTO bench_pglz_csv (data) SELECT generate_csv_data(500) FROM generate_series(1, 400);
\timing off
\echo ''
\if :has_lz4
\timing on
INSERT INTO bench_lz4_csv (data) SELECT generate_csv_data(500) FROM generate_series(1, 400);
\timing off
\endif
\echo ''
\if :has_zstd
\timing on
INSERT INTO bench_zstd_csv (data) SELECT generate_csv_data(500) FROM generate_series(1, 400);
\timing off
\endif
\echo ''
\echo '--- 1f. Highly Repetitive Data (1000 rows, ~90KB each) ---'
\timing on
INSERT INTO bench_pglz_repetitive (data) SELECT repeat('AAABBBCCCDDDEEEFFF', 5000) FROM generate_series(1, 1000);
\timing off
\echo ''
\if :has_lz4
\timing on
INSERT INTO bench_lz4_repetitive (data) SELECT repeat('AAABBBCCCDDDEEEFFF', 5000) FROM generate_series(1, 1000);
\timing off
\endif
\echo ''
\if :has_zstd
\timing on
INSERT INTO bench_zstd_repetitive (data) SELECT repeat('AAABBBCCCDDDEEEFFF', 5000) FROM generate_series(1, 1000);
\timing off
\endif
-- Force everything to disk
CHECKPOINT;
-- Ensure catalog stats (reltuples/relpages) are populated for later reporting.
\echo ''
\echo '--- Collecting statistics (ANALYZE) for storage reporting ---'
ANALYZE;
------------------------------------------------------------
-- BENCHMARK 2: Decompression Speed (SELECT FULL)
------------------------------------------------------------
\echo ''
\echo '============================================================'
\echo 'BENCHMARK 2: DECOMPRESSION SPEED (FULL ROW READS)'
\echo '============================================================'
\echo ''
\echo '--- 2a. Product JSON (full scan) ---'
\timing on
SELECT count(*), sum(jsonb_array_length(data->'reviews')) FROM bench_pglz_products;
\timing off
\echo ''
\if :has_lz4
\timing on
SELECT count(*), sum(jsonb_array_length(data->'reviews')) FROM bench_lz4_products;
\timing off
\endif
\echo ''
\if :has_zstd
\timing on
SELECT count(*), sum(jsonb_array_length(data->'reviews')) FROM bench_zstd_products;
\timing off
\endif
\echo ''
\echo '--- 2b. Log Data (full scan with length) ---'
\timing on
SELECT count(*), avg(length(data)), max(length(data)) FROM bench_pglz_logs;
\timing off
\echo ''
\if :has_lz4
\timing on
SELECT count(*), avg(length(data)), max(length(data)) FROM bench_lz4_logs;
\timing off
\endif
\echo ''
\if :has_zstd
\timing on
SELECT count(*), avg(length(data)), max(length(data)) FROM bench_zstd_logs;
\timing off
\endif
\echo ''
\echo '--- 2c. HTML Documents (full scan) ---'
\timing on
SELECT count(*), sum(length(data)) FROM bench_pglz_html;
\timing off
\echo ''
\if :has_lz4
\timing on
SELECT count(*), sum(length(data)) FROM bench_lz4_html;
\timing off
\endif
\echo ''
\if :has_zstd
\timing on
SELECT count(*), sum(length(data)) FROM bench_zstd_html;
\timing off
\endif
\echo ''
\echo '--- 2d. Repetitive Data (full scan) ---'
\timing on
SELECT count(*), sum(length(data)) FROM bench_pglz_repetitive;
\timing off
\echo ''
\if :has_lz4
\timing on
SELECT count(*), sum(length(data)) FROM bench_lz4_repetitive;
\timing off
\endif
\echo ''
\if :has_zstd
\timing on
SELECT count(*), sum(length(data)) FROM bench_zstd_repetitive;
\timing off
\endif
------------------------------------------------------------
-- BENCHMARK 3: Slice Access Performance
------------------------------------------------------------
\echo ''
\echo '============================================================'
\echo 'BENCHMARK 3: SLICE ACCESS (PARTIAL DECOMPRESSION)'
\echo '============================================================'
\echo ''
\echo '--- 3a. Log Data (first 500 chars) ---'
\timing on
SELECT count(*), sum(length(substring(data, 1, 500))) FROM bench_pglz_logs;
\timing off
\echo ''
\if :has_lz4
\timing on
SELECT count(*), sum(length(substring(data, 1, 500))) FROM bench_lz4_logs;
\timing off
\endif
\echo ''
\if :has_zstd
\timing on
SELECT count(*), sum(length(substring(data, 1, 500))) FROM bench_zstd_logs;
\timing off
\endif
\echo ''
\echo '--- 3b. HTML (first 2000 chars) ---'
\timing on
SELECT count(*), sum(length(substring(data, 1, 2000))) FROM bench_pglz_html;
\timing off
\echo ''
\if :has_lz4
\timing on
SELECT count(*), sum(length(substring(data, 1, 2000))) FROM bench_lz4_html;
\timing off
\endif
\echo ''
\if :has_zstd
\timing on
SELECT count(*), sum(length(substring(data, 1, 2000))) FROM bench_zstd_html;
\timing off
\endif
------------------------------------------------------------
-- BENCHMARK 4: Compression Ratios & Storage Metrics
------------------------------------------------------------
\echo ''
\echo '============================================================'
\echo 'BENCHMARK 4: COMPRESSION VERIFICATION & STORAGE ANALYSIS'
\echo '============================================================'
-- Create helper view for TOAST table analysis
CREATE OR REPLACE VIEW toast_table_info AS
SELECT
c.relname AS main_table,
t.relname AS toast_table,
t.oid AS toast_oid,
pg_relation_size(c.oid) AS main_table_size,
pg_relation_size(t.oid) AS toast_table_size,
pg_total_relation_size(c.oid) AS total_size,
(SELECT count(*) FROM pg_class tc WHERE tc.reltoastrelid = c.oid AND tc.relname = t.relname) as has_toast,
(SELECT reltuples::bigint FROM pg_class WHERE oid = t.oid) AS toast_row_count,
(SELECT relpages FROM pg_class WHERE oid = t.oid) AS toast_pages
FROM pg_class c
JOIN pg_class t ON c.reltoastrelid = t.oid
WHERE c.relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'toast_bench')
ORDER BY c.relname;
\echo ''
\echo '--- 4a. Product JSON: Verify Compression Methods Used ---'
SELECT
'PGLZ' as declared_method,
count(*) as total_rows,
count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
count(*) FILTER (WHERE pg_column_compression(data::text) = 'pglz') as pglz_compressed,
count(*) FILTER (WHERE pg_column_compression(data::text) = 'lz4') as lz4_compressed,
count(*) FILTER (WHERE pg_column_compression(data::text) = 'zstd') as zstd_compressed,
count(*) FILTER (WHERE pg_column_compression(data::text) IS NULL) as uncompressed,
pg_size_pretty(pg_total_relation_size('bench_pglz_products')) as total_storage
FROM bench_pglz_products;
\if :has_lz4
SELECT
'LZ4' as declared_method,
count(*) as total_rows,
count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
count(*) FILTER (WHERE pg_column_compression(data::text) = 'pglz') as pglz_compressed,
count(*) FILTER (WHERE pg_column_compression(data::text) = 'lz4') as lz4_compressed,
count(*) FILTER (WHERE pg_column_compression(data::text) = 'zstd') as zstd_compressed,
count(*) FILTER (WHERE pg_column_compression(data::text) IS NULL) as uncompressed,
pg_size_pretty(pg_total_relation_size('bench_lz4_products')) as total_storage
FROM bench_lz4_products;
\endif
\if :has_zstd
SELECT
'ZSTD' as declared_method,
count(*) as total_rows,
count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
count(*) FILTER (WHERE pg_column_compression(data::text) = 'pglz') as pglz_compressed,
count(*) FILTER (WHERE pg_column_compression(data::text) = 'lz4') as lz4_compressed,
count(*) FILTER (WHERE pg_column_compression(data::text) = 'zstd') as zstd_compressed,
count(*) FILTER (WHERE pg_column_compression(data::text) IS NULL) as uncompressed,
pg_size_pretty(pg_total_relation_size('bench_zstd_products')) as total_storage
FROM bench_zstd_products;
\endif
\echo ''
\echo '--- 4b. Log Data: Verify Compression Methods Used ---'
SELECT
'PGLZ' as declared_method,
count(*) as total_rows,
count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
count(*) FILTER (WHERE pg_column_compression(data) = 'pglz') as pglz_compressed,
count(*) FILTER (WHERE pg_column_compression(data) = 'lz4') as lz4_compressed,
count(*) FILTER (WHERE pg_column_compression(data) = 'zstd') as zstd_compressed,
count(*) FILTER (WHERE pg_column_compression(data) IS NULL) as uncompressed,
pg_size_pretty(avg(length(data))::bigint) as avg_uncompressed,
pg_size_pretty(pg_total_relation_size('bench_pglz_logs')) as total_storage
FROM bench_pglz_logs;
\if :has_lz4
SELECT
'LZ4' as declared_method,
count(*) as total_rows,
count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
count(*) FILTER (WHERE pg_column_compression(data) = 'pglz') as pglz_compressed,
count(*) FILTER (WHERE pg_column_compression(data) = 'lz4') as lz4_compressed,
count(*) FILTER (WHERE pg_column_compression(data) = 'zstd') as zstd_compressed,
count(*) FILTER (WHERE pg_column_compression(data) IS NULL) as uncompressed,
pg_size_pretty(avg(length(data))::bigint) as avg_uncompressed,
pg_size_pretty(pg_total_relation_size('bench_lz4_logs')) as total_storage
FROM bench_lz4_logs;
\endif
\if :has_zstd
SELECT
'ZSTD' as declared_method,
count(*) as total_rows,
count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
count(*) FILTER (WHERE pg_column_compression(data) = 'pglz') as pglz_compressed,
count(*) FILTER (WHERE pg_column_compression(data) = 'lz4') as lz4_compressed,
count(*) FILTER (WHERE pg_column_compression(data) = 'zstd') as zstd_compressed,
count(*) FILTER (WHERE pg_column_compression(data) IS NULL) as uncompressed,
pg_size_pretty(avg(length(data))::bigint) as avg_uncompressed,
pg_size_pretty(pg_total_relation_size('bench_zstd_logs')) as total_storage
FROM bench_zstd_logs;
\endif
\echo ''
\echo '--- 4c. Base64 Data: Verify Compression Methods Used ---'
SELECT
'PGLZ' as declared_method,
count(*) as total_rows,
count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
count(*) FILTER (WHERE pg_column_compression(data) = 'pglz') as pglz_compressed,
count(*) FILTER (WHERE pg_column_compression(data) = 'lz4') as lz4_compressed,
count(*) FILTER (WHERE pg_column_compression(data) = 'zstd') as zstd_compressed,
count(*) FILTER (WHERE pg_column_compression(data) IS NULL) as uncompressed,
pg_size_pretty(pg_total_relation_size('bench_pglz_base64')) as total_storage
FROM bench_pglz_base64;
\if :has_lz4
SELECT
'LZ4' as declared_method,
count(*) as total_rows,
count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
count(*) FILTER (WHERE pg_column_compression(data) = 'pglz') as pglz_compressed,
count(*) FILTER (WHERE pg_column_compression(data) = 'lz4') as lz4_compressed,
count(*) FILTER (WHERE pg_column_compression(data) = 'zstd') as zstd_compressed,
count(*) FILTER (WHERE pg_column_compression(data) IS NULL) as uncompressed,
pg_size_pretty(pg_total_relation_size('bench_lz4_base64')) as total_storage
FROM bench_lz4_base64;
\endif
\if :has_zstd
SELECT
'ZSTD' as declared_method,
count(*) as total_rows,
count(*) FILTER (WHERE pg_column_toast_chunk_id(data) IS NOT NULL) as external_rows,
count(*) FILTER (WHERE pg_column_compression(data) = 'pglz') as pglz_compressed,
count(*) FILTER (WHERE pg_column_compression(data) = 'lz4') as lz4_compressed,
count(*) FILTER (WHERE pg_column_compression(data) = 'zstd') as zstd_compressed,
count(*) FILTER (WHERE pg_column_compression(data) IS NULL) as uncompressed,
pg_size_pretty(pg_total_relation_size('bench_zstd_base64')) as total_storage
FROM bench_zstd_base64;
\endif
------------------------------------------------------------
-- BENCHMARK 5: Overall Summary
------------------------------------------------------------
\echo ''
\echo '============================================================'
\echo 'FINAL SUMMARY: OVERALL STORAGE COMPARISON'
\echo '============================================================'
CREATE TEMP TABLE summary_stats (
compression_method text,
total_bytes bigint,
total_toast_rows bigint,
total_toast_blocks bigint
);
INSERT INTO summary_stats
SELECT
'pglz' AS compression_method,
pg_total_relation_size('bench_pglz_products') +
pg_total_relation_size('bench_pglz_logs') +
pg_total_relation_size('bench_pglz_html') +
pg_total_relation_size('bench_pglz_base64') +
pg_total_relation_size('bench_pglz_csv') +
pg_total_relation_size('bench_pglz_repetitive') AS total_bytes,
(SELECT sum(toast_row_count) FROM toast_table_info WHERE main_table LIKE 'bench_pglz%') AS total_toast_rows,
(SELECT sum(toast_pages) FROM toast_table_info WHERE main_table LIKE 'bench_pglz%') AS total_toast_blocks;
\if :has_lz4
INSERT INTO summary_stats
SELECT
'lz4',
pg_total_relation_size('bench_lz4_products') +
pg_total_relation_size('bench_lz4_logs') +
pg_total_relation_size('bench_lz4_html') +
pg_total_relation_size('bench_lz4_base64') +
pg_total_relation_size('bench_lz4_csv') +
pg_total_relation_size('bench_lz4_repetitive'),
(SELECT sum(toast_row_count) FROM toast_table_info WHERE main_table LIKE 'bench_lz4%'),
(SELECT sum(toast_pages) FROM toast_table_info WHERE main_table LIKE 'bench_lz4%');
\endif
\if :has_zstd
INSERT INTO summary_stats
SELECT
'zstd',
pg_total_relation_size('bench_zstd_products') +
pg_total_relation_size('bench_zstd_logs') +
pg_total_relation_size('bench_zstd_html') +
pg_total_relation_size('bench_zstd_base64') +
pg_total_relation_size('bench_zstd_csv') +
pg_total_relation_size('bench_zstd_repetitive'),
(SELECT sum(toast_row_count) FROM toast_table_info WHERE main_table LIKE 'bench_zstd%'),
(SELECT sum(toast_pages) FROM toast_table_info WHERE main_table LIKE 'bench_zstd%');
\endif
\echo ''
SELECT
compression_method,
pg_size_pretty(total_bytes) AS total_storage,
total_bytes,
total_toast_rows,
total_toast_blocks,
pg_size_pretty(total_bytes / NULLIF(total_toast_rows, 0)) AS avg_bytes_per_toast_row,
round(100.0 * total_bytes / NULLIF((SELECT max(total_bytes) FROM summary_stats), 0), 2) AS pct_of_largest,
round(100.0 * (1 - total_bytes::numeric / NULLIF((SELECT max(total_bytes) FROM summary_stats), 0)), 2) AS space_saved_pct
FROM summary_stats
ORDER BY total_bytes;
\echo ''
\echo '--- Compression Ratio Summary (vs uncompressed) ---'
-- Estimate uncompressed size from one table
WITH uncompressed AS (
SELECT sum(length(data)) as total FROM bench_pglz_logs
UNION ALL SELECT sum(length(data)) FROM bench_pglz_html
UNION ALL SELECT sum(length(data::text)) FROM bench_pglz_products
UNION ALL SELECT sum(length(data)) FROM bench_pglz_base64
UNION ALL SELECT sum(length(data)) FROM bench_pglz_csv
UNION ALL SELECT sum(length(data)) FROM bench_pglz_repetitive
)
SELECT
s.compression_method,
pg_size_pretty((SELECT sum(total) FROM uncompressed)) AS estimated_uncompressed,
pg_size_pretty(s.total_bytes) AS compressed_size,
round((SELECT sum(total)::numeric FROM uncompressed) / NULLIF(s.total_bytes, 0), 2) AS compression_ratio
FROM summary_stats s
ORDER BY s.total_bytes;
\echo ''
\echo '============================================================'
\echo 'BENCHMARK COMPLETE'
\echo '============================================================'
\echo ''
\echo 'To clean up: DROP SCHEMA toast_bench CASCADE;'
\echo ''