v52-0005-Use-64-bit-XIDs.patch
application/octet-stream
Filename: v52-0005-Use-64-bit-XIDs.patch
Type: application/octet-stream
Part: 4
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v52-0005
Subject: Use 64-bit XIDs
| File | + | − |
|---|---|---|
| contrib/amcheck/verify_heapam.c | 43 | 76 |
| contrib/amcheck/verify_nbtree.c | 1 | 1 |
| contrib/hstore/hstore_io.c | 2 | 0 |
| contrib/pageinspect/btreefuncs.c | 13 | 5 |
| contrib/pageinspect/expected/btree.out | 2 | 2 |
| contrib/pageinspect/expected/hash_1.out | 166 | 0 |
| contrib/pageinspect/expected/oldextversions.out | 5 | 5 |
| contrib/pageinspect/expected/page.out | 14 | 14 |
| contrib/pageinspect/heapfuncs.c | 6 | 3 |
| contrib/pageinspect/Makefile | 2 | 1 |
| contrib/pageinspect/pageinspect--1.12--1.13.sql | 145 | 0 |
| contrib/pageinspect/pageinspect--1.5.sql | 2 | 0 |
| contrib/pageinspect/rawpage.c | 31 | 4 |
| contrib/pageinspect/sql/btree.sql | 2 | 1 |
| contrib/pgrowlocks/pgrowlocks.c | 1 | 1 |
| contrib/pgstattuple/pgstatapprox.c | 2 | 0 |
| contrib/pgstattuple/pgstatindex.c | 1 | 1 |
| contrib/pg_surgery/heap_surgery.c | 15 | 2 |
| contrib/pg_visibility/pg_visibility.c | 5 | 2 |
| contrib/postgres_fdw/expected/postgres_fdw.out | 36 | 19 |
| contrib/postgres_fdw/postgres_fdw.c | 5 | 4 |
| contrib/postgres_fdw/sql/postgres_fdw.sql | 10 | 5 |
| src/backend/access/common/heaptuple.c | 8 | 2 |
| src/backend/access/common/reloptions.c | 6 | 6 |
| src/backend/access/hash/hashvalidate.c | 2 | 3 |
| src/backend/access/heap/heapam.c | 1076 | 152 |
| src/backend/access/heap/heapam_handler.c | 44 | 18 |
| src/backend/access/heap/heapam_visibility.c | 87 | 86 |
| src/backend/access/heap/heaptoast.c | 3 | 0 |
| src/backend/access/heap/hio.c | 32 | 4 |
| src/backend/access/heap/pruneheap.c | 73 | 19 |
| src/backend/access/heap/rewriteheap.c | 80 | 21 |
| src/backend/access/heap/vacuumlazy.c | 38 | 112 |
| src/backend/access/nbtree/nbtpage.c | 2 | 0 |
| src/backend/access/nbtree/nbtsplitloc.c | 15 | 1 |
| src/backend/access/rmgrdesc/gistdesc.c | 2 | 2 |
| src/backend/access/rmgrdesc/heapdesc.c | 32 | 0 |
| src/backend/access/rmgrdesc/nbtdesc.c | 2 | 2 |
| src/backend/access/rmgrdesc/xactdesc.c | 4 | 2 |
| src/backend/access/rmgrdesc/xlogdesc.c | 1 | 1 |
| src/backend/access/transam/clog.c | 3 | 16 |
| src/backend/access/transam/commit_ts.c | 0 | 19 |
| src/backend/access/transam/multixact.c | 31 | 593 |
| src/backend/access/transam/slru.c | 6 | 5 |
| src/backend/access/transam/subtrans.c | 5 | 4 |
| src/backend/access/transam/transam.c | 9 | 9 |
| src/backend/access/transam/twophase.c | 6 | 47 |
| src/backend/access/transam/varsup.c | 8 | 169 |
| src/backend/access/transam/xact.c | 24 | 11 |
| src/backend/access/transam/xlog.c | 4 | 5 |
| src/backend/access/transam/xloginsert.c | 7 | 0 |
| src/backend/access/transam/xlogreader.c | 0 | 34 |
| src/backend/bootstrap/bootstrap.c | 1 | 1 |
| src/backend/catalog/heap.c | 4 | 4 |
| src/backend/catalog/pg_inherits.c | 1 | 1 |
| src/backend/commands/async.c | 1 | 1 |
| src/backend/commands/dbcommands.c | 6 | 3 |
| src/backend/commands/indexcmds.c | 3 | 3 |
| src/backend/commands/sequence.c | 25 | 5 |
| src/backend/commands/vacuum.c | 14 | 8 |
| src/backend/executor/execExprInterp.c | 1 | 0 |
| src/backend/executor/execUtils.c | 1 | 0 |
| src/backend/executor/nodeModifyTable.c | 1 | 0 |
| src/backend/executor/spi.c | 1 | 0 |
| src/backend/nodes/gen_node_support.pl | 3 | 3 |
| src/backend/optimizer/util/plancat.c | 1 | 1 |
| src/backend/postmaster/autovacuum.c | 38 | 34 |
| src/backend/replication/logical/decode.c | 21 | 3 |
| src/backend/replication/logical/proto.c | 25 | 25 |
| src/backend/replication/logical/reorderbuffer.c | 12 | 5 |
| src/backend/replication/logical/snapbuild.c | 2 | 2 |
| src/backend/replication/logical/worker.c | 1 | 1 |
| src/backend/replication/walreceiver.c | 4 | 24 |
| src/backend/replication/walsender.c | 8 | 65 |
| src/backend/statistics/extended_stats.c | 1 | 0 |
| src/backend/storage/buffer/bufmgr.c | 131 | 5 |
| src/backend/storage/buffer/heap_convert.c | 549 | 0 |
| src/backend/storage/buffer/Makefile | 2 | 1 |
| src/backend/storage/buffer/meson.build | 1 | 0 |
| src/backend/storage/ipc/procarray.c | 68 | 114 |
| src/backend/storage/ipc/sinvaladt.c | 2 | 2 |
| src/backend/storage/ipc/standby.c | 2 | 2 |
| src/backend/storage/lmgr/lmgr.c | 10 | 4 |
| src/backend/storage/lmgr/lock.c | 2 | 2 |
| src/backend/storage/lmgr/predicate.c | 3 | 28 |
| src/backend/storage/lmgr/proc.c | 7 | 5 |
| src/backend/storage/page/bufpage.c | 237 | 17 |
| src/backend/utils/adt/enum.c | 1 | 1 |
| src/backend/utils/adt/jsonfuncs.c | 2 | 0 |
| src/backend/utils/adt/lockfuncs.c | 6 | 3 |
| src/backend/utils/adt/pgstatfuncs.c | 1 | 0 |
| src/backend/utils/adt/rowtypes.c | 12 | 0 |
| src/backend/utils/adt/xid8funcs.c | 21 | 62 |
| src/backend/utils/adt/xid.c | 24 | 13 |
| src/backend/utils/cache/catcache.c | 1 | 0 |
| src/backend/utils/cache/relcache.c | 1 | 2 |
| src/backend/utils/fmgr/fmgr.c | 2 | 2 |
| src/backend/utils/misc/guc_tables.c | 82 | 82 |
| src/backend/utils/misc/help_config.c | 7 | 1 |
| src/backend/utils/misc/pg_controldata.c | 1 | 1 |
| src/backend/utils/misc/postgresql.conf.sample | 1 | 1 |
| src/backend/utils/sort/tuplesortvariants.c | 12 | 2 |
| src/backend/utils/time/combocid.c | 10 | 10 |
| src/backend/utils/time/snapmgr.c | 12 | 11 |
| src/bin/pg_amcheck/t/004_verify_heapam.pl | 160 | 25 |
| src/bin/pg_controldata/pg_controldata.c | 1 | 1 |
| src/bin/pg_dump/pg_dump.c | 17 | 10 |
| src/bin/pg_dump/pg_dump.h | 4 | 4 |
| src/bin/pg_resetwal/pg_resetwal.c | 13 | 42 |
| src/bin/pg_resetwal/t/001_basic.pl | 5 | 13 |
| src/bin/pg_upgrade/check.c | 148 | 1 |
| src/bin/pg_upgrade/controldata.c | 9 | 8 |
| src/bin/pg_upgrade/file.c | 98 | 1 |
| src/bin/pg_upgrade/Makefile | 1 | 0 |
| src/bin/pg_upgrade/meson.build | 1 | 0 |
| src/bin/pg_upgrade/pg_upgrade.c | 117 | 27 |
| src/bin/pg_upgrade/pg_upgrade.h | 26 | 8 |
| src/bin/pg_upgrade/relfilenumber.c | 28 | 6 |
| src/bin/pg_upgrade/segresize.c | 586 | 0 |
| src/bin/pg_upgrade/t/002_pg_upgrade.pl | 18 | 0 |
| src/bin/pg_upgrade/version.c | 86 | 18 |
| src/bin/pg_waldump/pg_waldump.c | 1 | 1 |
| src/bin/pg_waldump/t/001_basic.pl | 2 | 1 |
| src/include/access/ginblock.h | 9 | 2 |
| src/include/access/gist.h | 1 | 1 |
| src/include/access/heapam.h | 16 | 8 |
| src/include/access/heapam_xlog.h | 28 | 3 |
| src/include/access/heaptoast.h | 10 | 1 |
| src/include/access/htup_details.h | 251 | 28 |
| src/include/access/htup.h | 13 | 6 |
| src/include/access/multixact.h | 5 | 6 |
| src/include/access/nbtree.h | 10 | 0 |
| src/include/access/reloptions.h | 1 | 1 |
| src/include/access/rewriteheap.h | 2 | 2 |
| src/include/access/rmgrlist.h | 1 | 0 |
| src/include/access/slru.h | 1 | 9 |
| src/include/access/tableam.h | 1 | 1 |
| src/include/access/transam.h | 54 | 38 |
| src/include/access/tupmacs.h | 2 | 1 |
| src/include/access/xact.h | 9 | 4 |
| src/include/access/xloginsert.h | 1 | 0 |
| src/include/access/xlogreader.h | 0 | 4 |
| src/include/access/xlogrecord.h | 2 | 3 |
| src/include/catalog/catversion.h | 2 | 1 |
| src/include/catalog/pg_amproc.dat | 2 | 2 |
| src/include/catalog/pg_operator.dat | 4 | 4 |
| src/include/catalog/pg_proc.dat | 6 | 6 |
| src/include/catalog/pg_type.dat | 2 | 2 |
| src/include/catalog/pg_type.h | 5 | 0 |
| src/include/c.h | 16 | 7 |
| src/include/commands/vacuum.h | 11 | 11 |
| src/include/fmgr.h | 2 | 0 |
| src/include/nodes/pg_list.h | 4 | 0 |
| src/include/pg_config.h.in | 3 | 0 |
| src/include/port/pg_lfind.h | 107 | 56 |
| src/include/postgres.h | 6 | 3 |
| src/include/postmaster/autovacuum.h | 2 | 2 |
| src/include/storage/buf_internals.h | 3 | 2 |
| src/include/storage/bufmgr.h | 6 | 0 |
| src/include/storage/bufpage.h | 219 | 13 |
| src/include/storage/itemid.h | 2 | 0 |
| src/include/storage/lock.h | 7 | 7 |
| src/include/storage/proc.h | 4 | 3 |
| src/include/utils/combocid.h | 1 | 1 |
| src/include/utils/rel.h | 6 | 6 |
| src/include/utils/xid8.h | 2 | 2 |
| src/pl/plperl/plperl.c | 2 | 2 |
| src/pl/plpgsql/src/pl_comp.c | 2 | 2 |
| src/pl/plpgsql/src/pl_exec.c | 2 | 0 |
| src/pl/plpython/plpy_procedure.c | 2 | 2 |
| src/pl/tcl/pltcl.c | 2 | 2 |
| src/test/Makefile | 2 | 1 |
| src/test/meson.build | 1 | 0 |
| src/test/modules/test_lfind/test_lfind.c | 15 | 15 |
| src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm | 4 | 0 |
| src/test/recovery/t/003_recovery_targets.pl | 1 | 1 |
| src/test/recovery/t/039_end_of_wal.pl | 14 | 10 |
| src/test/regress/expected/indirect_toast.out | 8 | 0 |
| src/test/regress/expected/insert.out | 15 | 1 |
| src/test/regress/expected/opr_sanity.out | 3 | 3 |
| src/test/regress/expected/select_views.out | 43 | 43 |
| src/test/regress/expected/txid.out | 5 | 3 |
| src/test/regress/expected/type_sanity.out | 3 | 2 |
| src/test/regress/expected/xid64.out | 92 | 0 |
| src/test/regress/expected/xid.out | 12 | 10 |
| src/test/regress/parallel_schedule | 1 | 1 |
| src/test/regress/regress.c | 292 | 0 |
| src/test/regress/sql/indirect_toast.sql | 11 | 0 |
| src/test/regress/sql/insert.sql | 16 | 1 |
| src/test/regress/sql/select_views.sql | 1 | 1 |
| src/test/regress/sql/type_sanity.sql | 3 | 2 |
| src/test/regress/sql/xid64.sql | 84 | 0 |
| src/test/regress/sql/xid.sql | 1 | 1 |
| src/test/xid-64/Makefile | 22 | 0 |
| src/test/xid-64/meson.build | 16 | 0 |
| src/test/xid-64/README | 16 | 0 |
| src/test/xid-64/t/002_test_gucs.pl | 79 | 0 |
| src/test/xid-64/t/003_test_integrity.pl | 58 | 0 |
| src/test/xid-64/t/004_test_relminmxid.pl | 90 | 0 |
| src/test/xid-64/t/005_stream_subxact.pl | 100 | 0 |
| src/test/xid-64/t/006_zeropage.pl | 33 | 0 |
| src/test/xid-64/t/007_first_multi.pl | 83 | 0 |
| src/tools/pgindent/typedefs.list | 2 | 2 |
From ce3daeeb1b213b1a2efb5965fc9aa082608cd591 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <m.orlov@postgrespro.ru>
Date: Fri, 11 Mar 2022 11:37:29 +0300
Subject: [PATCH v52 5/7] Use 64-bit XIDs
- change TransactionId to 64bit
- disk tuple format (HeapTupleHeader) is (almost) unchanged: xmin and xmax remains 32bit
-- now 32bit xid is named ShortTransactionId
- heap page format is changed to contain xid and multixact base value, tuple's
xmin and xmax are offsets from.
-- xid_base and multi_base are stored as a page special data. PageHeader remains unmodified.
- in-memory tuple (HeapTuple) were enriched with precalulated 64bit xmin/xmax.
Authors:
- Alexander Korotkov <aekorotkov@gmail.com>
- Teodor Sigaev <teodor@sigaev.ru>
- Nikita Glukhov <n.gluhov@postgrespro.ru>
- Maxim Orlov <orlovmg@gmail.com>
- Pavel Borisov <pashkin.elfe@gmail.com>
- Yura Sokolov <y.sokolov@postgrespro.ru> <funny.falcon@gmail.com>
- Aleksander Alekseev <aleksander@timescale.com>
Discussion: https://postgr.es/m/CACG%3DezZe1NQSCnfHOr78AtAZxJZeCvxrts0ygrxYwe%3DpyyjVWA%40mail.gmail.com
Discussion: https://postgr.es/m/CAJ7c6TPDOYBYrnCAeyndkBktO0WG2xSdYduTF0nxq%2BvfkmTF5Q%40mail.gmail.com
---
contrib/amcheck/verify_heapam.c | 119 +-
contrib/amcheck/verify_nbtree.c | 2 +-
contrib/hstore/hstore_io.c | 2 +
contrib/pageinspect/Makefile | 3 +-
contrib/pageinspect/btreefuncs.c | 18 +-
contrib/pageinspect/expected/btree.out | 4 +-
contrib/pageinspect/expected/hash_1.out | 166 +++
.../pageinspect/expected/oldextversions.out | 10 +-
contrib/pageinspect/expected/page.out | 28 +-
contrib/pageinspect/heapfuncs.c | 9 +-
.../pageinspect/pageinspect--1.12--1.13.sql | 145 ++
contrib/pageinspect/pageinspect--1.5.sql | 2 +
contrib/pageinspect/rawpage.c | 35 +-
contrib/pageinspect/sql/btree.sql | 3 +-
contrib/pg_surgery/heap_surgery.c | 17 +-
contrib/pg_visibility/pg_visibility.c | 7 +-
contrib/pgrowlocks/pgrowlocks.c | 2 +-
contrib/pgstattuple/pgstatapprox.c | 2 +
contrib/pgstattuple/pgstatindex.c | 2 +-
.../postgres_fdw/expected/postgres_fdw.out | 55 +-
contrib/postgres_fdw/postgres_fdw.c | 9 +-
contrib/postgres_fdw/sql/postgres_fdw.sql | 15 +-
src/backend/access/common/heaptuple.c | 10 +-
src/backend/access/common/reloptions.c | 12 +-
src/backend/access/hash/hashvalidate.c | 5 +-
src/backend/access/heap/heapam.c | 1228 +++++++++++++++--
src/backend/access/heap/heapam_handler.c | 62 +-
src/backend/access/heap/heapam_visibility.c | 173 +--
src/backend/access/heap/heaptoast.c | 3 +
src/backend/access/heap/hio.c | 36 +-
src/backend/access/heap/pruneheap.c | 92 +-
src/backend/access/heap/rewriteheap.c | 101 +-
src/backend/access/heap/vacuumlazy.c | 150 +-
src/backend/access/nbtree/nbtpage.c | 2 +
src/backend/access/nbtree/nbtsplitloc.c | 16 +-
src/backend/access/rmgrdesc/gistdesc.c | 4 +-
src/backend/access/rmgrdesc/heapdesc.c | 32 +
src/backend/access/rmgrdesc/nbtdesc.c | 4 +-
src/backend/access/rmgrdesc/xactdesc.c | 6 +-
src/backend/access/rmgrdesc/xlogdesc.c | 2 +-
src/backend/access/transam/clog.c | 19 +-
src/backend/access/transam/commit_ts.c | 19 -
src/backend/access/transam/multixact.c | 624 +--------
src/backend/access/transam/slru.c | 11 +-
src/backend/access/transam/subtrans.c | 9 +-
src/backend/access/transam/transam.c | 18 +-
src/backend/access/transam/twophase.c | 53 +-
src/backend/access/transam/varsup.c | 177 +--
src/backend/access/transam/xact.c | 35 +-
src/backend/access/transam/xlog.c | 9 +-
src/backend/access/transam/xloginsert.c | 7 +
src/backend/access/transam/xlogreader.c | 34 -
src/backend/bootstrap/bootstrap.c | 2 +-
src/backend/catalog/heap.c | 8 +-
src/backend/catalog/pg_inherits.c | 2 +-
src/backend/commands/async.c | 2 +-
src/backend/commands/dbcommands.c | 9 +-
src/backend/commands/indexcmds.c | 6 +-
src/backend/commands/sequence.c | 30 +-
src/backend/commands/vacuum.c | 22 +-
src/backend/executor/execExprInterp.c | 1 +
src/backend/executor/execUtils.c | 1 +
src/backend/executor/nodeModifyTable.c | 1 +
src/backend/executor/spi.c | 1 +
src/backend/nodes/gen_node_support.pl | 6 +-
src/backend/optimizer/util/plancat.c | 2 +-
src/backend/postmaster/autovacuum.c | 72 +-
src/backend/replication/logical/decode.c | 24 +-
src/backend/replication/logical/proto.c | 50 +-
.../replication/logical/reorderbuffer.c | 17 +-
src/backend/replication/logical/snapbuild.c | 4 +-
src/backend/replication/logical/worker.c | 2 +-
src/backend/replication/walreceiver.c | 28 +-
src/backend/replication/walsender.c | 73 +-
src/backend/statistics/extended_stats.c | 1 +
src/backend/storage/buffer/Makefile | 3 +-
src/backend/storage/buffer/bufmgr.c | 136 +-
src/backend/storage/buffer/heap_convert.c | 549 ++++++++
src/backend/storage/buffer/meson.build | 1 +
src/backend/storage/ipc/procarray.c | 182 +--
src/backend/storage/ipc/sinvaladt.c | 4 +-
src/backend/storage/ipc/standby.c | 4 +-
src/backend/storage/lmgr/lmgr.c | 14 +-
src/backend/storage/lmgr/lock.c | 4 +-
src/backend/storage/lmgr/predicate.c | 31 +-
src/backend/storage/lmgr/proc.c | 12 +-
src/backend/storage/page/bufpage.c | 254 +++-
src/backend/utils/adt/enum.c | 2 +-
src/backend/utils/adt/jsonfuncs.c | 2 +
src/backend/utils/adt/lockfuncs.c | 9 +-
src/backend/utils/adt/pgstatfuncs.c | 1 +
src/backend/utils/adt/rowtypes.c | 12 +
src/backend/utils/adt/xid.c | 37 +-
src/backend/utils/adt/xid8funcs.c | 83 +-
src/backend/utils/cache/catcache.c | 1 +
src/backend/utils/cache/relcache.c | 3 +-
src/backend/utils/fmgr/fmgr.c | 4 +-
src/backend/utils/misc/guc_tables.c | 164 +--
src/backend/utils/misc/help_config.c | 8 +-
src/backend/utils/misc/pg_controldata.c | 2 +-
src/backend/utils/misc/postgresql.conf.sample | 2 +-
src/backend/utils/sort/tuplesortvariants.c | 14 +-
src/backend/utils/time/combocid.c | 20 +-
src/backend/utils/time/snapmgr.c | 23 +-
src/bin/pg_amcheck/t/004_verify_heapam.pl | 185 ++-
src/bin/pg_controldata/pg_controldata.c | 2 +-
src/bin/pg_dump/pg_dump.c | 27 +-
src/bin/pg_dump/pg_dump.h | 8 +-
src/bin/pg_resetwal/pg_resetwal.c | 55 +-
src/bin/pg_resetwal/t/001_basic.pl | 18 +-
src/bin/pg_upgrade/Makefile | 1 +
src/bin/pg_upgrade/check.c | 149 +-
src/bin/pg_upgrade/controldata.c | 17 +-
src/bin/pg_upgrade/file.c | 99 +-
src/bin/pg_upgrade/meson.build | 1 +
src/bin/pg_upgrade/pg_upgrade.c | 144 +-
src/bin/pg_upgrade/pg_upgrade.h | 34 +-
src/bin/pg_upgrade/relfilenumber.c | 34 +-
src/bin/pg_upgrade/segresize.c | 586 ++++++++
src/bin/pg_upgrade/t/002_pg_upgrade.pl | 18 +
src/bin/pg_upgrade/version.c | 104 +-
src/bin/pg_waldump/pg_waldump.c | 2 +-
src/bin/pg_waldump/t/001_basic.pl | 3 +-
src/include/access/ginblock.h | 11 +-
src/include/access/gist.h | 2 +-
src/include/access/heapam.h | 24 +-
src/include/access/heapam_xlog.h | 31 +-
src/include/access/heaptoast.h | 11 +-
src/include/access/htup.h | 19 +-
src/include/access/htup_details.h | 279 +++-
src/include/access/multixact.h | 11 +-
src/include/access/nbtree.h | 10 +
src/include/access/reloptions.h | 2 +-
src/include/access/rewriteheap.h | 4 +-
src/include/access/rmgrlist.h | 1 +
src/include/access/slru.h | 10 +-
src/include/access/tableam.h | 2 +-
src/include/access/transam.h | 92 +-
src/include/access/tupmacs.h | 3 +-
src/include/access/xact.h | 13 +-
src/include/access/xloginsert.h | 1 +
src/include/access/xlogreader.h | 4 -
src/include/access/xlogrecord.h | 5 +-
src/include/c.h | 23 +-
src/include/catalog/catversion.h | 3 +-
src/include/catalog/pg_amproc.dat | 4 +-
src/include/catalog/pg_operator.dat | 8 +-
src/include/catalog/pg_proc.dat | 12 +-
src/include/catalog/pg_type.dat | 4 +-
src/include/catalog/pg_type.h | 5 +
src/include/commands/vacuum.h | 22 +-
src/include/fmgr.h | 2 +
src/include/nodes/pg_list.h | 4 +
src/include/pg_config.h.in | 3 +
src/include/port/pg_lfind.h | 163 ++-
src/include/postgres.h | 9 +-
src/include/postmaster/autovacuum.h | 4 +-
src/include/storage/buf_internals.h | 5 +-
src/include/storage/bufmgr.h | 6 +
src/include/storage/bufpage.h | 232 +++-
src/include/storage/itemid.h | 2 +
src/include/storage/lock.h | 14 +-
src/include/storage/proc.h | 7 +-
src/include/utils/combocid.h | 2 +-
src/include/utils/rel.h | 12 +-
src/include/utils/xid8.h | 4 +-
src/pl/plperl/plperl.c | 4 +-
src/pl/plpgsql/src/pl_comp.c | 4 +-
src/pl/plpgsql/src/pl_exec.c | 2 +
src/pl/plpython/plpy_procedure.c | 4 +-
src/pl/tcl/pltcl.c | 4 +-
src/test/Makefile | 3 +-
src/test/meson.build | 1 +
src/test/modules/test_lfind/test_lfind.c | 30 +-
.../perl/PostgreSQL/Test/AdjustUpgrade.pm | 4 +
src/test/recovery/t/003_recovery_targets.pl | 2 +-
src/test/recovery/t/039_end_of_wal.pl | 24 +-
src/test/regress/expected/indirect_toast.out | 8 +
src/test/regress/expected/insert.out | 16 +-
src/test/regress/expected/opr_sanity.out | 6 +-
src/test/regress/expected/select_views.out | 86 +-
src/test/regress/expected/txid.out | 8 +-
src/test/regress/expected/type_sanity.out | 5 +-
src/test/regress/expected/xid.out | 22 +-
src/test/regress/expected/xid64.out | 92 ++
src/test/regress/parallel_schedule | 2 +-
src/test/regress/regress.c | 292 ++++
src/test/regress/sql/indirect_toast.sql | 11 +
src/test/regress/sql/insert.sql | 17 +-
src/test/regress/sql/select_views.sql | 2 +-
src/test/regress/sql/type_sanity.sql | 5 +-
src/test/regress/sql/xid.sql | 2 +-
src/test/regress/sql/xid64.sql | 84 ++
src/test/xid-64/Makefile | 22 +
src/test/xid-64/README | 16 +
src/test/xid-64/meson.build | 16 +
src/test/xid-64/t/002_test_gucs.pl | 79 ++
src/test/xid-64/t/003_test_integrity.pl | 58 +
src/test/xid-64/t/004_test_relminmxid.pl | 90 ++
src/test/xid-64/t/005_stream_subxact.pl | 100 ++
src/test/xid-64/t/006_zeropage.pl | 33 +
src/test/xid-64/t/007_first_multi.pl | 83 ++
src/tools/pgindent/typedefs.list | 4 +-
203 files changed, 6800 insertions(+), 2566 deletions(-)
create mode 100644 contrib/pageinspect/expected/hash_1.out
create mode 100644 contrib/pageinspect/pageinspect--1.12--1.13.sql
create mode 100644 src/backend/storage/buffer/heap_convert.c
create mode 100644 src/bin/pg_upgrade/segresize.c
create mode 100644 src/test/regress/expected/xid64.out
create mode 100644 src/test/regress/sql/xid64.sql
create mode 100644 src/test/xid-64/Makefile
create mode 100644 src/test/xid-64/README
create mode 100644 src/test/xid-64/meson.build
create mode 100644 src/test/xid-64/t/002_test_gucs.pl
create mode 100644 src/test/xid-64/t/003_test_integrity.pl
create mode 100644 src/test/xid-64/t/004_test_relminmxid.pl
create mode 100644 src/test/xid-64/t/005_stream_subxact.pl
create mode 100644 src/test/xid-64/t/006_zeropage.pl
create mode 100644 src/test/xid-64/t/007_first_multi.pl
diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c
index 00a65b5448..d109ccd66b 100644
--- a/contrib/amcheck/verify_heapam.c
+++ b/contrib/amcheck/verify_heapam.c
@@ -17,6 +17,7 @@
#include "access/multixact.h"
#include "access/toast_internals.h"
#include "access/visibilitymap.h"
+#include "catalog/catalog.h"
#include "catalog/pg_am.h"
#include "funcapi.h"
#include "miscadmin.h"
@@ -85,7 +86,7 @@ typedef struct HeapCheckContext
* from them.
*/
FullTransactionId next_fxid; /* ShmemVariableCache->nextXid */
- TransactionId next_xid; /* 32-bit version of next_fxid */
+ TransactionId next_xid; /* 64-bit version of next_fxid */
TransactionId oldest_xid; /* ShmemVariableCache->oldestXid */
FullTransactionId oldest_fxid; /* 64-bit version of oldest_xid, computed
* relative to next_fxid */
@@ -126,6 +127,7 @@ typedef struct HeapCheckContext
uint16 lp_len;
uint16 lp_off;
HeapTupleHeader tuphdr;
+ HeapTupleData tuple;
int natts;
/* Values for iterating over attributes within the tuple */
@@ -169,8 +171,6 @@ static bool check_tuple_visibility(HeapCheckContext *ctx,
static void report_corruption(HeapCheckContext *ctx, char *msg);
static void report_toast_corruption(HeapCheckContext *ctx,
ToastedAttribute *ta, char *msg);
-static FullTransactionId FullTransactionIdFromXidAndCtx(TransactionId xid,
- const HeapCheckContext *ctx);
static void update_cached_xid_range(HeapCheckContext *ctx);
static void update_cached_mxid_range(HeapCheckContext *ctx);
static XidBoundsViolation check_mxid_in_range(MultiXactId mxid,
@@ -394,7 +394,7 @@ verify_heapam(PG_FUNCTION_ARGS)
update_cached_xid_range(&ctx);
update_cached_mxid_range(&ctx);
ctx.relfrozenxid = ctx.rel->rd_rel->relfrozenxid;
- ctx.relfrozenfxid = FullTransactionIdFromXidAndCtx(ctx.relfrozenxid, &ctx);
+ ctx.relfrozenfxid = FullTransactionIdFromXid(ctx.relfrozenxid);
ctx.relminmxid = ctx.rel->rd_rel->relminmxid;
if (TransactionIdIsNormal(ctx.relfrozenxid))
@@ -555,6 +555,12 @@ verify_heapam(PG_FUNCTION_ARGS)
ctx.tuphdr = (HeapTupleHeader) PageGetItem(ctx.page, ctx.itemid);
ctx.natts = HeapTupleHeaderGetNatts(ctx.tuphdr);
+ ctx.tuple.t_data = ctx.tuphdr;
+ ctx.tuple.t_len = ItemIdGetLength(ctx.itemid);
+ ctx.tuple.t_tableOid = RelationGetRelid(ctx.rel);
+ HeapTupleCopyXidsFromPage(ctx.buffer, &ctx.tuple, ctx.page,
+ IsToastRelation(ctx.rel));
+
/* Ok, ready to check this next tuple */
check_tuple(&ctx,
&xmin_commit_status_ok[ctx.offnum],
@@ -588,6 +594,8 @@ verify_heapam(PG_FUNCTION_ARGS)
TransactionId curr_xmax;
TransactionId next_xmin;
OffsetNumber nextoffnum = successor[ctx.offnum];
+ HeapTupleData curr_tup;
+ HeapTupleData next_tup;
/*
* The current line pointer may not have a successor, either
@@ -650,9 +658,13 @@ verify_heapam(PG_FUNCTION_ARGS)
if (ItemIdIsRedirected(next_lp))
continue;
curr_htup = (HeapTupleHeader) PageGetItem(ctx.page, curr_lp);
- curr_xmax = HeapTupleHeaderGetUpdateXid(curr_htup);
+ curr_tup.t_data = curr_htup;
+ HeapTupleCopyXidsFromPage(ctx.buffer, &curr_tup, ctx.page, false);
+ curr_xmax = HeapTupleGetUpdateXidAny(&curr_tup);
next_htup = (HeapTupleHeader) PageGetItem(ctx.page, next_lp);
- next_xmin = HeapTupleHeaderGetXmin(next_htup);
+ next_tup.t_data = next_htup;
+ HeapTupleCopyXidsFromPage(ctx.buffer, &next_tup, ctx.page, false);
+ next_xmin = HeapTupleGetXmin(&next_tup);
if (!TransactionIdIsValid(curr_xmax) ||
!TransactionIdEquals(curr_xmax, next_xmin))
continue;
@@ -706,7 +718,7 @@ verify_heapam(PG_FUNCTION_ARGS)
* xmin. This should be safe because the xmin itself can't have
* changed, only its commit status.
*/
- curr_xmin = HeapTupleHeaderGetXmin(curr_htup);
+ curr_xmin = HeapTupleGetXmin(&curr_tup);
if (xmin_commit_status_ok[ctx.offnum] &&
xmin_commit_status[ctx.offnum] == XID_IN_PROGRESS &&
xmin_commit_status_ok[nextoffnum] &&
@@ -904,7 +916,7 @@ check_tuple_header(HeapCheckContext *ctx)
{
HeapTupleHeader tuphdr = ctx->tuphdr;
uint16 infomask = tuphdr->t_infomask;
- TransactionId curr_xmax = HeapTupleHeaderGetUpdateXid(tuphdr);
+ TransactionId curr_xmax = HeapTupleGetUpdateXidAny(&ctx->tuple);
bool result = true;
unsigned expected_hoff;
@@ -1022,13 +1034,14 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
XidCommitStatus xmin_status;
XidCommitStatus xvac_status;
XidCommitStatus xmax_status;
+ HeapTuple tuple = &ctx->tuple;
HeapTupleHeader tuphdr = ctx->tuphdr;
ctx->tuple_could_be_pruned = true; /* have not yet proven otherwise */
*xmin_commit_status_ok = false; /* have not yet proven otherwise */
/* If xmin is normal, it should be within valid range */
- xmin = HeapTupleHeaderGetXmin(tuphdr);
+ xmin = HeapTupleGetXmin(tuple);
switch (get_xid_status(xmin, ctx, &xmin_status))
{
case XID_INVALID:
@@ -1042,19 +1055,19 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
report_corruption(ctx,
psprintf("xmin %llu equals or exceeds next valid transaction ID %llu",
(unsigned long long) xmin,
- (unsigned long long) U64FromFullTransactionId(ctx->next_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->next_fxid)));
return false;
case XID_PRECEDES_CLUSTERMIN:
report_corruption(ctx,
psprintf("xmin %llu precedes oldest valid transaction ID %llu",
(unsigned long long) xmin,
- (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid)));
return false;
case XID_PRECEDES_RELMIN:
report_corruption(ctx,
psprintf("xmin %llu precedes relation freeze threshold %llu",
(unsigned long long) xmin,
- (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid)));
return false;
}
@@ -1080,19 +1093,19 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
report_corruption(ctx,
psprintf("old-style VACUUM FULL transaction ID %llu for moved off tuple equals or exceeds next valid transaction ID %llu",
(unsigned long long) xvac,
- (unsigned long long) U64FromFullTransactionId(ctx->next_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->next_fxid)));
return false;
case XID_PRECEDES_RELMIN:
report_corruption(ctx,
psprintf("old-style VACUUM FULL transaction ID %llu for moved off tuple precedes relation freeze threshold %llu",
(unsigned long long) xvac,
- (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid)));
return false;
case XID_PRECEDES_CLUSTERMIN:
report_corruption(ctx,
psprintf("old-style VACUUM FULL transaction ID %llu for moved off tuple precedes oldest valid transaction ID %llu",
(unsigned long long) xvac,
- (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid)));
return false;
case XID_BOUNDS_OK:
break;
@@ -1146,19 +1159,19 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
report_corruption(ctx,
psprintf("old-style VACUUM FULL transaction ID %llu for moved in tuple equals or exceeds next valid transaction ID %llu",
(unsigned long long) xvac,
- (unsigned long long) U64FromFullTransactionId(ctx->next_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->next_fxid)));
return false;
case XID_PRECEDES_RELMIN:
report_corruption(ctx,
psprintf("old-style VACUUM FULL transaction ID %llu for moved in tuple precedes relation freeze threshold %llu",
(unsigned long long) xvac,
- (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid)));
return false;
case XID_PRECEDES_CLUSTERMIN:
report_corruption(ctx,
psprintf("old-style VACUUM FULL transaction ID %llu for moved in tuple precedes oldest valid transaction ID %llu",
(unsigned long long) xvac,
- (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid)));
return false;
case XID_BOUNDS_OK:
break;
@@ -1235,7 +1248,7 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
* HEAP_XMAX_IS_LOCKED_ONLY is true, but for now we err on the side of
* avoiding possibly-bogus complaints about missing TOAST entries.
*/
- xmax = HeapTupleHeaderGetRawXmax(tuphdr);
+ xmax = HeapTupleGetRawXmax(tuple);
switch (check_mxid_valid_in_rel(xmax, ctx))
{
case XID_INVALID:
@@ -1294,7 +1307,7 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
* We already checked above that this multixact is within limits for
* this table. Now check the update xid from this multixact.
*/
- xmax = HeapTupleGetUpdateXid(tuphdr);
+ xmax = HeapTupleGetUpdateXid(tuple);
switch (get_xid_status(xmax, ctx, &xmax_status))
{
case XID_INVALID:
@@ -1306,19 +1319,19 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
report_corruption(ctx,
psprintf("update xid %llu equals or exceeds next valid transaction ID %llu",
(unsigned long long) xmax,
- (unsigned long long) U64FromFullTransactionId(ctx->next_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->next_fxid)));
return true;
case XID_PRECEDES_RELMIN:
report_corruption(ctx,
psprintf("update xid %llu precedes relation freeze threshold %llu",
(unsigned long long) xmax,
- (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid)));
return true;
case XID_PRECEDES_CLUSTERMIN:
report_corruption(ctx,
psprintf("update xid %llu precedes oldest valid transaction ID %llu",
(unsigned long long) xmax,
- (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid)));
return true;
case XID_BOUNDS_OK:
break;
@@ -1358,7 +1371,7 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
}
/* xmax is an XID, not a MXID. Sanity check it. */
- xmax = HeapTupleHeaderGetRawXmax(tuphdr);
+ xmax = HeapTupleGetRawXmax(tuple);
switch (get_xid_status(xmax, ctx, &xmax_status))
{
case XID_INVALID:
@@ -1368,19 +1381,19 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
report_corruption(ctx,
psprintf("xmax %llu equals or exceeds next valid transaction ID %llu",
(unsigned long long) xmax,
- (unsigned long long) U64FromFullTransactionId(ctx->next_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->next_fxid)));
return false; /* corrupt */
case XID_PRECEDES_RELMIN:
report_corruption(ctx,
psprintf("xmax %llu precedes relation freeze threshold %llu",
(unsigned long long) xmax,
- (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid)));
return false; /* corrupt */
case XID_PRECEDES_CLUSTERMIN:
report_corruption(ctx,
psprintf("xmax %llu precedes oldest valid transaction ID %llu",
(unsigned long long) xmax,
- (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid)));
return false; /* corrupt */
case XID_BOUNDS_OK:
break;
@@ -1859,50 +1872,6 @@ check_tuple(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
ctx->attnum = -1;
}
-/*
- * Convert a TransactionId into a FullTransactionId using our cached values of
- * the valid transaction ID range. It is the caller's responsibility to have
- * already updated the cached values, if necessary.
- */
-static FullTransactionId
-FullTransactionIdFromXidAndCtx(TransactionId xid, const HeapCheckContext *ctx)
-{
- uint64 nextfxid_i;
- int32 diff;
- FullTransactionId fxid;
-
- Assert(TransactionIdIsNormal(ctx->next_xid));
- Assert(FullTransactionIdIsNormal(ctx->next_fxid));
- Assert(XidFromFullTransactionId(ctx->next_fxid) == ctx->next_xid);
-
- if (!TransactionIdIsNormal(xid))
- return FullTransactionIdFromEpochAndXid(0, xid);
-
- nextfxid_i = U64FromFullTransactionId(ctx->next_fxid);
-
- /* compute the 32bit modulo difference */
- diff = (int32) (ctx->next_xid - xid);
-
- /*
- * In cases of corruption we might see a 32bit xid that is before epoch 0.
- * We can't represent that as a 64bit xid, due to 64bit xids being
- * unsigned integers, without the modulo arithmetic of 32bit xid. There's
- * no really nice way to deal with that, but it works ok enough to use
- * FirstNormalFullTransactionId in that case, as a freshly initdb'd
- * cluster already has a newer horizon.
- */
- if (diff > 0 && (nextfxid_i - FirstNormalTransactionId) < (int64) diff)
- {
- Assert(EpochFromFullTransactionId(ctx->next_fxid) == 0);
- fxid = FirstNormalFullTransactionId;
- }
- else
- fxid = FullTransactionIdFromU64(nextfxid_i - diff);
-
- Assert(FullTransactionIdIsNormal(fxid));
- return fxid;
-}
-
/*
* Update our cached range of valid transaction IDs.
*/
@@ -1916,8 +1885,8 @@ update_cached_xid_range(HeapCheckContext *ctx)
LWLockRelease(XidGenLock);
/* And compute alternate versions of the same */
+ ctx->oldest_fxid = FullTransactionIdFromXid(ctx->oldest_xid);
ctx->next_xid = XidFromFullTransactionId(ctx->next_fxid);
- ctx->oldest_fxid = FullTransactionIdFromXidAndCtx(ctx->oldest_xid, ctx);
}
/*
@@ -2016,7 +1985,7 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx,
}
/* Check if the xid is within bounds */
- fxid = FullTransactionIdFromXidAndCtx(xid, ctx);
+ fxid = FullTransactionIdFromXid(xid);
if (!fxid_in_cached_range(fxid, ctx))
{
/*
@@ -2025,7 +1994,6 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx,
* performed the full xid conversion, reconvert.
*/
update_cached_xid_range(ctx);
- fxid = FullTransactionIdFromXidAndCtx(xid, ctx);
}
if (FullTransactionIdPrecedesOrEquals(ctx->next_fxid, fxid))
@@ -2049,8 +2017,7 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx,
*status = XID_COMMITTED;
LWLockAcquire(XactTruncationLock, LW_SHARED);
clog_horizon =
- FullTransactionIdFromXidAndCtx(ShmemVariableCache->oldestClogXid,
- ctx);
+ FullTransactionIdFromXid(ShmemVariableCache->oldestClogXid);
if (FullTransactionIdPrecedesOrEquals(clog_horizon, fxid))
{
if (TransactionIdIsCurrentTransactionId(xid))
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index bcff849aa9..860fc10cfb 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -571,7 +571,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
* avoid this.
*/
if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
- !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
+ !TransactionIdPrecedes(HeapTupleGetXmin(rel->rd_indextuple),
snapshot->xmin))
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c
index 999ddad76d..f7e2fa847e 100644
--- a/contrib/hstore/hstore_io.c
+++ b/contrib/hstore/hstore_io.c
@@ -914,6 +914,7 @@ hstore_from_record(PG_FUNCTION_ARGS)
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
tuple.t_data = rec;
+ HeapTupleSetZeroXids(&tuple);
values = (Datum *) palloc(ncolumns * sizeof(Datum));
nulls = (bool *) palloc(ncolumns * sizeof(bool));
@@ -1067,6 +1068,7 @@ hstore_populate_record(PG_FUNCTION_ARGS)
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
tuple.t_data = rec;
+ HeapTupleSetZeroXids(&tuple);
}
/*
diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile
index 95e030b396..446721018e 100644
--- a/contrib/pageinspect/Makefile
+++ b/contrib/pageinspect/Makefile
@@ -13,7 +13,8 @@ OBJS = \
rawpage.o
EXTENSION = pageinspect
-DATA = pageinspect--1.11--1.12.sql pageinspect--1.10--1.11.sql \
+DATA = pageinspect--1.12--1.13.sql \
+ pageinspect--1.11--1.12.sql pageinspect--1.10--1.11.sql \
pageinspect--1.9--1.10.sql pageinspect--1.8--1.9.sql \
pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \
pageinspect--1.5.sql pageinspect--1.5--1.6.sql \
diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c
index afa1947fad..a810be4530 100644
--- a/contrib/pageinspect/btreefuncs.c
+++ b/contrib/pageinspect/btreefuncs.c
@@ -122,6 +122,9 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
stat->page_size = PageGetPageSize(page);
+ stat->btpo_prev = opaque->btpo_prev;
+ stat->btpo_level = opaque->btpo_level;
+
/* page type (flags) */
if (P_ISDELETED(opaque))
{
@@ -143,11 +146,18 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
FullTransactionId safexid = BTPageGetDeleteXid(page);
elog(DEBUG2, "deleted page from block %u has safexid %llu",
- blkno, (unsigned long long) U64FromFullTransactionId(safexid));
+ blkno, (unsigned long long) XidFromFullTransactionId(safexid));
}
else
- elog(DEBUG2, "deleted page from block %u has safexid %llu",
- blkno, (unsigned long long) opaque->btpo_level);
+ {
+ ShortTransactionId safexid = BTP_GET_XACT(opaque);
+
+ stat->btpo_prev = 0;
+ stat->btpo_level = 0;
+
+ elog(DEBUG2, "deleted page from block %u has safexid %u",
+ blkno, safexid);
+ }
/* Don't interpret BTDeletedPageData as index tuples */
maxoff = InvalidOffsetNumber;
@@ -162,9 +172,7 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
stat->type = 'i';
/* btpage opaque data */
- stat->btpo_prev = opaque->btpo_prev;
stat->btpo_next = opaque->btpo_next;
- stat->btpo_level = opaque->btpo_level;
stat->btpo_flags = opaque->btpo_flags;
stat->btpo_cycleid = opaque->btpo_cycleid;
diff --git a/contrib/pageinspect/expected/btree.out b/contrib/pageinspect/expected/btree.out
index 0aa5d73322..f5b05dbc06 100644
--- a/contrib/pageinspect/expected/btree.out
+++ b/contrib/pageinspect/expected/btree.out
@@ -207,8 +207,8 @@ SELECT bt_page_items('aaa'::bytea);
ERROR: invalid page size
-- invalid special area size
CREATE INDEX test1_a_brin ON test1 USING brin(a);
-SELECT bt_page_items(get_raw_page('test1', 0));
-ERROR: input page is not a valid btree page
+-- XXX: false positive in 64xids due to equal sizes of BTPageOpaque and HeapPageSpecialData
+-- SELECT bt_page_items(get_raw_page('test1', 0));
SELECT bt_page_items(get_raw_page('test1_a_brin', 0));
ERROR: input page is not a valid btree page
\set VERBOSITY default
diff --git a/contrib/pageinspect/expected/hash_1.out b/contrib/pageinspect/expected/hash_1.out
new file mode 100644
index 0000000000..5e64eb9260
--- /dev/null
+++ b/contrib/pageinspect/expected/hash_1.out
@@ -0,0 +1,166 @@
+CREATE TABLE test_hash (a int, b text);
+INSERT INTO test_hash VALUES (1, 'one');
+CREATE INDEX test_hash_a_idx ON test_hash USING hash (a);
+\x
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 0));
+-[ RECORD 1 ]--+---------
+hash_page_type | metapage
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 1));
+-[ RECORD 1 ]--+-------
+hash_page_type | bucket
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 2));
+-[ RECORD 1 ]--+-------
+hash_page_type | bucket
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 3));
+-[ RECORD 1 ]--+-------
+hash_page_type | bucket
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 4));
+-[ RECORD 1 ]--+-------
+hash_page_type | bucket
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 5));
+-[ RECORD 1 ]--+-------
+hash_page_type | bitmap
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 6));
+ERROR: block number 6 is out of range for relation "test_hash_a_idx"
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', -1);
+ERROR: invalid block number
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 0);
+ERROR: invalid overflow block number 0
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 1);
+ERROR: invalid overflow block number 1
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 2);
+ERROR: invalid overflow block number 2
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 3);
+ERROR: invalid overflow block number 3
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 4);
+ERROR: invalid overflow block number 4
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 5);
+ERROR: invalid overflow block number 5
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 6);
+ERROR: block number 6 is out of range for relation "test_hash_a_idx"
+SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
+lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
+hash_metapage_info(get_raw_page('test_hash_a_idx', 0));
+-[ RECORD 1 ]--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+magic | 105121344
+version | 4
+ntuples | 1
+bsize | 8156
+bmsize | 4096
+bmshift | 15
+maxbucket | 3
+highmask | 7
+lowmask | 3
+ovflpoint | 2
+firstfree | 0
+nmaps | 1
+procid | 450
+spares | {0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
+mapp | {5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
+
+SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
+lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
+hash_metapage_info(get_raw_page('test_hash_a_idx', 1));
+ERROR: page is not a hash meta page
+SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
+lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
+hash_metapage_info(get_raw_page('test_hash_a_idx', 2));
+ERROR: page is not a hash meta page
+SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
+lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
+hash_metapage_info(get_raw_page('test_hash_a_idx', 3));
+ERROR: page is not a hash meta page
+SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
+lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
+hash_metapage_info(get_raw_page('test_hash_a_idx', 4));
+ERROR: page is not a hash meta page
+SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
+lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
+hash_metapage_info(get_raw_page('test_hash_a_idx', 5));
+ERROR: page is not a hash meta page
+SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
+hasho_bucket, hasho_flag, hasho_page_id FROM
+hash_page_stats(get_raw_page('test_hash_a_idx', 0));
+ERROR: page is not a hash bucket or overflow page
+SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
+hasho_bucket, hasho_flag, hasho_page_id FROM
+hash_page_stats(get_raw_page('test_hash_a_idx', 1));
+-[ RECORD 1 ]---+-----------
+live_items | 0
+dead_items | 0
+page_size | 8192
+hasho_prevblkno | 3
+hasho_nextblkno | 4294967295
+hasho_bucket | 0
+hasho_flag | 2
+hasho_page_id | 65408
+
+SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
+hasho_bucket, hasho_flag, hasho_page_id FROM
+hash_page_stats(get_raw_page('test_hash_a_idx', 2));
+-[ RECORD 1 ]---+-----------
+live_items | 0
+dead_items | 0
+page_size | 8192
+hasho_prevblkno | 3
+hasho_nextblkno | 4294967295
+hasho_bucket | 1
+hasho_flag | 2
+hasho_page_id | 65408
+
+SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
+hasho_bucket, hasho_flag, hasho_page_id FROM
+hash_page_stats(get_raw_page('test_hash_a_idx', 3));
+-[ RECORD 1 ]---+-----------
+live_items | 1
+dead_items | 0
+page_size | 8192
+hasho_prevblkno | 3
+hasho_nextblkno | 4294967295
+hasho_bucket | 2
+hasho_flag | 2
+hasho_page_id | 65408
+
+SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
+hasho_bucket, hasho_flag, hasho_page_id FROM
+hash_page_stats(get_raw_page('test_hash_a_idx', 4));
+-[ RECORD 1 ]---+-----------
+live_items | 0
+dead_items | 0
+page_size | 8192
+hasho_prevblkno | 3
+hasho_nextblkno | 4294967295
+hasho_bucket | 3
+hasho_flag | 2
+hasho_page_id | 65408
+
+SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
+hasho_bucket, hasho_flag, hasho_page_id FROM
+hash_page_stats(get_raw_page('test_hash_a_idx', 5));
+ERROR: page is not a hash bucket or overflow page
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 0));
+ERROR: page is not a hash bucket or overflow page
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 1));
+(0 rows)
+
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 2));
+(0 rows)
+
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 3));
+-[ RECORD 1 ]----------
+itemoffset | 1
+ctid | (0,1)
+data | 2389907270
+
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 4));
+(0 rows)
+
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 5));
+ERROR: page is not a hash bucket or overflow page
+DROP TABLE test_hash;
diff --git a/contrib/pageinspect/expected/oldextversions.out b/contrib/pageinspect/expected/oldextversions.out
index f5c4b61bd7..00323d392d 100644
--- a/contrib/pageinspect/expected/oldextversions.out
+++ b/contrib/pageinspect/expected/oldextversions.out
@@ -40,16 +40,16 @@ SELECT * FROM bt_page_items('test1_a_idx', 1);
-- pagesize in pageinspect >= 1.10.
ALTER EXTENSION pageinspect UPDATE TO '1.9';
\df page_header
- List of functions
- Schema | Name | Result data type | Argument data types | Type
---------+-------------+------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------
- public | page_header | record | page bytea, OUT lsn pg_lsn, OUT checksum smallint, OUT flags smallint, OUT lower smallint, OUT upper smallint, OUT special smallint, OUT pagesize smallint, OUT version smallint, OUT prune_xid xid | func
+ List of functions
+ Schema | Name | Result data type | Argument data types | Type
+--------+-------------+------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------
+ public | page_header | record | page bytea, OUT lsn pg_lsn, OUT checksum smallint, OUT flags smallint, OUT lower smallint, OUT upper smallint, OUT special smallint, OUT pagesize smallint, OUT version smallint, OUT xid_base xid, OUT multi_base xid, OUT prune_xid xid | func
(1 row)
SELECT pagesize, version FROM page_header(get_raw_page('test1', 0));
pagesize | version
----------+---------
- 8192 | 4
+ 8192 | 5
(1 row)
DROP TABLE test1;
diff --git a/contrib/pageinspect/expected/page.out b/contrib/pageinspect/expected/page.out
index 80ddb45a60..631b6c7be4 100644
--- a/contrib/pageinspect/expected/page.out
+++ b/contrib/pageinspect/expected/page.out
@@ -49,7 +49,7 @@ SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0);
SELECT pagesize, version FROM page_header(get_raw_page('test1', 0));
pagesize | version
----------+---------
- 8192 | 4
+ 8192 | 5
(1 row)
SELECT page_checksum(get_raw_page('test1', 0), 0) IS NOT NULL AS silly_checksum_test;
@@ -70,19 +70,19 @@ SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bi
SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0));
fsm_page_contents
-------------------
- 0: 254 +
- 1: 254 +
- 3: 254 +
- 7: 254 +
- 15: 254 +
- 31: 254 +
- 63: 254 +
- 127: 254 +
- 255: 254 +
- 511: 254 +
- 1023: 254 +
- 2047: 254 +
- 4095: 254 +
+ 0: 253 +
+ 1: 253 +
+ 3: 253 +
+ 7: 253 +
+ 15: 253 +
+ 31: 253 +
+ 63: 253 +
+ 127: 253 +
+ 255: 253 +
+ 511: 253 +
+ 1023: 253 +
+ 2047: 253 +
+ 4095: 253 +
fp_next_slot: 0 +
(1 row)
diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
index 0f0252558c..5687f469a8 100644
--- a/contrib/pageinspect/heapfuncs.c
+++ b/contrib/pageinspect/heapfuncs.c
@@ -163,7 +163,7 @@ heap_page_items(PG_FUNCTION_ARGS)
inter_call_data->tupd = tupdesc;
inter_call_data->offset = FirstOffsetNumber;
- inter_call_data->page = VARDATA(raw_page);
+ inter_call_data->page = get_page_from_raw(raw_page);
fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
fctx->user_fctx = inter_call_data;
@@ -211,6 +211,7 @@ heap_page_items(PG_FUNCTION_ARGS)
lp_offset == MAXALIGN(lp_offset) &&
lp_offset + lp_len <= raw_page_size)
{
+ HeapTupleData tup;
HeapTupleHeader tuphdr;
bytea *tuple_data_bytea;
int tuple_data_len;
@@ -218,9 +219,11 @@ heap_page_items(PG_FUNCTION_ARGS)
/* Extract information from the tuple header */
tuphdr = (HeapTupleHeader) PageGetItem(page, id);
+ tup.t_data = tuphdr;
+ HeapTupleCopyXidsFromPage(InvalidBuffer, &tup, page, false);
- values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr));
- values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
+ values[4] = TransactionIdGetDatum(HeapTupleGetXmin(&tup));
+ values[5] = TransactionIdGetDatum(HeapTupleGetRawXmax(&tup));
/* shared with xvac */
values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr));
values[7] = PointerGetDatum(&tuphdr->t_ctid);
diff --git a/contrib/pageinspect/pageinspect--1.12--1.13.sql b/contrib/pageinspect/pageinspect--1.12--1.13.sql
new file mode 100644
index 0000000000..a2e0232a10
--- /dev/null
+++ b/contrib/pageinspect/pageinspect--1.12--1.13.sql
@@ -0,0 +1,145 @@
+/* contrib/pageinspect/pageinspect--1.12--1.13.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pageinspect UPDATE TO '1.13'" to load this file. \quit
+
+--
+-- gist_page_opaque_info()
+--
+DROP FUNCTION gist_page_opaque_info(bytea);
+CREATE FUNCTION gist_page_opaque_info(IN page bytea,
+ OUT lsn pg_lsn,
+ OUT nsn pg_lsn,
+ OUT rightlink bigint,
+ OUT flags text[])
+AS 'MODULE_PATHNAME', 'gist_page_opaque_info'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+
+--
+-- gist_page_items_bytea()
+--
+DROP FUNCTION gist_page_items_bytea(bytea);
+CREATE FUNCTION gist_page_items_bytea(IN page bytea,
+ OUT itemoffset smallint,
+ OUT ctid tid,
+ OUT itemlen smallint,
+ OUT dead boolean,
+ OUT key_data bytea)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'gist_page_items_bytea'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- gist_page_items()
+--
+DROP FUNCTION gist_page_items(bytea, regclass);
+CREATE FUNCTION gist_page_items(IN page bytea,
+ IN index_oid regclass,
+ OUT itemoffset smallint,
+ OUT ctid tid,
+ OUT itemlen smallint,
+ OUT dead boolean,
+ OUT keys text)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'gist_page_items'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- get_raw_page()
+--
+DROP FUNCTION get_raw_page(text, int8);
+DROP FUNCTION IF EXISTS get_raw_page(text, int4);
+CREATE FUNCTION get_raw_page(text, int8)
+RETURNS bytea
+AS 'MODULE_PATHNAME', 'get_raw_page_1_9'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+DROP FUNCTION get_raw_page(text, text, int8);
+DROP FUNCTION IF EXISTS get_raw_page(text, text, int4);
+CREATE FUNCTION get_raw_page(text, text, int8)
+RETURNS bytea
+AS 'MODULE_PATHNAME', 'get_raw_page_fork_1_9'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- page_checksum()
+--
+DROP FUNCTION page_checksum(IN page bytea, IN blkno int8);
+DROP FUNCTION IF EXISTS page_checksum(IN page bytea, IN blkno int4);
+CREATE FUNCTION page_checksum(IN page bytea, IN blkno int8)
+RETURNS smallint
+AS 'MODULE_PATHNAME', 'page_checksum_1_9'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- bt_metap()
+--
+DROP FUNCTION bt_metap(text);
+CREATE FUNCTION bt_metap(IN relname text,
+ OUT magic int4,
+ OUT version int4,
+ OUT root int8,
+ OUT level int8,
+ OUT fastroot int8,
+ OUT fastlevel int8,
+ OUT last_cleanup_num_delpages int8,
+ OUT last_cleanup_num_tuples float8,
+ OUT allequalimage boolean)
+AS 'MODULE_PATHNAME', 'bt_metap'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- bt_page_stats()
+--
+DROP FUNCTION bt_page_stats(text, int8);
+DROP FUNCTION IF EXISTS bt_page_stats(text, int4);
+CREATE FUNCTION bt_page_stats(IN relname text, IN blkno int8,
+ OUT blkno int8,
+ OUT type "char",
+ OUT live_items int4,
+ OUT dead_items int4,
+ OUT avg_item_size int4,
+ OUT page_size int4,
+ OUT free_size int4,
+ OUT btpo_prev int8,
+ OUT btpo_next int8,
+ OUT btpo_level int8,
+ OUT btpo_flags int4)
+AS 'MODULE_PATHNAME', 'bt_page_stats_1_9'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- bt_page_items()
+--
+DROP FUNCTION bt_page_items(text, int8);
+DROP FUNCTION IF EXISTS bt_page_items(text, int4);
+CREATE FUNCTION bt_page_items(IN relname text, IN blkno int8,
+ OUT itemoffset smallint,
+ OUT ctid tid,
+ OUT itemlen smallint,
+ OUT nulls bool,
+ OUT vars bool,
+ OUT data text,
+ OUT dead boolean,
+ OUT htid tid,
+ OUT tids tid[])
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'bt_page_items_1_9'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- brin_page_items()
+--
+DROP FUNCTION brin_page_items(IN page bytea, IN index_oid regclass);
+CREATE FUNCTION brin_page_items(IN page bytea, IN index_oid regclass,
+ OUT itemoffset int,
+ OUT blknum int8,
+ OUT attnum int,
+ OUT allnulls bool,
+ OUT hasnulls bool,
+ OUT placeholder bool,
+ OUT value text)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'brin_page_items'
+LANGUAGE C STRICT PARALLEL SAFE;
diff --git a/contrib/pageinspect/pageinspect--1.5.sql b/contrib/pageinspect/pageinspect--1.5.sql
index 1e40c3c97e..fdbd2995a2 100644
--- a/contrib/pageinspect/pageinspect--1.5.sql
+++ b/contrib/pageinspect/pageinspect--1.5.sql
@@ -28,6 +28,8 @@ CREATE FUNCTION page_header(IN page bytea,
OUT special smallint,
OUT pagesize smallint,
OUT version smallint,
+ OUT xid_base xid,
+ OUT multi_base xid,
OUT prune_xid xid)
AS 'MODULE_PATHNAME', 'page_header'
LANGUAGE C STRICT PARALLEL SAFE;
diff --git a/contrib/pageinspect/rawpage.c b/contrib/pageinspect/rawpage.c
index b25a63cbd6..5e6b8c6cfa 100644
--- a/contrib/pageinspect/rawpage.c
+++ b/contrib/pageinspect/rawpage.c
@@ -17,6 +17,7 @@
#include "access/htup_details.h"
#include "access/relation.h"
+#include "commands/sequence.h"
#include "catalog/namespace.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
@@ -251,8 +252,9 @@ page_header(PG_FUNCTION_ARGS)
Datum result;
HeapTuple tuple;
- Datum values[9];
- bool nulls[9];
+ Datum values[11];
+ bool nulls[11];
+ bool is_toast;
Page page;
PageHeader pageheader;
@@ -314,12 +316,37 @@ page_header(PG_FUNCTION_ARGS)
}
values[7] = UInt16GetDatum(PageGetPageLayoutVersion(page));
- values[8] = TransactionIdGetDatum(pageheader->pd_prune_xid);
+ is_toast = PageGetSpecialSize(page) ==
+ MAXALIGN(sizeof(ToastPageSpecialData));
+ values[8] = TransactionIdGetDatum(HeapPageGetPruneXidNoAssert((Page) page,
+ is_toast));
/* Build and return the tuple. */
-
memset(nulls, 0, sizeof(nulls));
+ if (PageGetSpecialSize(page) == MAXALIGN(sizeof(HeapPageSpecialData)))
+ {
+ /* Heap page */
+ HeapPageSpecial pageSpecial = HeapPageGetSpecial((Page) page);
+
+ values[9] = TransactionIdGetDatum(pageSpecial->pd_xid_base);
+ values[10] = TransactionIdGetDatum(pageSpecial->pd_multi_base);
+ }
+ else if (PageGetSpecialSize(page) == MAXALIGN(sizeof(ToastPageSpecialData)))
+ {
+ /* TOAST page */
+ ToastPageSpecial pageSpecial = ToastPageGetSpecial((Page) page);
+
+ values[9] = TransactionIdGetDatum(pageSpecial->pd_xid_base);
+ nulls[10] = true;
+ }
+ else
+ {
+ /* Double xmax page */
+ nulls[9] = true;
+ nulls[10] = true;
+ }
+
tuple = heap_form_tuple(tupdesc, values, nulls);
result = HeapTupleGetDatum(tuple);
diff --git a/contrib/pageinspect/sql/btree.sql b/contrib/pageinspect/sql/btree.sql
index 102ebdefe3..87f202fb9f 100644
--- a/contrib/pageinspect/sql/btree.sql
+++ b/contrib/pageinspect/sql/btree.sql
@@ -51,7 +51,8 @@ SELECT bt_page_items(get_raw_page('test1_b_gist', 0));
SELECT bt_page_items('aaa'::bytea);
-- invalid special area size
CREATE INDEX test1_a_brin ON test1 USING brin(a);
-SELECT bt_page_items(get_raw_page('test1', 0));
+-- XXX: false positive in 64xids due to equal sizes of BTPageOpaque and HeapPageSpecialData
+-- SELECT bt_page_items(get_raw_page('test1', 0));
SELECT bt_page_items(get_raw_page('test1_a_brin', 0));
\set VERBOSITY default
diff --git a/contrib/pg_surgery/heap_surgery.c b/contrib/pg_surgery/heap_surgery.c
index 4308d1933b..bb57e5dd9c 100644
--- a/contrib/pg_surgery/heap_surgery.c
+++ b/contrib/pg_surgery/heap_surgery.c
@@ -15,6 +15,7 @@
#include "access/heapam.h"
#include "access/visibilitymap.h"
#include "access/xloginsert.h"
+#include "catalog/catalog.h"
#include "catalog/pg_am_d.h"
#include "catalog/pg_proc_d.h"
#include "miscadmin.h"
@@ -272,11 +273,20 @@ heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt)
else
{
HeapTupleHeader htup;
+ HeapTupleData tuple;
+ bool is_toast;
Assert(heap_force_opt == HEAP_FORCE_FREEZE);
+ is_toast = IsToastRelation(rel);
+
htup = (HeapTupleHeader) PageGetItem(page, itemid);
+ tuple.t_data = htup;
+ tuple.t_len = ItemIdGetLength(itemid);
+ tuple.t_tableOid = RelationGetRelid(rel);
+ HeapTupleCopyXidsFromPage(buf, &tuple, page, is_toast);
+
/*
* Reset all visibility-related fields of the tuple. This
* logic should mimic heap_execute_freeze_tuple(), but we
@@ -284,8 +294,11 @@ heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt)
* potentially-garbled data is left behind.
*/
ItemPointerSet(&htup->t_ctid, blkno, curoff);
- HeapTupleHeaderSetXmin(htup, FrozenTransactionId);
- HeapTupleHeaderSetXmax(htup, InvalidTransactionId);
+ HeapTupleAndHeaderSetXmin(page, &tuple, FrozenTransactionId,
+ is_toast);
+ HeapTupleAndHeaderSetXmax(page, &tuple, InvalidTransactionId,
+ is_toast);
+
if (htup->t_infomask & HEAP_MOVED)
{
if (htup->t_infomask & HEAP_MOVED_OFF)
diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index 2a4acfd1ee..f6d574a5c1 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -14,6 +14,7 @@
#include "access/htup_details.h"
#include "access/visibilitymap.h"
#include "access/xloginsert.h"
+#include "catalog/catalog.h"
#include "catalog/pg_type.h"
#include "catalog/storage_xlog.h"
#include "funcapi.h"
@@ -650,6 +651,8 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
tuple.t_len = ItemIdGetLength(itemid);
tuple.t_tableOid = relid;
+ HeapTupleCopyXidsFromPage(buffer, &tuple, page,
+ IsToastRelation(rel));
/*
* If we're checking whether the page is all-visible, we expect
@@ -693,7 +696,7 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
*/
if (check_frozen)
{
- if (heap_tuple_needs_eventual_freeze(tuple.t_data))
+ if (heap_tuple_needs_eventual_freeze(&tuple))
record_corrupt_item(items, &tuple.t_self);
}
}
@@ -756,7 +759,7 @@ tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer)
* be set here. So just check the xmin.
*/
- xmin = HeapTupleHeaderGetXmin(tup->t_data);
+ xmin = HeapTupleGetXmin(tup);
if (!TransactionIdPrecedes(xmin, OldestXmin))
return false; /* xmin not old enough for all to see */
diff --git a/contrib/pgrowlocks/pgrowlocks.c b/contrib/pgrowlocks/pgrowlocks.c
index dea76d8dcb..5616ea64f5 100644
--- a/contrib/pgrowlocks/pgrowlocks.c
+++ b/contrib/pgrowlocks/pgrowlocks.c
@@ -130,7 +130,7 @@ pgrowlocks(PG_FUNCTION_ARGS)
htsu = HeapTupleSatisfiesUpdate(tuple,
GetCurrentCommandId(false),
hscan->rs_cbuf);
- xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
+ xmax = HeapTupleGetRawXmax(tuple);
infomask = tuple->t_data->t_infomask;
/*
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
index f601dc6121..40a45727c1 100644
--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -19,6 +19,7 @@
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
+#include "catalog/catalog.h"
#include "catalog/namespace.h"
#include "catalog/pg_am_d.h"
#include "commands/vacuum.h"
@@ -153,6 +154,7 @@ statapprox_heap(Relation rel, output_type *stat)
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
tuple.t_len = ItemIdGetLength(itemid);
tuple.t_tableOid = RelationGetRelid(rel);
+ HeapTupleCopyXidsFromPage(buf, &tuple, page, IsToastRelation(rel));
/*
* We follow VACUUM's lead in counting INSERT_IN_PROGRESS tuples
diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c
index 8e5a4d6a66..55ef89d8e2 100644
--- a/contrib/pgstattuple/pgstatindex.c
+++ b/contrib/pgstattuple/pgstatindex.c
@@ -631,7 +631,7 @@ pgstathashindex(PG_FUNCTION_ARGS)
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
metap = HashPageGetMeta(BufferGetPage(metabuf));
stats.version = metap->hashm_version;
- stats.space_per_page = metap->hashm_bsize;
+ stats.space_per_page = BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(HashPageOpaqueData));
_hash_relbuf(rel, metabuf);
/* Get the current relation length */
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index c988745b92..152f3295c8 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -4915,16 +4915,24 @@ UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3;
UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3;
EXPLAIN (verbose, costs off)
-UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; -- can be pushed down
- QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------
- Update on public.ft2
- Output: c1, c2, c3, c4, c5, c6, c7, c8
- -> Foreign Update on public.ft2
- Remote SQL: UPDATE "S 1"."T 1" SET c2 = (c2 + 400), c3 = (c3 || '_update7') WHERE ((("C 1" % 10) = 7)) RETURNING "C 1", c2, c3, c4, c5, c6, c7, c8
-(4 rows)
+WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *)
+SELECT * FROM t ORDER BY c1; -- can be pushed down
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Sort
+ Output: t.c1, t.c2, t.c3, t.c4, t.c5, t.c6, t.c7, t.c8
+ Sort Key: t.c1
+ CTE t
+ -> Update on public.ft2
+ Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8
+ -> Foreign Update on public.ft2
+ Remote SQL: UPDATE "S 1"."T 1" SET c2 = (c2 + 400), c3 = (c3 || '_update7') WHERE ((("C 1" % 10) = 7)) RETURNING "C 1", c2, c3, c4, c5, c6, c7, c8
+ -> CTE Scan on t
+ Output: t.c1, t.c2, t.c3, t.c4, t.c5, t.c6, t.c7, t.c8
+(10 rows)
-UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *;
+WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *)
+SELECT * FROM t ORDER BY c1;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
------+-----+--------------------+------------------------------+--------------------------+----+------------+-----
7 | 407 | 00007_update7 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo
@@ -5044,16 +5052,24 @@ UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT
UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT
FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9;
EXPLAIN (verbose, costs off)
- DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; -- can be pushed down
- QUERY PLAN
---------------------------------------------------------------------------------------------
- Delete on public.ft2
- Output: c1, c4
- -> Foreign Delete on public.ft2
- Remote SQL: DELETE FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 5)) RETURNING "C 1", c4
-(4 rows)
+ WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4)
+ SELECT * FROM t ORDER BY c1; -- can be pushed down
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------
+ Sort
+ Output: t.c1, t.c4
+ Sort Key: t.c1
+ CTE t
+ -> Delete on public.ft2
+ Output: ft2.c1, ft2.c4
+ -> Foreign Delete on public.ft2
+ Remote SQL: DELETE FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 5)) RETURNING "C 1", c4
+ -> CTE Scan on t
+ Output: t.c1, t.c4
+(10 rows)
-DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4;
+WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4)
+SELECT * FROM t ORDER BY c1;
c1 | c4
------+------------------------------
5 | Tue Jan 06 00:00:00 1970 PST
@@ -6314,7 +6330,8 @@ INSERT INTO ft2 (c1,c2,c3,c6) VALUES (1218, 818, 'ggg', '(--;') RETURNING *;
1218 | 818 | ggg_trig_update | | | (--; | ft2 |
(1 row)
-UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *;
+WITH t AS (UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *)
+SELECT * FROM t ORDER BY c1;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
------+-----+------------------------+------------------------------+--------------------------+----+------------+-----
8 | 608 | 00008_trig_update | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index e9144beb62..0ffb8b61be 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -4818,8 +4818,8 @@ apply_returning_filter(PgFdwDirectModifyState *dmstate,
* Note: no need to care about tableoid here because it will be
* initialized in ExecProcessReturning().
*/
- HeapTupleHeaderSetXmin(resultTup->t_data, InvalidTransactionId);
- HeapTupleHeaderSetXmax(resultTup->t_data, InvalidTransactionId);
+ HeapTupleSetXmin(resultTup, InvalidTransactionId);
+ HeapTupleSetXmax(resultTup, InvalidTransactionId);
HeapTupleHeaderSetCmin(resultTup->t_data, InvalidTransactionId);
}
@@ -7640,6 +7640,7 @@ make_tuple_from_result_row(PGresult *res,
*/
if (ctid)
tuple->t_self = tuple->t_data->t_ctid = *ctid;
+ HeapTupleSetZeroXids(tuple);
/*
* Stomp on the xmin, xmax, and cmin fields from the tuple created by
@@ -7649,8 +7650,8 @@ make_tuple_from_result_row(PGresult *res,
* assumption. If we don't do this then, for example, the tuple length
* ends up in the xmin field, which isn't what we want.
*/
- HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
- HeapTupleHeaderSetXmin(tuple->t_data, InvalidTransactionId);
+ HeapTupleSetXmax(tuple, InvalidTransactionId);
+ HeapTupleSetXmin(tuple, InvalidTransactionId);
HeapTupleHeaderSetCmin(tuple->t_data, InvalidTransactionId);
/* Clean up */
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index cb40540702..4b1c43637c 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -1442,16 +1442,20 @@ EXPLAIN (verbose, costs off)
UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; -- can be pushed down
UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3;
EXPLAIN (verbose, costs off)
-UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; -- can be pushed down
-UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *;
+WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *)
+SELECT * FROM t ORDER BY c1; -- can be pushed down
+WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *)
+SELECT * FROM t ORDER BY c1;
EXPLAIN (verbose, costs off)
UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT
FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9; -- can be pushed down
UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT
FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9;
EXPLAIN (verbose, costs off)
- DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; -- can be pushed down
-DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4;
+ WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4)
+ SELECT * FROM t ORDER BY c1; -- can be pushed down
+WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4)
+SELECT * FROM t ORDER BY c1;
EXPLAIN (verbose, costs off)
DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 2; -- can be pushed down
DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 2;
@@ -1558,7 +1562,8 @@ CREATE TRIGGER t1_br_insert BEFORE INSERT OR UPDATE
INSERT INTO ft2 (c1,c2,c3) VALUES (1208, 818, 'fff') RETURNING *;
INSERT INTO ft2 (c1,c2,c3,c6) VALUES (1218, 818, 'ggg', '(--;') RETURNING *;
-UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *;
+WITH t AS (UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *)
+SELECT * FROM t ORDER BY c1;
-- Test errors thrown on remote side during update
ALTER TABLE "S 1"."T 1" ADD CONSTRAINT c2positive CHECK (c2 >= 0);
diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c
index c52d40dce0..7413304ddd 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -737,10 +737,10 @@ heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
result = PointerGetDatum(&(tup->t_self));
break;
case MinTransactionIdAttributeNumber:
- result = TransactionIdGetDatum(HeapTupleHeaderGetRawXmin(tup->t_data));
+ result = TransactionIdGetDatum(HeapTupleGetRawXmin(tup));
break;
case MaxTransactionIdAttributeNumber:
- result = TransactionIdGetDatum(HeapTupleHeaderGetRawXmax(tup->t_data));
+ result = TransactionIdGetDatum(HeapTupleGetRawXmax(tup));
break;
case MinCommandIdAttributeNumber:
case MaxCommandIdAttributeNumber:
@@ -785,6 +785,7 @@ heap_copytuple(HeapTuple tuple)
newTuple->t_len = tuple->t_len;
newTuple->t_self = tuple->t_self;
newTuple->t_tableOid = tuple->t_tableOid;
+ HeapTupleCopyXids(newTuple, tuple);
newTuple->t_data = (HeapTupleHeader) ((char *) newTuple + HEAPTUPLESIZE);
memcpy((char *) newTuple->t_data, (char *) tuple->t_data, tuple->t_len);
return newTuple;
@@ -811,6 +812,7 @@ heap_copytuple_with_tuple(HeapTuple src, HeapTuple dest)
dest->t_len = src->t_len;
dest->t_self = src->t_self;
dest->t_tableOid = src->t_tableOid;
+ HeapTupleCopyXids(dest, src);
dest->t_data = (HeapTupleHeader) palloc(src->t_len);
memcpy((char *) dest->t_data, (char *) src->t_data, src->t_len);
}
@@ -1174,6 +1176,7 @@ heap_form_tuple(TupleDesc tupleDescriptor,
tuple->t_len = len;
ItemPointerSetInvalid(&(tuple->t_self));
tuple->t_tableOid = InvalidOid;
+ HeapTupleSetZeroXids(tuple);
HeapTupleHeaderSetDatumLength(td, len);
HeapTupleHeaderSetTypeId(td, tupleDescriptor->tdtypeid);
@@ -1258,6 +1261,7 @@ heap_modify_tuple(HeapTuple tuple,
newTuple->t_data->t_ctid = tuple->t_data->t_ctid;
newTuple->t_self = tuple->t_self;
newTuple->t_tableOid = tuple->t_tableOid;
+ HeapTupleCopyXids(newTuple, tuple);
return newTuple;
}
@@ -1321,6 +1325,7 @@ heap_modify_tuple_by_cols(HeapTuple tuple,
newTuple->t_data->t_ctid = tuple->t_data->t_ctid;
newTuple->t_self = tuple->t_self;
newTuple->t_tableOid = tuple->t_tableOid;
+ HeapTupleCopyXids(newTuple, tuple);
return newTuple;
}
@@ -1561,6 +1566,7 @@ heap_tuple_from_minimal_tuple(MinimalTuple mtup)
result->t_len = len;
ItemPointerSetInvalid(&(result->t_self));
result->t_tableOid = InvalidOid;
+ HeapTupleSetZeroXids(result);
result->t_data = (HeapTupleHeader) ((char *) result + HEAPTUPLESIZE);
memcpy((char *) result->t_data + MINIMAL_TUPLE_OFFSET, mtup, mtup->t_len);
memset(result->t_data, 0, offsetof(HeapTupleHeaderData, t_infomask2));
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index c852b1fb74..0498115c92 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -1922,17 +1922,17 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, analyze_threshold)},
{"autovacuum_vacuum_cost_limit", RELOPT_TYPE_INT,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_cost_limit)},
- {"autovacuum_freeze_min_age", RELOPT_TYPE_INT,
+ {"autovacuum_freeze_min_age", RELOPT_TYPE_INT64,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_min_age)},
- {"autovacuum_freeze_max_age", RELOPT_TYPE_INT,
+ {"autovacuum_freeze_max_age", RELOPT_TYPE_INT64,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_max_age)},
- {"autovacuum_freeze_table_age", RELOPT_TYPE_INT,
+ {"autovacuum_freeze_table_age", RELOPT_TYPE_INT64,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_table_age)},
- {"autovacuum_multixact_freeze_min_age", RELOPT_TYPE_INT,
+ {"autovacuum_multixact_freeze_min_age", RELOPT_TYPE_INT64,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_min_age)},
- {"autovacuum_multixact_freeze_max_age", RELOPT_TYPE_INT,
+ {"autovacuum_multixact_freeze_max_age", RELOPT_TYPE_INT64,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_max_age)},
- {"autovacuum_multixact_freeze_table_age", RELOPT_TYPE_INT,
+ {"autovacuum_multixact_freeze_table_age", RELOPT_TYPE_INT64,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_table_age)},
{"log_autovacuum_min_duration", RELOPT_TYPE_INT,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, log_min_duration)},
diff --git a/src/backend/access/hash/hashvalidate.c b/src/backend/access/hash/hashvalidate.c
index 24bab58499..3fd0c4609d 100644
--- a/src/backend/access/hash/hashvalidate.c
+++ b/src/backend/access/hash/hashvalidate.c
@@ -317,11 +317,10 @@ check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype)
* INTERNAL and allowing any such function seems too scary.
*/
if ((funcid == F_HASHINT4 || funcid == F_HASHINT4EXTENDED) &&
- (argtype == DATEOID ||
- argtype == XIDOID || argtype == CIDOID))
+ (argtype == DATEOID || argtype == CIDOID))
/* okay, allowed use of hashint4() */ ;
else if ((funcid == F_HASHINT8 || funcid == F_HASHINT8EXTENDED) &&
- (argtype == XID8OID))
+ (argtype == XID8OID || argtype == XIDOID))
/* okay, allowed use of hashint8() */ ;
else if ((funcid == F_TIMESTAMP_HASH ||
funcid == F_TIMESTAMP_HASH_EXTENDED) &&
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index b6ee64d856..a33cbc207b 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -52,11 +52,14 @@
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/catalog.h"
+#include "catalog/index.h"
+#include "catalog/namespace.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "port/pg_bitutils.h"
+#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
@@ -74,7 +77,7 @@
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
- TransactionId xid, CommandId cid, int options);
+ CommandId cid, int options);
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
Buffer newbuf, HeapTuple oldtup,
HeapTuple newtup, HeapTuple old_key_tuple,
@@ -114,6 +117,8 @@ static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate);
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup);
static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
bool *copy);
+static bool heap_page_prepare_for_xid(Relation relation, Buffer buffer,
+ TransactionId xid, bool multi);
/*
@@ -462,6 +467,8 @@ heapgetpage(TableScanDesc sscan, BlockNumber block)
loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd);
loctup.t_data = (HeapTupleHeader) PageGetItem(page, lpp);
loctup.t_len = ItemIdGetLength(lpp);
+ HeapTupleCopyXidsFromPage(buffer, &loctup, page,
+ IsToastRelation(scan->rs_base.rs_rd));
ItemPointerSet(&(loctup.t_self), block, lineoff);
if (all_visible)
@@ -473,7 +480,16 @@ heapgetpage(TableScanDesc sscan, BlockNumber block)
&loctup, buffer, snapshot);
if (valid)
- scan->rs_vistuples[ntup++] = lineoff;
+ {
+ scan->rs_vistuples[ntup] = lineoff;
+ /*
+ * Since there is no lock futher and xmin or xmax may be
+ * changed while base shift, copy them here.
+ */
+ scan->rs_xmin[ntup] = loctup.t_xmin;
+ scan->rs_xmax[ntup] = loctup.t_xmax;
+ ++ntup;
+ }
}
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
@@ -777,6 +793,8 @@ continue_page:
tuple->t_data = (HeapTupleHeader) PageGetItem(page, lpp);
tuple->t_len = ItemIdGetLength(lpp);
+ HeapTupleCopyXidsFromPage(scan->rs_cbuf, tuple, page,
+ IsToastRelation(scan->rs_base.rs_rd));
ItemPointerSet(&(tuple->t_self), block, lineoff);
visible = HeapTupleSatisfiesVisibility(tuple,
@@ -867,6 +885,9 @@ heapgettup_pagemode(HeapScanDesc scan,
linesleft = scan->rs_cindex;
/* lineindex now references the next or previous visible tid */
+ tuple->t_xmin = scan->rs_xmin[scan->rs_cindex];
+ tuple->t_xmax = scan->rs_xmax[scan->rs_cindex];
+
goto continue_page;
}
@@ -895,6 +916,8 @@ continue_page:
tuple->t_data = (HeapTupleHeader) PageGetItem(page, lpp);
tuple->t_len = ItemIdGetLength(lpp);
+ tuple->t_xmin = scan->rs_xmin[lineindex];
+ tuple->t_xmax = scan->rs_xmax[lineindex];
ItemPointerSet(&(tuple->t_self), block, lineoff);
/* skip any tuples that don't match the scan key */
@@ -1403,6 +1426,7 @@ heap_fetch(Relation relation,
tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
tuple->t_len = ItemIdGetLength(lp);
tuple->t_tableOid = RelationGetRelid(relation);
+ HeapTupleCopyXidsFromPage(buffer, tuple, page, IsToastRelation(relation));
/*
* check tuple visibility, then release lock
@@ -1411,7 +1435,7 @@ heap_fetch(Relation relation,
if (valid)
PredicateLockTID(relation, &(tuple->t_self), snapshot,
- HeapTupleHeaderGetXmin(tuple->t_data));
+ HeapTupleGetXmin(tuple));
HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
@@ -1488,6 +1512,8 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
Assert(TransactionIdIsValid(RecentXmin));
Assert(BufferGetBlockNumber(buffer) == blkno);
+ heapTuple->t_self = *tid;
+
/* Scan through possible multiple members of HOT-chain */
for (;;)
{
@@ -1523,6 +1549,8 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
heapTuple->t_len = ItemIdGetLength(lp);
heapTuple->t_tableOid = RelationGetRelid(relation);
+ HeapTupleCopyXidsFromPage(buffer, heapTuple, page,
+ IsToastRelation(relation));
ItemPointerSet(&heapTuple->t_self, blkno, offnum);
/*
@@ -1537,7 +1565,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
*/
if (TransactionIdIsValid(prev_xmax) &&
!TransactionIdEquals(prev_xmax,
- HeapTupleHeaderGetXmin(heapTuple->t_data)))
+ HeapTupleGetXmin(heapTuple)))
break;
/*
@@ -1558,7 +1586,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
{
ItemPointerSetOffsetNumber(tid, offnum);
PredicateLockTID(relation, &heapTuple->t_self, snapshot,
- HeapTupleHeaderGetXmin(heapTuple->t_data));
+ HeapTupleGetXmin(heapTuple));
if (all_dead)
*all_dead = false;
return true;
@@ -1593,7 +1621,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
blkno);
offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
at_chain_start = false;
- prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
+ prev_xmax = HeapTupleGetUpdateXidAny(heapTuple);
}
else
break; /* end of chain */
@@ -1679,13 +1707,14 @@ heap_get_latest_tid(TableScanDesc sscan,
tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
tp.t_len = ItemIdGetLength(lp);
tp.t_tableOid = RelationGetRelid(relation);
+ HeapTupleCopyXidsFromPage(buffer, &tp, page, IsToastRelation(relation));
/*
* After following a t_ctid link, we might arrive at an unrelated
* tuple. Check for XMIN match.
*/
if (TransactionIdIsValid(priorXmax) &&
- !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
+ !TransactionIdEquals(priorXmax, HeapTupleGetXmin(&tp)))
{
UnlockReleaseBuffer(buffer);
break;
@@ -1704,7 +1733,7 @@ heap_get_latest_tid(TableScanDesc sscan,
* If there's a valid t_ctid link, follow it, else we're done.
*/
if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
- HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
+ HeapTupleIsOnlyLocked(&tp) ||
HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) ||
ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
{
@@ -1713,7 +1742,7 @@ heap_get_latest_tid(TableScanDesc sscan,
}
ctid = tp.t_data->t_ctid;
- priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
+ priorXmax = HeapTupleGetUpdateXidAny(&tp);
UnlockReleaseBuffer(buffer);
} /* end of loop */
}
@@ -1738,7 +1767,7 @@ heap_get_latest_tid(TableScanDesc sscan,
static void
UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
{
- Assert(TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple), xid));
+ Assert(TransactionIdEquals(HeapTupleHeaderGetRawXmax(BufferGetPage(buffer), tuple), xid));
Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI));
if (!(tuple->t_infomask & (HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID)))
@@ -1805,6 +1834,31 @@ ReleaseBulkInsertStatePin(BulkInsertState bistate)
bistate->last_free = InvalidBlockNumber;
}
+/*
+ * Add xid_base and multi base to the WAL record.
+ *
+ * WAL record must being constructed.
+ */
+static inline void
+xlog_register_base(Page page, bool is_toast, TransactionId *xid_base,
+ TransactionId *multi_base)
+{
+ if (is_toast)
+ {
+ *xid_base = ToastPageGetSpecial(page)->pd_xid_base;
+ *multi_base = InvalidTransactionId;
+ }
+ else
+ {
+ HeapPageSpecial special = HeapPageGetSpecial(page);
+
+ *xid_base = special->pd_xid_base;
+ *multi_base = special->pd_multi_base;
+ }
+
+ XLogRegisterData((char *) xid_base, sizeof(*xid_base));
+ XLogRegisterData((char *) multi_base, sizeof(*multi_base));
+}
/*
* heap_insert - insert tuple into a heap
@@ -1844,7 +1898,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
* Note: below this point, heaptup is the data we actually intend to store
* into the relation; tup is the caller's original untoasted data.
*/
- heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
+ heaptup = heap_prepare_insert(relation, tup, cid, options);
/*
* Find buffer to insert this tuple into. If the page is all visible,
@@ -1872,6 +1926,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
*/
CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
+ heap_page_prepare_for_xid(relation, buffer, xid, false);
+ HeapTupleSetXmin(heaptup, xid);
+
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
@@ -1909,6 +1966,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
Page page = BufferGetPage(buffer);
uint8 info = XLOG_HEAP_INSERT;
int bufflags = 0;
+ TransactionId xid_base,
+ multi_base;
/*
* If this is a catalog, we need to transmit combo CIDs to properly
@@ -1947,12 +2006,17 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
{
xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
bufflags |= REGBUF_KEEP_DATA;
-
- if (IsToastRelation(relation))
- xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION;
}
+ if (IsToastRelation(relation))
+ xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION;
+
XLogBeginInsert();
+
+ if (info & XLOG_HEAP_INIT_PAGE)
+ xlog_register_base(page, IsToastRelation(relation), &xid_base,
+ &multi_base);
+
XLogRegisterData((char *) &xlrec, SizeOfHeapInsert);
xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
@@ -2014,7 +2078,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
* that in any case, the header fields are also set in the original tuple.
*/
static HeapTuple
-heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
+heap_prepare_insert(Relation relation, HeapTuple tup,
CommandId cid, int options)
{
/*
@@ -2031,12 +2095,12 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
tup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
tup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
tup->t_data->t_infomask |= HEAP_XMAX_INVALID;
- HeapTupleHeaderSetXmin(tup->t_data, xid);
+ HeapTupleSetXmin(tup, InvalidTransactionId);
if (options & HEAP_INSERT_FROZEN)
- HeapTupleHeaderSetXminFrozen(tup->t_data);
+ HeapTupleHeaderStoreXminFrozen(tup->t_data);
HeapTupleHeaderSetCmin(tup->t_data, cid);
- HeapTupleHeaderSetXmax(tup->t_data, 0); /* for cleanliness */
+ HeapTupleSetXmax(tup, 0); /* for cleanliness */
tup->t_tableOid = RelationGetRelid(relation);
/*
@@ -2128,8 +2192,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
slots[i]->tts_tableOid = RelationGetRelid(relation);
tuple->t_tableOid = slots[i]->tts_tableOid;
- heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
- options);
+ heaptuples[i] = heap_prepare_insert(relation, tuple, cid, options);
}
/*
@@ -2204,6 +2267,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
all_frozen_set = true;
+ heap_page_prepare_for_xid(relation, buffer, xid, false);
+
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
@@ -2211,6 +2276,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
* RelationGetBufferForTuple has ensured that the first tuple fits.
* Put that on the page, and then as many other tuples as fit.
*/
+ HeapTupleSetXmin(heaptuples[ndone], xid);
RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
/*
@@ -2227,6 +2293,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
break;
+ HeapTupleSetXmin(heaptup, xid);
RelationPutHeapTuple(relation, buffer, heaptup, false);
/*
@@ -2272,6 +2339,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
char *scratchptr = scratch.data;
bool init;
int bufflags = 0;
+ TransactionId xid_base,
+ multi_base;
/*
* If the page was previously empty, we can reinit the page
@@ -2362,6 +2431,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
bufflags |= REGBUF_KEEP_DATA;
XLogBeginInsert();
+
+ if (info & XLOG_HEAP_INIT_PAGE)
+ xlog_register_base(page, IsToastRelation(relation), &xid_base,
+ &multi_base);
+
XLogRegisterData((char *) xlrec, tupledata - scratch.data);
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
@@ -2569,6 +2643,7 @@ heap_delete(Relation relation, ItemPointer tid,
tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
tp.t_len = ItemIdGetLength(lp);
tp.t_self = *tid;
+ HeapTupleCopyXidsFromPage(buffer, &tp, page, IsToastRelation(relation));
l1:
@@ -2600,7 +2675,7 @@ l1:
uint16 infomask;
/* must copy state data before unlocking buffer */
- xwait = HeapTupleHeaderGetRawXmax(tp.t_data);
+ xwait = HeapTupleGetRawXmax(&tp);
infomask = tp.t_data->t_infomask;
/*
@@ -2639,6 +2714,10 @@ l1:
NULL);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ /* Copy possibly updated xid base after relocking */
+ HeapTupleCopyXidsFromPage(buffer, &tp, page,
+ IsToastRelation(relation));
+
/*
* If xwait had just locked the tuple then some other xact
* could update this tuple before we get to this point. Check
@@ -2649,7 +2728,7 @@ l1:
*/
if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
- !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data),
+ !TransactionIdEquals(HeapTupleGetRawXmax(&tp),
xwait))
goto l1;
}
@@ -2676,6 +2755,10 @@ l1:
XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ /* Copy possibly updated xid base after relocking */
+ HeapTupleCopyXidsFromPage(buffer, &tp, page,
+ IsToastRelation(relation));
+
/*
* xwait is done, but if xwait had just locked the tuple then some
* other xact could update this tuple before we get to this point.
@@ -2686,7 +2769,7 @@ l1:
*/
if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
- !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data),
+ !TransactionIdEquals(HeapTupleGetRawXmax(&tp),
xwait))
goto l1;
@@ -2700,7 +2783,7 @@ l1:
*/
if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
HEAP_XMAX_IS_LOCKED_ONLY(tp.t_data->t_infomask) ||
- HeapTupleHeaderIsOnlyLocked(tp.t_data))
+ HeapTupleIsOnlyLocked(&tp))
result = TM_Ok;
else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
result = TM_Updated;
@@ -2730,9 +2813,9 @@ l1:
if (result != TM_Ok)
{
tmfd->ctid = tp.t_data->t_ctid;
- tmfd->xmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
+ tmfd->xmax = HeapTupleGetUpdateXidAny(&tp);
if (result == TM_SelfModified)
- tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
+ tmfd->cmax = HeapTupleGetCmax(&tp);
else
tmfd->cmax = InvalidCommandId;
UnlockReleaseBuffer(buffer);
@@ -2755,7 +2838,7 @@ l1:
CheckForSerializableConflictIn(relation, tid, BufferGetBlockNumber(buffer));
/* replace cid with a combo CID if necessary */
- HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);
+ HeapTupleAdjustCmax(&tp, &cid, &iscombo);
/*
* Compute replica identity tuple before entering the critical section so
@@ -2773,11 +2856,20 @@ l1:
*/
MultiXactIdSetOldestMember();
- compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(tp.t_data),
+ compute_new_xmax_infomask(HeapTupleGetRawXmax(&tp),
tp.t_data->t_infomask, tp.t_data->t_infomask2,
xid, LockTupleExclusive, true,
&new_xmax, &new_infomask, &new_infomask2);
+#ifdef USE_ASSERT_CHECKING
+ if (IsToastRelation(relation))
+ Assert((new_infomask & HEAP_XMAX_IS_MULTI) == 0);
+#endif
+
+ heap_page_prepare_for_xid(relation, buffer, new_xmax,
+ (new_infomask & HEAP_XMAX_IS_MULTI) != 0);
+ HeapTupleCopyXidsFromPage(buffer, &tp, page, IsToastRelation(relation));
+
START_CRIT_SECTION();
/*
@@ -2787,7 +2879,7 @@ l1:
* the subsequent page pruning will be a no-op and the hint will be
* cleared.
*/
- PageSetPrunable(page, xid);
+ PageSetPrunable(page, xid, IsToastRelation(relation));
if (PageIsAllVisible(page))
{
@@ -2803,7 +2895,7 @@ l1:
tp.t_data->t_infomask |= new_infomask;
tp.t_data->t_infomask2 |= new_infomask2;
HeapTupleHeaderClearHotUpdated(tp.t_data);
- HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
+ HeapTupleAndHeaderSetXmax(page, &tp, new_xmax, IsToastRelation(relation));
HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
/* Make sure there is no forward chain link in t_ctid */
tp.t_data->t_ctid = tp.t_self;
@@ -2842,6 +2934,8 @@ l1:
tp.t_data->t_infomask2);
xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
xlrec.xmax = new_xmax;
+ if (IsToastRelation(relation))
+ xlrec.flags |= XLH_DELETE_PAGE_ON_TOAST_RELATION;
if (old_key_tuple != NULL)
{
@@ -2999,7 +3093,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
HeapTuple heaptup;
HeapTuple old_key_tuple = NULL;
bool old_key_copied = false;
- Page page;
+ Page page,
+ newpage;
BlockNumber block;
MultiXactStatus mxact_status;
Buffer buffer,
@@ -3026,6 +3121,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
infomask_new_tuple,
infomask2_new_tuple;
+ Assert(!IsToastRelation(relation));
Assert(ItemPointerIsValid(otid));
/* Cheap, simplistic check that the tuple matches the rel's rowtype. */
@@ -3097,6 +3193,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
oldtup.t_len = ItemIdGetLength(lp);
oldtup.t_self = *otid;
+ HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false);
/* the new tuple is ready, except for this: */
newtup->t_tableOid = RelationGetRelid(relation);
@@ -3190,7 +3287,7 @@ l2:
*/
/* must copy state data before unlocking buffer */
- xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
+ xwait = HeapTupleGetRawXmax(&oldtup);
infomask = oldtup.t_data->t_infomask;
/*
@@ -3241,6 +3338,7 @@ l2:
checked_lockers = true;
locker_remains = remain != 0;
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false);
/*
* If xwait had just locked the tuple then some other xact
@@ -3249,7 +3347,7 @@ l2:
*/
if (xmax_infomask_changed(oldtup.t_data->t_infomask,
infomask) ||
- !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data),
+ !TransactionIdEquals(HeapTupleGetRawXmax(&oldtup),
xwait))
goto l2;
}
@@ -3275,7 +3373,7 @@ l2:
* subxact aborts.
*/
if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask))
- update_xact = HeapTupleGetUpdateXid(oldtup.t_data);
+ update_xact = HeapTupleGetUpdateXid(&oldtup);
else
update_xact = InvalidTransactionId;
@@ -3322,7 +3420,7 @@ l2:
XLTW_Update);
checked_lockers = true;
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
-
+ HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false);
/*
* xwait is done, but if xwait had just locked the tuple then some
* other xact could update this tuple before we get to this point.
@@ -3330,7 +3428,7 @@ l2:
*/
if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) ||
!TransactionIdEquals(xwait,
- HeapTupleHeaderGetRawXmax(oldtup.t_data)))
+ HeapTupleGetRawXmax(&oldtup)))
goto l2;
/* Otherwise check if it committed or aborted */
@@ -3369,9 +3467,9 @@ l2:
if (result != TM_Ok)
{
tmfd->ctid = oldtup.t_data->t_ctid;
- tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
+ tmfd->xmax = HeapTupleGetUpdateXidAny(&oldtup);
if (result == TM_SelfModified)
- tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
+ tmfd->cmax = HeapTupleGetCmax(&oldtup);
else
tmfd->cmax = InvalidCommandId;
UnlockReleaseBuffer(buffer);
@@ -3404,6 +3502,7 @@ l2:
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
visibilitymap_pin(relation, block, &vmbuffer);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false);
goto l2;
}
@@ -3413,7 +3512,7 @@ l2:
* If the tuple we're updating is locked, we need to preserve the locking
* info in the old tuple's Xmax. Prepare a new Xmax value for this.
*/
- compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
+ compute_new_xmax_infomask(HeapTupleGetRawXmax(&oldtup),
oldtup.t_data->t_infomask,
oldtup.t_data->t_infomask2,
xid, *lockmode, true,
@@ -3432,7 +3531,7 @@ l2:
(checked_lockers && !locker_remains))
xmax_new_tuple = InvalidTransactionId;
else
- xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data);
+ xmax_new_tuple = HeapTupleGetRawXmax(&oldtup);
if (!TransactionIdIsValid(xmax_new_tuple))
{
@@ -3465,17 +3564,15 @@ l2:
*/
newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
- HeapTupleHeaderSetXmin(newtup->t_data, xid);
HeapTupleHeaderSetCmin(newtup->t_data, cid);
newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple;
newtup->t_data->t_infomask2 |= infomask2_new_tuple;
- HeapTupleHeaderSetXmax(newtup->t_data, xmax_new_tuple);
/*
* Replace cid with a combo CID if necessary. Note that we already put
* the plain cid into the new tuple.
*/
- HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo);
+ HeapTupleAdjustCmax(&oldtup, &cid, &iscombo);
/*
* If the toaster needs to be activated, OR if the new tuple will not fit
@@ -3505,7 +3602,7 @@ l2:
newtupsize = MAXALIGN(newtup->t_len);
- if (need_toast || newtupsize > pagefree)
+ if (need_toast || newtupsize > pagefree || HeapPageIsDoubleXmax(page))
{
TransactionId xmax_lock_old_tuple;
uint16 infomask_lock_old_tuple,
@@ -3530,7 +3627,7 @@ l2:
* updating, because the potentially created multixact would otherwise
* be wrong.
*/
- compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
+ compute_new_xmax_infomask(HeapTupleGetRawXmax(&oldtup),
oldtup.t_data->t_infomask,
oldtup.t_data->t_infomask2,
xid, *lockmode, false,
@@ -3539,6 +3636,10 @@ l2:
Assert(HEAP_XMAX_IS_LOCKED_ONLY(infomask_lock_old_tuple));
+ heap_page_prepare_for_xid(relation, buffer, xmax_lock_old_tuple,
+ (infomask_lock_old_tuple & HEAP_XMAX_IS_MULTI) != 0);
+ HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false);
+
START_CRIT_SECTION();
/* Clear obsolete visibility flags ... */
@@ -3547,9 +3648,9 @@ l2:
HeapTupleClearHotUpdated(&oldtup);
/* ... and store info about transaction updating this tuple */
Assert(TransactionIdIsValid(xmax_lock_old_tuple));
- HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple);
oldtup.t_data->t_infomask |= infomask_lock_old_tuple;
oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple;
+ HeapTupleAndHeaderSetXmax(page, &oldtup, xmax_lock_old_tuple, false);
HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
/* temporarily make it look not-updated, but locked */
@@ -3632,7 +3733,11 @@ l2:
*/
for (;;)
{
- if (newtupsize > pagefree)
+ /*
+ * We can't fit new tuple to "double xmax" page, since it's
+ * impossible to set xmin there.
+ */
+ if (newtupsize > pagefree || HeapPageIsDoubleXmax(page))
{
/* It doesn't fit, must use RelationGetBufferForTuple. */
newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
@@ -3666,6 +3771,9 @@ l2:
break;
}
}
+
+ /* Copy possibly updated xid base to old tuple after relocking */
+ HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false);
}
else
{
@@ -3737,6 +3845,33 @@ l2:
id_has_external,
&old_key_copied);
+ newpage = BufferGetPage(newbuf);
+
+ /*
+ * Prepare pages for the current xid, that witten to the new tuple's Xmax
+ * and old page's pd_prune_xid.
+ */
+ heap_page_prepare_for_xid(relation, buffer, xid, false);
+ if (newbuf != buffer)
+ heap_page_prepare_for_xid(relation, newbuf, xid, false);
+
+ /* Prepare pages for tuple's Xmax */
+ heap_page_prepare_for_xid(relation, buffer, xmax_old_tuple,
+ (infomask_old_tuple & HEAP_XMAX_IS_MULTI) != 0);
+ heap_page_prepare_for_xid(relation, newbuf, xmax_new_tuple,
+ (heaptup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) != 0);
+
+ /* Copy possibly updated Xid bases to the both tuples. */
+ HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false);
+
+ /*
+ * Set new tuple's Xmin/Xmax, old tuple's Xmin/Xmax were already shifted.
+ */
+ HeapTupleAndHeaderSetXmin(newpage, heaptup, xid,
+ IsToastRelation(relation));
+ HeapTupleAndHeaderSetXmax(newpage, heaptup, xmax_new_tuple,
+ IsToastRelation(relation));
+
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
@@ -3752,7 +3887,7 @@ l2:
* not to optimize for aborts. Note that heap_xlog_update must be kept in
* sync if this decision changes.
*/
- PageSetPrunable(page, xid);
+ PageSetPrunable(page, xid, false);
if (use_hot_update)
{
@@ -3779,10 +3914,11 @@ l2:
oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
/* ... and store info about transaction updating this tuple */
Assert(TransactionIdIsValid(xmax_old_tuple));
- HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple);
oldtup.t_data->t_infomask |= infomask_old_tuple;
oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
+ HeapTupleAndHeaderSetXmax(page, &oldtup, xmax_old_tuple, false);
HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
+ HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false);
/* record address of new tuple in t_ctid of old one */
oldtup.t_data->t_ctid = heaptup->t_self;
@@ -3836,6 +3972,18 @@ l2:
END_CRIT_SECTION();
+ if (newtup != heaptup)
+ {
+ /*
+ * Set new tuple's Xmin/Xmax only after both xid base preparations.
+ * Old tuple's Xmin/Xmax were already shifted because old tuple is on
+ * the page.
+ */
+ Assert(!IsToastRelation(relation));
+ HeapTupleAndHeaderSetXmin(newpage, heaptup, xid, false);
+ HeapTupleAndHeaderSetXmax(newpage, newtup, xmax_new_tuple, false);
+ }
+
if (newbuf != buffer)
LockBuffer(newbuf, BUFFER_LOCK_UNLOCK);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
@@ -4183,6 +4331,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
tuple->t_len = ItemIdGetLength(lp);
tuple->t_tableOid = RelationGetRelid(relation);
+ HeapTupleCopyXidsFromPage(*buffer, tuple, page, false);
l3:
result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
@@ -4209,7 +4358,7 @@ l3:
ItemPointerData t_ctid;
/* must copy state data before unlocking buffer */
- xwait = HeapTupleHeaderGetRawXmax(tuple->t_data);
+ xwait = HeapTupleGetRawXmax(tuple);
infomask = tuple->t_data->t_infomask;
infomask2 = tuple->t_data->t_infomask2;
ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
@@ -4367,11 +4516,13 @@ l3:
result = res;
/* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyXidsFromPage(*buffer, tuple, page, false);
goto failed;
}
}
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyXidsFromPage(*buffer, tuple, page, false);
/*
* Make sure it's still an appropriate lock, else start over.
@@ -4380,7 +4531,7 @@ l3:
* now need to follow the update chain to lock the new
* versions.
*/
- if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
+ if (!HeapTupleIsOnlyLocked(tuple) &&
((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
!updated))
goto l3;
@@ -4407,6 +4558,7 @@ l3:
!HEAP_XMAX_IS_EXCL_LOCKED(infomask))
{
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyXidsFromPage(*buffer, tuple, page, false);
/*
* Make sure it's still an appropriate lock, else start over.
@@ -4435,8 +4587,10 @@ l3:
* meantime, start over.
*/
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyXidsFromPage(*buffer, tuple, page, false);
+
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
- !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+ !TransactionIdEquals(HeapTupleGetRawXmax(tuple),
xwait))
goto l3;
@@ -4447,10 +4601,11 @@ l3:
else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
{
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyXidsFromPage(*buffer, tuple, page, false);
/* if the xmax changed in the meantime, start over */
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
- !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+ !TransactionIdEquals(HeapTupleGetRawXmax(tuple),
xwait))
goto l3;
/* otherwise, we're good */
@@ -4475,8 +4630,10 @@ l3:
{
/* ... but if the xmax changed in the meantime, start over */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyXidsFromPage(*buffer, tuple, page, false);
+
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
- !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+ !TransactionIdEquals(HeapTupleGetRawXmax(tuple),
xwait))
goto l3;
Assert(HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask));
@@ -4497,6 +4654,7 @@ l3:
if (require_sleep && (result == TM_Updated || result == TM_Deleted))
{
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyXidsFromPage(*buffer, tuple, page, false);
goto failed;
}
else if (require_sleep)
@@ -4522,6 +4680,7 @@ l3:
result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyXidsFromPage(*buffer, tuple, page, false);
goto failed;
}
@@ -4548,6 +4707,8 @@ l3:
result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyXidsFromPage(*buffer, tuple, page,
+ false);
goto failed;
}
break;
@@ -4588,6 +4749,8 @@ l3:
result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyXidsFromPage(*buffer, tuple, page,
+ false);
goto failed;
}
break;
@@ -4614,11 +4777,13 @@ l3:
result = res;
/* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyXidsFromPage(*buffer, tuple, page, false);
goto failed;
}
}
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyXidsFromPage(*buffer, tuple, page, false);
/*
* xwait is done, but if xwait had just locked the tuple then some
@@ -4626,7 +4791,7 @@ l3:
* Check for xmax change, and start over if so.
*/
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
- !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+ !TransactionIdEquals(HeapTupleGetRawXmax(tuple),
xwait))
goto l3;
@@ -4654,7 +4819,7 @@ l3:
if (!require_sleep ||
(tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
- HeapTupleHeaderIsOnlyLocked(tuple->t_data))
+ HeapTupleIsOnlyLocked(tuple))
result = TM_Ok;
else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
result = TM_Updated;
@@ -4680,9 +4845,9 @@ failed:
Assert(result != TM_Updated ||
!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
tmfd->ctid = tuple->t_data->t_ctid;
- tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
+ tmfd->xmax = HeapTupleGetUpdateXidAny(tuple);
if (result == TM_SelfModified)
- tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
+ tmfd->cmax = HeapTupleGetCmax(tuple);
else
tmfd->cmax = InvalidCommandId;
goto out_locked;
@@ -4702,10 +4867,11 @@ failed:
LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
visibilitymap_pin(relation, block, &vmbuffer);
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyXidsFromPage(*buffer, tuple, page, false);
goto l3;
}
- xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
+ xmax = HeapTupleGetRawXmax(tuple);
old_infomask = tuple->t_data->t_infomask;
/*
@@ -4727,6 +4893,10 @@ failed:
GetCurrentTransactionId(), mode, false,
&xid, &new_infomask, &new_infomask2);
+ heap_page_prepare_for_xid(relation, *buffer, xid,
+ (new_infomask & HEAP_XMAX_IS_MULTI) != 0);
+ HeapTupleCopyXidsFromPage(*buffer, tuple, page, false);
+
START_CRIT_SECTION();
/*
@@ -4745,7 +4915,8 @@ failed:
tuple->t_data->t_infomask2 |= new_infomask2;
if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
HeapTupleHeaderClearHotUpdated(tuple->t_data);
- HeapTupleHeaderSetXmax(tuple->t_data, xid);
+ Assert(!IsToastRelation(relation));
+ HeapTupleAndHeaderSetXmax(page, tuple, xid, false);
/*
* Make sure there is no forward chain link in t_ctid. Note that in the
@@ -5339,12 +5510,19 @@ l4:
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
}
+ /*
+ * Copy xid base after buffer relocking, it could have changed since
+ * heap_fetch().
+ */
+ HeapTupleCopyXidsFromPage(buf, &mytup, BufferGetPage(buf),
+ IsToastRelation(rel));
+
/*
* Check the tuple XMIN against prior XMAX, if any. If we reached the
* end of the chain, we're done, so return success.
*/
if (TransactionIdIsValid(priorXmax) &&
- !TransactionIdEquals(HeapTupleHeaderGetXmin(mytup.t_data),
+ !TransactionIdEquals(HeapTupleGetXmin(&mytup),
priorXmax))
{
result = TM_Ok;
@@ -5356,7 +5534,7 @@ l4:
* (sub)transaction, then we already locked the last live one in the
* chain, thus we're done, so return success.
*/
- if (TransactionIdDidAbort(HeapTupleHeaderGetXmin(mytup.t_data)))
+ if (TransactionIdDidAbort(HeapTupleGetXmin(&mytup)))
{
result = TM_Ok;
goto out_locked;
@@ -5364,7 +5542,7 @@ l4:
old_infomask = mytup.t_data->t_infomask;
old_infomask2 = mytup.t_data->t_infomask2;
- xmax = HeapTupleHeaderGetRawXmax(mytup.t_data);
+ xmax = HeapTupleGetRawXmax(&mytup);
/*
* If this tuple version has been updated or locked by some concurrent
@@ -5377,7 +5555,7 @@ l4:
TransactionId rawxmax;
bool needwait;
- rawxmax = HeapTupleHeaderGetRawXmax(mytup.t_data);
+ rawxmax = HeapTupleGetRawXmax(&mytup);
if (old_infomask & HEAP_XMAX_IS_MULTI)
{
int nmembers;
@@ -5518,14 +5696,25 @@ l4:
VISIBILITYMAP_ALL_FROZEN))
cleared_all_frozen = true;
+#ifdef USE_ASSERT_CHECKING
+ if (IsToastRelation(rel))
+ Assert((new_infomask & HEAP_XMAX_IS_MULTI) == 0);
+#endif
+
+ heap_page_prepare_for_xid(rel, buf, new_xmax,
+ (new_infomask & HEAP_XMAX_IS_MULTI) != 0);
+ HeapTupleCopyXidsFromPage(buf, &mytup, BufferGetPage(buf),
+ IsToastRelation(rel));
+
START_CRIT_SECTION();
/* ... and set them */
- HeapTupleHeaderSetXmax(mytup.t_data, new_xmax);
mytup.t_data->t_infomask &= ~HEAP_XMAX_BITS;
mytup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
mytup.t_data->t_infomask |= new_infomask;
mytup.t_data->t_infomask2 |= new_infomask2;
+ Assert(!IsToastRelation(rel));
+ HeapTupleAndHeaderSetXmax(BufferGetPage(buf), &mytup, new_xmax, false);
MarkBufferDirty(buf);
@@ -5559,14 +5748,14 @@ next:
if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID ||
HeapTupleHeaderIndicatesMovedPartitions(mytup.t_data) ||
ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) ||
- HeapTupleHeaderIsOnlyLocked(mytup.t_data))
+ HeapTupleIsOnlyLocked(&mytup))
{
result = TM_Ok;
goto out_locked;
}
/* tail recursion */
- priorXmax = HeapTupleHeaderGetUpdateXid(mytup.t_data);
+ priorXmax = HeapTupleGetUpdateXidAny(&mytup);
ItemPointerCopy(&(mytup.t_data->t_ctid), &tupid);
UnlockReleaseBuffer(buf);
}
@@ -5769,12 +5958,13 @@ heap_abort_speculative(Relation relation, ItemPointer tid)
tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
tp.t_len = ItemIdGetLength(lp);
tp.t_self = *tid;
+ HeapTupleCopyXidsFromPage(buffer, &tp, page, IsToastRelation(relation));
/*
* Sanity check that the tuple really is a speculatively inserted tuple,
* inserted by us.
*/
- if (tp.t_data->t_choice.t_heap.t_xmin != xid)
+ if (HeapTupleGetRawXmin(&tp) != xid)
elog(ERROR, "attempted to kill a tuple inserted by another transaction");
if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
elog(ERROR, "attempted to kill a non-speculative tuple");
@@ -5803,7 +5993,9 @@ heap_abort_speculative(Relation relation, ItemPointer tid)
prune_xid = relation->rd_rel->relfrozenxid;
else
prune_xid = TransactionXmin;
- PageSetPrunable(page, prune_xid);
+ Assert(TransactionIdIsValid(prune_xid));
+ heap_page_prepare_for_xid(relation, buffer, prune_xid, false);
+ PageSetPrunable(page, prune_xid, IsToastRelation(relation));
/* store transaction information of xact deleting the tuple */
tp.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
@@ -5812,9 +6004,12 @@ heap_abort_speculative(Relation relation, ItemPointer tid)
/*
* Set the tuple header xmin to InvalidTransactionId. This makes the
* tuple immediately invisible everyone. (In particular, to any
- * transactions waiting on the speculative token, woken up later.)
+ * transactions waiting on the speculative token, woken up later.) Don't
+ * need to reload xid base from page because InvalidTransactionId doesn't
+ * require xid base to be valid.
*/
- HeapTupleHeaderSetXmin(tp.t_data, InvalidTransactionId);
+ HeapTupleAndHeaderSetXmin(page, &tp, InvalidTransactionId,
+ IsToastRelation(relation));
/* Clear the speculative insertion token too */
tp.t_data->t_ctid = tp.t_self;
@@ -5833,6 +6028,8 @@ heap_abort_speculative(Relation relation, ItemPointer tid)
XLogRecPtr recptr;
xlrec.flags = XLH_DELETE_IS_SUPER;
+ if (IsToastRelation(relation))
+ xlrec.flags |= XLH_DELETE_PAGE_ON_TOAST_RELATION;
xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
tp.t_data->t_infomask2);
xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
@@ -6101,7 +6298,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
* been pruned away instead, since updater XID is < OldestXmin).
* Just remove xmax.
*/
- if (TransactionIdDidCommit(update_xact))
+ if (!TransactionIdDidAbort(update_xact))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("multixact %llu contains non-aborted update XID %llu from before removable cutoff %llu",
@@ -6199,7 +6396,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
* even member XIDs >= OldestXmin often won't be kept by second pass.
*/
nnewmembers = 0;
- newmembers = palloc(sizeof(MultiXactMember) * nmembers);
+ newmembers = palloc0(sizeof(MultiXactMember) * nmembers);
has_lockers = false;
update_xid = InvalidTransactionId;
update_committed = false;
@@ -6385,7 +6582,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
* then caller had better have an exclusive lock on it already.
*/
bool
-heap_prepare_freeze_tuple(HeapTupleHeader tuple,
+heap_prepare_freeze_tuple(HeapTuple htup,
const struct VacuumCutoffs *cutoffs,
HeapPageFreeze *pagefrz,
HeapTupleFreeze *frz, bool *totally_frozen)
@@ -6397,8 +6594,9 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
replace_xmax = false,
freeze_xmax = false;
TransactionId xid;
+ HeapTupleHeader tuple = htup->t_data;
- frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
+ frz->xmax = HeapTupleGetRawXmax(htup);
frz->t_infomask2 = tuple->t_infomask2;
frz->t_infomask = tuple->t_infomask;
frz->frzflags = 0;
@@ -6409,7 +6607,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
* will become frozen iff our freeze plan is executed by caller (could be
* neither).
*/
- xid = HeapTupleHeaderGetXmin(tuple);
+ xid = HeapTupleGetXmin(htup);
if (!TransactionIdIsNormal(xid))
xmin_already_frozen = true;
else
@@ -6551,6 +6749,15 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
/* MultiXactId processing forces freezing (barring FRM_NOOP case) */
Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax));
}
+ else if ((tuple->t_infomask & HEAP_XMAX_INVALID) &&
+ TransactionIdIsNormal(xid))
+ {
+ /*
+ * To reset xmax without reading clog.
+ * This prevent excess growth of xmax.
+ */
+ freeze_xmax = true;
+ }
else if (TransactionIdIsNormal(xid))
{
/* Raw xmax is normal XID */
@@ -6572,7 +6779,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
if (freeze_xmax && !HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
frz->checkflags |= HEAP_FREEZE_CHECK_XMAX_ABORTED;
}
- else if (!TransactionIdIsValid(xid))
+ else if (!TransactionIdIsValid(HeapTupleGetRawXmax(htup)))
{
/* Raw xmax is InvalidTransactionId XID */
Assert((tuple->t_infomask & HEAP_XMAX_IS_MULTI) == 0);
@@ -6642,7 +6849,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
* Does this tuple force caller to freeze the entire page?
*/
pagefrz->freeze_required =
- heap_tuple_should_freeze(tuple, cutoffs,
+ heap_tuple_should_freeze(htup, cutoffs,
&pagefrz->NoFreezePageRelfrozenXid,
&pagefrz->NoFreezePageRelminMxid);
}
@@ -6661,18 +6868,32 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
* in private storage (which is what CLUSTER and friends do).
*/
static inline void
-heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
+heap_execute_freeze_tuple(HeapTuple htup, HeapTupleFreeze *frz)
{
- HeapTupleHeaderSetXmax(tuple, frz->xmax);
+ HeapTupleHeader tuple = htup->t_data;
+
+ tuple->t_infomask = frz->t_infomask;
+ tuple->t_infomask2 = frz->t_infomask2;
+
+ HeapTupleSetXmax(htup, frz->xmax);
if (frz->frzflags & XLH_FREEZE_XVAC)
HeapTupleHeaderSetXvac(tuple, FrozenTransactionId);
if (frz->frzflags & XLH_INVALID_XVAC)
HeapTupleHeaderSetXvac(tuple, InvalidTransactionId);
+}
- tuple->t_infomask = frz->t_infomask;
- tuple->t_infomask2 = frz->t_infomask2;
+static inline void
+heap_execute_freeze_tuple_page(Page page, HeapTupleHeader htup,
+ HeapTupleFreeze *frz, bool is_toast)
+{
+ HeapTupleData tuple;
+
+ tuple.t_data = htup;
+ heap_execute_freeze_tuple(&tuple, frz);
+
+ HeapTupleHeaderStoreXmax(page, &tuple, is_toast);
}
/*
@@ -6709,34 +6930,31 @@ heap_freeze_execute_prepared(Relation rel, Buffer buffer,
{
HeapTupleFreeze *frz = tuples + i;
ItemId itemid = PageGetItemId(page, frz->offset);
- HeapTupleHeader htup;
+ HeapTupleData tuple;
- htup = (HeapTupleHeader) PageGetItem(page, itemid);
+ tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+ tuple.t_len = ItemIdGetLength(itemid);
+ tuple.t_tableOid = RelationGetRelid(rel);
+ HeapTupleCopyXidsFromPage(buffer, &tuple, page, IsToastRelation(rel));
/* Deliberately avoid relying on tuple hint bits here */
if (frz->checkflags & HEAP_FREEZE_CHECK_XMIN_COMMITTED)
{
- TransactionId xmin = HeapTupleHeaderGetRawXmin(htup);
+ TransactionId xmin = HeapTupleGetXmin(&tuple);
- Assert(!HeapTupleHeaderXminFrozen(htup));
+ Assert(!HeapTupleHeaderXminFrozen(tuple.t_data));
if (unlikely(!TransactionIdDidCommit(xmin)))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("uncommitted xmin %llu needs to be frozen",
(unsigned long long) xmin)));
}
-
- /*
- * TransactionIdDidAbort won't work reliably in the presence of XIDs
- * left behind by transactions that were in progress during a crash,
- * so we can only check that xmax didn't commit
- */
if (frz->checkflags & HEAP_FREEZE_CHECK_XMAX_ABORTED)
{
- TransactionId xmax = HeapTupleHeaderGetRawXmax(htup);
+ TransactionId xmax = HeapTupleGetRawXmax(&tuple);
Assert(TransactionIdIsNormal(xmax));
- if (unlikely(TransactionIdDidCommit(xmax)))
+ if (unlikely(!TransactionIdDidAbort(xmax)))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg_internal("cannot freeze non-aborted xmax %llu",
@@ -6753,7 +6971,8 @@ heap_freeze_execute_prepared(Relation rel, Buffer buffer,
HeapTupleHeader htup;
htup = (HeapTupleHeader) PageGetItem(page, itemid);
- heap_execute_freeze_tuple(htup, frz);
+ heap_execute_freeze_tuple_page(page, htup, frz,
+ IsToastRelation(rel));
}
MarkBufferDirty(buffer);
@@ -6764,7 +6983,7 @@ heap_freeze_execute_prepared(Relation rel, Buffer buffer,
xl_heap_freeze_plan plans[MaxHeapTuplesPerPage];
OffsetNumber offsets[MaxHeapTuplesPerPage];
int nplans;
- xl_heap_freeze_page xlrec;
+ xl_heap_freeze_page xlrec = {0};
XLogRecPtr recptr;
/* Prepare deduplicated representation for use in WAL record */
@@ -6773,6 +6992,8 @@ heap_freeze_execute_prepared(Relation rel, Buffer buffer,
xlrec.snapshotConflictHorizon = snapshotConflictHorizon;
xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(rel);
xlrec.nplans = nplans;
+ if (IsToastRelation(rel))
+ xlrec.flags = XLH_FREEZE_PAGE_ON_TOAST_RELATION;
XLogBeginInsert();
XLogRegisterData((char *) &xlrec, SizeOfHeapFreezePage);
@@ -6941,7 +7162,7 @@ heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples,
* Useful for callers like CLUSTER that perform their own WAL logging.
*/
bool
-heap_freeze_tuple(HeapTupleHeader tuple,
+heap_freeze_tuple(HeapTuple tuple,
TransactionId relfrozenxid, TransactionId relminmxid,
TransactionId FreezeLimit, TransactionId MultiXactCutoff)
{
@@ -7118,10 +7339,10 @@ MultiXactIdGetUpdateXid(TransactionId xmax, uint16 t_infomask)
* checking the hint bits.
*/
TransactionId
-HeapTupleGetUpdateXid(HeapTupleHeader tuple)
+HeapTupleGetUpdateXid(HeapTuple tuple)
{
- return MultiXactIdGetUpdateXid(HeapTupleHeaderGetRawXmax(tuple),
- tuple->t_infomask);
+ return MultiXactIdGetUpdateXid(HeapTupleGetRawXmax(tuple),
+ tuple->t_data->t_infomask);
}
/*
@@ -7347,15 +7568,18 @@ ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status,
* will eventually require freezing (if tuple isn't removed by pruning first).
*/
bool
-heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
+heap_tuple_needs_eventual_freeze(HeapTuple htup)
{
TransactionId xid;
+ HeapTupleHeader tuple;
+
+ tuple = htup->t_data;
/*
* If xmin is a normal transaction ID, this tuple is definitely not
* frozen.
*/
- xid = HeapTupleHeaderGetXmin(tuple);
+ xid = HeapTupleGetXmin(htup);
if (TransactionIdIsNormal(xid))
return true;
@@ -7366,13 +7590,13 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
{
MultiXactId multi;
- multi = HeapTupleHeaderGetRawXmax(tuple);
+ multi = HeapTupleGetRawXmax(htup);
if (MultiXactIdIsValid(multi))
return true;
}
else
{
- xid = HeapTupleHeaderGetRawXmax(tuple);
+ xid = HeapTupleGetRawXmax(htup);
if (TransactionIdIsNormal(xid))
return true;
}
@@ -7402,17 +7626,18 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
* point that it fully commits to not freezing the tuple/page in question.
*/
bool
-heap_tuple_should_freeze(HeapTupleHeader tuple,
+heap_tuple_should_freeze(HeapTuple htup,
const struct VacuumCutoffs *cutoffs,
TransactionId *NoFreezePageRelfrozenXid,
MultiXactId *NoFreezePageRelminMxid)
{
TransactionId xid;
MultiXactId multi;
+ HeapTupleHeader tuple = htup->t_data;
bool freeze = false;
/* First deal with xmin */
- xid = HeapTupleHeaderGetXmin(tuple);
+ xid = HeapTupleGetXmin(htup);
if (TransactionIdIsNormal(xid))
{
Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
@@ -7426,9 +7651,9 @@ heap_tuple_should_freeze(HeapTupleHeader tuple,
xid = InvalidTransactionId;
multi = InvalidMultiXactId;
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
- multi = HeapTupleHeaderGetRawXmax(tuple);
+ multi = HeapTupleGetRawXmax(htup);
else
- xid = HeapTupleHeaderGetRawXmax(tuple);
+ xid = HeapTupleGetRawXmax(htup);
if (TransactionIdIsNormal(xid))
{
@@ -7439,6 +7664,14 @@ heap_tuple_should_freeze(HeapTupleHeader tuple,
if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
freeze = true;
}
+ else if ((tuple->t_infomask & HEAP_XMAX_INVALID) &&
+ TransactionIdIsNormal(xid))
+ {
+ /*
+ * To reset xmax without reading clog.
+ */
+ freeze = true;
+ }
else if (!MultiXactIdIsValid(multi))
{
/* xmax is a permanent XID or invalid MultiXactId/XID */
@@ -7510,14 +7743,14 @@ heap_tuple_should_freeze(HeapTupleHeader tuple,
* caller's WAL record) by REDO routine when it replays caller's operation.
*/
void
-HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple,
+HeapTupleHeaderAdvanceConflictHorizon(HeapTuple tuple,
TransactionId *snapshotConflictHorizon)
{
- TransactionId xmin = HeapTupleHeaderGetXmin(tuple);
- TransactionId xmax = HeapTupleHeaderGetUpdateXid(tuple);
- TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
+ TransactionId xmin = HeapTupleGetXmin(tuple);
+ TransactionId xmax = HeapTupleGetUpdateXidAny(tuple);
+ TransactionId xvac = HeapTupleHeaderGetXvac(tuple->t_data);
- if (tuple->t_infomask & HEAP_MOVED)
+ if (tuple->t_data->t_infomask & HEAP_MOVED)
{
if (TransactionIdPrecedes(*snapshotConflictHorizon, xvac))
*snapshotConflictHorizon = xvac;
@@ -7529,8 +7762,8 @@ HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple,
*
* Look for a committed hint bit, or if no xmin bit is set, check clog.
*/
- if (HeapTupleHeaderXminCommitted(tuple) ||
- (!HeapTupleHeaderXminInvalid(tuple) && TransactionIdDidCommit(xmin)))
+ if (HeapTupleHeaderXminCommitted(tuple->t_data) ||
+ (!HeapTupleHeaderXminInvalid(tuple->t_data) && TransactionIdDidCommit(xmin)))
{
if (xmax != xmin &&
TransactionIdFollows(xmax, *snapshotConflictHorizon))
@@ -7878,7 +8111,7 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
for (;;)
{
ItemId lp;
- HeapTupleHeader htup;
+ HeapTupleData htup;
/* Sanity check (pure paranoia) */
if (offnum < FirstOffsetNumber)
@@ -7915,16 +8148,18 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
if (!ItemIdIsNormal(lp))
break;
- htup = (HeapTupleHeader) PageGetItem(page, lp);
+ htup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
+ htup.t_len = ItemIdGetLength(lp);
+ HeapTupleCopyXidsFromPage(buf, &htup, page, IsToastRelation(rel));
/*
* Check the tuple XMIN against prior XMAX, if any
*/
if (TransactionIdIsValid(priorXmax) &&
- !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
+ !TransactionIdEquals(HeapTupleGetXmin(&htup), priorXmax))
break;
- HeapTupleHeaderAdvanceConflictHorizon(htup,
+ HeapTupleHeaderAdvanceConflictHorizon(&htup,
&snapshotConflictHorizon);
/*
@@ -7933,13 +8168,13 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
* chain (they get their own index entries) -- just move on to
* next htid from index AM caller.
*/
- if (!HeapTupleHeaderIsHotUpdated(htup))
+ if (!HeapTupleHeaderIsHotUpdated(htup.t_data))
break;
/* Advance to next HOT chain member */
- Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
- offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
- priorXmax = HeapTupleHeaderGetUpdateXid(htup);
+ Assert(ItemPointerGetBlockNumber(&htup.t_data->t_ctid) == blkno);
+ offnum = ItemPointerGetOffsetNumber(&htup.t_data->t_ctid);
+ priorXmax = HeapTupleGetUpdateXidAny(&htup);
}
/* Enable further/final shrinking of deltids for caller */
@@ -8382,6 +8617,8 @@ log_heap_update(Relation reln, Buffer oldbuf,
bool all_visible_cleared, bool new_all_visible_cleared)
{
xl_heap_update xlrec;
+ TransactionId xid_base,
+ multi_base;
xl_heap_header xlhdr;
xl_heap_header xlhdr_idx;
uint8 info;
@@ -8490,13 +8727,13 @@ log_heap_update(Relation reln, Buffer oldbuf,
/* Prepare WAL data for the old page */
xlrec.old_offnum = ItemPointerGetOffsetNumber(&oldtup->t_self);
- xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data);
+ xlrec.old_xmax = HeapTupleGetRawXmax(oldtup);
xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask,
oldtup->t_data->t_infomask2);
/* Prepare WAL data for the new page */
xlrec.new_offnum = ItemPointerGetOffsetNumber(&newtup->t_self);
- xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data);
+ xlrec.new_xmax = HeapTupleGetRawXmax(newtup);
bufflags = REGBUF_STANDARD;
if (init)
@@ -8508,6 +8745,17 @@ log_heap_update(Relation reln, Buffer oldbuf,
if (oldbuf != newbuf)
XLogRegisterBuffer(1, oldbuf, REGBUF_STANDARD);
+ if (info & XLOG_HEAP_INIT_PAGE)
+ {
+ HeapPageSpecial special = HeapPageGetSpecial(page);
+
+ Assert(!IsToastRelation(reln));
+ xid_base = special->pd_xid_base;
+ multi_base = special->pd_multi_base;
+ XLogRegisterData((char *) &xid_base, sizeof(xid_base));
+ XLogRegisterData((char *) &multi_base, sizeof(multi_base));
+ }
+
XLogRegisterData((char *) &xlrec, SizeOfHeapUpdate);
/*
@@ -8620,8 +8868,8 @@ log_heap_new_cid(Relation relation, HeapTuple tup)
{
Assert(!(hdr->t_infomask & HEAP_XMAX_INVALID));
Assert(!HeapTupleHeaderXminInvalid(hdr));
- xlrec.cmin = HeapTupleHeaderGetCmin(hdr);
- xlrec.cmax = HeapTupleHeaderGetCmax(hdr);
+ xlrec.cmin = HeapTupleGetCmin(tup);
+ xlrec.cmax = HeapTupleGetCmax(tup);
xlrec.combocid = HeapTupleHeaderGetRawCommandId(hdr);
}
/* No combo CID, so only cmin or cmax can be set by this TX */
@@ -8825,7 +9073,9 @@ heap_xlog_prune(XLogReaderState *record)
heap_page_prune_execute(buffer,
redirected, nredirected,
nowdead, ndead,
- nowunused, nunused);
+ nowunused, nunused,
+ (xlrec->flags & XLH_PRUNE_REPAIR_FRAGMENTATION) != 0,
+ (xlrec->flags & XLH_PRUNE_ON_TOAST_RELATION) != 0);
/*
* Note: we don't worry about updating the page's prunability hints.
@@ -9121,7 +9371,8 @@ heap_xlog_freeze_page(XLogReaderState *record)
lp = PageGetItemId(page, offset);
tuple = (HeapTupleHeader) PageGetItem(page, lp);
- heap_execute_freeze_tuple(tuple, &frz);
+ heap_execute_freeze_tuple_page(page, tuple, &frz,
+ (xlrec->flags & XLH_FREEZE_PAGE_ON_TOAST_RELATION) != 0);
}
}
@@ -9193,6 +9444,8 @@ heap_xlog_delete(XLogReaderState *record)
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
+ HeapTupleData tuple;
+
page = BufferGetPage(buffer);
if (PageGetMaxOffsetNumber(page) >= xlrec->offnum)
@@ -9208,14 +9461,19 @@ heap_xlog_delete(XLogReaderState *record)
HeapTupleHeaderClearHotUpdated(htup);
fix_infomask_from_infobits(xlrec->infobits_set,
&htup->t_infomask, &htup->t_infomask2);
+ tuple.t_data = htup;
+
if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
- HeapTupleHeaderSetXmax(htup, xlrec->xmax);
+ HeapTupleAndHeaderSetXmax(page, &tuple, xlrec->xmax,
+ (xlrec->flags & XLH_DELETE_PAGE_ON_TOAST_RELATION) != 0);
else
- HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
+ HeapTupleAndHeaderSetXmin(page, &tuple, InvalidTransactionId,
+ (xlrec->flags & XLH_DELETE_PAGE_ON_TOAST_RELATION) != 0);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
/* Mark the page as a candidate for pruning */
- PageSetPrunable(page, XLogRecGetXid(record));
+ PageSetPrunable(page, XLogRecGetXid(record),
+ (xlrec->flags & XLH_DELETE_PAGE_ON_TOAST_RELATION) != 0);
if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
@@ -9236,7 +9494,7 @@ static void
heap_xlog_insert(XLogReaderState *record)
{
XLogRecPtr lsn = record->EndRecPtr;
- xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
+ xl_heap_insert *xlrec;
Buffer buffer;
Page page;
union
@@ -9252,6 +9510,20 @@ heap_xlog_insert(XLogReaderState *record)
BlockNumber blkno;
ItemPointerData target_tid;
XLogRedoAction action;
+ bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
+ Pointer rec_data = (Pointer) XLogRecGetData(record);
+ TransactionId xid_base = InvalidTransactionId;
+ TransactionId multi_base = InvalidTransactionId;
+
+ if (isinit)
+ {
+ xid_base = *((TransactionId *) rec_data);
+ rec_data += sizeof(TransactionId);
+ multi_base = *((TransactionId *) rec_data);
+ rec_data += sizeof(TransactionId);
+ }
+
+ xlrec = (xl_heap_insert *) rec_data;
XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
ItemPointerSetBlockNumber(&target_tid, blkno);
@@ -9276,11 +9548,28 @@ heap_xlog_insert(XLogReaderState *record)
* If we inserted the first and only tuple on the page, re-initialize the
* page from scratch.
*/
- if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
+ if (isinit)
{
buffer = XLogInitBufferForRedo(record, 0);
page = BufferGetPage(buffer);
- PageInit(page, BufferGetPageSize(buffer), 0);
+
+ if (xlrec->flags & XLH_INSERT_ON_TOAST_RELATION)
+ {
+ PageInit(page, BufferGetPageSize(buffer),
+ sizeof(ToastPageSpecialData));
+ ToastPageGetSpecial(page)->pd_xid_base = xid_base;
+ }
+ else
+ {
+ HeapPageSpecial special;
+
+ PageInit(page, BufferGetPageSize(buffer),
+ sizeof(HeapPageSpecialData));
+ special = HeapPageGetSpecial(page);
+ special->pd_xid_base = xid_base;
+ special->pd_multi_base = multi_base;
+ }
+
action = BLK_NEEDS_REDO;
}
else
@@ -9289,6 +9578,7 @@ heap_xlog_insert(XLogReaderState *record)
{
Size datalen;
char *data;
+ HeapTupleData tuple;
page = BufferGetPage(buffer);
@@ -9312,7 +9602,9 @@ heap_xlog_insert(XLogReaderState *record)
htup->t_infomask2 = xlhdr.t_infomask2;
htup->t_infomask = xlhdr.t_infomask;
htup->t_hoff = xlhdr.t_hoff;
- HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
+ tuple.t_data = htup;
+ HeapTupleAndHeaderSetXmin(page, &tuple, XLogRecGetXid(record),
+ (xlrec->flags & XLH_INSERT_ON_TOAST_RELATION) != 0);
HeapTupleHeaderSetCmin(htup, FirstCommandId);
htup->t_ctid = target_tid;
@@ -9372,12 +9664,22 @@ heap_xlog_multi_insert(XLogReaderState *record)
int i;
bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
XLogRedoAction action;
+ TransactionId xid_base = InvalidTransactionId,
+ multi_base = InvalidTransactionId;
+ Pointer rec_data = (Pointer) XLogRecGetData(record);
/*
* Insertion doesn't overwrite MVCC data, so no conflict processing is
* required.
*/
- xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
+ if (isinit)
+ {
+ xid_base = *((TransactionId *) rec_data);
+ rec_data += sizeof(TransactionId);
+ multi_base = *((TransactionId *) rec_data);
+ rec_data += sizeof(TransactionId);
+ }
+ xlrec = (xl_heap_multi_insert *) rec_data;
XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
@@ -9404,7 +9706,22 @@ heap_xlog_multi_insert(XLogReaderState *record)
{
buffer = XLogInitBufferForRedo(record, 0);
page = BufferGetPage(buffer);
- PageInit(page, BufferGetPageSize(buffer), 0);
+
+ if ((xlrec->flags & XLH_INSERT_ON_TOAST_RELATION) != 0)
+ {
+ PageInit(page, BufferGetPageSize(buffer), sizeof(ToastPageSpecialData));
+ ToastPageGetSpecial(page)->pd_xid_base = xid_base;
+ }
+ else
+ {
+ HeapPageSpecial special;
+
+ PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData));
+ special = HeapPageGetSpecial(page);
+ special->pd_xid_base = xid_base;
+ special->pd_multi_base = multi_base;
+ }
+
action = BLK_NEEDS_REDO;
}
else
@@ -9425,6 +9742,7 @@ heap_xlog_multi_insert(XLogReaderState *record)
{
OffsetNumber offnum;
xl_multi_insert_tuple *xlhdr;
+ HeapTupleData tuple;
/*
* If we're reinitializing the page, the tuples are stored in
@@ -9455,7 +9773,9 @@ heap_xlog_multi_insert(XLogReaderState *record)
htup->t_infomask2 = xlhdr->t_infomask2;
htup->t_infomask = xlhdr->t_infomask;
htup->t_hoff = xlhdr->t_hoff;
- HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
+ tuple.t_data = htup;
+ HeapTupleAndHeaderSetXmin(page, &tuple, XLogRecGetXid(record),
+ false);
HeapTupleHeaderSetCmin(htup, FirstCommandId);
ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
@@ -9503,8 +9823,8 @@ static void
heap_xlog_update(XLogReaderState *record, bool hot_update)
{
XLogRecPtr lsn = record->EndRecPtr;
- xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
RelFileLocator rlocator;
+ xl_heap_update *xlrec;
BlockNumber oldblk;
BlockNumber newblk;
ItemPointerData newtid;
@@ -9528,6 +9848,20 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
Size freespace = 0;
XLogRedoAction oldaction;
XLogRedoAction newaction;
+ bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
+ Pointer rec_data = (Pointer) XLogRecGetData(record);
+ TransactionId xid_base = InvalidTransactionId,
+ multi_base = InvalidTransactionId;
+
+ if (isinit)
+ {
+ xid_base = *((TransactionId *) rec_data);
+ rec_data += sizeof(TransactionId);
+ multi_base = *((TransactionId *) rec_data);
+ rec_data += sizeof(TransactionId);
+ }
+
+ xlrec = (xl_heap_update *) rec_data;
/* initialize to keep the compiler quiet */
oldtup.t_data = NULL;
@@ -9574,6 +9908,8 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
&obuffer);
if (oldaction == BLK_NEEDS_REDO)
{
+ HeapTupleData tuple;
+
page = BufferGetPage(obuffer);
offnum = xlrec->old_offnum;
if (PageGetMaxOffsetNumber(page) >= offnum)
@@ -9586,6 +9922,8 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
oldtup.t_data = htup;
oldtup.t_len = ItemIdGetLength(lp);
+ /* Toast tuples are never updated. */
+ HeapTupleCopyXidsFromPage(obuffer, &oldtup, page, false);
htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
@@ -9595,13 +9933,15 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
HeapTupleHeaderClearHotUpdated(htup);
fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
&htup->t_infomask2);
- HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
+ tuple.t_data = htup;
+ HeapTupleAndHeaderSetXmax(page, &tuple, xlrec->old_xmax, false);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
/* Set forward chain link in t_ctid */
htup->t_ctid = newtid;
/* Mark the page as a candidate for pruning */
- PageSetPrunable(page, XLogRecGetXid(record));
+ /* Toast tuples are never updated. */
+ PageSetPrunable(page, XLogRecGetXid(record), false);
if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
@@ -9618,11 +9958,18 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
nbuffer = obuffer;
newaction = oldaction;
}
- else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
+ else if (isinit)
{
+ HeapPageSpecial special;
+
nbuffer = XLogInitBufferForRedo(record, 0);
page = (Page) BufferGetPage(nbuffer);
- PageInit(page, BufferGetPageSize(nbuffer), 0);
+
+ /* Toast tuples are never updated. */
+ PageInit(page, BufferGetPageSize(nbuffer), sizeof(HeapPageSpecialData));
+ special = HeapPageGetSpecial(page);
+ special->pd_xid_base = xid_base;
+ special->pd_multi_base = multi_base;
newaction = BLK_NEEDS_REDO;
}
else
@@ -9650,6 +9997,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
char *recdata_end;
Size datalen;
Size tuplen;
+ HeapTupleData tuple;
recdata = XLogRecGetBlockData(record, 0, &datalen);
recdata_end = recdata + datalen;
@@ -9728,9 +10076,10 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
htup->t_infomask = xlhdr.t_infomask;
htup->t_hoff = xlhdr.t_hoff;
- HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
+ tuple.t_data = htup;
+ HeapTupleAndHeaderSetXmin(page, &tuple, XLogRecGetXid(record), false);
HeapTupleHeaderSetCmin(htup, FirstCommandId);
- HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
+ HeapTupleAndHeaderSetXmax(page, &tuple, xlrec->new_xmax, false);
/* Make sure there is no forward chain link in t_ctid */
htup->t_ctid = newtid;
@@ -9841,6 +10190,8 @@ heap_xlog_lock(XLogReaderState *record)
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
+ HeapTupleData tuple;
+
page = (Page) BufferGetPage(buffer);
offnum = xlrec->offnum;
@@ -9869,7 +10220,9 @@ heap_xlog_lock(XLogReaderState *record)
BufferGetBlockNumber(buffer),
offnum);
}
- HeapTupleHeaderSetXmax(htup, xlrec->xmax);
+
+ tuple.t_data = htup;
+ HeapTupleAndHeaderSetXmax(page, &tuple, xlrec->xmax, false);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
@@ -9914,6 +10267,8 @@ heap_xlog_lock_updated(XLogReaderState *record)
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
+ HeapTupleData tuple;
+
page = BufferGetPage(buffer);
offnum = xlrec->offnum;
@@ -9929,7 +10284,8 @@ heap_xlog_lock_updated(XLogReaderState *record)
htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
&htup->t_infomask2);
- HeapTupleHeaderSetXmax(htup, xlrec->xmax);
+ tuple.t_data = htup;
+ HeapTupleAndHeaderSetXmax(page, &tuple, xlrec->xmax, false);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
@@ -10077,6 +10433,10 @@ heap_mask(char *pagedata, BlockNumber blkno)
mask_page_lsn_and_checksum(page);
mask_page_hint_bits(page);
+
+ /* Ignore prune_xid (it's like a hint-bit) */
+ HeapPageSetPruneXid(page, InvalidTransactionId, false);
+
mask_unused_space(page);
for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
@@ -10192,14 +10552,14 @@ HeapCheckForSerializableConflictOut(bool visible, Relation relation,
case HEAPTUPLE_LIVE:
if (visible)
return;
- xid = HeapTupleHeaderGetXmin(tuple->t_data);
+ xid = HeapTupleGetXmin(tuple);
break;
case HEAPTUPLE_RECENTLY_DEAD:
case HEAPTUPLE_DELETE_IN_PROGRESS:
if (visible)
- xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
+ xid = HeapTupleGetUpdateXidAny(tuple);
else
- xid = HeapTupleHeaderGetXmin(tuple->t_data);
+ xid = HeapTupleGetXmin(tuple);
if (TransactionIdPrecedes(xid, TransactionXmin))
{
@@ -10209,7 +10569,7 @@ HeapCheckForSerializableConflictOut(bool visible, Relation relation,
}
break;
case HEAPTUPLE_INSERT_IN_PROGRESS:
- xid = HeapTupleHeaderGetXmin(tuple->t_data);
+ xid = HeapTupleGetXmin(tuple);
break;
case HEAPTUPLE_DEAD:
Assert(!visible);
@@ -10247,3 +10607,567 @@ HeapCheckForSerializableConflictOut(bool visible, Relation relation,
CheckForSerializableConflictOut(relation, xid, snapshot);
}
+
+static void
+xid_min_max(ShortTransactionId *min, ShortTransactionId *max,
+ ShortTransactionId xid,
+ bool *found)
+{
+ Assert(TransactionIdIsNormal(xid));
+ Assert(xid <= MaxShortTransactionId);
+
+ if (!*found)
+ {
+ *min = *max = xid;
+ *found = true;
+ }
+ else
+ {
+ *min = Min(*min, xid);
+ *max = Max(*max, xid);
+ }
+}
+
+/*
+ * Find minimum and maximum short transaction ids which occurs in the page.
+ *
+ * Works for multi and non multi transaction. Which is defined by "multi"
+ * argument.
+ */
+static bool
+heap_page_xid_min_max(Page page, bool multi,
+ ShortTransactionId *min, ShortTransactionId *max,
+ bool is_toast)
+{
+ bool found;
+ OffsetNumber offnum,
+ maxoff;
+ ItemId itemid;
+ HeapTupleHeader htup;
+
+ maxoff = PageGetMaxOffsetNumber(page);
+ found = false;
+
+ Assert(!multi || !is_toast);
+
+ for (offnum = FirstOffsetNumber;
+ offnum <= maxoff;
+ offnum = OffsetNumberNext(offnum))
+ {
+ itemid = PageGetItemId(page, offnum);
+
+ if (!ItemIdIsNormal(itemid))
+ continue;
+
+ htup = (HeapTupleHeader) PageGetItem(page, itemid);
+
+ if (!multi)
+ {
+ /*
+ * For non multi transactions we should see inside the tuple for
+ * update transaction.
+ */
+ Assert(!is_toast || !(htup->t_infomask & HEAP_XMAX_IS_MULTI));
+
+ if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmin) &&
+ !HeapTupleHeaderXminFrozen(htup))
+ {
+ xid_min_max(min, max, htup->t_choice.t_heap.t_xmin, &found);
+ }
+
+ if ((htup->t_infomask & HEAP_XMAX_IS_MULTI) &&
+ (!(htup->t_infomask & HEAP_XMAX_LOCK_ONLY)))
+ {
+ TransactionId update_xid;
+ ShortTransactionId xid;
+
+ Assert(!is_toast);
+ update_xid = MultiXactIdGetUpdateXid(HeapTupleHeaderGetRawXmax(page, htup),
+ htup->t_infomask);
+ xid = NormalTransactionIdToShort(HeapPageGetSpecial(page)->pd_xid_base,
+ update_xid);
+
+ xid_min_max(min, max, xid, &found);
+ }
+ }
+
+ if (!TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax))
+ continue;
+
+ if (multi != ((htup->t_infomask & HEAP_XMAX_IS_MULTI) != 0))
+ continue;
+
+ xid_min_max(min, max, htup->t_choice.t_heap.t_xmax, &found);
+ }
+
+ Assert(!found || (*min > InvalidTransactionId && *max <= MaxShortTransactionId));
+
+ return found;
+}
+
+/*
+ * Shift xid base in the page. WAL-logged if buffer is specified.
+ */
+static void
+heap_page_shift_base(Relation relation, Buffer buffer, Page page,
+ bool multi, int64 delta, bool is_toast)
+{
+ TransactionId *xid_base,
+ *multi_base;
+ OffsetNumber offnum,
+ maxoff;
+ ItemId itemid;
+ HeapTupleHeader htup;
+
+ Assert(IsBufferLockedExclusive(buffer));
+
+ START_CRIT_SECTION();
+
+ if (is_toast)
+ {
+ Assert(!multi);
+ xid_base = &ToastPageGetSpecial(page)->pd_xid_base;
+ multi_base = NULL;
+ }
+ else
+ {
+ HeapPageSpecial special = HeapPageGetSpecial(page);
+
+ xid_base = &special->pd_xid_base;
+ multi_base = &special->pd_multi_base;
+ }
+
+ /* Iterate over page items */
+ maxoff = PageGetMaxOffsetNumber(page);
+ for (offnum = FirstOffsetNumber;
+ offnum <= maxoff;
+ offnum = OffsetNumberNext(offnum))
+ {
+ itemid = PageGetItemId(page, offnum);
+
+ if (!ItemIdIsNormal(itemid))
+ continue;
+
+ htup = (HeapTupleHeader) PageGetItem(page, itemid);
+
+ /* Apply xid shift to heap tuple */
+ if (!multi)
+ {
+ /* shift xmin */
+ if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmin) &&
+ !HeapTupleHeaderXminFrozen(htup))
+ {
+ Assert(htup->t_choice.t_heap.t_xmin - delta >= FirstNormalTransactionId);
+ Assert(htup->t_choice.t_heap.t_xmin - delta <= MaxShortTransactionId);
+ htup->t_choice.t_heap.t_xmin -= delta;
+ }
+ }
+
+ /* shift xmax */
+ if (!TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax))
+ continue;
+
+ if (multi != (bool) (htup->t_infomask & HEAP_XMAX_IS_MULTI))
+ continue;
+
+ Assert(htup->t_choice.t_heap.t_xmax - delta >= FirstNormalTransactionId);
+ Assert(htup->t_choice.t_heap.t_xmax - delta <= MaxShortTransactionId);
+ htup->t_choice.t_heap.t_xmax -= delta;
+ }
+
+ /* Apply xid shift to base as well */
+ if (!multi)
+ *xid_base += delta;
+ else
+ *multi_base += delta;
+
+ if (BufferIsValid(buffer))
+ MarkBufferDirty(buffer);
+
+ /* Write WAL record if needed */
+ if (relation && RelationNeedsWAL(relation) && maxoff != 0)
+ {
+ XLogRecPtr recptr;
+ xl_heap_base_shift xlrec;
+
+ xlrec.delta = delta;
+ xlrec.multi = multi;
+ xlrec.flags = 0;
+ if (IsToastRelation(relation))
+ xlrec.flags |= XLH_BASE_SHIFT_ON_TOAST_RELATION;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapBaseShift);
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+
+ recptr = XLogInsert(RM_HEAP3_ID, XLOG_HEAP3_BASE_SHIFT);
+
+ PageSetLSN(page, recptr);
+ }
+
+ END_CRIT_SECTION();
+}
+
+/*
+ * Freeze xids in the single heap page. Useful when we can't fit new xid even
+ * with base shift.
+ */
+static void
+freeze_single_heap_page(Relation relation, Buffer buffer)
+{
+ Page page = BufferGetPage(buffer);
+ OffsetNumber offnum,
+ maxoff;
+ HeapTupleData tuple;
+ int nfrozen = 0;
+ HeapTupleFreeze *frozen;
+ TransactionId FreezeXid;
+ GlobalVisState *vistest;
+ ItemId itemid;
+ bool totally_frozen;
+ VacuumParams params = {0};
+ struct VacuumCutoffs cutoffs = {0};
+ HeapPageFreeze pagefrz;
+ PruneResult presult;
+
+ vacuum_get_cutoffs(relation, ¶ms, &cutoffs);
+ FreezeXid = cutoffs.relfrozenxid; /* ??? cutoffs.FreezeLimit; */
+ pagefrz.freeze_required = true;
+ pagefrz.FreezePageRelfrozenXid = cutoffs.FreezeLimit;
+ pagefrz.FreezePageRelminMxid = cutoffs.MultiXactCutoff;
+ pagefrz.NoFreezePageRelfrozenXid = cutoffs.FreezeLimit;
+ pagefrz.NoFreezePageRelminMxid = cutoffs.MultiXactCutoff;
+
+ vistest = GlobalVisTestFor(relation);
+
+ heap_page_prune(relation, buffer, vistest, &presult, &offnum, false);
+ if (presult.ndeleted > presult.nnewlpdead)
+ pgstat_update_heap_dead_tuples(relation,
+ presult.ndeleted - presult.nnewlpdead);
+
+ /*
+ * Now scan the page to collect vacuumable items and check for tuples
+ * requiring freezing.
+ */
+ maxoff = PageGetMaxOffsetNumber(page);
+ frozen = palloc(sizeof(HeapTupleFreeze) * MaxHeapTuplesPerPage);
+
+ /*
+ * Note: If you change anything in the loop below, also look at
+ * heap_page_is_all_visible to see if that needs to be changed.
+ */
+ for (offnum = FirstOffsetNumber;
+ offnum <= maxoff;
+ offnum = OffsetNumberNext(offnum))
+ {
+ itemid = PageGetItemId(page, offnum);
+
+ if (!ItemIdIsNormal(itemid))
+ continue;
+
+ tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+ tuple.t_len = ItemIdGetLength(itemid);
+ tuple.t_tableOid = RelationGetRelid(relation);
+ HeapTupleCopyXidsFromPage(buffer, &tuple, page,
+ IsToastRelation(relation));
+
+ /*
+ * Each non-removable tuple must be checked to see if it needs
+ * freezing. Note we already have exclusive buffer lock.
+ */
+ if (heap_prepare_freeze_tuple(&tuple, &cutoffs, &pagefrz,
+ &frozen[nfrozen], &totally_frozen))
+ frozen[nfrozen++].offset = offnum;
+ }
+
+ /*
+ * If we froze any tuples, mark the buffer dirty, and write a WAL record
+ * recording the changes. We must log the changes to be crash-safe
+ * against future truncation of CLOG.
+ */
+ if (nfrozen > 0)
+ heap_freeze_execute_prepared(relation, buffer, FreezeXid, frozen,
+ nfrozen);
+
+ pfree(frozen);
+}
+
+/*
+ * Check if xid still fits on a page with given base and delta.
+ */
+static inline bool
+is_delta_fits_heap_page(TransactionId xid, TransactionId base, int64 delta)
+{
+ return xid >= base + delta + FirstNormalTransactionId &&
+ xid <= base + delta + MaxShortTransactionId;
+}
+
+/*
+ * Check if xid fits on a page with given base.
+ */
+static inline bool
+is_xid_fits_heap_page(TransactionId xid, TransactionId base)
+{
+ return xid >= base + FirstNormalTransactionId &&
+ xid <= base + MaxShortTransactionId;
+}
+
+/*
+ * Check if delta fits on a page.
+ *
+ * If delta does not fits, never return.
+ */
+static void
+heap_page_check_delta(Buffer buffer,
+ TransactionId xid, TransactionId base,
+ ShortTransactionId min, ShortTransactionId max,
+ int64 delta, int64 *freeDelta, int64 *requiredDelta)
+{
+ BufferDesc *buf;
+ char *path;
+ BackendId backend;
+
+ Assert((freeDelta == NULL) == (requiredDelta == NULL));
+
+ /*
+ * If delta fits the page, we good to go ...
+ */
+ if (is_delta_fits_heap_page(xid, base, delta))
+ return;
+
+ /*
+ * ... otherwise handle the error.
+ */
+ if (buffer == InvalidBuffer)
+ return;
+
+ if (BufferIsLocal(buffer))
+ {
+ buf = GetLocalBufferDescriptor(-buffer - 1);
+ backend = MyBackendId;
+ }
+ else
+ {
+ buf = GetBufferDescriptor(buffer - 1);
+ backend = InvalidBackendId;
+ }
+
+ path = relpathbackend(BufTagGetRelFileLocator(&buf->tag), backend,
+ buf->tag.forkNum);
+
+ if (freeDelta == NULL)
+ elog(FATAL, "Fatal xid base calculation error: xid = %llu, base = %llu, min = %u, max = %u, delta = %lld (rel=%s, blockNum=%u)",
+ (unsigned long long) xid, (unsigned long long) base,
+ min, max,
+ (long long) delta,
+ path, buf->tag.blockNum);
+
+ elog(FATAL, "Fatal xid base calculation error: xid = %llu, base = %llu, min = %u, max = %u, freeDelta = %lld, requiredDelta = %lld, delta = %lld (rel=%s, blockNum=%u)",
+ (unsigned long long) xid, (unsigned long long) base,
+ min, max,
+ (long long) *freeDelta, (long long) *requiredDelta,
+ (long long) delta,
+ path, buf->tag.blockNum);
+}
+
+/*
+ * Shift page base.
+ */
+static void
+heap_page_apply_delta(Relation relation, Buffer buffer, Page page,
+ TransactionId xid, bool multi,
+ TransactionId base, int64 delta, bool is_toast)
+{
+ Assert(is_delta_fits_heap_page(xid, base, delta));
+
+ heap_page_shift_base(relation, buffer, page, multi, delta, is_toast);
+
+#ifdef USE_ASSERT_CHECKING
+ if (is_toast)
+ {
+ Assert(!multi);
+ base = ToastPageGetSpecial(page)->pd_xid_base;
+ }
+ else
+ base = multi ? HeapPageGetSpecial(page)->pd_multi_base :
+ HeapPageGetSpecial(page)->pd_xid_base;
+
+ Assert(is_xid_fits_heap_page(xid, base));
+#endif /* USE_ASSERT_CHECKING */
+}
+
+/*
+ * Try to fit xid on a page.
+ */
+static int
+heap_page_try_prepare_for_xid(Relation relation, Buffer buffer, Page page,
+ TransactionId xid, bool multi, bool is_toast)
+{
+ TransactionId base;
+ ShortTransactionId min = InvalidTransactionId,
+ max = InvalidTransactionId;
+ int64 delta,
+ freeDelta,
+ requiredDelta;
+
+ if (is_toast)
+ {
+ Assert(!multi);
+ base = ToastPageGetSpecial(page)->pd_xid_base;
+ }
+ else
+ base = multi ? HeapPageGetSpecial(page)->pd_multi_base :
+ HeapPageGetSpecial(page)->pd_xid_base;
+
+ /* If xid fits the page no action needed. */
+ if (is_xid_fits_heap_page(xid, base))
+ return 0;
+
+ /* No items on the page? */
+ if (!heap_page_xid_min_max(page, multi, &min, &max, is_toast))
+ {
+ delta = (int64) (xid - FirstNormalTransactionId) - (int64) base;
+ heap_page_check_delta(buffer, xid, base, min, max, delta, NULL, NULL);
+ heap_page_apply_delta(relation, buffer, page, xid, multi, base, delta,
+ is_toast);
+ return 0;
+ }
+
+ /* Can we just shift base on the page? */
+ if (xid < base + FirstNormalTransactionId)
+ {
+ freeDelta = MaxShortTransactionId - max;
+ requiredDelta = (base + FirstNormalTransactionId) - xid;
+ /* Shouldn't consider setting base less than 0 */
+ freeDelta = Min(freeDelta, base);
+
+ if (requiredDelta > freeDelta)
+ return -1;
+
+ delta = -(freeDelta + requiredDelta) / 2;
+ }
+ else
+ {
+ freeDelta = min - FirstNormalTransactionId;
+ requiredDelta = xid - (base + MaxShortTransactionId);
+
+ if (requiredDelta > freeDelta)
+ return -1;
+
+ delta = (freeDelta + requiredDelta) / 2;
+ }
+
+ heap_page_check_delta(buffer, xid, base, min, max,
+ delta, &freeDelta, &requiredDelta);
+ heap_page_apply_delta(relation, buffer, page, xid, multi, base,
+ delta, is_toast);
+
+ return 0;
+}
+
+static void
+heap_xlog_base_shift(XLogReaderState *record)
+{
+ XLogRecPtr lsn = record->EndRecPtr;
+ xl_heap_base_shift *xlrec = (xl_heap_base_shift *) XLogRecGetData(record);
+ Buffer buffer;
+ Page page;
+ BlockNumber blkno;
+ RelFileLocator target_node;
+
+ XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno);
+
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
+ {
+ page = BufferGetPage(buffer);
+ heap_page_shift_base(NULL, InvalidBuffer, page, xlrec->multi,
+ xlrec->delta,
+ xlrec->flags & XLH_BASE_SHIFT_ON_TOAST_RELATION);
+ PageSetLSN(page, lsn);
+ MarkBufferDirty(buffer);
+ }
+
+ if (BufferIsValid(buffer))
+ UnlockReleaseBuffer(buffer);
+}
+
+/*
+ * Ensure that given xid fits base of given page.
+ */
+static bool
+heap_page_prepare_for_xid(Relation relation, Buffer buffer,
+ TransactionId xid, bool multi)
+{
+ Page page = BufferGetPage(buffer);
+ int res;
+
+ /* "Double xmax" page format doesn't require any preparation */
+ if (HeapPageIsDoubleXmax(page))
+ return false;
+
+ if (!TransactionIdIsNormal(xid))
+ return false;
+
+ res = heap_page_try_prepare_for_xid(relation, buffer, page, xid, multi,
+ IsToastRelation(relation));
+ if (res != -1)
+ return res == 1;
+
+ /* Have to try freeing the page... */
+ freeze_single_heap_page(relation, buffer);
+
+ res = heap_page_try_prepare_for_xid(relation, buffer, page, xid, multi,
+ IsToastRelation(relation));
+ if (res != -1)
+ return res == 1;
+
+ elog(ERROR, "could not fit xid into page");
+
+ return false;
+}
+
+/*
+ * Ensure that given xid fits base of given page.
+ */
+void
+rewrite_page_prepare_for_xid(Page page, HeapTuple tup, bool is_toast)
+{
+ TransactionId xid;
+ int res;
+
+ /* xmin */
+ xid = HeapTupleGetXmin(tup);
+ if (TransactionIdIsNormal(xid))
+ {
+ res = heap_page_try_prepare_for_xid(NULL, InvalidBuffer, page, xid,
+ false, is_toast);
+ if (res == -1)
+ elog(ERROR, "could not fit xid into page");
+ }
+
+ /* xmax */
+ xid = HeapTupleGetRawXmax(tup);
+ if (TransactionIdIsNormal(xid))
+ {
+ res = heap_page_try_prepare_for_xid(NULL, InvalidBuffer, page, xid,
+ tup->t_data->t_infomask & HEAP_XMAX_IS_MULTI,
+ is_toast);
+ if (res == -1)
+ elog(ERROR, "could not fit xid into page");
+ }
+}
+
+void
+heap3_redo(XLogReaderState *record)
+{
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+
+ switch (info & XLOG_HEAP_OPMASK)
+ {
+ case XLOG_HEAP3_BASE_SHIFT:
+ heap_xlog_base_shift(record);
+ break;
+ default:
+ elog(PANIC, "heap3_redo: unknown op code %u", info);
+ }
+}
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 0ed612e244..7d02aa52bb 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -424,7 +424,7 @@ tuple_lock_retry:
* changes in an existing tuple, except to invalid or
* frozen, and neither of those can match priorXmax.)
*/
- if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
+ if (!TransactionIdEquals(HeapTupleGetXmin(tuple),
priorXmax))
{
ReleaseBuffer(buffer);
@@ -484,7 +484,7 @@ tuple_lock_retry:
* variable instead of doing HeapTupleHeaderGetXmin again.
*/
if (TransactionIdIsCurrentTransactionId(priorXmax) &&
- HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
+ HeapTupleGetCmin(tuple) >= cid)
{
tmfd->xmax = priorXmax;
@@ -492,7 +492,7 @@ tuple_lock_retry:
* Cmin is the problematic value, so store that. See
* above.
*/
- tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data);
+ tmfd->cmax = HeapTupleGetCmin(tuple);
ReleaseBuffer(buffer);
return TM_SelfModified;
}
@@ -518,7 +518,7 @@ tuple_lock_retry:
/*
* As above, if xmin isn't what we're expecting, do nothing.
*/
- if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
+ if (!TransactionIdEquals(HeapTupleGetXmin(tuple),
priorXmax))
{
ReleaseBuffer(buffer);
@@ -549,7 +549,7 @@ tuple_lock_retry:
/* updated, so look at the updated row */
*tid = tuple->t_data->t_ctid;
/* updated row should have xmin matching this xmax */
- priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
+ priorXmax = HeapTupleGetUpdateXidAny(tuple);
ReleaseBuffer(buffer);
/* loop back to fetch next in chain */
}
@@ -865,7 +865,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
* case we had better copy it.
*/
if (!is_system_catalog &&
- !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
+ !TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(tuple)))
elog(WARNING, "concurrent insert in progress within table \"%s\"",
RelationGetRelationName(OldHeap));
/* treat as live */
@@ -877,7 +877,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
* Similar situation to INSERT_IN_PROGRESS case.
*/
if (!is_system_catalog &&
- !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
+ !TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(tuple)))
elog(WARNING, "concurrent delete in progress within table \"%s\"",
RelationGetRelationName(OldHeap));
/* treat as recently dead */
@@ -1062,6 +1062,8 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
targtuple->t_len = ItemIdGetLength(itemid);
+ HeapTupleCopyXidsFromPage(hscan->rs_cbuf, targtuple, targpage,
+ IsToastRelation(scan->rs_rd));
switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
hscan->rs_cbuf))
@@ -1097,7 +1099,7 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
* numbers we report to the cumulative stats system to make
* this come out right.)
*/
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(targtuple)))
{
sample_it = true;
*liverows += 1;
@@ -1128,7 +1130,7 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
* but not the post-image. We also get sane results if the
* concurrent transaction never commits.
*/
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(targtuple)))
*deadrows += 1;
else
{
@@ -1377,7 +1379,8 @@ heapam_index_build_range_scan(Relation heapRelation,
Page page = BufferGetPage(hscan->rs_cbuf);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
- heap_get_root_tuples(page, root_offsets);
+ heap_get_root_tuples(heapRelation, hscan->rs_cbuf, page,
+ root_offsets);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
root_blkno = hscan->rs_cblock;
@@ -1470,7 +1473,7 @@ heapam_index_build_range_scan(Relation heapRelation,
* before commit there. Give a warning if neither case
* applies.
*/
- xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
+ xwait = HeapTupleGetXmin(heapTuple);
if (!TransactionIdIsCurrentTransactionId(xwait))
{
if (!is_system_catalog)
@@ -1529,7 +1532,7 @@ heapam_index_build_range_scan(Relation heapRelation,
break;
}
- xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
+ xwait = HeapTupleGetUpdateXidAny(heapTuple);
if (!TransactionIdIsCurrentTransactionId(xwait))
{
if (!is_system_catalog)
@@ -1674,7 +1677,8 @@ heapam_index_build_range_scan(Relation heapRelation,
Page page = BufferGetPage(hscan->rs_cbuf);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
- heap_get_root_tuples(page, root_offsets);
+ heap_get_root_tuples(heapRelation, hscan->rs_cbuf, page,
+ root_offsets);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
}
@@ -1840,7 +1844,8 @@ heapam_index_validate_scan(Relation heapRelation,
Page page = BufferGetPage(hscan->rs_cbuf);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
- heap_get_root_tuples(page, root_offsets);
+ heap_get_root_tuples(heapRelation, hscan->rs_cbuf, page,
+ root_offsets);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
memset(in_index, 0, sizeof(in_index));
@@ -2180,7 +2185,12 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
ItemPointerSet(&tid, block, offnum);
if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
&heapTuple, NULL, true))
- hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
+ {
+ hscan->rs_vistuples[ntup] = ItemPointerGetOffsetNumber(&tid);
+ hscan->rs_xmin[ntup] = heapTuple.t_xmin;
+ hscan->rs_xmax[ntup] = heapTuple.t_xmax;
+ ++ntup;
+ }
}
}
else
@@ -2205,13 +2215,18 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
loctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
loctup.t_len = ItemIdGetLength(lp);
loctup.t_tableOid = scan->rs_rd->rd_id;
+ HeapTupleCopyXidsFromPage(hscan->rs_cbuf, &loctup, page,
+ IsToastRelation(scan->rs_rd));
ItemPointerSet(&loctup.t_self, block, offnum);
valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
if (valid)
{
- hscan->rs_vistuples[ntup++] = offnum;
+ hscan->rs_vistuples[ntup] = offnum;
+ hscan->rs_xmin[ntup] = loctup.t_xmin;
+ hscan->rs_xmax[ntup] = loctup.t_xmax;
+ ++ntup;
PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
- HeapTupleHeaderGetXmin(loctup.t_data));
+ HeapTupleGetXmin(&loctup));
}
HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
buffer, snapshot);
@@ -2250,6 +2265,8 @@ heapam_scan_bitmap_next_tuple(TableScanDesc scan,
hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
hscan->rs_ctup.t_len = ItemIdGetLength(lp);
hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
+ hscan->rs_ctup.t_xmin = hscan->rs_xmin[hscan->rs_cindex];
+ hscan->rs_ctup.t_xmax = hscan->rs_xmax[hscan->rs_cindex];
ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);
pgstat_count_heap_fetch(scan->rs_rd);
@@ -2390,8 +2407,17 @@ heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
tuple->t_len = ItemIdGetLength(itemid);
- ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
+ if (pagemode)
+ {
+ tuple->t_xmin = InvalidTransactionId;
+ tuple->t_xmax = InvalidTransactionId;
+ }
+ else
+ HeapTupleCopyXidsFromPage(hscan->rs_cbuf, tuple, page,
+ IsToastRelation(scan->rs_rd));
+
+ ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
if (all_visible)
visible = true;
diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c
index a716001341..26fa51e6eb 100644
--- a/src/backend/access/heap/heapam_visibility.c
+++ b/src/backend/access/heap/heapam_visibility.c
@@ -220,7 +220,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
}
}
}
- else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup)))
{
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
return true;
@@ -232,7 +232,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
{
TransactionId xmax;
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
/* not LOCKED_ONLY, so it has to have an xmax */
Assert(TransactionIdIsValid(xmax));
@@ -244,7 +244,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
return false;
}
- if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup)))
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
@@ -254,11 +254,11 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
return false;
}
- else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup)))
return false;
- else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
- HeapTupleHeaderGetRawXmin(tuple));
+ HeapTupleGetRawXmin(htup));
else
{
/* it must have aborted or crashed */
@@ -287,7 +287,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return true;
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
/* not LOCKED_ONLY, so it has to have an xmax */
Assert(TransactionIdIsValid(xmax));
@@ -302,17 +302,17 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
return true;
}
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup)))
{
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return true;
return false;
}
- if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup)))
return true;
- if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup)))
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
@@ -330,7 +330,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
}
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
- HeapTupleHeaderGetRawXmax(tuple));
+ HeapTupleGetRawXmax(htup));
return false;
}
@@ -419,7 +419,7 @@ HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot,
* is canceled by super-deleting the tuple. This also applies to
* TOAST tuples created during speculative insertion.
*/
- else if (!TransactionIdIsValid(HeapTupleHeaderGetXmin(tuple)))
+ else if (!TransactionIdIsValid(HeapTupleGetXmin(htup)))
return false;
}
@@ -509,9 +509,9 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
}
}
}
- else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup)))
{
- if (HeapTupleHeaderGetCmin(tuple) >= curcid)
+ if (HeapTupleGetCmin(htup) >= curcid)
return TM_Invisible; /* inserted after scan started */
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
@@ -521,7 +521,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
{
TransactionId xmax;
- xmax = HeapTupleHeaderGetRawXmax(tuple);
+ xmax = HeapTupleGetRawXmax(htup);
/*
* Careful here: even though this tuple was created by our own
@@ -552,7 +552,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
{
TransactionId xmax;
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
/* not LOCKED_ONLY, so it has to have an xmax */
Assert(TransactionIdIsValid(xmax));
@@ -560,21 +560,21 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
/* deleting subtransaction must have aborted */
if (!TransactionIdIsCurrentTransactionId(xmax))
{
- if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple),
+ if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup),
false))
return TM_BeingModified;
return TM_Ok;
}
else
{
- if (HeapTupleHeaderGetCmax(tuple) >= curcid)
+ if (HeapTupleGetCmax(htup) >= curcid)
return TM_SelfModified; /* updated after scan started */
else
return TM_Invisible; /* updated before scan started */
}
}
- if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup)))
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
@@ -582,16 +582,16 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
return TM_Ok;
}
- if (HeapTupleHeaderGetCmax(tuple) >= curcid)
+ if (HeapTupleGetCmax(htup) >= curcid)
return TM_SelfModified; /* updated after scan started */
else
return TM_Invisible; /* updated before scan started */
}
- else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup)))
return TM_Invisible;
- else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
- HeapTupleHeaderGetRawXmin(tuple));
+ HeapTupleGetRawXmin(htup));
else
{
/* it must have aborted or crashed */
@@ -625,17 +625,17 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
{
- if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), true))
+ if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), true))
return TM_BeingModified;
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
return TM_Ok;
}
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
if (!TransactionIdIsValid(xmax))
{
- if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
+ if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false))
return TM_BeingModified;
}
@@ -644,13 +644,13 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
if (TransactionIdIsCurrentTransactionId(xmax))
{
- if (HeapTupleHeaderGetCmax(tuple) >= curcid)
+ if (HeapTupleGetCmax(htup) >= curcid)
return TM_SelfModified; /* updated after scan started */
else
return TM_Invisible; /* updated before scan started */
}
- if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
+ if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false))
return TM_BeingModified;
if (TransactionIdDidCommit(xmax))
@@ -666,7 +666,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
* what about the other members?
*/
- if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
+ if (!MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false))
{
/*
* There's no member, even just a locker, alive anymore, so we can
@@ -683,20 +683,20 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
}
}
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup)))
{
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return TM_BeingModified;
- if (HeapTupleHeaderGetCmax(tuple) >= curcid)
+ if (HeapTupleGetCmax(htup) >= curcid)
return TM_SelfModified; /* updated after scan started */
else
return TM_Invisible; /* updated before scan started */
}
- if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup)))
return TM_BeingModified;
- if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup)))
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
@@ -714,7 +714,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
}
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
- HeapTupleHeaderGetRawXmax(tuple));
+ HeapTupleGetRawXmax(htup));
if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
return TM_Updated; /* updated by other */
else
@@ -797,7 +797,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
}
}
}
- else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup)))
{
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
return true;
@@ -809,7 +809,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
{
TransactionId xmax;
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
/* not LOCKED_ONLY, so it has to have an xmax */
Assert(TransactionIdIsValid(xmax));
@@ -821,7 +821,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
return false;
}
- if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup)))
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
@@ -831,7 +831,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
return false;
}
- else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup)))
{
/*
* Return the speculative token to caller. Caller can worry about
@@ -847,13 +847,13 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
Assert(snapshot->speculativeToken != 0);
}
- snapshot->xmin = HeapTupleHeaderGetRawXmin(tuple);
+ snapshot->xmin = HeapTupleGetRawXmin(htup);
/* XXX shouldn't we fall through to look at xmax? */
return true; /* in insertion by other */
}
- else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
- HeapTupleHeaderGetRawXmin(tuple));
+ HeapTupleGetRawXmin(htup));
else
{
/* it must have aborted or crashed */
@@ -882,7 +882,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return true;
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
/* not LOCKED_ONLY, so it has to have an xmax */
Assert(TransactionIdIsValid(xmax));
@@ -900,21 +900,21 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
return true;
}
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup)))
{
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return true;
return false;
}
- if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup)))
{
if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
- snapshot->xmax = HeapTupleHeaderGetRawXmax(tuple);
+ snapshot->xmax = HeapTupleGetRawXmax(htup);
return true;
}
- if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup)))
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
@@ -932,7 +932,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
}
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
- HeapTupleHeaderGetRawXmax(tuple));
+ HeapTupleGetRawXmax(htup));
return false; /* updated by other */
}
@@ -1011,9 +1011,9 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
}
}
}
- else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup)))
{
- if (HeapTupleHeaderGetCmin(tuple) >= snapshot->curcid)
+ if (HeapTupleGetCmin(htup) >= snapshot->curcid)
return false; /* inserted after scan started */
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
@@ -1026,7 +1026,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
{
TransactionId xmax;
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
/* not LOCKED_ONLY, so it has to have an xmax */
Assert(TransactionIdIsValid(xmax));
@@ -1034,13 +1034,13 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
/* updating subtransaction must have aborted */
if (!TransactionIdIsCurrentTransactionId(xmax))
return true;
- else if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
+ else if (HeapTupleGetCmax(htup) >= snapshot->curcid)
return true; /* updated after scan started */
else
return false; /* updated before scan started */
}
- if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup)))
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
@@ -1048,16 +1048,16 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
return true;
}
- if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
+ if (HeapTupleGetCmax(htup) >= snapshot->curcid)
return true; /* deleted after scan started */
else
return false; /* deleted before scan started */
}
- else if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot))
+ else if (XidInMVCCSnapshot(HeapTupleGetRawXmin(htup), snapshot))
return false;
- else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
- HeapTupleHeaderGetRawXmin(tuple));
+ HeapTupleGetRawXmin(htup));
else
{
/* it must have aborted or crashed */
@@ -1070,7 +1070,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
{
/* xmin is committed, but maybe not according to our snapshot */
if (!HeapTupleHeaderXminFrozen(tuple) &&
- XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot))
+ XidInMVCCSnapshot(HeapTupleGetRawXmin(htup), snapshot))
return false; /* treat as still in progress */
}
@@ -1089,14 +1089,14 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
/* already checked above */
Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
/* not LOCKED_ONLY, so it has to have an xmax */
Assert(TransactionIdIsValid(xmax));
if (TransactionIdIsCurrentTransactionId(xmax))
{
- if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
+ if (HeapTupleGetCmax(htup) >= snapshot->curcid)
return true; /* deleted after scan started */
else
return false; /* deleted before scan started */
@@ -1111,18 +1111,18 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
{
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup)))
{
- if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
+ if (HeapTupleGetCmax(htup) >= snapshot->curcid)
return true; /* deleted after scan started */
else
return false; /* deleted before scan started */
}
- if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot))
+ if (XidInMVCCSnapshot(HeapTupleGetRawXmax(htup), snapshot))
return true;
- if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup)))
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
@@ -1132,12 +1132,12 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
/* xmax transaction committed */
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
- HeapTupleHeaderGetRawXmax(tuple));
+ HeapTupleGetRawXmax(htup));
}
else
{
/* xmax is committed, but maybe not according to our snapshot */
- if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot))
+ if (XidInMVCCSnapshot(HeapTupleGetRawXmax(htup), snapshot))
return true; /* treat as still in progress */
}
@@ -1252,21 +1252,21 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de
return HEAPTUPLE_DEAD;
}
}
- else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup)))
{
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
return HEAPTUPLE_INSERT_IN_PROGRESS;
/* only locked? run infomask-only check first, for performance */
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) ||
- HeapTupleHeaderIsOnlyLocked(tuple))
+ HeapTupleIsOnlyLocked(htup))
return HEAPTUPLE_INSERT_IN_PROGRESS;
/* inserted and then deleted by same xact */
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(htup)))
return HEAPTUPLE_DELETE_IN_PROGRESS;
/* deleting subtransaction must have aborted */
return HEAPTUPLE_INSERT_IN_PROGRESS;
}
- else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup)))
{
/*
* It'd be possible to discern between INSERT/DELETE in progress
@@ -1278,9 +1278,9 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de
*/
return HEAPTUPLE_INSERT_IN_PROGRESS;
}
- else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
- HeapTupleHeaderGetRawXmin(tuple));
+ HeapTupleGetRawXmin(htup));
else
{
/*
@@ -1322,14 +1322,14 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de
* possibly be running; otherwise have to check.
*/
if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) &&
- MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple),
+ MultiXactIdIsRunning(HeapTupleGetRawXmax(htup),
true))
return HEAPTUPLE_LIVE;
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
}
else
{
- if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup)))
return HEAPTUPLE_LIVE;
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
InvalidTransactionId);
@@ -1347,7 +1347,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
{
- TransactionId xmax = HeapTupleGetUpdateXid(tuple);
+ TransactionId xmax = HeapTupleGetUpdateXid(htup);
/* already checked above */
Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
@@ -1370,7 +1370,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de
*dead_after = xmax;
return HEAPTUPLE_RECENTLY_DEAD;
}
- else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
+ else if (!MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false))
{
/*
* Not in Progress, Not Committed, so either Aborted or crashed.
@@ -1384,11 +1384,11 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de
if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
{
- if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup)))
return HEAPTUPLE_DELETE_IN_PROGRESS;
- else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
+ else if (TransactionIdDidCommit(HeapTupleGetRawXmax(htup)))
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
- HeapTupleHeaderGetRawXmax(tuple));
+ HeapTupleGetRawXmax(htup));
else
{
/*
@@ -1410,7 +1410,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de
* Deleter committed, allow caller to check if it was recent enough that
* some open transactions could still see the tuple.
*/
- *dead_after = HeapTupleHeaderGetRawXmax(tuple);
+ *dead_after = HeapTupleGetRawXmax(htup);
return HEAPTUPLE_RECENTLY_DEAD;
}
@@ -1506,7 +1506,7 @@ HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
/* Deleter committed, so tuple is dead if the XID is old enough. */
return GlobalVisTestIsRemovableXid(vistest,
- HeapTupleHeaderGetRawXmax(tuple));
+ HeapTupleGetRawXmax(htup));
}
/*
@@ -1519,8 +1519,9 @@ HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
* at the top of this file.
*/
bool
-HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
+HeapTupleIsOnlyLocked(HeapTuple htup)
{
+ HeapTupleHeader tuple = htup->t_data;
TransactionId xmax;
/* if there's no valid Xmax, then there's obviously no update either */
@@ -1531,7 +1532,7 @@ HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
return true;
/* invalid xmax means no update */
- if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdIsValid(HeapTupleGetRawXmax(htup)))
return true;
/*
@@ -1542,7 +1543,7 @@ HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
return false;
/* ... but if it's a multi, then perhaps the updating Xid aborted. */
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
/* not LOCKED_ONLY, so it has to have an xmax */
Assert(TransactionIdIsValid(xmax));
@@ -1590,8 +1591,8 @@ HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot,
Buffer buffer)
{
HeapTupleHeader tuple = htup->t_data;
- TransactionId xmin = HeapTupleHeaderGetXmin(tuple);
- TransactionId xmax = HeapTupleHeaderGetRawXmax(tuple);
+ TransactionId xmin = HeapTupleGetXmin(htup);
+ TransactionId xmax = HeapTupleGetRawXmax(htup);
Assert(ItemPointerIsValid(&htup->t_self));
Assert(htup->t_tableOid != InvalidOid);
@@ -1691,7 +1692,7 @@ HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot,
*/
else if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
{
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
}
/* check if it's one of our txids, toplevel is also in there */
diff --git a/src/backend/access/heap/heaptoast.c b/src/backend/access/heap/heaptoast.c
index 52ecd45654..3d0e62ba64 100644
--- a/src/backend/access/heap/heaptoast.c
+++ b/src/backend/access/heap/heaptoast.c
@@ -307,6 +307,7 @@ heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
result_tuple->t_len = new_tuple_len;
result_tuple->t_self = newtup->t_self;
result_tuple->t_tableOid = newtup->t_tableOid;
+ HeapTupleCopyXids(result_tuple, newtup);
new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE);
result_tuple->t_data = new_data;
@@ -395,6 +396,7 @@ toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc)
*/
new_tuple->t_self = tup->t_self;
new_tuple->t_tableOid = tup->t_tableOid;
+ HeapTupleCopyXids(new_tuple, tup);
new_tuple->t_data->t_choice = tup->t_data->t_choice;
new_tuple->t_data->t_ctid = tup->t_data->t_ctid;
@@ -467,6 +469,7 @@ toast_flatten_tuple_to_datum(HeapTupleHeader tup,
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
tmptup.t_data = tup;
+ HeapTupleSetZeroXids(&tmptup);
/*
* Break down the tuple into fields.
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index ccc4c6966a..9e6a0a3d5d 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -19,6 +19,7 @@
#include "access/hio.h"
#include "access/htup_details.h"
#include "access/visibilitymap.h"
+#include "catalog/catalog.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
@@ -59,6 +60,9 @@ RelationPutHeapTuple(Relation relation,
/* Add the tuple to the page */
pageHeader = BufferGetPage(buffer);
+ HeapTupleHeaderStoreXmin(pageHeader, tuple, IsToastRelation(relation));
+ HeapTupleHeaderStoreXmax(pageHeader, tuple, IsToastRelation(relation));
+
offnum = PageAddItem(pageHeader, (Item) tuple->t_data,
tuple->t_len, InvalidOffsetNumber, false, true);
@@ -361,7 +365,17 @@ RelationAddBlocks(Relation relation, BulkInsertState bistate,
first_block,
RelationGetRelationName(relation));
- PageInit(page, BufferGetPageSize(buffer), 0);
+ if (IsToastRelation(relation))
+ {
+ PageInit(page, BufferGetPageSize(buffer), sizeof(ToastPageSpecialData));
+ ToastPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId;
+ }
+ else
+ {
+ PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData));
+ HeapPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId;
+ }
+
MarkBufferDirty(buffer);
/*
@@ -394,7 +408,7 @@ RelationAddBlocks(Relation relation, BulkInsertState bistate,
if (use_fsm && i >= not_in_fsm_pages)
{
Size freespace = BufferGetPageSize(victim_buffers[i]) -
- SizeOfPageHeaderData;
+ SizeOfPageHeaderData - MAXALIGN(sizeof(HeapPageSpecialData));
RecordPageWithFreeSpace(relation, curBlock, freespace);
}
@@ -685,6 +699,9 @@ loop:
/*
* Now we can check to see if there's enough free space here. If so,
* we're done.
+ *
+ * "Double xmax" page is not suitable for any new tuple, since xmin
+ * can't be set there.
*/
page = BufferGetPage(buffer);
@@ -696,12 +713,23 @@ loop:
*/
if (PageIsNew(page))
{
- PageInit(page, BufferGetPageSize(buffer), 0);
+ if (IsToastRelation(relation))
+ {
+ PageInit(page, BufferGetPageSize(buffer), sizeof(ToastPageSpecialData));
+ ToastPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId;
+ }
+ else
+ {
+ PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData));
+ HeapPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId;
+ }
+
MarkBufferDirty(buffer);
}
pageFreeSpace = PageGetHeapFreeSpace(page);
- if (targetFreeSpace <= pageFreeSpace)
+ if (targetFreeSpace <= pageFreeSpace &&
+ !HeapPageIsDoubleXmax(page))
{
/* use this page as future insert target, too */
RelationSetTargetBlock(relation, targetBlock);
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index c5f1abd95a..9ab46bfc31 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -70,6 +70,17 @@ static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
static void page_verify_redirects(Page page);
+static inline bool
+XidFitsPage(Page page, TransactionId xid, bool is_toast)
+{
+ TransactionId base;
+
+ base = is_toast ? ToastPageGetSpecial(page)->pd_xid_base :
+ HeapPageGetSpecial(page)->pd_xid_base;
+
+ return xid >= base + FirstNormalTransactionId &&
+ xid <= base + MaxShortTransactionId;
+}
/*
* Optionally prune and repair fragmentation in the specified page.
@@ -104,7 +115,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
* determining the appropriate horizon is a waste if there's no prune_xid
* (i.e. no updates/deletes left potentially dead tuples around).
*/
- prune_xid = ((PageHeader) page)->pd_prune_xid;
+ prune_xid = HeapPageGetPruneXidNoAssert(page, IsToastRelation(relation));
+
if (!TransactionIdIsValid(prune_xid))
return;
@@ -148,7 +160,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
{
PruneResult presult;
- heap_page_prune(relation, buffer, vistest, &presult, NULL);
+ heap_page_prune(relation, buffer, vistest, &presult, NULL, false);
/*
* Report the number of tuples reclaimed to pgstats. This is
@@ -204,7 +216,8 @@ void
heap_page_prune(Relation relation, Buffer buffer,
GlobalVisState *vistest,
PruneResult *presult,
- OffsetNumber *off_loc)
+ OffsetNumber *off_loc,
+ bool repairFragmentation)
{
Page page = BufferGetPage(buffer);
BlockNumber blockno = BufferGetBlockNumber(buffer);
@@ -278,6 +291,8 @@ heap_page_prune(Relation relation, Buffer buffer,
htup = (HeapTupleHeader) PageGetItem(page, itemid);
tup.t_data = htup;
tup.t_len = ItemIdGetLength(itemid);
+ HeapTupleCopyXidsFromPage(buffer, &tup, page,
+ IsToastRelation(relation));
ItemPointerSet(&(tup.t_self), blockno, offnum);
/*
@@ -333,13 +348,17 @@ heap_page_prune(Relation relation, Buffer buffer,
heap_page_prune_execute(buffer,
prstate.redirected, prstate.nredirected,
prstate.nowdead, prstate.ndead,
- prstate.nowunused, prstate.nunused);
+ prstate.nowunused, prstate.nunused,
+ repairFragmentation,
+ IsToastRelation(relation));
/*
* Update the page's pd_prune_xid field to either zero, or the lowest
* XID of any soon-prunable tuple.
*/
- ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
+ if (XidFitsPage(page, prstate.new_prune_xid, IsToastRelation(relation)))
+ HeapPageSetPruneXid(page, prstate.new_prune_xid,
+ IsToastRelation(relation));
/*
* Also clear the "page is full" flag, since there's no point in
@@ -362,6 +381,13 @@ heap_page_prune(Relation relation, Buffer buffer,
xlrec.snapshotConflictHorizon = prstate.snapshotConflictHorizon;
xlrec.nredirected = prstate.nredirected;
xlrec.ndead = prstate.ndead;
+ xlrec.flags = 0;
+
+ if (IsToastRelation(relation))
+ xlrec.flags |= XLH_PRUNE_ON_TOAST_RELATION;
+
+ if (repairFragmentation)
+ xlrec.flags |= XLH_PRUNE_REPAIR_FRAGMENTATION;
XLogBeginInsert();
XLogRegisterData((char *) &xlrec, SizeOfHeapPrune);
@@ -402,10 +428,12 @@ heap_page_prune(Relation relation, Buffer buffer,
* point in repeating the prune/defrag process until something else
* happens to the page.
*/
- if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
+ bool is_toast = IsToastRelation(relation);
+
+ if (HeapPageGetPruneXid(page, is_toast) != prstate.new_prune_xid ||
PageIsFull(page))
{
- ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
+ HeapPageSetPruneXid(page, prstate.new_prune_xid, is_toast);
PageClearFull(page);
MarkBufferDirtyHint(buffer, true);
}
@@ -485,6 +513,9 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
OffsetNumber chainitems[MaxHeapTuplesPerPage];
int nchain = 0,
i;
+ HeapTupleData tup;
+
+ tup.t_tableOid = RelationGetRelid(prstate->rel);
rootlp = PageGetItemId(dp, rootoffnum);
@@ -496,6 +527,12 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
Assert(htsv[rootoffnum] != -1);
htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
+ tup.t_data = htup;
+ tup.t_len = ItemIdGetLength(rootlp);
+ ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), rootoffnum);
+ HeapTupleCopyXidsFromPage(buffer, &tup, dp,
+ IsToastRelation(prstate->rel));
+
if (HeapTupleHeaderIsHeapOnly(htup))
{
/*
@@ -520,7 +557,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
!HeapTupleHeaderIsHotUpdated(htup))
{
heap_prune_record_unused(prstate, rootoffnum);
- HeapTupleHeaderAdvanceConflictHorizon(htup,
+ HeapTupleHeaderAdvanceConflictHorizon(&tup,
&prstate->snapshotConflictHorizon);
ndeleted++;
}
@@ -586,11 +623,17 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
Assert(ItemIdIsNormal(lp));
htup = (HeapTupleHeader) PageGetItem(dp, lp);
+ tup.t_data = htup;
+ tup.t_len = ItemIdGetLength(lp);
+ HeapTupleCopyXidsFromPage(buffer, &tup, dp,
+ IsToastRelation(prstate->rel));
+ ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum);
+
/*
* Check the tuple XMIN against prior XMAX, if any
*/
if (TransactionIdIsValid(priorXmax) &&
- !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
+ !TransactionIdEquals(HeapTupleGetXmin(&tup), priorXmax))
break;
/*
@@ -617,7 +660,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
* that the page is reconsidered for pruning in future.
*/
heap_prune_record_prunable(prstate,
- HeapTupleHeaderGetUpdateXid(htup));
+ HeapTupleGetUpdateXidAny(&tup));
break;
case HEAPTUPLE_DELETE_IN_PROGRESS:
@@ -627,7 +670,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
* that the page is reconsidered for pruning in future.
*/
heap_prune_record_prunable(prstate,
- HeapTupleHeaderGetUpdateXid(htup));
+ HeapTupleGetUpdateXidAny(&tup));
break;
case HEAPTUPLE_LIVE:
@@ -656,7 +699,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
if (tupdead)
{
latestdead = offnum;
- HeapTupleHeaderAdvanceConflictHorizon(htup,
+ HeapTupleHeaderAdvanceConflictHorizon(&tup,
&prstate->snapshotConflictHorizon);
}
else if (!recent_dead)
@@ -678,7 +721,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
Assert(ItemPointerGetBlockNumber(&htup->t_ctid) ==
BufferGetBlockNumber(buffer));
offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
- priorXmax = HeapTupleHeaderGetUpdateXid(htup);
+ priorXmax = HeapTupleGetUpdateXidAny(&tup);
}
/*
@@ -795,7 +838,9 @@ void
heap_page_prune_execute(Buffer buffer,
OffsetNumber *redirected, int nredirected,
OffsetNumber *nowdead, int ndead,
- OffsetNumber *nowunused, int nunused)
+ OffsetNumber *nowunused, int nunused,
+ bool repairFragmentation,
+ bool is_toast)
{
Page page = (Page) BufferGetPage(buffer);
OffsetNumber *offnum;
@@ -919,7 +964,8 @@ heap_page_prune_execute(Buffer buffer,
* Finally, repair any fragmentation, and update the page's hint bit about
* whether it has free pointers.
*/
- PageRepairFragmentation(page);
+ if (repairFragmentation)
+ PageRepairFragmentation(page, is_toast);
/*
* Now that the page has been modified, assert that redirect items still
@@ -991,7 +1037,8 @@ page_verify_redirects(Page page)
* and reused by a completely unrelated tuple.
*/
void
-heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
+heap_get_root_tuples(Relation relation, Buffer buffer, Page page,
+ OffsetNumber *root_offsets)
{
OffsetNumber offnum,
maxoff;
@@ -1006,6 +1053,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
HeapTupleHeader htup;
OffsetNumber nextoffnum;
TransactionId priorXmax;
+ HeapTupleData tup;
/* skip unused and dead items */
if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
@@ -1014,6 +1062,9 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
if (ItemIdIsNormal(lp))
{
htup = (HeapTupleHeader) PageGetItem(page, lp);
+ tup.t_data = htup;
+ HeapTupleCopyXidsFromPage(buffer, &tup, page,
+ IsToastRelation(relation));
/*
* Check if this tuple is part of a HOT-chain rooted at some other
@@ -1035,7 +1086,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
/* Set up to scan the HOT-chain */
nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
- priorXmax = HeapTupleHeaderGetUpdateXid(htup);
+ priorXmax = HeapTupleGetUpdateXidAny(&tup);
}
else
{
@@ -1074,9 +1125,12 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
break;
htup = (HeapTupleHeader) PageGetItem(page, lp);
+ tup.t_data = htup;
+ HeapTupleCopyXidsFromPage(buffer, &tup, page,
+ IsToastRelation(relation));
if (TransactionIdIsValid(priorXmax) &&
- !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
+ !TransactionIdEquals(priorXmax, HeapTupleGetXmin(&tup)))
break;
/* Remember the root line pointer for this item */
@@ -1090,7 +1144,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
- priorXmax = HeapTupleHeaderGetUpdateXid(htup);
+ priorXmax = HeapTupleGetUpdateXidAny(&tup);
}
}
}
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index 424958912c..2b485cdc72 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -378,6 +378,7 @@ rewrite_heap_tuple(RewriteState state,
&old_tuple->t_data->t_choice.t_heap,
sizeof(HeapTupleFields));
+ HeapTupleCopyXids(new_tuple, old_tuple);
new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK;
new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK;
new_tuple->t_data->t_infomask |=
@@ -387,7 +388,7 @@ rewrite_heap_tuple(RewriteState state,
* While we have our hands on the tuple, we may as well freeze any
* eligible xmin or xmax, so that future VACUUM effort can be saved.
*/
- heap_freeze_tuple(new_tuple->t_data,
+ heap_freeze_tuple(new_tuple,
state->rs_old_rel->rd_rel->relfrozenxid,
state->rs_old_rel->rd_rel->relminmxid,
state->rs_freeze_xid,
@@ -403,7 +404,7 @@ rewrite_heap_tuple(RewriteState state,
* If the tuple has been updated, check the old-to-new mapping hash table.
*/
if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
- HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) &&
+ HeapTupleIsOnlyLocked(old_tuple)) &&
!HeapTupleHeaderIndicatesMovedPartitions(old_tuple->t_data) &&
!(ItemPointerEquals(&(old_tuple->t_self),
&(old_tuple->t_data->t_ctid))))
@@ -411,7 +412,7 @@ rewrite_heap_tuple(RewriteState state,
OldToNewMapping mapping;
memset(&hashkey, 0, sizeof(hashkey));
- hashkey.xmin = HeapTupleHeaderGetUpdateXid(old_tuple->t_data);
+ hashkey.xmin = HeapTupleGetUpdateXidAny(old_tuple);
hashkey.tid = old_tuple->t_data->t_ctid;
mapping = (OldToNewMapping)
@@ -484,7 +485,7 @@ rewrite_heap_tuple(RewriteState state,
* RECENTLY_DEAD if and only if the xmin is not before OldestXmin.
*/
if ((new_tuple->t_data->t_infomask & HEAP_UPDATED) &&
- !TransactionIdPrecedes(HeapTupleHeaderGetXmin(new_tuple->t_data),
+ !TransactionIdPrecedes(HeapTupleGetXmin(new_tuple),
state->rs_oldest_xmin))
{
/*
@@ -493,7 +494,7 @@ rewrite_heap_tuple(RewriteState state,
UnresolvedTup unresolved;
memset(&hashkey, 0, sizeof(hashkey));
- hashkey.xmin = HeapTupleHeaderGetXmin(new_tuple->t_data);
+ hashkey.xmin = HeapTupleGetXmin(new_tuple);
hashkey.tid = old_tid;
unresolved = hash_search(state->rs_unresolved_tups, &hashkey,
@@ -581,7 +582,7 @@ rewrite_heap_dead_tuple(RewriteState state, HeapTuple old_tuple)
bool found;
memset(&hashkey, 0, sizeof(hashkey));
- hashkey.xmin = HeapTupleHeaderGetXmin(old_tuple->t_data);
+ hashkey.xmin = HeapTupleGetXmin(old_tuple);
hashkey.tid = old_tuple->t_self;
unresolved = hash_search(state->rs_unresolved_tups, &hashkey,
@@ -617,6 +618,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
Size len;
OffsetNumber newoff;
HeapTuple heaptup;
+ TransactionId xmin;
+ bool immutable_tuple;
/*
* If the new tuple is too big for storage or contains already toasted
@@ -651,9 +654,19 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
len = MAXALIGN(heaptup->t_len); /* be conservative */
/*
- * If we're gonna fail for oversize tuple, do it right away
+ * Due to update to 64-xid maximum plain tuple size was decreased due to adding
+ * PageSpecial to a heap page. Pages with tuple that became too large to fit,
+ * should remain in Double Xmax format (read only). Inserting plain tuples with
+ * size over new MaxHeapTupleSizs is prohibited anyway, but vaccum full will
+ * transfer this page to a rebuild relation unmodified.
*/
- if (len > MaxHeapTupleSize)
+ immutable_tuple = len <= MaxHeapTupleSize_32 && len > MaxHeapTupleSize;
+
+ /*
+ * If we're gonna fail for oversize tuple, do it right away. But allow to process
+ * immutable_tuple (see above).
+ */
+ if (len > MaxHeapTupleSize && !immutable_tuple)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("row is too big: size %zu, maximum size %zu",
@@ -702,10 +715,42 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
if (!state->rs_buffer_valid)
{
/* Initialize a new empty page */
- PageInit(page, BLCKSZ, 0);
+ if (immutable_tuple)
+ /* Initialize DoubleXmax page */
+ PageInit(page, BLCKSZ, 0);
+ else
+ {
+ Size special_size;
+
+ special_size = IsToastRelation(state->rs_new_rel) ?
+ sizeof(ToastPageSpecialData) :
+ sizeof(HeapPageSpecialData);
+ PageInit(page, BLCKSZ, special_size);
+ }
state->rs_buffer_valid = true;
}
+ rewrite_page_prepare_for_xid(page, heaptup,
+ IsToastRelation(state->rs_new_rel));
+
+ /*
+ * Tuple with HEAP_XMIN_FROZEN in t_infomask should have xmin set
+ * to FrozenTransactionId to avoid these tuples be treated like normal.
+ */
+ xmin = HeapTupleGetXmin(heaptup);
+ HeapTupleSetXmin(heaptup, xmin);
+
+ /*
+ * Tuples on DoubleXmax page could not appear modified after they had been
+ * frozen by pg_upgrade. Just check this to be safe.
+ */
+ Assert(!immutable_tuple || xmin == FrozenTransactionId);
+
+ if (!immutable_tuple)
+ HeapTupleAndHeaderSetXmin(page, heaptup, xmin, false);
+
+ HeapTupleHeaderStoreXmax(page, heaptup, false);
+
/* And now we can insert the tuple into the page */
newoff = PageAddItem(page, (Item) heaptup->t_data, heaptup->t_len,
InvalidOffsetNumber, false, true);
@@ -986,19 +1031,24 @@ logical_rewrite_log_mapping(RewriteState state, TransactionId xid,
*/
if (!found)
{
- char path[MAXPGPATH];
- Oid dboid;
+ char path[MAXPGPATH];
+ Oid dboid;
+ TransactionId current_xid;
if (state->rs_old_rel->rd_rel->relisshared)
dboid = InvalidOid;
else
dboid = MyDatabaseId;
+ current_xid = GetCurrentTransactionId();
snprintf(path, MAXPGPATH,
"pg_logical/mappings/" LOGICAL_REWRITE_FORMAT,
dboid, relid,
LSN_FORMAT_ARGS(state->rs_begin_lsn),
- xid, GetCurrentTransactionId());
+ (uint32) (xid >> 32),
+ (uint32) xid,
+ (uint32) (current_xid >> 32),
+ (uint32) current_xid);
dclist_init(&src->mappings);
src->off = 0;
@@ -1045,9 +1095,9 @@ logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid,
if (!state->rs_logical_rewrite)
return;
- xmin = HeapTupleHeaderGetXmin(new_tuple->t_data);
+ xmin = HeapTupleGetXmin(new_tuple);
/* use *GetUpdateXid to correctly deal with multixacts */
- xmax = HeapTupleHeaderGetUpdateXid(new_tuple->t_data);
+ xmax = HeapTupleGetUpdateXidAny(new_tuple);
/*
* Log the mapping iff the tuple has been created recently.
@@ -1111,14 +1161,19 @@ heap_xlog_logical_rewrite(XLogReaderState *r)
xl_heap_rewrite_mapping *xlrec;
uint32 len;
char *data;
+ TransactionId xid;
xlrec = (xl_heap_rewrite_mapping *) XLogRecGetData(r);
+ xid = XLogRecGetXid(r);
snprintf(path, MAXPGPATH,
"pg_logical/mappings/" LOGICAL_REWRITE_FORMAT,
xlrec->mapped_db, xlrec->mapped_rel,
LSN_FORMAT_ARGS(xlrec->start_lsn),
- xlrec->mapped_xid, XLogRecGetXid(r));
+ (uint32) (xlrec->mapped_xid >> 32),
+ (uint32) xlrec->mapped_xid,
+ (uint32) (xid >> 32),
+ (uint32) xid);
fd = OpenTransientFile(path,
O_CREAT | O_WRONLY | PG_BINARY);
@@ -1213,10 +1268,12 @@ CheckPointLogicalRewriteHeap(void)
Oid dboid;
Oid relid;
XLogRecPtr lsn;
- TransactionId rewrite_xid;
- TransactionId create_xid;
- uint32 hi,
- lo;
+ uint32 lsn_hi,
+ lsn_lo,
+ rewrite_xid_hi,
+ rewrite_xid_lo,
+ create_xid_hi,
+ create_xid_lo;
PGFileType de_type;
if (strcmp(mapping_de->d_name, ".") == 0 ||
@@ -1234,10 +1291,12 @@ CheckPointLogicalRewriteHeap(void)
continue;
if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT,
- &dboid, &relid, &hi, &lo, &rewrite_xid, &create_xid) != 6)
+ &dboid, &relid, &lsn_hi, &lsn_lo,
+ &rewrite_xid_hi, &rewrite_xid_lo,
+ &create_xid_hi, &create_xid_lo) != 8)
elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name);
- lsn = ((uint64) hi) << 32 | lo;
+ lsn = ((uint64) lsn_hi) << 32 | lsn_lo;
if (lsn < cutoff || cutoff == InvalidXLogRecPtr)
{
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 3623f13b07..3afd41b9b9 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -46,6 +46,7 @@
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "catalog/index.h"
+#include "catalog/catalog.h"
#include "catalog/storage.h"
#include "commands/dbcommands.h"
#include "commands/progress.h"
@@ -259,7 +260,6 @@ static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
static void lazy_vacuum_heap_rel(LVRelState *vacrel);
static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
Buffer buffer, int index, Buffer vmbuffer);
-static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
static void lazy_cleanup_all_indexes(LVRelState *vacrel);
static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
IndexBulkDeleteResult *istat,
@@ -496,7 +496,6 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
* ensure that parallel VACUUM won't be attempted at all when relfrozenxid
* is already dangerously old.)
*/
- lazy_check_wraparound_failsafe(vacrel);
dead_items_alloc(vacrel, params->nworkers);
/*
@@ -613,7 +612,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
WalUsage walusage;
StringInfoData buf;
char *msgfmt;
- int32 diff;
+ int64 diff;
int64 PageHitOp = VacuumPageHit - StartPageHit,
PageMissOp = VacuumPageMiss - StartPageMiss,
PageDirtyOp = VacuumPageDirty - StartPageDirty;
@@ -666,16 +665,17 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
orig_rel_pages == 0 ? 100.0 :
100.0 * vacrel->scanned_pages / orig_rel_pages);
appendStringInfo(&buf,
- _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
+ _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable, oldest xmin: %llu\n"),
(long long) vacrel->tuples_deleted,
(long long) vacrel->new_rel_tuples,
- (long long) vacrel->recently_dead_tuples);
+ (long long) vacrel->recently_dead_tuples,
+ (unsigned long long) vacrel->cutoffs.OldestXmin);
if (vacrel->missed_dead_tuples > 0)
appendStringInfo(&buf,
_("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
(long long) vacrel->missed_dead_tuples,
vacrel->missed_dead_pages);
- diff = (int32) (ReadNextTransactionId() -
+ diff = (int64) (ReadNextTransactionId() -
vacrel->cutoffs.OldestXmin);
appendStringInfo(&buf,
_("removable cutoff: %llu, which was %lld XIDs old when operation ended\n"),
@@ -683,7 +683,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
(long long) diff);
if (frozenxid_updated)
{
- diff = (int32) (vacrel->NewRelfrozenXid -
+ diff = (int64) (vacrel->NewRelfrozenXid -
vacrel->cutoffs.relfrozenxid);
appendStringInfo(&buf,
_("new relfrozenxid: %llu, which is %lld XIDs ahead of previous value\n"),
@@ -692,7 +692,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
}
if (minmulti_updated)
{
- diff = (int32) (vacrel->NewRelminMxid -
+ diff = (int64) (vacrel->NewRelminMxid -
vacrel->cutoffs.relminmxid);
appendStringInfo(&buf,
_("new relminmxid: %llu, which is %lld MXIDs ahead of previous value\n"),
@@ -894,18 +894,6 @@ lazy_scan_heap(LVRelState *vacrel)
vacuum_delay_point();
- /*
- * Regularly check if wraparound failsafe should trigger.
- *
- * There is a similar check inside lazy_vacuum_all_indexes(), but
- * relfrozenxid might start to look dangerously old before we reach
- * that point. This check also provides failsafe coverage for the
- * one-pass strategy, and the two-pass strategy with the index_cleanup
- * param set to 'off'.
- */
- if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
- lazy_check_wraparound_failsafe(vacrel);
-
/*
* Consider if we definitely have enough space to process TIDs on page
* already. If we are close to overrunning the available space for
@@ -1448,7 +1436,14 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
{
- freespace = BLCKSZ - SizeOfPageHeaderData;
+ Size special_size;
+
+ special_size = IsToastRelation(vacrel->rel) ?
+ sizeof(ToastPageSpecialData) :
+ sizeof(HeapPageSpecialData);
+ freespace = BufferGetPageSize(buf)
+ - SizeOfPageHeaderData
+ - special_size;
RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
}
@@ -1552,6 +1547,7 @@ lazy_scan_prune(LVRelState *vacrel,
OffsetNumber offnum,
maxoff;
ItemId itemid;
+ HeapTupleData tuple;
PruneResult presult;
int tuples_frozen,
lpdead_items,
@@ -1590,7 +1586,7 @@ lazy_scan_prune(LVRelState *vacrel,
* lpdead_items's final value can be thought of as the number of tuples
* that were deleted from indexes.
*/
- heap_page_prune(rel, buf, vacrel->vistest, &presult, &vacrel->offnum);
+ heap_page_prune(rel, buf, vacrel->vistest, &presult, &vacrel->offnum, true);
/*
* Now scan the page to collect LP_DEAD items and check for tuples
@@ -1650,6 +1646,11 @@ lazy_scan_prune(LVRelState *vacrel,
Assert(ItemIdIsNormal(itemid));
htup = (HeapTupleHeader) PageGetItem(page, itemid);
+ ItemPointerSet(&(tuple.t_self), blkno, offnum);
+ tuple.t_data = htup;
+ tuple.t_len = ItemIdGetLength(itemid);
+ tuple.t_tableOid = RelationGetRelid(rel);
+ HeapTupleCopyXidsFromPage(buf, &tuple, page, IsToastRelation(rel));
/*
* The criteria for counting a tuple as live in this block need to
@@ -1702,7 +1703,7 @@ lazy_scan_prune(LVRelState *vacrel,
* The inserter definitely committed. But is it old enough
* that everyone sees it as committed?
*/
- xmin = HeapTupleHeaderGetXmin(htup);
+ xmin = HeapTupleGetXmin(&tuple);
if (!TransactionIdPrecedes(xmin,
vacrel->cutoffs.OldestXmin))
{
@@ -1756,7 +1757,7 @@ lazy_scan_prune(LVRelState *vacrel,
prunestate->hastup = true; /* page makes rel truncation unsafe */
/* Tuple with storage -- consider need to freeze */
- if (heap_prepare_freeze_tuple(htup, &vacrel->cutoffs, &pagefrz,
+ if (heap_prepare_freeze_tuple(&tuple, &vacrel->cutoffs, &pagefrz,
&frozen[tuples_frozen], &totally_frozen))
{
/* Save prepared freeze plan for later */
@@ -1956,7 +1957,6 @@ lazy_scan_noprune(LVRelState *vacrel,
live_tuples,
recently_dead_tuples,
missed_dead_tuples;
- HeapTupleHeader tupleheader;
TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid;
OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
@@ -2002,8 +2002,13 @@ lazy_scan_noprune(LVRelState *vacrel,
}
*hastup = true; /* page prevents rel truncation */
- tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
- if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs,
+ tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+ tuple.t_len = ItemIdGetLength(itemid);
+ tuple.t_tableOid = RelationGetRelid(vacrel->rel);
+ HeapTupleCopyXidsFromPage(buf, &tuple, page,
+ IsToastRelation(vacrel->rel));
+ ItemPointerSet(&(tuple.t_self), blkno, offnum);
+ if (heap_tuple_should_freeze(&tuple, &vacrel->cutoffs,
&NoFreezePageRelfrozenXid,
&NoFreezePageRelminMxid))
{
@@ -2039,6 +2044,8 @@ lazy_scan_noprune(LVRelState *vacrel,
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
tuple.t_len = ItemIdGetLength(itemid);
tuple.t_tableOid = RelationGetRelid(vacrel->rel);
+ HeapTupleCopyXidsFromPage(buf, &tuple, page,
+ IsToastRelation(vacrel->rel));
switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
buf))
@@ -2322,13 +2329,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
Assert(vacrel->do_index_vacuuming);
Assert(vacrel->do_index_cleanup);
- /* Precheck for XID wraparound emergencies */
- if (lazy_check_wraparound_failsafe(vacrel))
- {
- /* Wraparound emergency -- don't even start an index scan */
- return false;
- }
-
/*
* Report that we are now vacuuming indexes and the number of indexes to
* vacuum.
@@ -2352,12 +2352,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED,
idx + 1);
- if (lazy_check_wraparound_failsafe(vacrel))
- {
- /* Wraparound emergency -- end current index scan */
- allindexes = false;
- break;
- }
}
}
else
@@ -2365,13 +2359,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
/* Outsource everything to parallel variant */
parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples,
vacrel->num_index_scans);
-
- /*
- * Do a postcheck to consider applying wraparound failsafe now. Note
- * that parallel VACUUM only gets the precheck and this postcheck.
- */
- if (lazy_check_wraparound_failsafe(vacrel))
- allindexes = false;
}
/*
@@ -2612,68 +2599,6 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
return index;
}
-/*
- * Trigger the failsafe to avoid wraparound failure when vacrel table has a
- * relfrozenxid and/or relminmxid that is dangerously far in the past.
- * Triggering the failsafe makes the ongoing VACUUM bypass any further index
- * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
- *
- * Any remaining work (work that VACUUM cannot just bypass) is typically sped
- * up when the failsafe triggers. VACUUM stops applying any cost-based delay
- * that it started out with.
- *
- * Returns true when failsafe has been triggered.
- */
-static bool
-lazy_check_wraparound_failsafe(LVRelState *vacrel)
-{
- /* Don't warn more than once per VACUUM */
- if (VacuumFailsafeActive)
- return true;
-
- if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs)))
- {
- const int progress_index[] = {
- PROGRESS_VACUUM_INDEXES_TOTAL,
- PROGRESS_VACUUM_INDEXES_PROCESSED
- };
- int64 progress_val[2] = {0, 0};
-
- VacuumFailsafeActive = true;
-
- /*
- * Abandon use of a buffer access strategy to allow use of all of
- * shared buffers. We assume the caller who allocated the memory for
- * the BufferAccessStrategy will free it.
- */
- vacrel->bstrategy = NULL;
-
- /* Disable index vacuuming, index cleanup, and heap rel truncation */
- vacrel->do_index_vacuuming = false;
- vacrel->do_index_cleanup = false;
- vacrel->do_rel_truncate = false;
-
- /* Reset the progress counters */
- pgstat_progress_update_multi_param(2, progress_index, progress_val);
-
- ereport(WARNING,
- (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
- vacrel->dbname, vacrel->relnamespace, vacrel->relname,
- vacrel->num_index_scans),
- errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
- errhint("Consider increasing configuration parameter maintenance_work_mem or autovacuum_work_mem.\n"
- "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
-
- /* Stop applying cost limits from this point on */
- VacuumCostActive = false;
- VacuumCostBalance = 0;
-
- return true;
- }
-
- return false;
-}
-
/*
* lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
*/
@@ -3319,7 +3244,8 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
tuple.t_len = ItemIdGetLength(itemid);
tuple.t_tableOid = RelationGetRelid(vacrel->rel);
-
+ HeapTupleCopyXidsFromPage(buf, &tuple, page,
+ IsToastRelation(vacrel->rel));
switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
buf))
{
@@ -3339,7 +3265,7 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
* The inserter definitely committed. But is it old enough
* that everyone sees it as committed?
*/
- xmin = HeapTupleHeaderGetXmin(tuple.t_data);
+ xmin = HeapTupleGetXmin(&tuple);
if (!TransactionIdPrecedes(xmin,
vacrel->cutoffs.OldestXmin))
{
@@ -3355,7 +3281,7 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
/* Check whether this tuple is already frozen or not */
if (all_visible && *all_frozen &&
- heap_tuple_needs_eventual_freeze(tuple.t_data))
+ heap_tuple_needs_eventual_freeze(&tuple))
*all_frozen = false;
}
break;
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index b7660a459e..726edb24a3 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -549,6 +549,7 @@ _bt_getroot(Relation rel, Relation heaprel, int access)
rootblkno = rootopaque->btpo_next;
}
+ /* Note: can't check btpo_level on deleted pages */
if (rootopaque->btpo_level != rootlevel)
elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u",
rootblkno, RelationGetRelationName(rel),
@@ -652,6 +653,7 @@ _bt_gettrueroot(Relation rel)
rootblkno = rootopaque->btpo_next;
}
+ /* Note: can't check btpo_level on deleted pages */
if (rootopaque->btpo_level != rootlevel)
elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u",
rootblkno, RelationGetRelationName(rel),
diff --git a/src/backend/access/nbtree/nbtsplitloc.c b/src/backend/access/nbtree/nbtsplitloc.c
index 85834c3dd7..fd985f9282 100644
--- a/src/backend/access/nbtree/nbtsplitloc.c
+++ b/src/backend/access/nbtree/nbtsplitloc.c
@@ -140,6 +140,7 @@ _bt_findsplitloc(Relation rel,
olddataitemstoleft,
perfectpenalty,
leaffillfactor;
+ int maxTupleEnd PG_USED_FOR_ASSERTS_ONLY;
FindSplitData state;
FindSplitStrat strategy;
ItemId itemid;
@@ -153,6 +154,7 @@ _bt_findsplitloc(Relation rel,
opaque = BTPageGetOpaque(origpage);
maxoff = PageGetMaxOffsetNumber(origpage);
+ maxTupleEnd = ItemIdGetTupleEnd(PageGetItemId(origpage, P_HIKEY));
/* Total free space available on a btree page, after fixed overhead */
leftspace = rightspace =
@@ -214,6 +216,18 @@ _bt_findsplitloc(Relation rel,
itemid = PageGetItemId(origpage, offnum);
itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
+#ifdef USE_ASSERT_CHECKING
+
+ /*
+ * Ending of rightmost tuple on a page can be shifted relative to left
+ * boundary of BTPageOpaqueData due to conversion from EE96, which
+ * used different BTPageOpaqueData layout. It is only checked in the
+ * assert below.
+ */
+ if (maxTupleEnd < ItemIdGetTupleEnd(itemid))
+ maxTupleEnd = ItemIdGetTupleEnd(itemid);
+#endif
+
/*
* When item offset number is not newitemoff, neither side of the
* split can be newitem. Record a split after the previous data item
@@ -248,7 +262,7 @@ _bt_findsplitloc(Relation rel,
* (Though only when it's possible that newitem will end up alone on new
* right page.)
*/
- Assert(olddataitemstoleft == olddataitemstotal);
+ Assert(olddataitemstoleft + ((PageHeader) origpage)->pd_special - maxTupleEnd == olddataitemstotal);
if (newitemoff > maxoff)
_bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0);
diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c
index baac2a65b3..fd9ca7df94 100644
--- a/src/backend/access/rmgrdesc/gistdesc.c
+++ b/src/backend/access/rmgrdesc/gistdesc.c
@@ -29,7 +29,7 @@ out_gistxlogPageReuse(StringInfo buf, gistxlogPageReuse *xlrec)
appendStringInfo(buf, "rel %u/%u/%u; blk %u; snapshotConflictHorizon %llu",
xlrec->locator.spcOid, xlrec->locator.dbOid,
xlrec->locator.relNumber, xlrec->block,
- (unsigned long long) U64FromFullTransactionId(xlrec->snapshotConflictHorizon));
+ (unsigned long long) XidFromFullTransactionId(xlrec->snapshotConflictHorizon));
}
static void
@@ -51,7 +51,7 @@ static void
out_gistxlogPageDelete(StringInfo buf, gistxlogPageDelete *xlrec)
{
appendStringInfo(buf, "deleteXid %llu; downlink %u",
- (unsigned long long) U64FromFullTransactionId(xlrec->deleteXid),
+ (unsigned long long) XidFromFullTransactionId(xlrec->deleteXid),
xlrec->downlinkOffset);
}
diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c
index 1bf2c1ab85..4b6d6c3904 100644
--- a/src/backend/access/rmgrdesc/heapdesc.c
+++ b/src/backend/access/rmgrdesc/heapdesc.c
@@ -308,6 +308,23 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
}
}
+void
+heap3_desc(StringInfo buf, XLogReaderState *record)
+{
+ char *rec = XLogRecGetData(record);
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+
+ info &= XLOG_HEAP_OPMASK;
+ if (info == XLOG_HEAP3_BASE_SHIFT)
+ {
+ xl_heap_base_shift *xlrec = (xl_heap_base_shift *) rec;
+
+ appendStringInfo(buf, "%s delta %lld ",
+ xlrec->multi ? "MultiXactId" : "XactId",
+ (long long) xlrec->delta);
+ }
+}
+
const char *
heap_identify(uint8 info)
{
@@ -391,3 +408,18 @@ heap2_identify(uint8 info)
return id;
}
+
+const char *
+heap3_identify(uint8 info)
+{
+ const char *id = NULL;
+
+ switch (info & ~XLR_INFO_MASK)
+ {
+ case XLOG_HEAP3_BASE_SHIFT:
+ id = "BASE_SHIFT";
+ break;
+ }
+
+ return id;
+}
diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c
index f26664f32d..f42de19c87 100644
--- a/src/backend/access/rmgrdesc/nbtdesc.c
+++ b/src/backend/access/rmgrdesc/nbtdesc.c
@@ -95,7 +95,7 @@ btree_desc(StringInfo buf, XLogReaderState *record)
appendStringInfo(buf, "left: %u, right: %u, level: %u, safexid: %llu, ",
xlrec->leftsib, xlrec->rightsib, xlrec->level,
- (unsigned long long) U64FromFullTransactionId(xlrec->safexid));
+ (unsigned long long) XidFromFullTransactionId(xlrec->safexid));
appendStringInfo(buf, "leafleft: %u, leafright: %u, leaftopparent: %u",
xlrec->leafleftsib, xlrec->leafrightsib,
xlrec->leaftopparent);
@@ -115,7 +115,7 @@ btree_desc(StringInfo buf, XLogReaderState *record)
appendStringInfo(buf, "rel: %u/%u/%u, snapshotConflictHorizon: %llu",
xlrec->locator.spcOid, xlrec->locator.dbOid,
xlrec->locator.relNumber,
- (unsigned long long) U64FromFullTransactionId(xlrec->snapshotConflictHorizon));
+ (unsigned long long) XidFromFullTransactionId(xlrec->snapshotConflictHorizon));
break;
}
case XLOG_BTREE_META_CLEANUP:
diff --git a/src/backend/access/rmgrdesc/xactdesc.c b/src/backend/access/rmgrdesc/xactdesc.c
index 6b3e8fe9e5..dfc7741e79 100644
--- a/src/backend/access/rmgrdesc/xactdesc.c
+++ b/src/backend/access/rmgrdesc/xactdesc.c
@@ -110,7 +110,8 @@ ParseCommitRecord(uint8 info, xl_xact_commit *xlrec, xl_xact_parsed_commit *pars
{
xl_xact_twophase *xl_twophase = (xl_xact_twophase *) data;
- parsed->twophase_xid = xl_twophase->xid;
+ parsed->twophase_xid =
+ ((uint64) xl_twophase->xid_hi << 32) | xl_twophase->xid_lo;
data += sizeof(xl_xact_twophase);
@@ -205,7 +206,8 @@ ParseAbortRecord(uint8 info, xl_xact_abort *xlrec, xl_xact_parsed_abort *parsed)
{
xl_xact_twophase *xl_twophase = (xl_xact_twophase *) data;
- parsed->twophase_xid = xl_twophase->xid;
+ parsed->twophase_xid =
+ ((uint64) xl_twophase->xid_hi << 32) | xl_twophase->xid_lo;
data += sizeof(xl_xact_twophase);
diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c
index 48fb5022e0..108991680d 100644
--- a/src/backend/access/rmgrdesc/xlogdesc.c
+++ b/src/backend/access/rmgrdesc/xlogdesc.c
@@ -53,7 +53,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
checkpoint->ThisTimeLineID,
checkpoint->PrevTimeLineID,
checkpoint->fullPageWrites ? "true" : "false",
- (unsigned long long) U64FromFullTransactionId(checkpoint->nextXid),
+ (unsigned long long) XidFromFullTransactionId(checkpoint->nextXid),
checkpoint->nextOid,
(unsigned long long) checkpoint->nextMulti,
(unsigned long long) checkpoint->nextMultiOffset,
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 0e75dd2ca0..e96b739c8c 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -302,7 +302,7 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
* sub-XIDs and all of the XIDs for which we're adjusting clog should be
* on the same page. Check those conditions, too.
*/
- if (all_xact_same_page && xid == MyProc->xid &&
+ if (all_xact_same_page && xid == pg_atomic_read_u64(&MyProc->xid) &&
nsubxids <= THRESHOLD_SUBTRANS_CLOG_OPT &&
nsubxids == MyProc->subxidStatus.count &&
(nsubxids == 0 ||
@@ -920,24 +920,11 @@ TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid)
SimpleLruTruncate(XactCtl, cutoffPage);
}
-
/*
* Decide whether a CLOG page number is "older" for truncation purposes.
*
- * We need to use comparison of TransactionIds here in order to do the right
- * thing with wraparound XID arithmetic. However, TransactionIdPrecedes()
- * would get weird about permanent xact IDs. So, offset both such that xid1,
- * xid2, and xid2 + CLOG_XACTS_PER_PAGE - 1 are all normal XIDs; this offset
- * is relevant to page 0 and to the page preceding page 0.
- *
- * The page containing oldestXact-2^31 is the important edge case. The
- * portion of that page equaling or following oldestXact-2^31 is expendable,
- * but the portion preceding oldestXact-2^31 is not. When oldestXact-2^31 is
- * the first XID of a page and segment, the entire page and segment is
- * expendable, and we could truncate the segment. Recognizing that case would
- * require making oldestXact, not just the page containing oldestXact,
- * available to this callback. The benefit would be rare and small, so we
- * don't optimize that edge case.
+ * With 64xid this function is just "<", but we left it as a function in order
+ * for its calls remain "vanilla" like.
*/
static bool
CLOGPagePrecedes(int64 page1, int64 page2)
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index f157854e70..36582e5b4f 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -908,25 +908,6 @@ AdvanceOldestCommitTsXid(TransactionId oldestXact)
/*
* Decide whether a commitTS page number is "older" for truncation purposes.
* Analogous to CLOGPagePrecedes().
- *
- * At default BLCKSZ, (1 << 31) % COMMIT_TS_XACTS_PER_PAGE == 128. This
- * introduces differences compared to CLOG and the other SLRUs having (1 <<
- * 31) % per_page == 0. This function never tests exactly
- * TransactionIdPrecedes(x-2^31, x). When the system reaches xidStopLimit,
- * there are two possible counts of page boundaries between oldestXact and the
- * latest XID assigned, depending on whether oldestXact is within the first
- * 128 entries of its page. Since this function doesn't know the location of
- * oldestXact within page2, it returns false for one page that actually is
- * expendable. This is a wider (yet still negligible) version of the
- * truncation opportunity that CLOGPagePrecedes() cannot recognize.
- *
- * For the sake of a worked example, number entries with decimal values such
- * that page1==1 entries range from 1.0 to 1.999. Let N+0.15 be the number of
- * pages that 2^31 entries will span (N is an integer). If oldestXact=N+2.1,
- * then the final safe XID assignment leaves newestXact=1.95. We keep page 2,
- * because entry=2.85 is the border that toggles whether entries precede the
- * last entry of the oldestXact page. While page 2 is expendable at
- * oldestXact=N+2.1, it would be precious at oldestXact=N+2.9.
*/
static bool
CommitTsPagePrecedes(int64 page1, int64 page2)
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 4c30525250..c14efe56f6 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -112,15 +112,15 @@
((xid) / (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)
#define MultiXactIdToOffsetEntry(xid) \
((xid) % (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)
-#define MultiXactIdToOffsetSegment(xid) (MultiXactIdToOffsetPage(xid) / SLRU_PAGES_PER_SEGMENT)
+#define MultiXactIdToOffsetSegment(xid) ((uint64)(MultiXactIdToOffsetPage(xid) / SLRU_PAGES_PER_SEGMENT))
/*
* The situation for members is a bit more complex: we store one byte of
* additional flag bits for each TransactionId. To do this without getting
- * into alignment issues, we store four bytes of flags, and then the
- * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and
- * are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups
- * per page. This wastes 12 bytes per page, but that's OK -- simplicity (and
+ * into alignment issues, we store eight bytes of flags, and then the
+ * corresponding 8 Xids. Each such 9-word (72-byte) set we call a "group", and
+ * are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 113 groups
+ * per page. This wastes 56 bytes per page, but that's OK -- simplicity (and
* performance) trumps space efficiency here.
*
* Note that the "offset" macros work with byte offset, not array indexes, so
@@ -132,7 +132,7 @@
#define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
/* how many full bytes of flags are there in a group? */
-#define MULTIXACT_FLAGBYTES_PER_GROUP 4
+#define MULTIXACT_FLAGBYTES_PER_GROUP 8
#define MULTIXACT_MEMBERS_PER_MEMBERGROUP \
(MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
/* size in bytes of a complete group */
@@ -142,22 +142,9 @@
#define MULTIXACT_MEMBERS_PER_PAGE \
(MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
-/*
- * Because the number of items per page is not a divisor of the last item
- * number (member 0xFFFFFFFF), the last segment does not use the maximum number
- * of pages, and moreover the last used page therein does not use the same
- * number of items as previous pages. (Another way to say it is that the
- * 0xFFFFFFFF member is somewhere in the middle of the last page, so the page
- * has some empty space after that item.)
- *
- * This constant is the number of members in the last page of the last segment.
- */
-#define MAX_MEMBERS_IN_LAST_MEMBERS_PAGE \
- ((uint32) ((0xFFFFFFFF % MULTIXACT_MEMBERS_PER_PAGE) + 1))
-
/* page in which a member is to be found */
#define MXOffsetToMemberPage(xid) ((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)
-#define MXOffsetToMemberSegment(xid) (MXOffsetToMemberPage(xid) / SLRU_PAGES_PER_SEGMENT)
+#define MXOffsetToMemberSegment(xid) ((uint64)(MXOffsetToMemberPage(xid) / SLRU_PAGES_PER_SEGMENT))
/* Location (byte offset within page) of flag word for a given member */
#define MXOffsetToFlagsOffset(xid) \
@@ -216,22 +203,8 @@ typedef struct MultiXactStateData
MultiXactId oldestMultiXactId;
Oid oldestMultiXactDB;
- /*
- * Oldest multixact offset that is potentially referenced by a multixact
- * referenced by a relation. We don't always know this value, so there's
- * a flag here to indicate whether or not we currently do.
- */
- MultiXactOffset oldestOffset;
- bool oldestOffsetKnown;
-
/* support for anti-wraparound measures */
MultiXactId multiVacLimit;
- MultiXactId multiWarnLimit;
- MultiXactId multiStopLimit;
- MultiXactId multiWrapLimit;
-
- /* support for members anti-wraparound measures */
- MultiXactOffset offsetStopLimit; /* known if oldestOffsetKnown */
/*
* Per-backend data starts here. We have two arrays stored in the area
@@ -362,9 +335,6 @@ static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
MultiXactOffset offset2);
static void ExtendMultiXactOffset(MultiXactId multi);
static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
-static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary,
- MultiXactOffset start, uint32 distance);
-static bool SetOffsetVacuumLimit(bool is_startup);
static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
static void WriteMZeroPageXlogRec(int64 pageno, uint8 info);
static void WriteMTruncateXlogRec(Oid oldestMultiDB,
@@ -398,6 +368,9 @@ MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1,
/* MultiXactIdSetOldestMember() must have been called already. */
Assert(MultiXactIdIsValid(OldestMemberMXactId[MyBackendId]));
+ /* memset members array because with 64-bit xids it has a padding hole */
+ MemSet(members, 0, sizeof(members));
+
/*
* Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs
* are still running. In typical usage, xid2 will be our own XID and the
@@ -513,7 +486,7 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
* end of the loop.
*/
newMembers = (MultiXactMember *)
- palloc(sizeof(MultiXactMember) * (nmembers + 1));
+ palloc0(sizeof(MultiXactMember) * (nmembers + 1));
for (i = 0, j = 0; i < nmembers; i++)
{
@@ -528,7 +501,6 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
newMembers[j].xid = xid;
newMembers[j++].status = status;
-
newMulti = MultiXactIdCreateFromMembers(j, newMembers);
pfree(members);
@@ -905,8 +877,8 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
for (i = 0; i < nmembers; i++, offset++)
{
TransactionId *memberptr;
- uint32 *flagsptr;
- uint32 flagsval;
+ uint64 *flagsptr;
+ uint64 flagsval;
int bshift;
int flagsoff;
int memberoff;
@@ -929,12 +901,12 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
*memberptr = members[i].xid;
- flagsptr = (uint32 *)
+ flagsptr = (uint64 *)
(MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
flagsval = *flagsptr;
- flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
- flagsval |= (members[i].status << bshift);
+ flagsval &= ~((uint64) ((1ULL << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
+ flagsval |= ((uint64) members[i].status << bshift);
*flagsptr = flagsval;
MultiXactMemberCtl->shared->page_dirty[slotno] = true;
@@ -987,8 +959,6 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
* If we're past multiVacLimit or the safe threshold for member storage
* space, or we don't know what the safe threshold for member storage is,
* start trying to force autovacuum cycles.
- * If we're past multiWarnLimit, start issuing warnings.
- * If we're past multiStopLimit, refuse to create new MultiXactIds.
*
* Note these are pretty much the same protections in GetNewTransactionId.
*----------
@@ -1002,41 +972,9 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
* possibility of deadlock while doing get_database_name(). First,
* copy all the shared values we'll need in this path.
*/
- MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit;
- MultiXactId multiStopLimit = MultiXactState->multiStopLimit;
- MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit;
- Oid oldest_datoid = MultiXactState->oldestMultiXactDB;
LWLockRelease(MultiXactGenLock);
- if (IsUnderPostmaster &&
- !MultiXactIdPrecedes(result, multiStopLimit))
- {
- char *oldest_datname = get_database_name(oldest_datoid);
-
- /*
- * Immediately kick autovacuum into action as we're already in
- * ERROR territory.
- */
- SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-
- /* complain even if that DB has disappeared */
- if (oldest_datname)
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database \"%s\"",
- oldest_datname),
- errhint("Execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- else
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("database is not accepting commands that assign new MultiXactIds to avoid wraparound data loss in database with OID %u",
- oldest_datoid),
- errhint("Execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- }
-
/*
* To avoid swamping the postmaster with signals, we issue the autovac
* request only once per 64K multis generated. This still gives
@@ -1045,31 +983,6 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
if (IsUnderPostmaster && (result % 65536) == 0)
SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
- if (!MultiXactIdPrecedes(result, multiWarnLimit))
- {
- char *oldest_datname = get_database_name(oldest_datoid);
-
- /* complain even if that DB has disappeared */
- if (oldest_datname)
- ereport(WARNING,
- (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
- "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
- multiWrapLimit - result,
- oldest_datname,
- multiWrapLimit - result),
- errhint("Execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- else
- ereport(WARNING,
- (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
- "database with OID %u must be vacuumed before %u more MultiXactIds are used",
- multiWrapLimit - result,
- oldest_datoid,
- multiWrapLimit - result),
- errhint("Execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- }
-
/* Re-acquire lock and start over */
LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
result = MultiXactState->nextMXact;
@@ -1094,78 +1007,6 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
else
*offset = nextOffset;
- /*----------
- * Protect against overrun of the members space as well, with the
- * following rules:
- *
- * If we're past offsetStopLimit, refuse to generate more multis.
- * If we're close to offsetStopLimit, emit a warning.
- *
- * Arbitrarily, we start emitting warnings when we're 20 segments or less
- * from offsetStopLimit.
- *
- * Note we haven't updated the shared state yet, so if we fail at this
- * point, the multixact ID we grabbed can still be used by the next guy.
- *
- * Note that there is no point in forcing autovacuum runs here: the
- * multixact freeze settings would have to be reduced for that to have any
- * effect.
- *----------
- */
-#define OFFSET_WARN_SEGMENTS 20
- if (MultiXactState->oldestOffsetKnown &&
- MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset,
- nmembers))
- {
- /* see comment in the corresponding offsets wraparound case */
- SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("multixact \"members\" limit exceeded"),
- errdetail_plural("This command would create a multixact with %u members, but the remaining space is only enough for %u member.",
- "This command would create a multixact with %u members, but the remaining space is only enough for %u members.",
- MultiXactState->offsetStopLimit - nextOffset - 1,
- nmembers,
- MultiXactState->offsetStopLimit - nextOffset - 1),
- errhint("Execute a database-wide VACUUM in database with OID %u with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings.",
- MultiXactState->oldestMultiXactDB)));
- }
-
- /*
- * Check whether we should kick autovacuum into action, to prevent members
- * wraparound. NB we use a much larger window to trigger autovacuum than
- * just the warning limit. The warning is just a measure of last resort -
- * this is in line with GetNewTransactionId's behaviour.
- */
- if (!MultiXactState->oldestOffsetKnown ||
- (MultiXactState->nextOffset - MultiXactState->oldestOffset
- > MULTIXACT_MEMBER_SAFE_THRESHOLD))
- {
- /*
- * To avoid swamping the postmaster with signals, we issue the autovac
- * request only when crossing a segment boundary. With default
- * compilation settings that's roughly after 50k members. This still
- * gives plenty of chances before we get into real trouble.
- */
- if ((MXOffsetToMemberPage(nextOffset) / SLRU_PAGES_PER_SEGMENT) !=
- (MXOffsetToMemberPage(nextOffset + nmembers) / SLRU_PAGES_PER_SEGMENT))
- SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
- }
-
- if (MultiXactState->oldestOffsetKnown &&
- MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit,
- nextOffset,
- nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS))
- ereport(WARNING,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg_plural("database with OID %u must be vacuumed before %d more multixact member is used",
- "database with OID %u must be vacuumed before %d more multixact members are used",
- MultiXactState->offsetStopLimit - nextOffset + nmembers,
- MultiXactState->oldestMultiXactDB,
- MultiXactState->offsetStopLimit - nextOffset + nmembers),
- errhint("Execute a database-wide VACUUM in that database with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings.")));
-
ExtendMultiXactMember(nextOffset, nmembers);
/*
@@ -1358,7 +1199,10 @@ retry:
offptr += entryno;
offset = *offptr;
- Assert(offset != 0);
+ if (offset == 0)
+ ereport(ERROR,
+ (errmsg("found invalid zero offset in multixact %llu",
+ (unsigned long long) multi)));
/*
* Use the same increment rule as GetNewMultiXactId(), that is, don't
@@ -1405,7 +1249,7 @@ retry:
LWLockRelease(MultiXactOffsetSLRULock);
- ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
+ ptr = (MultiXactMember *) palloc0(length * sizeof(MultiXactMember));
/* Now get the members themselves. */
LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
@@ -1415,7 +1259,7 @@ retry:
for (i = 0; i < length; i++, offset++)
{
TransactionId *xactptr;
- uint32 *flagsptr;
+ uint64 *flagsptr;
int flagsoff;
int bshift;
int memberoff;
@@ -1441,7 +1285,7 @@ retry:
flagsoff = MXOffsetToFlagsOffset(offset);
bshift = MXOffsetToFlagsBitShift(offset);
- flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
+ flagsptr = (uint64 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
ptr[truelength].xid = *xactptr;
ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
@@ -2228,47 +2072,9 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid,
bool is_startup)
{
MultiXactId multiVacLimit;
- MultiXactId multiWarnLimit;
- MultiXactId multiStopLimit;
- MultiXactId multiWrapLimit;
- MultiXactId curMulti;
- bool needs_offset_vacuum;
Assert(MultiXactIdIsValid(oldest_datminmxid));
- /*
- * We pretend that a wrap will happen halfway through the multixact ID
- * space, but that's not really true, because multixacts wrap differently
- * from transaction IDs. Note that, separately from any concern about
- * multixact IDs wrapping, we must ensure that multixact members do not
- * wrap. Limits for that are set in SetOffsetVacuumLimit, not here.
- */
- multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1);
- if (multiWrapLimit < FirstMultiXactId)
- multiWrapLimit += FirstMultiXactId;
-
- /*
- * We'll refuse to continue assigning MultiXactIds once we get within 3M
- * multi of data loss. See SetTransactionIdLimit.
- */
- multiStopLimit = multiWrapLimit - 3000000;
- if (multiStopLimit < FirstMultiXactId)
- multiStopLimit -= FirstMultiXactId;
-
- /*
- * We'll start complaining loudly when we get within 40M multis of data
- * loss. This is kind of arbitrary, but if you let your gas gauge get
- * down to 2% of full, would you be looking for the next gas station? We
- * need to be fairly liberal about this number because there are lots of
- * scenarios where most transactions are done by automatic clients that
- * won't pay attention to warnings. (No, we're not gonna make this
- * configurable. If you know enough to configure it, you know enough to
- * not get in this kind of trouble in the first place.)
- */
- multiWarnLimit = multiWrapLimit - 40000000;
- if (multiWarnLimit < FirstMultiXactId)
- multiWarnLimit -= FirstMultiXactId;
-
/*
* We'll start trying to force autovacuums when oldest_datminmxid gets to
* be more than autovacuum_multixact_freeze_max_age mxids old.
@@ -2278,25 +2084,14 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid,
* its value. See SetTransactionIdLimit.
*/
multiVacLimit = oldest_datminmxid + autovacuum_multixact_freeze_max_age;
- if (multiVacLimit < FirstMultiXactId)
- multiVacLimit += FirstMultiXactId;
/* Grab lock for just long enough to set the new limit values */
LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
MultiXactState->oldestMultiXactId = oldest_datminmxid;
MultiXactState->oldestMultiXactDB = oldest_datoid;
MultiXactState->multiVacLimit = multiVacLimit;
- MultiXactState->multiWarnLimit = multiWarnLimit;
- MultiXactState->multiStopLimit = multiStopLimit;
- MultiXactState->multiWrapLimit = multiWrapLimit;
- curMulti = MultiXactState->nextMXact;
LWLockRelease(MultiXactGenLock);
- /* Log the info */
- ereport(DEBUG1,
- (errmsg_internal("MultiXactId wrap limit is %u, limited by database with OID %u",
- multiWrapLimit, oldest_datoid)));
-
/*
* Computing the actual limits is only possible once the data directory is
* in a consistent state. There's no need to compute the limits while
@@ -2308,59 +2103,6 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid,
return;
Assert(!InRecovery);
-
- /* Set limits for offset vacuum. */
- needs_offset_vacuum = SetOffsetVacuumLimit(is_startup);
-
- /*
- * If past the autovacuum force point, immediately signal an autovac
- * request. The reason for this is that autovac only processes one
- * database per invocation. Once it's finished cleaning up the oldest
- * database, it'll call here, and we'll signal the postmaster to start
- * another iteration immediately if there are still any old databases.
- */
- if ((MultiXactIdPrecedes(multiVacLimit, curMulti) ||
- needs_offset_vacuum) && IsUnderPostmaster)
- SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-
- /* Give an immediate warning if past the wrap warn point */
- if (MultiXactIdPrecedes(multiWarnLimit, curMulti))
- {
- char *oldest_datname;
-
- /*
- * We can be called when not inside a transaction, for example during
- * StartupXLOG(). In such a case we cannot do database access, so we
- * must just report the oldest DB's OID.
- *
- * Note: it's also possible that get_database_name fails and returns
- * NULL, for example because the database just got dropped. We'll
- * still warn, even though the warning might now be unnecessary.
- */
- if (IsTransactionState())
- oldest_datname = get_database_name(oldest_datoid);
- else
- oldest_datname = NULL;
-
- if (oldest_datname)
- ereport(WARNING,
- (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
- "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
- multiWrapLimit - curMulti,
- oldest_datname,
- multiWrapLimit - curMulti),
- errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- else
- ereport(WARNING,
- (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
- "database with OID %u must be vacuumed before %u more MultiXactIds are used",
- multiWrapLimit - curMulti,
- oldest_datoid,
- multiWrapLimit - curMulti),
- errhint("To avoid MultiXactId assignment failures, execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- }
}
/*
@@ -2458,7 +2200,7 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
{
int flagsoff;
int flagsbit;
- uint32 difference;
+ uint64 difference;
/*
* Only zero when at first entry of a page.
@@ -2479,23 +2221,7 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
LWLockRelease(MultiXactMemberSLRULock);
}
- /*
- * Compute the number of items till end of current page. Careful: if
- * addition of unsigned ints wraps around, we're at the last page of
- * the last segment; since that page holds a different number of items
- * than other pages, we need to do it differently.
- */
- if (offset + MAX_MEMBERS_IN_LAST_MEMBERS_PAGE < offset)
- {
- /*
- * This is the last page of the last segment; we can compute the
- * number of items left to allocate in it without modulo
- * arithmetic.
- */
- difference = MaxMultiXactOffset - offset + 1;
- }
- else
- difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
+ difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
/*
* Advance to next page, taking care to properly handle the wraparound
@@ -2560,184 +2286,6 @@ GetOldestMultiXactId(void)
return oldestMXact;
}
-/*
- * Determine how aggressively we need to vacuum in order to prevent member
- * wraparound.
- *
- * To do so determine what's the oldest member offset and install the limit
- * info in MultiXactState, where it can be used to prevent overrun of old data
- * in the members SLRU area.
- *
- * The return value is true if emergency autovacuum is required and false
- * otherwise.
- */
-static bool
-SetOffsetVacuumLimit(bool is_startup)
-{
- MultiXactId oldestMultiXactId;
- MultiXactId nextMXact;
- MultiXactOffset oldestOffset = 0; /* placate compiler */
- MultiXactOffset prevOldestOffset;
- MultiXactOffset nextOffset;
- bool oldestOffsetKnown = false;
- bool prevOldestOffsetKnown;
- MultiXactOffset offsetStopLimit = 0;
- MultiXactOffset prevOffsetStopLimit;
-
- /*
- * NB: Have to prevent concurrent truncation, we might otherwise try to
- * lookup an oldestMulti that's concurrently getting truncated away.
- */
- LWLockAcquire(MultiXactTruncationLock, LW_SHARED);
-
- /* Read relevant fields from shared memory. */
- LWLockAcquire(MultiXactGenLock, LW_SHARED);
- oldestMultiXactId = MultiXactState->oldestMultiXactId;
- nextMXact = MultiXactState->nextMXact;
- nextOffset = MultiXactState->nextOffset;
- prevOldestOffsetKnown = MultiXactState->oldestOffsetKnown;
- prevOldestOffset = MultiXactState->oldestOffset;
- prevOffsetStopLimit = MultiXactState->offsetStopLimit;
- Assert(MultiXactState->finishedStartup);
- LWLockRelease(MultiXactGenLock);
-
- /*
- * Determine the offset of the oldest multixact. Normally, we can read
- * the offset from the multixact itself, but there's an important special
- * case: if there are no multixacts in existence at all, oldestMXact
- * obviously can't point to one. It will instead point to the multixact
- * ID that will be assigned the next time one is needed.
- */
- if (oldestMultiXactId == nextMXact)
- {
- /*
- * When the next multixact gets created, it will be stored at the next
- * offset.
- */
- oldestOffset = nextOffset;
- oldestOffsetKnown = true;
- }
- else
- {
- /*
- * Figure out where the oldest existing multixact's offsets are
- * stored. Due to bugs in early release of PostgreSQL 9.3.X and 9.4.X,
- * the supposedly-earliest multixact might not really exist. We are
- * careful not to fail in that case.
- */
- oldestOffsetKnown =
- find_multixact_start(oldestMultiXactId, &oldestOffset);
-
- if (oldestOffsetKnown)
- ereport(DEBUG1,
- (errmsg_internal("oldest MultiXactId member is at offset %llu",
- (unsigned long long) oldestOffset)));
- else
- ereport(LOG,
- (errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %llu does not exist on disk",
- (unsigned long long) oldestMultiXactId)));
- }
-
- LWLockRelease(MultiXactTruncationLock);
-
- /*
- * If we can, compute limits (and install them MultiXactState) to prevent
- * overrun of old data in the members SLRU area. We can only do so if the
- * oldest offset is known though.
- */
- if (oldestOffsetKnown)
- {
- /* move back to start of the corresponding segment */
- offsetStopLimit = oldestOffset - (oldestOffset %
- (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT));
-
- /* always leave one segment before the wraparound point */
- offsetStopLimit -= (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT);
-
- if (!prevOldestOffsetKnown && !is_startup)
- ereport(LOG,
- (errmsg("MultiXact member wraparound protections are now enabled")));
-
- ereport(DEBUG1,
- (errmsg_internal("MultiXact member stop limit is now %llu based on MultiXact %llu",
- (unsigned long long) offsetStopLimit,
- (unsigned long long) oldestMultiXactId)));
- }
- else if (prevOldestOffsetKnown)
- {
- /*
- * If we failed to get the oldest offset this time, but we have a
- * value from a previous pass through this function, use the old
- * values rather than automatically forcing an emergency autovacuum
- * cycle again.
- */
- oldestOffset = prevOldestOffset;
- oldestOffsetKnown = true;
- offsetStopLimit = prevOffsetStopLimit;
- }
-
- /* Install the computed values */
- LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
- MultiXactState->oldestOffset = oldestOffset;
- MultiXactState->oldestOffsetKnown = oldestOffsetKnown;
- MultiXactState->offsetStopLimit = offsetStopLimit;
- LWLockRelease(MultiXactGenLock);
-
- /*
- * Do we need an emergency autovacuum? If we're not sure, assume yes.
- */
- return !oldestOffsetKnown ||
- (nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD);
-}
-
-/*
- * Return whether adding "distance" to "start" would move past "boundary".
- *
- * We use this to determine whether the addition is "wrapping around" the
- * boundary point, hence the name. The reason we don't want to use the regular
- * 2^31-modulo arithmetic here is that we want to be able to use the whole of
- * the 2^32-1 space here, allowing for more multixacts than would fit
- * otherwise.
- */
-static bool
-MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start,
- uint32 distance)
-{
- MultiXactOffset finish;
-
- /*
- * Note that offset number 0 is not used (see GetMultiXactIdMembers), so
- * if the addition wraps around the UINT_MAX boundary, skip that value.
- */
- finish = start + distance;
- if (finish < start)
- finish++;
-
- /*-----------------------------------------------------------------------
- * When the boundary is numerically greater than the starting point, any
- * value numerically between the two is not wrapped:
- *
- * <----S----B---->
- * [---) = F wrapped past B (and UINT_MAX)
- * [---) = F not wrapped
- * [----] = F wrapped past B
- *
- * When the boundary is numerically less than the starting point (i.e. the
- * UINT_MAX wraparound occurs somewhere in between) then all values in
- * between are wrapped:
- *
- * <----B----S---->
- * [---) = F not wrapped past B (but wrapped past UINT_MAX)
- * [---) = F wrapped past B (and UINT_MAX)
- * [----] = F not wrapped
- *-----------------------------------------------------------------------
- */
- if (start < boundary)
- return finish >= boundary || finish < start;
- else
- return finish >= boundary && finish < start;
-}
-
/*
* Find the starting offset of the given MultiXactId.
*
@@ -2781,93 +2329,6 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
return true;
}
-/*
- * Determine how many multixacts, and how many multixact members, currently
- * exist. Return false if unable to determine.
- */
-static bool
-ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members)
-{
- MultiXactOffset nextOffset;
- MultiXactOffset oldestOffset;
- MultiXactId oldestMultiXactId;
- MultiXactId nextMultiXactId;
- bool oldestOffsetKnown;
-
- LWLockAcquire(MultiXactGenLock, LW_SHARED);
- nextOffset = MultiXactState->nextOffset;
- oldestMultiXactId = MultiXactState->oldestMultiXactId;
- nextMultiXactId = MultiXactState->nextMXact;
- oldestOffset = MultiXactState->oldestOffset;
- oldestOffsetKnown = MultiXactState->oldestOffsetKnown;
- LWLockRelease(MultiXactGenLock);
-
- if (!oldestOffsetKnown)
- return false;
-
- *members = nextOffset - oldestOffset;
- *multixacts = nextMultiXactId - oldestMultiXactId;
- return true;
-}
-
-/*
- * Multixact members can be removed once the multixacts that refer to them
- * are older than every datminmxid. autovacuum_multixact_freeze_max_age and
- * vacuum_multixact_freeze_table_age work together to make sure we never have
- * too many multixacts; we hope that, at least under normal circumstances,
- * this will also be sufficient to keep us from using too many offsets.
- * However, if the average multixact has many members, we might exhaust the
- * members space while still using few enough members that these limits fail
- * to trigger relminmxid advancement by VACUUM. At that point, we'd have no
- * choice but to start failing multixact-creating operations with an error.
- *
- * To prevent that, if more than a threshold portion of the members space is
- * used, we effectively reduce autovacuum_multixact_freeze_max_age and
- * to a value just less than the number of multixacts in use. We hope that
- * this will quickly trigger autovacuuming on the table or tables with the
- * oldest relminmxid, thus allowing datminmxid values to advance and removing
- * some members.
- *
- * As the fraction of the member space currently in use grows, we become
- * more aggressive in clamping this value. That not only causes autovacuum
- * to ramp up, but also makes any manual vacuums the user issues more
- * aggressive. This happens because vacuum_get_cutoffs() will clamp the
- * freeze table and the minimum freeze age cutoffs based on the effective
- * autovacuum_multixact_freeze_max_age this function returns. In the worst
- * case, we'll claim the freeze_max_age to zero, and every vacuum of any
- * table will freeze every multixact.
- */
-int
-MultiXactMemberFreezeThreshold(void)
-{
- MultiXactOffset members;
- uint32 multixacts;
- uint32 victim_multixacts;
- double fraction;
-
- /* If we can't determine member space utilization, assume the worst. */
- if (!ReadMultiXactCounts(&multixacts, &members))
- return 0;
-
- /* If member space utilization is low, no special action is required. */
- if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD)
- return autovacuum_multixact_freeze_max_age;
-
- /*
- * Compute a target for relminmxid advancement. The number of multixacts
- * we try to eliminate from the system is based on how far we are past
- * MULTIXACT_MEMBER_SAFE_THRESHOLD.
- */
- fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD) /
- (MULTIXACT_MEMBER_DANGER_THRESHOLD - MULTIXACT_MEMBER_SAFE_THRESHOLD);
- victim_multixacts = multixacts * fraction;
-
- /* fraction could be > 1.0, but lowest possible freeze age is zero */
- if (victim_multixacts > multixacts)
- return 0;
- return multixacts - victim_multixacts;
-}
-
typedef struct mxtruncinfo
{
int64 earliestExistingPage;
@@ -2894,35 +2355,12 @@ SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int64 segpage, void *data
/*
* Delete members segments [oldest, newOldest)
- *
- * The members SLRU can, in contrast to the offsets one, be filled to almost
- * the full range at once. This means SimpleLruTruncate() can't trivially be
- * used - instead the to-be-deleted range is computed using the offsets
- * SLRU. C.f. TruncateMultiXact().
*/
static void
PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldestOffset)
{
- const int maxsegment = MXOffsetToMemberSegment(MaxMultiXactOffset);
- int startsegment = MXOffsetToMemberSegment(oldestOffset);
- int endsegment = MXOffsetToMemberSegment(newOldestOffset);
- int segment = startsegment;
-
- /*
- * Delete all the segments but the last one. The last segment can still
- * contain, possibly partially, valid data.
- */
- while (segment != endsegment)
- {
- elog(DEBUG2, "truncating multixact members segment %x", segment);
- SlruDeleteSegment(MultiXactMemberCtl, segment);
-
- /* move to next segment, handling wraparound correctly */
- if (segment == maxsegment)
- segment = 0;
- else
- segment += 1;
- }
+ SimpleLruTruncate(MultiXactMemberCtl,
+ MXOffsetToMemberPage(newOldestOffset));
}
/*
@@ -3147,7 +2585,7 @@ MultiXactOffsetPagePrecedes(int64 page1, int64 page2)
/*
* Decide whether a MultiXactMember page number is "older" for truncation
- * purposes. There is no "invalid offset number" so use the numbers verbatim.
+ * purposes. There is no "invalid offset number" so use the numbers verbatim.
*/
static bool
MultiXactMemberPagePrecedes(int64 page1, int64 page2)
@@ -3172,7 +2610,7 @@ MultiXactMemberPagePrecedes(int64 page1, int64 page2)
bool
MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
{
- int32 diff = (int32) (multi1 - multi2);
+ int64 diff = (int64) (multi1 - multi2);
return (diff < 0);
}
@@ -3186,7 +2624,7 @@ MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
bool
MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
{
- int32 diff = (int32) (multi1 - multi2);
+ int64 diff = (int64) (multi1 - multi2);
return (diff <= 0);
}
@@ -3198,7 +2636,7 @@ MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
static bool
MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
{
- int32 diff = (int32) (offset1 - offset2);
+ int64 diff = (int64) (offset1 - offset2);
return (diff < 0);
}
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index ce1730740a..89669fbfb4 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -1453,7 +1453,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
* must not assign.
*/
lhs = per_page + offset; /* skip first page to avoid non-normal XIDs */
- rhs = lhs + (1U << 31);
+ rhs = lhs + (1ULL << 63);
Assert(TransactionIdPrecedes(lhs, rhs));
Assert(TransactionIdPrecedes(rhs, lhs));
Assert(!TransactionIdPrecedes(lhs - 1, rhs));
@@ -1469,13 +1469,14 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page));
Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page));
Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page)
- || (1U << 31) % per_page != 0); /* See CommitTsPagePrecedes() */
+ || (1ULL << 63) % per_page != 0); /* See CommitTsPagePrecedes() */
Assert(ctl->PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page)
- || (1U << 31) % per_page != 0);
+ || (1ULL << 63) % per_page != 0);
Assert(ctl->PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page));
Assert(ctl->PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page));
Assert(!ctl->PagePrecedes(rhs / per_page, (lhs + per_page) / per_page));
+
/*
* GetNewTransactionId() has assigned the last XID it can safely use, and
* that XID is in the *LAST* page of the second segment. We must not
@@ -1485,7 +1486,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
newestXact = newestPage * per_page + offset;
Assert(newestXact / per_page == newestPage);
oldestXact = newestXact + 1;
- oldestXact -= 1U << 31;
+ oldestXact -= 1ULL << 63;
oldestPage = oldestXact / per_page;
Assert(!SlruMayDeleteSegment(ctl,
(newestPage -
@@ -1501,7 +1502,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
newestXact = newestPage * per_page + offset;
Assert(newestXact / per_page == newestPage);
oldestXact = newestXact + 1;
- oldestXact -= 1U << 31;
+ oldestXact -= 1ULL << 63;
oldestPage = oldestXact / per_page;
Assert(!SlruMayDeleteSegment(ctl,
(newestPage -
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index f45836192b..39260bf64b 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -222,11 +222,14 @@ void
BootStrapSUBTRANS(void)
{
int slotno;
+ int64 pageno;
+
+ pageno = TransactionIdToPage(XidFromFullTransactionId(ShmemVariableCache->nextXid));
LWLockAcquire(SubtransSLRULock, LW_EXCLUSIVE);
/* Create and zero the first page of the subtrans log */
- slotno = ZeroSUBTRANSPage(0);
+ slotno = ZeroSUBTRANSPage(pageno);
/* Make sure it's written out */
SimpleLruWritePage(SubTransCtl, slotno);
@@ -279,9 +282,6 @@ StartupSUBTRANS(TransactionId oldestActiveXID)
{
(void) ZeroSUBTRANSPage(startPage);
startPage++;
- /* must account for wraparound */
- if (startPage > TransactionIdToPage(MaxTransactionId))
- startPage = 0;
}
(void) ZeroSUBTRANSPage(startPage);
@@ -358,6 +358,7 @@ TruncateSUBTRANS(TransactionId oldestXact)
* a page and oldestXact == next XID. In that case, if we didn't subtract
* one, we'd trigger SimpleLruTruncate's wraparound detection.
*/
+
TransactionIdRetreat(oldestXact);
cutoffPage = TransactionIdToPage(oldestXact);
diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c
index beb7d54f4d..de3b30cec8 100644
--- a/src/backend/access/transam/transam.c
+++ b/src/backend/access/transam/transam.c
@@ -281,14 +281,14 @@ TransactionIdPrecedes(TransactionId id1, TransactionId id2)
{
/*
* If either ID is a permanent XID then we can just do unsigned
- * comparison. If both are normal, do a modulo-2^32 comparison.
+ * comparison. If both are normal, do a modulo-2^64 comparison.
*/
- int32 diff;
+ int64 diff;
if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2))
return (id1 < id2);
- diff = (int32) (id1 - id2);
+ diff = (int64) (id1 - id2);
return (diff < 0);
}
@@ -298,12 +298,12 @@ TransactionIdPrecedes(TransactionId id1, TransactionId id2)
bool
TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
{
- int32 diff;
+ int64 diff;
if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2))
return (id1 <= id2);
- diff = (int32) (id1 - id2);
+ diff = (int64) (id1 - id2);
return (diff <= 0);
}
@@ -313,12 +313,12 @@ TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
bool
TransactionIdFollows(TransactionId id1, TransactionId id2)
{
- int32 diff;
+ int64 diff;
if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2))
return (id1 > id2);
- diff = (int32) (id1 - id2);
+ diff = (int64) (id1 - id2);
return (diff > 0);
}
@@ -328,12 +328,12 @@ TransactionIdFollows(TransactionId id1, TransactionId id2)
bool
TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
{
- int32 diff;
+ int64 diff;
if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2))
return (id1 >= id2);
- diff = (int32) (id1 - id2);
+ diff = (int64) (id1 - id2);
return (diff >= 0);
}
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 6007fd50f0..205d577584 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -477,8 +477,8 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
proc->lxid = xid;
proc->backendId = InvalidBackendId;
}
- proc->xid = xid;
- Assert(proc->xmin == InvalidTransactionId);
+ pg_atomic_write_u64(&proc->xid, xid);
+ Assert(pg_atomic_read_u64(&proc->xmin) == InvalidTransactionId);
proc->delayChkptFlags = 0;
proc->statusFlags = 0;
proc->pid = 0;
@@ -793,7 +793,7 @@ pg_prepared_xact(PG_FUNCTION_ARGS)
* Form tuple with appropriate data.
*/
- values[0] = TransactionIdGetDatum(proc->xid);
+ values[0] = TransactionIdGetDatum(pg_atomic_read_u64(&proc->xid));
values[1] = CStringGetTextDatum(gxact->gid);
values[2] = TimestampTzGetDatum(gxact->prepared_at);
values[3] = ObjectIdGetDatum(gxact->owner);
@@ -943,46 +943,8 @@ TwoPhaseGetDummyProc(TransactionId xid, bool lock_held)
/* State file support */
/************************************************************************/
-/*
- * Compute the FullTransactionId for the given TransactionId.
- *
- * The wrap logic is safe here because the span of active xids cannot exceed one
- * epoch at any given time.
- */
-static inline FullTransactionId
-AdjustToFullTransactionId(TransactionId xid)
-{
- FullTransactionId nextFullXid;
- TransactionId nextXid;
- uint32 epoch;
-
- Assert(TransactionIdIsValid(xid));
-
- LWLockAcquire(XidGenLock, LW_SHARED);
- nextFullXid = ShmemVariableCache->nextXid;
- LWLockRelease(XidGenLock);
-
- nextXid = XidFromFullTransactionId(nextFullXid);
- epoch = EpochFromFullTransactionId(nextFullXid);
- if (unlikely(xid > nextXid))
- {
- /* Wraparound occurred, must be from a prev epoch. */
- Assert(epoch > 0);
- epoch--;
- }
-
- return FullTransactionIdFromEpochAndXid(epoch, xid);
-}
-
-static inline int
-TwoPhaseFilePath(char *path, TransactionId xid)
-{
- FullTransactionId fxid = AdjustToFullTransactionId(xid);
-
- return snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X%08X",
- EpochFromFullTransactionId(fxid),
- XidFromFullTransactionId(fxid));
-}
+#define TwoPhaseFilePath(path, xid) \
+ snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%016llX", (unsigned long long) xid)
/*
* 2PC state file format:
@@ -1925,11 +1887,9 @@ restoreTwoPhaseData(void)
strspn(clde->d_name, "0123456789ABCDEF") == 16)
{
TransactionId xid;
- FullTransactionId fxid;
char *buf;
- fxid = FullTransactionIdFromU64(strtou64(clde->d_name, NULL, 16));
- xid = XidFromFullTransactionId(fxid);
+ xid = (TransactionId) strtou64(clde->d_name, NULL, 16);
buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr,
true, false, false);
@@ -2261,7 +2221,6 @@ ProcessTwoPhaseBuffer(TransactionId xid,
if (fromdisk)
{
- /* Read and validate file */
buf = ReadTwoPhaseFile(xid, false);
}
else
diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c
index 4be1055c1a..b8eefe7d3f 100644
--- a/src/backend/access/transam/varsup.c
+++ b/src/backend/access/transam/varsup.c
@@ -66,9 +66,9 @@ GetNewTransactionId(bool isSubXact)
if (IsBootstrapProcessingMode())
{
Assert(!isSubXact);
- MyProc->xid = BootstrapTransactionId;
- ProcGlobal->xids[MyProc->pgxactoff] = BootstrapTransactionId;
- return FullTransactionIdFromEpochAndXid(0, BootstrapTransactionId);
+ pg_atomic_write_u64(&MyProc->xid, BootstrapTransactionId);
+ pg_atomic_write_u64(&ProcGlobal->xids[MyProc->pgxactoff], BootstrapTransactionId);
+ return FullTransactionIdFromXid(BootstrapTransactionId);
}
/* safety check, we should never get this far in a HS standby */
@@ -80,19 +80,6 @@ GetNewTransactionId(bool isSubXact)
full_xid = ShmemVariableCache->nextXid;
xid = XidFromFullTransactionId(full_xid);
- /*----------
- * Check to see if it's safe to assign another XID. This protects against
- * catastrophic data loss due to XID wraparound. The basic rules are:
- *
- * If we're past xidVacLimit, start trying to force autovacuum cycles.
- * If we're past xidWarnLimit, start issuing warnings.
- * If we're past xidStopLimit, refuse to execute transactions, unless
- * we are running in single-user mode (which gives an escape hatch
- * to the DBA who somehow got past the earlier defenses).
- *
- * Note that this coding also appears in GetNewMultiXactId.
- *----------
- */
if (TransactionIdFollowsOrEquals(xid, ShmemVariableCache->xidVacLimit))
{
/*
@@ -102,11 +89,6 @@ GetNewTransactionId(bool isSubXact)
* possibility of deadlock while doing get_database_name(). First,
* copy all the shared values we'll need in this path.
*/
- TransactionId xidWarnLimit = ShmemVariableCache->xidWarnLimit;
- TransactionId xidStopLimit = ShmemVariableCache->xidStopLimit;
- TransactionId xidWrapLimit = ShmemVariableCache->xidWrapLimit;
- Oid oldest_datoid = ShmemVariableCache->oldestXidDB;
-
LWLockRelease(XidGenLock);
/*
@@ -117,48 +99,6 @@ GetNewTransactionId(bool isSubXact)
if (IsUnderPostmaster && (xid % 65536) == 0)
SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
- if (IsUnderPostmaster &&
- TransactionIdFollowsOrEquals(xid, xidStopLimit))
- {
- char *oldest_datname = get_database_name(oldest_datoid);
-
- /* complain even if that DB has disappeared */
- if (oldest_datname)
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("database is not accepting commands that assign new XIDs to avoid wraparound data loss in database \"%s\"",
- oldest_datname),
- errhint("Execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- else
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("database is not accepting commands that assign new XIDs to avoid wraparound data loss in database with OID %u",
- oldest_datoid),
- errhint("Execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- }
- else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit))
- {
- char *oldest_datname = get_database_name(oldest_datoid);
-
- /* complain even if that DB has disappeared */
- if (oldest_datname)
- ereport(WARNING,
- (errmsg("database \"%s\" must be vacuumed within %llu transactions",
- oldest_datname,
- (unsigned long long) xidWrapLimit - xid),
- errhint("To avoid XID assignment failures, execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- else
- ereport(WARNING,
- (errmsg("database with OID %u must be vacuumed within %llu transactions",
- oldest_datoid,
- (unsigned long long) xidWrapLimit - xid),
- errhint("To avoid XID assignment failures, execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- }
-
/* Re-acquire lock and start over */
LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
full_xid = ShmemVariableCache->nextXid;
@@ -228,8 +168,8 @@ GetNewTransactionId(bool isSubXact)
Assert(!MyProc->subxidStatus.overflowed);
/* LWLockRelease acts as barrier */
- MyProc->xid = xid;
- ProcGlobal->xids[MyProc->pgxactoff] = xid;
+ pg_atomic_write_u64(&MyProc->xid, xid);
+ pg_atomic_write_u64(&ProcGlobal->xids[MyProc->pgxactoff], xid);
}
else
{
@@ -270,7 +210,7 @@ ReadNextFullTransactionId(void)
}
/*
- * Advance nextXid to the value after a given xid. The epoch is inferred.
+ * Advance nextXid to the value after a given xid.
* This must only be called during recovery or from two-phase start-up code.
*/
void
@@ -278,7 +218,6 @@ AdvanceNextFullTransactionIdPastXid(TransactionId xid)
{
FullTransactionId newNextFullXid;
TransactionId next_xid;
- uint32 epoch;
/*
* It is safe to read nextXid without a lock, because this is only called
@@ -292,19 +231,9 @@ AdvanceNextFullTransactionIdPastXid(TransactionId xid)
if (!TransactionIdFollowsOrEquals(xid, next_xid))
return;
- /*
- * Compute the FullTransactionId that comes after the given xid. To do
- * this, we preserve the existing epoch, but detect when we've wrapped
- * into a new epoch. This is necessary because WAL records and 2PC state
- * currently contain 32 bit xids. The wrap logic is safe in those cases
- * because the span of active xids cannot exceed one epoch at any given
- * point in the WAL stream.
- */
+ /* Compute the FullTransactionId that comes after the given xid. */
TransactionIdAdvance(xid);
- epoch = EpochFromFullTransactionId(ShmemVariableCache->nextXid);
- if (unlikely(xid < next_xid))
- ++epoch;
- newNextFullXid = FullTransactionIdFromEpochAndXid(epoch, xid);
+ newNextFullXid = FullTransactionIdFromXid(xid);
/*
* We still need to take a lock to modify the value when there are
@@ -345,61 +274,14 @@ void
SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
{
TransactionId xidVacLimit;
- TransactionId xidWarnLimit;
- TransactionId xidStopLimit;
- TransactionId xidWrapLimit;
TransactionId curXid;
Assert(TransactionIdIsNormal(oldest_datfrozenxid));
- /*
- * The place where we actually get into deep trouble is halfway around
- * from the oldest potentially-existing XID. (This calculation is
- * probably off by one or two counts, because the special XIDs reduce the
- * size of the loop a little bit. But we throw in plenty of slop below,
- * so it doesn't matter.)
- */
- xidWrapLimit = oldest_datfrozenxid + (MaxTransactionId >> 1);
- if (xidWrapLimit < FirstNormalTransactionId)
- xidWrapLimit += FirstNormalTransactionId;
-
- /*
- * We'll refuse to continue assigning XIDs in interactive mode once we get
- * within 3M transactions of data loss. This leaves lots of room for the
- * DBA to fool around fixing things in a standalone backend, while not
- * being significant compared to total XID space. (VACUUM requires an XID
- * if it truncates at wal_level!=minimal. "VACUUM (ANALYZE)", which a DBA
- * might do by reflex, assigns an XID. Hence, we had better be sure
- * there's lots of XIDs left...) Also, at default BLCKSZ, this leaves two
- * completely-idle segments. In the event of edge-case bugs involving
- * page or segment arithmetic, idle segments render the bugs unreachable
- * outside of single-user mode.
- */
- xidStopLimit = xidWrapLimit - 3000000;
- if (xidStopLimit < FirstNormalTransactionId)
- xidStopLimit -= FirstNormalTransactionId;
-
- /*
- * We'll start complaining loudly when we get within 40M transactions of
- * data loss. This is kind of arbitrary, but if you let your gas gauge
- * get down to 2% of full, would you be looking for the next gas station?
- * We need to be fairly liberal about this number because there are lots
- * of scenarios where most transactions are done by automatic clients that
- * won't pay attention to warnings. (No, we're not gonna make this
- * configurable. If you know enough to configure it, you know enough to
- * not get in this kind of trouble in the first place.)
- */
- xidWarnLimit = xidWrapLimit - 40000000;
- if (xidWarnLimit < FirstNormalTransactionId)
- xidWarnLimit -= FirstNormalTransactionId;
-
/*
* We'll start trying to force autovacuums when oldest_datfrozenxid gets
* to be more than autovacuum_freeze_max_age transactions old.
*
- * Note: guc.c ensures that autovacuum_freeze_max_age is in a sane range,
- * so that xidVacLimit will be well before xidWarnLimit.
- *
* Note: autovacuum_freeze_max_age is a PGC_POSTMASTER parameter so that
* we don't have to worry about dealing with on-the-fly changes in its
* value. It doesn't look practical to update shared state from a GUC
@@ -416,18 +298,10 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
ShmemVariableCache->oldestXid = oldest_datfrozenxid;
ShmemVariableCache->xidVacLimit = xidVacLimit;
- ShmemVariableCache->xidWarnLimit = xidWarnLimit;
- ShmemVariableCache->xidStopLimit = xidStopLimit;
- ShmemVariableCache->xidWrapLimit = xidWrapLimit;
ShmemVariableCache->oldestXidDB = oldest_datoid;
curXid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
LWLockRelease(XidGenLock);
- /* Log the info */
- ereport(DEBUG1,
- (errmsg_internal("transaction ID wrap limit is %llu, limited by database with OID %u",
- (unsigned long long) xidWrapLimit, oldest_datoid)));
-
/*
* If past the autovacuum force point, immediately signal an autovac
* request. The reason for this is that autovac only processes one
@@ -438,41 +312,6 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
if (TransactionIdFollowsOrEquals(curXid, xidVacLimit) &&
IsUnderPostmaster && !InRecovery)
SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-
- /* Give an immediate warning if past the wrap warn point */
- if (TransactionIdFollowsOrEquals(curXid, xidWarnLimit) && !InRecovery)
- {
- char *oldest_datname;
-
- /*
- * We can be called when not inside a transaction, for example during
- * StartupXLOG(). In such a case we cannot do database access, so we
- * must just report the oldest DB's OID.
- *
- * Note: it's also possible that get_database_name fails and returns
- * NULL, for example because the database just got dropped. We'll
- * still warn, even though the warning might now be unnecessary.
- */
- if (IsTransactionState())
- oldest_datname = get_database_name(oldest_datoid);
- else
- oldest_datname = NULL;
-
- if (oldest_datname)
- ereport(WARNING,
- (errmsg("database \"%s\" must be vacuumed within %llu transactions",
- oldest_datname,
- (unsigned long long) xidWrapLimit - curXid),
- errhint("To avoid XID assignment failures, execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- else
- ereport(WARNING,
- (errmsg("database with OID %u must be vacuumed within %llu transactions",
- oldest_datoid,
- (unsigned long long) xidWrapLimit - curXid),
- errhint("To avoid XID assignment failures, execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- }
}
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 8d7427115b..483afac8b7 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -5714,6 +5714,17 @@ XactLogCommitRecord(TimestampTz commit_time,
xl_subxacts.nsubxacts = nsubxacts;
}
+ if (TransactionIdIsValid(twophase_xid))
+ {
+ xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE;
+ xl_twophase.xid_lo = (uint32) (twophase_xid & 0xFFFFFFFF);
+ xl_twophase.xid_hi = (uint32) (twophase_xid >> 32);
+ Assert(twophase_gid != NULL);
+
+ if (XLogLogicalInfoActive())
+ xl_xinfo.xinfo |= XACT_XINFO_HAS_GID;
+ }
+
if (nrels > 0)
{
xl_xinfo.xinfo |= XACT_XINFO_HAS_RELFILELOCATORS;
@@ -5733,16 +5744,6 @@ XactLogCommitRecord(TimestampTz commit_time,
xl_invals.nmsgs = nmsgs;
}
- if (TransactionIdIsValid(twophase_xid))
- {
- xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE;
- xl_twophase.xid = twophase_xid;
- Assert(twophase_gid != NULL);
-
- if (XLogLogicalInfoActive())
- xl_xinfo.xinfo |= XACT_XINFO_HAS_GID;
- }
-
/* dump transaction origin information */
if (replorigin_session_origin != InvalidRepOriginId)
{
@@ -5863,6 +5864,17 @@ XactLogAbortRecord(TimestampTz abort_time,
xl_subxacts.nsubxacts = nsubxacts;
}
+ if (TransactionIdIsValid(twophase_xid))
+ {
+ xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE;
+ xl_twophase.xid_lo = (uint32) (twophase_xid & 0xFFFFFFFF);
+ xl_twophase.xid_hi = (uint32) (twophase_xid >> 32);
+ Assert(twophase_gid != NULL);
+
+ if (XLogLogicalInfoActive())
+ xl_xinfo.xinfo |= XACT_XINFO_HAS_GID;
+ }
+
if (nrels > 0)
{
xl_xinfo.xinfo |= XACT_XINFO_HAS_RELFILELOCATORS;
@@ -5879,7 +5891,8 @@ XactLogAbortRecord(TimestampTz abort_time,
if (TransactionIdIsValid(twophase_xid))
{
xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE;
- xl_twophase.xid = twophase_xid;
+ xl_twophase.xid_lo = (uint32) (twophase_xid & 0xFFFFFFFF);
+ xl_twophase.xid_hi = (uint32) (twophase_xid >> 32);
Assert(twophase_gid != NULL);
if (XLogLogicalInfoActive())
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 2d603d8dee..6613563aff 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -4805,8 +4805,7 @@ BootStrapXLOG(void)
checkPoint.ThisTimeLineID = BootstrapTimeLineID;
checkPoint.PrevTimeLineID = BootstrapTimeLineID;
checkPoint.fullPageWrites = fullPageWrites;
- checkPoint.nextXid =
- FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
+ checkPoint.nextXid = FullTransactionIdFromXid(FirstNormalTransactionId);
checkPoint.nextOid = FirstGenbkiObjectId;
checkPoint.nextMulti = FirstMultiXactId;
checkPoint.nextMultiOffset = 0;
@@ -6945,7 +6944,7 @@ CreateCheckPoint(int flags)
UpdateControlFile();
LWLockRelease(ControlFileLock);
- /* Update shared-memory copy of checkpoint XID/epoch */
+ /* Update shared-memory copy of checkpoint XID/base */
SpinLockAcquire(&XLogCtl->info_lck);
XLogCtl->ckptFullXid = checkPoint.nextXid;
SpinLockRelease(&XLogCtl->info_lck);
@@ -7976,7 +7975,7 @@ xlog_redo(XLogReaderState *record)
ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
LWLockRelease(ControlFileLock);
- /* Update shared-memory copy of checkpoint XID/epoch */
+ /* Update shared-memory copy of checkpoint XID/base */
SpinLockAcquire(&XLogCtl->info_lck);
XLogCtl->ckptFullXid = checkPoint.nextXid;
SpinLockRelease(&XLogCtl->info_lck);
@@ -8037,7 +8036,7 @@ xlog_redo(XLogReaderState *record)
ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
LWLockRelease(ControlFileLock);
- /* Update shared-memory copy of checkpoint XID/epoch */
+ /* Update shared-memory copy of checkpoint XID/base */
SpinLockAcquire(&XLogCtl->info_lck);
XLogCtl->ckptFullXid = checkPoint.nextXid;
SpinLockRelease(&XLogCtl->info_lck);
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index e4aaa551a0..e84261684c 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -274,6 +274,11 @@ XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
BufferGetTag(buffer, ®buf->rlocator, ®buf->forkno, ®buf->block);
regbuf->page = BufferGetPage(buffer);
regbuf->flags = flags;
+ if (IsBufferConverted(buffer))
+ {
+ regbuf->flags |= REGBUF_CONVERTED;
+ MarkBufferConverted(buffer, false);
+ }
regbuf->rdata_tail = (XLogRecData *) ®buf->rdata_head;
regbuf->rdata_len = 0;
@@ -607,6 +612,8 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
needs_backup = true;
else if (regbuf->flags & REGBUF_NO_IMAGE)
needs_backup = false;
+ else if (regbuf->flags & REGBUF_CONVERTED)
+ needs_backup = true;
else if (!doPageWrites)
needs_backup = false;
else
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index e0baa86bd3..5cbd428e52 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -2134,37 +2134,3 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
return true;
}
-
-#ifndef FRONTEND
-
-/*
- * Extract the FullTransactionId from a WAL record.
- */
-FullTransactionId
-XLogRecGetFullXid(XLogReaderState *record)
-{
- TransactionId xid,
- next_xid;
- uint32 epoch;
-
- /*
- * This function is only safe during replay, because it depends on the
- * replay state. See AdvanceNextFullTransactionIdPastXid() for more.
- */
- Assert(AmStartupProcess() || !IsUnderPostmaster);
-
- xid = XLogRecGetXid(record);
- next_xid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
- epoch = EpochFromFullTransactionId(ShmemVariableCache->nextXid);
-
- /*
- * If xid is numerically greater than next_xid, it has to be from the last
- * epoch.
- */
- if (unlikely(xid > next_xid))
- --epoch;
-
- return FullTransactionIdFromEpochAndXid(epoch, xid);
-}
-
-#endif
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index e01dca9b7c..16de61f5ed 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -120,7 +120,7 @@ static const struct typinfo TypInfo[] = {
F_OIDIN, F_OIDOUT},
{"tid", TIDOID, 0, 6, false, TYPALIGN_SHORT, TYPSTORAGE_PLAIN, InvalidOid,
F_TIDIN, F_TIDOUT},
- {"xid", XIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
+ {"xid", XIDOID, 0, 8, FLOAT8PASSBYVAL, TYPALIGN_XID, TYPSTORAGE_PLAIN, InvalidOid,
F_XIDIN, F_XIDOUT},
{"cid", CIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
F_CIDIN, F_CIDOUT},
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 7224d96695..3313d309ee 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -159,8 +159,8 @@ static const FormData_pg_attribute a2 = {
.attnum = MinTransactionIdAttributeNumber,
.attcacheoff = -1,
.atttypmod = -1,
- .attbyval = true,
- .attalign = TYPALIGN_INT,
+ .attbyval = FLOAT8PASSBYVAL,
+ .attalign = TYPALIGN_XID,
.attstorage = TYPSTORAGE_PLAIN,
.attnotnull = true,
.attislocal = true,
@@ -187,8 +187,8 @@ static const FormData_pg_attribute a4 = {
.attnum = MaxTransactionIdAttributeNumber,
.attcacheoff = -1,
.atttypmod = -1,
- .attbyval = true,
- .attalign = TYPALIGN_INT,
+ .attbyval = FLOAT8PASSBYVAL,
+ .attalign = TYPALIGN_XID,
.attstorage = TYPSTORAGE_PLAIN,
.attnotnull = true,
.attislocal = true,
diff --git a/src/backend/catalog/pg_inherits.c b/src/backend/catalog/pg_inherits.c
index da969bd2f9..ac6fd7cc2a 100644
--- a/src/backend/catalog/pg_inherits.c
+++ b/src/backend/catalog/pg_inherits.c
@@ -146,7 +146,7 @@ find_inheritance_children_extended(Oid parentrelId, bool omit_detached,
TransactionId xmin;
Snapshot snap;
- xmin = HeapTupleHeaderGetXmin(inheritsTuple->t_data);
+ xmin = HeapTupleGetXmin(inheritsTuple);
snap = GetActiveSnapshot();
if (!XidInMVCCSnapshot(xmin, snap))
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
index 264f25a8f9..ff60d303e1 100644
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -186,7 +186,7 @@ typedef struct AsyncQueueEntry
} AsyncQueueEntry;
/* Currently, no field of AsyncQueueEntry requires more than int alignment */
-#define QUEUEALIGN(len) INTALIGN(len)
+#define QUEUEALIGN(len) TYPEALIGN(8, len)
#define AsyncQueueEntryEmptySize (offsetof(AsyncQueueEntry, data) + 2)
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index ae38f83024..246220a1de 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -130,7 +130,8 @@ static void CreateDatabaseUsingWalLog(Oid src_dboid, Oid dst_dboid, Oid src_tsid
static List *ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath);
static List *ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid,
Oid dbid, char *srcpath,
- List *rlocatorlist, Snapshot snapshot);
+ List *rlocatorlist, Snapshot snapshot,
+ bool is_toast);
static CreateDBRelInfo *ScanSourceDatabasePgClassTuple(HeapTupleData *tuple,
Oid tbid, Oid dbid,
char *srcpath);
@@ -308,9 +309,10 @@ ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath)
}
/* Append relevant pg_class tuples for current page to rlocatorlist. */
+ /* No toast is expected in sys tables */
rlocatorlist = ScanSourceDatabasePgClassPage(page, buf, tbid, dbid,
srcpath, rlocatorlist,
- snapshot);
+ snapshot, false);
UnlockReleaseBuffer(buf);
}
@@ -328,7 +330,7 @@ ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath)
static List *
ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid,
char *srcpath, List *rlocatorlist,
- Snapshot snapshot)
+ Snapshot snapshot, bool is_toast)
{
BlockNumber blkno = BufferGetBlockNumber(buf);
OffsetNumber offnum;
@@ -358,6 +360,7 @@ ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid,
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
tuple.t_len = ItemIdGetLength(itemid);
tuple.t_tableOid = RelationRelationId;
+ HeapTupleCopyXidsFromPage(buf, &tuple, page, is_toast);
/* Skip tuples that are not visible to this snapshot. */
if (HeapTupleSatisfiesVisibility(&tuple, snapshot, buf))
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index e56205abd8..d0aac5d080 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1760,7 +1760,7 @@ DefineIndex(Oid tableId,
set_indexsafe_procflags();
/* We should now definitely not be advertising any xmin. */
- Assert(MyProc->xmin == InvalidTransactionId);
+ Assert(pg_atomic_read_u64(&MyProc->xmin) == InvalidTransactionId);
/*
* The index is now valid in the sense that it contains all currently
@@ -4433,8 +4433,8 @@ set_indexsafe_procflags(void)
* This should only be called before installing xid or xmin in MyProc;
* otherwise, concurrent processes could see an Xmin that moves backwards.
*/
- Assert(MyProc->xid == InvalidTransactionId &&
- MyProc->xmin == InvalidTransactionId);
+ Assert(pg_atomic_read_u64(&MyProc->xid) == InvalidTransactionId &&
+ pg_atomic_read_u64(&MyProc->xmin) == InvalidTransactionId);
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyProc->statusFlags |= PROC_IN_SAFE_IC;
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index da2ace79cc..47e7b879ea 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -48,6 +48,25 @@
#include "utils/syscache.h"
#include "utils/varlena.h"
+static inline void
+SeqTupleSetXmin(HeapTuple htup, TransactionId xid)
+{
+ htup->t_xmin = xid;
+ htup->t_data->t_choice.t_heap.t_xmin = xid;
+}
+
+static inline void
+SeqTupleSetXmax(HeapTuple htup, TransactionId xid)
+{
+ htup->t_xmin = xid;
+ htup->t_data->t_choice.t_heap.t_xmax = xid;
+}
+
+static inline TransactionId
+SeqTupleHeaderGetRawXmax(HeapTupleHeader htup)
+{
+ return htup->t_choice.t_heap.t_xmax;
+}
/*
* We don't want to log each fetching of a value from a sequence,
@@ -383,10 +402,10 @@ fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum)
* because if the current transaction aborts, no other xact will ever
* examine the sequence tuple anyway.
*/
- HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId);
- HeapTupleHeaderSetXminFrozen(tuple->t_data);
+ SeqTupleSetXmin(tuple, FrozenTransactionId);
+ HeapTupleHeaderStoreXminFrozen(tuple->t_data);
HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId);
- HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
+ SeqTupleSetXmax(tuple, InvalidTransactionId);
tuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber);
@@ -1208,6 +1227,7 @@ read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple)
/* Note we currently only bother to set these two fields of *seqdatatuple */
seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
seqdatatuple->t_len = ItemIdGetLength(lp);
+ HeapTupleCopyHeaderXids(seqdatatuple);
/*
* Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on
@@ -1218,9 +1238,9 @@ read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple)
* this again if the update gets lost.
*/
Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
- if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId)
+ if (SeqTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId)
{
- HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId);
+ SeqTupleSetXmax(seqdatatuple, InvalidTransactionId);
seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
MarkBufferDirtyHint(*buf, true);
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 087ac3e3b4..3931c075ee 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -66,12 +66,12 @@
/*
* GUC parameters
*/
-int vacuum_freeze_min_age;
-int vacuum_freeze_table_age;
-int vacuum_multixact_freeze_min_age;
-int vacuum_multixact_freeze_table_age;
-int vacuum_failsafe_age;
-int vacuum_multixact_failsafe_age;
+int64 vacuum_freeze_min_age;
+int64 vacuum_freeze_table_age;
+int64 vacuum_multixact_freeze_min_age;
+int64 vacuum_multixact_freeze_table_age;
+int64 vacuum_failsafe_age;
+int64 vacuum_multixact_failsafe_age;
/*
* Variables for cost-based vacuum delay. The defaults differ between
@@ -1075,7 +1075,7 @@ bool
vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
struct VacuumCutoffs *cutoffs)
{
- int freeze_min_age,
+ int64 freeze_min_age,
multixact_freeze_min_age,
freeze_table_age,
multixact_freeze_table_age,
@@ -1125,7 +1125,7 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
* normally autovacuum_multixact_freeze_max_age, but may be less if we are
* short of multixact member space.
*/
- effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
+ effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age;
/*
* Almost ready to set freeze output parameters; check if OldestXmin or
@@ -1484,6 +1484,9 @@ vac_update_relstats(Relation relation,
futurexid = false;
if (frozenxid_updated)
*frozenxid_updated = false;
+
+ Assert(TransactionIdPrecedesOrEquals(frozenxid, ReadNextTransactionId()));
+
if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
{
bool update = false;
@@ -1507,6 +1510,9 @@ vac_update_relstats(Relation relation,
futuremxid = false;
if (minmulti_updated)
*minmulti_updated = false;
+
+ Assert(MultiXactIdPrecedesOrEquals(minmulti, ReadNextMultiXactId()));
+
if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
{
bool update = false;
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 24c2b60c62..56d09d28c3 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -3196,6 +3196,7 @@ ExecEvalFieldStoreDeForm(ExprState *state, ExprEvalStep *op, ExprContext *econte
tmptup.t_len = HeapTupleHeaderGetDatumLength(tuphdr);
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
+ HeapTupleSetZeroXids(&tmptup);
tmptup.t_data = tuphdr;
/*
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index 16704c0c2f..5d299e0083 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -1047,6 +1047,7 @@ GetAttributeByName(HeapTupleHeader tuple, const char *attname, bool *isNull)
tmptup.t_len = HeapTupleHeaderGetDatumLength(tuple);
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
+ HeapTupleSetZeroXids(&tmptup);
tmptup.t_data = tuple;
result = heap_getattr(&tmptup,
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index b16fbe9e22..2981dc4934 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -3783,6 +3783,7 @@ ExecModifyTable(PlanState *pstate)
HeapTupleHeaderGetDatumLength(oldtupdata.t_data);
ItemPointerSetInvalid(&(oldtupdata.t_self));
/* Historically, view triggers see invalid t_tableOid. */
+ HeapTupleCopyHeaderXids(&oldtupdata);
oldtupdata.t_tableOid =
(relkind == RELKIND_VIEW) ? InvalidOid :
RelationGetRelid(resultRelInfo->ri_RelationDesc);
diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c
index 0e46c59d25..88846e4fd2 100644
--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@@ -1154,6 +1154,7 @@ SPI_modifytuple(Relation rel, HeapTuple tuple, int natts, int *attnum,
mtuple->t_data->t_ctid = tuple->t_data->t_ctid;
mtuple->t_self = tuple->t_self;
mtuple->t_tableOid = tuple->t_tableOid;
+ HeapTupleCopyXids(mtuple, tuple);
}
else
{
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 72c7963578..e32335ea3c 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -1027,14 +1027,14 @@ _read${n}(void)
elsif ($t eq 'uint32'
|| $t eq 'bits32'
|| $t eq 'BlockNumber'
- || $t eq 'Index'
- || $t eq 'SubTransactionId')
+ || $t eq 'Index')
{
print $off "\tWRITE_UINT_FIELD($f);\n";
print $rff "\tREAD_UINT_FIELD($f);\n" unless $no_read;
}
elsif ($t eq 'uint64'
- || $t eq 'AclMode')
+ || $t eq 'AclMode'
+ || $t eq 'SubTransactionId')
{
print $off "\tWRITE_UINT64_FIELD($f);\n";
print $rff "\tREAD_UINT64_FIELD($f);\n" unless $no_read;
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 7159c775fb..825d23a18e 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -243,7 +243,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
* src/backend/access/heap/README.HOT for discussion.
*/
if (index->indcheckxmin &&
- !TransactionIdPrecedes(HeapTupleHeaderGetXmin(indexRelation->rd_indextuple->t_data),
+ !TransactionIdPrecedes(HeapTupleGetXmin(indexRelation->rd_indextuple),
TransactionXmin))
{
root->glob->transientPlan = true;
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 3e037248d6..a508d5d694 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -123,8 +123,8 @@ int autovacuum_vac_ins_thresh;
double autovacuum_vac_ins_scale;
int autovacuum_anl_thresh;
double autovacuum_anl_scale;
-int autovacuum_freeze_max_age;
-int autovacuum_multixact_freeze_max_age;
+int64 autovacuum_freeze_max_age;
+int64 autovacuum_multixact_freeze_max_age;
double autovacuum_vac_cost_delay;
int autovacuum_vac_cost_limit;
@@ -159,10 +159,10 @@ static TransactionId recentXid;
static MultiXactId recentMulti;
/* Default freeze ages to use for autovacuum (varies by database) */
-static int default_freeze_min_age;
-static int default_freeze_table_age;
-static int default_multixact_freeze_min_age;
-static int default_multixact_freeze_table_age;
+static int64 default_freeze_min_age;
+static int64 default_freeze_table_age;
+static int64 default_multixact_freeze_min_age;
+static int64 default_multixact_freeze_table_age;
/* Memory context for long-lived data */
static MemoryContext AutovacMemCxt;
@@ -338,15 +338,15 @@ static void FreeWorkerInfo(int code, Datum arg);
static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map,
TupleDesc pg_class_desc,
- int effective_multixact_freeze_max_age);
+ int64 effective_multixact_freeze_max_age);
static void recheck_relation_needs_vacanalyze(Oid relid, AutoVacOpts *avopts,
Form_pg_class classForm,
- int effective_multixact_freeze_max_age,
+ int64 effective_multixact_freeze_max_age,
bool *dovacuum, bool *doanalyze, bool *wraparound);
static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts,
Form_pg_class classForm,
PgStat_StatTabEntry *tabentry,
- int effective_multixact_freeze_max_age,
+ int64 effective_multixact_freeze_max_age,
bool *dovacuum, bool *doanalyze, bool *wraparound);
static void autovacuum_do_vac_analyze(autovac_table *tab,
@@ -1143,6 +1143,7 @@ do_start_worker(void)
ListCell *cell;
TransactionId xidForceLimit;
MultiXactId multiForceLimit;
+ int64 multiMembersThreshold;
bool for_xid_wrap;
bool for_multi_wrap;
avw_dbase *avdb;
@@ -1179,17 +1180,18 @@ do_start_worker(void)
* particular tables, but not loosened.)
*/
recentXid = ReadNextTransactionId();
- xidForceLimit = recentXid - autovacuum_freeze_max_age;
- /* ensure it's a "normal" XID, else TransactionIdPrecedes misbehaves */
- /* this can cause the limit to go backwards by 3, but that's OK */
- if (xidForceLimit < FirstNormalTransactionId)
- xidForceLimit -= FirstNormalTransactionId;
+ if (recentXid > FirstNormalTransactionId + autovacuum_freeze_max_age)
+ xidForceLimit = recentXid - autovacuum_freeze_max_age;
+ else
+ xidForceLimit = FirstNormalTransactionId;
/* Also determine the oldest datminmxid we will consider. */
recentMulti = ReadNextMultiXactId();
- multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold();
- if (multiForceLimit < FirstMultiXactId)
- multiForceLimit -= FirstMultiXactId;
+ multiMembersThreshold = autovacuum_multixact_freeze_max_age;
+ if (recentMulti > FirstMultiXactId + multiMembersThreshold)
+ multiForceLimit = recentMulti - multiMembersThreshold;
+ else
+ multiForceLimit = FirstMultiXactId;
/*
* Choose a database to connect to. We pick the database that was least
@@ -2010,7 +2012,7 @@ do_autovacuum(void)
BufferAccessStrategy bstrategy;
ScanKeyData key;
TupleDesc pg_class_desc;
- int effective_multixact_freeze_max_age;
+ int64 effective_multixact_freeze_max_age;
bool did_vacuum = false;
bool found_concurrent_worker = false;
int i;
@@ -2033,7 +2035,7 @@ do_autovacuum(void)
* normally autovacuum_multixact_freeze_max_age, but may be less if we are
* short of multixact member space.
*/
- effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
+ effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age;
/*
* Find the pg_database entry and select the default freeze ages. We use
@@ -2811,7 +2813,7 @@ extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc)
static autovac_table *
table_recheck_autovac(Oid relid, HTAB *table_toast_map,
TupleDesc pg_class_desc,
- int effective_multixact_freeze_max_age)
+ int64 effective_multixact_freeze_max_age)
{
Form_pg_class classForm;
HeapTuple classTup;
@@ -2850,10 +2852,10 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
/* OK, it needs something done */
if (doanalyze || dovacuum)
{
- int freeze_min_age;
- int freeze_table_age;
- int multixact_freeze_min_age;
- int multixact_freeze_table_age;
+ int64 freeze_min_age;
+ int64 freeze_table_age;
+ int64 multixact_freeze_min_age;
+ int64 multixact_freeze_table_age;
int log_min_duration;
/*
@@ -2951,7 +2953,7 @@ static void
recheck_relation_needs_vacanalyze(Oid relid,
AutoVacOpts *avopts,
Form_pg_class classForm,
- int effective_multixact_freeze_max_age,
+ int64 effective_multixact_freeze_max_age,
bool *dovacuum,
bool *doanalyze,
bool *wraparound)
@@ -3013,7 +3015,7 @@ relation_needs_vacanalyze(Oid relid,
AutoVacOpts *relopts,
Form_pg_class classForm,
PgStat_StatTabEntry *tabentry,
- int effective_multixact_freeze_max_age,
+ int64 effective_multixact_freeze_max_age,
/* output params below */
bool *dovacuum,
bool *doanalyze,
@@ -3042,8 +3044,8 @@ relation_needs_vacanalyze(Oid relid,
anltuples;
/* freeze parameters */
- int freeze_max_age;
- int multixact_freeze_max_age;
+ int64 freeze_max_age;
+ int64 multixact_freeze_max_age;
TransactionId xidForceLimit;
MultiXactId multiForceLimit;
@@ -3093,17 +3095,19 @@ relation_needs_vacanalyze(Oid relid,
av_enabled = (relopts ? relopts->enabled : true);
/* Force vacuum if table is at risk of wraparound */
- xidForceLimit = recentXid - freeze_max_age;
- if (xidForceLimit < FirstNormalTransactionId)
- xidForceLimit -= FirstNormalTransactionId;
+ if (recentXid > FirstNormalTransactionId + freeze_max_age)
+ xidForceLimit = recentXid - freeze_max_age;
+ else
+ xidForceLimit = FirstNormalTransactionId;
force_vacuum = (TransactionIdIsNormal(classForm->relfrozenxid) &&
TransactionIdPrecedes(classForm->relfrozenxid,
xidForceLimit));
if (!force_vacuum)
{
- multiForceLimit = recentMulti - multixact_freeze_max_age;
- if (multiForceLimit < FirstMultiXactId)
- multiForceLimit -= FirstMultiXactId;
+ if (recentMulti > FirstMultiXactId + multixact_freeze_max_age)
+ multiForceLimit = recentMulti - multixact_freeze_max_age;
+ else
+ multiForceLimit = FirstMultiXactId;
force_vacuum = MultiXactIdIsValid(classForm->relminmxid) &&
MultiXactIdPrecedes(classForm->relminmxid, multiForceLimit);
}
diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c
index 1237118e84..ffc4ead751 100644
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -902,8 +902,14 @@ DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
xl_heap_insert *xlrec;
ReorderBufferChange *change;
RelFileLocator target_locator;
+ bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0;
+ Pointer rec_data = (Pointer) XLogRecGetData(r);
- xlrec = (xl_heap_insert *) XLogRecGetData(r);
+ /* Bypass pd_xid_base and pd_multi_base */
+ if (isinit)
+ rec_data += sizeof(TransactionId) * 2;
+
+ xlrec = (xl_heap_insert *) rec_data;
/*
* Ignore insert records without new tuples (this does happen when
@@ -959,8 +965,13 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
ReorderBufferChange *change;
char *data;
RelFileLocator target_locator;
+ bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0;
+ Pointer rec_data = (Pointer) XLogRecGetData(r);
- xlrec = (xl_heap_update *) XLogRecGetData(r);
+ /* Bypass pd_xid_base and pd_multi_base */
+ if (isinit)
+ rec_data += sizeof(TransactionId) * 2;
+ xlrec = (xl_heap_update *) rec_data;
/* only interested in our database */
XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
@@ -1120,8 +1131,13 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
char *tupledata;
Size tuplelen;
RelFileLocator rlocator;
+ bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0;
+ Pointer rec_data = (Pointer) XLogRecGetData(r);
- xlrec = (xl_heap_multi_insert *) XLogRecGetData(r);
+ /* Bypass pd_xid_base and pd_multi_base */
+ if (isinit)
+ rec_data += sizeof(TransactionId) * 2;
+ xlrec = (xl_heap_multi_insert *) rec_data;
/*
* Ignore insert records without new tuples. This happens when a
@@ -1178,6 +1194,7 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
* We can only figure this out after reassembling the transactions.
*/
tuple->tuple.t_tableOid = InvalidOid;
+ HeapTupleSetZeroXids(&tuple->tuple);
tuple->tuple.t_len = datalen + SizeofHeapTupleHeader;
@@ -1269,6 +1286,7 @@ DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple)
/* we can only figure this out after reassembling the transactions */
tuple->tuple.t_tableOid = InvalidOid;
+ HeapTupleSetZeroXids(&tuple->tuple);
/* data is not stored aligned, copy to aligned storage */
memcpy((char *) &xlhdr,
diff --git a/src/backend/replication/logical/proto.c b/src/backend/replication/logical/proto.c
index aa471dccdf..21cd2a5202 100644
--- a/src/backend/replication/logical/proto.c
+++ b/src/backend/replication/logical/proto.c
@@ -64,7 +64,7 @@ logicalrep_write_begin(StringInfo out, ReorderBufferTXN *txn)
/* fixed fields */
pq_sendint64(out, txn->final_lsn);
pq_sendint64(out, txn->xact_time.commit_time);
- pq_sendint32(out, txn->xid);
+ pq_sendint64(out, txn->xid);
}
/*
@@ -78,7 +78,7 @@ logicalrep_read_begin(StringInfo in, LogicalRepBeginData *begin_data)
if (begin_data->final_lsn == InvalidXLogRecPtr)
elog(ERROR, "final_lsn not set in begin message");
begin_data->committime = pq_getmsgint64(in);
- begin_data->xid = pq_getmsgint(in, 4);
+ begin_data->xid = pq_getmsgint64(in);
}
@@ -132,7 +132,7 @@ logicalrep_write_begin_prepare(StringInfo out, ReorderBufferTXN *txn)
pq_sendint64(out, txn->final_lsn);
pq_sendint64(out, txn->end_lsn);
pq_sendint64(out, txn->xact_time.prepare_time);
- pq_sendint32(out, txn->xid);
+ pq_sendint64(out, txn->xid);
/* send gid */
pq_sendstring(out, txn->gid);
@@ -152,7 +152,7 @@ logicalrep_read_begin_prepare(StringInfo in, LogicalRepPreparedTxnData *begin_da
if (begin_data->end_lsn == InvalidXLogRecPtr)
elog(ERROR, "end_lsn not set in begin prepare message");
begin_data->prepare_time = pq_getmsgint64(in);
- begin_data->xid = pq_getmsgint(in, 4);
+ begin_data->xid = pq_getmsgint64(in);
/* read gid (copy it into a pre-allocated buffer) */
strlcpy(begin_data->gid, pq_getmsgstring(in), sizeof(begin_data->gid));
@@ -185,7 +185,7 @@ logicalrep_write_prepare_common(StringInfo out, LogicalRepMsgType type,
pq_sendint64(out, prepare_lsn);
pq_sendint64(out, txn->end_lsn);
pq_sendint64(out, txn->xact_time.prepare_time);
- pq_sendint32(out, txn->xid);
+ pq_sendint64(out, txn->xid);
/* send gid */
pq_sendstring(out, txn->gid);
@@ -224,7 +224,7 @@ logicalrep_read_prepare_common(StringInfo in, char *msgtype,
if (prepare_data->end_lsn == InvalidXLogRecPtr)
elog(ERROR, "end_lsn is not set in %s message", msgtype);
prepare_data->prepare_time = pq_getmsgint64(in);
- prepare_data->xid = pq_getmsgint(in, 4);
+ prepare_data->xid = pq_getmsgint64(in);
if (prepare_data->xid == InvalidTransactionId)
elog(ERROR, "invalid two-phase transaction ID in %s message", msgtype);
@@ -265,7 +265,7 @@ logicalrep_write_commit_prepared(StringInfo out, ReorderBufferTXN *txn,
pq_sendint64(out, commit_lsn);
pq_sendint64(out, txn->end_lsn);
pq_sendint64(out, txn->xact_time.commit_time);
- pq_sendint32(out, txn->xid);
+ pq_sendint64(out, txn->xid);
/* send gid */
pq_sendstring(out, txn->gid);
@@ -291,7 +291,7 @@ logicalrep_read_commit_prepared(StringInfo in, LogicalRepCommitPreparedTxnData *
if (prepare_data->end_lsn == InvalidXLogRecPtr)
elog(ERROR, "end_lsn is not set in commit prepared message");
prepare_data->commit_time = pq_getmsgint64(in);
- prepare_data->xid = pq_getmsgint(in, 4);
+ prepare_data->xid = pq_getmsgint64(in);
/* read gid (copy it into a pre-allocated buffer) */
strlcpy(prepare_data->gid, pq_getmsgstring(in), sizeof(prepare_data->gid));
@@ -323,7 +323,7 @@ logicalrep_write_rollback_prepared(StringInfo out, ReorderBufferTXN *txn,
pq_sendint64(out, txn->end_lsn);
pq_sendint64(out, prepare_time);
pq_sendint64(out, txn->xact_time.commit_time);
- pq_sendint32(out, txn->xid);
+ pq_sendint64(out, txn->xid);
/* send gid */
pq_sendstring(out, txn->gid);
@@ -351,7 +351,7 @@ logicalrep_read_rollback_prepared(StringInfo in,
elog(ERROR, "rollback_end_lsn is not set in rollback prepared message");
rollback_data->prepare_time = pq_getmsgint64(in);
rollback_data->rollback_time = pq_getmsgint64(in);
- rollback_data->xid = pq_getmsgint(in, 4);
+ rollback_data->xid = pq_getmsgint64(in);
/* read gid (copy it into a pre-allocated buffer) */
strlcpy(rollback_data->gid, pq_getmsgstring(in), sizeof(rollback_data->gid));
@@ -418,7 +418,7 @@ logicalrep_write_insert(StringInfo out, TransactionId xid, Relation rel,
/* transaction ID (if not valid, we're not streaming) */
if (TransactionIdIsValid(xid))
- pq_sendint32(out, xid);
+ pq_sendint64(out, xid);
/* use Oid as relation identifier */
pq_sendint32(out, RelationGetRelid(rel));
@@ -467,7 +467,7 @@ logicalrep_write_update(StringInfo out, TransactionId xid, Relation rel,
/* transaction ID (if not valid, we're not streaming) */
if (TransactionIdIsValid(xid))
- pq_sendint32(out, xid);
+ pq_sendint64(out, xid);
/* use Oid as relation identifier */
pq_sendint32(out, RelationGetRelid(rel));
@@ -542,7 +542,7 @@ logicalrep_write_delete(StringInfo out, TransactionId xid, Relation rel,
/* transaction ID (if not valid, we're not streaming) */
if (TransactionIdIsValid(xid))
- pq_sendint32(out, xid);
+ pq_sendint64(out, xid);
/* use Oid as relation identifier */
pq_sendint32(out, RelationGetRelid(rel));
@@ -596,7 +596,7 @@ logicalrep_write_truncate(StringInfo out,
/* transaction ID (if not valid, we're not streaming) */
if (TransactionIdIsValid(xid))
- pq_sendint32(out, xid);
+ pq_sendint64(out, xid);
pq_sendint32(out, nrelids);
@@ -654,7 +654,7 @@ logicalrep_write_message(StringInfo out, TransactionId xid, XLogRecPtr lsn,
/* transaction ID (if not valid, we're not streaming) */
if (TransactionIdIsValid(xid))
- pq_sendint32(out, xid);
+ pq_sendint64(out, xid);
pq_sendint8(out, flags);
pq_sendint64(out, lsn);
@@ -676,7 +676,7 @@ logicalrep_write_rel(StringInfo out, TransactionId xid, Relation rel,
/* transaction ID (if not valid, we're not streaming) */
if (TransactionIdIsValid(xid))
- pq_sendint32(out, xid);
+ pq_sendint64(out, xid);
/* use Oid as relation identifier */
pq_sendint32(out, RelationGetRelid(rel));
@@ -732,7 +732,7 @@ logicalrep_write_typ(StringInfo out, TransactionId xid, Oid typoid)
/* transaction ID (if not valid, we're not streaming) */
if (TransactionIdIsValid(xid))
- pq_sendint32(out, xid);
+ pq_sendint64(out, xid);
tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(basetypoid));
if (!HeapTupleIsValid(tup))
@@ -1074,7 +1074,7 @@ logicalrep_write_stream_start(StringInfo out,
Assert(TransactionIdIsValid(xid));
/* transaction ID (we're starting to stream, so must be valid) */
- pq_sendint32(out, xid);
+ pq_sendint64(out, xid);
/* 1 if this is the first streaming segment for this xid */
pq_sendbyte(out, first_segment ? 1 : 0);
@@ -1090,7 +1090,7 @@ logicalrep_read_stream_start(StringInfo in, bool *first_segment)
Assert(first_segment);
- xid = pq_getmsgint(in, 4);
+ xid = pq_getmsgint64(in);
*first_segment = (pq_getmsgbyte(in) == 1);
return xid;
@@ -1119,7 +1119,7 @@ logicalrep_write_stream_commit(StringInfo out, ReorderBufferTXN *txn,
Assert(TransactionIdIsValid(txn->xid));
/* transaction ID */
- pq_sendint32(out, txn->xid);
+ pq_sendint64(out, txn->xid);
/* send the flags field (unused for now) */
pq_sendbyte(out, flags);
@@ -1139,7 +1139,7 @@ logicalrep_read_stream_commit(StringInfo in, LogicalRepCommitData *commit_data)
TransactionId xid;
uint8 flags;
- xid = pq_getmsgint(in, 4);
+ xid = pq_getmsgint64(in);
/* read flags (unused for now) */
flags = pq_getmsgbyte(in);
@@ -1172,8 +1172,8 @@ logicalrep_write_stream_abort(StringInfo out, TransactionId xid,
Assert(TransactionIdIsValid(xid) && TransactionIdIsValid(subxid));
/* transaction ID */
- pq_sendint32(out, xid);
- pq_sendint32(out, subxid);
+ pq_sendint64(out, xid);
+ pq_sendint64(out, subxid);
if (write_abort_info)
{
@@ -1195,8 +1195,8 @@ logicalrep_read_stream_abort(StringInfo in,
{
Assert(abort_data);
- abort_data->xid = pq_getmsgint(in, 4);
- abort_data->subxid = pq_getmsgint(in, 4);
+ abort_data->xid = pq_getmsgint64(in);
+ abort_data->subxid = pq_getmsgint64(in);
if (read_abort_info)
{
diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 4c4b1c4868..f355e26257 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -5151,8 +5151,12 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
TransactionId f_mapped_xid;
TransactionId f_create_xid;
XLogRecPtr f_lsn;
- uint32 f_hi,
- f_lo;
+ uint32 f_lsn_hi,
+ f_lsn_lo,
+ f_mapped_xid_hi,
+ f_mapped_xid_lo,
+ f_create_xid_hi,
+ f_create_xid_lo;
RewriteMappingFile *f;
if (strcmp(mapping_de->d_name, ".") == 0 ||
@@ -5164,11 +5168,14 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
continue;
if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT,
- &f_dboid, &f_relid, &f_hi, &f_lo,
- &f_mapped_xid, &f_create_xid) != 6)
+ &f_dboid, &f_relid, &f_lsn_hi, &f_lsn_lo,
+ &f_mapped_xid_hi, &f_mapped_xid_lo,
+ &f_create_xid_hi, &f_create_xid_lo) != 8)
elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name);
- f_lsn = ((uint64) f_hi) << 32 | f_lo;
+ f_lsn = ((uint64) f_lsn_hi) << 32 | f_lsn_lo;
+ f_mapped_xid = ((uint64) f_mapped_xid_hi) << 32 | f_mapped_xid_lo;
+ f_create_xid = ((uint64) f_create_xid_hi) << 32 | f_create_xid_lo;
/* mapping for another database */
if (f_dboid != dboid)
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 5c630116ec..95871f5ad9 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -591,7 +591,7 @@ SnapBuildInitialSnapshot(SnapBuild *builder)
elog(ERROR, "cannot build an initial slot snapshot, not all transactions are monitored anymore");
/* so we don't overwrite the existing value */
- if (TransactionIdIsValid(MyProc->xmin))
+ if (TransactionIdIsValid(pg_atomic_read_u64(&MyProc->xmin)))
elog(ERROR, "cannot build an initial slot snapshot when MyProc->xmin already is valid");
snap = SnapBuildBuildSnapshot(builder);
@@ -613,7 +613,7 @@ SnapBuildInitialSnapshot(SnapBuild *builder)
elog(ERROR, "cannot build an initial slot snapshot as oldest safe xid %llu follows snapshot's xmin %llu",
(unsigned long long) safeXid, (unsigned long long) snap->xmin);
- MyProc->xmin = snap->xmin;
+ pg_atomic_write_u64(&MyProc->xmin, snap->xmin);
/* allocate in transaction context */
newxip = (TransactionId *)
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 899ea5502d..37a8cc944f 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -601,7 +601,7 @@ handle_streamed_transaction(LogicalRepMsgType action, StringInfo s)
* We should have received XID of the subxact as the first part of the
* message, so extract it.
*/
- current_xid = pq_getmsgint(s, 4);
+ current_xid = pq_getmsgint64(s);
if (!TransactionIdIsValid(current_xid))
ereport(ERROR,
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 5507e2e1f6..2fc25a298d 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -1167,10 +1167,6 @@ static void
XLogWalRcvSendHSFeedback(bool immed)
{
TimestampTz now;
- FullTransactionId nextFullXid;
- TransactionId nextXid;
- uint32 xmin_epoch,
- catalog_xmin_epoch;
TransactionId xmin,
catalog_xmin;
@@ -1222,31 +1218,15 @@ XLogWalRcvSendHSFeedback(bool immed)
catalog_xmin = InvalidTransactionId;
}
- /*
- * Get epoch and adjust if nextXid and oldestXmin are different sides of
- * the epoch boundary.
- */
- nextFullXid = ReadNextFullTransactionId();
- nextXid = XidFromFullTransactionId(nextFullXid);
- xmin_epoch = EpochFromFullTransactionId(nextFullXid);
- catalog_xmin_epoch = xmin_epoch;
- if (nextXid < xmin)
- xmin_epoch--;
- if (nextXid < catalog_xmin)
- catalog_xmin_epoch--;
-
- elog(DEBUG2, "sending hot standby feedback xmin %llu epoch %u catalog_xmin %llu catalog_xmin_epoch %u",
- (unsigned long long) xmin, xmin_epoch,
- (unsigned long long) catalog_xmin, catalog_xmin_epoch);
+ elog(DEBUG2, "sending hot standby feedback xmin %llu catalog_xmin %llu",
+ (unsigned long long) xmin, (unsigned long long) catalog_xmin);
/* Construct the message and send it. */
resetStringInfo(&reply_message);
pq_sendbyte(&reply_message, 'h');
pq_sendint64(&reply_message, GetCurrentTimestamp());
- pq_sendint32(&reply_message, xmin);
- pq_sendint32(&reply_message, xmin_epoch);
- pq_sendint32(&reply_message, catalog_xmin);
- pq_sendint32(&reply_message, catalog_xmin_epoch);
+ pq_sendint64(&reply_message, xmin);
+ pq_sendint64(&reply_message, catalog_xmin);
walrcv_send(wrconn, reply_message.data, reply_message.len);
if (TransactionIdIsValid(xmin) || TransactionIdIsValid(catalog_xmin))
primary_has_standby_xmin = true;
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index bdfa5e8fa3..719dc89365 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -255,7 +255,6 @@ static void WalSndUpdateProgress(LogicalDecodingContext *ctx, XLogRecPtr lsn, Tr
static XLogRecPtr WalSndWaitForWal(XLogRecPtr loc);
static void LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time);
static TimeOffset LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now);
-static bool TransactionIdInRecentPast(TransactionId xid, uint32 epoch);
static void WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo,
TimeLineID *tli_p);
@@ -293,7 +292,7 @@ InitWalSender(void)
*/
if (MyDatabaseId == InvalidOid)
{
- Assert(MyProc->xmin == InvalidTransactionId);
+ Assert(pg_atomic_read_u64(&MyProc->xmin) == InvalidTransactionId);
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyProc->statusFlags |= PROC_AFFECTS_ALL_HORIZONS;
ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
@@ -2174,7 +2173,7 @@ PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin, TransactionId feedbac
ReplicationSlot *slot = MyReplicationSlot;
SpinLockAcquire(&slot->mutex);
- MyProc->xmin = InvalidTransactionId;
+ pg_atomic_write_u64(&MyProc->xmin, InvalidTransactionId);
/*
* For physical replication we don't need the interlock provided by xmin
@@ -2206,44 +2205,6 @@ PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin, TransactionId feedbac
}
}
-/*
- * Check that the provided xmin/epoch are sane, that is, not in the future
- * and not so far back as to be already wrapped around.
- *
- * Epoch of nextXid should be same as standby, or if the counter has
- * wrapped, then one greater than standby.
- *
- * This check doesn't care about whether clog exists for these xids
- * at all.
- */
-static bool
-TransactionIdInRecentPast(TransactionId xid, uint32 epoch)
-{
- FullTransactionId nextFullXid;
- TransactionId nextXid;
- uint32 nextEpoch;
-
- nextFullXid = ReadNextFullTransactionId();
- nextXid = XidFromFullTransactionId(nextFullXid);
- nextEpoch = EpochFromFullTransactionId(nextFullXid);
-
- if (xid <= nextXid)
- {
- if (epoch != nextEpoch)
- return false;
- }
- else
- {
- if (epoch + 1 != nextEpoch)
- return false;
- }
-
- if (!TransactionIdPrecedesOrEquals(xid, nextXid))
- return false; /* epoch OK, but it's wrapped around */
-
- return true;
-}
-
/*
* Hot Standby feedback
*/
@@ -2251,9 +2212,7 @@ static void
ProcessStandbyHSFeedbackMessage(void)
{
TransactionId feedbackXmin;
- uint32 feedbackEpoch;
TransactionId feedbackCatalogXmin;
- uint32 feedbackCatalogEpoch;
TimestampTz replyTime;
/*
@@ -2262,10 +2221,8 @@ ProcessStandbyHSFeedbackMessage(void)
* of this message.
*/
replyTime = pq_getmsgint64(&reply_message);
- feedbackXmin = pq_getmsgint(&reply_message, 4);
- feedbackEpoch = pq_getmsgint(&reply_message, 4);
- feedbackCatalogXmin = pq_getmsgint(&reply_message, 4);
- feedbackCatalogEpoch = pq_getmsgint(&reply_message, 4);
+ feedbackXmin = pq_getmsgint64(&reply_message);
+ feedbackCatalogXmin = pq_getmsgint64(&reply_message);
if (message_level_is_interesting(DEBUG2))
{
@@ -2274,11 +2231,9 @@ ProcessStandbyHSFeedbackMessage(void)
/* Copy because timestamptz_to_str returns a static buffer */
replyTimeStr = pstrdup(timestamptz_to_str(replyTime));
- elog(DEBUG2, "hot standby feedback xmin %llu epoch %u, catalog_xmin %llu epoch %u reply_time %s",
+ elog(DEBUG2, "hot standby feedback xmin %llu, catalog_xmin %llu reply_time %s",
(unsigned long long) feedbackXmin,
- feedbackEpoch,
(unsigned long long) feedbackCatalogXmin,
- feedbackCatalogEpoch,
replyTimeStr);
pfree(replyTimeStr);
@@ -2303,24 +2258,12 @@ ProcessStandbyHSFeedbackMessage(void)
if (!TransactionIdIsNormal(feedbackXmin)
&& !TransactionIdIsNormal(feedbackCatalogXmin))
{
- MyProc->xmin = InvalidTransactionId;
+ pg_atomic_write_u64(&MyProc->xmin, InvalidTransactionId);
if (MyReplicationSlot != NULL)
PhysicalReplicationSlotNewXmin(feedbackXmin, feedbackCatalogXmin);
return;
}
- /*
- * Check that the provided xmin/epoch are sane, that is, not in the future
- * and not so far back as to be already wrapped around. Ignore if not.
- */
- if (TransactionIdIsNormal(feedbackXmin) &&
- !TransactionIdInRecentPast(feedbackXmin, feedbackEpoch))
- return;
-
- if (TransactionIdIsNormal(feedbackCatalogXmin) &&
- !TransactionIdInRecentPast(feedbackCatalogXmin, feedbackCatalogEpoch))
- return;
-
/*
* Set the WalSender's xmin equal to the standby's requested xmin, so that
* the xmin will be taken into account by GetSnapshotData() /
@@ -2358,9 +2301,9 @@ ProcessStandbyHSFeedbackMessage(void)
{
if (TransactionIdIsNormal(feedbackCatalogXmin)
&& TransactionIdPrecedes(feedbackCatalogXmin, feedbackXmin))
- MyProc->xmin = feedbackCatalogXmin;
+ pg_atomic_write_u64(&MyProc->xmin, feedbackCatalogXmin);
else
- MyProc->xmin = feedbackXmin;
+ pg_atomic_write_u64(&MyProc->xmin, feedbackXmin);
}
}
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 7f014a0cbb..16e46a331a 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -2456,6 +2456,7 @@ statext_expressions_load(Oid stxoid, bool inh, int idx)
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
tmptup.t_data = td;
+ HeapTupleCopyHeaderXids(&tmptup);
tup = heap_copytuple(&tmptup);
diff --git a/src/backend/storage/buffer/Makefile b/src/backend/storage/buffer/Makefile
index fd7c40dcb0..ffcc0fc290 100644
--- a/src/backend/storage/buffer/Makefile
+++ b/src/backend/storage/buffer/Makefile
@@ -17,6 +17,7 @@ OBJS = \
buf_table.o \
bufmgr.o \
freelist.o \
- localbuf.o
+ localbuf.o \
+ heap_convert.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index f7c67d504c..3487610497 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -472,7 +472,8 @@ ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref)
)
-static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence,
+static Buffer ReadBuffer_common(Relation reln,
+ SMgrRelation smgr, char relpersistence,
ForkNumber forkNum, BlockNumber blockNum,
ReadBufferMode mode, BufferAccessStrategy strategy,
bool *hit);
@@ -800,7 +801,8 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
* miss.
*/
pgstat_count_buffer_read(reln);
- buf = ReadBuffer_common(RelationGetSmgr(reln), reln->rd_rel->relpersistence,
+ buf = ReadBuffer_common(reln,
+ RelationGetSmgr(reln), reln->rd_rel->relpersistence,
forkNum, blockNum, mode, strategy, &hit);
if (hit)
pgstat_count_buffer_hit(reln);
@@ -827,7 +829,7 @@ ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum,
SMgrRelation smgr = smgropen(rlocator, InvalidBackendId);
- return ReadBuffer_common(smgr, permanent ? RELPERSISTENCE_PERMANENT :
+ return ReadBuffer_common(NULL, smgr, permanent ? RELPERSISTENCE_PERMANENT :
RELPERSISTENCE_UNLOGGED, forkNum, blockNum,
mode, strategy, &hit);
}
@@ -1002,7 +1004,7 @@ ExtendBufferedRelTo(BufferManagerRelation bmr,
bool hit;
Assert(extended_by == 0);
- buffer = ReadBuffer_common(bmr.smgr, bmr.relpersistence,
+ buffer = ReadBuffer_common(bmr.rel, bmr.smgr, bmr.relpersistence,
fork, extend_to - 1, mode, strategy,
&hit);
}
@@ -1016,7 +1018,8 @@ ExtendBufferedRelTo(BufferManagerRelation bmr,
* *hit is set to true if the request was satisfied from shared buffer cache.
*/
static Buffer
-ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
+ReadBuffer_common(Relation reln,
+ SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
BlockNumber blockNum, ReadBufferMode mode,
BufferAccessStrategy strategy, bool *hit)
{
@@ -1170,6 +1173,30 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
blockNum,
relpath(smgr->smgr_rlocator, forkNum))));
}
+
+ if (PageGetPageLayoutVersion(bufBlock) != PG_PAGE_LAYOUT_VERSION &&
+ !PageIsNew((Page) bufBlock))
+ {
+ Buffer buf = BufferDescriptorGetBuffer(bufHdr);
+
+ /*
+ * All the forks but MAIN_FORKNUM should be converted to the
+ * actual page layout version in pg_upgrade.
+ */
+ if (forkNum != MAIN_FORKNUM)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("invalid fork type (%d) in block %u of relation %s",
+ forkNum, blockNum,
+ relpath(smgr->smgr_rlocator, forkNum))));
+
+ LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE);
+ /* Check for no concurrent changes */
+ if (PageGetPageLayoutVersion(bufBlock) != PG_PAGE_LAYOUT_VERSION)
+ convert_page(reln, bufBlock, buf, blockNum);
+
+ LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
+ }
}
/*
@@ -4767,6 +4794,64 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
}
}
+/*
+ * Mark buffer as converted - ie its format is changed without logical changes.
+ *
+ * It will override `full_page_write` GUC setting in XLogRecordAssemble.
+ */
+void
+MarkBufferConverted(Buffer buffer, bool converted)
+{
+ BufferDesc *bufHdr;
+ uint32 buf_state;
+ bool has_mark;
+
+ if (!BufferIsValid(buffer))
+ elog(ERROR, "bad buffer ID: %d", buffer);
+
+ Assert(!BufferIsLocal(buffer));
+
+ bufHdr = GetBufferDescriptor(buffer - 1);
+
+ Assert(GetPrivateRefCount(buffer) > 0);
+ if (converted)
+ {
+ /* here, either share or exclusive lock is OK */
+ Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
+ }
+
+ buf_state = pg_atomic_read_u32(&bufHdr->state);
+ has_mark = (buf_state & BM_CONVERTED) != 0;
+ if (converted == has_mark)
+ return;
+
+ buf_state = LockBufHdr(bufHdr);
+ buf_state &= ~BM_CONVERTED;
+ if (converted)
+ buf_state |= BM_CONVERTED;
+ UnlockBufHdr(bufHdr, buf_state);
+}
+
+bool
+IsBufferConverted(Buffer buffer)
+{
+
+ BufferDesc *bufHdr;
+ uint32 buf_state;
+
+ if (!BufferIsValid(buffer))
+ elog(ERROR, "bad buffer ID: %d", buffer);
+
+ Assert(!BufferIsLocal(buffer));
+
+ bufHdr = GetBufferDescriptor(buffer - 1);
+
+ Assert(GetPrivateRefCount(buffer) > 0);
+
+ buf_state = pg_atomic_read_u32(&bufHdr->state);
+ return (buf_state & BM_CONVERTED) != 0;
+}
+
/*
* Release buffer content locks for shared buffers.
*
@@ -4801,6 +4886,47 @@ UnlockBuffers(void)
}
}
+/*
+ * Is shared buffer is locked?
+ */
+bool
+IsBufferLocked(Buffer buffer)
+{
+ BufferDesc *buf;
+
+ if (buffer == InvalidBuffer)
+ return true;
+
+ Assert(BufferIsPinned(buffer));
+ if (BufferIsLocal(buffer))
+ return true; /* local buffers need no lock */
+
+ buf = GetBufferDescriptor(buffer - 1);
+
+ return LWLockHeldByMe(BufferDescriptorGetContentLock(buf));
+}
+
+/*
+ * Is shared buffer is locked exclusive?
+ */
+bool
+IsBufferLockedExclusive(Buffer buffer)
+{
+ BufferDesc *buf;
+
+ if (buffer == InvalidBuffer)
+ return true;
+
+ Assert(BufferIsPinned(buffer));
+ if (BufferIsLocal(buffer))
+ return true; /* local buffers need no lock */
+
+ buf = GetBufferDescriptor(buffer - 1);
+
+ return LWLockHeldByMeInMode(BufferDescriptorGetContentLock(buf),
+ LW_EXCLUSIVE);
+}
+
/*
* Acquire or release the content_lock for the buffer.
*/
diff --git a/src/backend/storage/buffer/heap_convert.c b/src/backend/storage/buffer/heap_convert.c
new file mode 100644
index 0000000000..2609f11072
--- /dev/null
+++ b/src/backend/storage/buffer/heap_convert.c
@@ -0,0 +1,549 @@
+/*-------------------------------------------------------------------------
+ *
+ * heap_convert.c
+ * Heap page converter from 32bit to 64bit xid format
+ *
+ * Copyright (c) 2015-2022, Postgres Professional
+ *
+ * IDENTIFICATION
+ * src/backend/storage/buffer/heap_convert.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/generic_xlog.h"
+#include "access/heapam.h"
+#include "access/multixact.h"
+#include "catalog/catalog.h"
+#include "storage/bufmgr.h"
+#include "storage/checksum.h"
+
+static void repack_heap_tuples(Relation rel, Page page, Buffer buf,
+ BlockNumber blkno, bool double_xmax);
+
+/*
+ * itemoffcompare
+ * Sorting support for repack_tuples()
+ */
+int
+itemoffcompare(const void *item1, const void *item2)
+{
+ /* Sort in decreasing itemoff order */
+ return ((ItemIdCompactData *) item2)->itemoff -
+ ((ItemIdCompactData *) item1)->itemoff;
+}
+
+/*
+ * Lazy page conversion from 32-bit to 64-bit XID at first read.
+ */
+void
+convert_page(Relation rel, Page page, Buffer buf, BlockNumber blkno)
+{
+ static unsigned logcnt = 0;
+ bool logit;
+ PageHeader hdr = (PageHeader) page;
+ GenericXLogState *state = NULL;
+ uint16 checksum;
+ bool try_double_xmax;
+
+ /* Not during XLog replaying */
+ Assert(rel != NULL);
+
+ /* Verify checksum */
+ if (hdr->pd_checksum)
+ {
+ checksum = pg_checksum_page((char *) page, blkno);
+ if (checksum != hdr->pd_checksum)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("page verification failed, calculated checksum %u but expected %u",
+ checksum, hdr->pd_checksum)));
+ }
+
+ /*
+ * We occasionally force logging of page conversion, so never-changed
+ * pages are converted in the end. FORCE_LOG_EVERY is chosen arbitrarily
+ * to log neither too much nor too little.
+ */
+#define FORCE_LOG_EVERY 128
+ logit = !RecoveryInProgress() && XLogIsNeeded() && RelationNeedsWAL(rel);
+ logit = logit && (++logcnt % FORCE_LOG_EVERY) == 0;
+ if (logit)
+ {
+ state = GenericXLogStart(rel);
+ page = GenericXLogRegisterBuffer(state, buf,
+ GENERIC_XLOG_FULL_IMAGE);
+ hdr = (PageHeader) page;
+ }
+#ifdef USE_ASSERT_CHECKING
+ else
+ {
+ /* Not already converted */
+ Assert(PageGetPageLayoutVersion(page) != PG_PAGE_LAYOUT_VERSION);
+ /* Page in 32-bit xid format should not have PageSpecial. */
+ Assert(PageGetSpecialSize(page) == 0);
+ }
+#endif
+
+ switch (rel->rd_rel->relkind)
+ {
+ case 't':
+ try_double_xmax = hdr->pd_upper - hdr->pd_lower <
+ MAXALIGN(sizeof(ToastPageSpecialData));
+ repack_heap_tuples(rel, page, buf, blkno, try_double_xmax);
+ break;
+ case 'r':
+ case 'p':
+ case 'm':
+ try_double_xmax = hdr->pd_upper - hdr->pd_lower <
+ MAXALIGN(sizeof(HeapPageSpecialData));
+ repack_heap_tuples(rel, page, buf, blkno, try_double_xmax);
+ break;
+ case 'i':
+ /* no need to convert index */
+ case 'S':
+ /* no real need to convert sequences */
+ break;
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("conversion for relation \"%s\" cannot be done",
+ RelationGetRelationName(rel)),
+ errdetail_relkind_not_supported(rel->rd_rel->relkind)));
+ }
+
+ hdr->pd_checksum = pg_checksum_page((char *) page, blkno);
+
+ PageSetPageSizeAndVersion(page, PageGetPageSize(page),
+ PG_PAGE_LAYOUT_VERSION);
+
+ if (logit)
+ {
+ /*
+ * Finish logging buffer conversion and mark buffer as dirty.
+ */
+ Assert(state != NULL);
+ MarkBufferDirty(buf);
+ GenericXLogFinish(state);
+ }
+ else
+ {
+ /*
+ * Otherwise, it will be logged with full-page-write record on first
+ * actual change.
+ */
+ MarkBufferConverted(buf, true);
+ }
+}
+
+/*
+ * Convert xmin and xmax in a tuple.
+ * This also considers special cases: "double xmax" page format and multixact
+ * in xmax.
+ */
+static void
+convert_heap_tuple_xids(HeapTupleHeader tuple, TransactionId xid_base,
+ MultiXactId multi_base, bool double_xmax)
+{
+ /* Convert xmin */
+ if (double_xmax)
+ {
+ /* Prepare tuple for "double xmax" page format */
+ tuple->t_infomask |= HEAP_XMIN_FROZEN;
+ tuple->t_choice.t_heap.t_xmin = 0;
+ }
+ else
+ {
+ TransactionId xmin = tuple->t_choice.t_heap.t_xmin;
+
+ if (TransactionIdIsNormal(xmin))
+ {
+ if (HeapTupleHeaderXminFrozen(tuple))
+ tuple->t_choice.t_heap.t_xmin = FrozenTransactionId;
+ else if (HeapTupleHeaderXminInvalid(tuple))
+ tuple->t_choice.t_heap.t_xmin = InvalidTransactionId;
+ else
+ {
+ Assert(xmin >= xid_base + FirstNormalTransactionId);
+ /* Subtract xid_base from normal xmin */
+ tuple->t_choice.t_heap.t_xmin = xmin - xid_base;
+ }
+ }
+ }
+
+ /* If tuple has multixact flag, handle mxid wraparound */
+ if ((tuple->t_infomask & HEAP_XMAX_IS_MULTI) &&
+ !(tuple->t_infomask & HEAP_XMAX_INVALID))
+ {
+ MultiXactId mxid = tuple->t_choice.t_heap.t_xmax;
+
+ /* Handle mxid wraparound */
+ if (mxid < multi_base)
+ {
+ mxid += ((MultiXactId) 1 << 32) - FirstMultiXactId;
+ Assert(mxid >= multi_base);
+ }
+
+ if (double_xmax)
+ {
+ /* Save converted mxid into "double xmax" format */
+ HeapTupleHeaderSetDoubleXmax(tuple, mxid);
+ }
+ else
+ {
+ /*
+ * Save converted mxid offset relative to (minmxid - 1), which
+ * will be page's mxid base.
+ */
+ Assert(mxid - multi_base + FirstMultiXactId <= PG_UINT32_MAX);
+ tuple->t_choice.t_heap.t_xmax =
+ (uint32) (mxid - multi_base + FirstMultiXactId);
+ }
+ }
+ /* Convert xmax */
+ else if (!(tuple->t_infomask & HEAP_XMAX_INVALID))
+ {
+ TransactionId xmax = tuple->t_choice.t_heap.t_xmax;
+
+ if (double_xmax)
+ {
+ /* Save converted xmax into "double xmax" format */
+ HeapTupleHeaderSetDoubleXmax(tuple, xmax);
+ }
+ else if (TransactionIdIsNormal(xmax))
+ {
+ /* Subtract xid_base from normal xmax */
+ Assert(xmax >= xid_base + FirstNormalTransactionId);
+ tuple->t_choice.t_heap.t_xmax = xmax - xid_base;
+ }
+ }
+ else
+ {
+ if (double_xmax)
+ HeapTupleHeaderSetDoubleXmax(tuple, InvalidTransactionId);
+ else
+ tuple->t_choice.t_heap.t_xmax = InvalidTransactionId;
+ }
+}
+
+/*
+ * Correct page xmin/xmax based on tuple xmin/xmax values.
+ */
+static void
+compute_xid_min_max(HeapTuple tuple, MultiXactId multi_base,
+ TransactionId *xid_min, TransactionId *xid_max,
+ MultiXactId *multi_min, MultiXactId *multi_max)
+{
+ /* xmin */
+ if (!HeapTupleHeaderXminInvalid(tuple->t_data) &&
+ !HeapTupleHeaderXminFrozen(tuple->t_data))
+ {
+ TransactionId xid = HeapTupleGetRawXmin(tuple);
+
+ if (TransactionIdIsNormal(xid))
+ {
+ *xid_max = Max(*xid_max, xid);
+ *xid_min = Min(*xid_min, xid);
+ }
+ }
+
+ /* xmax */
+ if (!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID))
+ {
+ TransactionId xid;
+
+ if (tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)
+ {
+ MultiXactId mxid = HeapTupleGetRawXmax(tuple);
+
+ Assert(MultiXactIdIsValid(mxid));
+
+ /* Handle mxid wraparound */
+ if (mxid < multi_base)
+ {
+ mxid += ((MultiXactId) 1 << 32) - FirstMultiXactId;
+ Assert(mxid >= multi_base);
+ }
+
+ *multi_max = Max(*multi_max, mxid);
+ *multi_min = Min(*multi_min, mxid);
+
+ /*
+ * Also take into account hidden update xid, which can be
+ * extracted by the vacuum.
+ */
+ if (tuple->t_data->t_infomask & HEAP_XMAX_LOCK_ONLY)
+ xid = InvalidTransactionId;
+ else
+ xid = HeapTupleGetUpdateXid(tuple);
+ }
+ else
+ {
+ xid = HeapTupleGetRawXmax(tuple);
+ }
+
+ if (TransactionIdIsNormal(xid))
+ {
+ *xid_max = Max(*xid_max, xid);
+ *xid_min = Min(*xid_min, xid);
+ }
+ }
+}
+
+/*
+ * Returns true if both:
+ * - xid_max: an uppper boundary of xmin's and xmax'es of all tuples on a page
+ * - xid_min: a lower boundary of xmin's and xmax'es of all tuples on a page
+ * can be expressed by 32-bit number relative to page's xid_base/multi_base
+ * or invalid.
+ *
+ * True value effectively means that these tuples can be directly put on one
+ * page in 64-xid format.
+ */
+static inline bool
+xids_fit_page(TransactionId xid_min, TransactionId xid_max,
+ MultiXactId multi_min, MultiXactId multi_max)
+{
+ bool xid_max_fits = false;
+ bool multi_max_fits = false;
+
+ if (xid_max == InvalidTransactionId)
+ xid_max_fits = true;
+
+ if (xid_max - xid_min <= MaxShortTransactionId - FirstNormalTransactionId)
+ xid_max_fits = true;
+
+ if (multi_max == InvalidMultiXactId)
+ multi_max_fits = true;
+
+ if (multi_max - multi_min <= MaxShortTransactionId - FirstMultiXactId)
+ multi_max_fits = true;
+
+ return xid_max_fits && multi_max_fits;
+}
+
+/*
+ * Set "base" for page in 64-bit XID format.
+ *
+ * This should not be called for double xmax pages. They do not have place for
+ * page special.
+ */
+static inline void
+heap_page_set_base(Page page,
+ TransactionId xid_min, TransactionId xid_max,
+ MultiXactId multi_min, MultiXactId multi_max,
+ TransactionId *xid_base, MultiXactId *multi_base,
+ bool is_toast)
+{
+ PageHeader hdr = (PageHeader) page;
+
+ if (xid_max != InvalidTransactionId)
+ *xid_base = xid_min - FirstNormalTransactionId;
+ else
+ *xid_base = InvalidTransactionId;
+
+ if (multi_max != InvalidMultiXactId)
+ *multi_base = multi_min - FirstMultiXactId;
+ else
+ *multi_base = InvalidMultiXactId;
+
+ if (is_toast)
+ {
+ ToastPageSpecial special;
+
+ hdr->pd_special = BLCKSZ - MAXALIGN(sizeof(ToastPageSpecialData));
+ special = ToastPageGetSpecial(page);
+ special->pd_xid_base = *xid_base;
+ }
+ else
+ {
+ HeapPageSpecial special;
+
+ hdr->pd_special = BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData));
+ special = HeapPageGetSpecial(page);
+ special->pd_xid_base = *xid_base;
+ special->pd_multi_base = *multi_base;
+ }
+}
+
+/*
+ * repack_heap_tuples
+ * Convert heap page format reusing space of dead tuples
+ */
+static void
+repack_heap_tuples(Relation rel, Page page, Buffer buf, BlockNumber blkno,
+ bool try_double_xmax)
+{
+ ItemIdCompactData items[MaxHeapTuplesPerPage];
+ ItemIdCompact itemPtr = items;
+ int nitems = 0,
+ maxoff = PageGetMaxOffsetNumber(page),
+ idx,
+ occupied_space = 0;
+ Offset upper;
+ bool double_xmax,
+ special_fits,
+ toast;
+ PageHeader hdr = (PageHeader) page,
+ new_hdr;
+ PGAlignedBlock zerobuf = {0};
+ Page new_page;
+ MultiXactId multi_base = rel->rd_rel->relminmxid,
+ multi_min = MaxMultiXactId,
+ multi_max = InvalidMultiXactId;
+ TransactionId xid_base = rel->rd_rel->relfrozenxid,
+ xid_min = MaxTransactionId,
+ xid_max = InvalidTransactionId;
+
+ toast = IsToastRelation(rel);
+
+ if (TransactionIdIsNormal(hdr->pd_prune_xid))
+ xid_min = xid_max = hdr->pd_prune_xid;
+
+ for (idx = 0; idx < maxoff; idx++)
+ {
+ HeapTupleData tuple;
+ ItemId lp;
+
+ lp = PageGetItemId(page, idx + 1);
+
+ /* Skip redirects and items without storage */
+ if (!ItemIdHasStorage(lp))
+ continue;
+
+ /* Build in-memory tuple representation */
+ tuple.t_tableOid = 1; /* doesn't matter in this case */
+ tuple.t_data = (HeapTupleHeader) PageGetItem(page, lp);
+ HeapTupleCopyHeaderXids(&tuple);
+ tuple.t_len = ItemIdGetLength(lp);
+ ItemPointerSet(&(tuple.t_self), blkno, ItemIdGetOffset(lp));
+
+ /*
+ * This is only needed to determine whether tuple is HEAPTUPLE_DEAD or
+ * HEAPTUPLE_RECENTLY_DEAD. And since this is the first time we read
+ * page after pg_upgrade, it cannot be HEAPTUPLE_RECENTLY_DEAD. See
+ * HeapTupleSatisfiesVacuum() for details
+ */
+ if (try_double_xmax &&
+ HeapTupleSatisfiesVacuum(&tuple,
+ (TransactionId) 1 << 32, buf) == HEAPTUPLE_DEAD)
+ {
+ ItemIdSetDead(lp);
+ }
+
+ if (ItemIdIsNormal(lp) && ItemIdHasStorage(lp))
+ {
+ itemPtr->offsetindex = idx;
+ itemPtr->itemoff = ItemIdGetOffset(lp);
+ if (unlikely(itemPtr->itemoff < hdr->pd_upper ||
+ itemPtr->itemoff >= hdr->pd_special))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("corrupted item pointer: %u",
+ itemPtr->itemoff)));
+ }
+
+ itemPtr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
+ occupied_space += itemPtr->alignedlen;
+ nitems++;
+ itemPtr++;
+ if (try_double_xmax)
+ {
+ HeapTupleSetXmin(&tuple, FrozenTransactionId);
+ HeapTupleHeaderStoreXminFrozen(tuple.t_data);
+ }
+
+ compute_xid_min_max(&tuple, multi_base,
+ &xid_min, &xid_max,
+ &multi_min, &multi_max);
+ }
+ }
+
+ new_page = (Page) zerobuf.data;
+ MemSet(new_page, 0, BLCKSZ);
+ /* Write new header */
+ new_hdr = (PageHeader) new_page;
+ *new_hdr = *hdr;
+ new_hdr->pd_lower = SizeOfPageHeaderData + maxoff * sizeof(ItemIdData);
+
+ if (toast)
+ special_fits = BLCKSZ - new_hdr->pd_lower - occupied_space >=
+ sizeof(ToastPageSpecialData);
+ else
+ special_fits = BLCKSZ - new_hdr->pd_lower - occupied_space >=
+ sizeof(HeapPageSpecialData);
+
+ double_xmax = !special_fits ||
+ !xids_fit_page(xid_min, xid_max, multi_min, multi_max);
+
+ if (!double_xmax)
+ {
+ Assert(xid_max == InvalidTransactionId || xid_max >= xid_min);
+ Assert(multi_max == InvalidMultiXactId || multi_max >= multi_min);
+
+ heap_page_set_base(new_page,
+ xid_min, xid_max,
+ multi_min, multi_max,
+ &xid_base, &multi_base,
+ toast);
+
+ HeapPageSetPruneXid(new_page, new_hdr->pd_prune_xid, toast);
+ }
+ else
+ {
+ /* No space for special area, switch to "double xmax" format */
+ elog(DEBUG2, "convert heap page %u of relation \"%s\" to double xmax format",
+ blkno, RelationGetRelationName(rel));
+
+ if (try_double_xmax)
+ {
+ xid_base = InvalidTransactionId;
+ multi_base = InvalidMultiXactId;
+ }
+ else
+ {
+ repack_heap_tuples(rel, page, buf, blkno, true);
+ return;
+ }
+ }
+
+ /* Copy ItemIds with an offset */
+ memcpy((char *) new_page + SizeOfPageHeaderData,
+ (char *) page + SizeOfPageHeaderData,
+ hdr->pd_lower - SizeOfPageHeaderData);
+
+ /* Move live tuples */
+ upper = new_hdr->pd_special;
+ for (idx = 0; idx < nitems; idx++)
+ {
+ HeapTupleHeader tuple;
+ ItemId lp;
+
+ itemPtr = &items[idx];
+ lp = PageGetItemId(new_page, itemPtr->offsetindex + 1);
+ upper -= itemPtr->alignedlen;
+ occupied_space -= itemPtr->alignedlen;
+
+ memcpy((char *) new_page + upper,
+ (char *) page + itemPtr->itemoff,
+ itemPtr->alignedlen);
+
+ tuple = (HeapTupleHeader) (((char *) new_page) + upper);
+
+ convert_heap_tuple_xids(tuple, xid_base, multi_base, double_xmax);
+
+ lp->lp_off = upper;
+ }
+
+ Assert(occupied_space == 0);
+
+ new_hdr->pd_upper = upper;
+ if (new_hdr->pd_lower > new_hdr->pd_upper)
+ elog(ERROR, "cannot convert block %u of relation \"%s\"",
+ blkno, RelationGetRelationName(rel));
+
+ memcpy(page, new_page, BLCKSZ);
+}
diff --git a/src/backend/storage/buffer/meson.build b/src/backend/storage/buffer/meson.build
index ea2f9c045a..e1ca4a23d4 100644
--- a/src/backend/storage/buffer/meson.build
+++ b/src/backend/storage/buffer/meson.build
@@ -5,5 +5,6 @@ backend_sources += files(
'buf_table.c',
'bufmgr.c',
'freelist.c',
+ 'heap_convert.c',
'localbuf.c',
)
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 55d719816f..5967955150 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -66,7 +66,7 @@
#include "utils/rel.h"
#include "utils/snapmgr.h"
-#define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var))))
+#define UINT64_ACCESS_ONCE(var) ((uint64)(*((volatile uint64 *)&(var))))
/* Our shared memory area */
typedef struct ProcArrayStruct
@@ -366,8 +366,6 @@ static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid);
static void MaintainLatestCompletedXid(TransactionId latestXid);
static void MaintainLatestCompletedXidRecovery(TransactionId latestXid);
-static inline FullTransactionId FullXidRelativeTo(FullTransactionId rel,
- TransactionId xid);
static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons);
/*
@@ -525,7 +523,8 @@ ProcArrayAdd(PGPROC *proc)
arrayP->pgprocnos[index] = proc->pgprocno;
proc->pgxactoff = index;
- ProcGlobal->xids[index] = proc->xid;
+ pg_atomic_write_u64(&ProcGlobal->xids[index],
+ pg_atomic_read_u64(&proc->xid));
ProcGlobal->subxidStates[index] = proc->subxidStatus;
ProcGlobal->statusFlags[index] = proc->statusFlags;
@@ -585,7 +584,7 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
if (TransactionIdIsValid(latestXid))
{
- Assert(TransactionIdIsValid(ProcGlobal->xids[myoff]));
+ Assert(TransactionIdIsValid(pg_atomic_read_u64(&ProcGlobal->xids[myoff])));
/* Advance global latestCompletedXid while holding the lock */
MaintainLatestCompletedXid(latestXid);
@@ -593,17 +592,17 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
/* Same with xactCompletionCount */
ShmemVariableCache->xactCompletionCount++;
- ProcGlobal->xids[myoff] = InvalidTransactionId;
+ pg_atomic_write_u64(&ProcGlobal->xids[myoff], InvalidTransactionId);
ProcGlobal->subxidStates[myoff].overflowed = false;
ProcGlobal->subxidStates[myoff].count = 0;
}
else
{
/* Shouldn't be trying to remove a live transaction here */
- Assert(!TransactionIdIsValid(ProcGlobal->xids[myoff]));
+ Assert(!TransactionIdIsValid(pg_atomic_read_u64(&(ProcGlobal->xids[myoff]))));
}
- Assert(!TransactionIdIsValid(ProcGlobal->xids[myoff]));
+ Assert(!TransactionIdIsValid(pg_atomic_read_u64(&(ProcGlobal->xids[myoff]))));
Assert(ProcGlobal->subxidStates[myoff].count == 0);
Assert(ProcGlobal->subxidStates[myoff].overflowed == false);
@@ -649,7 +648,6 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
LWLockRelease(ProcArrayLock);
}
-
/*
* ProcArrayEndTransaction -- mark a transaction as no longer running
*
@@ -674,7 +672,7 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
* else is taking a snapshot. See discussion in
* src/backend/access/transam/README.
*/
- Assert(TransactionIdIsValid(proc->xid));
+ Assert(TransactionIdIsValid(pg_atomic_read_u64(&proc->xid)));
/*
* If we can immediately acquire ProcArrayLock, we clear our own XID
@@ -696,12 +694,12 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
* anyone else's calculation of a snapshot. We might change their
* estimate of global xmin, but that's OK.
*/
- Assert(!TransactionIdIsValid(proc->xid));
+ Assert(!TransactionIdIsValid(pg_atomic_read_u64(&proc->xid)));
Assert(proc->subxidStatus.count == 0);
Assert(!proc->subxidStatus.overflowed);
proc->lxid = InvalidLocalTransactionId;
- proc->xmin = InvalidTransactionId;
+ pg_atomic_write_u64(&proc->xmin, InvalidTransactionId);
/* be sure this is cleared in abort */
proc->delayChkptFlags = 0;
@@ -737,13 +735,14 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
* processes' PGPROC entries.
*/
Assert(LWLockHeldByMeInMode(ProcArrayLock, LW_EXCLUSIVE));
- Assert(TransactionIdIsValid(ProcGlobal->xids[pgxactoff]));
- Assert(ProcGlobal->xids[pgxactoff] == proc->xid);
+ Assert(TransactionIdIsValid(pg_atomic_read_u64(&ProcGlobal->xids[pgxactoff])));
+ Assert(pg_atomic_read_u64(&ProcGlobal->xids[pgxactoff]) ==
+ pg_atomic_read_u64(&proc->xid));
- ProcGlobal->xids[pgxactoff] = InvalidTransactionId;
- proc->xid = InvalidTransactionId;
+ pg_atomic_write_u64(&ProcGlobal->xids[pgxactoff], InvalidTransactionId);
+ pg_atomic_write_u64(&proc->xid, InvalidTransactionId);
proc->lxid = InvalidLocalTransactionId;
- proc->xmin = InvalidTransactionId;
+ pg_atomic_write_u64(&proc->xmin, InvalidTransactionId);
/* be sure this is cleared in abort */
proc->delayChkptFlags = 0;
@@ -796,7 +795,7 @@ ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
uint32 wakeidx;
/* We should definitely have an XID to clear. */
- Assert(TransactionIdIsValid(proc->xid));
+ Assert(TransactionIdIsValid(pg_atomic_read_u64(&proc->xid)));
/* Add ourselves to the list of processes needing a group XID clear. */
proc->procArrayGroupMember = true;
@@ -925,11 +924,11 @@ ProcArrayClearTransaction(PGPROC *proc)
pgxactoff = proc->pgxactoff;
- ProcGlobal->xids[pgxactoff] = InvalidTransactionId;
- proc->xid = InvalidTransactionId;
+ pg_atomic_write_u64(&ProcGlobal->xids[pgxactoff], InvalidTransactionId);
+ pg_atomic_write_u64(&proc->xid, InvalidTransactionId);
proc->lxid = InvalidLocalTransactionId;
- proc->xmin = InvalidTransactionId;
+ pg_atomic_write_u64(&proc->xmin, InvalidTransactionId);
proc->recoveryConflictPending = false;
Assert(!(proc->statusFlags & PROC_VACUUM_STATE_MASK));
@@ -973,8 +972,7 @@ MaintainLatestCompletedXid(TransactionId latestXid)
if (TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
{
- ShmemVariableCache->latestCompletedXid =
- FullXidRelativeTo(cur_latest, latestXid);
+ ShmemVariableCache->latestCompletedXid = FullTransactionIdFromXid(latestXid);
}
Assert(IsBootstrapProcessingMode() ||
@@ -988,7 +986,6 @@ static void
MaintainLatestCompletedXidRecovery(TransactionId latestXid)
{
FullTransactionId cur_latest = ShmemVariableCache->latestCompletedXid;
- FullTransactionId rel;
Assert(AmStartupProcess() || !IsUnderPostmaster);
Assert(LWLockHeldByMe(ProcArrayLock));
@@ -998,14 +995,12 @@ MaintainLatestCompletedXidRecovery(TransactionId latestXid)
* latestCompletedXid to be initialized in recovery. But in recovery it's
* safe to access nextXid without a lock for the startup process.
*/
- rel = ShmemVariableCache->nextXid;
Assert(FullTransactionIdIsValid(ShmemVariableCache->nextXid));
if (!FullTransactionIdIsValid(cur_latest) ||
TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
{
- ShmemVariableCache->latestCompletedXid =
- FullXidRelativeTo(rel, latestXid);
+ ShmemVariableCache->latestCompletedXid = FullTransactionIdFromXid(latestXid);
}
Assert(FullTransactionIdIsNormal(ShmemVariableCache->latestCompletedXid));
@@ -1383,7 +1378,7 @@ bool
TransactionIdIsInProgress(TransactionId xid)
{
static TransactionId *xids = NULL;
- static TransactionId *other_xids;
+ static pg_atomic_uint64 *other_xids;
XidCacheStatus *other_subxidstates;
int nxids = 0;
ProcArrayStruct *arrayP = procArray;
@@ -1479,7 +1474,7 @@ TransactionIdIsInProgress(TransactionId xid)
continue;
/* Fetch xid just once - see GetNewTransactionId */
- pxid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
+ pxid = pg_atomic_read_u64(&(other_xids[pgxactoff]));
if (!TransactionIdIsValid(pxid))
continue;
@@ -1511,7 +1506,7 @@ TransactionIdIsInProgress(TransactionId xid)
for (j = pxids - 1; j >= 0; j--)
{
/* Fetch xid just once - see GetNewTransactionId */
- TransactionId cxid = UINT32_ACCESS_ONCE(proc->subxids.xids[j]);
+ TransactionId cxid = UINT64_ACCESS_ONCE(proc->subxids.xids[j]);
if (TransactionIdEquals(cxid, xid))
{
@@ -1596,7 +1591,7 @@ TransactionIdIsInProgress(TransactionId xid)
topxid = SubTransGetTopmostTransaction(xid);
Assert(TransactionIdIsValid(topxid));
if (!TransactionIdEquals(topxid, xid) &&
- pg_lfind32(topxid, xids, nxids))
+ pg_lfind64(topxid, xids, nxids))
return true;
cachedXidIsNotInProgress = xid;
@@ -1616,7 +1611,7 @@ TransactionIdIsActive(TransactionId xid)
{
bool result = false;
ProcArrayStruct *arrayP = procArray;
- TransactionId *other_xids = ProcGlobal->xids;
+ pg_atomic_uint64 *other_xids = ProcGlobal->xids;
int i;
/*
@@ -1635,7 +1630,7 @@ TransactionIdIsActive(TransactionId xid)
TransactionId pxid;
/* Fetch xid just once - see GetNewTransactionId */
- pxid = UINT32_ACCESS_ONCE(other_xids[i]);
+ pxid = pg_atomic_read_u64(&(other_xids[i]));
if (!TransactionIdIsValid(pxid))
continue;
@@ -1718,7 +1713,7 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
ProcArrayStruct *arrayP = procArray;
TransactionId kaxmin;
bool in_recovery = RecoveryInProgress();
- TransactionId *other_xids = ProcGlobal->xids;
+ pg_atomic_uint64 *other_xids = ProcGlobal->xids;
/* inferred after ProcArrayLock is released */
h->catalog_oldest_nonremovable = InvalidTransactionId;
@@ -1734,7 +1729,8 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
* additions.
*/
{
- TransactionId initial;
+ TransactionId initial,
+ xid;
initial = XidFromFullTransactionId(h->latest_completed);
Assert(TransactionIdIsValid(initial));
@@ -1756,8 +1752,9 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
* definition, can't be any newer changes in the temp table than
* latestCompletedXid.
*/
- if (TransactionIdIsValid(MyProc->xid))
- h->temp_oldest_nonremovable = MyProc->xid;
+ xid = pg_atomic_read_u64(&MyProc->xid);
+ if (TransactionIdIsValid(xid))
+ h->temp_oldest_nonremovable = xid;
else
h->temp_oldest_nonremovable = initial;
}
@@ -1779,8 +1776,8 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
TransactionId xmin;
/* Fetch xid just once - see GetNewTransactionId */
- xid = UINT32_ACCESS_ONCE(other_xids[index]);
- xmin = UINT32_ACCESS_ONCE(proc->xmin);
+ xid = pg_atomic_read_u64(&(other_xids[index]));
+ xmin = pg_atomic_read_u64(&proc->xmin);
/*
* Consider both the transaction's Xmin, and its Xid.
@@ -2106,8 +2103,8 @@ GetSnapshotDataReuse(Snapshot snapshot)
* requirement that concurrent GetSnapshotData() calls yield the same
* xmin.
*/
- if (!TransactionIdIsValid(MyProc->xmin))
- MyProc->xmin = TransactionXmin = snapshot->xmin;
+ if (!TransactionIdIsValid(pg_atomic_read_u64(&MyProc->xmin)))
+ pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = snapshot->xmin);
RecentXmin = snapshot->xmin;
Assert(TransactionIdPrecedesOrEquals(TransactionXmin, RecentXmin));
@@ -2158,7 +2155,7 @@ Snapshot
GetSnapshotData(Snapshot snapshot)
{
ProcArrayStruct *arrayP = procArray;
- TransactionId *other_xids = ProcGlobal->xids;
+ pg_atomic_uint64 *other_xids = ProcGlobal->xids;
TransactionId xmin;
TransactionId xmax;
int count = 0;
@@ -2221,8 +2218,8 @@ GetSnapshotData(Snapshot snapshot)
latest_completed = ShmemVariableCache->latestCompletedXid;
mypgxactoff = MyProc->pgxactoff;
- myxid = other_xids[mypgxactoff];
- Assert(myxid == MyProc->xid);
+ myxid = pg_atomic_read_u64(&other_xids[mypgxactoff]);
+ Assert(myxid == pg_atomic_read_u64(&MyProc->xid));
oldestxid = ShmemVariableCache->oldestXid;
curXactCompletionCount = ShmemVariableCache->xactCompletionCount;
@@ -2256,7 +2253,7 @@ GetSnapshotData(Snapshot snapshot)
for (int pgxactoff = 0; pgxactoff < numProcs; pgxactoff++)
{
/* Fetch xid just once - see GetNewTransactionId */
- TransactionId xid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
+ TransactionId xid = pg_atomic_read_u64(&(other_xids[pgxactoff]));
uint8 statusFlags;
Assert(allProcs[arrayP->pgprocnos[pgxactoff]].pgxactoff == pgxactoff);
@@ -2393,8 +2390,8 @@ GetSnapshotData(Snapshot snapshot)
replication_slot_xmin = procArray->replication_slot_xmin;
replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
- if (!TransactionIdIsValid(MyProc->xmin))
- MyProc->xmin = TransactionXmin = xmin;
+ if (!TransactionIdIsValid(pg_atomic_read_u64(&MyProc->xmin)))
+ pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = xmin);
LWLockRelease(ProcArrayLock);
@@ -2406,12 +2403,7 @@ GetSnapshotData(Snapshot snapshot)
FullTransactionId def_vis_fxid_data;
FullTransactionId oldestfxid;
- /*
- * Converting oldestXid is only safe when xid horizon cannot advance,
- * i.e. holding locks. While we don't hold the lock anymore, all the
- * necessary data has been gathered with lock held.
- */
- oldestfxid = FullXidRelativeTo(latest_completed, oldestxid);
+ oldestfxid = FullTransactionIdFromXid(oldestxid);
/* Check whether there's a replication slot requiring an older xmin. */
def_vis_xid_data =
@@ -2430,8 +2422,8 @@ GetSnapshotData(Snapshot snapshot)
def_vis_xid =
TransactionIdOlder(replication_slot_catalog_xmin, def_vis_xid);
- def_vis_fxid = FullXidRelativeTo(latest_completed, def_vis_xid);
- def_vis_fxid_data = FullXidRelativeTo(latest_completed, def_vis_xid_data);
+ def_vis_fxid = FullTransactionIdFromXid(def_vis_xid);
+ def_vis_fxid_data = FullTransactionIdFromXid(def_vis_xid_data);
/*
* Check if we can increase upper bound. As a previous
@@ -2450,7 +2442,7 @@ GetSnapshotData(Snapshot snapshot)
/* See temp_oldest_nonremovable computation in ComputeXidHorizons() */
if (TransactionIdIsNormal(myxid))
GlobalVisTempRels.definitely_needed =
- FullXidRelativeTo(latest_completed, myxid);
+ FullTransactionIdFromXid(myxid);
else
{
GlobalVisTempRels.definitely_needed = latest_completed;
@@ -2557,7 +2549,7 @@ ProcArrayInstallImportedXmin(TransactionId xmin,
/*
* Likewise, let's just make real sure its xmin does cover us.
*/
- xid = UINT32_ACCESS_ONCE(proc->xmin);
+ xid = pg_atomic_read_u64(&proc->xmin);
if (!TransactionIdIsNormal(xid) ||
!TransactionIdPrecedesOrEquals(xid, xmin))
continue;
@@ -2568,7 +2560,7 @@ ProcArrayInstallImportedXmin(TransactionId xmin,
* GetSnapshotData first, we'll be overwriting a valid xmin here, so
* we don't check that.)
*/
- MyProc->xmin = TransactionXmin = xmin;
+ pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = xmin);
result = true;
break;
@@ -2612,7 +2604,7 @@ ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
* can't go backwards. Also, make sure it's running in the same database,
* so that the per-database xmin cannot go backwards.
*/
- xid = UINT32_ACCESS_ONCE(proc->xmin);
+ xid = pg_atomic_read_u64(&proc->xmin);
if (proc->databaseId == MyDatabaseId &&
TransactionIdIsNormal(xid) &&
TransactionIdPrecedesOrEquals(xid, xmin))
@@ -2621,7 +2613,7 @@ ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
* Install xmin and propagate the statusFlags that affect how the
* value is interpreted by vacuum.
*/
- MyProc->xmin = TransactionXmin = xmin;
+ pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = xmin);
MyProc->statusFlags = (MyProc->statusFlags & ~PROC_XMIN_FLAGS) |
(proc->statusFlags & PROC_XMIN_FLAGS);
ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
@@ -2672,7 +2664,7 @@ GetRunningTransactionData(void)
static RunningTransactionsData CurrentRunningXactsData;
ProcArrayStruct *arrayP = procArray;
- TransactionId *other_xids = ProcGlobal->xids;
+ pg_atomic_uint64 *other_xids = ProcGlobal->xids;
RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
TransactionId latestCompletedXid;
TransactionId oldestRunningXid;
@@ -2731,7 +2723,7 @@ GetRunningTransactionData(void)
TransactionId xid;
/* Fetch xid just once - see GetNewTransactionId */
- xid = UINT32_ACCESS_ONCE(other_xids[index]);
+ xid = pg_atomic_read_u64(&(other_xids[index]));
/*
* We don't need to store transactions that don't have a TransactionId
@@ -2844,7 +2836,7 @@ TransactionId
GetOldestActiveTransactionId(void)
{
ProcArrayStruct *arrayP = procArray;
- TransactionId *other_xids = ProcGlobal->xids;
+ pg_atomic_uint64 *other_xids = ProcGlobal->xids;
TransactionId oldestRunningXid;
int index;
@@ -2870,7 +2862,7 @@ GetOldestActiveTransactionId(void)
TransactionId xid;
/* Fetch xid just once - see GetNewTransactionId */
- xid = UINT32_ACCESS_ONCE(other_xids[index]);
+ xid = pg_atomic_read_u64(&(other_xids[index]));
if (!TransactionIdIsNormal(xid))
continue;
@@ -2958,7 +2950,7 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly)
*/
if (!recovery_in_progress)
{
- TransactionId *other_xids = ProcGlobal->xids;
+ pg_atomic_uint64 *other_xids = ProcGlobal->xids;
/*
* Spin over procArray collecting min(ProcGlobal->xids[i])
@@ -2968,7 +2960,7 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly)
TransactionId xid;
/* Fetch xid just once - see GetNewTransactionId */
- xid = UINT32_ACCESS_ONCE(other_xids[index]);
+ xid = pg_atomic_read_u64(&(other_xids[index]));
if (!TransactionIdIsNormal(xid))
continue;
@@ -3163,7 +3155,7 @@ BackendXidGetPid(TransactionId xid)
{
int result = 0;
ProcArrayStruct *arrayP = procArray;
- TransactionId *other_xids = ProcGlobal->xids;
+ pg_atomic_uint64 *other_xids = ProcGlobal->xids;
int index;
if (xid == InvalidTransactionId) /* never match invalid xid */
@@ -3173,7 +3165,7 @@ BackendXidGetPid(TransactionId xid)
for (index = 0; index < arrayP->numProcs; index++)
{
- if (other_xids[index] == xid)
+ if (pg_atomic_read_u64(&other_xids[index]) == xid)
{
int pgprocno = arrayP->pgprocnos[index];
PGPROC *proc = &allProcs[pgprocno];
@@ -3257,7 +3249,7 @@ GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0,
if (allDbs || proc->databaseId == MyDatabaseId)
{
/* Fetch xmin just once - might change on us */
- TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin);
+ TransactionId pxmin = pg_atomic_read_u64(&proc->xmin);
if (excludeXmin0 && !TransactionIdIsValid(pxmin))
continue;
@@ -3357,7 +3349,7 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
proc->databaseId == dbOid)
{
/* Fetch xmin just once - can't change on us, but good coding */
- TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin);
+ TransactionId pxmin = pg_atomic_read_u64(&proc->xmin);
/*
* We ignore an invalid pxmin because this means that backend has
@@ -3484,7 +3476,7 @@ MinimumActiveBackends(int min)
continue; /* do not count deleted entries */
if (proc == MyProc)
continue; /* do not count myself */
- if (proc->xid == InvalidTransactionId)
+ if (pg_atomic_read_u64(&proc->xid) == InvalidTransactionId)
continue; /* do not count if no XID assigned */
if (proc->pid == 0)
continue; /* do not count prepared xacts */
@@ -4071,17 +4063,13 @@ static void
GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons)
{
GlobalVisSharedRels.maybe_needed =
- FullXidRelativeTo(horizons->latest_completed,
- horizons->shared_oldest_nonremovable);
+ FullTransactionIdFromXid(horizons->shared_oldest_nonremovable);
GlobalVisCatalogRels.maybe_needed =
- FullXidRelativeTo(horizons->latest_completed,
- horizons->catalog_oldest_nonremovable);
+ FullTransactionIdFromXid(horizons->catalog_oldest_nonremovable);
GlobalVisDataRels.maybe_needed =
- FullXidRelativeTo(horizons->latest_completed,
- horizons->data_oldest_nonremovable);
+ FullTransactionIdFromXid(horizons->data_oldest_nonremovable);
GlobalVisTempRels.maybe_needed =
- FullXidRelativeTo(horizons->latest_completed,
- horizons->temp_oldest_nonremovable);
+ FullTransactionIdFromXid(horizons->temp_oldest_nonremovable);
/*
* In longer running transactions it's possible that transactions we
@@ -4170,15 +4158,7 @@ GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
{
FullTransactionId fxid;
- /*
- * Convert 32 bit argument to FullTransactionId. We can do so safely
- * because we know the xid has to, at the very least, be between
- * [oldestXid, nextXid), i.e. within 2 billion of xid. To avoid taking a
- * lock to determine either, we can just compare with
- * state->definitely_needed, which was based on those value at the time
- * the current snapshot was built.
- */
- fxid = FullXidRelativeTo(state->definitely_needed, xid);
+ fxid = FullTransactionIdFromXid(xid);
return GlobalVisTestIsRemovableFullXid(state, fxid);
}
@@ -4241,32 +4221,6 @@ GlobalVisCheckRemovableXid(Relation rel, TransactionId xid)
return GlobalVisTestIsRemovableXid(state, xid);
}
-/*
- * Convert a 32 bit transaction id into 64 bit transaction id, by assuming it
- * is within MaxTransactionId / 2 of XidFromFullTransactionId(rel).
- *
- * Be very careful about when to use this function. It can only safely be used
- * when there is a guarantee that xid is within MaxTransactionId / 2 xids of
- * rel. That e.g. can be guaranteed if the caller assures a snapshot is
- * held by the backend and xid is from a table (where vacuum/freezing ensures
- * the xid has to be within that range), or if xid is from the procarray and
- * prevents xid wraparound that way.
- */
-static inline FullTransactionId
-FullXidRelativeTo(FullTransactionId rel, TransactionId xid)
-{
- TransactionId rel_xid = XidFromFullTransactionId(rel);
-
- Assert(TransactionIdIsValid(xid));
- Assert(TransactionIdIsValid(rel_xid));
-
- /* not guaranteed to find issues, but likely to catch mistakes */
- AssertTransactionIdInAllowableRange(xid);
-
- return FullTransactionIdFromU64(U64FromFullTransactionId(rel)
- + (int32) (xid - rel_xid));
-}
-
/* ----------------------------------------------
* KnownAssignedTransactionIds sub-module
diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c
index 3d97c75bf1..4170653cc6 100644
--- a/src/backend/storage/ipc/sinvaladt.c
+++ b/src/backend/storage/ipc/sinvaladt.c
@@ -429,8 +429,8 @@ BackendIdGetTransactionIds(int backendID, TransactionId *xid,
if (proc != NULL)
{
- *xid = proc->xid;
- *xmin = proc->xmin;
+ *xid = pg_atomic_read_u64(&proc->xid);
+ *xmin = pg_atomic_read_u64(&proc->xmin);
*nsubxid = proc->subxidStatus.count;
*overflowed = proc->subxidStatus.overflowed;
}
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 9352428e99..010ab63c2f 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -522,8 +522,8 @@ ResolveRecoveryConflictWithSnapshotFullXid(FullTransactionId snapshotConflictHor
FullTransactionId nextXid = ReadNextFullTransactionId();
uint64 diff;
- diff = U64FromFullTransactionId(nextXid) -
- U64FromFullTransactionId(snapshotConflictHorizon);
+ diff = XidFromFullTransactionId(nextXid) -
+ XidFromFullTransactionId(snapshotConflictHorizon);
if (diff < MaxTransactionId / 2)
{
TransactionId truncated;
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index b447ddf11b..27dc00ac6a 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -1240,10 +1240,16 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag)
tag->locktag_field1);
break;
case LOCKTAG_TRANSACTION:
- appendStringInfo(buf,
- _("transaction %u"),
- tag->locktag_field1);
- break;
+ {
+ TransactionId xid;
+
+ xid = (TransactionId) tag->locktag_field2 << 32;
+ xid += tag->locktag_field1;
+
+ appendStringInfo(buf, _("transaction %llu"),
+ (unsigned long long) xid);
+ break;
+ }
case LOCKTAG_VIRTUALTRANSACTION:
appendStringInfo(buf,
_("virtual transaction %d/%u"),
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index b8c57b3e16..3e2a55e25e 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -3981,7 +3981,7 @@ GetRunningTransactionLocks(int *nlocks)
{
PGPROC *proc = proclock->tag.myProc;
LOCK *lock = proclock->tag.myLock;
- TransactionId xid = proc->xid;
+ TransactionId xid = pg_atomic_read_u64(&proc->xid);
/*
* Don't record locks for transactions if we know they have
@@ -4601,7 +4601,7 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait)
* so we won't save an XID of a different VXID. It doesn't matter whether
* we save this before or after setting up the primary lock table entry.
*/
- xid = proc->xid;
+ xid = pg_atomic_read_u64(&proc->xid);
/* Done with proc->fpLockBits */
LWLockRelease(&proc->fpInfoLock);
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index 8bfff14a86..1fd326736a 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -334,9 +334,9 @@ static SlruCtlData SerialSlruCtlData;
#define SerialValue(slotno, xid) (*((SerCommitSeqNo *) \
(SerialSlruCtl->shared->page_buffer[slotno] + \
- ((((uint32) (xid)) % SERIAL_ENTRIESPERPAGE) * SERIAL_ENTRYSIZE))))
+ ((((uint64) (xid)) % SERIAL_ENTRIESPERPAGE) * SERIAL_ENTRYSIZE))))
-#define SerialPage(xid) (((uint32) (xid)) / SERIAL_ENTRIESPERPAGE)
+#define SerialPage(xid) ((int64) (((uint64) (xid)) / SERIAL_ENTRIESPERPAGE))
typedef struct SerialControlData
{
@@ -1042,31 +1042,6 @@ CheckPointPredicate(void)
/*----------
* The SLRU is no longer needed. Truncate to head before we set head
* invalid.
- *
- * XXX: It's possible that the SLRU is not needed again until XID
- * wrap-around has happened, so that the segment containing headPage
- * that we leave behind will appear to be new again. In that case it
- * won't be removed until XID horizon advances enough to make it
- * current again.
- *
- * XXX: This should happen in vac_truncate_clog(), not in checkpoints.
- * Consider this scenario, starting from a system with no in-progress
- * transactions and VACUUM FREEZE having maximized oldestXact:
- * - Start a SERIALIZABLE transaction.
- * - Start, finish, and summarize a SERIALIZABLE transaction, creating
- * one SLRU page.
- * - Consume XIDs to reach xidStopLimit.
- * - Finish all transactions. Due to the long-running SERIALIZABLE
- * transaction, earlier checkpoints did not touch headPage. The
- * next checkpoint will change it, but that checkpoint happens after
- * the end of the scenario.
- * - VACUUM to advance XID limits.
- * - Consume ~2M XIDs, crossing the former xidWrapLimit.
- * - Start, finish, and summarize a SERIALIZABLE transaction.
- * SerialAdd() declines to create the targetPage, because headPage
- * is not regarded as in the past relative to that targetPage. The
- * transaction instigating the summarize fails in
- * SimpleLruReadPage().
*/
truncateCutoffPage = serialControl->headPage;
serialControl->headPage = -1;
@@ -3935,7 +3910,7 @@ XidIsConcurrent(TransactionId xid)
if (TransactionIdFollowsOrEquals(xid, snap->xmax))
return true;
- return pg_lfind32(xid, snap->xip, snap->xcnt);
+ return pg_lfind64(xid, snap->xip, snap->xcnt);
}
bool
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index b6451d9d08..cdfdc47ddb 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -203,7 +203,7 @@ InitProcGlobal(void)
* how hotly they are accessed.
*/
ProcGlobal->xids =
- (TransactionId *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->xids));
+ (pg_atomic_uint64 *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->xids));
MemSet(ProcGlobal->xids, 0, TotalProcs * sizeof(*ProcGlobal->xids));
ProcGlobal->subxidStates = (XidCacheStatus *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->subxidStates));
MemSet(ProcGlobal->subxidStates, 0, TotalProcs * sizeof(*ProcGlobal->subxidStates));
@@ -216,6 +216,8 @@ InitProcGlobal(void)
/* Common initialization for all PGPROCs, regardless of type. */
+ pg_atomic_init_u64(&ProcGlobal->xids[i], 0);
+
/*
* Set up per-PGPROC semaphore, latch, and fpInfoLock. Prepared xact
* dummy PGPROCs don't need these though - they're never associated
@@ -377,8 +379,8 @@ InitProcess(void)
MyProc->lxid = InvalidLocalTransactionId;
MyProc->fpVXIDLock = false;
MyProc->fpLocalTransactionId = InvalidLocalTransactionId;
- MyProc->xid = InvalidTransactionId;
- MyProc->xmin = InvalidTransactionId;
+ pg_atomic_init_u64(&MyProc->xid, InvalidTransactionId);
+ pg_atomic_init_u64(&MyProc->xmin, InvalidTransactionId);
MyProc->pid = MyProcPid;
/* backendId, databaseId and roleId will be filled in later */
MyProc->backendId = InvalidBackendId;
@@ -574,8 +576,8 @@ InitAuxiliaryProcess(void)
MyProc->lxid = InvalidLocalTransactionId;
MyProc->fpVXIDLock = false;
MyProc->fpLocalTransactionId = InvalidLocalTransactionId;
- MyProc->xid = InvalidTransactionId;
- MyProc->xmin = InvalidTransactionId;
+ pg_atomic_init_u64(&MyProc->xid, InvalidTransactionId);
+ pg_atomic_init_u64(&MyProc->xmin, InvalidTransactionId);
MyProc->backendId = InvalidBackendId;
MyProc->databaseId = InvalidOid;
MyProc->roleId = InvalidOid;
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index 9a302ddc30..572c6d6b63 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -21,11 +21,31 @@
#include "storage/checksum.h"
#include "utils/memdebug.h"
#include "utils/memutils.h"
+#include "utils/snapmgr.h"
/* GUC variable */
bool ignore_checksum_failure = false;
+/*
+ * HeapPageSpecialData used when pd_special == BLCKSZ. This is special format
+ * used when page with 32-bit xids doesn't fit HeapPageSpecialData. Then
+ * all xmin's are frozen (can do this for all live tuples after pg_upgrade),
+ * while 64-bit xmax is stored in both t_heap.t_xmin and t_heap.t_xmax.
+ * This is so-called "double xmax" format.
+ */
+static HeapPageSpecialData heapDoubleXmaxSpecialData =
+{
+ .pd_xid_base = MaxTransactionId,
+ .pd_multi_base = MaxTransactionId
+};
+HeapPageSpecial heapDoubleXmaxSpecial = &heapDoubleXmaxSpecialData;
+
+static ToastPageSpecialData toastDoubleXmaxSpecialData =
+{
+ .pd_xid_base = MaxTransactionId
+};
+ToastPageSpecial toastDoubleXmaxSpecial = &toastDoubleXmaxSpecialData;
/* ----------------------------------------------------------------
* Page support functions
@@ -432,15 +452,144 @@ PageRestoreTempPage(Page tempPage, Page oldPage)
}
/*
- * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete
+ * Get minimum and maximum values of xid and multixact on "double xmax" page.
*/
-typedef struct itemIdCompactData
+static void
+heap_page_double_xmax_get_min_max(Page page,
+ TransactionId *xid_min,
+ TransactionId *xid_max,
+ MultiXactId *multi_min,
+ MultiXactId *multi_max)
{
- uint16 offsetindex; /* linp array index */
- int16 itemoff; /* page offset of item data */
- uint16 alignedlen; /* MAXALIGN(item data len) */
-} itemIdCompactData;
-typedef itemIdCompactData *itemIdCompact;
+ bool xid_found = false,
+ multi_found = false;
+ OffsetNumber offnum,
+ maxoff;
+
+ maxoff = PageGetMaxOffsetNumber(page);
+
+ for (offnum = FirstOffsetNumber;
+ offnum <= maxoff;
+ offnum = OffsetNumberNext(offnum))
+ {
+ ItemId itemid;
+ HeapTupleHeader htup;
+ TransactionId xmax;
+
+ itemid = PageGetItemId(page, offnum);
+
+ if (!ItemIdIsNormal(itemid))
+ continue;
+
+ htup = (HeapTupleHeader) PageGetItem(page, itemid);
+
+ xmax = HeapTupleHeaderGetDoubleXmax(htup);
+
+ if (!TransactionIdIsNormal(xmax))
+ continue;
+
+ if (!(htup->t_infomask & HEAP_XMAX_IS_MULTI))
+ {
+ if (!xid_found)
+ {
+ *xid_min = *xid_max = xmax;
+ xid_found = true;
+ }
+ else
+ {
+ *xid_min = Min(*xid_min, xmax);
+ *xid_max = Max(*xid_max, xmax);
+ }
+ }
+ else
+ {
+ if (!multi_found)
+ {
+ *multi_min = *multi_max = xmax;
+ multi_found = true;
+ }
+ else
+ {
+ *multi_min = Min(*multi_min, xmax);
+ *multi_max = Max(*multi_max, xmax);
+ }
+ }
+ }
+}
+
+/*
+ * Add special area to heap page, so convert from "double xmax" to normal
+ * format.
+ */
+static void
+heap_page_add_special_area(ItemIdCompact itemidbase, int nitems, Page page,
+ TransactionId xid_base, MultiXactId multi_base,
+ bool is_toast)
+{
+ char newPage[BLCKSZ];
+ PageHeader phdr = (PageHeader) page;
+ PageHeader new_phdr = (PageHeader) newPage;
+ Offset upper;
+ int i;
+
+ memcpy(newPage, page, phdr->pd_lower);
+
+ /* Add special area */
+ if (is_toast)
+ {
+ ToastPageSpecial special;
+
+ new_phdr->pd_special = PageGetPageSize(newPage) - sizeof(ToastPageSpecialData);
+ special = (ToastPageSpecial) ((Pointer) (newPage) + new_phdr->pd_special);
+ special->pd_xid_base = xid_base;
+ }
+ else
+ {
+ HeapPageSpecial special;
+
+ new_phdr->pd_special = PageGetPageSize(newPage) - sizeof(HeapPageSpecialData);
+ special = (HeapPageSpecial) ((Pointer) (newPage) + new_phdr->pd_special);
+ special->pd_xid_base = xid_base;
+ special->pd_multi_base = multi_base;
+ }
+
+ /* sort itemIdSortData array into decreasing itemoff order */
+ qsort((char *) itemidbase, nitems, sizeof(ItemIdCompactData),
+ itemoffcompare);
+
+ upper = new_phdr->pd_special;
+ for (i = 0; i < nitems; i++)
+ {
+ ItemIdCompact itemidptr = &itemidbase[i];
+ ItemId lp;
+ HeapTupleHeader old_htup;
+ HeapTupleHeader new_htup;
+ TransactionId xmax;
+
+ lp = PageGetItemId(page, itemidptr->offsetindex + 1);
+ old_htup = (HeapTupleHeader) PageGetItem(page, lp);
+ upper -= itemidptr->alignedlen;
+ memcpy((Pointer) newPage + upper,
+ (Pointer) page + itemidptr->itemoff,
+ itemidptr->alignedlen);
+ lp = PageGetItemId(newPage, itemidptr->offsetindex + 1);
+ lp->lp_off = upper;
+ new_htup = (HeapTupleHeader) PageGetItem(newPage, lp);
+
+ /* Convert xmax value */
+ new_htup->t_choice.t_heap.t_xmin = FrozenTransactionId;
+ xmax = HeapTupleHeaderGetDoubleXmax(old_htup);
+ if (!(new_htup->t_infomask & HEAP_XMAX_IS_MULTI))
+ new_htup->t_choice.t_heap.t_xmax = NormalTransactionIdToShort(xid_base, xmax);
+ else
+ new_htup->t_choice.t_heap.t_xmax = NormalTransactionIdToShort(multi_base, xmax);
+ }
+
+ new_phdr->pd_upper = upper;
+
+ memcpy(page, newPage, PageGetPageSize(newPage));
+ elog(DEBUG2, "convert heap page from double xmax to normal format");
+}
/*
* After removing or marking some line pointers unused, move the tuples to
@@ -471,21 +620,47 @@ typedef itemIdCompactData *itemIdCompact;
* Callers must ensure that nitems is > 0
*/
static void
-compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted)
+compactify_tuples(ItemIdCompact itemidbase, int nitems, Page page,
+ bool presorted, bool addspecial, bool is_toast)
{
PageHeader phdr = (PageHeader) page;
Offset upper;
Offset copy_tail;
Offset copy_head;
- itemIdCompact itemidptr;
+ ItemIdCompact itemidptr;
int i;
/* Code within will not work correctly if nitems == 0 */
Assert(nitems > 0);
- if (presorted)
+ /* Add special area to the heap page if possible */
+ if (addspecial)
{
+ TransactionId xid_min = FirstNormalTransactionId,
+ xid_max = FirstNormalTransactionId;
+ MultiXactId multi_min = FirstNormalTransactionId,
+ multi_max = FirstNormalTransactionId;
+ Assert(phdr->pd_special == PageGetPageSize(page));
+
+ heap_page_double_xmax_get_min_max(page, &xid_min, &xid_max,
+ &multi_min, &multi_max);
+
+ if (xid_max - xid_min < (TransactionId) (MaxShortTransactionId - FirstNormalTransactionId) &&
+ multi_max - multi_min < (TransactionId) (MaxShortTransactionId - FirstNormalTransactionId))
+ {
+ Assert(xid_min >= FirstNormalTransactionId);
+ Assert(multi_min >= FirstNormalTransactionId);
+ heap_page_add_special_area(itemidbase, nitems, page,
+ xid_min - FirstNormalTransactionId,
+ multi_min - FirstNormalTransactionId,
+ is_toast);
+ return;
+ }
+ }
+
+ if (presorted)
+ {
#ifdef USE_ASSERT_CHECKING
{
/*
@@ -696,14 +871,14 @@ compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorte
* the line pointer array following array truncation.
*/
void
-PageRepairFragmentation(Page page)
+PageRepairFragmentation(Page page, bool is_toast)
{
Offset pd_lower = ((PageHeader) page)->pd_lower;
Offset pd_upper = ((PageHeader) page)->pd_upper;
Offset pd_special = ((PageHeader) page)->pd_special;
Offset last_offset;
- itemIdCompactData itemidbase[MaxHeapTuplesPerPage];
- itemIdCompact itemidptr;
+ ItemIdCompactData itemidbase[MaxHeapTuplesPerPage];
+ ItemIdCompact itemidptr;
ItemId lp;
int nline,
nstorage,
@@ -777,11 +952,30 @@ PageRepairFragmentation(Page page)
nstorage = itemidptr - itemidbase;
if (nstorage == 0)
{
+ if (pd_special == PageGetPageSize(page))
+ {
+ if (is_toast)
+ {
+ pd_special = PageGetPageSize(page) - sizeof(ToastPageSpecialData);
+ ((PageHeader) page)->pd_special = pd_special;
+ ToastPageGetSpecial(page)->pd_xid_base = 0;
+ }
+ else
+ {
+ pd_special = PageGetPageSize(page) - sizeof(HeapPageSpecialData);
+ ((PageHeader) page)->pd_special = pd_special;
+ HeapPageGetSpecial(page)->pd_xid_base = 0;
+ HeapPageGetSpecial(page)->pd_multi_base = 0;
+ }
+ }
+
/* Page is completely empty, so just reset it quickly */
((PageHeader) page)->pd_upper = pd_special;
}
else
{
+ bool addspecial = false;
+
/* Need to compact the page the hard way */
if (totallen > (Size) (pd_special - pd_lower))
ereport(ERROR,
@@ -789,7 +983,25 @@ PageRepairFragmentation(Page page)
errmsg("corrupted item lengths: total %u, available space %u",
(unsigned int) totallen, pd_special - pd_lower)));
- compactify_tuples(itemidbase, nstorage, page, presorted);
+ /*
+ * Try to add special area to the heap page if it has enough of free
+ * space.
+ */
+ if (pd_special == PageGetPageSize(page))
+ {
+ Size special_size,
+ actual_size;
+
+ special_size = is_toast ? sizeof(ToastPageSpecialData) :
+ sizeof(HeapPageSpecialData);
+ actual_size = (Size) (pd_special - pd_lower) - totallen;
+
+ if (actual_size >= special_size)
+ addspecial = true;
+ }
+
+ compactify_tuples(itemidbase, nstorage, page, presorted, addspecial,
+ is_toast);
}
if (finalusedlp != nline)
@@ -992,6 +1204,9 @@ PageGetHeapFreeSpace(Page page)
{
Size space;
+ if (HeapPageIsDoubleXmax(page))
+ return 0;
+
space = PageGetFreeSpace(page);
if (space > 0)
{
@@ -1165,9 +1380,9 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Offset pd_upper = phdr->pd_upper;
Offset pd_special = phdr->pd_special;
Offset last_offset;
- itemIdCompactData itemidbase[MaxIndexTuplesPerPage];
+ ItemIdCompactData itemidbase[MaxIndexTuplesPerPage];
ItemIdData newitemids[MaxIndexTuplesPerPage];
- itemIdCompact itemidptr;
+ ItemIdCompact itemidptr;
ItemId lp;
int nline,
nused;
@@ -1275,7 +1490,12 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
/* and compactify the tuple data */
if (nused > 0)
- compactify_tuples(itemidbase, nused, page, presorted);
+ {
+ bool is_toast;
+
+ is_toast = BLCKSZ - pd_special == sizeof(ToastPageSpecialData);
+ compactify_tuples(itemidbase, nused, page, presorted, false, is_toast);
+ }
else
phdr->pd_upper = pd_special;
}
diff --git a/src/backend/utils/adt/enum.c b/src/backend/utils/adt/enum.c
index fdfdf7d0d2..5886c60200 100644
--- a/src/backend/utils/adt/enum.c
+++ b/src/backend/utils/adt/enum.c
@@ -76,7 +76,7 @@ check_safe_enum_use(HeapTuple enumval_tup)
* Usually, a row would get hinted as committed when it's read or loaded
* into syscache; but just in case not, let's check the xmin directly.
*/
- xmin = HeapTupleHeaderGetXmin(enumval_tup->t_data);
+ xmin = HeapTupleGetXmin(enumval_tup);
if (!TransactionIdIsInProgress(xmin) &&
TransactionIdDidCommit(xmin))
return;
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index aa37c401e5..7b5e522465 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -3310,6 +3310,7 @@ populate_record(TupleDesc tupdesc,
tuple.t_len = HeapTupleHeaderGetDatumLength(defaultval);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
+ HeapTupleSetZeroXids(&tuple);
tuple.t_data = defaultval;
/* Break down the tuple into fields */
@@ -3756,6 +3757,7 @@ populate_recordset_record(PopulateRecordsetState *state, JsObject *obj)
tuple.t_len = HeapTupleHeaderGetDatumLength(tuphead);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
+ HeapTupleSetZeroXids(&tuple);
tuple.t_data = tuphead;
tuplestore_puttuple(state->tuple_store, &tuple);
diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c
index 974aa4fb43..a1e9406009 100644
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -79,7 +79,7 @@ VXIDGetDatum(BackendId bid, LocalTransactionId lxid)
* The representation is "<bid>/<lxid>", decimal and unsigned decimal
* respectively. Note that elog.c also knows how to format a vxid.
*/
- char vxidstr[32];
+ char vxidstr[64];
snprintf(vxidstr, sizeof(vxidstr), "%d/%llu", bid,
(unsigned long long) lxid);
@@ -293,7 +293,9 @@ pg_lock_status(PG_FUNCTION_ARGS)
break;
case LOCKTAG_TRANSACTION:
values[6] =
- TransactionIdGetDatum(instance->locktag.locktag_field1);
+ TransactionIdGetDatum(
+ (TransactionId) instance->locktag.locktag_field1 |
+ ((TransactionId) instance->locktag.locktag_field2 << 32));
nulls[1] = true;
nulls[2] = true;
nulls[3] = true;
@@ -305,7 +307,8 @@ pg_lock_status(PG_FUNCTION_ARGS)
break;
case LOCKTAG_VIRTUALTRANSACTION:
values[5] = VXIDGetDatum(instance->locktag.locktag_field1,
- instance->locktag.locktag_field2);
+ (TransactionId) instance->locktag.locktag_field2 |
+ ((TransactionId) instance->locktag.locktag_field3 << 32));
nulls[1] = true;
nulls[2] = true;
nulls[3] = true;
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 0cea320c00..77c54f5d9f 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -15,6 +15,7 @@
#include "postgres.h"
#include "access/htup_details.h"
+#include "access/xact.h"
#include "access/xlog.h"
#include "access/xlogprefetcher.h"
#include "catalog/catalog.h"
diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c
index eb8fe95933..37abb9061f 100644
--- a/src/backend/utils/adt/rowtypes.c
+++ b/src/backend/utils/adt/rowtypes.c
@@ -354,6 +354,7 @@ record_out(PG_FUNCTION_ARGS)
tuple.t_len = HeapTupleHeaderGetDatumLength(rec);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
+ HeapTupleSetZeroXids(&tuple);
tuple.t_data = rec;
/*
@@ -712,6 +713,7 @@ record_send(PG_FUNCTION_ARGS)
tuple.t_len = HeapTupleHeaderGetDatumLength(rec);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
+ HeapTupleSetZeroXids(&tuple);
tuple.t_data = rec;
/*
@@ -862,10 +864,12 @@ record_cmp(FunctionCallInfo fcinfo)
tuple1.t_len = HeapTupleHeaderGetDatumLength(record1);
ItemPointerSetInvalid(&(tuple1.t_self));
tuple1.t_tableOid = InvalidOid;
+ HeapTupleSetZeroXids(&tuple1);
tuple1.t_data = record1;
tuple2.t_len = HeapTupleHeaderGetDatumLength(record2);
ItemPointerSetInvalid(&(tuple2.t_self));
tuple2.t_tableOid = InvalidOid;
+ HeapTupleSetZeroXids(&tuple2);
tuple2.t_data = record2;
/*
@@ -1107,10 +1111,12 @@ record_eq(PG_FUNCTION_ARGS)
ItemPointerSetInvalid(&(tuple1.t_self));
tuple1.t_tableOid = InvalidOid;
tuple1.t_data = record1;
+ HeapTupleSetZeroXids(&tuple1);
tuple2.t_len = HeapTupleHeaderGetDatumLength(record2);
ItemPointerSetInvalid(&(tuple2.t_self));
tuple2.t_tableOid = InvalidOid;
tuple2.t_data = record2;
+ HeapTupleSetZeroXids(&tuple2);
/*
* We arrange to look up the needed comparison info just once per series
@@ -1369,10 +1375,12 @@ record_image_cmp(FunctionCallInfo fcinfo)
ItemPointerSetInvalid(&(tuple1.t_self));
tuple1.t_tableOid = InvalidOid;
tuple1.t_data = record1;
+ HeapTupleSetZeroXids(&tuple1);
tuple2.t_len = HeapTupleHeaderGetDatumLength(record2);
ItemPointerSetInvalid(&(tuple2.t_self));
tuple2.t_tableOid = InvalidOid;
tuple2.t_data = record2;
+ HeapTupleSetZeroXids(&tuple2);
/*
* We arrange to look up the needed comparison info just once per series
@@ -1615,10 +1623,12 @@ record_image_eq(PG_FUNCTION_ARGS)
ItemPointerSetInvalid(&(tuple1.t_self));
tuple1.t_tableOid = InvalidOid;
tuple1.t_data = record1;
+ HeapTupleSetZeroXids(&tuple1);
tuple2.t_len = HeapTupleHeaderGetDatumLength(record2);
ItemPointerSetInvalid(&(tuple2.t_self));
tuple2.t_tableOid = InvalidOid;
tuple2.t_data = record2;
+ HeapTupleSetZeroXids(&tuple2);
/*
* We arrange to look up the needed comparison info just once per series
@@ -1818,6 +1828,7 @@ hash_record(PG_FUNCTION_ARGS)
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
tuple.t_data = record;
+ HeapTupleSetZeroXids(&tuple);
/*
* We arrange to look up the needed hashing info just once per series of
@@ -1939,6 +1950,7 @@ hash_record_extended(PG_FUNCTION_ARGS)
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
tuple.t_data = record;
+ HeapTupleSetZeroXids(&tuple);
/*
* We arrange to look up the needed hashing info just once per series of
diff --git a/src/backend/utils/adt/xid.c b/src/backend/utils/adt/xid.c
index 8ac1679c38..8e0830f4cc 100644
--- a/src/backend/utils/adt/xid.c
+++ b/src/backend/utils/adt/xid.c
@@ -33,7 +33,7 @@ xidin(PG_FUNCTION_ARGS)
char *str = PG_GETARG_CSTRING(0);
TransactionId result;
- result = uint32in_subr(str, NULL, "xid", fcinfo->context);
+ result = uint64in_subr(str, NULL, "xid", fcinfo->context);
PG_RETURN_TRANSACTIONID(result);
}
@@ -41,9 +41,9 @@ Datum
xidout(PG_FUNCTION_ARGS)
{
TransactionId transactionId = PG_GETARG_TRANSACTIONID(0);
- char *result = (char *) palloc(16);
+ char *result = (char *) palloc(32);
- snprintf(result, 16, "%lu", (unsigned long) transactionId);
+ snprintf(result, 32, "%llu", (unsigned long long) transactionId);
PG_RETURN_CSTRING(result);
}
@@ -54,8 +54,13 @@ Datum
xidrecv(PG_FUNCTION_ARGS)
{
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ uint32 lo,
+ hi;
+
+ lo = (uint32) pq_getmsgint(buf, sizeof(TransactionId));
+ hi = (uint32) pq_getmsgint(buf, sizeof(TransactionId));
- PG_RETURN_TRANSACTIONID((TransactionId) pq_getmsgint(buf, sizeof(TransactionId)));
+ PG_RETURN_TRANSACTIONID((uint64) lo + ((uint64) hi << 32));
}
/*
@@ -66,9 +71,15 @@ xidsend(PG_FUNCTION_ARGS)
{
TransactionId arg1 = PG_GETARG_TRANSACTIONID(0);
StringInfoData buf;
+ uint32 lo,
+ hi;
+
+ lo = (uint32) (arg1 & 0xFFFFFFFF);
+ hi = (uint32) (arg1 >> 32);
pq_begintypsend(&buf);
- pq_sendint32(&buf, arg1);
+ pq_sendint(&buf, lo, sizeof(lo));
+ pq_sendint(&buf, hi, sizeof(hi));
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
@@ -107,9 +118,9 @@ xid_age(PG_FUNCTION_ARGS)
/* Permanent XIDs are always infinitely old */
if (!TransactionIdIsNormal(xid))
- PG_RETURN_INT32(INT_MAX);
+ PG_RETURN_INT64(PG_INT8_MAX);
- PG_RETURN_INT32((int32) (now - xid));
+ PG_RETURN_INT64((int64) (now - xid));
}
/*
@@ -122,9 +133,9 @@ mxid_age(PG_FUNCTION_ARGS)
MultiXactId now = ReadNextMultiXactId();
if (!MultiXactIdIsValid(xid))
- PG_RETURN_INT32(INT_MAX);
+ PG_RETURN_INT64(PG_INT8_MAX);
- PG_RETURN_INT32((int32) (now - xid));
+ PG_RETURN_INT64((int64) (now - xid));
}
/*
@@ -188,7 +199,7 @@ xid8in(PG_FUNCTION_ARGS)
uint64 result;
result = uint64in_subr(str, NULL, "xid8", fcinfo->context);
- PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(result));
+ PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromXid(result));
}
Datum
@@ -197,7 +208,7 @@ xid8out(PG_FUNCTION_ARGS)
FullTransactionId fxid = PG_GETARG_FULLTRANSACTIONID(0);
char *result = (char *) palloc(21);
- snprintf(result, 21, UINT64_FORMAT, U64FromFullTransactionId(fxid));
+ snprintf(result, 21, UINT64_FORMAT, XidFromFullTransactionId(fxid));
PG_RETURN_CSTRING(result);
}
@@ -208,7 +219,7 @@ xid8recv(PG_FUNCTION_ARGS)
uint64 value;
value = (uint64) pq_getmsgint64(buf);
- PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(value));
+ PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromXid(value));
}
Datum
@@ -218,7 +229,7 @@ xid8send(PG_FUNCTION_ARGS)
StringInfoData buf;
pq_begintypsend(&buf);
- pq_sendint64(&buf, (uint64) U64FromFullTransactionId(arg1));
+ pq_sendint64(&buf, (uint64) XidFromFullTransactionId(arg1));
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
diff --git a/src/backend/utils/adt/xid8funcs.c b/src/backend/utils/adt/xid8funcs.c
index 06ae940df6..becd220981 100644
--- a/src/backend/utils/adt/xid8funcs.c
+++ b/src/backend/utils/adt/xid8funcs.c
@@ -88,8 +88,7 @@ StaticAssertDecl(MAX_BACKENDS * 2 <= PG_SNAPSHOT_MAX_NXIP,
* It is an ERROR if the xid is in the future. Otherwise, returns true if
* the transaction is still new enough that we can determine whether it
* committed and false otherwise. If *extracted_xid is not NULL, it is set
- * to the low 32 bits of the transaction ID (i.e. the actual XID, without the
- * epoch).
+ * to the actual transaction ID.
*
* The caller must hold XactTruncationLock since it's dealing with arbitrary
* XIDs, and must continue to hold it until it's done with any clog lookups
@@ -98,15 +97,10 @@ StaticAssertDecl(MAX_BACKENDS * 2 <= PG_SNAPSHOT_MAX_NXIP,
static bool
TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid)
{
- uint32 xid_epoch = EpochFromFullTransactionId(fxid);
TransactionId xid = XidFromFullTransactionId(fxid);
- uint32 now_epoch;
- TransactionId now_epoch_next_xid;
FullTransactionId now_fullxid;
now_fullxid = ReadNextFullTransactionId();
- now_epoch_next_xid = XidFromFullTransactionId(now_fullxid);
- now_epoch = EpochFromFullTransactionId(now_fullxid);
if (extracted_xid != NULL)
*extracted_xid = xid;
@@ -123,7 +117,7 @@ TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("transaction ID %llu is in the future",
- (unsigned long long) U64FromFullTransactionId(fxid))));
+ (unsigned long long) XidFromFullTransactionId(fxid))));
/*
* ShmemVariableCache->oldestClogXid is protected by XactTruncationLock,
@@ -135,48 +129,15 @@ TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid)
Assert(LWLockHeldByMe(XactTruncationLock));
/*
- * If the transaction ID has wrapped around, it's definitely too old to
- * determine the commit status. Otherwise, we can compare it to
- * ShmemVariableCache->oldestClogXid to determine whether the relevant
- * CLOG entry is guaranteed to still exist.
+ * We compare xid to ShmemVariableCache->oldestClogXid to determine
+ * whether the relevant CLOG entry is guaranteed to still exist.
*/
- if (xid_epoch + 1 < now_epoch
- || (xid_epoch + 1 == now_epoch && xid < now_epoch_next_xid)
- || TransactionIdPrecedes(xid, ShmemVariableCache->oldestClogXid))
+ if (TransactionIdPrecedes(xid, ShmemVariableCache->oldestClogXid))
return false;
return true;
}
-/*
- * Convert a TransactionId obtained from a snapshot held by the caller to a
- * FullTransactionId. Use next_fxid as a reference FullTransactionId, so that
- * we can compute the high order bits. It must have been obtained by the
- * caller with ReadNextFullTransactionId() after the snapshot was created.
- */
-static FullTransactionId
-widen_snapshot_xid(TransactionId xid, FullTransactionId next_fxid)
-{
- TransactionId next_xid = XidFromFullTransactionId(next_fxid);
- uint32 epoch = EpochFromFullTransactionId(next_fxid);
-
- /* Special transaction ID. */
- if (!TransactionIdIsNormal(xid))
- return FullTransactionIdFromEpochAndXid(0, xid);
-
- /*
- * The 64 bit result must be <= next_fxid, since next_fxid hadn't been
- * issued yet when the snapshot was created. Every TransactionId in the
- * snapshot must therefore be from the same epoch as next_fxid, or the
- * epoch before. We know this because next_fxid is never allow to get
- * more than one epoch ahead of the TransactionIds in any snapshot.
- */
- if (xid > next_xid)
- epoch--;
-
- return FullTransactionIdFromEpochAndXid(epoch, xid);
-}
-
/*
* txid comparator for qsort/bsearch
*/
@@ -303,12 +264,12 @@ parse_snapshot(const char *str, Node *escontext)
char *endp;
StringInfo buf;
- xmin = FullTransactionIdFromU64(strtou64(str, &endp, 10));
+ xmin = FullTransactionIdFromXid(strtou64(str, &endp, 10));
if (*endp != ':')
goto bad_format;
str = endp + 1;
- xmax = FullTransactionIdFromU64(strtou64(str, &endp, 10));
+ xmax = FullTransactionIdFromXid(strtou64(str, &endp, 10));
if (*endp != ':')
goto bad_format;
str = endp + 1;
@@ -326,7 +287,7 @@ parse_snapshot(const char *str, Node *escontext)
while (*str != '\0')
{
/* read next value */
- val = FullTransactionIdFromU64(strtou64(str, &endp, 10));
+ val = FullTransactionIdFromXid(strtou64(str, &endp, 10));
str = endp;
/* require the input to be in order */
@@ -404,7 +365,6 @@ pg_current_snapshot(PG_FUNCTION_ARGS)
uint32 nxip,
i;
Snapshot cur;
- FullTransactionId next_fxid = ReadNextFullTransactionId();
cur = GetActiveSnapshot();
if (cur == NULL)
@@ -415,11 +375,11 @@ pg_current_snapshot(PG_FUNCTION_ARGS)
snap = palloc(PG_SNAPSHOT_SIZE(nxip));
/* fill */
- snap->xmin = widen_snapshot_xid(cur->xmin, next_fxid);
- snap->xmax = widen_snapshot_xid(cur->xmax, next_fxid);
+ snap->xmin = FullTransactionIdFromXid(cur->xmin);
+ snap->xmax = FullTransactionIdFromXid(cur->xmax);
snap->nxip = nxip;
for (i = 0; i < nxip; i++)
- snap->xip[i] = widen_snapshot_xid(cur->xip[i], next_fxid);
+ snap->xip[i] = FullTransactionIdFromXid(cur->xip[i]);
/*
* We want them guaranteed to be in ascending order. This also removes
@@ -467,16 +427,16 @@ pg_snapshot_out(PG_FUNCTION_ARGS)
initStringInfo(&str);
appendStringInfo(&str, UINT64_FORMAT ":",
- U64FromFullTransactionId(snap->xmin));
+ XidFromFullTransactionId(snap->xmin));
appendStringInfo(&str, UINT64_FORMAT ":",
- U64FromFullTransactionId(snap->xmax));
+ XidFromFullTransactionId(snap->xmax));
for (i = 0; i < snap->nxip; i++)
{
if (i > 0)
appendStringInfoChar(&str, ',');
appendStringInfo(&str, UINT64_FORMAT,
- U64FromFullTransactionId(snap->xip[i]));
+ XidFromFullTransactionId(snap->xip[i]));
}
PG_RETURN_CSTRING(str.data);
@@ -505,8 +465,8 @@ pg_snapshot_recv(PG_FUNCTION_ARGS)
if (nxip < 0 || nxip > PG_SNAPSHOT_MAX_NXIP)
goto bad_format;
- xmin = FullTransactionIdFromU64((uint64) pq_getmsgint64(buf));
- xmax = FullTransactionIdFromU64((uint64) pq_getmsgint64(buf));
+ xmin = FullTransactionIdFromXid((uint64) pq_getmsgint64(buf));
+ xmax = FullTransactionIdFromXid((uint64) pq_getmsgint64(buf));
if (!FullTransactionIdIsValid(xmin) ||
!FullTransactionIdIsValid(xmax) ||
FullTransactionIdPrecedes(xmax, xmin))
@@ -519,7 +479,7 @@ pg_snapshot_recv(PG_FUNCTION_ARGS)
for (i = 0; i < nxip; i++)
{
FullTransactionId cur =
- FullTransactionIdFromU64((uint64) pq_getmsgint64(buf));
+ FullTransactionIdFromXid((uint64) pq_getmsgint64(buf));
if (FullTransactionIdPrecedes(cur, last) ||
FullTransactionIdPrecedes(cur, xmin) ||
@@ -564,10 +524,10 @@ pg_snapshot_send(PG_FUNCTION_ARGS)
pq_begintypsend(&buf);
pq_sendint32(&buf, snap->nxip);
- pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xmin));
- pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xmax));
+ pq_sendint64(&buf, (int64) XidFromFullTransactionId(snap->xmin));
+ pq_sendint64(&buf, (int64) XidFromFullTransactionId(snap->xmax));
for (i = 0; i < snap->nxip; i++)
- pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xip[i]));
+ pq_sendint64(&buf, (int64) XidFromFullTransactionId(snap->xip[i]));
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
@@ -655,8 +615,7 @@ pg_snapshot_xip(PG_FUNCTION_ARGS)
* Report the status of a recent transaction ID, or null for wrapped,
* truncated away or otherwise too old XIDs.
*
- * The passed epoch-qualified xid is treated as a normal xid, not a
- * multixact id.
+ * The passed xid is treated as a normal xid, not a multixact id.
*
* If it points to a committed subxact the result is the subxact status even
* though the parent xact may still be in progress or may have aborted.
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 2e2e4d9f1f..674e9ecd8e 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -1908,6 +1908,7 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments,
memcpy((char *) ct->tuple.t_data,
(const char *) dtp->t_data,
dtp->t_len);
+ HeapTupleCopyXids(&ct->tuple, dtp);
MemoryContextSwitchTo(oldcxt);
if (dtp != ntp)
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index b3faccbefe..c4c1b0dbbd 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -2341,8 +2341,7 @@ RelationReloadIndexInfo(Relation relation)
relation->rd_index->indisreplident = index->indisreplident;
/* Copy xmin too, as that is needed to make sense of indcheckxmin */
- HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data,
- HeapTupleHeaderGetXmin(tuple->t_data));
+ HeapTupleSetXmin(relation->rd_indextuple, HeapTupleGetXmin(tuple));
ReleaseSysCache(tuple);
}
diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index 9dfdf890c5..c3122b317c 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -526,7 +526,7 @@ lookup_C_func(HeapTuple procedureTuple)
NULL);
if (entry == NULL)
return NULL; /* no such entry */
- if (entry->fn_xmin == HeapTupleHeaderGetRawXmin(procedureTuple->t_data) &&
+ if (entry->fn_xmin == HeapTupleGetRawXmin(procedureTuple) &&
ItemPointerEquals(&entry->fn_tid, &procedureTuple->t_self))
return entry; /* OK */
return NULL; /* entry is out of date */
@@ -562,7 +562,7 @@ record_C_func(HeapTuple procedureTuple,
HASH_ENTER,
&found);
/* OID is already filled in */
- entry->fn_xmin = HeapTupleHeaderGetRawXmin(procedureTuple->t_data);
+ entry->fn_xmin = HeapTupleGetRawXmin(procedureTuple);
entry->fn_tid = procedureTuple->t_self;
entry->user_fn = user_fn;
entry->inforec = inforec;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index fd0bc11a00..0b4d2e875d 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -2556,65 +2556,6 @@ struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
- {
- {"vacuum_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
- gettext_noop("Minimum age at which VACUUM should freeze a table row."),
- NULL
- },
- &vacuum_freeze_min_age,
- 50000000, 0, 1000000000,
- NULL, NULL, NULL
- },
-
- {
- {"vacuum_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
- gettext_noop("Age at which VACUUM should scan whole table to freeze tuples."),
- NULL
- },
- &vacuum_freeze_table_age,
- 150000000, 0, 2000000000,
- NULL, NULL, NULL
- },
-
- {
- {"vacuum_multixact_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
- gettext_noop("Minimum age at which VACUUM should freeze a MultiXactId in a table row."),
- NULL
- },
- &vacuum_multixact_freeze_min_age,
- 5000000, 0, 1000000000,
- NULL, NULL, NULL
- },
-
- {
- {"vacuum_multixact_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
- gettext_noop("Multixact age at which VACUUM should scan whole table to freeze tuples."),
- NULL
- },
- &vacuum_multixact_freeze_table_age,
- 150000000, 0, 2000000000,
- NULL, NULL, NULL
- },
-
- {
- {"vacuum_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
- gettext_noop("Age at which VACUUM should trigger failsafe to avoid a wraparound outage."),
- NULL
- },
- &vacuum_failsafe_age,
- 1600000000, 0, 2100000000,
- NULL, NULL, NULL
- },
- {
- {"vacuum_multixact_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
- gettext_noop("Multixact age at which VACUUM should trigger failsafe to avoid a wraparound outage."),
- NULL
- },
- &vacuum_multixact_failsafe_age,
- 1600000000, 0, 2100000000,
- NULL, NULL, NULL
- },
-
/*
* See also CheckRequiredParameterValues() if this parameter changes
*/
@@ -3239,28 +3180,6 @@ struct config_int ConfigureNamesInt[] =
50, 0, INT_MAX,
NULL, NULL, NULL
},
- {
- /* see varsup.c for why this is PGC_POSTMASTER not PGC_SIGHUP */
- {"autovacuum_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM,
- gettext_noop("Age at which to autovacuum a table to prevent transaction ID wraparound."),
- NULL
- },
- &autovacuum_freeze_max_age,
-
- /* see vacuum_failsafe_age if you change the upper-limit value. */
- 200000000, 100000, 2000000000,
- NULL, NULL, NULL
- },
- {
- /* see multixact.c for why this is PGC_POSTMASTER not PGC_SIGHUP */
- {"autovacuum_multixact_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM,
- gettext_noop("Multixact age at which to autovacuum a table to prevent multixact wraparound."),
- NULL
- },
- &autovacuum_multixact_freeze_max_age,
- 400000000, 10000, 2000000000,
- NULL, NULL, NULL
- },
{
/* see max_connections */
{"autovacuum_max_workers", PGC_POSTMASTER, AUTOVACUUM,
@@ -3528,7 +3447,6 @@ struct config_int ConfigureNamesInt[] =
SCRAM_SHA_256_DEFAULT_ITERATIONS, 1, INT_MAX,
NULL, NULL, NULL
},
-
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
@@ -3538,6 +3456,87 @@ struct config_int ConfigureNamesInt[] =
struct config_int64 ConfigureNamesInt64[] =
{
+ {
+ {"vacuum_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("Minimum age at which VACUUM should freeze a table row."),
+ NULL
+ },
+ &vacuum_freeze_min_age,
+ INT64CONST(50000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF),
+ NULL, NULL, NULL
+ },
+
+ {
+ {"vacuum_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("Age at which VACUUM should scan whole table to freeze tuples."),
+ NULL
+ },
+ &vacuum_freeze_table_age,
+ INT64CONST(150000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF),
+ NULL, NULL, NULL
+ },
+
+ {
+ {"vacuum_multixact_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("Minimum age at which VACUUM should freeze a MultiXactId in a table row."),
+ NULL
+ },
+ &vacuum_multixact_freeze_min_age,
+ INT64CONST(5000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF),
+ NULL, NULL, NULL
+ },
+
+ {
+ {"vacuum_multixact_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("Multixact age at which VACUUM should scan whole table to freeze tuples."),
+ NULL
+ },
+ &vacuum_multixact_freeze_table_age,
+ INT64CONST(150000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF),
+ NULL, NULL, NULL
+ },
+
+ {
+ {"vacuum_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("Age at which VACUUM should trigger failsafe to avoid a wraparound outage."),
+ NULL
+ },
+ &vacuum_failsafe_age,
+ INT64CONST(1600000000), INT64CONST(0), INT64CONST(2100000000),
+ NULL, NULL, NULL
+ },
+ {
+ {"vacuum_multixact_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("Multixact age at which VACUUM should trigger failsafe to avoid a wraparound outage."),
+ NULL
+ },
+ &vacuum_multixact_failsafe_age,
+ INT64CONST(1600000000), INT64CONST(0), INT64CONST(2100000000),
+ NULL, NULL, NULL
+ },
+ {
+ /* see varsup.c for why this is PGC_POSTMASTER not PGC_SIGHUP */
+ {"autovacuum_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM,
+ gettext_noop("Age at which to autovacuum a table to prevent transaction ID wraparound."),
+ NULL
+ },
+ &autovacuum_freeze_max_age,
+
+ /* see vacuum_failsafe_age if you change the upper-limit value. */
+ INT64CONST(10000000000), INT64CONST(100000), INT64CONST(0x7FFFFFFFFFFFFFFF),
+ NULL, NULL, NULL
+ },
+ {
+ /* see multixact.c for why this is PGC_POSTMASTER not PGC_SIGHUP */
+ {"autovacuum_multixact_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM,
+ gettext_noop("Multixact age at which to autovacuum a table to prevent multixact wraparound."),
+ NULL
+ },
+ &autovacuum_multixact_freeze_max_age,
+ INT64CONST(20000000000), INT64CONST(10000), INT64CONST(0x7FFFFFFFFFFFFFFF),
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
@@ -3545,6 +3544,7 @@ struct config_int64 ConfigureNamesInt64[] =
};
+
struct config_real ConfigureNamesReal[] =
{
{
diff --git a/src/backend/utils/misc/help_config.c b/src/backend/utils/misc/help_config.c
index 94c8a16ac1..6e2ce1e325 100644
--- a/src/backend/utils/misc/help_config.c
+++ b/src/backend/utils/misc/help_config.c
@@ -33,6 +33,7 @@ typedef union
struct config_bool _bool;
struct config_real real;
struct config_int integer;
+ struct config_int64 integer8;
struct config_string string;
struct config_enum _enum;
} mixedStruct;
@@ -106,7 +107,12 @@ printMixedStruct(mixedStruct *structToPrint)
structToPrint->integer.min,
structToPrint->integer.max);
break;
-
+ case PGC_INT64:
+ printf("INT64\t%lld\t%lld\t%lld\t",
+ (long long) structToPrint->integer8.reset_val,
+ (long long) structToPrint->integer8.min,
+ (long long) structToPrint->integer8.max);
+ break;
case PGC_REAL:
printf("REAL\t%g\t%g\t%g\t",
structToPrint->real.reset_val,
diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c
index 7cbe700295..b0eb7cf0cf 100644
--- a/src/backend/utils/misc/pg_controldata.c
+++ b/src/backend/utils/misc/pg_controldata.c
@@ -118,7 +118,7 @@ pg_control_checkpoint(PG_FUNCTION_ARGS)
nulls[5] = false;
values[6] = CStringGetTextDatum(psprintf("%llu",
- (unsigned long long) U64FromFullTransactionId(ControlFile->checkPointCopy.nextXid)));
+ (unsigned long long) XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid)));
nulls[6] = false;
values[7] = ObjectIdGetDatum(ControlFile->checkPointCopy.nextOid);
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index cf9f283cfe..f97bcdf264 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -653,7 +653,7 @@
#autovacuum_vacuum_insert_scale_factor = 0.2 # fraction of inserts over table
# size before insert vacuum
#autovacuum_analyze_scale_factor = 0.1 # fraction of table size before analyze
-#autovacuum_freeze_max_age = 200000000 # maximum XID age before forced vacuum
+#autovacuum_freeze_max_age = 10000000000 # maximum XID age before forced vacuum
# (change requires restart)
#autovacuum_multixact_freeze_max_age = 400000000 # maximum multixact age
# before forced vacuum
diff --git a/src/backend/utils/sort/tuplesortvariants.c b/src/backend/utils/sort/tuplesortvariants.c
index 2cd508e513..97a607ab6c 100644
--- a/src/backend/utils/sort/tuplesortvariants.c
+++ b/src/backend/utils/sort/tuplesortvariants.c
@@ -1217,11 +1217,16 @@ writetup_cluster(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
HeapTuple tuple = (HeapTuple) stup->tuple;
- unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int);
+ unsigned int tuplen = tuple->t_len +
+ sizeof(ItemPointerData) +
+ 2 * sizeof(TransactionId) + /* tuple xmin, xmax */
+ sizeof(int);
/* We need to store t_self, but not other fields of HeapTupleData */
LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
LogicalTapeWrite(tape, &tuple->t_self, sizeof(ItemPointerData));
+ LogicalTapeWrite(tape, &tuple->t_xmin, sizeof(TransactionId));
+ LogicalTapeWrite(tape, &tuple->t_xmax, sizeof(TransactionId));
LogicalTapeWrite(tape, tuple->t_data, tuple->t_len);
if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
@@ -1233,7 +1238,10 @@ readtup_cluster(Tuplesortstate *state, SortTuple *stup,
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
- unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int);
+ unsigned int t_len = tuplen -
+ sizeof(ItemPointerData) -
+ 2 * sizeof(TransactionId) - /* tuple xmin, xmax */
+ sizeof(int);
HeapTuple tuple = (HeapTuple) tuplesort_readtup_alloc(state,
t_len + HEAPTUPLESIZE);
@@ -1241,6 +1249,8 @@ readtup_cluster(Tuplesortstate *state, SortTuple *stup,
tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
tuple->t_len = t_len;
LogicalTapeReadExact(tape, &tuple->t_self, sizeof(ItemPointerData));
+ LogicalTapeReadExact(tape, &tuple->t_xmin, sizeof(TransactionId));
+ LogicalTapeReadExact(tape, &tuple->t_xmax, sizeof(TransactionId));
/* We don't currently bother to reconstruct t_tableOid */
tuple->t_tableOid = InvalidOid;
/* Read in the tuple body */
diff --git a/src/backend/utils/time/combocid.c b/src/backend/utils/time/combocid.c
index 0e94bc93f7..f760a78072 100644
--- a/src/backend/utils/time/combocid.c
+++ b/src/backend/utils/time/combocid.c
@@ -101,12 +101,13 @@ static CommandId GetRealCmax(CommandId combocid);
*/
CommandId
-HeapTupleHeaderGetCmin(HeapTupleHeader tup)
+HeapTupleGetCmin(HeapTuple tuple)
{
+ HeapTupleHeader tup = tuple->t_data;
CommandId cid = HeapTupleHeaderGetRawCommandId(tup);
Assert(!(tup->t_infomask & HEAP_MOVED));
- Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tup)));
+ Assert(TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(tuple)));
if (tup->t_infomask & HEAP_COMBOCID)
return GetRealCmin(cid);
@@ -115,8 +116,9 @@ HeapTupleHeaderGetCmin(HeapTupleHeader tup)
}
CommandId
-HeapTupleHeaderGetCmax(HeapTupleHeader tup)
+HeapTupleGetCmax(HeapTuple tuple)
{
+ HeapTupleHeader tup = tuple->t_data;
CommandId cid = HeapTupleHeaderGetRawCommandId(tup);
Assert(!(tup->t_infomask & HEAP_MOVED));
@@ -128,7 +130,7 @@ HeapTupleHeaderGetCmax(HeapTupleHeader tup)
* things too much.
*/
Assert(CritSectionCount > 0 ||
- TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tup)));
+ TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(tuple)));
if (tup->t_infomask & HEAP_COMBOCID)
return GetRealCmax(cid);
@@ -150,9 +152,7 @@ HeapTupleHeaderGetCmax(HeapTupleHeader tup)
* changes the tuple in shared buffers.
*/
void
-HeapTupleHeaderAdjustCmax(HeapTupleHeader tup,
- CommandId *cmax,
- bool *iscombo)
+HeapTupleAdjustCmax(HeapTuple tup, CommandId *cmax, bool *iscombo)
{
/*
* If we're marking a tuple deleted that was inserted by (any
@@ -160,10 +160,10 @@ HeapTupleHeaderAdjustCmax(HeapTupleHeader tup,
* Test for HeapTupleHeaderXminCommitted() first, because it's cheaper
* than a TransactionIdIsCurrentTransactionId call.
*/
- if (!HeapTupleHeaderXminCommitted(tup) &&
- TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tup)))
+ if (!HeapTupleHeaderXminCommitted(tup->t_data) &&
+ TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(tup)))
{
- CommandId cmin = HeapTupleHeaderGetCmin(tup);
+ CommandId cmin = HeapTupleGetCmin(tup);
*cmax = GetComboCommandId(cmin, *cmax);
*iscombo = true;
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 6a4327f917..8091b24341 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -927,15 +927,15 @@ SnapshotResetXmin(void)
if (pairingheap_is_empty(&RegisteredSnapshots))
{
- MyProc->xmin = InvalidTransactionId;
+ pg_atomic_write_u64(&MyProc->xmin, InvalidTransactionId);
return;
}
minSnapshot = pairingheap_container(SnapshotData, ph_node,
pairingheap_first(&RegisteredSnapshots));
- if (TransactionIdPrecedes(MyProc->xmin, minSnapshot->xmin))
- MyProc->xmin = minSnapshot->xmin;
+ if (TransactionIdPrecedes(pg_atomic_read_u64(&MyProc->xmin), minSnapshot->xmin))
+ pg_atomic_write_u64(&MyProc->xmin, minSnapshot->xmin);
}
/*
@@ -1088,7 +1088,7 @@ AtEOXact_Snapshot(bool isCommit, bool resetXmin)
if (resetXmin)
SnapshotResetXmin();
- Assert(resetXmin || MyProc->xmin == 0);
+ Assert(resetXmin || pg_atomic_read_u64(&MyProc->xmin) == 0);
}
@@ -1153,8 +1153,9 @@ ExportSnapshot(Snapshot snapshot)
* Generate file path for the snapshot. We start numbering of snapshots
* inside the transaction from 1.
*/
- snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d",
- MyProc->backendId, MyProc->lxid, list_length(exportedSnapshots) + 1);
+ snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X%08X-%d",
+ MyProc->backendId, (uint32) (MyProc->lxid >> 32),
+ (uint32) MyProc->lxid, list_length(exportedSnapshots) + 1);
/*
* Copy the snapshot into TopTransactionContext, add it to the
@@ -1330,7 +1331,7 @@ parseXidFromText(const char *prefix, char **s, const char *filename)
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid snapshot data in file \"%s\"", filename)));
ptr += prefixlen;
- if (sscanf(ptr, "%u", &val) != 1)
+ if (sscanf(ptr, "%" INT64_MODIFIER "u", &val) != 1)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid snapshot data in file \"%s\"", filename)));
@@ -1355,7 +1356,7 @@ parseVxidFromText(const char *prefix, char **s, const char *filename,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid snapshot data in file \"%s\"", filename)));
ptr += prefixlen;
- if (sscanf(ptr, "%d/%u", &vxid->backendId, &vxid->localTransactionId) != 2)
+ if (sscanf(ptr, "%d/%" INT64_MODIFIER "u", &vxid->backendId, &vxid->localTransactionId) != 2)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid snapshot data in file \"%s\"", filename)));
@@ -1896,7 +1897,7 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
if (!snapshot->suboverflowed)
{
/* we have full data, so search subxip */
- if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
+ if (pg_lfind64(xid, snapshot->subxip, snapshot->subxcnt))
return true;
/* not there, fall through to search xip[] */
@@ -1918,7 +1919,7 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
return false;
}
- if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt))
+ if (pg_lfind64(xid, snapshot->xip, snapshot->xcnt))
return true;
}
else
@@ -1952,7 +1953,7 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
* indeterminate xid. We don't know whether it's top level or subxact
* but it doesn't matter. If it's present, the xid is visible.
*/
- if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
+ if (pg_lfind64(xid, snapshot->subxip, snapshot->subxcnt))
return true;
}
diff --git a/src/bin/pg_amcheck/t/004_verify_heapam.pl b/src/bin/pg_amcheck/t/004_verify_heapam.pl
index add07f7fca..a6ad931c45 100644
--- a/src/bin/pg_amcheck/t/004_verify_heapam.pl
+++ b/src/bin/pg_amcheck/t/004_verify_heapam.pl
@@ -8,6 +8,7 @@ use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;
+use Data::Dumper;
# This regression test demonstrates that the pg_amcheck binary correctly
# identifies specific kinds of corruption within pages. To test this, we need
@@ -85,6 +86,65 @@ use Test::More;
use constant HEAPTUPLE_PACK_CODE => 'LLLSSSSSCCLLCCCCCCCCCCllLL';
use constant HEAPTUPLE_PACK_LENGTH => 58; # Total size
+use constant HEAPPAGE_SPECIAL_PACK_CODE => 'QQ';
+use constant HEAPPAGE_SPECIAL_PACK_LENGTH => 16;
+use constant HEAPPAGE_SIZE => 8192;
+
+# Some #define constants from access/htup_details.h for use while corrupting.
+use constant HEAP_HASNULL => 0x0001;
+use constant HEAP_XMAX_LOCK_ONLY => 0x0080;
+use constant HEAP_XMIN_COMMITTED => 0x0100;
+use constant HEAP_XMIN_INVALID => 0x0200;
+use constant HEAP_XMAX_COMMITTED => 0x0400;
+use constant HEAP_XMAX_INVALID => 0x0800;
+use constant HEAP_NATTS_MASK => 0x07FF;
+use constant HEAP_XMAX_IS_MULTI => 0x1000;
+use constant HEAP_KEYS_UPDATED => 0x2000;
+use constant HEAP_HOT_UPDATED => 0x4000;
+use constant HEAP_ONLY_TUPLE => 0x8000;
+use constant HEAP_UPDATED => 0x2000;
+
+use constant FIRST_NORMAL_TRANSACTION_ID => 3;
+
+# Read page special data
+sub read_special_data
+{
+ my ($fh, $offset) = @_;
+ my ($buffer, %special);
+
+ $offset -= $offset % HEAPPAGE_SIZE;
+ $offset += HEAPPAGE_SIZE - HEAPPAGE_SPECIAL_PACK_LENGTH;
+
+ sysseek($fh, $offset, 0)
+ or BAIL_OUT("sysseek failed: $!");
+ defined(sysread($fh, $buffer, HEAPPAGE_SPECIAL_PACK_LENGTH))
+ or BAIL_OUT("sysread failed: $!");
+
+ @_ = unpack(HEAPPAGE_SPECIAL_PACK_CODE, $buffer);
+ %special = (
+ pd_xid_base => shift,
+ pd_multi_base => shift);
+ return \%special;
+}
+
+# Write page special data
+sub write_special_data
+{
+ my ($fh, $offset, $special) = @_;
+
+ $offset -= $offset % HEAPPAGE_SIZE;
+ $offset += HEAPPAGE_SIZE - HEAPPAGE_SPECIAL_PACK_LENGTH;
+
+ my $buffer = pack(
+ HEAPPAGE_SPECIAL_PACK_CODE,
+ $special->{pd_xid_base}, $special->{pd_multi_base});
+
+ sysseek($fh, $offset, 0)
+ or BAIL_OUT("sysseek failed: $!");
+ defined(syswrite($fh, $buffer, HEAPPAGE_SPECIAL_PACK_LENGTH))
+ or BAIL_OUT("syswrite failed: $!");
+ return;
+}
# Read a tuple of our table from a heap page.
#
@@ -96,8 +156,9 @@ use constant HEAPTUPLE_PACK_LENGTH => 58; # Total size
#
sub read_tuple
{
- my ($fh, $offset) = @_;
+ my ($fh, $offset, $raw) = @_;
my ($buffer, %tup);
+
sysseek($fh, $offset, 0)
or BAIL_OUT("sysseek failed: $!");
defined(sysread($fh, $buffer, HEAPTUPLE_PACK_LENGTH))
@@ -133,6 +194,18 @@ sub read_tuple
c_va_toastrelid => shift);
# Stitch together the text for column 'b'
$tup{b} = join('', map { chr($tup{"b_body$_"}) } (1 .. 7));
+
+ if (!$raw)
+ {
+ my $special = read_special_data($fh, $offset);
+
+ $tup{t_xmin} += $special->{pd_xid_base};
+ my $is_multi = $tup{t_infomask} & HEAP_XMAX_IS_MULTI;
+ $tup{t_xmax} += !$is_multi ?
+ $special->{pd_xid_base} :
+ $special->{pd_multi_base};
+ }
+
return \%tup;
}
@@ -148,7 +221,39 @@ sub read_tuple
#
sub write_tuple
{
- my ($fh, $offset, $tup) = @_;
+ my ($fh, $offset, $tup, $raw) = @_;
+
+ if (!$raw)
+ {
+ my $special = read_special_data($fh, $offset);
+
+ if ($tup->{t_xmin} >= 3)
+ {
+ my $xmin = $tup->{t_xmin} - $special->{pd_xid_base};
+ die "tuple x_min $tup->{t_xmin} is too smal for pd_xid_base $special->{pd_xid_base}"
+ if $xmin < 3;
+ $tup->{t_xmin} = $xmin;
+ }
+
+ if ($tup->{t_xmax} >= 3)
+ {
+ if (($tup->{t_infomask} & HEAP_XMAX_IS_MULTI) == 0)
+ {
+ my $xmax = $tup->{t_xmax} - $special->{pd_xid_base};
+ die "tuple x_max $tup->{t_xmax} is too smal for pd_xid_base $special->{pd_xid_base}"
+ if $xmax < 3;
+ $tup->{t_xmax} = $xmax;
+ }
+ else
+ {
+ my $xmax = $tup->{t_xmax} - $special->{pd_multi_base};
+ die "tuple multi x_max $tup->{t_xmax} is too smal for pd_multi_base $special->{pd_multi_base}"
+ if $xmax < 3;
+ $tup->{t_xmax} = $xmax;
+ }
+ }
+ }
+
my $buffer = pack(
HEAPTUPLE_PACK_CODE,
$tup->{t_xmin}, $tup->{t_xmax},
@@ -171,6 +276,42 @@ sub write_tuple
return;
}
+# move pd_xid_base and pd_multi_base to more suitable position for tests.
+sub fixup_page
+{
+ my ($fh, $page, $xid_base, $multi_base, $lp_off) = @_;
+ my $offset = $page * HEAPPAGE_SIZE;
+ my $special = read_special_data($fh, $offset);
+
+ die "xid_base $xid_base should be lesser than existed $special->{pd_xid_base}"
+ if ($xid_base > $special->{pd_xid_base});
+ die "multi_base $multi_base should be lesser than existed $special->{pd_multi_base}"
+ if ($multi_base > $special->{pd_multi_base} && $special->{pd_multi_base} != 0);
+ return if ($xid_base == $special->{pd_xid_base} &&
+ $multi_base == $special->{pd_multi_base});
+
+ my $xid_delta = $special->{pd_xid_base} - $xid_base;
+ my $multi_delta = $special->{pd_multi_base} - $multi_base;
+
+ for my $off (@$lp_off)
+ {
+ # change only tuples on this page.
+ next if ($off < $offset && $off > $offset + HEAPPAGE_SIZE);
+ next if ($off == -1);
+
+ my $tup = read_tuple($fh, $off, 1);
+ $tup->{t_xmin} += $xid_delta;
+ my $is_multi = $tup->{t_infomask} & HEAP_XMAX_IS_MULTI;
+ $tup->{t_xmax} += !$is_multi ? $xid_delta : $multi_delta;
+ write_tuple($fh, $off, $tup, 1);
+ }
+
+ $special->{pd_xid_base} = $xid_base;
+ $special->{pd_multi_base} = $multi_base;
+
+ write_special_data($fh, $offset, $special);
+}
+
# Set umask so test directories and files are created with default permissions
umask(0077);
@@ -320,6 +461,8 @@ my $relfrozenxid = $node->safe_psql('postgres',
q(select relfrozenxid from pg_class where relname = 'test'));
my $datfrozenxid = $node->safe_psql('postgres',
q(select datfrozenxid from pg_database where datname = 'postgres'));
+my $datminmxid = $node->safe_psql('postgres',
+ q(select datminmxid from pg_database where datname = 'postgres'));
# Sanity check that our 'test' table has a relfrozenxid newer than the
# datfrozenxid for the database, and that the datfrozenxid is greater than the
@@ -377,6 +520,11 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++)
# Determine endianness of current platform from the 1-byte varlena header
$ENDIANNESS = $tup->{b_header} == 0x11 ? "little" : "big";
}
+
+# Set 64bit xid bases a bit in the past therefore we can set xmin/xmax a bit
+# in the past
+fixup_page($file, 0, $datfrozenxid - 100, $datminmxid, \@lp_off);
+
close($file)
or BAIL_OUT("close failed: $!");
$node->start;
@@ -394,20 +542,6 @@ $node->command_ok([ 'pg_amcheck', '-p', $port, 'postgres' ],
$node->stop;
-# Some #define constants from access/htup_details.h for use while corrupting.
-use constant HEAP_HASNULL => 0x0001;
-use constant HEAP_XMAX_LOCK_ONLY => 0x0080;
-use constant HEAP_XMIN_COMMITTED => 0x0100;
-use constant HEAP_XMIN_INVALID => 0x0200;
-use constant HEAP_XMAX_COMMITTED => 0x0400;
-use constant HEAP_XMAX_INVALID => 0x0800;
-use constant HEAP_NATTS_MASK => 0x07FF;
-use constant HEAP_XMAX_IS_MULTI => 0x1000;
-use constant HEAP_KEYS_UPDATED => 0x2000;
-use constant HEAP_HOT_UPDATED => 0x4000;
-use constant HEAP_ONLY_TUPLE => 0x8000;
-use constant HEAP_UPDATED => 0x2000;
-
# Helper function to generate a regular expression matching the header we
# expect verify_heapam() to return given which fields we expect to be non-null.
sub header
@@ -442,6 +576,8 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++)
# Read tuple, if there is one.
my $tup = $offset == -1 ? undef : read_tuple($file, $offset);
+ # Read page special, if there is one.
+ my $special = $offset == -1 ? undef : read_special_data($file, $offset);
if ($offnum == 1)
{
@@ -458,7 +594,7 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++)
elsif ($offnum == 2)
{
# Corruptly set xmin < datfrozenxid
- my $xmin = 3;
+ my $xmin = $datfrozenxid - 12;
$tup->{t_xmin} = $xmin;
$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
$tup->{t_infomask} &= ~HEAP_XMIN_INVALID;
@@ -468,25 +604,24 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++)
}
elsif ($offnum == 3)
{
- # Corruptly set xmin < datfrozenxid, further back, noting circularity
- # of xid comparison.
- my $xmin = 4026531839;
+ # Corruptly set xmin > next transaction id.
+ my $xmin = $relfrozenxid + 4026531839;
$tup->{t_xmin} = $xmin;
$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
$tup->{t_infomask} &= ~HEAP_XMIN_INVALID;
push @expected,
- qr/${$header}xmin ${xmin} precedes oldest valid transaction ID \d+/;
+ qr/${$header}xmin ${xmin} equals or exceeds next valid transaction ID \d+/;
}
elsif ($offnum == 4)
{
- # Corruptly set xmax < relminmxid;
- my $xmax = 4026531839;
+ # Corruptly set xmax > relminmxid;
+ my $xmax = $relfrozenxid + 4026531839;
$tup->{t_xmax} = $xmax;
$tup->{t_infomask} &= ~HEAP_XMAX_INVALID;
push @expected,
- qr/${$header}xmax ${xmax} precedes oldest valid transaction ID \d+/;
+ qr/${$header}xmax ${xmax} equals or exceeds next valid transaction ID \d+/;
}
elsif ($offnum == 5)
{
@@ -602,7 +737,7 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++)
$tup->{t_xmax} = 4000000000;
push @expected,
- qr/${header}multitransaction ID 4000000000 precedes relation minimum multitransaction ID threshold 1/;
+ qr/${header}multitransaction ID 4000000000 equals or exceeds next valid multitransaction ID 1/;
}
elsif ($offnum == 16) # Last offnum must equal ROWCOUNT
{
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index 76837bc72c..6178b6f345 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -247,7 +247,7 @@ main(int argc, char *argv[])
printf(_("Latest checkpoint's full_page_writes: %s\n"),
ControlFile->checkPointCopy.fullPageWrites ? _("on") : _("off"));
printf(_("Latest checkpoint's NextXID: %llu\n"),
- (unsigned long long) U64FromFullTransactionId(ControlFile->checkPointCopy.nextXid));
+ (unsigned long long) XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid));
printf(_("Latest checkpoint's NextOID: %u\n"),
ControlFile->checkPointCopy.nextOid);
printf(_("Latest checkpoint's NextMultiXactId: %llu\n"),
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index e2273ff873..41f7cb16ab 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -41,6 +41,7 @@
#include "access/attnum.h"
#include "access/sysattr.h"
#include "access/transam.h"
+#include "c.h"
#include "catalog/pg_aggregate_d.h"
#include "catalog/pg_am_d.h"
#include "catalog/pg_attribute_d.h"
@@ -2985,7 +2986,7 @@ dumpDatabase(Archive *fout)
*datistemplate,
*datconnlimit,
*tablespace;
- uint32 frozenxid,
+ uint64 frozenxid,
minmxid;
char *qdatname;
@@ -3055,8 +3056,8 @@ dumpDatabase(Archive *fout)
icurules = PQgetvalue(res, 0, i_daticurules);
else
icurules = NULL;
- frozenxid = atooid(PQgetvalue(res, 0, i_frozenxid));
- minmxid = atooid(PQgetvalue(res, 0, i_minmxid));
+ frozenxid = strtou64(PQgetvalue(res, 0, i_frozenxid), NULL, 0);
+ minmxid = strtou64(PQgetvalue(res, 0, i_minmxid), NULL, 0);
dbdacl.acl = PQgetvalue(res, 0, i_datacl);
dbdacl.acldefault = PQgetvalue(res, 0, i_acldefault);
datistemplate = PQgetvalue(res, 0, i_datistemplate);
@@ -3352,10 +3353,16 @@ dumpDatabase(Archive *fout)
RelFileNumber relfilenumber;
appendPQExpBuffer(loHorizonQry, "UPDATE pg_catalog.pg_class\n"
- "SET relfrozenxid = '%u', relminmxid = '%u'\n"
+ "SET relfrozenxid = '%llu', relminmxid = '%llu'\n"
"WHERE oid = %u;\n",
- atooid(PQgetvalue(lo_res, i, ii_relfrozenxid)),
- atooid(PQgetvalue(lo_res, i, ii_relminmxid)),
+ (unsigned long long) strtou64(PQgetvalue(lo_res,
+ i,
+ ii_relfrozenxid),
+ NULL, 0),
+ (unsigned long long) strtou64(PQgetvalue(lo_res,
+ i,
+ ii_relminmxid),
+ NULL, 0),
atooid(PQgetvalue(lo_res, i, ii_oid)));
oid = atooid(PQgetvalue(lo_res, i, ii_oid));
@@ -6665,11 +6672,11 @@ getTables(Archive *fout, int *numTables)
tblinfo[i].relreplident = *(PQgetvalue(res, i, i_relreplident));
tblinfo[i].rowsec = (strcmp(PQgetvalue(res, i, i_relrowsec), "t") == 0);
tblinfo[i].forcerowsec = (strcmp(PQgetvalue(res, i, i_relforcerowsec), "t") == 0);
- tblinfo[i].frozenxid = atooid(PQgetvalue(res, i, i_relfrozenxid));
- tblinfo[i].toast_frozenxid = atooid(PQgetvalue(res, i, i_toastfrozenxid));
+ tblinfo[i].frozenxid = strtou64(PQgetvalue(res, i, i_relfrozenxid), NULL, 0);
+ tblinfo[i].toast_frozenxid = strtou64(PQgetvalue(res, i, i_toastfrozenxid), NULL, 0);
tblinfo[i].toast_oid = atooid(PQgetvalue(res, i, i_toastoid));
- tblinfo[i].minmxid = atooid(PQgetvalue(res, i, i_relminmxid));
- tblinfo[i].toast_minmxid = atooid(PQgetvalue(res, i, i_toastminmxid));
+ tblinfo[i].minmxid = strtou64(PQgetvalue(res, i, i_relminmxid), NULL, 0);
+ tblinfo[i].toast_minmxid = strtou64(PQgetvalue(res, i, i_toastminmxid), NULL, 0);
tblinfo[i].reloptions = pg_strdup(PQgetvalue(res, i, i_reloptions));
if (PQgetisnull(res, i, i_checkoption))
tblinfo[i].checkoption = NULL;
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 2fe3cbed9a..8a5d2eb1fa 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -299,11 +299,11 @@ typedef struct _tableInfo
bool rowsec; /* is row security enabled? */
bool forcerowsec; /* is row security forced? */
bool hasoids; /* does it have OIDs? */
- uint32 frozenxid; /* table's relfrozenxid */
- uint32 minmxid; /* table's relminmxid */
+ uint64 frozenxid; /* table's relfrozenxid */
+ uint64 minmxid; /* table's relminmxid */
Oid toast_oid; /* toast table's OID, or 0 if none */
- uint32 toast_frozenxid; /* toast table's relfrozenxid, if any */
- uint32 toast_minmxid; /* toast table's relminmxid */
+ uint64 toast_frozenxid; /* toast table's relfrozenxid, if any */
+ uint64 toast_minmxid; /* toast table's relminmxid */
int ncheck; /* # of CHECK expressions */
Oid reltype; /* OID of table's composite type, if any */
Oid reloftype; /* underlying type for typed table */
diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c
index 6f0af6471e..1603977542 100644
--- a/src/bin/pg_resetwal/pg_resetwal.c
+++ b/src/bin/pg_resetwal/pg_resetwal.c
@@ -63,7 +63,6 @@ static ControlFileData ControlFile; /* pg_control values */
static XLogSegNo newXlogSegNo; /* new XLOG segment # */
static bool guessed = false; /* T if we had to guess at any values */
static const char *progname;
-static uint32 set_xid_epoch = (uint32) -1;
static TransactionId set_oldest_xid = 0;
static TransactionId set_xid = 0;
static TransactionId set_oldest_commit_ts_xid = 0;
@@ -95,7 +94,6 @@ main(int argc, char *argv[])
static struct option long_options[] = {
{"commit-timestamp-ids", required_argument, NULL, 'c'},
{"pgdata", required_argument, NULL, 'D'},
- {"epoch", required_argument, NULL, 'e'},
{"force", no_argument, NULL, 'f'},
{"next-wal-file", required_argument, NULL, 'l'},
{"multixact-ids", required_argument, NULL, 'm'},
@@ -137,7 +135,7 @@ main(int argc, char *argv[])
}
- while ((c = getopt_long(argc, argv, "c:D:e:fl:m:no:O:u:x:", long_options, NULL)) != -1)
+ while ((c = getopt_long(argc, argv, "c:D:fl:m:no:O:u:x:", long_options, NULL)) != -1)
{
switch (c)
{
@@ -153,24 +151,9 @@ main(int argc, char *argv[])
noupdate = true;
break;
- case 'e':
- errno = 0;
- set_xid_epoch = strtoul(optarg, &endptr, 0);
- if (endptr == optarg || *endptr != '\0' || errno != 0)
- {
- /*------
- translator: the second %s is a command line argument (-e, etc) */
- pg_log_error("invalid argument for option %s", "-e");
- pg_log_error_hint("Try \"%s --help\" for more information.", progname);
- exit(1);
- }
- if (set_xid_epoch == -1)
- pg_fatal("transaction ID epoch (-e) must not be -1");
- break;
-
case 'u':
errno = 0;
- set_oldest_xid = strtoul(optarg, &endptr, 0);
+ set_oldest_xid = strtou64(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0)
{
pg_log_error("invalid argument for option %s", "-u");
@@ -184,7 +167,7 @@ main(int argc, char *argv[])
case 'x':
errno = 0;
- set_xid = strtoul(optarg, &endptr, 0);
+ set_xid = strtou64(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0)
{
pg_log_error("invalid argument for option %s", "-x");
@@ -198,14 +181,14 @@ main(int argc, char *argv[])
case 'c':
errno = 0;
- set_oldest_commit_ts_xid = strtoul(optarg, &endptr, 0);
+ set_oldest_commit_ts_xid = strtou64(optarg, &endptr, 0);
if (endptr == optarg || *endptr != ',' || errno != 0)
{
pg_log_error("invalid argument for option %s", "-c");
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
exit(1);
}
- set_newest_commit_ts_xid = strtoul(endptr + 1, &endptr2, 0);
+ set_newest_commit_ts_xid = strtou64(endptr + 1, &endptr2, 0);
if (endptr2 == endptr + 1 || *endptr2 != '\0' || errno != 0)
{
pg_log_error("invalid argument for option %s", "-c");
@@ -241,7 +224,7 @@ main(int argc, char *argv[])
case 'm':
errno = 0;
- set_mxid = strtoul(optarg, &endptr, 0);
+ set_mxid = strtou64(optarg, &endptr, 0);
if (endptr == optarg || *endptr != ',' || errno != 0)
{
pg_log_error("invalid argument for option %s", "-m");
@@ -249,7 +232,7 @@ main(int argc, char *argv[])
exit(1);
}
- set_oldestmxid = strtoul(endptr + 1, &endptr2, 0);
+ set_oldestmxid = strtou64(endptr + 1, &endptr2, 0);
if (endptr2 == endptr + 1 || *endptr2 != '\0' || errno != 0)
{
pg_log_error("invalid argument for option %s", "-m");
@@ -269,7 +252,7 @@ main(int argc, char *argv[])
case 'O':
errno = 0;
- set_mxoff = strtoul(optarg, &endptr, 0);
+ set_mxoff = strtou64(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0)
{
pg_log_error("invalid argument for option %s", "-O");
@@ -415,11 +398,6 @@ main(int argc, char *argv[])
* Adjust fields if required by switches. (Do this now so that printout,
* if any, includes these values.)
*/
- if (set_xid_epoch != -1)
- ControlFile.checkPointCopy.nextXid =
- FullTransactionIdFromEpochAndXid(set_xid_epoch,
- XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid));
-
if (set_oldest_xid != 0)
{
ControlFile.checkPointCopy.oldestXid = set_oldest_xid;
@@ -427,9 +405,7 @@ main(int argc, char *argv[])
}
if (set_xid != 0)
- ControlFile.checkPointCopy.nextXid =
- FullTransactionIdFromEpochAndXid(EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid),
- set_xid);
+ ControlFile.checkPointCopy.nextXid = FullTransactionIdFromXid(set_xid);
if (set_oldest_commit_ts_xid != 0)
ControlFile.checkPointCopy.oldestCommitTsXid = set_oldest_commit_ts_xid;
@@ -664,7 +640,7 @@ GuessControlValues(void)
ControlFile.checkPointCopy.PrevTimeLineID = 1;
ControlFile.checkPointCopy.fullPageWrites = false;
ControlFile.checkPointCopy.nextXid =
- FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
+ FullTransactionIdFromXid(FirstNormalTransactionId);
ControlFile.checkPointCopy.nextOid = FirstGenbkiObjectId;
ControlFile.checkPointCopy.nextMulti = FirstMultiXactId;
ControlFile.checkPointCopy.nextMultiOffset = 0;
@@ -715,6 +691,8 @@ GuessControlValues(void)
*
* NB: this display should be just those fields that will not be
* reset by RewriteControlFile().
+ *
+ * Special macros help to make translatable strings.
*/
static void
PrintControlValues(bool guessed)
@@ -734,8 +712,7 @@ PrintControlValues(bool guessed)
ControlFile.checkPointCopy.ThisTimeLineID);
printf(_("Latest checkpoint's full_page_writes: %s\n"),
ControlFile.checkPointCopy.fullPageWrites ? _("on") : _("off"));
- printf(_("Latest checkpoint's NextXID: %u:%llu\n"),
- EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid),
+ printf(_("Latest checkpoint's NextXID: %llu\n"),
(unsigned long long) XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid));
printf(_("Latest checkpoint's NextOID: %u\n"),
ControlFile.checkPointCopy.nextOid);
@@ -833,12 +810,6 @@ PrintNewControlValues(void)
ControlFile.checkPointCopy.oldestXidDB);
}
- if (set_xid_epoch != -1)
- {
- printf(_("NextXID epoch: %u\n"),
- EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid));
- }
-
if (set_oldest_commit_ts_xid != 0)
{
printf(_("oldestCommitTsXid: %llu\n"),
diff --git a/src/bin/pg_resetwal/t/001_basic.pl b/src/bin/pg_resetwal/t/001_basic.pl
index 18d0882cb1..471503c6b3 100644
--- a/src/bin/pg_resetwal/t/001_basic.pl
+++ b/src/bin/pg_resetwal/t/001_basic.pl
@@ -95,15 +95,6 @@ command_fails_like(
[ 'pg_resetwal', '-c', '10,1', $node->data_dir ],
qr/greater than/,
'fails with -c value 1 part 2');
-# -e
-command_fails_like(
- [ 'pg_resetwal', '-e', 'foo', $node->data_dir ],
- qr/error: invalid argument for option -e/,
- 'fails with incorrect -e option');
-command_fails_like(
- [ 'pg_resetwal', '-e', '-1', $node->data_dir ],
- qr/must not be -1/,
- 'fails with -e value -1');
# -l
command_fails_like(
[ 'pg_resetwal', '-l', 'foo', $node->data_dir ],
@@ -181,7 +172,6 @@ my $blcksz = $1;
my @cmd = ('pg_resetwal', '-D', $node->data_dir);
# some not-so-critical hardcoded values
-push @cmd, '-e', 1;
push @cmd, '-l', '00000001000000320000004B';
push @cmd, '-o', 100_000;
push @cmd, '--wal-segsize', 1;
@@ -205,8 +195,10 @@ push @cmd,
'-c',
sprintf("%d,%d", hex($files[0]) == 0 ? 3 : hex($files[0]), hex($files[-1]));
+my $A = 2;
+my $B = 1;
@files = get_slru_files('pg_multixact/offsets');
-$mult = 32 * $blcksz / 4;
+$mult = $A * $blcksz / $B;
# -m argument is "new,old"
push @cmd, '-m',
sprintf("%d,%d",
@@ -214,11 +206,11 @@ push @cmd, '-m',
hex($files[0]) == 0 ? 1 : hex($files[0] * $mult));
@files = get_slru_files('pg_multixact/members');
-$mult = 32 * int($blcksz / 20) * 4;
+$mult = $A * int($blcksz / 20) * $B;
push @cmd, '-O', (hex($files[-1]) + 1) * $mult;
@files = get_slru_files('pg_xact');
-$mult = 32 * $blcksz * 4;
+$mult = $A * $blcksz * $B;
push @cmd,
'-u', (hex($files[0]) == 0 ? 3 : hex($files[0]) * $mult),
'-x', ((hex($files[-1]) + 1) * $mult);
diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile
index bde91e2beb..6de46f8c2d 100644
--- a/src/bin/pg_upgrade/Makefile
+++ b/src/bin/pg_upgrade/Makefile
@@ -23,6 +23,7 @@ OBJS = \
parallel.o \
pg_upgrade.o \
relfilenumber.o \
+ segresize.o \
server.o \
tablespace.o \
util.o \
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index fa52aa2c22..3d34681699 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -35,6 +35,8 @@ static void check_for_new_tablespace_dir(void);
static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster);
static void check_new_cluster_logical_replication_slots(void);
static void check_old_cluster_for_valid_slots(bool live_check);
+static void check_for_32bit_xid_usage(ClusterInfo *cluster);
+static bool is_xid_wraparound(ClusterInfo *cluster);
/*
@@ -84,7 +86,7 @@ output_check_banner(bool live_check)
void
-check_and_dump_old_cluster(bool live_check)
+check_and_dump_old_cluster(bool live_check, bool *is_wraparound)
{
/* -- OLD -- */
@@ -197,6 +199,17 @@ check_and_dump_old_cluster(bool live_check)
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 903)
old_9_3_check_for_line_data_type_usage(&old_cluster);
+ /* Prepare for 64bit xid */
+ if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER)
+ {
+ /* Check if 32-bit xid type is used in tables */
+ check_for_32bit_xid_usage(&old_cluster);
+ /* Check indexes to be upgraded */
+ invalidate_spgist_indexes(&old_cluster, true);
+ invalidate_gin_indexes(&old_cluster, true);
+ invalidate_external_indexes(&old_cluster, true);
+ }
+
/*
* While not a check option, we do this now because this is the only time
* the old server is running.
@@ -204,6 +217,8 @@ check_and_dump_old_cluster(bool live_check)
if (!user_opts.check)
generate_old_dump();
+ *is_wraparound = is_xid_wraparound(&old_cluster);
+
if (!live_check)
stop_postmaster(false);
}
@@ -274,6 +289,17 @@ issue_warnings_and_set_wal_level(void)
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 906)
old_9_6_invalidate_hash_indexes(&new_cluster, false);
+ /* Raindex for 64bit xid */
+ if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER)
+ {
+ /* Check if 32-bit xid type is used in tables */
+ check_for_32bit_xid_usage(&old_cluster);
+ /* Check indexes to be upgraded */
+ invalidate_spgist_indexes(&old_cluster, true);
+ invalidate_gin_indexes(&old_cluster, true);
+ invalidate_external_indexes(&old_cluster, true);
+ }
+
report_extension_updates(&new_cluster);
stop_postmaster(false);
@@ -1613,3 +1639,124 @@ check_old_cluster_for_valid_slots(bool live_check)
check_ok();
}
+
+/*
+ * check_for_32bit_xid_usage()
+ *
+ * Postgres Pro Enterprise changes xid storage format to 64-bit. Check if
+ * xid type is used in tables.
+ */
+static void
+check_for_32bit_xid_usage(ClusterInfo *cluster)
+{
+ int dbnum;
+ FILE *script = NULL;
+ bool found = false;
+ char output_path[MAXPGPATH];
+
+ prep_status("Checking for incompatible \"xid\" data type");
+
+ snprintf(output_path, sizeof(output_path), "tables_using_xid.txt");
+
+ for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
+ {
+ PGresult *res;
+ bool db_used = false;
+ int ntups;
+ int rowno;
+ int i_nspname,
+ i_relname,
+ i_attname;
+ DbInfo *active_db = &cluster->dbarr.dbs[dbnum];
+ PGconn *conn = connectToServer(cluster, active_db->db_name);
+
+ /*
+ * While several relkinds don't store any data, e.g. views, they can
+ * be used to define data types of other columns, so we check all
+ * relkinds.
+ */
+ res = executeQueryOrDie(conn,
+ "SELECT n.nspname, c.relname, a.attname "
+ "FROM pg_catalog.pg_class c, "
+ " pg_catalog.pg_namespace n, "
+ " pg_catalog.pg_attribute a "
+ "WHERE c.oid = a.attrelid AND "
+ " a.attnum >= 1 AND "
+ " a.atttypid = 'pg_catalog.xid'::pg_catalog.regtype AND "
+ " c.relnamespace = n.oid AND "
+ /* exclude possible orphaned temp tables */
+ " n.nspname !~ '^pg_temp_' AND "
+ " n.nspname NOT IN ('pg_catalog', 'information_schema')");
+
+ ntups = PQntuples(res);
+ i_nspname = PQfnumber(res, "nspname");
+ i_relname = PQfnumber(res, "relname");
+ i_attname = PQfnumber(res, "attname");
+ for (rowno = 0; rowno < ntups; rowno++)
+ {
+ found = true;
+ if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL)
+ pg_fatal("could not open file \"%s\": %s\n",
+ output_path, strerror(errno));
+ if (!db_used)
+ {
+ fprintf(script, "Database: %s\n", active_db->db_name);
+ db_used = true;
+ }
+ fprintf(script, " %s.%s.%s\n",
+ PQgetvalue(res, rowno, i_nspname),
+ PQgetvalue(res, rowno, i_relname),
+ PQgetvalue(res, rowno, i_attname));
+ }
+
+ PQclear(res);
+
+ PQfinish(conn);
+ }
+
+ if (script)
+ fclose(script);
+
+ if (found)
+ {
+ pg_log(PG_REPORT, "fatal");
+ pg_fatal("Your installation contains the \"xid\" data type in user tables.\n"
+ "The internal format of \"xid\" changed in Postgres Pro Enterprise so this cluster\n"
+ "cannot currently be upgraded. Note that even dropped attributes cause a problem.\n"
+ "You can remove the problem tables and restart the upgrade.\n"
+ "A list of the problem columns is in the file:\n"
+ " %s", output_path);
+ }
+ else
+ check_ok();
+}
+
+/*
+ * is_xid_wraparound()
+ *
+ * Return true if 32-xid cluster had wraparound.
+ */
+static bool
+is_xid_wraparound(ClusterInfo *cluster)
+{
+ PGconn *conn;
+ PGresult *res;
+ bool is_wraparound;
+
+ conn = connectToServer(cluster, "template1");
+
+ /*
+ * txid_current is extended with an "epoch" counter, so to check
+ * wraparound in old 32-xid cluster we cut epoch by casting to int4.
+ */
+ res = executeQueryOrDie(conn,
+ "SELECT 1 "
+ "FROM pg_catalog.pg_database, txid_current() tx "
+ "WHERE (tx %% 4294967295)::bigint <= datfrozenxid::text::bigint "
+ "LIMIT 1");
+ is_wraparound = PQntuples(res) ? true : false;
+ PQclear(res);
+ PQfinish(conn);
+
+ return is_wraparound;
+}
diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c
index 8ef6204137..eb6b8a1027 100644
--- a/src/bin/pg_upgrade/controldata.c
+++ b/src/bin/pg_upgrade/controldata.c
@@ -288,6 +288,8 @@ get_control_data(ClusterInfo *cluster, bool live_check)
xid.value = strtou64(p, NULL, 10);
/*
+ * Try to read 32-bit XID format 'epoch:xid'.
+ *
* Delimiter changed from '/' to ':' in 9.6. We don't test for
* the catalog version of the change because the catalog version
* is pulled from pg_controldata too, and it isn't worth adding an
@@ -303,8 +305,7 @@ get_control_data(ClusterInfo *cluster, bool live_check)
if (p == NULL)
{
/* FullTransactionId representation */
- cluster->controldata.chkpnt_nxtxid = XidFromFullTransactionId(xid);
- cluster->controldata.chkpnt_nxtepoch = EpochFromFullTransactionId(xid);
+ cluster->controldata.chkpnt_nxtxid = xid.value;
}
else
{
@@ -313,8 +314,8 @@ get_control_data(ClusterInfo *cluster, bool live_check)
/* Epoch:Xid representation */
p++; /* remove '/' or ':' char */
- cluster->controldata.chkpnt_nxtxid = str2uint(p);
- cluster->controldata.chkpnt_nxtepoch = (TransactionId) XidFromFullTransactionId(xid);
+ cluster->controldata.chkpnt_nxtxid = (XidFromFullTransactionId(xid)) << 32 |
+ (TransactionId) str2uint(p);
}
got_xid = true;
@@ -338,7 +339,7 @@ get_control_data(ClusterInfo *cluster, bool live_check)
pg_fatal("%d: controldata retrieval problem", __LINE__);
p++; /* remove ':' char */
- cluster->controldata.chkpnt_nxtmulti = str2uint(p);
+ cluster->controldata.chkpnt_nxtmulti = strtou64(p, NULL, 10);
got_multi = true;
}
else if ((p = strstr(bufin, "Latest checkpoint's oldestXID:")) != NULL)
@@ -349,7 +350,7 @@ get_control_data(ClusterInfo *cluster, bool live_check)
pg_fatal("%d: controldata retrieval problem", __LINE__);
p++; /* remove ':' char */
- cluster->controldata.chkpnt_oldstxid = str2uint(p);
+ cluster->controldata.chkpnt_oldstxid = strtou64(p, NULL, 10);
got_oldestxid = true;
}
else if ((p = strstr(bufin, "Latest checkpoint's oldestMultiXid:")) != NULL)
@@ -360,7 +361,7 @@ get_control_data(ClusterInfo *cluster, bool live_check)
pg_fatal("%d: controldata retrieval problem", __LINE__);
p++; /* remove ':' char */
- cluster->controldata.chkpnt_oldstMulti = str2uint(p);
+ cluster->controldata.chkpnt_oldstMulti = strtou64(p, NULL, 10);
got_oldestmulti = true;
}
else if ((p = strstr(bufin, "Latest checkpoint's NextMultiOffset:")) != NULL)
@@ -371,7 +372,7 @@ get_control_data(ClusterInfo *cluster, bool live_check)
pg_fatal("%d: controldata retrieval problem", __LINE__);
p++; /* remove ':' char */
- cluster->controldata.chkpnt_nxtmxoff = str2uint(p);
+ cluster->controldata.chkpnt_nxtmxoff = strtou64(p, NULL, 10);
got_mxoff = true;
}
else if ((p = strstr(bufin, "First log segment after reset:")) != NULL)
diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c
index d173602882..ae00f4674f 100644
--- a/src/bin/pg_upgrade/file.c
+++ b/src/bin/pg_upgrade/file.c
@@ -174,7 +174,8 @@ linkFile(const char *src, const char *dst,
*/
void
rewriteVisibilityMap(const char *fromfile, const char *tofile,
- const char *schemaName, const char *relName)
+ const char *schemaName, const char *relName,
+ bool update_version)
{
int src_fd;
int dst_fd;
@@ -290,6 +291,11 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
if (old_lastpart && empty)
break;
+ if (update_version)
+ PageSetPageSizeAndVersion((Page) new_vmbuf.data,
+ PageGetPageSize((Page) new_vmbuf.data),
+ PG_PAGE_LAYOUT_VERSION);
+
/* Set new checksum for visibility map page, if enabled */
if (new_cluster.controldata.data_checksum_version != 0)
((PageHeader) new_vmbuf.data)->pd_checksum =
@@ -316,6 +322,97 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
close(src_fd);
}
+/*
+ * updateSegmentVersion()
+ *
+ * Transform a segment file, copying from src to dst.
+ * schemaName/relName are relation's SQL name (used for error messages only).
+ *
+ * Read segment pages one by one and set version to PG_PAGE_LAYOUT_VERSION.
+ *
+ * Although FSM and MV formats does not change while switch to 64-bit XIDs, we
+ * must upgrade pages version in order to avoid lazy conversion on first read.
+ */
+void
+updateSegmentPagesVersion(const char *fromfile, const char *tofile,
+ const char *schemaName, const char *relName)
+{
+ int src_fd;
+ int dst_fd;
+ struct stat statbuf;
+ ssize_t src_filesize;
+ ssize_t totalBytesRead;
+ ssize_t bytesRead;
+ BlockNumber blkno;
+ PGAlignedBlock buf;
+
+ if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0)
+ pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s",
+ schemaName, relName, fromfile, strerror(errno));
+
+ if (fstat(src_fd, &statbuf) != 0)
+ pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %s",
+ schemaName, relName, fromfile, strerror(errno));
+
+ if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+ pg_file_create_mode)) < 0)
+ pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s",
+ schemaName, relName, tofile, strerror(errno));
+
+ /* Save old file size */
+ src_filesize = statbuf.st_size;
+ totalBytesRead = 0;
+ blkno = 0;
+
+ while (totalBytesRead < src_filesize)
+ {
+ errno = 0;
+ if ((bytesRead = read(src_fd, buf.data, BLCKSZ)) != BLCKSZ)
+ {
+ if (bytesRead < 0)
+ pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s",
+ schemaName, relName, fromfile, strerror(errno));
+ else
+ pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"",
+ schemaName, relName, fromfile);
+ }
+
+ totalBytesRead += BLCKSZ;
+ PageSetPageSizeAndVersion((Page) buf.data,
+ PageGetPageSize((Page) buf.data),
+ PG_PAGE_LAYOUT_VERSION);
+
+ /* Set new checksum for page, if enabled */
+ if (new_cluster.controldata.data_checksum_version != 0)
+ ((PageHeader) buf.data)->pd_checksum =
+ pg_checksum_page(buf.data, blkno);
+
+ /*
+ * We dealing here only with FSM and VM pages.
+ */
+ if (((PageHeader) buf.data)->pd_lower != SizeOfPageHeaderData ||
+ ((PageHeader) buf.data)->pd_upper != BLCKSZ)
+ pg_fatal("error while copying relation \"%s.%s\": unknown page format found in file \"%s\"",
+ schemaName, relName, fromfile);
+
+ errno = 0;
+ if (write(dst_fd, buf.data, BLCKSZ) != BLCKSZ)
+ {
+ /* if write didn't set errno, assume problem is no disk space */
+ if (errno == 0)
+ errno = ENOSPC;
+ pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s",
+ schemaName, relName, tofile, strerror(errno));
+ }
+
+ blkno++;
+ }
+
+ /* Clean up */
+ close(dst_fd);
+ close(src_fd);
+}
+
void
check_file_clone(void)
{
diff --git a/src/bin/pg_upgrade/meson.build b/src/bin/pg_upgrade/meson.build
index 3e8a08e062..12243fd4f3 100644
--- a/src/bin/pg_upgrade/meson.build
+++ b/src/bin/pg_upgrade/meson.build
@@ -12,6 +12,7 @@ pg_upgrade_sources = files(
'parallel.c',
'pg_upgrade.c',
'relfilenumber.c',
+ 'segresize.c',
'server.c',
'tablespace.c',
'util.c',
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index c4ab30c773..f36e769f0a 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -44,6 +44,9 @@
#include <langinfo.h>
#endif
+#include "access/multixact.h"
+#include "access/transam.h"
+#include "access/xlog_internal.h"
#include "catalog/pg_class_d.h"
#include "common/file_perm.h"
#include "common/logging.h"
@@ -54,7 +57,7 @@
static void set_locale_and_encoding(void);
static void prepare_new_cluster(void);
static void prepare_new_globals(void);
-static void create_new_objects(void);
+static void create_new_objects(bool is_wraparound);
static void copy_xact_xlog_xid(void);
static void set_frozenxids(bool minmxid_only);
static void make_outputdirs(char *pgdata);
@@ -82,6 +85,7 @@ main(int argc, char **argv)
{
char *deletion_script_file_name = NULL;
bool live_check = false;
+ bool is_wraparound = false;
/*
* pg_upgrade doesn't currently use common/logging.c, but initialize it
@@ -127,7 +131,7 @@ main(int argc, char **argv)
check_cluster_compatibility(live_check);
- check_and_dump_old_cluster(live_check);
+ check_and_dump_old_cluster(live_check, &is_wraparound);
/* -- NEW -- */
@@ -160,7 +164,7 @@ main(int argc, char **argv)
prepare_new_globals();
- create_new_objects();
+ create_new_objects(is_wraparound);
stop_postmaster(false);
@@ -510,7 +514,7 @@ prepare_new_globals(void)
static void
-create_new_objects(void)
+create_new_objects(bool is_wraparound)
{
int dbnum;
@@ -602,11 +606,23 @@ create_new_objects(void)
check_ok();
/*
- * We don't have minmxids for databases or relations in pre-9.3 clusters,
- * so set those after we have restored the schema.
+ * Refix datfrozenxid and datminmxid
*/
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 902)
set_frozenxids(true);
+ else if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER &&
+ new_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER)
+ {
+ /*
+ * During upgrade from 32-bit to 64-bit xids save relfrozenxids if
+ * there was no wraparound in old cluster. Otherwise, reset them to
+ * FirstNormalTransactionId value.
+ */
+ if (is_wraparound)
+ set_frozenxids(false);
+ else
+ set_frozenxids(true);
+ }
/* update new_cluster info now that we have objects in the databases */
get_db_rel_and_slot_infos(&new_cluster, false);
@@ -660,14 +676,37 @@ copy_subdir_files(const char *old_subdir, const char *new_subdir)
static void
copy_xact_xlog_xid(void)
{
- /*
- * Copy old commit logs to new data dir. pg_clog has been renamed to
- * pg_xact in post-10 clusters.
- */
- copy_subdir_files(GET_MAJOR_VERSION(old_cluster.major_version) <= 906 ?
- "pg_clog" : "pg_xact",
- GET_MAJOR_VERSION(new_cluster.major_version) <= 906 ?
- "pg_clog" : "pg_xact");
+ TransactionId next_xid;
+
+#define GetClogDirName(cluster) \
+ GET_MAJOR_VERSION(cluster.major_version) <= 906 ? "pg_clog" : "pg_xact"
+
+ /* Set next xid to 2^32 if we're upgrading from 32 bit postgres */
+ if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER &&
+ new_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER)
+ next_xid = ((TransactionId) 1 << 32);
+ else
+ next_xid = old_cluster.controldata.chkpnt_nxtxid;
+
+ if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER &&
+ new_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER)
+ {
+ /* Convert commit logs and copy to the new data dir */
+ prep_status("Transforming commit log segments");
+ convert_xact(psprintf("%s/%s", old_cluster.pgdata, GetClogDirName(old_cluster)),
+ psprintf("%s/%s", new_cluster.pgdata, GetClogDirName(new_cluster)));
+ check_ok();
+ }
+ else
+ {
+ /*
+ * Copy old commit logs to new data dir. pg_clog has been renamed to
+ * pg_xact in post-10 clusters.
+ */
+ prep_status("Copying commit log segments");
+ copy_subdir_files(GetClogDirName(old_cluster), GetClogDirName(new_cluster));
+ check_ok();
+ }
prep_status("Setting oldest XID for new cluster");
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
@@ -681,19 +720,20 @@ copy_xact_xlog_xid(void)
prep_status("Setting next transaction ID and epoch for new cluster");
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/pg_resetwal\" -f -x %llu \"%s\"",
- new_cluster.bindir,
- (unsigned long long) old_cluster.controldata.chkpnt_nxtxid,
+ new_cluster.bindir, (unsigned long long) next_xid,
new_cluster.pgdata);
+#ifdef NOT_USED
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/pg_resetwal\" -f -e %u \"%s\"",
new_cluster.bindir, old_cluster.controldata.chkpnt_nxtepoch,
new_cluster.pgdata);
+#endif
/* must reset commit timestamp limits also */
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/pg_resetwal\" -f -c %llu,%llu \"%s\"",
new_cluster.bindir,
- (unsigned long long) old_cluster.controldata.chkpnt_nxtxid,
- (unsigned long long) old_cluster.controldata.chkpnt_nxtxid,
+ (unsigned long long) next_xid,
+ (unsigned long long) next_xid,
new_cluster.pgdata);
check_ok();
@@ -706,8 +746,48 @@ copy_xact_xlog_xid(void)
if (old_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER &&
new_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER)
{
- copy_subdir_files("pg_multixact/offsets", "pg_multixact/offsets");
- copy_subdir_files("pg_multixact/members", "pg_multixact/members");
+ uint64 oldest_mxid = old_cluster.controldata.chkpnt_oldstMulti;
+ uint64 next_mxid = old_cluster.controldata.chkpnt_nxtmulti;
+ uint64 next_mxoff = old_cluster.controldata.chkpnt_nxtmxoff;
+
+ if (old_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER)
+ {
+ copy_subdir_files("pg_multixact/offsets", "pg_multixact/offsets");
+ copy_subdir_files("pg_multixact/members", "pg_multixact/members");
+ }
+ else
+ {
+ MultiXactOffset oldest_mxoff;
+
+ remove_new_subdir("pg_multixact/offsets", false);
+ oldest_mxoff = convert_multixact_offsets("pg_multixact/offsets", "pg_multixact/offsets");
+
+ remove_new_subdir("pg_multixact/members", false);
+ convert_multixact_members("pg_multixact/members", "pg_multixact/members", oldest_mxoff);
+
+ /*
+ * Handle wraparound if we're upgrading from 32 bit postgres.
+ * Invalid 0 mxids/offsets are skipped, so 1 becomes 2^32.
+ */
+ if (oldest_mxoff)
+ {
+ if (next_mxid < oldest_mxid)
+ next_mxid += ((MultiXactId) 1 << 32) - FirstMultiXactId;
+
+ if (next_mxoff < oldest_mxoff)
+ next_mxoff += ((MultiXactOffset) 1 << 32) - 1;
+
+ /* Offsets and members were rewritten, oldest_mxoff = 1 */
+ next_mxoff -= oldest_mxoff - 1;
+ oldest_mxoff = 1;
+
+ /*
+ * Save converted next_mxid for possible usage in
+ * set_frozenxids()
+ */
+ old_cluster.controldata.chkpnt_nxtmulti = next_mxid;
+ }
+ }
prep_status("Setting next multixact ID and offset for new cluster");
@@ -718,9 +798,9 @@ copy_xact_xlog_xid(void)
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/pg_resetwal\" -O %llu -m %llu,%llu \"%s\"",
new_cluster.bindir,
- (unsigned long long) old_cluster.controldata.chkpnt_nxtmxoff,
- (unsigned long long) old_cluster.controldata.chkpnt_nxtmulti,
- (unsigned long long) old_cluster.controldata.chkpnt_oldstMulti,
+ (unsigned long long) next_mxoff,
+ (unsigned long long) next_mxid,
+ (unsigned long long) oldest_mxid,
new_cluster.pgdata);
check_ok();
}
@@ -794,6 +874,8 @@ set_frozenxids(bool minmxid_only)
int ntups;
int i_datname;
int i_datallowconn;
+ TransactionId frozen_xid;
+ MultiXactId minmxid;
if (!minmxid_only)
prep_status("Setting frozenxid and minmxid counters in new cluster");
@@ -802,18 +884,26 @@ set_frozenxids(bool minmxid_only)
conn_template1 = connectToServer(&new_cluster, "template1");
+ if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER &&
+ new_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER)
+ frozen_xid = FirstNormalTransactionId;
+ else
+ frozen_xid = old_cluster.controldata.chkpnt_nxtxid;
+
+ minmxid = old_cluster.controldata.chkpnt_nxtmulti;
+
if (!minmxid_only)
/* set pg_database.datfrozenxid */
PQclear(executeQueryOrDie(conn_template1,
"UPDATE pg_catalog.pg_database "
"SET datfrozenxid = '%llu'",
- (unsigned long long) old_cluster.controldata.chkpnt_nxtxid));
+ (unsigned long long) frozen_xid));
/* set pg_database.datminmxid */
PQclear(executeQueryOrDie(conn_template1,
"UPDATE pg_catalog.pg_database "
"SET datminmxid = '%llu'",
- (unsigned long long) old_cluster.controldata.chkpnt_nxtmulti));
+ (unsigned long long) minmxid));
/* get database names */
dbres = executeQueryOrDie(conn_template1,
@@ -853,7 +943,7 @@ set_frozenxids(bool minmxid_only)
CppAsString2(RELKIND_RELATION) ", "
CppAsString2(RELKIND_MATVIEW) ", "
CppAsString2(RELKIND_TOASTVALUE) ")",
- (unsigned long long) old_cluster.controldata.chkpnt_nxtxid));
+ (unsigned long long) frozen_xid));
/* set pg_class.relminmxid */
PQclear(executeQueryOrDie(conn,
@@ -864,7 +954,7 @@ set_frozenxids(bool minmxid_only)
CppAsString2(RELKIND_RELATION) ", "
CppAsString2(RELKIND_MATVIEW) ", "
CppAsString2(RELKIND_TOASTVALUE) ")",
- (unsigned long long) old_cluster.controldata.chkpnt_nxtmulti));
+ (unsigned long long) minmxid));
PQfinish(conn);
/* Reset datallowconn flag */
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index a710f325de..ad09f67710 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -115,6 +115,11 @@ extern char *output_files[];
*/
#define MULTIXACT_FORMATCHANGE_CAT_VER 201301231
+/*
+ * xid format changed from 32-bit to 64-bit.
+ */
+#define XID_FORMATCHANGE_CAT_VER 999999999
+
/*
* large object chunk size added to pg_controldata,
* commit 5f93c37805e7485488480916b4585e098d3cc883
@@ -225,13 +230,13 @@ typedef struct
uint32 ctrl_ver;
uint32 cat_ver;
char nextxlogfile[25];
- uint32 chkpnt_nxtxid;
- uint32 chkpnt_nxtepoch;
+ uint64 chkpnt_nxtxid;
+ uint32 chkpnt_nxtepoch; /* for 32bit xids only */
uint32 chkpnt_nxtoid;
- uint32 chkpnt_nxtmulti;
- uint32 chkpnt_nxtmxoff;
- uint32 chkpnt_oldstMulti;
- uint32 chkpnt_oldstxid;
+ uint64 chkpnt_nxtmulti;
+ uint64 chkpnt_nxtmxoff;
+ uint64 chkpnt_oldstMulti;
+ uint64 chkpnt_oldstxid;
uint32 align;
uint32 blocksz;
uint32 largesz;
@@ -361,7 +366,7 @@ extern OSInfo os_info;
/* check.c */
void output_check_banner(bool live_check);
-void check_and_dump_old_cluster(bool live_check);
+void check_and_dump_old_cluster(bool live_check, bool *is_wraparound);
void check_new_cluster(void);
void report_clusters_compatible(void);
void issue_warnings_and_set_wal_level(void);
@@ -402,7 +407,10 @@ void copyFile(const char *src, const char *dst,
void linkFile(const char *src, const char *dst,
const char *schemaName, const char *relName);
void rewriteVisibilityMap(const char *fromfile, const char *tofile,
- const char *schemaName, const char *relName);
+ const char *schemaName, const char *relName,
+ bool update_version);
+void updateSegmentPagesVersion(const char *fromfile, const char *tofile,
+ const char *schemaName, const char *relName);
void check_file_clone(void);
void check_hard_link(void);
@@ -485,6 +493,10 @@ void old_9_6_invalidate_hash_indexes(ClusterInfo *cluster,
void old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster);
void report_extension_updates(ClusterInfo *cluster);
+void invalidate_spgist_indexes(ClusterInfo *cluster, bool check_mode);
+void invalidate_gin_indexes(ClusterInfo *cluster, bool check_mode);
+void invalidate_external_indexes(ClusterInfo *cluster, bool check_mode);
+
/* parallel.c */
void parallel_exec_prog(const char *log_file, const char *opt_log_file,
const char *fmt,...) pg_attribute_printf(3, 4);
@@ -492,3 +504,9 @@ void parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr
char *old_pgdata, char *new_pgdata,
char *old_tablespace);
bool reap_child(bool wait_for_child);
+
+/* segresize.c */
+void convert_xact(const char *olddir, const char *newdir);
+MultiXactOffset convert_multixact_offsets(const char *olddir, const char *newdir);
+void convert_multixact_members(const char *olddir, const char *newdir,
+ MultiXactOffset oldest_mxoff);
diff --git a/src/bin/pg_upgrade/relfilenumber.c b/src/bin/pg_upgrade/relfilenumber.c
index 34bc9c1504..a0e10047ed 100644
--- a/src/bin/pg_upgrade/relfilenumber.c
+++ b/src/bin/pg_upgrade/relfilenumber.c
@@ -16,7 +16,8 @@
#include "pg_upgrade.h"
static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace);
-static void transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_frozenbit);
+static void transfer_relfile(FileNameMap *map, const char *type_suffix,
+ bool vm_must_add_frozenbit, bool update_version);
/*
@@ -136,6 +137,7 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
{
int mapnum;
bool vm_must_add_frozenbit = false;
+ bool update_version = false;
/*
* Do we need to rewrite visibilitymap?
@@ -144,19 +146,28 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
new_cluster.controldata.cat_ver >= VISIBILITY_MAP_FROZEN_BIT_CAT_VER)
vm_must_add_frozenbit = true;
+ /*
+ * Need to update FSM and VM pages version to avoid lazy conversion.
+ */
+ if (old_cluster.controldata.cat_ver < new_cluster.controldata.cat_ver)
+ update_version = true;
+
for (mapnum = 0; mapnum < size; mapnum++)
{
if (old_tablespace == NULL ||
strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0)
{
/* transfer primary file */
- transfer_relfile(&maps[mapnum], "", vm_must_add_frozenbit);
+ transfer_relfile(&maps[mapnum], "", vm_must_add_frozenbit,
+ update_version);
/*
* Copy/link any fsm and vm files, if they exist
*/
- transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit);
- transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit);
+ transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit,
+ update_version);
+ transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit,
+ update_version);
}
}
}
@@ -170,7 +181,8 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
* mode.
*/
static void
-transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_frozenbit)
+transfer_relfile(FileNameMap *map, const char *type_suffix,
+ bool vm_must_add_frozenbit, bool update_version)
{
char old_file[MAXPGPATH];
char new_file[MAXPGPATH];
@@ -235,7 +247,17 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
/* Need to rewrite visibility map format */
pg_log(PG_VERBOSE, "rewriting \"%s\" to \"%s\"",
old_file, new_file);
- rewriteVisibilityMap(old_file, new_file, map->nspname, map->relname);
+ rewriteVisibilityMap(old_file, new_file, map->nspname,
+ map->relname, update_version);
+ }
+ else if ((update_version && strcmp(type_suffix, "_vm") == 0) ||
+ (update_version && strcmp(type_suffix, "_fsm") == 0))
+ {
+ /* Need to update pages version */
+ pg_log(PG_VERBOSE, "rewriting \"%s\" to \"%s\"",
+ old_file, new_file);
+ updateSegmentPagesVersion(old_file, new_file, map->nspname,
+ map->relname);
}
else
switch (user_opts.transfer_mode)
diff --git a/src/bin/pg_upgrade/segresize.c b/src/bin/pg_upgrade/segresize.c
new file mode 100644
index 0000000000..1b14637c81
--- /dev/null
+++ b/src/bin/pg_upgrade/segresize.c
@@ -0,0 +1,586 @@
+/*-------------------------------------------------------------------------
+ *
+ * segresize.c
+ * SLRU segment resize utility from 32bit to 64bit xid format
+ *
+ * Copyright (c) 2015-2022, Postgres Professional
+ *
+ * IDENTIFICATION
+ * src/bin/pg_upgrade/segresize.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#include "pg_upgrade.h"
+#include "access/multixact.h"
+#include "access/transam.h"
+
+#define SLRU_PAGES_PER_SEGMENT_OLD 32
+#define SLRU_PAGES_PER_SEGMENT 32 /* Should be equal to value from slru.h */
+
+#define CLOG_BITS_PER_XACT 2
+#define CLOG_XACTS_PER_BYTE 4
+#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE)
+
+typedef uint32 MultiXactId32;
+typedef uint32 MultiXactOffset32;
+typedef uint32 TransactionId32;
+
+#define MaxTransactionId32 ((TransactionId32) 0xFFFFFFFF)
+#define MaxMultiXactId32 ((MultiXactId32) 0xFFFFFFFF)
+#define MaxMultiXactOffset32 ((MultiXactOffset32) 0xFFFFFFFF)
+
+#define MULTIXACT_OFFSETS_PER_PAGE_OLD (BLCKSZ / sizeof(MultiXactOffset32))
+#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
+
+#define MXACT_MEMBER_FLAGS_PER_BYTE 1
+
+/* 64xid */
+#define MULTIXACT_FLAGBYTES_PER_GROUP 8
+#define MULTIXACT_MEMBERS_PER_MEMBERGROUP \
+ (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
+/* size in bytes of a complete group */
+#define MULTIXACT_MEMBERGROUP_SIZE \
+ (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
+#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
+
+/* 32xid */
+#define MULTIXACT_FLAGBYTES_PER_GROUP_OLD 4
+#define MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD \
+ (MULTIXACT_FLAGBYTES_PER_GROUP_OLD * MXACT_MEMBER_FLAGS_PER_BYTE)
+/* size in bytes of a complete group */
+#define MULTIXACT_MEMBERGROUP_SIZE_OLD \
+ (sizeof(TransactionId32) * MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD + MULTIXACT_FLAGBYTES_PER_GROUP_OLD)
+#define MULTIXACT_MEMBERGROUPS_PER_PAGE_OLD (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE_OLD)
+#define MULTIXACT_MEMBERS_PER_PAGE_OLD \
+ (MULTIXACT_MEMBERGROUPS_PER_PAGE_OLD * MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD)
+
+typedef struct SLRUSegmentState
+{
+ const char *dir;
+ FILE *file;
+ int64 segno;
+ int64 pageno;
+ bool is_empty_segment;
+} SLRUSegmentState;
+
+static char *
+slru_filename_old(const char *path, int64 segno)
+{
+ Assert(segno <= PG_INT32_MAX);
+ return psprintf("%s/%04X", path, (int) segno);
+}
+
+static char *
+slru_filename_new(const char *path, int64 segno)
+{
+ return psprintf("%s/%012llX", path, (long long) segno);
+}
+
+static inline FILE *
+open_file(SLRUSegmentState *state,
+ char * (filename_fn)(const char *path, int64 segno),
+ char *mode, char *fatal_msg)
+{
+ char *filename = filename_fn(state->dir, state->segno);
+ FILE *fd = fopen(filename, mode);
+
+ if (!fd)
+ pg_fatal(fatal_msg, filename);
+
+ pfree(filename);
+
+ return fd;
+}
+
+static void
+close_file(SLRUSegmentState *state,
+ char * (filename_fn)(const char *path, int64 segno))
+{
+ if (state->file != NULL)
+ {
+ if (fclose(state->file) != 0)
+ pg_fatal("could not close file \"%s\": %m",
+ filename_fn(state->dir, state->segno));
+ state->file = NULL;
+ }
+}
+
+static inline int
+read_file(SLRUSegmentState *state, void *buf)
+{
+ size_t n = fread(buf, sizeof(char), BLCKSZ, state->file);
+
+ if (n != 0)
+ return n;
+
+ if (ferror(state->file))
+ pg_fatal("could not read file \"%s\": %m",
+ slru_filename_old(state->dir, state->segno));
+
+ if (!feof(state->file))
+ pg_fatal("unknown file read state \"%s\": %m",
+ slru_filename_old(state->dir, state->segno));
+
+ close_file(state, slru_filename_old);
+
+ return 0;
+}
+
+static int
+read_old_segment_page(SLRUSegmentState *state, void *buf, bool *is_empty)
+{
+ int n;
+
+ /* Open next segment file, if needed */
+ if (!state->file)
+ {
+ state->file = open_file(state, slru_filename_old, "rb",
+ "could not open source file \"%s\": %m");
+
+ /* Set position to the needed page */
+ if (fseek(state->file, state->pageno * BLCKSZ, SEEK_SET))
+ close_file(state, slru_filename_old);
+
+ /*
+ * Skip segment conversion if segment file doesn't exist.
+ * First segment file should exist in any case.
+ */
+ if (state->segno != 0)
+ state->is_empty_segment = true;
+ }
+
+ if (state->file)
+ {
+ /* Segment file does exist, read page from it */
+ state->is_empty_segment = false;
+
+ /* Try to read BLCKSZ bytes */
+ n = read_file(state, buf);
+ *is_empty = (n == 0);
+
+ /* Zeroing buf tail if needed */
+ if (n)
+ memset((char *) buf + n, 0, BLCKSZ - n);
+ }
+ else
+ {
+ n = state->is_empty_segment ?
+ BLCKSZ : /* Skip empty block at the end of segment */
+ 0; /* We reached the last segment */
+ *is_empty = true;
+
+ if (n)
+ memset((char *) buf, 0, BLCKSZ);
+ }
+
+ state->pageno++;
+
+ if (state->pageno >= SLRU_PAGES_PER_SEGMENT_OLD)
+ {
+ /* Start new segment */
+ state->segno++;
+ state->pageno = 0;
+ close_file(state, slru_filename_old);
+ }
+
+ return n;
+}
+
+static void
+write_new_segment_page(SLRUSegmentState *state, void *buf, bool is_empty)
+{
+ /*
+ * Create a new segment file if we still didn't. Creation is postponed
+ * until the first non-empty page is found. This helps not to create
+ * completely empty segments.
+ */
+ if (!state->file && !is_empty)
+ {
+ state->file = open_file(state, slru_filename_new, "wb",
+ "could not open target file \"%s\": %m");
+
+ /* Write zeroes to the previously skipped prefix */
+ if (state->pageno > 0)
+ {
+ char zerobuf[BLCKSZ] = {0};
+
+ for (int64 i = 0; i < state->pageno; i++)
+ {
+ if (fwrite(zerobuf, sizeof(char), BLCKSZ, state->file) != BLCKSZ)
+ pg_fatal("could not write file \"%s\": %m",
+ slru_filename_new(state->dir, state->segno));
+ }
+ }
+
+ }
+
+ /* Write page to the new segment (if it was created) */
+ if (state->file)
+ {
+ if (fwrite(buf, sizeof(char), BLCKSZ, state->file) != BLCKSZ)
+ pg_fatal("could not write file \"%s\": %m",
+ slru_filename_new(state->dir, state->segno));
+ }
+
+ state->pageno++;
+
+ /*
+ * Did we reach the maximum page number? Then close segment file and
+ * create a new one on the next iteration
+ */
+ if (state->pageno >= SLRU_PAGES_PER_SEGMENT)
+ {
+ state->segno++;
+ state->pageno = 0;
+ close_file(state, slru_filename_new);
+ }
+}
+
+/*
+ * Convert pg_xact segments.
+ */
+void
+convert_xact(const char *old_subdir, const char *new_subdir)
+{
+ SLRUSegmentState oldseg = {0};
+ SLRUSegmentState newseg = {0};
+ TransactionId oldest_xid = old_cluster.controldata.chkpnt_oldstxid;
+ TransactionId next_xid = old_cluster.controldata.chkpnt_nxtxid;
+ TransactionId xid;
+ int64 pageno;
+ char buf[BLCKSZ] = {0};
+
+ oldseg.dir = old_subdir;
+ newseg.dir = new_subdir;
+
+ pageno = oldest_xid / CLOG_XACTS_PER_PAGE;
+
+ oldseg.segno = pageno / SLRU_PAGES_PER_SEGMENT_OLD;
+ oldseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT_OLD;
+
+ newseg.segno = pageno / SLRU_PAGES_PER_SEGMENT;
+ newseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT;
+
+ if (next_xid < oldest_xid)
+ next_xid += (TransactionId) 1 << 32; /* wraparound */
+
+ /* Copy xid flags reading only needed segment pages */
+ for (xid = oldest_xid & ~(CLOG_XACTS_PER_PAGE - 1);
+ xid <= ((next_xid - 1) & ~(CLOG_XACTS_PER_PAGE - 1));
+ xid += CLOG_XACTS_PER_PAGE)
+ {
+ bool is_empty;
+
+ /* Handle possible segment wraparound */
+ if (oldseg.segno > MaxTransactionId32 / CLOG_XACTS_PER_PAGE / SLRU_PAGES_PER_SEGMENT_OLD)
+ {
+ pageno = (MaxTransactionId32 + 1) / CLOG_XACTS_PER_PAGE;
+
+ Assert(oldseg.segno == pageno / SLRU_PAGES_PER_SEGMENT_OLD);
+ Assert(!oldseg.pageno);
+ Assert(!oldseg.file);
+ oldseg.segno = 0;
+
+ Assert(newseg.segno == pageno / SLRU_PAGES_PER_SEGMENT);
+ Assert(!newseg.pageno);
+ Assert(!newseg.file);
+ newseg.segno = 0;
+ }
+
+ read_old_segment_page(&oldseg, buf, &is_empty);
+ write_new_segment_page(&newseg, buf, is_empty);
+ }
+
+ /* Release resources */
+ close_file(&oldseg, slru_filename_old);
+ close_file(&newseg, slru_filename_new);
+}
+
+static inline SLRUSegmentState
+create_slru_segment_state(MultiXactId mxid,
+ int offsets_per_page,
+ int pages_per_segment,
+ char *dir)
+{
+ SLRUSegmentState seg = {0};
+ int64 n;
+
+ n = mxid / offsets_per_page;
+ seg.pageno = n % pages_per_segment;
+ seg.segno = n / pages_per_segment;
+ seg.dir = dir;
+
+ return seg;
+}
+
+/*
+ * Convert pg_multixact/offsets segments and return oldest mxid offset.
+ */
+MultiXactOffset
+convert_multixact_offsets(const char *old_subdir, const char *new_subdir)
+{
+ SLRUSegmentState oldseg,
+ newseg;
+ MultiXactOffset32 oldbuf[MULTIXACT_OFFSETS_PER_PAGE_OLD] = {0};
+ MultiXactOffset newbuf[MULTIXACT_OFFSETS_PER_PAGE] = {0};
+ MultiXactOffset32 oldest_mxoff = 0;
+ MultiXactId oldest_mxid,
+ next_mxid,
+ mxid;
+ uint64 old_entry,
+ new_entry;
+ bool oldest_mxoff_known = false;
+
+ StaticAssertStmt((sizeof(oldbuf) == BLCKSZ && sizeof(newbuf) == BLCKSZ),
+ "buf should be BLCKSZ");
+
+ oldest_mxid = old_cluster.controldata.chkpnt_oldstMulti;
+
+ oldseg = create_slru_segment_state(oldest_mxid,
+ MULTIXACT_OFFSETS_PER_PAGE_OLD,
+ SLRU_PAGES_PER_SEGMENT_OLD,
+ psprintf("%s/%s", old_cluster.pgdata,
+ old_subdir));
+
+ newseg = create_slru_segment_state(oldest_mxid,
+ MULTIXACT_OFFSETS_PER_PAGE,
+ SLRU_PAGES_PER_SEGMENT,
+ psprintf("%s/%s", new_cluster.pgdata,
+ new_subdir));
+
+ old_entry = oldest_mxid % MULTIXACT_OFFSETS_PER_PAGE_OLD;
+ new_entry = oldest_mxid % MULTIXACT_OFFSETS_PER_PAGE;
+
+ next_mxid = old_cluster.controldata.chkpnt_nxtmulti;
+ if (next_mxid < oldest_mxid)
+ next_mxid += (MultiXactId) 1 << 32; /* wraparound */
+
+ prep_status("Converting old %s to new format", old_subdir);
+
+ /* Copy mxid offsets reading only needed segment pages */
+ for (mxid = oldest_mxid; mxid < next_mxid; old_entry = 0)
+ {
+ int oldlen;
+ bool is_empty;
+
+ /* Handle possible segment wraparound */
+ if (oldseg.segno > MaxMultiXactId32 / MULTIXACT_OFFSETS_PER_PAGE_OLD / SLRU_PAGES_PER_SEGMENT_OLD) /* 0xFFFF */
+ oldseg.segno = 0;
+
+ oldlen = read_old_segment_page(&oldseg, oldbuf, &is_empty);
+
+ if (oldlen == 0 || is_empty)
+ pg_fatal("cannot read page %lld from segment: %s\n",
+ (long long) oldseg.pageno,
+ slru_filename_old(oldseg.dir, oldseg.segno));
+
+ /* Save oldest mxid offset */
+ if (!oldest_mxoff_known)
+ {
+ oldest_mxoff = oldbuf[old_entry];
+ oldest_mxoff_known = true;
+ }
+
+ /* Skip wrapped-around invalid MultiXactIds */
+ if (mxid == (MultiXactId) 1 << 32)
+ {
+ Assert(oldseg.segno == 0);
+ Assert(oldseg.pageno == 1);
+ Assert(old_entry == 0);
+ mxid += FirstMultiXactId;
+ old_entry = FirstMultiXactId;
+ }
+
+ /* Copy entries to the new page */
+ for (; mxid < next_mxid && old_entry < MULTIXACT_OFFSETS_PER_PAGE_OLD;
+ mxid++, old_entry++)
+ {
+ MultiXactOffset mxoff = oldbuf[old_entry];
+
+ /* Handle possible offset wraparound (1 becomes 2^32) */
+ if (mxoff < oldest_mxoff)
+ mxoff += ((MultiXactOffset) 1 << 32) - 1;
+
+ /* Subtract oldest_mxoff, so new offsets will start from 1 */
+ newbuf[new_entry++] = mxoff - oldest_mxoff + 1;
+
+ if (new_entry >= MULTIXACT_OFFSETS_PER_PAGE)
+ {
+ /* Write new page */
+ write_new_segment_page(&newseg, newbuf, false);
+ new_entry = 0;
+ }
+ }
+ }
+
+ /* Write the last incomplete page */
+ if (new_entry > 0 || oldest_mxid == next_mxid)
+ {
+ memset(&newbuf[new_entry], 0,
+ sizeof(newbuf[0]) * (MULTIXACT_OFFSETS_PER_PAGE - new_entry));
+ write_new_segment_page(&newseg, newbuf, false);
+ }
+
+ /* Use next_mxoff as oldest_mxoff, if oldest_mxid == next_mxid */
+ if (!oldest_mxoff_known)
+ {
+ Assert(oldest_mxid == next_mxid);
+ oldest_mxoff = (MultiXactOffset) old_cluster.controldata.chkpnt_nxtmxoff;
+ }
+
+ /* Release resources */
+ close_file(&oldseg, slru_filename_old);
+ close_file(&newseg, slru_filename_new);
+
+ pfree((char *) oldseg.dir);
+ pfree((char *) newseg.dir);
+
+ check_ok();
+
+ return oldest_mxoff;
+}
+
+/*
+ * Convert pg_multixact/members segments, offsets will start from 1.
+ */
+void
+convert_multixact_members(const char *old_subdir, const char *new_subdir,
+ MultiXactOffset oldest_mxoff)
+{
+ MultiXactOffset next_mxoff,
+ mxoff;
+ SLRUSegmentState oldseg,
+ newseg;
+ char oldbuf[BLCKSZ] = {0},
+ newbuf[BLCKSZ] = {0};
+ int newgroup,
+ newmember;
+ char *newflag = newbuf;
+ TransactionId *newxid;
+ int oldidx,
+ newidx;
+
+ prep_status("Converting old %s to new format", old_subdir);
+
+ next_mxoff = (MultiXactOffset) old_cluster.controldata.chkpnt_nxtmxoff;
+ if (next_mxoff < oldest_mxoff)
+ next_mxoff += (MultiXactOffset) 1 << 32;
+
+ newxid = (TransactionId *) (newflag + MXACT_MEMBER_FLAGS_PER_BYTE * MULTIXACT_MEMBERS_PER_MEMBERGROUP);
+
+ /* Initialize old starting position */
+ oldidx = oldest_mxoff % MULTIXACT_MEMBERS_PER_PAGE_OLD;
+ oldseg = create_slru_segment_state(oldest_mxoff,
+ MULTIXACT_MEMBERS_PER_PAGE_OLD,
+ SLRU_PAGES_PER_SEGMENT_OLD,
+ psprintf("%s/%s", old_cluster.pgdata,
+ old_subdir));
+
+ /* Initialize empty new segment */
+ newseg = create_slru_segment_state(0, 1, 1,
+ psprintf("%s/%s", new_cluster.pgdata,
+ new_subdir));
+
+ /* Initialize new starting position (skip invalid zero offset) */
+ newgroup = 0;
+ newidx = 1;
+ newmember = 1;
+ newflag++;
+ newxid++;
+
+ /* Iterate through the original directory */
+ for (mxoff = oldest_mxoff; mxoff < next_mxoff; oldidx = 0)
+ {
+ bool old_is_empty;
+ int oldlen;
+ int ngroups;
+ int oldgroup;
+ int oldmember;
+
+ oldlen = read_old_segment_page(&oldseg, oldbuf, &old_is_empty);
+
+ if (oldlen == 0 || old_is_empty)
+ pg_fatal("cannot read page %lld from segment: %s\n",
+ (long long) oldseg.pageno,
+ slru_filename_old(oldseg.dir, oldseg.segno));
+
+ ngroups = oldlen / MULTIXACT_MEMBERGROUP_SIZE_OLD;
+
+ /* Iterate through old member groups */
+ for (oldgroup = oldidx / MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD,
+ oldmember = oldidx % MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD;
+ oldgroup < ngroups && mxoff < next_mxoff;
+ oldgroup++, oldmember = 0)
+ {
+ char *oldflag = (char *) oldbuf + oldgroup * MULTIXACT_MEMBERGROUP_SIZE_OLD;
+ TransactionId32 *oldxid = (TransactionId32 *) (oldflag + MULTIXACT_FLAGBYTES_PER_GROUP_OLD);
+
+ oldxid += oldmember;
+ oldflag += oldmember;
+
+ /* Iterate through old members */
+ for (int i = oldidx % MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD;
+ i < MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD && mxoff < next_mxoff;
+ i++)
+ {
+ /* Copy member's xid and flags to the new page */
+ *newflag++ = *oldflag++;
+ *newxid++ = (TransactionId) * oldxid++;
+
+ newidx++;
+ oldidx++;
+ mxoff++;
+
+ if (++newmember >= MULTIXACT_MEMBERS_PER_MEMBERGROUP)
+ {
+ /* Start next member group */
+ newmember = 0;
+
+ if (++newgroup >= MULTIXACT_MEMBERGROUPS_PER_PAGE)
+ {
+ /* Write current page and start new */
+ newgroup = 0;
+ newidx = 0;
+ write_new_segment_page(&newseg, newbuf, false);
+ memset(newbuf, 0, BLCKSZ);
+ }
+
+ newflag = (char *) newbuf + newgroup * MULTIXACT_MEMBERGROUP_SIZE;
+ newxid = (TransactionId *) (newflag + MXACT_MEMBER_FLAGS_PER_BYTE * MULTIXACT_MEMBERS_PER_MEMBERGROUP);
+ }
+
+ /* Handle offset wraparound */
+ if (mxoff > MaxMultiXactOffset32)
+ {
+ Assert(mxoff == (MultiXactOffset) 1 << 32);
+ Assert(oldseg.segno == MaxMultiXactOffset32 / MULTIXACT_MEMBERS_PER_PAGE_OLD / SLRU_PAGES_PER_SEGMENT_OLD);
+ Assert(oldseg.pageno == MaxMultiXactOffset32 / MULTIXACT_MEMBERS_PER_PAGE_OLD % SLRU_PAGES_PER_SEGMENT_OLD);
+ Assert(oldmember == MaxMultiXactOffset32 % MULTIXACT_MEMBERS_PER_PAGE_OLD);
+
+ /* Switch to segment 0000 */
+ close_file(&oldseg, slru_filename_old);
+ oldseg.segno = 0;
+ oldseg.pageno = 0;
+
+ oldidx = 1; /* skip invalid zero mxid offset */
+ }
+ }
+ }
+ }
+
+ /* Write last page, unless it is empty */
+ if (newflag > (char *) newbuf || oldest_mxoff == next_mxoff)
+ write_new_segment_page(&newseg, newbuf, false);
+
+ /* Release resources */
+ close_file(&oldseg, slru_filename_old);
+ close_file(&newseg, slru_filename_new);
+
+ pfree((char *) oldseg.dir);
+ pfree((char *) newseg.dir);
+
+ check_ok();
+}
diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
index 997963082b..7a64c6d302 100644
--- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl
+++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
@@ -260,6 +260,14 @@ if (defined($ENV{oldinstall}))
}
}
+$oldnode->safe_psql('regression',
+ "CREATE TABLE t1 (id SERIAL NOT NULL PRIMARY KEY, plt text, pln NUMERIC(8, 4));
+ INSERT INTO t1 (plt, pln) SELECT md5(random()::text), random() * 9999 FROM generate_series(1, 1000);");
+my $relfrozenxid = $oldnode->safe_psql('regression',
+ "SELECT relfrozenxid FROM pg_class WHERE relname = 't1';");
+my $relminmxid = $oldnode->safe_psql('regression',
+ "SELECT relminmxid FROM pg_class WHERE relname = 't1';");
+
# Take a dump before performing the upgrade as a base comparison. Note
# that we need to use pg_dumpall from the new node here.
my @dump_command = (
@@ -399,6 +407,16 @@ ok( !-d $newnode->data_dir . "/pg_upgrade_output.d",
$newnode->start;
+my $relfrozenxid_new = $newnode->safe_psql('regression',
+ "SELECT relfrozenxid FROM pg_class WHERE relname = 't1';");
+
+is($relfrozenxid_new, $relfrozenxid, 'old and new relfrozenxid match after pg_upgrade');
+
+my $relminmxid_new = $newnode->safe_psql('regression',
+ "SELECT relminmxid FROM pg_class WHERE relname = 't1';");
+
+is($relminmxid_new, $relminmxid, 'old and new relminmxid match after pg_upgrade');
+
# Check if there are any logs coming from pg_upgrade, that would only be
# retained on failure.
my $log_path = $newnode->data_dir . "/pg_upgrade_output.d";
diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c
index 403a6d7cfa..2bf9d5dac4 100644
--- a/src/bin/pg_upgrade/version.c
+++ b/src/bin/pg_upgrade/version.c
@@ -9,6 +9,7 @@
#include "postgres_fe.h"
+#include "access/transam.h"
#include "catalog/pg_class_d.h"
#include "fe_utils/string_utils.h"
#include "pg_upgrade.h"
@@ -242,19 +243,21 @@ old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster)
}
/*
- * old_9_6_invalidate_hash_indexes()
- * 9.6 -> 10
- * Hash index binary format has changed from 9.6->10.0
+ * invalidate_indexes()
+ * Invalidates all indexes satisfying given predicate.
*/
-void
-old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
+static void
+invalidate_indexes(ClusterInfo *cluster, bool check_mode,
+ const char *name, const char *pred)
{
int dbnum;
FILE *script = NULL;
bool found = false;
- char *output_path = "reindex_hash.sql";
+ char output_path[MAXPGPATH];
+
+ snprintf(output_path, sizeof(output_path), "reindex_%s.sql", name);
- prep_status("Checking for hash indexes");
+ prep_status("Checking for %s indexes", name);
for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
{
@@ -267,9 +270,16 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
DbInfo *active_db = &cluster->dbarr.dbs[dbnum];
PGconn *conn = connectToServer(cluster, active_db->db_name);
- /* find hash indexes */
- res = executeQueryOrDie(conn,
- "SELECT n.nspname, c.relname "
+
+ /*
+ * Find indexes satisfying predicate.
+ *
+ * System indexes (with oids < FirstNormalObjectId) are excluded from
+ * the search as they are recreated in the new cluster during initdb.
+ */
+ res = executeQueryOrDie(
+ conn,
+ "SELECT n.nspname, c.relname, i.indexrelid "
"FROM pg_catalog.pg_class c, "
" pg_catalog.pg_index i, "
" pg_catalog.pg_am a, "
@@ -277,8 +287,11 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
"WHERE i.indexrelid = c.oid AND "
" c.relam = a.oid AND "
" c.relnamespace = n.oid AND "
- " a.amname = 'hash'"
- );
+ " i.indexrelid >= '%u'::pg_catalog.oid AND "
+ " %s "
+ "ORDER BY i.indexrelid ASC",
+ FirstNormalObjectId,
+ pred);
ntups = PQntuples(res);
i_nspname = PQfnumber(res, "nspname");
@@ -311,8 +324,14 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
if (!check_mode && db_used)
{
- /* mark hash indexes as invalid */
- PQclear(executeQueryOrDie(conn,
+ /*
+ * Mark indexes satisfying predicate as invalid.
+ *
+ * System indexes (with oids < FirstNormalObjectId) are excluded
+ * from the search (see above).
+ */
+ PQclear(executeQueryOrDie(
+ conn,
"UPDATE pg_catalog.pg_index i "
"SET indisvalid = false "
"FROM pg_catalog.pg_class c, "
@@ -321,7 +340,10 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
"WHERE i.indexrelid = c.oid AND "
" c.relam = a.oid AND "
" c.relnamespace = n.oid AND "
- " a.amname = 'hash'"));
+ " i.indexrelid >= '%u'::pg_catalog.oid AND "
+ " %s",
+ FirstNormalObjectId,
+ pred));
}
PQfinish(conn);
@@ -335,24 +357,37 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
report_status(PG_WARNING, "warning");
if (check_mode)
pg_log(PG_WARNING, "\n"
- "Your installation contains hash indexes. These indexes have different\n"
+ "Your installation contains %s indexes. These indexes have different\n"
"internal formats between your old and new clusters, so they must be\n"
"reindexed with the REINDEX command. After upgrading, you will be given\n"
- "REINDEX instructions.");
+ "REINDEX instructions.",
+ name);
else
pg_log(PG_WARNING, "\n"
- "Your installation contains hash indexes. These indexes have different\n"
+ "Your installation contains %s indexes. These indexes have different\n"
"internal formats between your old and new clusters, so they must be\n"
"reindexed with the REINDEX command. The file\n"
" %s\n"
"when executed by psql by the database superuser will recreate all invalid\n"
"indexes; until then, none of these indexes will be used.",
+ name,
output_path);
}
else
check_ok();
}
+/*
+ * old_9_6_invalidate_hash_indexes()
+ * 9.6 -> 10
+ * Hash index binary format has changed from 9.6->10.0
+ */
+void
+old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
+{
+ invalidate_indexes(cluster, check_mode, "hash", "a.amname = 'hash'");
+}
+
/*
* old_11_check_for_sql_identifier_data_type_usage()
* 11 -> 12
@@ -459,3 +494,36 @@ report_extension_updates(ClusterInfo *cluster)
else
check_ok();
}
+
+/*
+ * invalidate_spgist_indexes()
+ * 32bit -> 64bit
+ * SP-GIST contains xids.
+ */
+void
+invalidate_spgist_indexes(ClusterInfo *cluster, bool check_mode)
+{
+ invalidate_indexes(cluster, check_mode, "spgist", "a.amname = 'spgist'");
+}
+
+/*
+ * invalidate_gin_indexes()
+ * 32bit -> 64bit
+ * Gin indexes contains xids in deleted pages.
+ */
+void
+invalidate_gin_indexes(ClusterInfo *cluster, bool check_mode)
+{
+ invalidate_indexes(cluster, check_mode, "gin", "a.amname = 'gin'");
+}
+
+/*
+ * invalidate_external_indexes()
+ * Generate script to REINDEX non standard external indexes (like RUM etc)
+ */
+void
+invalidate_external_indexes(ClusterInfo *cluster, bool check_mode)
+{
+ invalidate_indexes(cluster, check_mode, "external",
+ "NOT a.amname IN ('btree', 'hash', 'gist', 'gin', 'spgist', 'brin')");
+}
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index 319c44c2b2..dea4dac02b 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -1050,7 +1050,7 @@ main(int argc, char **argv)
config.filter_by_fpw = true;
break;
case 'x':
- if (sscanf(optarg, "%u", &config.filter_by_xid) != 1)
+ if (sscanf(optarg, "%" INT64_MODIFIER "u", &config.filter_by_xid) != 1)
{
pg_log_error("invalid transaction ID specification: \"%s\"",
optarg);
diff --git a/src/bin/pg_waldump/t/001_basic.pl b/src/bin/pg_waldump/t/001_basic.pl
index 029a0d0521..8e6c7b5b09 100644
--- a/src/bin/pg_waldump/t/001_basic.pl
+++ b/src/bin/pg_waldump/t/001_basic.pl
@@ -47,7 +47,8 @@ BRIN
CommitTs
ReplicationOrigin
Generic
-LogicalMessage$/,
+LogicalMessage
+Heap3$/,
'rmgr list');
diff --git a/src/include/access/ginblock.h b/src/include/access/ginblock.h
index c59790ec5a..4b655baf73 100644
--- a/src/include/access/ginblock.h
+++ b/src/include/access/ginblock.h
@@ -133,8 +133,15 @@ typedef struct GinMetaPageData
* We should reclaim deleted page only once every transaction started before
* its deletion is over.
*/
-#define GinPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid )
-#define GinPageSetDeleteXid(page, xid) ( ((PageHeader) (page))->pd_prune_xid = xid)
+#define GinPageGetDeleteXid(page) ( \
+ (((PageHeader) (page))->pd_upper == BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId)) ? \
+ *((TransactionId *) ((char *) (page) + BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId))) : \
+ InvalidTransactionId )
+#define GinPageSetDeleteXid(page, xid) \
+ do { \
+ ((PageHeader) (page))->pd_upper = BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId); \
+ *((TransactionId *) ((char *) (page) + BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId))) = xid; \
+ } while (false)
extern bool GinPageIsRecyclable(Page page);
/*
diff --git a/src/include/access/gist.h b/src/include/access/gist.h
index 0235716c06..2996cdd486 100644
--- a/src/include/access/gist.h
+++ b/src/include/access/gist.h
@@ -223,7 +223,7 @@ GistPageGetDeleteXid(Page page)
return ((GISTDeletedPageContents *) PageGetContents(page))->deleteXid;
}
else
- return FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
+ return FullTransactionIdFromXid(FirstNormalTransactionId);
}
/*
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index a2d7a0ea72..d7e3ebbc81 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -76,6 +76,8 @@ typedef struct HeapScanDescData
int rs_cindex; /* current tuple's index in vistuples */
int rs_ntuples; /* number of visible tuples on page */
OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */
+ TransactionId rs_xmin[MaxHeapTuplesPerPage]; /* their xmins */
+ TransactionId rs_xmax[MaxHeapTuplesPerPage]; /* their xmaxs */
} HeapScanDescData;
typedef struct HeapScanDescData *HeapScanDesc;
@@ -272,6 +274,8 @@ extern void ReleaseBulkInsertStatePin(BulkInsertState bistate);
extern void heap_insert(Relation relation, HeapTuple tup, CommandId cid,
int options, BulkInsertState bistate);
+extern void rewrite_page_prepare_for_xid(Page page, HeapTuple tup,
+ bool is_toast);
extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots,
int ntuples, CommandId cid, int options,
BulkInsertState bistate);
@@ -291,21 +295,21 @@ extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
Buffer *buffer, struct TM_FailureData *tmfd);
extern void heap_inplace_update(Relation relation, HeapTuple tuple);
-extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
+extern bool heap_prepare_freeze_tuple(HeapTuple tuple,
const struct VacuumCutoffs *cutoffs,
HeapPageFreeze *pagefrz,
HeapTupleFreeze *frz, bool *totally_frozen);
extern void heap_freeze_execute_prepared(Relation rel, Buffer buffer,
TransactionId snapshotConflictHorizon,
HeapTupleFreeze *tuples, int ntuples);
-extern bool heap_freeze_tuple(HeapTupleHeader tuple,
+extern bool heap_freeze_tuple(HeapTuple tuple,
TransactionId relfrozenxid, TransactionId relminmxid,
TransactionId FreezeLimit, TransactionId MultiXactCutoff);
-extern bool heap_tuple_should_freeze(HeapTupleHeader tuple,
+extern bool heap_tuple_should_freeze(HeapTuple htup,
const struct VacuumCutoffs *cutoffs,
TransactionId *NoFreezePageRelfrozenXid,
MultiXactId *NoFreezePageRelminMxid);
-extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple);
+extern bool heap_tuple_needs_eventual_freeze(HeapTuple htup);
extern void simple_heap_insert(Relation relation, HeapTuple tup);
extern void simple_heap_delete(Relation relation, ItemPointer tid);
@@ -321,12 +325,16 @@ extern void heap_page_prune_opt(Relation relation, Buffer buffer);
extern void heap_page_prune(Relation relation, Buffer buffer,
struct GlobalVisState *vistest,
PruneResult *presult,
- OffsetNumber *off_loc);
+ OffsetNumber *off_loc,
+ bool repairFragmentation);
extern void heap_page_prune_execute(Buffer buffer,
OffsetNumber *redirected, int nredirected,
OffsetNumber *nowdead, int ndead,
- OffsetNumber *nowunused, int nunused);
-extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
+ OffsetNumber *nowunused, int nunused,
+ bool repairFragmentation,
+ bool is_toast);
+extern void heap_get_root_tuples(Relation relation, Buffer buffer, Page page,
+ OffsetNumber *root_offsets);
/* in heap/vacuumlazy.c */
struct VacuumParams;
@@ -344,7 +352,7 @@ extern HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer
TransactionId *dead_after);
extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
uint16 infomask, TransactionId xid);
-extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
+extern bool HeapTupleIsOnlyLocked(HeapTuple htup);
extern bool HeapTupleIsSurelyDead(HeapTuple htup,
struct GlobalVisState *vistest);
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index a038450787..ba911404e7 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -59,6 +59,8 @@
#define XLOG_HEAP2_LOCK_UPDATED 0x60
#define XLOG_HEAP2_NEW_CID 0x70
+#define XLOG_HEAP3_BASE_SHIFT 0x00
+
/*
* xl_heap_insert/xl_heap_multi_insert flag values, 8 bits are available.
*/
@@ -98,6 +100,7 @@
#define XLH_DELETE_CONTAINS_OLD_KEY (1<<2)
#define XLH_DELETE_IS_SUPER (1<<3)
#define XLH_DELETE_IS_PARTITION_MOVE (1<<4)
+#define XLH_DELETE_PAGE_ON_TOAST_RELATION (1<<5)
/* convenience macro for checking whether any form of old tuple was logged */
#define XLH_DELETE_CONTAINS_OLD \
@@ -240,6 +243,9 @@ typedef struct xl_heap_update
*
* Acquires a full cleanup lock.
*/
+#define XLH_PRUNE_ON_TOAST_RELATION 0x01
+#define XLH_PRUNE_REPAIR_FRAGMENTATION 0x02
+
typedef struct xl_heap_prune
{
TransactionId snapshotConflictHorizon;
@@ -247,10 +253,11 @@ typedef struct xl_heap_prune
uint16 ndead;
bool isCatalogRel; /* to handle recovery conflict during logical
* decoding on standby */
+ uint8 flags;
/* OFFSET NUMBERS are in the block reference 0 */
} xl_heap_prune;
-#define SizeOfHeapPrune (offsetof(xl_heap_prune, isCatalogRel) + sizeof(bool))
+#define SizeOfHeapPrune (offsetof(xl_heap_prune, flags) + sizeof(uint8))
/*
* The vacuum page record is similar to the prune record, but can only mark
@@ -342,19 +349,22 @@ typedef struct xl_heap_freeze_plan
* Each such page offset number array corresponds to a single freeze plan
* (REDO routine freezes corresponding heap tuples using freeze plan).
*/
+#define XLH_FREEZE_PAGE_ON_TOAST_RELATION 0x01
+
typedef struct xl_heap_freeze_page
{
TransactionId snapshotConflictHorizon;
uint16 nplans;
bool isCatalogRel; /* to handle recovery conflict during logical
* decoding on standby */
+ uint8 flags;
/*
* In payload of blk 0 : FREEZE PLANS and OFFSET NUMBER ARRAY
*/
} xl_heap_freeze_page;
-#define SizeOfHeapFreezePage (offsetof(xl_heap_freeze_page, isCatalogRel) + sizeof(bool))
+#define SizeOfHeapFreezePage (offsetof(xl_heap_freeze_page, flags) + sizeof(uint8))
/*
* This is what we need to know about setting a visibility map bit
@@ -401,7 +411,19 @@ typedef struct xl_heap_rewrite_mapping
XLogRecPtr start_lsn; /* Insert LSN at begin of rewrite */
} xl_heap_rewrite_mapping;
-extern void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple,
+#define XLH_BASE_SHIFT_ON_TOAST_RELATION 0x01
+
+/* shift the base of xids on heap page */
+typedef struct xl_heap_base_shift
+{
+ int64 delta; /* delta value to shift the base */
+ bool multi; /* true to shift multixact base */
+ uint8 flags;
+} xl_heap_base_shift;
+
+#define SizeOfHeapBaseShift (offsetof(xl_heap_base_shift, flags) + sizeof(uint8))
+
+extern void HeapTupleHeaderAdvanceConflictHorizon(HeapTuple tuple,
TransactionId *snapshotConflictHorizon);
extern void heap_redo(XLogReaderState *record);
@@ -411,6 +433,9 @@ extern void heap_mask(char *pagedata, BlockNumber blkno);
extern void heap2_redo(XLogReaderState *record);
extern void heap2_desc(StringInfo buf, XLogReaderState *record);
extern const char *heap2_identify(uint8 info);
+extern void heap3_redo(XLogReaderState *record);
+extern void heap3_desc(StringInfo buf, XLogReaderState *record);
+extern const char *heap3_identify(uint8 info);
extern void heap_xlog_logical_rewrite(XLogReaderState *r);
extern XLogRecPtr log_heap_visible(Relation rel, Buffer heap_buffer,
diff --git a/src/include/access/heaptoast.h b/src/include/access/heaptoast.h
index 5c0a796f66..f9c781abbe 100644
--- a/src/include/access/heaptoast.h
+++ b/src/include/access/heaptoast.h
@@ -20,10 +20,19 @@
/*
* Find the maximum size of a tuple if there are to be N tuples per page.
*/
+#if MAXIMUM_ALIGNOF == 8
#define MaximumBytesPerTuple(tuplesPerPage) \
MAXALIGN_DOWN((BLCKSZ - \
- MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData))) \
+ MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData)) - MAXALIGN(sizeof(HeapPageSpecialData))) \
/ (tuplesPerPage))
+#elif MAXIMUM_ALIGNOF == 4
+#define MaximumBytesPerTuple(tuplesPerPage) \
+ MAXALIGN_DOWN((BLCKSZ - \
+ MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData)) - MAXALIGN(sizeof(ToastPageSpecialData))) \
+ / (tuplesPerPage))
+#else
+#error "unknown arch bitness"
+#endif
/*
* These symbols control toaster activation. If a tuple is larger than
diff --git a/src/include/access/htup.h b/src/include/access/htup.h
index a8f7ff5dfe..063d9ac4c4 100644
--- a/src/include/access/htup.h
+++ b/src/include/access/htup.h
@@ -54,6 +54,12 @@ typedef MinimalTupleData *MinimalTuple;
* this can't be told apart from case #1 by inspection; code setting up
* or destroying this representation has to know what it's doing.
*
+ * t_xmin and t_xmax are TransactionId values stored in heap tuple header.
+ * Normally they are calculated from ShortTransactionId-sized on-disk tuple
+ * xmin/xmax representation:
+ * t_data->t_choice.t_heap.t_xmin/t_data->t_choice.t_heap.t_xmin
+ * and pd_xid_base and pd_multi_base commmon values for all tuples on a page.
+ *
* t_len should always be valid, except in the pointer-to-nothing case.
* t_self and t_tableOid should be valid if the HeapTupleData points to
* a disk buffer, or if it represents a copy of a tuple on disk. They
@@ -61,10 +67,12 @@ typedef MinimalTupleData *MinimalTuple;
*/
typedef struct HeapTupleData
{
+ TransactionId t_xmin; /* calculated tuple xmin */
+ TransactionId t_xmax; /* calculated tuple xmax */
uint32 t_len; /* length of *t_data */
ItemPointerData t_self; /* SelfItemPointer */
Oid t_tableOid; /* table the tuple came from */
-#define FIELDNO_HEAPTUPLEDATA_DATA 3
+#define FIELDNO_HEAPTUPLEDATA_DATA 5
HeapTupleHeader t_data; /* -> tuple header and data */
} HeapTupleData;
@@ -78,12 +86,11 @@ typedef HeapTupleData *HeapTuple;
#define HeapTupleIsValid(tuple) PointerIsValid(tuple)
/* HeapTupleHeader functions implemented in utils/time/combocid.c */
-extern CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup);
-extern CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup);
-extern void HeapTupleHeaderAdjustCmax(HeapTupleHeader tup,
- CommandId *cmax, bool *iscombo);
+extern CommandId HeapTupleGetCmin(HeapTuple tup);
+extern CommandId HeapTupleGetCmax(HeapTuple tup);
+extern void HeapTupleAdjustCmax(HeapTuple tup, CommandId *cmax, bool *iscombo);
/* Prototype for HeapTupleHeader accessors in heapam.c */
-extern TransactionId HeapTupleGetUpdateXid(HeapTupleHeader tuple);
+extern TransactionId HeapTupleGetUpdateXid(HeapTuple tuple);
#endif /* HTUP_H */
diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h
index 6fd87dc108..a99bb70906 100644
--- a/src/include/access/htup_details.h
+++ b/src/include/access/htup_details.h
@@ -19,6 +19,7 @@
#include "access/tupdesc.h"
#include "access/tupmacs.h"
#include "storage/bufpage.h"
+#include "storage/bufmgr.h"
#include "varatt.h"
/*
@@ -121,13 +122,13 @@
typedef struct HeapTupleFields
{
- TransactionId t_xmin; /* inserting xact ID */
- TransactionId t_xmax; /* deleting or locking xact ID */
+ ShortTransactionId t_xmin; /* inserting xact ID */
+ ShortTransactionId t_xmax; /* deleting or locking xact ID */
union
{
CommandId t_cid; /* inserting or deleting command ID, or both */
- TransactionId t_xvac; /* old-style VACUUM FULL xact ID */
+ ShortTransactionId t_xvac; /* old-style VACUUM FULL xact ID */
} t_field3;
} HeapTupleFields;
@@ -223,7 +224,7 @@ struct HeapTupleHeaderData
* HEAP_XMAX_LOCK_ONLY bit is set; or, for pg_upgrade's sake, if the Xmax is
* not a multi and the EXCL_LOCK bit is set.
*
- * See also HeapTupleHeaderIsOnlyLocked, which also checks for a possible
+ * See also HeapTupleIsOnlyLocked, which also checks for a possible
* aborted updater transaction.
*
* Beware of multiple evaluations of the argument.
@@ -299,29 +300,81 @@ struct HeapTupleHeaderData
*/
/*
- * HeapTupleHeaderGetRawXmin returns the "raw" xmin field, which is the xid
+ * HeapTupleGetRawXmin returns the "raw" xmin field, which is the xid
* originally used to insert the tuple. However, the tuple might actually
- * be frozen (via HeapTupleHeaderSetXminFrozen) in which case the tuple's xmin
+ * be frozen (via HeapTupleHeaderStoreXminFrozen) in which case the tuple's xmin
* is visible to every snapshot. Prior to PostgreSQL 9.4, we actually changed
* the xmin to FrozenTransactionId, and that value may still be encountered
* on disk.
*/
-#define HeapTupleHeaderGetRawXmin(tup) \
+#define HeapTupleGetRawXmin(tup) \
( \
- (tup)->t_choice.t_heap.t_xmin \
+ ((tup)->t_xmin) \
)
-#define HeapTupleHeaderGetXmin(tup) \
+#define HeapTupleGetXmin(tup) \
( \
- HeapTupleHeaderXminFrozen(tup) ? \
- FrozenTransactionId : HeapTupleHeaderGetRawXmin(tup) \
+ HeapTupleHeaderXminFrozen((tup)->t_data) ? \
+ FrozenTransactionId : HeapTupleGetRawXmin(tup) \
)
-#define HeapTupleHeaderSetXmin(tup, xid) \
+#define HeapTupleSetXmin(tup, xid) \
( \
- (tup)->t_choice.t_heap.t_xmin = (xid) \
+ ((tup)->t_xmin = (xid)) \
)
+/*
+ * Functions for accessing "double xmax". On pg_upgraded instances, it might
+ * happend that we can't fit new special area to the page. But we still
+ * might neep to write xmax of tuples for updates and deletes. The trick is
+ * that we actually don't need xmin field. After pg_upgrade (wich implies
+ * restart) no insertions went to this page yet (otherwise special area could
+ * fit). So, if tuple is visible (othewise it would be deleted), then it's
+ * visible for everybody. Thus, t_xmin isn't needed. Therefore, we can use
+ * both t_xmin and t_xmax to store 64-bit xmax.
+ *
+ * See heap_convert.c for details.
+ */
+static inline TransactionId
+HeapTupleHeaderGetDoubleXmax(HeapTupleHeader htup)
+{
+ TransactionId xmax;
+
+ xmax = htup->t_choice.t_heap.t_xmin;
+ xmax <<= 32;
+ xmax += htup->t_choice.t_heap.t_xmax;
+
+ return xmax;
+}
+
+static inline void
+HeapTupleHeaderSetDoubleXmax(HeapTupleHeader htup, TransactionId xid)
+{
+ htup->t_choice.t_heap.t_xmax = xid & 0xFFFFFFFF;
+ htup->t_choice.t_heap.t_xmin = (xid >> 32) & 0xFFFFFFFF;
+}
+
+static inline void
+HeapTupleHeaderStoreXmin(Page page, HeapTuple htup, bool is_toast)
+{
+ TransactionId base;
+
+ Assert(!HeapPageIsDoubleXmax(page));
+
+ base = is_toast ? ToastPageGetSpecial(page)->pd_xid_base :
+ HeapPageGetSpecial((page))->pd_xid_base;
+ htup->t_data->t_choice.t_heap.t_xmin =
+ NormalTransactionIdToShort(base, htup->t_xmin);
+}
+
+static inline void
+HeapTupleAndHeaderSetXmin(Page page, HeapTuple tup, TransactionId xid,
+ bool is_toast)
+{
+ HeapTupleSetXmin(tup, xid);
+ HeapTupleHeaderStoreXmin(page, tup, is_toast);
+}
+
#define HeapTupleHeaderXminCommitted(tup) \
( \
((tup)->t_infomask & HEAP_XMIN_COMMITTED) != 0 \
@@ -338,6 +391,12 @@ struct HeapTupleHeaderData
((tup)->t_infomask & (HEAP_XMIN_FROZEN)) == HEAP_XMIN_FROZEN \
)
+#define HeapTupleHeaderStoreXminFrozen(tup) \
+( \
+ AssertMacro(!HeapTupleHeaderXminInvalid(tup)), \
+ ((tup)->t_infomask |= HEAP_XMIN_FROZEN) \
+)
+
#define HeapTupleHeaderSetXminCommitted(tup) \
( \
AssertMacro(!HeapTupleHeaderXminInvalid(tup)), \
@@ -363,30 +422,80 @@ struct HeapTupleHeaderData
* to resolve the MultiXactId if necessary. This might involve multixact I/O,
* so it should only be used if absolutely necessary.
*/
-#define HeapTupleHeaderGetUpdateXid(tup) \
+#define HeapTupleGetUpdateXidAny(tup) \
( \
- (!((tup)->t_infomask & HEAP_XMAX_INVALID) && \
- ((tup)->t_infomask & HEAP_XMAX_IS_MULTI) && \
- !((tup)->t_infomask & HEAP_XMAX_LOCK_ONLY)) ? \
+ (!((tup)->t_data->t_infomask & HEAP_XMAX_INVALID) && \
+ ((tup)->t_data->t_infomask & HEAP_XMAX_IS_MULTI) && \
+ !((tup)->t_data->t_infomask & HEAP_XMAX_LOCK_ONLY)) ? \
HeapTupleGetUpdateXid(tup) \
: \
- HeapTupleHeaderGetRawXmax(tup) \
+ HeapTupleGetRawXmax(tup) \
)
-#define HeapTupleHeaderGetRawXmax(tup) \
-( \
- (tup)->t_choice.t_heap.t_xmax \
-)
+static inline TransactionId
+HeapTupleHeaderGetRawXmax(Page page, HeapTupleHeader htup)
+{
+ TransactionId base;
+
+ if (HeapPageIsDoubleXmax(page))
+ return HeapTupleHeaderGetDoubleXmax(htup);
-#define HeapTupleHeaderSetXmax(tup, xid) \
+ base = (htup->t_infomask & HEAP_XMAX_IS_MULTI) ?
+ HeapPageGetSpecial(page)->pd_multi_base :
+ HeapPageGetSpecial(page)->pd_xid_base;
+ return ShortTransactionIdToNormal(base,
+ htup->t_choice.t_heap.t_xmax);
+}
+
+#define HeapTupleGetRawXmax(tup) \
( \
- (tup)->t_choice.t_heap.t_xmax = (xid) \
+ ((tup)->t_xmax) \
)
+#define HeapTupleSetXmax(tup, xid) \
+do { \
+ (tup)->t_xmax = (xid); \
+} while (0)
+
+/*
+ * Set xid as xmax for HeapTupleHeader.
+ */
+static inline void
+HeapTupleHeaderStoreXmax(Page page, HeapTuple htup, bool is_toast)
+{
+ TransactionId base;
+
+ if (HeapPageIsDoubleXmax(page))
+ {
+ HeapTupleHeaderSetDoubleXmax(htup->t_data, htup->t_xmax);
+ return;
+ }
+
+ if (is_toast)
+ base = ToastPageGetSpecial(page)->pd_xid_base;
+ else
+ base = (htup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) != 0 ?
+ HeapPageGetSpecial(page)->pd_multi_base :
+ HeapPageGetSpecial(page)->pd_xid_base;
+ htup->t_data->t_choice.t_heap.t_xmax =
+ NormalTransactionIdToShort(base, htup->t_xmax);
+}
+
+/*
+ * Set xid as xmax for HeadTuple and HeapTupleHeader.
+ */
+static inline void
+HeapTupleAndHeaderSetXmax(Page page, HeapTuple htup, TransactionId xid,
+ bool is_toast)
+{
+ HeapTupleSetXmax(htup, xid);
+ HeapTupleHeaderStoreXmax(page, htup, is_toast);
+}
+
/*
* HeapTupleHeaderGetRawCommandId will give you what's in the header whether
- * it is useful or not. Most code should use HeapTupleHeaderGetCmin or
- * HeapTupleHeaderGetCmax instead, but note that those Assert that you can
+ * it is useful or not. Most code should use HeapTupleGetCmin or
+ * HeapTupleGetCmax instead, but note that those Assert that you can
* get a legitimate result, ie you are in the originating transaction!
*/
#define HeapTupleHeaderGetRawCommandId(tup) \
@@ -402,7 +511,7 @@ do { \
(tup)->t_infomask &= ~HEAP_COMBOCID; \
} while (0)
-/* SetCmax must be used after HeapTupleHeaderAdjustCmax; see combocid.c */
+/* SetCmax must be used after HeapTupleAdjustCmax; see combocid.c */
#define HeapTupleHeaderSetCmax(tup, cid, iscombo) \
do { \
Assert(!((tup)->t_infomask & HEAP_MOVED)); \
@@ -559,8 +668,16 @@ StaticAssertDecl(MaxOffsetNumber < SpecTokenOffsetNumber,
* an otherwise-empty page can indeed hold a tuple of this size. Because
* ItemIds and tuples have different alignment requirements, don't assume that
* you can, say, fit 2 tuples of size MaxHeapTupleSize/2 on the same page.
+ *
+ * On shift to 64-bit XIDs MaxHeapTupleSize decreased by sizeof(HeapPageSpecialData).
+ * Extant tuples with length over new MaxHeapTupleSize are inherited on DoubleXmax
+ * pages. They could be read, but can not be updated unless their length decreases
+ * to fit MaxHeapTupleSize. Vacuum full will also copy these double xmax pages
+ * without change.
*/
-#define MaxHeapTupleSize (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData)))
+
+#define MaxHeapTupleSize (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData)) - MAXALIGN(sizeof(HeapPageSpecialData)))
+#define MaxHeapTupleSize_32 (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData)))
#define MinHeapTupleSize MAXALIGN(SizeofHeapTupleHeader)
/*
@@ -694,6 +811,112 @@ struct MinimalTupleData
#define HeapTupleClearHeapOnly(tuple) \
HeapTupleHeaderClearHeapOnly((tuple)->t_data)
+/*
+ * Copy base values for xid and multixacts from one heap tuple to heap tuple.
+ * Should be called on tuple copy or making desc tuple on the base on src tuple
+ * saving visibility information.
+ */
+static inline void
+HeapTupleCopyXids(HeapTuple dest, HeapTuple src)
+{
+ dest->t_xmin = src->t_xmin;
+ dest->t_xmax = src->t_xmax;
+}
+
+/*
+ * Set base values for tuple xids/multixacts to zero. Used when visibility
+ * infromation is negligible or will be set later.
+ */
+static inline void
+HeapTupleSetZeroXids(HeapTuple htup)
+{
+ htup->t_xmin = 0;
+ htup->t_xmax = 0;
+}
+
+/*
+ * Copy HeapTupleHeader xmin/xmax in raw way ???
+ */
+static inline void
+HeapTupleCopyHeaderXids(HeapTuple htup) \
+{
+ htup->t_xmin = htup->t_data->t_choice.t_heap.t_xmin;
+ htup->t_xmax = htup->t_data->t_choice.t_heap.t_xmax;
+}
+
+static inline void
+HeapTupleCopyXminFromPage(HeapTuple tup, Page page, bool is_toast)
+{
+ TransactionId base;
+ ShortTransactionId xmin; /* short xmin from tuple header */
+
+ if (HeapTupleHeaderXminFrozen(tup->t_data))
+ {
+ tup->t_xmin = FrozenTransactionId;
+ return;
+ }
+
+ xmin = tup->t_data->t_choice.t_heap.t_xmin;
+
+ if (!TransactionIdIsNormal(xmin))
+ base = 0;
+ else if (is_toast)
+ base = ToastPageGetSpecial(page)->pd_xid_base;
+ else
+ base = HeapPageGetSpecial(page)->pd_xid_base;
+
+ tup->t_xmin = ShortTransactionIdToNormal(base, xmin);
+}
+
+static inline void
+HeapTupleCopyXmaxFromPage(HeapTuple tup, Page page, bool is_toast)
+{
+ TransactionId base;
+ ShortTransactionId xmax; /* short xmax from tuple header */
+
+ xmax = tup->t_data->t_choice.t_heap.t_xmax;
+
+ if (!TransactionIdIsNormal(xmax))
+ base = 0;
+ else if (is_toast)
+ /*
+ * Toast page is not expected to have multixacts in chunks and
+ * has shorter special.
+ */
+ base = ToastPageGetSpecial(page)->pd_xid_base;
+ else if (tup->t_data->t_infomask & HEAP_XMAX_IS_MULTI)
+ base = HeapPageGetSpecial(page)->pd_multi_base;
+ else
+ base = HeapPageGetSpecial(page)->pd_xid_base;
+
+ tup->t_xmax = ShortTransactionIdToNormal(base, xmax);
+}
+
+/*
+ * Copy base values for xid and multixacts from page to heap tuple. Should be
+ * called each time tuple is read from page. Otherwise, it would be impossible
+ * to correctly read tuple xmin and xmax.
+ */
+static inline void
+HeapTupleCopyXidsFromPage(Buffer buffer, HeapTuple tup, Page page,
+ bool is_toast)
+{
+ Assert(IsBufferLocked(buffer));
+
+ if (HeapPageIsDoubleXmax(page))
+ {
+ /*
+ * On double xmax pages, xmax is extracted from tuple header.
+ */
+ tup->t_xmin = FrozenTransactionId;
+ tup->t_xmax = HeapTupleHeaderGetDoubleXmax(tup->t_data);
+ return;
+ }
+
+ HeapTupleCopyXminFromPage(tup, page, is_toast);
+ HeapTupleCopyXmaxFromPage(tup, page, is_toast);
+}
+
/* prototypes for functions in common/heaptuple.c */
extern Size heap_compute_data_size(TupleDesc tupleDesc,
const Datum *values, const bool *isnull);
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 0be1355892..b84eb12710 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -18,16 +18,16 @@
/*
* The first two MultiXactId values are reserved to store the truncation Xid
- * and epoch of the first segment, so we start assigning multixact values from
+ * and base of the first segment, so we start assigning multixact values from
* 2.
*/
-#define InvalidMultiXactId ((MultiXactId) 0)
-#define FirstMultiXactId ((MultiXactId) 1)
-#define MaxMultiXactId ((MultiXactId) 0xFFFFFFFF)
+#define InvalidMultiXactId UINT64CONST(0)
+#define FirstMultiXactId UINT64CONST(1)
+#define MaxMultiXactId UINT64CONST(0xFFFFFFFFFFFFFFFF)
#define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId)
-#define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF)
+#define MaxMultiXactOffset UINT64CONST(0xFFFFFFFFFFFFFFFF)
/* Number of SLRU buffers to use for multixact */
#define NUM_MULTIXACTOFFSET_BUFFERS 8
@@ -147,7 +147,6 @@ extern void MultiXactSetNextMXact(MultiXactId nextMulti,
extern void MultiXactAdvanceNextMXact(MultiXactId minMulti,
MultiXactOffset minMultiOffset);
extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB);
-extern int MultiXactMemberFreezeThreshold(void);
extern void multixact_twophase_recover(TransactionId xid, uint16 info,
void *recdata, uint32 len);
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 7bfbf3086c..d579aabee1 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -62,8 +62,10 @@ typedef uint16 BTCycleId;
typedef struct BTPageOpaqueData
{
BlockNumber btpo_prev; /* left sibling, or P_NONE if leftmost */
+ /* ... or next transaction ID (lower part) */
BlockNumber btpo_next; /* right sibling, or P_NONE if rightmost */
uint32 btpo_level; /* tree level --- zero for leaf pages */
+ /* ... or next transaction ID (lower part) */
uint16 btpo_flags; /* flag bits, see below */
BTCycleId btpo_cycleid; /* vacuum cycle ID of latest split */
} BTPageOpaqueData;
@@ -92,6 +94,14 @@ typedef BTPageOpaqueData *BTPageOpaque;
*/
#define MAX_BT_CYCLE_ID 0xFF7F
+/* Macros for access xact */
+#define BTP_GET_XACT(opaque) (((uint64) ((BTPageOpaque) opaque)->btpo_prev << 32) | \
+ (uint64) ((BTPageOpaque) opaque)->btpo_level)
+#define BTP_SET_XACT(opaque, xact) \
+do { \
+ ((BTPageOpaque) opaque)->btpo_prev = (uint32) (xact >> 32); \
+ ((BTPageOpaque) opaque)->btpo_level = (uint32) xact; \
+} while (0)
/*
* The Meta page is always the first page in the btree index.
diff --git a/src/include/access/reloptions.h b/src/include/access/reloptions.h
index 1ad1352036..58d460fc03 100644
--- a/src/include/access/reloptions.h
+++ b/src/include/access/reloptions.h
@@ -110,7 +110,7 @@ typedef struct relopt_int64
int64 default_val;
int64 min;
int64 max;
-} relopt_int64;
+} relopt_int64;
typedef struct relopt_real
{
diff --git a/src/include/access/rewriteheap.h b/src/include/access/rewriteheap.h
index 1125457053..c0f0d456da 100644
--- a/src/include/access/rewriteheap.h
+++ b/src/include/access/rewriteheap.h
@@ -51,7 +51,7 @@ typedef struct LogicalRewriteMappingData
* 6) xid of the xact performing the mapping
* ---
*/
-#define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x-%x"
-extern void CheckPointLogicalRewriteHeap(void);
+#define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x_%x-%x_%x"
+extern void CheckPointLogicalRewriteHeap(void);
#endif /* REWRITE_HEAP_H */
diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h
index 463bcb67c5..4bafc98190 100644
--- a/src/include/access/rmgrlist.h
+++ b/src/include/access/rmgrlist.h
@@ -47,3 +47,4 @@ PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_i
PG_RMGR(RM_REPLORIGIN_ID, "ReplicationOrigin", replorigin_redo, replorigin_desc, replorigin_identify, NULL, NULL, NULL, NULL)
PG_RMGR(RM_GENERIC_ID, "Generic", generic_redo, generic_desc, generic_identify, NULL, NULL, generic_mask, NULL)
PG_RMGR(RM_LOGICALMSG_ID, "LogicalMessage", logicalmsg_redo, logicalmsg_desc, logicalmsg_identify, NULL, NULL, NULL, logicalmsg_decode)
+PG_RMGR(RM_HEAP3_ID, "Heap3", heap3_redo, heap3_desc, heap3_identify, NULL, NULL, heap_mask, NULL)
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index 091e2202c9..982b716b44 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -21,15 +21,7 @@
/*
* Define SLRU segment size. A page is the same BLCKSZ as is used everywhere
* else in Postgres. The segment size can be chosen somewhat arbitrarily;
- * we make it 32 pages by default, or 256Kb, i.e. 1M transactions for CLOG
- * or 64K transactions for SUBTRANS.
- *
- * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF,
- * page numbering also wraps around at 0xFFFFFFFF/xxxx_XACTS_PER_PAGE (where
- * xxxx is CLOG or SUBTRANS, respectively), and segment numbering at
- * 0xFFFFFFFF/xxxx_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need
- * take no explicit notice of that fact in slru.c, except when comparing
- * segment and page numbers in SimpleLruTruncate (see PagePrecedes()).
+ * we make it 32 pages by default.
*/
#define SLRU_PAGES_PER_SEGMENT 32
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 87227d8bd2..5caf4760e8 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -134,7 +134,7 @@ typedef enum TU_UpdateIndexes
* cmax is the outdating command's CID, but only when the failure code is
* TM_SelfModified (i.e., something in the current transaction outdated the
* tuple); otherwise cmax is zero. (We make this restriction because
- * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other
+ * HeapTupleGetCmax doesn't work for tuples outdated in other
* transactions.)
*/
typedef struct TM_FailureData
diff --git a/src/include/access/transam.h b/src/include/access/transam.h
index f5af6d3055..af9cb645a5 100644
--- a/src/include/access/transam.h
+++ b/src/include/access/transam.h
@@ -17,6 +17,10 @@
#include "access/xlogdefs.h"
+#ifndef FRONTEND
+#include "utils/elog.h"
+#endif
+
/* ----------------
* Special transaction ID values
*
@@ -28,11 +32,12 @@
* Note: if you need to change it, you must change pg_class.h as well.
* ----------------
*/
-#define InvalidTransactionId ((TransactionId) 0)
-#define BootstrapTransactionId ((TransactionId) 1)
-#define FrozenTransactionId ((TransactionId) 2)
-#define FirstNormalTransactionId ((TransactionId) 3)
-#define MaxTransactionId ((TransactionId) 0xFFFFFFFF)
+#define InvalidTransactionId UINT64CONST(0)
+#define BootstrapTransactionId UINT64CONST(1)
+#define FrozenTransactionId UINT64CONST(2)
+#define FirstNormalTransactionId UINT64CONST(3)
+#define MaxTransactionId UINT64CONST(0xFFFFFFFFFFFFFFFF)
+#define MaxShortTransactionId ((TransactionId) 0x7FFFFFFF)
/* ----------------
* transaction ID manipulation macros
@@ -44,8 +49,40 @@
#define TransactionIdStore(xid, dest) (*(dest) = (xid))
#define StoreInvalidTransactionId(dest) (*(dest) = InvalidTransactionId)
-#define EpochFromFullTransactionId(x) ((uint32) ((x).value >> 32))
-#define XidFromFullTransactionId(x) ((uint32) (x).value)
+/*
+ * Convert short xid from/to full xid. Assertion should fail if we full xid
+ * doesn't fit to xid base.
+ */
+static inline TransactionId
+ShortTransactionIdToNormal(TransactionId base, ShortTransactionId xid)
+{
+ if (!TransactionIdIsNormal(xid))
+ return (TransactionId) xid;
+
+#ifndef FRONTEND
+ /* xid + base should not overflow TransactionId */
+ Assert(xid + base >= base);
+#endif
+
+ return (TransactionId) (xid + base);
+}
+
+static inline ShortTransactionId
+NormalTransactionIdToShort(TransactionId base, TransactionId xid)
+{
+ if (!TransactionIdIsNormal(xid))
+ return (ShortTransactionId) (xid);
+
+#ifndef FRONTEND
+ /* xid should fit ShortTransactionId */
+ Assert(xid >= base + FirstNormalTransactionId &&
+ xid <= base + MaxShortTransactionId);
+#endif
+
+ return (ShortTransactionId) (xid - base);
+}
+
+#define XidFromFullTransactionId(x) ((x).value)
#define U64FromFullTransactionId(x) ((x).value)
#define FullTransactionIdEquals(a, b) ((a).value == (b).value)
#define FullTransactionIdPrecedes(a, b) ((a).value < (b).value)
@@ -53,8 +90,8 @@
#define FullTransactionIdFollows(a, b) ((a).value > (b).value)
#define FullTransactionIdFollowsOrEquals(a, b) ((a).value >= (b).value)
#define FullTransactionIdIsValid(x) TransactionIdIsValid(XidFromFullTransactionId(x))
-#define InvalidFullTransactionId FullTransactionIdFromEpochAndXid(0, InvalidTransactionId)
-#define FirstNormalFullTransactionId FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId)
+#define InvalidFullTransactionId FullTransactionIdFromXid(InvalidTransactionId)
+#define FirstNormalFullTransactionId FullTransactionIdFromXid(FirstNormalTransactionId)
#define FullTransactionIdIsNormal(x) FullTransactionIdFollowsOrEquals(x, FirstNormalFullTransactionId)
/*
@@ -68,21 +105,11 @@ typedef struct FullTransactionId
} FullTransactionId;
static inline FullTransactionId
-FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
+FullTransactionIdFromXid(TransactionId xid)
{
FullTransactionId result;
- result.value = ((uint64) epoch) << 32 | xid;
-
- return result;
-}
-
-static inline FullTransactionId
-FullTransactionIdFromU64(uint64 value)
-{
- FullTransactionId result;
-
- result.value = value;
+ result.value = xid;
return result;
}
@@ -91,8 +118,7 @@ FullTransactionIdFromU64(uint64 value)
#define TransactionIdAdvance(dest) \
do { \
(dest)++; \
- if ((dest) < FirstNormalTransactionId) \
- (dest) = FirstNormalTransactionId; \
+ Assert(TransactionIdIsNormal(dest)); \
} while(0)
/*
@@ -140,18 +166,19 @@ FullTransactionIdAdvance(FullTransactionId *dest)
/* back up a transaction ID variable, handling wraparound correctly */
#define TransactionIdRetreat(dest) \
do { \
+ Assert(TransactionIdIsNormal(dest)); \
(dest)--; \
- } while ((dest) < FirstNormalTransactionId)
+ } while(0)
/* compare two XIDs already known to be normal; this is a macro for speed */
#define NormalTransactionIdPrecedes(id1, id2) \
(AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \
- (int32) ((id1) - (id2)) < 0)
+ (int64) ((id1) - (id2)) < 0)
/* compare two XIDs already known to be normal; this is a macro for speed */
#define NormalTransactionIdFollows(id1, id2) \
(AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \
- (int32) ((id1) - (id2)) > 0)
+ (int64) ((id1) - (id2)) > 0)
/* ----------
* Object ID (OID) zero is InvalidOid.
@@ -201,10 +228,6 @@ FullTransactionIdAdvance(FullTransactionId *dest)
* OID and XID assignment state. For largely historical reasons, there is
* just one struct with different fields that are protected by different
* LWLocks.
- *
- * Note: xidWrapLimit and oldestXidDB are not "active" values, but are
- * used just to generate useful messages when xidWarnLimit or xidStopLimit
- * are exceeded.
*/
typedef struct VariableCacheData
{
@@ -221,9 +244,6 @@ typedef struct VariableCacheData
TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */
TransactionId xidVacLimit; /* start forcing autovacuums here */
- TransactionId xidWarnLimit; /* start complaining here */
- TransactionId xidStopLimit; /* refuse to advance nextXid beyond here */
- TransactionId xidWrapLimit; /* where the world ends */
Oid oldestXidDB; /* database with minimum datfrozenxid */
/*
@@ -276,10 +296,6 @@ extern bool TransactionIdDidAbort(TransactionId transactionId);
extern void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids);
extern void TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids, XLogRecPtr lsn);
extern void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids);
-extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2);
-extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2);
-extern bool TransactionIdFollows(TransactionId id1, TransactionId id2);
-extern bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2);
extern TransactionId TransactionIdLatest(TransactionId mainxid,
int nxids, const TransactionId *xids);
extern XLogRecPtr TransactionIdGetCommitLSN(TransactionId xid);
@@ -319,7 +335,7 @@ ReadNextTransactionId(void)
/* return transaction ID backed up by amount, handling wraparound correctly */
static inline TransactionId
-TransactionIdRetreatedBy(TransactionId xid, uint32 amount)
+TransactionIdRetreatedBy(TransactionId xid, uint64 amount)
{
xid -= amount;
diff --git a/src/include/access/tupmacs.h b/src/include/access/tupmacs.h
index 3414446597..83c1adcd6c 100644
--- a/src/include/access/tupmacs.h
+++ b/src/include/access/tupmacs.h
@@ -131,10 +131,11 @@ fetch_att(const void *T, bool attbyval, int attlen)
((attalign) == TYPALIGN_INT) ? INTALIGN(cur_offset) : \
(((attalign) == TYPALIGN_CHAR) ? (uintptr_t) (cur_offset) : \
(((attalign) == TYPALIGN_DOUBLE) ? DOUBLEALIGN(cur_offset) : \
+ (((attalign) == TYPALIGN_XID) ? MAXALIGN(cur_offset) : \
( \
AssertMacro((attalign) == TYPALIGN_SHORT), \
SHORTALIGN(cur_offset) \
- ))) \
+ )))) \
)
/*
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index cb90f227ce..f002edd469 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -249,7 +249,7 @@ typedef struct xl_xact_xinfo
* Commit records can be large, so copying large portions isn't
* attractive.
*/
- uint32 xinfo;
+ uint64 xinfo;
} xl_xact_xinfo;
typedef struct xl_xact_dbinfo
@@ -302,7 +302,12 @@ typedef struct xl_xact_invals
typedef struct xl_xact_twophase
{
- TransactionId xid;
+ /*
+ * TransactionId is split into 32-bit parts because xl_xact_twophase is
+ * only int-aligned.
+ */
+ uint32 xid_lo;
+ uint32 xid_hi;
} xl_xact_twophase;
typedef struct xl_xact_origin
@@ -321,7 +326,7 @@ typedef struct xl_xact_commit
/* xl_xact_relfilelocators follows if XINFO_HAS_RELFILELOCATORS */
/* xl_xact_stats_items follows if XINFO_HAS_DROPPED_STATS */
/* xl_xact_invals follows if XINFO_HAS_INVALS */
- /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */
+ /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE (xid is int-aligned!) */
/* twophase_gid follows if XINFO_HAS_GID. As a null-terminated string. */
/* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! */
} xl_xact_commit;
@@ -337,7 +342,7 @@ typedef struct xl_xact_abort
/* xl_xact_relfilelocators follows if XINFO_HAS_RELFILELOCATORS */
/* xl_xact_stats_items follows if XINFO_HAS_DROPPED_STATS */
/* No invalidation messages needed. */
- /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */
+ /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE (xid is int-aligned!) */
/* twophase_gid follows if XINFO_HAS_GID. As a null-terminated string. */
/* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! */
} xl_xact_abort;
diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h
index cace867497..87fad76eba 100644
--- a/src/include/access/xloginsert.h
+++ b/src/include/access/xloginsert.h
@@ -38,6 +38,7 @@
#define REGBUF_KEEP_DATA 0x10 /* include data even if a full-page image
* is taken */
#define REGBUF_NO_CHANGE 0x20 /* intentionally register clean buffer */
+#define REGBUF_CONVERTED 0x40 /* buffer had format convertion */
/* prototypes for public functions in xloginsert.c: */
extern void XLogBeginInsert(void);
diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h
index 0813722715..07f3af146c 100644
--- a/src/include/access/xlogreader.h
+++ b/src/include/access/xlogreader.h
@@ -427,10 +427,6 @@ extern bool DecodeXLogRecord(XLogReaderState *state,
#define XLogRecHasBlockData(decoder, block_id) \
((decoder)->record->blocks[block_id].has_data)
-#ifndef FRONTEND
-extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record);
-#endif
-
extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page);
extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len);
extern void XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h
index ec9a3c802a..4e58d9be61 100644
--- a/src/include/access/xlogrecord.h
+++ b/src/include/access/xlogrecord.h
@@ -41,18 +41,17 @@
typedef struct XLogRecord
{
uint32 xl_tot_len; /* total len of entire record */
+ pg_crc32c xl_crc; /* CRC for this record */
TransactionId xl_xid; /* xact id */
XLogRecPtr xl_prev; /* ptr to previous record in log */
uint8 xl_info; /* flag bits, see below */
RmgrId xl_rmid; /* resource manager for this record */
- /* 2 bytes of padding here, initialize to zero */
- pg_crc32c xl_crc; /* CRC for this record */
/* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */
} XLogRecord;
-#define SizeOfXLogRecord (offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32c))
+#define SizeOfXLogRecord (offsetof(XLogRecord, xl_rmid) + sizeof(RmgrId))
/*
* The high 4 bits in xl_info may be used freely by rmgr. The
diff --git a/src/include/c.h b/src/include/c.h
index 82f8e9d4c7..e2420738f2 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -638,19 +638,29 @@ typedef double float8;
typedef Oid regproc;
typedef regproc RegProcedure;
-typedef uint32 TransactionId;
+typedef uint64 TransactionId;
-typedef uint32 LocalTransactionId;
+extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2);
+extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2);
+extern bool TransactionIdFollows(TransactionId id1, TransactionId id2);
+extern bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2);
-typedef uint32 SubTransactionId;
+typedef uint32 ShortTransactionId;
+typedef uint64 LocalTransactionId;
+typedef uint64 SubTransactionId;
-#define InvalidSubTransactionId ((SubTransactionId) 0)
-#define TopSubTransactionId ((SubTransactionId) 1)
+#define InvalidSubTransactionId ((SubTransactionId) 0)
+#define TopSubTransactionId ((SubTransactionId) 1)
/* MultiXactId must be equivalent to TransactionId, to fit in t_xmax */
typedef TransactionId MultiXactId;
-typedef uint32 MultiXactOffset;
+typedef uint64 MultiXactOffset;
+
+#define MAX_START_XID UINT64CONST(0x3FFFFFFFFFFFFFFF) /* 2^62 - 1 */
+#define StartTransactionIdIsValid(xid) ((xid) <= MAX_START_XID)
+#define StartMultiXactIdIsValid(mxid) ((mxid) <= MAX_START_XID)
+#define StartMultiXactOffsetIsValid(mxoff) ((mxoff) <= MAX_START_XID)
typedef uint32 CommandId;
@@ -824,7 +834,6 @@ typedef NameData *Name;
/* we don't currently need wider versions of the other ALIGN macros */
#define MAXALIGN64(LEN) TYPEALIGN64(MAXIMUM_ALIGNOF, (LEN))
-
/* ----------------------------------------------------------------
* Section 6: assertions
* ----------------------------------------------------------------
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index a06a8f0b23..9f5f3637f0 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,7 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202312071
+/* XXX: should de changed to actual version on commit */
+#define CATALOG_VERSION_NO 999999999
#endif
diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat
index 4c70da41de..b64d64e194 100644
--- a/src/include/catalog/pg_amproc.dat
+++ b/src/include/catalog/pg_amproc.dat
@@ -401,9 +401,9 @@
amprocrighttype => 'bytea', amprocnum => '2',
amproc => 'hashvarlenaextended' },
{ amprocfamily => 'hash/xid_ops', amproclefttype => 'xid',
- amprocrighttype => 'xid', amprocnum => '1', amproc => 'hashint4' },
+ amprocrighttype => 'xid', amprocnum => '1', amproc => 'hashint8' },
{ amprocfamily => 'hash/xid_ops', amproclefttype => 'xid',
- amprocrighttype => 'xid', amprocnum => '2', amproc => 'hashint4extended' },
+ amprocrighttype => 'xid', amprocnum => '2', amproc => 'hashint8extended' },
{ amprocfamily => 'hash/xid8_ops', amproclefttype => 'xid8',
amprocrighttype => 'xid8', amprocnum => '1', amproc => 'hashint8' },
{ amprocfamily => 'hash/xid8_ops', amproclefttype => 'xid8',
diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat
index b2cdea66c4..f9feefb013 100644
--- a/src/include/catalog/pg_operator.dat
+++ b/src/include/catalog/pg_operator.dat
@@ -183,16 +183,16 @@
oprresult => 'bool', oprcom => '=(xid,xid)', oprnegate => '<>(xid,xid)',
oprcode => 'xideq', oprrest => 'eqsel', oprjoin => 'eqjoinsel' },
{ oid => '353', descr => 'equal',
- oprname => '=', oprleft => 'xid', oprright => 'int4', oprresult => 'bool',
- oprnegate => '<>(xid,int4)', oprcode => 'xideqint4', oprrest => 'eqsel',
+ oprname => '=', oprleft => 'xid', oprright => 'int8', oprresult => 'bool',
+ oprnegate => '<>(xid,int8)', oprcode => 'xideqint8', oprrest => 'eqsel',
oprjoin => 'eqjoinsel' },
{ oid => '3315', descr => 'not equal',
oprname => '<>', oprleft => 'xid', oprright => 'xid', oprresult => 'bool',
oprcom => '<>(xid,xid)', oprnegate => '=(xid,xid)', oprcode => 'xidneq',
oprrest => 'neqsel', oprjoin => 'neqjoinsel' },
{ oid => '3316', descr => 'not equal',
- oprname => '<>', oprleft => 'xid', oprright => 'int4', oprresult => 'bool',
- oprnegate => '=(xid,int4)', oprcode => 'xidneqint4', oprrest => 'neqsel',
+ oprname => '<>', oprleft => 'xid', oprright => 'int8', oprresult => 'bool',
+ oprnegate => '=(xid,int8)', oprcode => 'xidneqint8', oprrest => 'neqsel',
oprjoin => 'neqjoinsel' },
{ oid => '5068', descr => 'equal',
oprname => '=', oprcanmerge => 't', oprcanhash => 't', oprleft => 'xid8',
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 77e8b13764..f0b02b5599 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -2397,10 +2397,10 @@
{ oid => '1181',
descr => 'age of a transaction ID, in transactions before current transaction',
proname => 'age', provolatile => 's', proparallel => 'r',
- prorettype => 'int4', proargtypes => 'xid', prosrc => 'xid_age' },
+ prorettype => 'int8', proargtypes => 'xid', prosrc => 'xid_age' },
{ oid => '3939',
descr => 'age of a multi-transaction ID, in multi-transactions before current multi-transaction',
- proname => 'mxid_age', provolatile => 's', prorettype => 'int4',
+ proname => 'mxid_age', provolatile => 's', prorettype => 'int8',
proargtypes => 'xid', prosrc => 'mxid_age' },
{ oid => '1188',
@@ -2751,11 +2751,11 @@
prosrc => 'bpcharlen' },
{ oid => '1319',
- proname => 'xideqint4', proleakproof => 't', prorettype => 'bool',
- proargtypes => 'xid int4', prosrc => 'xideq' },
+ proname => 'xideqint8', proleakproof => 't', prorettype => 'bool',
+ proargtypes => 'xid int8', prosrc => 'xideq' },
{ oid => '3309',
- proname => 'xidneqint4', proleakproof => 't', prorettype => 'bool',
- proargtypes => 'xid int4', prosrc => 'xidneq' },
+ proname => 'xidneqint8', proleakproof => 't', prorettype => 'bool',
+ proargtypes => 'xid int8', prosrc => 'xidneq' },
{ oid => '1326',
proname => 'interval_div', prorettype => 'interval',
diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat
index f6110a850d..25eb0079cd 100644
--- a/src/include/catalog/pg_type.dat
+++ b/src/include/catalog/pg_type.dat
@@ -95,9 +95,9 @@
typinput => 'tidin', typoutput => 'tidout', typreceive => 'tidrecv',
typsend => 'tidsend', typalign => 's' },
{ oid => '28', array_type_oid => '1011', descr => 'transaction id',
- typname => 'xid', typlen => '4', typbyval => 't', typcategory => 'U',
+ typname => 'xid', typlen => '8', typbyval => 'FLOAT8PASSBYVAL', typcategory => 'U',
typinput => 'xidin', typoutput => 'xidout', typreceive => 'xidrecv',
- typsend => 'xidsend', typalign => 'i' },
+ typsend => 'xidsend', typalign => 'x' },
{ oid => '29', array_type_oid => '1012',
descr => 'command identifier type, sequence in transaction id',
typname => 'cid', typlen => '4', typbyval => 't', typcategory => 'U',
diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h
index 508ba7b0f7..e9bb5c3b26 100644
--- a/src/include/catalog/pg_type.h
+++ b/src/include/catalog/pg_type.h
@@ -300,6 +300,11 @@ DECLARE_UNIQUE_INDEX(pg_type_typname_nsp_index, 2704, TypeNameNspIndexId, pg_typ
#define TYPALIGN_SHORT 's' /* short alignment (typically 2 bytes) */
#define TYPALIGN_INT 'i' /* int alignment (typically 4 bytes) */
#define TYPALIGN_DOUBLE 'd' /* double alignment (often 8 bytes) */
+/*
+ * We need to use alignment sutable for 8-byte XID values.
+ * On system like AIX double alignment (4 bytes) is not enough.
+ */
+#define TYPALIGN_XID 'x'
#define TYPSTORAGE_PLAIN 'p' /* type not prepared for toasting */
#define TYPSTORAGE_EXTERNAL 'e' /* toastable, don't try to compress */
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 4af02940c5..121dec1079 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -215,12 +215,12 @@ typedef enum VacOptValue
*/
typedef struct VacuumParams
{
- bits32 options; /* bitmask of VACOPT_* */
- int freeze_min_age; /* min freeze age, -1 to use default */
- int freeze_table_age; /* age at which to scan whole table */
- int multixact_freeze_min_age; /* min multixact freeze age, -1 to
+ bits32 options; /* bitmask of VacuumOption */
+ int64 freeze_min_age; /* min freeze age, -1 to use default */
+ int64 freeze_table_age; /* age at which to scan whole table */
+ int64 multixact_freeze_min_age; /* min multixact freeze age, -1 to
* use default */
- int multixact_freeze_table_age; /* multixact age at which to scan
+ int64 multixact_freeze_table_age; /* multixact age at which to scan
* whole table */
bool is_wraparound; /* force a for-wraparound vacuum */
int log_min_duration; /* minimum execution threshold in ms at
@@ -293,12 +293,12 @@ typedef struct VacDeadItems
/* GUC parameters */
extern PGDLLIMPORT int default_statistics_target; /* PGDLLIMPORT for PostGIS */
-extern PGDLLIMPORT int vacuum_freeze_min_age;
-extern PGDLLIMPORT int vacuum_freeze_table_age;
-extern PGDLLIMPORT int vacuum_multixact_freeze_min_age;
-extern PGDLLIMPORT int vacuum_multixact_freeze_table_age;
-extern PGDLLIMPORT int vacuum_failsafe_age;
-extern PGDLLIMPORT int vacuum_multixact_failsafe_age;
+extern PGDLLIMPORT int64 vacuum_freeze_min_age;
+extern PGDLLIMPORT int64 vacuum_freeze_table_age;
+extern PGDLLIMPORT int64 vacuum_multixact_freeze_min_age;
+extern PGDLLIMPORT int64 vacuum_multixact_freeze_table_age;
+extern PGDLLIMPORT int64 vacuum_failsafe_age;
+extern PGDLLIMPORT int64 vacuum_multixact_failsafe_age;
/*
* Maximum value for default_statistics_target and per-column statistics
diff --git a/src/include/fmgr.h b/src/include/fmgr.h
index edf61e53f3..3469edd160 100644
--- a/src/include/fmgr.h
+++ b/src/include/fmgr.h
@@ -281,6 +281,7 @@ extern struct varlena *pg_detoast_datum_packed(struct varlena *datum);
#define PG_GETARG_FLOAT4(n) DatumGetFloat4(PG_GETARG_DATUM(n))
#define PG_GETARG_FLOAT8(n) DatumGetFloat8(PG_GETARG_DATUM(n))
#define PG_GETARG_INT64(n) DatumGetInt64(PG_GETARG_DATUM(n))
+#define PG_GETARG_TRANSACTIONID(n) DatumGetTransactionId(PG_GETARG_DATUM(n))
/* use this if you want the raw, possibly-toasted input datum: */
#define PG_GETARG_RAW_VARLENA_P(n) ((struct varlena *) PG_GETARG_POINTER(n))
/* use this if you want the input datum de-toasted: */
@@ -367,6 +368,7 @@ extern struct varlena *pg_detoast_datum_packed(struct varlena *datum);
#define PG_RETURN_FLOAT8(x) return Float8GetDatum(x)
#define PG_RETURN_INT64(x) return Int64GetDatum(x)
#define PG_RETURN_UINT64(x) return UInt64GetDatum(x)
+#define PG_RETURN_TRANSACTIONID(x) return TransactionIdGetDatum(x)
/* RETURN macros for other pass-by-ref types will typically look like this: */
#define PG_RETURN_BYTEA_P(x) PG_RETURN_POINTER(x)
#define PG_RETURN_TEXT_P(x) PG_RETURN_POINTER(x)
diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h
index 529a382d28..a687303dae 100644
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@@ -46,6 +46,7 @@ typedef union ListCell
{
void *ptr_value;
int int_value;
+ int64 int64_value;
Oid oid_value;
TransactionId xid_value;
} ListCell;
@@ -171,6 +172,7 @@ list_length(const List *l)
*/
#define lfirst(lc) ((lc)->ptr_value)
#define lfirst_int(lc) ((lc)->int_value)
+#define lfirst_int64(lc) ((lc)->int64_value)
#define lfirst_oid(lc) ((lc)->oid_value)
#define lfirst_xid(lc) ((lc)->xid_value)
#define lfirst_node(type,lc) castNode(type, lfirst(lc))
@@ -197,6 +199,7 @@ list_length(const List *l)
#define llast(l) lfirst(list_last_cell(l))
#define llast_int(l) lfirst_int(list_last_cell(l))
+#define llast_int64(l) lfirst_int64(list_last_cell(l))
#define llast_oid(l) lfirst_oid(list_last_cell(l))
#define llast_xid(l) lfirst_xid(list_last_cell(l))
#define llast_node(type,l) castNode(type, llast(l))
@@ -559,6 +562,7 @@ extern List *list_make5_impl(NodeTag t, ListCell datum1, ListCell datum2,
extern pg_nodiscard List *lappend(List *list, void *datum);
extern pg_nodiscard List *lappend_int(List *list, int datum);
+extern pg_nodiscard List *lappend_int64(List *list, int64 datum);
extern pg_nodiscard List *lappend_oid(List *list, Oid datum);
extern pg_nodiscard List *lappend_xid(List *list, TransactionId datum);
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 5f16918243..4651e61aca 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -771,6 +771,9 @@
# endif
#endif
+/* Postgres Pro use 64bit xids */
+#undef XID_IS_64BIT
+
/* Size of a WAL file block. This need have no particular relation to BLCKSZ.
XLOG_BLCKSZ must be a power of 2, and if your system supports O_DIRECT I/O,
XLOG_BLCKSZ must be a multiple of the alignment requirement for direct-I/O
diff --git a/src/include/port/pg_lfind.h b/src/include/port/pg_lfind.h
index 59aa8245ed..f9e0ebaddc 100644
--- a/src/include/port/pg_lfind.h
+++ b/src/include/port/pg_lfind.h
@@ -81,35 +81,21 @@ pg_lfind8_le(uint8 key, uint8 *base, uint32 nelem)
}
/*
- * pg_lfind32
+ * pg_lfind64
*
* Return true if there is an element in 'base' that equals 'key', otherwise
* return false.
*/
static inline bool
-pg_lfind32(uint32 key, uint32 *base, uint32 nelem)
+pg_lfind64(uint64 key, uint64 *base, uint32 nelem)
{
- uint32 i = 0;
-
-#ifndef USE_NO_SIMD
-
- /*
- * For better instruction-level parallelism, each loop iteration operates
- * on a block of four registers. Testing for SSE2 has showed this is ~40%
- * faster than using a block of two registers.
- */
- const Vector32 keys = vector32_broadcast(key); /* load copies of key */
- const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32);
- const uint32 nelem_per_iteration = 4 * nelem_per_vector;
-
- /* round down to multiple of elements per iteration */
- const uint32 tail_idx = nelem & ~(nelem_per_iteration - 1);
-
+ uint32 i,
+ iterations;
#if defined(USE_ASSERT_CHECKING)
bool assert_result = false;
/* pre-compute the result for assert checking */
- for (i = 0; i < nelem; i++)
+ for (i = 0; i < nelem; ++i)
{
if (key == base[i])
{
@@ -119,62 +105,127 @@ pg_lfind32(uint32 key, uint32 *base, uint32 nelem)
}
#endif
- for (i = 0; i < tail_idx; i += nelem_per_iteration)
+#define UNROLL_FACTOR 8
+ StaticAssertStmt((UNROLL_FACTOR & (UNROLL_FACTOR - 1)) == 0,
+ "Loop unroll factor must be power of 2");
+ iterations = nelem & ~(UNROLL_FACTOR - 1);
+ for (i = 0; i < iterations; i += UNROLL_FACTOR)
{
- Vector32 vals1,
- vals2,
- vals3,
- vals4,
- result1,
- result2,
- result3,
- result4,
- tmp1,
- tmp2,
- result;
-
- /* load the next block into 4 registers */
- vector32_load(&vals1, &base[i]);
- vector32_load(&vals2, &base[i + nelem_per_vector]);
- vector32_load(&vals3, &base[i + nelem_per_vector * 2]);
- vector32_load(&vals4, &base[i + nelem_per_vector * 3]);
-
- /* compare each value to the key */
- result1 = vector32_eq(keys, vals1);
- result2 = vector32_eq(keys, vals2);
- result3 = vector32_eq(keys, vals3);
- result4 = vector32_eq(keys, vals4);
-
- /* combine the results into a single variable */
- tmp1 = vector32_or(result1, result2);
- tmp2 = vector32_or(result3, result4);
- result = vector32_or(tmp1, tmp2);
-
- /* see if there was a match */
- if (vector32_is_highbit_set(result))
+ if (base[0] == key || base[1] == key || base[2] == key ||
+ base[3] == key || base[4] == key || base[5] == key ||
+ base[6] == key || base[7] == key)
{
+#if defined(USE_ASSERT_CHECKING)
Assert(assert_result == true);
+#endif
return true;
}
+ base += UNROLL_FACTOR;
}
-#endif /* ! USE_NO_SIMD */
/* Process the remaining elements one at a time. */
- for (; i < nelem; i++)
+ iterations = nelem & (UNROLL_FACTOR - 1);
+ for (i = 0; i < iterations; ++i)
{
- if (key == base[i])
+ if (key == *base++)
{
-#ifndef USE_NO_SIMD
+#if defined(USE_ASSERT_CHECKING)
Assert(assert_result == true);
#endif
return true;
}
}
-#ifndef USE_NO_SIMD
+#if defined(USE_ASSERT_CHECKING)
Assert(assert_result == false);
#endif
return false;
+// uint32 i = 0;
+//
+//#ifndef USE_NO_SIMD
+//
+// /*
+// * For better instruction-level parallelism, each loop iteration operates
+// * on a block of four registers. Testing for SSE2 has showed this is ~40%
+// * faster than using a block of two registers.
+// */
+// const Vector32 keys = vector32_broadcast(key); /* load copies of key */
+// const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32);
+// const uint32 nelem_per_iteration = 4 * nelem_per_vector;
+//
+// /* round down to multiple of elements per iteration */
+// const uint32 tail_idx = nelem & ~(nelem_per_iteration - 1);
+//
+//#if defined(USE_ASSERT_CHECKING)
+// bool assert_result = false;
+//
+// /* pre-compute the result for assert checking */
+// for (i = 0; i < nelem; i++)
+// {
+// if (key == base[i])
+// {
+// assert_result = true;
+// break;
+// }
+// }
+//#endif
+//
+// for (i = 0; i < tail_idx; i += nelem_per_iteration)
+// {
+// Vector32 vals1,
+// vals2,
+// vals3,
+// vals4,
+// result1,
+// result2,
+// result3,
+// result4,
+// tmp1,
+// tmp2,
+// result;
+//
+// /* load the next block into 4 registers */
+// vector32_load(&vals1, &base[i]);
+// vector32_load(&vals2, &base[i + nelem_per_vector]);
+// vector32_load(&vals3, &base[i + nelem_per_vector * 2]);
+// vector32_load(&vals4, &base[i + nelem_per_vector * 3]);
+//
+// /* compare each value to the key */
+// result1 = vector32_eq(keys, vals1);
+// result2 = vector32_eq(keys, vals2);
+// result3 = vector32_eq(keys, vals3);
+// result4 = vector32_eq(keys, vals4);
+//
+// /* combine the results into a single variable */
+// tmp1 = vector32_or(result1, result2);
+// tmp2 = vector32_or(result3, result4);
+// result = vector32_or(tmp1, tmp2);
+//
+// /* see if there was a match */
+// if (vector32_is_highbit_set(result))
+// {
+// Assert(assert_result == true);
+// return true;
+// }
+// }
+//#endif /* ! USE_NO_SIMD */
+//
+// /* Process the remaining elements one at a time. */
+// for (; i < nelem; i++)
+// {
+// if (key == base[i])
+// {
+//#ifndef USE_NO_SIMD
+// Assert(assert_result == true);
+//#endif
+// return true;
+// }
+// }
+//
+//#ifndef USE_NO_SIMD
+// Assert(assert_result == false);
+//#endif
+// return false;
}
#endif /* PG_LFIND_H */
diff --git a/src/include/postgres.h b/src/include/postgres.h
index 8a028ff789..5baa6b278c 100644
--- a/src/include/postgres.h
+++ b/src/include/postgres.h
@@ -80,6 +80,9 @@ typedef struct NullableDatum
#define SIZEOF_DATUM SIZEOF_VOID_P
+static uint64 DatumGetUInt64(Datum X);
+static Datum UInt64GetDatum(uint64 X);
+
/*
* DatumGetBool
* Returns boolean value of a datum.
@@ -261,7 +264,7 @@ ObjectIdGetDatum(Oid X)
static inline TransactionId
DatumGetTransactionId(Datum X)
{
- return (TransactionId) X;
+ return DatumGetUInt64(X);
}
/*
@@ -271,7 +274,7 @@ DatumGetTransactionId(Datum X)
static inline Datum
TransactionIdGetDatum(TransactionId X)
{
- return (Datum) X;
+ return UInt64GetDatum(X);
}
/*
@@ -281,7 +284,7 @@ TransactionIdGetDatum(TransactionId X)
static inline Datum
MultiXactIdGetDatum(MultiXactId X)
{
- return (Datum) X;
+ return UInt64GetDatum(X);
}
/*
diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h
index c9ef31ae66..55d0615746 100644
--- a/src/include/postmaster/autovacuum.h
+++ b/src/include/postmaster/autovacuum.h
@@ -37,8 +37,8 @@ extern PGDLLIMPORT int autovacuum_vac_ins_thresh;
extern PGDLLIMPORT double autovacuum_vac_ins_scale;
extern PGDLLIMPORT int autovacuum_anl_thresh;
extern PGDLLIMPORT double autovacuum_anl_scale;
-extern PGDLLIMPORT int autovacuum_freeze_max_age;
-extern PGDLLIMPORT int autovacuum_multixact_freeze_max_age;
+extern PGDLLIMPORT int64 autovacuum_freeze_max_age;
+extern PGDLLIMPORT int64 autovacuum_multixact_freeze_max_age;
extern PGDLLIMPORT double autovacuum_vac_cost_delay;
extern PGDLLIMPORT int autovacuum_vac_cost_limit;
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 2c4fd92e39..c34040bd2c 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -42,10 +42,10 @@
*/
#define BUF_REFCOUNT_ONE 1
#define BUF_REFCOUNT_MASK ((1U << 18) - 1)
-#define BUF_USAGECOUNT_MASK 0x003C0000U
+#define BUF_USAGECOUNT_MASK 0x001C0000U
#define BUF_USAGECOUNT_ONE (1U << 18)
#define BUF_USAGECOUNT_SHIFT 18
-#define BUF_FLAG_MASK 0xFFC00000U
+#define BUF_FLAG_MASK 0xFFE00000U
/* Get refcount and usagecount from buffer state */
#define BUF_STATE_GET_REFCOUNT(state) ((state) & BUF_REFCOUNT_MASK)
@@ -57,6 +57,7 @@
* Note: BM_TAG_VALID essentially means that there is a buffer hashtable
* entry associated with the buffer's tag.
*/
+#define BM_CONVERTED (1U << 21) /* buffer were converted to 64xid */
#define BM_LOCKED (1U << 22) /* buffer header is locked */
#define BM_DIRTY (1U << 23) /* data needs writing */
#define BM_VALID (1U << 24) /* data is valid */
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 41e26d3e20..101e43d799 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -239,8 +239,12 @@ extern void BufferGetTag(Buffer buffer, RelFileLocator *rlocator,
ForkNumber *forknum, BlockNumber *blknum);
extern void MarkBufferDirtyHint(Buffer buffer, bool buffer_std);
+extern void MarkBufferConverted(Buffer buffer, bool converted);
+extern bool IsBufferConverted(Buffer buffer);
extern void UnlockBuffers(void);
+extern bool IsBufferLocked(Buffer buffer);
+extern bool IsBufferLockedExclusive(Buffer buffer);
extern void LockBuffer(Buffer buffer, int mode);
extern bool ConditionalLockBuffer(Buffer buffer);
extern void LockBufferForCleanup(Buffer buffer);
@@ -266,6 +270,8 @@ extern int GetAccessStrategyBufferCount(BufferAccessStrategy strategy);
extern void FreeAccessStrategy(BufferAccessStrategy strategy);
+/* old tuple format support */
+extern void convert_page(Relation rel, Page orig_page, Buffer buf, BlockNumber blkno);
/* inline functions */
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 424ecba028..7b8ddde76a 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -14,10 +14,13 @@
#ifndef BUFPAGE_H
#define BUFPAGE_H
+#include "access/transam.h"
#include "access/xlogdefs.h"
#include "storage/block.h"
#include "storage/item.h"
#include "storage/off.h"
+#include "postgres.h"
+#include "utils/rel.h"
/*
* A postgres disk page is an abstraction layered on top of a postgres
@@ -163,12 +166,41 @@ typedef struct PageHeaderData
LocationIndex pd_upper; /* offset to end of free space */
LocationIndex pd_special; /* offset to start of special space */
uint16 pd_pagesize_version;
- TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
+ ShortTransactionId pd_prune_xid; /* oldest prunable XID, or zero if
+ * none */
ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
} PageHeaderData;
typedef PageHeaderData *PageHeader;
+
+/*
+ * HeapPageSpecialData -- data that stored at the end of each heap page.
+ *
+ * pd_xid_base - base value for transaction IDs on page
+ * pd_multi_base - base value for multixact IDs on page
+ *
+ * pd_xid_base and pd_multi_base are base values for calculation of transaction
+ * identifiers from t_xmin and t_xmax in each heap tuple header on the page.
+ */
+typedef struct HeapPageSpecialData
+{
+ TransactionId pd_xid_base; /* base value for transaction IDs on page */
+ TransactionId pd_multi_base; /* base value for multixact IDs on page */
+} HeapPageSpecialData;
+
+typedef HeapPageSpecialData *HeapPageSpecial;
+
+typedef struct ToastPageSpecialData
+{
+ TransactionId pd_xid_base; /* base value for transaction IDs on page */
+} ToastPageSpecialData;
+
+typedef ToastPageSpecialData *ToastPageSpecial;
+
+extern PGDLLIMPORT HeapPageSpecial heapDoubleXmaxSpecial;
+extern PGDLLIMPORT ToastPageSpecial toastDoubleXmaxSpecial;
+
/*
* pd_flags contains the following flag bits. Undefined bits are initialized
* to zero and may be used in the future.
@@ -200,7 +232,7 @@ typedef PageHeaderData *PageHeader;
* As of Release 9.3, the checksum version must also be considered when
* handling pages.
*/
-#define PG_PAGE_LAYOUT_VERSION 4
+#define PG_PAGE_LAYOUT_VERSION 5
#define PG_DATA_CHECKSUM_VERSION 1
/* ----------------------------------------------------------------
@@ -440,18 +472,177 @@ PageClearAllVisible(Page page)
}
/*
- * These two require "access/transam.h", so left as macros.
+ * Check if page is in "double xmax" format.
*/
-#define PageSetPrunable(page, xid) \
-do { \
- Assert(TransactionIdIsNormal(xid)); \
- if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
- TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
- ((PageHeader) (page))->pd_prune_xid = (xid); \
-} while (0)
-#define PageClearPrunable(page) \
- (((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)
+static inline bool
+HeapPageIsDoubleXmax(Page page)
+{
+ return ((PageHeader) (page))->pd_special == BLCKSZ;
+}
+/*
+ * Get pointer to HeapPageSpecialData.
+ *
+ * Can be used for non-consistent reads from non-locked pages.
+ *
+ * Return doubleXmaxSpecial when pd_special == BLCKSZ (i.e. "double xmax"
+ * format).
+ */
+static inline HeapPageSpecial
+HeapPageGetSpecialNoAssert(Page page)
+{
+ if (HeapPageIsDoubleXmax(page))
+ return heapDoubleXmaxSpecial;
+
+ return (HeapPageSpecial) ((char *) page +
+ ((PageHeader) page)->pd_special);
+}
+
+/*
+ * Get pointer to ToastPageSpecialData.
+ *
+ * Can be used for non-consistent reads from non-locked pages.
+ *
+ * Return doubleXmaxSpecial when pd_special == BLCKSZ (i.e. "double xmax"
+ * format).
+ */
+static inline ToastPageSpecial
+ToastPageGetSpecialNoAssert(Page page)
+{
+ if (HeapPageIsDoubleXmax(page))
+ return toastDoubleXmaxSpecial;
+
+ return (ToastPageSpecial) ((char *) page +
+ ((PageHeader) page)->pd_special);
+}
+
+/*
+ * Wrapper for HeapPageGetSpecialNoAssert for general use.
+ */
+static inline HeapPageSpecial
+HeapPageGetSpecial(Page page)
+{
+ if (HeapPageIsDoubleXmax(page))
+ return heapDoubleXmaxSpecial;
+
+ Assert(((PageHeader) page)->pd_special ==
+ BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData)));
+
+ return (HeapPageSpecial) ((char *) page +
+ ((PageHeader) page)->pd_special);
+}
+
+/*
+ * Wrapper for ToastPageGetSpecialNoAssert for general use.
+ */
+static inline ToastPageSpecial
+ToastPageGetSpecial(Page page)
+{
+ if (HeapPageIsDoubleXmax(page))
+ return toastDoubleXmaxSpecial;
+
+ Assert(((PageHeader) page)->pd_special ==
+ BLCKSZ - MAXALIGN(sizeof(ToastPageSpecialData)));
+
+ return (ToastPageSpecial) ((char *) page +
+ ((PageHeader) page)->pd_special);
+}
+
+/*
+ * Set pd_prune_xid.
+ */
+static inline void
+HeapPageSetPruneXid(Page page, TransactionId xid, bool is_toast)
+{
+ TransactionId base;
+
+ if (HeapPageIsDoubleXmax(page))
+ return;
+
+ if (!TransactionIdIsNormal(xid))
+ {
+ ((PageHeader) (page))->pd_prune_xid = xid;
+ return;
+ }
+
+ base = is_toast ? ToastPageGetSpecial(page)->pd_xid_base :
+ HeapPageGetSpecial(page)->pd_xid_base;
+
+ ((PageHeader) (page))->pd_prune_xid = NormalTransactionIdToShort(base, xid);
+ Assert(((PageHeader) (page))->pd_prune_xid <= MaxShortTransactionId);
+}
+
+static inline void
+ToastPageSetPruneXid(Page page, TransactionId xid)
+{
+ if (HeapPageIsDoubleXmax(page))
+ return;
+
+ if (!TransactionIdIsNormal(xid))
+ {
+ ((PageHeader) (page))->pd_prune_xid = xid;
+ return;
+ }
+
+ ((PageHeader) (page))->pd_prune_xid =
+ NormalTransactionIdToShort(ToastPageGetSpecial(page)->pd_xid_base, (xid));
+
+ Assert(((PageHeader) (page))->pd_prune_xid <= MaxShortTransactionId);
+}
+
+/*
+ * Get pd_prune_xid from locked page.
+ */
+static inline TransactionId
+HeapPageGetPruneXid(Page page, bool is_toast)
+{
+ TransactionId base;
+
+ if (HeapPageIsDoubleXmax(page))
+ return ((PageHeader) (page))->pd_prune_xid;
+
+ base = is_toast ? ToastPageGetSpecial(page)->pd_xid_base :
+ HeapPageGetSpecial(page)->pd_xid_base;
+
+ return ShortTransactionIdToNormal(base,
+ ((PageHeader) (page))->pd_prune_xid);
+}
+
+static inline void
+PageSetPrunable(Page page, TransactionId xid, bool is_toast)
+{
+ TransactionId prune_xid;
+
+ Assert(TransactionIdIsNormal(xid));
+
+ if (HeapPageIsDoubleXmax(page))
+ return;
+
+ prune_xid = HeapPageGetPruneXid(page, is_toast);
+ if ((!TransactionIdIsValid(prune_xid) ||
+ TransactionIdPrecedes(xid, prune_xid)))
+ {
+ HeapPageSetPruneXid(page, xid, is_toast);
+ }
+}
+
+/*
+ * Get pd_prune_xid from non-locked page. May return invalid value, but doen't
+ * causes assert failures.
+ */
+static inline TransactionId
+HeapPageGetPruneXidNoAssert(Page page, bool is_toast)
+{
+ TransactionId base;
+
+ if (HeapPageIsDoubleXmax(page))
+ return ((PageHeader) (page))->pd_prune_xid;
+
+ base = is_toast ? ToastPageGetSpecialNoAssert(page)->pd_xid_base :
+ HeapPageGetSpecialNoAssert(page)->pd_xid_base;
+ return ShortTransactionIdToNormal(base,
+ ((PageHeader) (page))->pd_prune_xid);
+}
/* ----------------------------------------------------------------
* extern declarations
@@ -485,6 +676,21 @@ do { \
StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)),
"BLCKSZ has to be a multiple of sizeof(size_t)");
+/*
+ * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete
+ */
+typedef struct ItemIdCompactData
+{
+ uint16 offsetindex; /* linp array index */
+ int16 itemoff; /* page offset of item data */
+ uint16 alignedlen; /* MAXALIGN(item data len) */
+} ItemIdCompactData;
+
+typedef ItemIdCompactData *ItemIdCompact;
+typedef RelationData *Relation;
+
+extern int itemoffcompare(const void *item1, const void *item2);
+
extern void PageInit(Page page, Size pageSize, Size specialSize);
extern bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags);
extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size,
@@ -493,7 +699,7 @@ extern Page PageGetTempPage(Page page);
extern Page PageGetTempPageCopy(Page page);
extern Page PageGetTempPageCopySpecial(Page page);
extern void PageRestoreTempPage(Page tempPage, Page oldPage);
-extern void PageRepairFragmentation(Page page);
+extern void PageRepairFragmentation(Page page, bool is_toast);
extern void PageTruncateLinePointerArray(Page page);
extern Size PageGetFreeSpace(Page page);
extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups);
diff --git a/src/include/storage/itemid.h b/src/include/storage/itemid.h
index e5cfb8c3cc..e24e60981b 100644
--- a/src/include/storage/itemid.h
+++ b/src/include/storage/itemid.h
@@ -78,6 +78,8 @@ typedef uint16 ItemLength;
#define ItemIdGetRedirect(itemId) \
((itemId)->lp_off)
+#define ItemIdGetTupleEnd(itemId) \
+ (MAXALIGN(ItemIdGetLength((itemId))) + ItemIdGetOffset((itemId)))
/*
* ItemIdIsValid
* True iff item identifier is valid.
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index 590c026b5b..0e58f4ccad 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -224,8 +224,8 @@ typedef struct LOCKTAG
/* ID info for a transaction is its TransactionId */
#define SET_LOCKTAG_TRANSACTION(locktag,xid) \
- ((locktag).locktag_field1 = (xid), \
- (locktag).locktag_field2 = 0, \
+ ((locktag).locktag_field1 = (uint32)((xid) & 0xFFFFFFFF), \
+ (locktag).locktag_field2 = (uint32)((xid) >> 32), \
(locktag).locktag_field3 = 0, \
(locktag).locktag_field4 = 0, \
(locktag).locktag_type = LOCKTAG_TRANSACTION, \
@@ -234,8 +234,8 @@ typedef struct LOCKTAG
/* ID info for a virtual transaction is its VirtualTransactionId */
#define SET_LOCKTAG_VIRTUALTRANSACTION(locktag,vxid) \
((locktag).locktag_field1 = (vxid).backendId, \
- (locktag).locktag_field2 = (vxid).localTransactionId, \
- (locktag).locktag_field3 = 0, \
+ (locktag).locktag_field2 = (uint32)((vxid).localTransactionId & 0xFFFFFFFF), \
+ (locktag).locktag_field3 = (uint32)((vxid).localTransactionId >> 32), \
(locktag).locktag_field4 = 0, \
(locktag).locktag_type = LOCKTAG_VIRTUALTRANSACTION, \
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
@@ -245,9 +245,9 @@ typedef struct LOCKTAG
* its speculative insert counter.
*/
#define SET_LOCKTAG_SPECULATIVE_INSERTION(locktag,xid,token) \
- ((locktag).locktag_field1 = (xid), \
- (locktag).locktag_field2 = (token), \
- (locktag).locktag_field3 = 0, \
+ ((locktag).locktag_field1 = (uint32)((xid) & 0xFFFFFFFF), \
+ (locktag).locktag_field2 = (uint32)((xid) >> 32), \
+ (locktag).locktag_field3 = (token), \
(locktag).locktag_field4 = 0, \
(locktag).locktag_type = LOCKTAG_SPECULATIVE_TOKEN, \
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 4b25961249..371b7ffefa 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -17,6 +17,7 @@
#include "access/clog.h"
#include "access/xlogdefs.h"
#include "lib/ilist.h"
+#include "port/atomics.h"
#include "storage/latch.h"
#include "storage/lock.h"
#include "storage/pg_sema.h"
@@ -176,12 +177,12 @@ struct PGPROC
Latch procLatch; /* generic latch for process */
- TransactionId xid; /* id of top-level transaction currently being
+ pg_atomic_uint64 xid; /* id of top-level transaction currently being
* executed by this proc, if running and XID
* is assigned; else InvalidTransactionId.
* mirrored in ProcGlobal->xids[pgxactoff] */
- TransactionId xmin; /* minimal running XID as it was when we were
+ pg_atomic_uint64 xmin; /* minimal running XID as it was when we were
* starting our xact, excluding LAZY VACUUM:
* vacuum must not remove tuples deleted by
* xid >= xmin ! */
@@ -368,7 +369,7 @@ typedef struct PROC_HDR
PGPROC *allProcs;
/* Array mirroring PGPROC.xid for each PGPROC currently in the procarray */
- TransactionId *xids;
+ pg_atomic_uint64 *xids;
/*
* Array mirroring PGPROC.subxidStatus for each PGPROC currently in the
diff --git a/src/include/utils/combocid.h b/src/include/utils/combocid.h
index 2b496ee634..3f49819468 100644
--- a/src/include/utils/combocid.h
+++ b/src/include/utils/combocid.h
@@ -15,7 +15,7 @@
#define COMBOCID_H
/*
- * HeapTupleHeaderGetCmin and HeapTupleHeaderGetCmax function prototypes
+ * HeapTupleGetCmin and HeapTupleGetCmax function prototypes
* are in access/htup.h, because that's where the macro definitions that
* those functions replaced used to be.
*/
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 0ad613c4b8..1d78a56161 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -311,12 +311,12 @@ typedef struct AutoVacOpts
int vacuum_ins_threshold;
int analyze_threshold;
int vacuum_cost_limit;
- int freeze_min_age;
- int freeze_max_age;
- int freeze_table_age;
- int multixact_freeze_min_age;
- int multixact_freeze_max_age;
- int multixact_freeze_table_age;
+ int64 freeze_min_age;
+ int64 freeze_max_age;
+ int64 freeze_table_age;
+ int64 multixact_freeze_min_age;
+ int64 multixact_freeze_max_age;
+ int64 multixact_freeze_table_age;
int log_min_duration;
float8 vacuum_cost_delay;
float8 vacuum_scale_factor;
diff --git a/src/include/utils/xid8.h b/src/include/utils/xid8.h
index 2f5e14baad..6401904ad2 100644
--- a/src/include/utils/xid8.h
+++ b/src/include/utils/xid8.h
@@ -17,13 +17,13 @@
static inline FullTransactionId
DatumGetFullTransactionId(Datum X)
{
- return FullTransactionIdFromU64(DatumGetUInt64(X));
+ return FullTransactionIdFromXid(DatumGetUInt64(X));
}
static inline Datum
FullTransactionIdGetDatum(FullTransactionId X)
{
- return UInt64GetDatum(U64FromFullTransactionId(X));
+ return UInt64GetDatum(XidFromFullTransactionId(X));
}
#define PG_GETARG_FULLTRANSACTIONID(X) DatumGetFullTransactionId(PG_GETARG_DATUM(X))
diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c
index d68ad7be34..650d045b07 100644
--- a/src/pl/plperl/plperl.c
+++ b/src/pl/plperl/plperl.c
@@ -2680,7 +2680,7 @@ validate_plperl_function(plperl_proc_ptr *proc_ptr, HeapTuple procTup)
* This is needed because CREATE OR REPLACE FUNCTION can modify the
* function's pg_proc entry without changing its OID.
************************************************************/
- uptodate = (prodesc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) &&
+ uptodate = (prodesc->fn_xmin == HeapTupleGetRawXmin(procTup) &&
ItemPointerEquals(&prodesc->fn_tid, &procTup->t_self));
if (uptodate)
@@ -2804,7 +2804,7 @@ compile_plperl_function(Oid fn_oid, bool is_trigger, bool is_event_trigger)
MemoryContextSetIdentifier(proc_cxt, prodesc->proname);
prodesc->fn_cxt = proc_cxt;
prodesc->fn_refcount = 0;
- prodesc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
+ prodesc->fn_xmin = HeapTupleGetRawXmin(procTup);
prodesc->fn_tid = procTup->t_self;
prodesc->nargs = procStruct->pronargs;
prodesc->arg_out_func = (FmgrInfo *) palloc0(prodesc->nargs * sizeof(FmgrInfo));
diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c
index a341cde2c1..7686841265 100644
--- a/src/pl/plpgsql/src/pl_comp.c
+++ b/src/pl/plpgsql/src/pl_comp.c
@@ -171,7 +171,7 @@ recheck:
if (function)
{
/* We have a compiled function, but is it still valid? */
- if (function->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) &&
+ if (function->fn_xmin == HeapTupleGetRawXmin(procTup) &&
ItemPointerEquals(&function->fn_tid, &procTup->t_self))
function_valid = true;
else
@@ -344,7 +344,7 @@ do_compile(FunctionCallInfo fcinfo,
function->fn_signature = format_procedure(fcinfo->flinfo->fn_oid);
MemoryContextSetIdentifier(func_cxt, function->fn_signature);
function->fn_oid = fcinfo->flinfo->fn_oid;
- function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
+ function->fn_xmin = HeapTupleGetRawXmin(procTup);
function->fn_tid = procTup->t_self;
function->fn_input_collation = fcinfo->fncollation;
function->fn_cxt = func_cxt;
diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c
index f8c7f48747..2ae9a58484 100644
--- a/src/pl/plpgsql/src/pl_exec.c
+++ b/src/pl/plpgsql/src/pl_exec.c
@@ -7394,6 +7394,7 @@ deconstruct_composite_datum(Datum value, HeapTupleData *tmptup)
tmptup->t_len = HeapTupleHeaderGetDatumLength(td);
ItemPointerSetInvalid(&(tmptup->t_self));
tmptup->t_tableOid = InvalidOid;
+ HeapTupleSetZeroXids(tmptup);
tmptup->t_data = td;
/* Extract rowtype info and find a tupdesc */
@@ -7568,6 +7569,7 @@ exec_move_row_from_datum(PLpgSQL_execstate *estate,
tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
+ HeapTupleSetZeroXids(&tmptup);
tmptup.t_data = td;
/* Extract rowtype info */
diff --git a/src/pl/plpython/plpy_procedure.c b/src/pl/plpython/plpy_procedure.c
index 79b6ef6a44..f89d909104 100644
--- a/src/pl/plpython/plpy_procedure.c
+++ b/src/pl/plpython/plpy_procedure.c
@@ -178,7 +178,7 @@ PLy_procedure_create(HeapTuple procTup, Oid fn_oid, bool is_trigger)
proc->proname = pstrdup(NameStr(procStruct->proname));
MemoryContextSetIdentifier(cxt, proc->proname);
proc->pyname = pstrdup(procName);
- proc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
+ proc->fn_xmin = HeapTupleGetRawXmin(procTup);
proc->fn_tid = procTup->t_self;
proc->fn_readonly = (procStruct->provolatile != PROVOLATILE_VOLATILE);
proc->is_setof = procStruct->proretset;
@@ -417,7 +417,7 @@ PLy_procedure_valid(PLyProcedure *proc, HeapTuple procTup)
return false;
/* If the pg_proc tuple has changed, it's not valid */
- if (!(proc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) &&
+ if (!(proc->fn_xmin == HeapTupleGetRawXmin(procTup) &&
ItemPointerEquals(&proc->fn_tid, &procTup->t_self)))
return false;
diff --git a/src/pl/tcl/pltcl.c b/src/pl/tcl/pltcl.c
index 6187e15781..485331a134 100644
--- a/src/pl/tcl/pltcl.c
+++ b/src/pl/tcl/pltcl.c
@@ -1428,7 +1428,7 @@ compile_pltcl_function(Oid fn_oid, Oid tgreloid,
* function's pg_proc entry without changing its OID.
************************************************************/
if (prodesc != NULL &&
- prodesc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) &&
+ prodesc->fn_xmin == HeapTupleGetRawXmin(procTup) &&
ItemPointerEquals(&prodesc->fn_tid, &procTup->t_self))
{
/* It's still up-to-date, so we can use it */
@@ -1494,7 +1494,7 @@ compile_pltcl_function(Oid fn_oid, Oid tgreloid,
prodesc->internal_proname = pstrdup(internal_proname);
prodesc->fn_cxt = proc_cxt;
prodesc->fn_refcount = 0;
- prodesc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
+ prodesc->fn_xmin = HeapTupleGetRawXmin(procTup);
prodesc->fn_tid = procTup->t_self;
prodesc->nargs = procStruct->pronargs;
prodesc->arg_out_func = (FmgrInfo *) palloc0(prodesc->nargs * sizeof(FmgrInfo));
diff --git a/src/test/Makefile b/src/test/Makefile
index dbd3192874..8e0f39289e 100644
--- a/src/test/Makefile
+++ b/src/test/Makefile
@@ -12,7 +12,8 @@ subdir = src/test
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
-SUBDIRS = perl regress isolation modules authentication recovery subscription
+SUBDIRS = perl regress isolation modules authentication recovery subscription \
+ xid-64
ifeq ($(with_icu),yes)
SUBDIRS += icu
diff --git a/src/test/meson.build b/src/test/meson.build
index 5f3c9c2ba2..e9f504c000 100644
--- a/src/test/meson.build
+++ b/src/test/meson.build
@@ -7,6 +7,7 @@ subdir('authentication')
subdir('recovery')
subdir('subscription')
subdir('modules')
+subdir('xid-64')
if ssl.found()
subdir('ssl')
diff --git a/src/test/modules/test_lfind/test_lfind.c b/src/test/modules/test_lfind/test_lfind.c
index e2e8b7389f..46553cbe89 100644
--- a/src/test/modules/test_lfind/test_lfind.c
+++ b/src/test/modules/test_lfind/test_lfind.c
@@ -120,29 +120,29 @@ Datum
test_lfind32(PG_FUNCTION_ARGS)
{
#define TEST_ARRAY_SIZE 135
- uint32 test_array[TEST_ARRAY_SIZE] = {0};
+ uint64 test_array[TEST_ARRAY_SIZE] = {0};
test_array[8] = 1;
test_array[64] = 2;
test_array[TEST_ARRAY_SIZE - 1] = 3;
- if (pg_lfind32(1, test_array, 4))
- elog(ERROR, "pg_lfind32() found nonexistent element");
- if (!pg_lfind32(1, test_array, TEST_ARRAY_SIZE))
- elog(ERROR, "pg_lfind32() did not find existing element");
+ if (pg_lfind64(1, test_array, 4))
+ elog(ERROR, "pg_lfind64() found nonexistent element");
+ if (!pg_lfind64(1, test_array, TEST_ARRAY_SIZE))
+ elog(ERROR, "pg_lfind64() did not find existing element");
- if (pg_lfind32(2, test_array, 32))
- elog(ERROR, "pg_lfind32() found nonexistent element");
- if (!pg_lfind32(2, test_array, TEST_ARRAY_SIZE))
- elog(ERROR, "pg_lfind32() did not find existing element");
+ if (pg_lfind64(2, test_array, 32))
+ elog(ERROR, "pg_lfind64() found nonexistent element");
+ if (!pg_lfind64(2, test_array, TEST_ARRAY_SIZE))
+ elog(ERROR, "pg_lfind64() did not find existing element");
- if (pg_lfind32(3, test_array, 96))
- elog(ERROR, "pg_lfind32() found nonexistent element");
- if (!pg_lfind32(3, test_array, TEST_ARRAY_SIZE))
- elog(ERROR, "pg_lfind32() did not find existing element");
+ if (pg_lfind64(3, test_array, 96))
+ elog(ERROR, "pg_lfind64() found nonexistent element");
+ if (!pg_lfind64(3, test_array, TEST_ARRAY_SIZE))
+ elog(ERROR, "pg_lfind64() did not find existing element");
- if (pg_lfind32(4, test_array, TEST_ARRAY_SIZE))
- elog(ERROR, "pg_lfind32() found nonexistent element");
+ if (pg_lfind64(4, test_array, TEST_ARRAY_SIZE))
+ elog(ERROR, "pg_lfind64() found nonexistent element");
PG_RETURN_VOID();
}
diff --git a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm
index e34dfb9243..bcf4ffac55 100644
--- a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm
+++ b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm
@@ -119,6 +119,10 @@ sub adjust_database_contents
'drop table public.gtest_normal_child2');
}
+ # Can't upgrade xid type
+ _add_st($result, 'regression',
+ 'alter table public.tab_core_types drop column xid');
+
# stuff not supported from release 14
if ($old_version < 14)
{
diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl
index e882ce2077..1f2d710cf1 100644
--- a/src/test/recovery/t/003_recovery_targets.pl
+++ b/src/test/recovery/t/003_recovery_targets.pl
@@ -57,7 +57,7 @@ $node_primary->init(has_archiving => 1, allows_streaming => 1);
# Bump the transaction ID epoch. This is useful to stress the portability
# of recovery_target_xid parsing.
-system_or_bail('pg_resetwal', '--epoch', '1', $node_primary->data_dir);
+system_or_bail('pg_resetwal', $node_primary->data_dir);
# Start it
$node_primary->start;
diff --git a/src/test/recovery/t/039_end_of_wal.pl b/src/test/recovery/t/039_end_of_wal.pl
index d2bf062bb2..d4aca99995 100644
--- a/src/test/recovery/t/039_end_of_wal.pl
+++ b/src/test/recovery/t/039_end_of_wal.pl
@@ -21,7 +21,7 @@ use integer; # causes / operator to use integer math
my $BIG_ENDIAN = pack("L", 0x12345678) eq pack("N", 0x12345678);
# Header size of record header.
-my $RECORD_HEADER_SIZE = 24;
+my $RECORD_HEADER_SIZE = 26;
# Fields retrieved from code headers.
my @scan_result = scan_server_header('access/xlog_internal.h',
@@ -131,17 +131,21 @@ sub build_record_header
# This needs to follow the structure XLogRecord:
# I for xl_tot_len
- # I for xl_xid
+ # I for xl_crc
+ # II for xl_xid
# II for xl_prev
# C for xl_info
# C for xl_rmid
- # BB for two bytes of padding
- # I for xl_crc
- return pack("IIIICCBBI",
- $xl_tot_len, $xl_xid,
+ # BBBBBB for two bytes of padding
+ return pack("IIIIIICCBBBBBB",
+ $xl_tot_len,
+ $xl_crc,
+ $BIG_ENDIAN ? 0 : $xl_xid,
+ $BIG_ENDIAN ? $xl_xid : 0,
$BIG_ENDIAN ? 0 : $xl_prev,
$BIG_ENDIAN ? $xl_prev : 0,
- $xl_info, $xl_rmid, 0, 0, $xl_crc);
+ $xl_info, $xl_rmid,
+ 0, 0, 0, 0, 0, 0);
}
# Build a fake WAL page header, based on the data given by the caller
@@ -265,7 +269,7 @@ $node->stop('immediate');
my $log_size = -s $node->logfile;
$node->start;
ok( $node->log_contains(
- "invalid record length at .*: expected at least 24, got 0", $log_size
+ "invalid record length at .*: expected at least 26, got 0", $log_size
),
"xl_tot_len zero");
@@ -277,7 +281,7 @@ write_wal($node, $TLI, $end_lsn, build_record_header(23));
$log_size = -s $node->logfile;
$node->start;
ok( $node->log_contains(
- "invalid record length at .*: expected at least 24, got 23",
+ "invalid record length at .*: expected at least 26, got 23",
$log_size),
"xl_tot_len short");
@@ -290,7 +294,7 @@ write_wal($node, $TLI, $end_lsn, build_record_header(1));
$log_size = -s $node->logfile;
$node->start;
ok( $node->log_contains(
- "invalid record length at .*: expected at least 24, got 1", $log_size
+ "invalid record length at .*: expected at least 26, got 1", $log_size
),
"xl_tot_len short at end-of-page");
diff --git a/src/test/regress/expected/indirect_toast.out b/src/test/regress/expected/indirect_toast.out
index 44b54dc37f..313482b866 100644
--- a/src/test/regress/expected/indirect_toast.out
+++ b/src/test/regress/expected/indirect_toast.out
@@ -161,6 +161,14 @@ SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest;
("one-toasted,one-null, via indirect",0,1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
(5 rows)
+create or replace function random_string(len integer) returns text as $$
+select substr((select string_agg(r,'') from (select random()::text as r from generate_series(1,(len+15)/16)) s1), 1, len);
+$$ language sql;
+create table toasttest_main(t text);
+alter table toasttest_main alter column t set storage main;
+insert into toasttest_main (select random_string(len) from generate_series(7000,8000) len);
DROP TABLE indtoasttest;
+DROP TABLE toasttest_main;
DROP FUNCTION update_using_indirect();
+DROP FUNCTION random_string(integer);
RESET default_toast_compression;
diff --git a/src/test/regress/expected/insert.out b/src/test/regress/expected/insert.out
index dd4354fc7d..d52545b443 100644
--- a/src/test/regress/expected/insert.out
+++ b/src/test/regress/expected/insert.out
@@ -100,7 +100,7 @@ SELECT pg_size_pretty(pg_relation_size('large_tuple_test'::regclass, 'main'));
INSERT INTO large_tuple_test (select 3, NULL);
-- now this tuple won't fit on the second page, but the insert should
-- still succeed by extending the relation
-INSERT INTO large_tuple_test (select 4, repeat('a', 8126));
+INSERT INTO large_tuple_test (select 4, repeat('a', 8112));
DROP TABLE large_tuple_test;
--
-- check indirection (field/array assignment), cf bug #14265
@@ -980,3 +980,17 @@ insert into returningwrtest values (2, 'foo') returning returningwrtest;
(1 row)
drop table returningwrtest;
+-- Check for MaxHeapTupleSize
+create table maxheaptuplesize_test(value text);
+alter table maxheaptuplesize_test alter column value set storage external;
+insert into maxheaptuplesize_test values (repeat('x', 8104));
+insert into maxheaptuplesize_test values (repeat('x', 8112));
+insert into maxheaptuplesize_test values (repeat('x', 8120));
+insert into maxheaptuplesize_test values (repeat('x', 8128));
+insert into maxheaptuplesize_test values (repeat('x', 8136));
+insert into maxheaptuplesize_test values (repeat('x', 8144));
+insert into maxheaptuplesize_test values (repeat('x', 8152));
+insert into maxheaptuplesize_test values (repeat('x', 8160));
+insert into maxheaptuplesize_test values (repeat('x', 8168));
+insert into maxheaptuplesize_test values (repeat('x', 8176));
+drop table maxheaptuplesize_test;
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 7610b011d6..c34bef3fb5 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -197,7 +197,7 @@ WHERE p1.oid != p2.oid AND
ORDER BY 1, 2;
proargtypes | proargtypes
-----------------------------+--------------------------
- integer | xid
+ bigint | xid
timestamp without time zone | timestamp with time zone
bit | bit varying
txid_snapshot | pg_snapshot
@@ -736,7 +736,7 @@ int8(oid)
tideq(tid,tid)
timestamptz_cmp(timestamp with time zone,timestamp with time zone)
interval_cmp(interval,interval)
-xideqint4(xid,integer)
+xideqint8(xid,bigint)
timetz_eq(time with time zone,time with time zone)
timetz_ne(time with time zone,time with time zone)
timetz_lt(time with time zone,time with time zone)
@@ -850,7 +850,7 @@ pg_lsn_gt(pg_lsn,pg_lsn)
pg_lsn_ne(pg_lsn,pg_lsn)
pg_lsn_cmp(pg_lsn,pg_lsn)
xidneq(xid,xid)
-xidneqint4(xid,integer)
+xidneqint8(xid,bigint)
sha224(bytea)
sha256(bytea)
sha384(bytea)
diff --git a/src/test/regress/expected/select_views.out b/src/test/regress/expected/select_views.out
index 1aeed8452b..804d9914e8 100644
--- a/src/test/regress/expected/select_views.out
+++ b/src/test/regress/expected/select_views.out
@@ -2,9 +2,22 @@
-- SELECT_VIEWS
-- test the views defined in CREATE_VIEWS
--
-SELECT * FROM street;
+SELECT * FROM street ORDER BY name COLLATE "C", thepath::text COLLATE "C", cname COLLATE "C";
name | thepath | cname
------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------
+ 100th Ave | [(-122.1657,37.429),(-122.1647,37.432)] | Oakland
+ 107th Ave | [(-122.1555,37.403),(-122.1531,37.41)] | Oakland
+ 14th St | [(-122.299,37.147),(-122.3,37.148)] | Lafayette
+ 19th Ave | [(-122.2366,37.897),(-122.2359,37.905)] | Berkeley
+ 1st St | [(-121.75508,37.89294),(-121.753581,37.90031)] | Oakland
+ 5th St | [(-122.278,37),(-122.2792,37.005),(-122.2803,37.009)] | Lafayette
+ 5th St | [(-122.296,37.615),(-122.2953,37.598)] | Berkeley
+ 82nd Ave | [(-122.1695,37.596),(-122.1681,37.603)] | Berkeley
+ 85th Ave | [(-122.1877,37.466),(-122.186,37.476)] | Oakland
+ 89th Ave | [(-122.1822,37.459),(-122.1803,37.471)] | Oakland
+ 98th Ave | [(-122.1568,37.498),(-122.1558,37.502)] | Oakland
+ 98th Ave | [(-122.1693,37.438),(-122.1682,37.444)] | Oakland
+ 98th Ave | [(-122.2001,37.258),(-122.1974,37.27)] | Lafayette
Access Rd 25 | [(-121.9283,37.894),(-121.9283,37.9)] | Oakland
Ada St | [(-122.2487,37.398),(-122.2496,37.401)] | Lafayette
Agua Fria Creek | [(-121.9254,37.922),(-121.9281,37.889)] | Oakland
@@ -22,10 +35,10 @@ SELECT * FROM street;
Arroyo Las Positas | [(-121.7973,37.997),(-121.7957,37.005)] | Oakland
Arroyo Seco | [(-121.7073,37.766),(-121.6997,37.729)] | Oakland
Ash St | [(-122.0408,37.31),(-122.04,37.292)] | Oakland
- Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Oakland
Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Berkeley
- Avenue 140th | [(-122.1656,37.003),(-122.1691,37.988)] | Oakland
+ Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Oakland
Avenue 140th | [(-122.1656,37.003),(-122.1691,37.988)] | Berkeley
+ Avenue 140th | [(-122.1656,37.003),(-122.1691,37.988)] | Oakland
Avenue D | [(-122.298,37.848),(-122.3024,37.849)] | Berkeley
B St | [(-122.1749,37.451),(-122.1743,37.443)] | Oakland
Bancroft Ave | [(-122.15714,37.4242),(-122.156,37.409)] | Oakland
@@ -37,9 +50,9 @@ SELECT * FROM street;
Broadmore Ave | [(-122.095,37.522),(-122.0936,37.497)] | Oakland
Broadway | [(-122.2409,37.586),(-122.2395,37.601)] | Berkeley
Buckingham Blvd | [(-122.2231,37.59),(-122.2214,37.606)] | Berkeley
+ Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Berkeley
Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Oakland
Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Oakland
- Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Berkeley
C St | [(-122.1768,37.46),(-122.1749,37.435)] | Oakland
Calaveras Creek | [(-121.8203,37.035),(-121.8207,37.931)] | Oakland
Calaveras Creek | [(-121.8203,37.035),(-121.8207,37.931)] | Oakland
@@ -60,9 +73,9 @@ SELECT * FROM street;
Chapman Dr | [(-122.0421,37.504),(-122.0414,37.498)] | Oakland
Charles St | [(-122.0255,37.505),(-122.0252,37.499)] | Oakland
Cherry St | [(-122.0437,37.42),(-122.0434,37.413)] | Oakland
+ Claremont Pl | [(-122.0542,37.995),(-122.0542,37.008)] | Berkeley
Claremont Pl | [(-122.0542,37.995),(-122.0542,37.008)] | Oakland
Claremont Pl | [(-122.0542,37.995),(-122.0542,37.008)] | Oakland
- Claremont Pl | [(-122.0542,37.995),(-122.0542,37.008)] | Berkeley
Coliseum Way | [(-122.2001,37.47),(-122.1978,37.516)] | Oakland
Coliseum Way | [(-122.2113,37.626),(-122.2085,37.592),(-122.2063,37.568)] | Berkeley
Coolidge Ave | [(-122.2007,37.058),(-122.1992,37.06)] | Lafayette
@@ -77,9 +90,9 @@ SELECT * FROM street;
Cull Canyon Road | [(-122.0536,37.435),(-122.0499,37.315)] | Oakland
Cull Creek | [(-122.0624,37.875),(-122.0582,37.527)] | Berkeley
D St | [(-122.1811,37.505),(-122.1805,37.497)] | Oakland
+ Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Berkeley
Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Oakland
Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Oakland
- Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Berkeley
Deering St | [(-122.2146,37.904),(-122.2126,37.897)] | Berkeley
Dimond Ave | [(-122.2167,37.994),(-122.2162,37.006)] | Berkeley
Dimond Ave | [(-122.2167,37.994),(-122.2162,37.006)] | Lafayette
@@ -89,9 +102,9 @@ SELECT * FROM street;
Driscoll Road | [(-121.9482,37.403),(-121.948451,37.39995)] | Oakland
E St | [(-122.1832,37.505),(-122.1826,37.498),(-122.182,37.49)] | Oakland
Eden Ave | [(-122.1143,37.505),(-122.1142,37.491)] | Oakland
+ Eden Creek | [(-122.022037,37.00675),(-122.0221,37.998)] | Berkeley
Eden Creek | [(-122.022037,37.00675),(-122.0221,37.998)] | Oakland
Eden Creek | [(-122.022037,37.00675),(-122.0221,37.998)] | Oakland
- Eden Creek | [(-122.022037,37.00675),(-122.0221,37.998)] | Berkeley
Edgewater Dr | [(-122.201,37.379),(-122.2042,37.41)] | Lafayette
Enos Way | [(-121.7677,37.896),(-121.7673,37.91)] | Oakland
Euclid Ave | [(-122.2671,37.009),(-122.2666,37.987)] | Berkeley
@@ -106,8 +119,8 @@ SELECT * FROM street;
Harris Road | [(-122.0659,37.372),(-122.0675,37.363)] | Oakland
Heartwood Dr | [(-122.2006,37.341),(-122.1992,37.338)] | Lafayette
Hegenberger Exwy | [(-122.1946,37.52),(-122.1947,37.497)] | Oakland
- Herrier St | [(-122.1943,37.006),(-122.1936,37.998)] | Oakland
Herrier St | [(-122.1943,37.006),(-122.1936,37.998)] | Berkeley
+ Herrier St | [(-122.1943,37.006),(-122.1936,37.998)] | Oakland
Hesperian Blvd | [(-122.097,37.333),(-122.0956,37.31),(-122.0946,37.293)] | Oakland
Hesperian Blvd | [(-122.097,37.333),(-122.0956,37.31),(-122.0946,37.293)] | Oakland
Hesperian Blvd | [(-122.1132,37.6),(-122.1123,37.586)] | Berkeley
@@ -117,10 +130,10 @@ SELECT * FROM street;
I- 580 | [(-121.9322,37.989),(-121.9243,37.006),(-121.9217,37.014)] | Oakland
I- 580 | [(-122.018,37.019),(-122.0009,37.032),(-121.9787,37.983),(-121.958,37.984),(-121.9571,37.986)] | Oakland
I- 580 | [(-122.018,37.019),(-122.0009,37.032),(-121.9787,37.983),(-121.958,37.984),(-121.9571,37.986)] | Oakland
- I- 580 | [(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Oakland
I- 580 | [(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Berkeley
- I- 580 | [(-122.1543,37.703),(-122.1535,37.694),(-122.1512,37.655),(-122.1475,37.603),(-122.1468,37.583),(-122.1472,37.569),(-122.149044,37.54874),(-122.1493,37.546),(-122.1501,37.532),(-122.1506,37.509),(-122.1495,37.482),(-122.1487,37.467),(-122.1477,37.447),(-122.1414,37.383),(-122.1404,37.376),(-122.1398,37.372),(-122.139,37.356),(-122.1388,37.353),(-122.1385,37.34),(-122.1382,37.33),(-122.1378,37.316)] | Oakland
+ I- 580 | [(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Oakland
I- 580 | [(-122.1543,37.703),(-122.1535,37.694),(-122.1512,37.655),(-122.1475,37.603),(-122.1468,37.583),(-122.1472,37.569),(-122.149044,37.54874),(-122.1493,37.546),(-122.1501,37.532),(-122.1506,37.509),(-122.1495,37.482),(-122.1487,37.467),(-122.1477,37.447),(-122.1414,37.383),(-122.1404,37.376),(-122.1398,37.372),(-122.139,37.356),(-122.1388,37.353),(-122.1385,37.34),(-122.1382,37.33),(-122.1378,37.316)] | Berkeley
+ I- 580 | [(-122.1543,37.703),(-122.1535,37.694),(-122.1512,37.655),(-122.1475,37.603),(-122.1468,37.583),(-122.1472,37.569),(-122.149044,37.54874),(-122.1493,37.546),(-122.1501,37.532),(-122.1506,37.509),(-122.1495,37.482),(-122.1487,37.467),(-122.1477,37.447),(-122.1414,37.383),(-122.1404,37.376),(-122.1398,37.372),(-122.139,37.356),(-122.1388,37.353),(-122.1385,37.34),(-122.1382,37.33),(-122.1378,37.316)] | Oakland
I- 580 | [(-122.2197,37.99),(-122.22,37.99),(-122.222092,37.99523),(-122.2232,37.998),(-122.224146,37.99963),(-122.2261,37.003),(-122.2278,37.007),(-122.2302,37.026),(-122.2323,37.043),(-122.2344,37.059),(-122.235405,37.06427),(-122.2365,37.07)] | Berkeley
I- 580 | [(-122.2197,37.99),(-122.22,37.99),(-122.222092,37.99523),(-122.2232,37.998),(-122.224146,37.99963),(-122.2261,37.003),(-122.2278,37.007),(-122.2302,37.026),(-122.2323,37.043),(-122.2344,37.059),(-122.235405,37.06427),(-122.2365,37.07)] | Lafayette
I- 580 Ramp | [(-121.8521,37.011),(-121.8479,37.999),(-121.8476,37.999),(-121.8456,37.01),(-121.8455,37.011)] | Oakland
@@ -136,8 +149,8 @@ SELECT * FROM street;
I- 580 Ramp | [(-122.0941,37.897),(-122.0943,37.902)] | Berkeley
I- 580 Ramp | [(-122.096,37.888),(-122.0962,37.891),(-122.0964,37.9)] | Berkeley
I- 580 Ramp | [(-122.101,37.898),(-122.1005,37.902),(-122.0989,37.911)] | Berkeley
- I- 580 Ramp | [(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Oakland
I- 580 Ramp | [(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Berkeley
+ I- 580 Ramp | [(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Oakland
I- 580 Ramp | [(-122.1414,37.383),(-122.1407,37.376),(-122.1403,37.372),(-122.139,37.356)] | Oakland
I- 580/I-680 Ramp | ((-121.9207,37.988),(-121.9192,37.016)) | Oakland
I- 580/I-680 Ramp | ((-121.9207,37.988),(-121.9192,37.016)) | Oakland
@@ -158,16 +171,16 @@ SELECT * FROM street;
I- 880 | ((-121.9669,37.075),(-121.9663,37.071),(-121.9656,37.065),(-121.9618,37.037),(-121.95689,37),(-121.948,37.933)) | Oakland
I- 880 | [(-121.948,37.933),(-121.9471,37.925),(-121.9467,37.923),(-121.946,37.918),(-121.9452,37.912),(-121.937,37.852)] | Oakland
I- 880 | [(-122.0219,37.466),(-122.0205,37.447),(-122.020331,37.44447),(-122.020008,37.43962),(-122.0195,37.432),(-122.0193,37.429),(-122.0164,37.393),(-122.010219,37.34771),(-122.0041,37.313)] | Oakland
- I- 880 | [(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Oakland
I- 880 | [(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Berkeley
+ I- 880 | [(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Oakland
+ I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Berkeley
I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Oakland
I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Oakland
- I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Berkeley
I- 880 | [(-122.0831,37.312),(-122.0819,37.296),(-122.081,37.285),(-122.0786,37.248),(-122.078,37.24),(-122.077642,37.23496),(-122.076983,37.22567),(-122.076599,37.22026),(-122.076229,37.21505),(-122.0758,37.209)] | Oakland
I- 880 | [(-122.0978,37.528),(-122.096,37.496),(-122.0931,37.453),(-122.09277,37.4496),(-122.090189,37.41442),(-122.0896,37.405),(-122.085,37.34)] | Oakland
I- 880 | [(-122.1365,37.902),(-122.1358,37.898),(-122.1333,37.881),(-122.1323,37.874),(-122.1311,37.866),(-122.1308,37.865),(-122.1307,37.864),(-122.1289,37.851),(-122.1277,37.843),(-122.1264,37.834),(-122.1231,37.812),(-122.1165,37.766),(-122.1104,37.72),(-122.109695,37.71094),(-122.109,37.702),(-122.108312,37.69168),(-122.1076,37.681)] | Berkeley
- I- 880 | [(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Oakland
I- 880 | [(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Berkeley
+ I- 880 | [(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Oakland
I- 880 | [(-122.2214,37.711),(-122.2202,37.699),(-122.2199,37.695),(-122.219,37.682),(-122.2184,37.672),(-122.2173,37.652),(-122.2159,37.638),(-122.2144,37.616),(-122.2138,37.612),(-122.2135,37.609),(-122.212,37.592),(-122.2116,37.586),(-122.2111,37.581)] | Berkeley
I- 880 | [(-122.2707,37.975),(-122.2693,37.972),(-122.2681,37.966),(-122.267,37.962),(-122.2659,37.957),(-122.2648,37.952),(-122.2636,37.946),(-122.2625,37.935),(-122.2617,37.927),(-122.2607,37.921),(-122.2593,37.916),(-122.258,37.911),(-122.2536,37.898),(-122.2432,37.858),(-122.2408,37.845),(-122.2386,37.827),(-122.2374,37.811)] | Berkeley
I- 880 Ramp | [(-122.0019,37.301),(-122.002,37.293)] | Oakland
@@ -175,12 +188,12 @@ SELECT * FROM street;
I- 880 Ramp | [(-122.0041,37.313),(-122.0038,37.308),(-122.0039,37.284),(-122.0013,37.287),(-121.9995,37.289)] | Oakland
I- 880 Ramp | [(-122.0236,37.488),(-122.0231,37.458),(-122.0227,37.458),(-122.0223,37.452),(-122.0205,37.447)] | Oakland
I- 880 Ramp | [(-122.0238,37.491),(-122.0215,37.483),(-122.0211,37.477),(-122.0205,37.447)] | Oakland
+ I- 880 Ramp | [(-122.059,37.982),(-122.0577,37.984),(-122.0612,37.003)] | Berkeley
I- 880 Ramp | [(-122.059,37.982),(-122.0577,37.984),(-122.0612,37.003)] | Oakland
I- 880 Ramp | [(-122.059,37.982),(-122.0577,37.984),(-122.0612,37.003)] | Oakland
- I- 880 Ramp | [(-122.059,37.982),(-122.0577,37.984),(-122.0612,37.003)] | Berkeley
+ I- 880 Ramp | [(-122.0618,37.011),(-122.0631,37.982),(-122.0585,37.967)] | Berkeley
I- 880 Ramp | [(-122.0618,37.011),(-122.0631,37.982),(-122.0585,37.967)] | Oakland
I- 880 Ramp | [(-122.0618,37.011),(-122.0631,37.982),(-122.0585,37.967)] | Oakland
- I- 880 Ramp | [(-122.0618,37.011),(-122.0631,37.982),(-122.0585,37.967)] | Berkeley
I- 880 Ramp | [(-122.085,37.34),(-122.0801,37.316),(-122.081,37.285)] | Oakland
I- 880 Ramp | [(-122.085,37.34),(-122.0801,37.316),(-122.081,37.285)] | Oakland
I- 880 Ramp | [(-122.085,37.34),(-122.0866,37.316),(-122.0819,37.296)] | Oakland
@@ -212,26 +225,26 @@ SELECT * FROM street;
Livermore Ave | [(-121.7687,37.448),(-121.769,37.375)] | Oakland
Livermore Ave | [(-121.772719,37.99085),(-121.7728,37.001)] | Oakland
Livermore Ave | [(-121.772719,37.99085),(-121.7728,37.001)] | Oakland
- Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Oakland
Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Berkeley
+ Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Oakland
Logan Ct | [(-122.0053,37.492),(-122.0061,37.484)] | Oakland
Magnolia St | [(-122.0971,37.5),(-122.0962,37.484)] | Oakland
Mandalay Road | [(-122.2322,37.397),(-122.2321,37.403)] | Lafayette
Marin Ave | [(-122.2741,37.894),(-122.272,37.901)] | Berkeley
Martin Luther King Jr Way | [(-122.2712,37.608),(-122.2711,37.599)] | Berkeley
Mattos Dr | [(-122.0005,37.502),(-122.000898,37.49683)] | Oakland
- Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Oakland
Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Berkeley
- McClure Ave | [(-122.1431,37.001),(-122.1436,37.998)] | Oakland
+ Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Oakland
McClure Ave | [(-122.1431,37.001),(-122.1436,37.998)] | Berkeley
+ McClure Ave | [(-122.1431,37.001),(-122.1436,37.998)] | Oakland
Medlar Dr | [(-122.0627,37.378),(-122.0625,37.375)] | Oakland
Mildred Ct | [(-122.0002,37.388),(-121.9998,37.386)] | Oakland
Miller Road | [(-122.0902,37.645),(-122.0865,37.545)] | Berkeley
Miramar Ave | [(-122.1009,37.025),(-122.099089,37.03209)] | Oakland
Mission Blvd | [(-121.918886,37),(-121.9194,37.976),(-121.9198,37.975)] | Oakland
Mission Blvd | [(-121.918886,37),(-121.9194,37.976),(-121.9198,37.975)] | Oakland
- Mission Blvd | [(-122.0006,37.896),(-121.9989,37.88)] | Oakland
Mission Blvd | [(-122.0006,37.896),(-121.9989,37.88)] | Berkeley
+ Mission Blvd | [(-122.0006,37.896),(-121.9989,37.88)] | Oakland
Moores Ave | [(-122.0087,37.301),(-122.0094,37.292)] | Oakland
National Ave | [(-122.1192,37.5),(-122.1281,37.489)] | Oakland
Navajo Ct | [(-121.8779,37.901),(-121.8783,37.9)] | Oakland
@@ -242,49 +255,49 @@ SELECT * FROM street;
Parkridge Dr | [(-122.1438,37.884),(-122.1428,37.9)] | Berkeley
Parkside Dr | [(-122.0475,37.603),(-122.0443,37.596)] | Berkeley
Paseo Padre Pkwy | [(-121.9143,37.005),(-121.913522,37)] | Oakland
- Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Oakland
Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Berkeley
+ Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Oakland
Pearl St | [(-122.2383,37.594),(-122.2366,37.615)] | Berkeley
Periwinkle Road | [(-122.0451,37.301),(-122.044758,37.29844)] | Oakland
Pimlico Dr | [(-121.8616,37.998),(-121.8618,37.008)] | Oakland
Pimlico Dr | [(-121.8616,37.998),(-121.8618,37.008)] | Oakland
Portsmouth Ave | [(-122.1064,37.315),(-122.1064,37.308)] | Oakland
Proctor Ave | [(-122.2267,37.406),(-122.2251,37.386)] | Lafayette
+ Railroad Ave | [(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Berkeley
Railroad Ave | [(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Oakland
Railroad Ave | [(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Oakland
- Railroad Ave | [(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Berkeley
+ Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Berkeley
Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Oakland
Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Oakland
- Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Berkeley
Redding St | [(-122.1978,37.901),(-122.1975,37.895)] | Berkeley
- Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Oakland
Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Berkeley
+ Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Oakland
Roca Dr | [(-122.0335,37.609),(-122.0314,37.599)] | Berkeley
Rosedale Ct | [(-121.9232,37.9),(-121.924,37.897)] | Oakland
Sacramento St | [(-122.2799,37.606),(-122.2797,37.597)] | Berkeley
Saddle Brook Dr | [(-122.1478,37.909),(-122.1454,37.904),(-122.1451,37.888)] | Berkeley
Saginaw Ct | [(-121.8803,37.898),(-121.8806,37.901)] | Oakland
San Andreas Dr | [(-122.0609,37.9),(-122.0614,37.895)] | Berkeley
+ Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Berkeley
Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Oakland
Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Oakland
- Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Berkeley
Shattuck Ave | [(-122.2686,37.904),(-122.2686,37.897)] | Berkeley
Sheridan Road | [(-122.2279,37.425),(-122.2253,37.411),(-122.2223,37.377)] | Lafayette
Shoreline Dr | [(-122.2657,37.603),(-122.2648,37.6)] | Berkeley
- Skyline Blvd | [(-122.1738,37.01),(-122.1714,37.996)] | Oakland
Skyline Blvd | [(-122.1738,37.01),(-122.1714,37.996)] | Berkeley
+ Skyline Blvd | [(-122.1738,37.01),(-122.1714,37.996)] | Oakland
Skyline Dr | [(-122.0277,37.5),(-122.0284,37.498)] | Oakland
Skywest Dr | [(-122.1161,37.62),(-122.1123,37.586)] | Berkeley
Southern Pacific Railroad | [(-122.3002,37.674),(-122.2999,37.661)] | Berkeley
Sp Railroad | [(-121.893564,37.99009),(-121.897,37.016)] | Oakland
Sp Railroad | [(-121.893564,37.99009),(-121.897,37.016)] | Oakland
Sp Railroad | [(-121.9565,37.898),(-121.9562,37.9)] | Oakland
+ Sp Railroad | [(-122.0734,37.001),(-122.0734,37.997)] | Berkeley
Sp Railroad | [(-122.0734,37.001),(-122.0734,37.997)] | Oakland
Sp Railroad | [(-122.0734,37.001),(-122.0734,37.997)] | Oakland
- Sp Railroad | [(-122.0734,37.001),(-122.0734,37.997)] | Berkeley
Sp Railroad | [(-122.0914,37.601),(-122.087,37.56),(-122.086408,37.5551)] | Berkeley
- Sp Railroad | [(-122.137792,37.003),(-122.1365,37.992),(-122.131257,37.94612)] | Oakland
Sp Railroad | [(-122.137792,37.003),(-122.1365,37.992),(-122.131257,37.94612)] | Berkeley
+ Sp Railroad | [(-122.137792,37.003),(-122.1365,37.992),(-122.131257,37.94612)] | Oakland
Sp Railroad | [(-122.1947,37.497),(-122.193328,37.4848)] | Oakland
Stanton Ave | [(-122.100392,37.0697),(-122.099513,37.06052)] | Oakland
State Hwy 123 | [(-122.3004,37.986),(-122.2998,37.969),(-122.2995,37.962),(-122.2992,37.952),(-122.299,37.942),(-122.2987,37.935),(-122.2984,37.924),(-122.2982,37.92),(-122.2976,37.904),(-122.297,37.88),(-122.2966,37.869),(-122.2959,37.848),(-122.2961,37.843)] | Berkeley
@@ -316,28 +329,15 @@ SELECT * FROM street;
Welch Creek Road | [(-121.7695,37.386),(-121.7737,37.413)] | Oakland
Welch Creek Road | [(-121.7695,37.386),(-121.7737,37.413)] | Oakland
West Loop Road | [(-122.0576,37.604),(-122.0602,37.586)] | Berkeley
+ Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Berkeley
Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Oakland
Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Oakland
- Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Berkeley
Whitlock Creek | [(-121.74683,37.91276),(-121.733107,37)] | Oakland
Whitlock Creek | [(-121.74683,37.91276),(-121.733107,37)] | Oakland
Willimet Way | [(-122.0964,37.517),(-122.0949,37.493)] | Oakland
- Wisconsin St | [(-122.1994,37.017),(-122.1975,37.998),(-122.1971,37.994)] | Oakland
Wisconsin St | [(-122.1994,37.017),(-122.1975,37.998),(-122.1971,37.994)] | Berkeley
+ Wisconsin St | [(-122.1994,37.017),(-122.1975,37.998),(-122.1971,37.994)] | Oakland
Wp Railroad | [(-122.254,37.902),(-122.2506,37.891)] | Berkeley
- 100th Ave | [(-122.1657,37.429),(-122.1647,37.432)] | Oakland
- 107th Ave | [(-122.1555,37.403),(-122.1531,37.41)] | Oakland
- 14th St | [(-122.299,37.147),(-122.3,37.148)] | Lafayette
- 19th Ave | [(-122.2366,37.897),(-122.2359,37.905)] | Berkeley
- 1st St | [(-121.75508,37.89294),(-121.753581,37.90031)] | Oakland
- 5th St | [(-122.278,37),(-122.2792,37.005),(-122.2803,37.009)] | Lafayette
- 5th St | [(-122.296,37.615),(-122.2953,37.598)] | Berkeley
- 82nd Ave | [(-122.1695,37.596),(-122.1681,37.603)] | Berkeley
- 85th Ave | [(-122.1877,37.466),(-122.186,37.476)] | Oakland
- 89th Ave | [(-122.1822,37.459),(-122.1803,37.471)] | Oakland
- 98th Ave | [(-122.1568,37.498),(-122.1558,37.502)] | Oakland
- 98th Ave | [(-122.1693,37.438),(-122.1682,37.444)] | Oakland
- 98th Ave | [(-122.2001,37.258),(-122.1974,37.27)] | Lafayette
(333 rows)
SELECT name, #thepath FROM iexit ORDER BY name COLLATE "C", 2;
diff --git a/src/test/regress/expected/txid.out b/src/test/regress/expected/txid.out
index 95ba66e95e..2ea4434f51 100644
--- a/src/test/regress/expected/txid.out
+++ b/src/test/regress/expected/txid.out
@@ -238,9 +238,11 @@ SELECT txid_snapshot '1:9223372036854775807:3';
(1 row)
SELECT txid_snapshot '1:9223372036854775808:3';
-ERROR: invalid input syntax for type pg_snapshot: "1:9223372036854775808:3"
-LINE 1: SELECT txid_snapshot '1:9223372036854775808:3';
- ^
+ txid_snapshot
+-------------------------
+ 1:9223372036854775808:3
+(1 row)
+
-- test txid_current_if_assigned
BEGIN;
SELECT txid_current_if_assigned() IS NULL;
diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out
index 88d8f6c32d..0c4b994343 100644
--- a/src/test/regress/expected/type_sanity.out
+++ b/src/test/regress/expected/type_sanity.out
@@ -19,7 +19,7 @@ WHERE t1.typnamespace = 0 OR
(t1.typlen <= 0 AND t1.typlen != -1 AND t1.typlen != -2) OR
(t1.typtype not in ('b', 'c', 'd', 'e', 'm', 'p', 'r')) OR
NOT t1.typisdefined OR
- (t1.typalign not in ('c', 's', 'i', 'd')) OR
+ (t1.typalign not in ('c', 's', 'i', 'd', 'x')) OR
(t1.typstorage not in ('p', 'x', 'e', 'm'));
oid | typname
-----+---------
@@ -32,7 +32,8 @@ WHERE t1.typbyval AND
(t1.typlen != 1 OR t1.typalign != 'c') AND
(t1.typlen != 2 OR t1.typalign != 's') AND
(t1.typlen != 4 OR t1.typalign != 'i') AND
- (t1.typlen != 8 OR t1.typalign != 'd');
+ (t1.typlen != 8 OR t1.typalign != 'd') AND
+ (t1.typlen != 8 OR t1.typalign != 'x');
oid | typname
-----+---------
(0 rows)
diff --git a/src/test/regress/expected/xid.out b/src/test/regress/expected/xid.out
index 835077e9d5..0154990d1a 100644
--- a/src/test/regress/expected/xid.out
+++ b/src/test/regress/expected/xid.out
@@ -8,9 +8,9 @@ select '010'::xid,
'42'::xid8,
'0xffffffffffffffff'::xid8,
'-1'::xid8;
- xid | xid | xid | xid | xid8 | xid8 | xid8 | xid8
------+-----+------------+------------+------+------+----------------------+----------------------
- 8 | 42 | 4294967295 | 4294967295 | 8 | 42 | 18446744073709551615 | 18446744073709551615
+ xid | xid | xid | xid | xid8 | xid8 | xid8 | xid8
+-----+-----+------------+----------------------+------+------+----------------------+----------------------
+ 8 | 42 | 4294967295 | 18446744073709551615 | 8 | 42 | 18446744073709551615 | 18446744073709551615
(1 row)
-- garbage values
@@ -43,10 +43,10 @@ SELECT pg_input_is_valid('asdf', 'xid');
f
(1 row)
-SELECT * FROM pg_input_error_info('0xffffffffff', 'xid');
- message | detail | hint | sql_error_code
----------------------------------------------------+--------+------+----------------
- value "0xffffffffff" is out of range for type xid | | | 22003
+SELECT * FROM pg_input_error_info('0xffffffffffffffffffff', 'xid');
+ message | detail | hint | sql_error_code
+-------------------------------------------------------------+--------+------+----------------
+ value "0xffffffffffffffffffff" is out of range for type xid | | | 22003
(1 row)
SELECT pg_input_is_valid('42', 'xid8');
@@ -441,9 +441,11 @@ SELECT pg_snapshot '1:9223372036854775807:3';
(1 row)
SELECT pg_snapshot '1:9223372036854775808:3';
-ERROR: invalid input syntax for type pg_snapshot: "1:9223372036854775808:3"
-LINE 1: SELECT pg_snapshot '1:9223372036854775808:3';
- ^
+ pg_snapshot
+-------------------------
+ 1:9223372036854775808:3
+(1 row)
+
-- test pg_current_xact_id_if_assigned
BEGIN;
SELECT pg_current_xact_id_if_assigned() IS NULL;
diff --git a/src/test/regress/expected/xid64.out b/src/test/regress/expected/xid64.out
new file mode 100644
index 0000000000..c30c5b5739
--- /dev/null
+++ b/src/test/regress/expected/xid64.out
@@ -0,0 +1,92 @@
+---
+--- Unit test for xid64 functions
+---
+-- directory paths and dlsuffix are passed to us in environment variables
+\getenv libdir PG_LIBDIR
+\getenv dlsuffix PG_DLSUFFIX
+\set regresslib :libdir '/regress' :dlsuffix
+CREATE FUNCTION xid64_test_1(rel regclass) RETURNS VOID
+ AS :'regresslib', 'xid64_test_1' LANGUAGE C STRICT;
+CREATE FUNCTION xid64_test_2(rel regclass) RETURNS VOID
+ AS :'regresslib', 'xid64_test_2' LANGUAGE C STRICT;
+CREATE FUNCTION xid64_test_double_xmax(rel regclass) RETURNS VOID
+ AS :'regresslib', 'xid64_test_double_xmax' LANGUAGE C STRICT;
+---
+--- Check page consistency after conversion
+---
+CREATE UNLOGGED TABLE test_xid64_table(a int);
+ALTER TABLE test_xid64_table SET (autovacuum_enabled = false);
+INSERT INTO test_xid64_table(a) SELECT a FROM generate_series(1, 1000) AS a;
+SELECT xid64_test_1('test_xid64_table');
+INFO: test 1: page is converted to xid64 format
+ xid64_test_1
+--------------
+
+(1 row)
+
+DROP TABLE test_xid64_table;
+---
+--- Check tuples consistency after conversion
+---
+CREATE UNLOGGED TABLE test_xid64_table(s serial, i int, t text);
+ALTER TABLE test_xid64_table SET (autovacuum_enabled = false);
+DO $$
+BEGIN
+ FOR j IN 1..20 LOOP
+ INSERT INTO test_xid64_table(i, t) VALUES (random()::int, md5(random()::text));
+ COMMIT;
+ END LOOP;
+END $$;
+DO $$
+BEGIN
+ FOR j IN 1..10 LOOP
+ DELETE FROM test_xid64_table WHERE ctid IN (SELECT ctid FROM test_xid64_table TABLESAMPLE BERNOULLI (5));
+ COMMIT;
+ END LOOP;
+END $$;
+SELECT xid64_test_2('test_xid64_table');
+ xid64_test_2
+--------------
+
+(1 row)
+
+DROP TABLE test_xid64_table;
+---
+--- Check tuples consistency after conversion to double xmax (on full page)
+---
+CREATE UNLOGGED TABLE test_xid64_table(i int);
+DO $$
+BEGIN
+ FOR j IN 1..40 LOOP
+ INSERT INTO test_xid64_table SELECT i FROM generate_series(1, 100) AS i;
+ COMMIT;
+ END LOOP;
+END $$;
+SELECT xid64_test_2('test_xid64_table');
+ xid64_test_2
+--------------
+
+(1 row)
+
+DROP TABLE test_xid64_table;
+CREATE UNLOGGED TABLE test_xid64_table(i text);
+INSERT INTO test_xid64_table(i) VALUES ('NNBABCDSDFGHJKLP');
+DO $$
+BEGIN
+ FOR j IN 1..40 LOOP
+ INSERT INTO test_xid64_table(i) SELECT 'A' FROM generate_series(1, 100) AS i;
+ COMMIT;
+ END LOOP;
+END $$;
+SELECT xid64_test_double_xmax('test_xid64_table');
+INFO: test double xmax: page 0 is converted into double xmax format
+INFO: test double xmax: end
+ xid64_test_double_xmax
+------------------------
+
+(1 row)
+
+DROP TABLE test_xid64_table;
+DROP FUNCTION xid64_test_1(rel regclass);
+DROP FUNCTION xid64_test_2(rel regclass);
+DROP FUNCTION xid64_test_double_xmax(rel regclass);
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index f0987ff537..bfbf85a255 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -28,7 +28,7 @@ test: strings md5 numerology point lseg line box path polygon circle date time t
# geometry depends on point, lseg, line, box, path, polygon, circle
# horology depends on date, time, timetz, timestamp, timestamptz, interval
# ----------
-test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc
+test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid xid64 mvcc
# ----------
# Load huge amounts of data
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index bcbc6d910f..9bb8b541bb 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -23,6 +23,7 @@
#include "access/htup_details.h"
#include "access/transam.h"
#include "access/xact.h"
+#include "catalog/catalog.h"
#include "catalog/namespace.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
@@ -578,6 +579,7 @@ make_tuple_indirect(PG_FUNCTION_ARGS)
tuple.t_len = HeapTupleHeaderGetDatumLength(rec);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
+ HeapTupleSetZeroXids(&tuple);
tuple.t_data = rec;
values = (Datum *) palloc(ncolumns * sizeof(Datum));
@@ -1262,3 +1264,293 @@ get_columns_length(PG_FUNCTION_ARGS)
PG_RETURN_INT32(column_offset);
}
+
+#include "access/hio.h"
+#include "access/relation.h"
+#include "storage/bufmgr.h"
+#include "utils/rel.h"
+
+static void
+CheckNewPage(char *msg, Page page)
+{
+ uint16 size;
+
+ if (PageGetPageLayoutVersion(page) != PG_PAGE_LAYOUT_VERSION)
+ elog(ERROR, "%s: page version is %d, expected %d ",
+ msg, PageGetPageLayoutVersion(page), PG_PAGE_LAYOUT_VERSION);
+
+ size = PageGetSpecialSize(page);
+ if (size == MAXALIGN(sizeof(HeapPageSpecialData)))
+ elog(INFO, "%s: page is converted to xid64 format", msg);
+ else if (HeapPageIsDoubleXmax(page))
+ elog(INFO, "%s: page is converted into double xmax format", msg);
+ else
+ elog(ERROR, "%s: converted page has pageSpecial size %u, expected %llu",
+ msg, size,
+ (unsigned long long) MAXALIGN(sizeof(HeapPageSpecialData)));
+}
+
+/*
+ * Get page from relation.
+ * Make this page look like in 32-bit xid format.
+ * Convert it to 64-bit xid format.
+ * Run basic checks.
+ */
+PG_FUNCTION_INFO_V1(xid64_test_1);
+Datum
+xid64_test_1(PG_FUNCTION_ARGS)
+{
+ Oid relid;
+ Relation rel;
+ Buffer buf;
+ Page page;
+ PageHeader hdr;
+
+ relid = PG_GETARG_OID(0);
+ rel = relation_open(relid, AccessExclusiveLock);
+ buf = ReadBuffer(rel, 0);
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+
+ page = BufferGetPage(buf);
+ hdr = (PageHeader) page;
+
+ if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HeapPageSpecialData)))
+ elog(ERROR, "page expected in new format");
+
+ if (PageGetPageLayoutVersion(page) != PG_PAGE_LAYOUT_VERSION)
+ elog(ERROR, "unknown page version (%u)",
+ PageGetPageLayoutVersion(page));
+
+ hdr->pd_special = BLCKSZ;
+ PageSetPageSizeAndVersion(page, BLCKSZ, PG_PAGE_LAYOUT_VERSION - 1);
+
+ convert_page(rel, page, buf, 0);
+ CheckNewPage("test 1", page);
+
+ UnlockReleaseBuffer(buf);
+ relation_close(rel, AccessExclusiveLock);
+
+ PG_RETURN_VOID();
+}
+
+typedef struct TupleCheckValues
+{
+ TransactionId xmin;
+ TransactionId xmax;
+} TupleCheckValues;
+
+typedef struct RelCheckValues
+{
+ TupleCheckValues *tcv;
+ Size ntuples;
+} RelCheckValues;
+
+static RelCheckValues
+FillRelCheckValues(Relation rel, Buffer buffer, Page page)
+{
+ RelCheckValues set;
+ Size n;
+
+#define DEFAULT_SET_SIZE 64
+ n = DEFAULT_SET_SIZE;
+ set.ntuples = 0;
+ set.tcv = palloc(sizeof(set.tcv[0]) * n);
+
+ {
+ OffsetNumber maxoff,
+ offnum;
+ HeapTupleHeader tuphdr;
+ ItemId itemid;
+ HeapTupleData tuple;
+ TransactionId xmin,
+ xmax;
+
+ maxoff = PageGetMaxOffsetNumber(page);
+
+ for (offnum = FirstOffsetNumber;
+ offnum <= maxoff;
+ offnum = OffsetNumberNext(offnum))
+ {
+ itemid = PageGetItemId(page, offnum);
+ tuphdr = (HeapTupleHeader) PageGetItem(page, itemid);
+ tuple.t_data = tuphdr;
+ tuple.t_len = ItemIdGetLength(itemid);
+ tuple.t_tableOid = RelationGetRelid(rel);
+
+ if (HeapPageGetSpecial(page) == heapDoubleXmaxSpecial)
+ {
+ xmin = tuphdr->t_choice.t_heap.t_xmin;
+ xmax = tuphdr->t_choice.t_heap.t_xmax;
+ }
+ else
+ {
+ HeapTupleCopyXidsFromPage(buffer, &tuple, page,
+ IsToastRelation(rel));
+
+ xmin = HeapTupleGetRawXmin(&tuple);
+ xmax = HeapTupleGetRawXmax(&tuple);
+ }
+
+ if (set.ntuples == n)
+ {
+ n *= 2;
+ set.tcv = repalloc(set.tcv, sizeof(set.tcv[0]) * n);
+ }
+
+ set.tcv[set.ntuples].xmin = xmin;
+ set.tcv[set.ntuples].xmax = xmax;
+ set.ntuples++;
+ }
+ }
+
+ return set;
+}
+
+/*
+ * Test xmin/xmax invariant when converting page from 32bit xid to 64xid.
+ *
+ * Scenario:
+ * - enforce all relation pages to 32bit xid format, discarding pd_xid_base and
+ * pd_multi_base
+ * - store all xmin/xmax in array
+ * - convert all the pages from relation into 64xid format
+ * - store all new xmin/xmax in array
+ * - compare old and new xmin/xmax
+ *
+ * NOTE: inital xid value does not affect test as pd_xid_base/pd_multi_base
+ * discarded.
+ */
+PG_FUNCTION_INFO_V1(xid64_test_2);
+Datum
+xid64_test_2(PG_FUNCTION_ARGS)
+{
+ Oid relid;
+ Relation rel;
+ RelCheckValues before,
+ after;
+ BlockNumber pageno,
+ npages;
+ Size i;
+
+ relid = PG_GETARG_OID(0);
+ rel = relation_open(relid, AccessExclusiveLock);
+ npages = RelationGetNumberOfBlocks(rel);
+
+ for (pageno = 0; pageno != npages; ++pageno)
+ {
+ Buffer buf;
+ Page page;
+ PageHeader hdr;
+
+ /* get page */
+ buf = ReadBuffer(rel, pageno);
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ page = BufferGetPage(buf);
+ hdr = (PageHeader) page;
+
+ /* make page look like 32-bit xid page */
+ hdr->pd_special = BLCKSZ;
+ PageSetPageSizeAndVersion(page, BLCKSZ, PG_PAGE_LAYOUT_VERSION - 1);
+
+ before = FillRelCheckValues(rel, buf, page);
+ convert_page(rel, page, buf, pageno);
+ after = FillRelCheckValues(rel, buf, page);
+
+ /* check */
+ if (before.ntuples != after.ntuples)
+ elog(ERROR, "numer of tuples must be equal");
+
+ for (i = 0; i != before.ntuples; ++i)
+ {
+ if (before.tcv[i].xmin != after.tcv[i].xmin && after.tcv[i].xmin)
+ elog(ERROR, "old and new xmin does not match (%llu != %llu)",
+ (unsigned long long) before.tcv[i].xmin,
+ (unsigned long long) after.tcv[i].xmin);
+
+ if (before.tcv[i].xmax != after.tcv[i].xmax)
+ elog(ERROR, "old and new xmax does not match (%llu != %llu)",
+ (unsigned long long) before.tcv[i].xmax,
+ (unsigned long long) after.tcv[i].xmax);
+ }
+
+ Assert(npages != 0);
+ pfree(before.tcv);
+ pfree(after.tcv);
+
+ UnlockReleaseBuffer(buf);
+ }
+
+ relation_close(rel, AccessExclusiveLock);
+
+ PG_RETURN_VOID();
+}
+
+PG_FUNCTION_INFO_V1(xid64_test_double_xmax);
+Datum
+xid64_test_double_xmax(PG_FUNCTION_ARGS)
+{
+ Oid relid;
+ Relation rel;
+ BlockNumber pageno,
+ npages;
+ bool found;
+
+ relid = PG_GETARG_OID(0);
+ rel = relation_open(relid, AccessExclusiveLock);
+ npages = RelationGetNumberOfBlocks(rel);
+ found = false;
+
+ for (pageno = 0; pageno != npages; ++pageno)
+ {
+ Buffer buf;
+ Page page;
+ PageHeader hdr;
+ ItemId itemid;
+ OffsetNumber offnum;
+ HeapTupleHeader tuphdr;
+
+ buf = ReadBuffer(rel, pageno);
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ page = BufferGetPage(buf);
+ hdr = (PageHeader) page;
+
+ if (pageno == 0)
+ {
+ itemid = PageGetItemId(page, FirstOffsetNumber);
+ itemid->lp_len += 16; /* Move to overlap special */
+ }
+
+ for (offnum = FirstOffsetNumber;
+ offnum <= PageGetMaxOffsetNumber(page);
+ offnum = OffsetNumberNext(offnum))
+ {
+ itemid = PageGetItemId(page, offnum);
+ tuphdr = (HeapTupleHeader) PageGetItem(page, itemid);
+ tuphdr->t_infomask |= HEAP_XMIN_COMMITTED;
+ }
+
+ hdr->pd_special = BLCKSZ;
+ PageSetPageSizeAndVersion(page, BLCKSZ, PG_PAGE_LAYOUT_VERSION - 1);
+
+ convert_page(rel, page, buf, pageno);
+
+ if (HeapPageIsDoubleXmax(page))
+ {
+ found = true;
+ elog(INFO, "test double xmax: page %u is converted into double xmax format",
+ pageno);
+ }
+
+ UnlockReleaseBuffer(buf);
+ }
+
+ if (!found)
+ elog(ERROR, "test double xmax: failed, no double xmax");
+
+ Assert(npages != 0);
+ elog(INFO, "test double xmax: end");
+
+ relation_close(rel, AccessExclusiveLock);
+
+ PG_RETURN_VOID();
+}
diff --git a/src/test/regress/sql/indirect_toast.sql b/src/test/regress/sql/indirect_toast.sql
index 3e2f6c0237..ea087b5128 100644
--- a/src/test/regress/sql/indirect_toast.sql
+++ b/src/test/regress/sql/indirect_toast.sql
@@ -76,7 +76,18 @@ SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest;
VACUUM FREEZE indtoasttest;
SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest;
+create or replace function random_string(len integer) returns text as $$
+select substr((select string_agg(r,'') from (select random()::text as r from generate_series(1,(len+15)/16)) s1), 1, len);
+$$ language sql;
+
+create table toasttest_main(t text);
+alter table toasttest_main alter column t set storage main;
+
+insert into toasttest_main (select random_string(len) from generate_series(7000,8000) len);
+
DROP TABLE indtoasttest;
+DROP TABLE toasttest_main;
DROP FUNCTION update_using_indirect();
+DROP FUNCTION random_string(integer);
RESET default_toast_compression;
diff --git a/src/test/regress/sql/insert.sql b/src/test/regress/sql/insert.sql
index bdcffd0314..7ada0801eb 100644
--- a/src/test/regress/sql/insert.sql
+++ b/src/test/regress/sql/insert.sql
@@ -55,7 +55,7 @@ INSERT INTO large_tuple_test (select 3, NULL);
-- now this tuple won't fit on the second page, but the insert should
-- still succeed by extending the relation
-INSERT INTO large_tuple_test (select 4, repeat('a', 8126));
+INSERT INTO large_tuple_test (select 4, repeat('a', 8112));
DROP TABLE large_tuple_test;
@@ -597,3 +597,18 @@ alter table returningwrtest2 drop c;
alter table returningwrtest attach partition returningwrtest2 for values in (2);
insert into returningwrtest values (2, 'foo') returning returningwrtest;
drop table returningwrtest;
+
+-- Check for MaxHeapTupleSize
+create table maxheaptuplesize_test(value text);
+alter table maxheaptuplesize_test alter column value set storage external;
+insert into maxheaptuplesize_test values (repeat('x', 8104));
+insert into maxheaptuplesize_test values (repeat('x', 8112));
+insert into maxheaptuplesize_test values (repeat('x', 8120));
+insert into maxheaptuplesize_test values (repeat('x', 8128));
+insert into maxheaptuplesize_test values (repeat('x', 8136));
+insert into maxheaptuplesize_test values (repeat('x', 8144));
+insert into maxheaptuplesize_test values (repeat('x', 8152));
+insert into maxheaptuplesize_test values (repeat('x', 8160));
+insert into maxheaptuplesize_test values (repeat('x', 8168));
+insert into maxheaptuplesize_test values (repeat('x', 8176));
+drop table maxheaptuplesize_test;
diff --git a/src/test/regress/sql/select_views.sql b/src/test/regress/sql/select_views.sql
index e742f13699..a94bd7259c 100644
--- a/src/test/regress/sql/select_views.sql
+++ b/src/test/regress/sql/select_views.sql
@@ -3,7 +3,7 @@
-- test the views defined in CREATE_VIEWS
--
-SELECT * FROM street;
+SELECT * FROM street ORDER BY name COLLATE "C", thepath::text COLLATE "C", cname COLLATE "C";
SELECT name, #thepath FROM iexit ORDER BY name COLLATE "C", 2;
diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql
index e88d6cbe49..bbc2213b54 100644
--- a/src/test/regress/sql/type_sanity.sql
+++ b/src/test/regress/sql/type_sanity.sql
@@ -22,7 +22,7 @@ WHERE t1.typnamespace = 0 OR
(t1.typlen <= 0 AND t1.typlen != -1 AND t1.typlen != -2) OR
(t1.typtype not in ('b', 'c', 'd', 'e', 'm', 'p', 'r')) OR
NOT t1.typisdefined OR
- (t1.typalign not in ('c', 's', 'i', 'd')) OR
+ (t1.typalign not in ('c', 's', 'i', 'd', 'x')) OR
(t1.typstorage not in ('p', 'x', 'e', 'm'));
-- Look for "pass by value" types that can't be passed by value.
@@ -33,7 +33,8 @@ WHERE t1.typbyval AND
(t1.typlen != 1 OR t1.typalign != 'c') AND
(t1.typlen != 2 OR t1.typalign != 's') AND
(t1.typlen != 4 OR t1.typalign != 'i') AND
- (t1.typlen != 8 OR t1.typalign != 'd');
+ (t1.typlen != 8 OR t1.typalign != 'd') AND
+ (t1.typlen != 8 OR t1.typalign != 'x');
-- Look for "toastable" types that aren't varlena.
diff --git a/src/test/regress/sql/xid.sql b/src/test/regress/sql/xid.sql
index 9f716b3653..9b94cb9a4a 100644
--- a/src/test/regress/sql/xid.sql
+++ b/src/test/regress/sql/xid.sql
@@ -19,7 +19,7 @@ select 'asdf'::xid8;
-- Also try it with non-error-throwing API
SELECT pg_input_is_valid('42', 'xid');
SELECT pg_input_is_valid('asdf', 'xid');
-SELECT * FROM pg_input_error_info('0xffffffffff', 'xid');
+SELECT * FROM pg_input_error_info('0xffffffffffffffffffff', 'xid');
SELECT pg_input_is_valid('42', 'xid8');
SELECT pg_input_is_valid('asdf', 'xid8');
SELECT * FROM pg_input_error_info('0xffffffffffffffffffff', 'xid8');
diff --git a/src/test/regress/sql/xid64.sql b/src/test/regress/sql/xid64.sql
new file mode 100644
index 0000000000..caa97a0ed9
--- /dev/null
+++ b/src/test/regress/sql/xid64.sql
@@ -0,0 +1,84 @@
+---
+--- Unit test for xid64 functions
+---
+
+-- directory paths and dlsuffix are passed to us in environment variables
+\getenv libdir PG_LIBDIR
+\getenv dlsuffix PG_DLSUFFIX
+
+\set regresslib :libdir '/regress' :dlsuffix
+
+CREATE FUNCTION xid64_test_1(rel regclass) RETURNS VOID
+ AS :'regresslib', 'xid64_test_1' LANGUAGE C STRICT;
+CREATE FUNCTION xid64_test_2(rel regclass) RETURNS VOID
+ AS :'regresslib', 'xid64_test_2' LANGUAGE C STRICT;
+CREATE FUNCTION xid64_test_double_xmax(rel regclass) RETURNS VOID
+ AS :'regresslib', 'xid64_test_double_xmax' LANGUAGE C STRICT;
+
+---
+--- Check page consistency after conversion
+---
+CREATE UNLOGGED TABLE test_xid64_table(a int);
+ALTER TABLE test_xid64_table SET (autovacuum_enabled = false);
+INSERT INTO test_xid64_table(a) SELECT a FROM generate_series(1, 1000) AS a;
+SELECT xid64_test_1('test_xid64_table');
+DROP TABLE test_xid64_table;
+
+---
+--- Check tuples consistency after conversion
+---
+CREATE UNLOGGED TABLE test_xid64_table(s serial, i int, t text);
+ALTER TABLE test_xid64_table SET (autovacuum_enabled = false);
+
+DO $$
+BEGIN
+ FOR j IN 1..20 LOOP
+ INSERT INTO test_xid64_table(i, t) VALUES (random()::int, md5(random()::text));
+ COMMIT;
+ END LOOP;
+END $$;
+
+DO $$
+BEGIN
+ FOR j IN 1..10 LOOP
+ DELETE FROM test_xid64_table WHERE ctid IN (SELECT ctid FROM test_xid64_table TABLESAMPLE BERNOULLI (5));
+ COMMIT;
+ END LOOP;
+END $$;
+
+SELECT xid64_test_2('test_xid64_table');
+DROP TABLE test_xid64_table;
+
+---
+--- Check tuples consistency after conversion to double xmax (on full page)
+---
+CREATE UNLOGGED TABLE test_xid64_table(i int);
+
+DO $$
+BEGIN
+ FOR j IN 1..40 LOOP
+ INSERT INTO test_xid64_table SELECT i FROM generate_series(1, 100) AS i;
+ COMMIT;
+ END LOOP;
+END $$;
+
+SELECT xid64_test_2('test_xid64_table');
+DROP TABLE test_xid64_table;
+
+CREATE UNLOGGED TABLE test_xid64_table(i text);
+INSERT INTO test_xid64_table(i) VALUES ('NNBABCDSDFGHJKLP');
+
+DO $$
+BEGIN
+ FOR j IN 1..40 LOOP
+ INSERT INTO test_xid64_table(i) SELECT 'A' FROM generate_series(1, 100) AS i;
+ COMMIT;
+ END LOOP;
+END $$;
+
+SELECT xid64_test_double_xmax('test_xid64_table');
+DROP TABLE test_xid64_table;
+
+DROP FUNCTION xid64_test_1(rel regclass);
+DROP FUNCTION xid64_test_2(rel regclass);
+DROP FUNCTION xid64_test_double_xmax(rel regclass);
diff --git a/src/test/xid-64/Makefile b/src/test/xid-64/Makefile
new file mode 100644
index 0000000000..3b1e50dfc0
--- /dev/null
+++ b/src/test/xid-64/Makefile
@@ -0,0 +1,22 @@
+#-------------------------------------------------------------------------
+#
+# Makefile for src/test/xid-64
+#
+# Copyright (c) 2018, Postgres Professional
+#
+# src/test/xid-64/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/test/xid-64
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+check:
+ $(prove_check)
+
+installcheck:
+ $(prove_installcheck)
+
+clean distclean maintainer-clean:
+ rm -rf tmp_check
diff --git a/src/test/xid-64/README b/src/test/xid-64/README
new file mode 100644
index 0000000000..01c0a1a1f7
--- /dev/null
+++ b/src/test/xid-64/README
@@ -0,0 +1,16 @@
+src/test/xid-64/README
+
+Regression tests for 64-bit XIDs
+=============================================
+
+This directory contains a test suite for 64-bit xids.
+
+Running the tests
+=================
+
+ make check
+
+NOTE: This creates a temporary installation, and some tests may
+create one or multiple nodes.
+
+NOTE: This requires the --enable-tap-tests argument to configure.
diff --git a/src/test/xid-64/meson.build b/src/test/xid-64/meson.build
new file mode 100644
index 0000000000..63a780b69e
--- /dev/null
+++ b/src/test/xid-64/meson.build
@@ -0,0 +1,16 @@
+tests += {
+ 'name': 'xid-64',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'tap': {
+ 'tests': [
+ 't/001_test_large_xids.pl',
+ 't/002_test_gucs.pl',
+ 't/003_test_integrity.pl',
+ 't/004_test_relminmxid.pl',
+ 't/005_stream_subxact.pl',
+ 't/006_zeropage.pl',
+ 't/007_first_multi.pl',
+ ],
+ },
+}
diff --git a/src/test/xid-64/t/002_test_gucs.pl b/src/test/xid-64/t/002_test_gucs.pl
new file mode 100644
index 0000000000..9341389233
--- /dev/null
+++ b/src/test/xid-64/t/002_test_gucs.pl
@@ -0,0 +1,79 @@
+# Tests for guc boundary values
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use bigint;
+
+sub command_output
+{
+ my ($cmd) = @_;
+ my ($stdout, $stderr);
+ print("# Running: " . join(" ", @{$cmd}) . "\n");
+ my $result = IPC::Run::run $cmd, '>', \$stdout, '2>', \$stderr;
+ ok($result, "@$cmd exit code 0");
+ is($stderr, '', "@$cmd no stderr");
+ return $stdout;
+}
+
+sub set_guc
+{
+ my ($node, $guc, $val) = @_;
+ print("SET $guc = $val\n");
+ $node->safe_psql('postgres', "ALTER SYSTEM SET $guc = $val");
+ $node->restart();
+}
+
+sub test_pgbench
+{
+ my ($node) = @_;
+ $node->command_ok(
+ [ qw(pgbench --progress=5 --transactions=1000 --jobs=5 --client=5) ],
+ 'pgbench finished without errors');
+}
+
+my @guc_vals = (
+ [ "autovacuum_freeze_max_age", 100000, 2**63 - 1 ],
+ [ "autovacuum_multixact_freeze_max_age", 10000, 2**63 - 1 ],
+ [ "vacuum_freeze_min_age", 0, 2**63 - 1 ],
+ [ "vacuum_freeze_table_age", 0, 2**63 - 1 ],
+ [ "vacuum_multixact_freeze_min_age", 0, 2**63 - 1 ],
+ [ "vacuum_multixact_freeze_table_age", 0, 2**63 -1 ]
+);
+
+my $START_VAL = 2**32;
+my $MAX_VAL = 2**62;
+
+my $ixid = $START_VAL + int(rand($MAX_VAL - $START_VAL));
+my $imxid = $START_VAL + int(rand($MAX_VAL - $START_VAL));
+my $imoff = $START_VAL + int(rand($MAX_VAL - $START_VAL));
+
+# Initialize master node
+my $node = PostgreSQL::Test::Cluster->new('master');
+$node->init;
+# Disable logging of all statements to avoid log bloat during pgbench
+$node->append_conf('postgresql.conf', "log_statement = none");
+$node->start;
+
+# Fill the test database with the pgbench data
+$node->command_ok(
+ [ qw(pgbench --initialize --scale=10) ],
+ 'pgbench finished without errors');
+
+# Test all GUCs with minimum, maximum and random value inbetween
+# (run pgbench for every configuration setting)
+foreach my $gi (0 .. $#guc_vals) {
+ print($guc_vals[$gi][0]); print("\n");
+ my $guc = $guc_vals[$gi][0];
+ my $minval = $guc_vals[$gi][1];
+ my $maxval = $guc_vals[$gi][2];
+ set_guc($node, $guc, $minval);
+ test_pgbench($node);
+ set_guc($node, $guc, $maxval);
+ test_pgbench($node);
+ set_guc($node, $guc, $minval + int(rand($maxval - $minval)));
+ test_pgbench($node);
+}
+
+done_testing();
diff --git a/src/test/xid-64/t/003_test_integrity.pl b/src/test/xid-64/t/003_test_integrity.pl
new file mode 100644
index 0000000000..5b0789688e
--- /dev/null
+++ b/src/test/xid-64/t/003_test_integrity.pl
@@ -0,0 +1,58 @@
+# Check integrity after dump/restore with different xids
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use File::Compare;
+
+my $tempdir = PostgreSQL::Test::Utils::tempdir;
+use bigint;
+
+my $START_VAL = 2**32;
+my $MAX_VAL = 2**62;
+
+my $ixid = $START_VAL + int(rand($MAX_VAL - $START_VAL));
+my $imxid = $START_VAL + int(rand($MAX_VAL - $START_VAL));
+my $imoff = $START_VAL + int(rand($MAX_VAL - $START_VAL));
+
+# Initialize master node
+my $node = PostgreSQL::Test::Cluster->new('master');
+$node->init();
+$node->start;
+
+# Create a database and fill it with the pgbench data
+$node->safe_psql('postgres', "CREATE DATABASE pgbench_db");
+$node->command_ok(
+ [ qw(pgbench --initialize --scale=2 pgbench_db) ],
+ 'pgbench finished without errors');
+# Dump the database (cluster the main table to put data in a determined order)
+$node->safe_psql('pgbench_db', qq(
+ CREATE INDEX pa_aid_idx ON pgbench_accounts (aid);
+ CLUSTER pgbench_accounts USING pa_aid_idx));
+$node->command_ok(
+ [ "pg_dump", "-w", "--inserts", "--file=$tempdir/pgbench.sql", "pgbench_db" ],
+ 'pgdump finished without errors');
+$node->stop('fast');
+
+# Initialize second node
+my $node2 = PostgreSQL::Test::Cluster->new('master2');
+$node2->init;
+# Disable logging of all statements to avoid log bloat during restore
+$node2->append_conf('postgresql.conf', "log_statement = none");
+$node2->start;
+
+# Create a database and restore the previous dump
+$node2->safe_psql('postgres', "CREATE DATABASE pgbench_db");
+my $txid0 = $node2->safe_psql('pgbench_db', 'SELECT txid_current()');
+print("# Initial txid_current: $txid0\n");
+$node2->command_ok(["psql", "-q", "-f", "$tempdir/pgbench.sql", "pgbench_db"]);
+
+# Dump the database and compare the dumped content with the previous one
+$node2->safe_psql('pgbench_db', 'CLUSTER pgbench_accounts');
+$node2->command_ok(
+ [ "pg_dump", "-w", "--inserts", "--file=$tempdir/pgbench2.sql", "pgbench_db" ],
+ 'pgdump finished without errors');
+ok(File::Compare::compare_text("$tempdir/pgbench.sql", "$tempdir/pgbench2.sql") == 0, "no differences detected");
+
+done_testing();
diff --git a/src/test/xid-64/t/004_test_relminmxid.pl b/src/test/xid-64/t/004_test_relminmxid.pl
new file mode 100644
index 0000000000..e924f9cd9a
--- /dev/null
+++ b/src/test/xid-64/t/004_test_relminmxid.pl
@@ -0,0 +1,90 @@
+# Check integrity after dump/restore with different xids
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use bigint;
+
+my ($node, $rmm, $vacout);
+$node = PostgreSQL::Test::Cluster->new('master');
+$node->init;
+$node->append_conf('postgresql.conf', 'max_prepared_transactions = 2');
+$node->start;
+
+sub relminmxid
+{
+ my $rmm = $node->safe_psql("postgres", qq(
+ SELECT relminmxid
+ FROM pg_class
+ WHERE relname = 'foo';));
+ return $rmm + 0;
+}
+
+sub vacuum
+{
+ my ($rc, $stdout, $stderr) = $node->psql("postgres", "VACUUM foo;");
+ return $stdout.$stderr;
+}
+
+sub gen_multixact
+{
+ $node->safe_psql("postgres", qq(
+ BEGIN;
+ SELECT * FROM foo FOR KEY SHARE;
+ PREPARE TRANSACTION 'fooshare';
+ ));
+
+ my $xmax = $node->safe_psql("postgres", qq(
+ SELECT xmax FROM foo;
+ ));
+ isnt($xmax + 0, 0, "xmax not empty");
+
+ $node->safe_psql("postgres", qq(
+ BEGIN;
+ SELECT * FROM foo FOR KEY SHARE;
+ COMMIT;
+ COMMIT PREPARED 'fooshare';
+ ));
+
+ my $mxact = $node->safe_psql("postgres", qq(
+ SELECT xmax FROM foo;
+ ));
+ isnt($mxact + 0, 0, "mxact not empty");
+ cmp_ok($xmax, '>', $mxact, "xmax is greater than mxact");
+}
+
+# Initialize master node with the random xid-related parameters
+$node->safe_psql("postgres", "CREATE TABLE foo (a int); INSERT INTO foo VALUES (1);");
+
+is(relminmxid(), 1, "relminmxid is default");
+
+vacuum();
+is(relminmxid(), 1, "relminmxid is still default");
+
+gen_multixact();
+is(relminmxid(), 1, "relminmxid is still still default");
+
+unlike(vacuum(), qr/multixact.*before relminmxid/, "no relminmxid error");
+
+# No intentionally break relminmxid
+$node->safe_psql("postgres", qq(
+ UPDATE pg_class SET relminmxid = ((1::int8<<62) + 1)::text::xid
+ WHERE relname = 'foo'
+));
+cmp_ok(relminmxid(), '>', 2**62, "relminmxid broken (intentionally)");
+
+gen_multixact();
+like(vacuum(), qr/multixact.*before relminmxid/, "got relminmxid error");
+cmp_ok(relminmxid(), '>', 2**62, "relminmxid broken (still)");
+
+# Fix relminmxid by setting to default
+$node->safe_psql("postgres", qq(
+ UPDATE pg_class SET relminmxid = '1'
+ WHERE relname = 'foo'
+));
+is(relminmxid(), 1, "relminmxid is default again");
+
+unlike(vacuum(), qr/multixact.*before relminmxid/, "no relminmxid error again");
+
+done_testing();
diff --git a/src/test/xid-64/t/005_stream_subxact.pl b/src/test/xid-64/t/005_stream_subxact.pl
new file mode 100644
index 0000000000..6765f6061c
--- /dev/null
+++ b/src/test/xid-64/t/005_stream_subxact.pl
@@ -0,0 +1,100 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# Test xids streaming of large transaction containing large subtransactions
+# near 32-bit boundary.
+#
+# Mostly it is a copy of 016_stream_subxact.pl, but with publisher xid inited
+# just before 32-bit boundary, so if xids are replicated as 32-bit values,
+# subscriber will get 0 xid value.
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Create publisher node
+my $node_publisher = PostgreSQL::Test::Cluster->new('publisher');
+$node_publisher->init(allows_streaming => 'logical');
+$node_publisher->append_conf('postgresql.conf',
+ 'logical_decoding_work_mem = 64kB');
+$node_publisher->start;
+
+# Create subscriber node
+my $node_subscriber = PostgreSQL::Test::Cluster->new('subscriber');
+$node_subscriber->init(allows_streaming => 'logical');
+$node_subscriber->start;
+
+# Create some preexisting content on publisher
+$node_publisher->safe_psql('postgres',
+ "CREATE TABLE test_tab (a int primary key, b varchar)");
+$node_publisher->safe_psql('postgres',
+ "INSERT INTO test_tab VALUES (1, 'foo'), (2, 'bar')");
+
+# Setup structure on subscriber
+$node_subscriber->safe_psql('postgres',
+ "CREATE TABLE test_tab (a int primary key, b text, c timestamptz DEFAULT now(), d bigint DEFAULT 999)"
+);
+
+# Setup logical replication
+my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
+$node_publisher->safe_psql('postgres',
+ "CREATE PUBLICATION tap_pub FOR TABLE test_tab");
+
+my $appname = 'tap_sub';
+$node_subscriber->safe_psql('postgres',
+ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub WITH (streaming = on)"
+);
+
+$node_publisher->wait_for_catchup($appname);
+
+# Also wait for initial table sync to finish
+my $synced_query =
+ "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');";
+$node_subscriber->poll_query_until('postgres', $synced_query)
+ or die "Timed out while waiting for subscriber to synchronize data";
+
+my $result =
+ $node_subscriber->safe_psql('postgres',
+ "SELECT count(*), count(c), count(d = 999) FROM test_tab");
+is($result, qq(2|2|2), 'check initial data was copied to subscriber');
+
+# Insert, update and delete enough rows to exceed 64kB limit.
+$node_publisher->safe_psql(
+ 'postgres', q{
+BEGIN;
+INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series( 3, 500) s(i);
+UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0;
+DELETE FROM test_tab WHERE mod(a,3) = 0;
+SAVEPOINT s1;
+INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(501, 1000) s(i);
+UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0;
+DELETE FROM test_tab WHERE mod(a,3) = 0;
+SAVEPOINT s2;
+INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(1001, 1500) s(i);
+UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0;
+DELETE FROM test_tab WHERE mod(a,3) = 0;
+SAVEPOINT s3;
+INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(1501, 2000) s(i);
+UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0;
+DELETE FROM test_tab WHERE mod(a,3) = 0;
+SAVEPOINT s4;
+INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(2001, 2500) s(i);
+UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0;
+DELETE FROM test_tab WHERE mod(a,3) = 0;
+COMMIT;
+});
+
+$node_publisher->wait_for_catchup($appname);
+
+$result =
+ $node_subscriber->safe_psql('postgres',
+ "SELECT count(*), count(c), count(d = 999) FROM test_tab");
+is($result, qq(1667|1667|1667),
+ 'check data was copied to subscriber in streaming mode and extra columns contain local defaults'
+);
+
+$node_subscriber->stop;
+$node_publisher->stop;
+
+done_testing();
diff --git a/src/test/xid-64/t/006_zeropage.pl b/src/test/xid-64/t/006_zeropage.pl
new file mode 100644
index 0000000000..4b87c90edc
--- /dev/null
+++ b/src/test/xid-64/t/006_zeropage.pl
@@ -0,0 +1,33 @@
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Check WAL for ZEROPAGE record.
+
+sub command_output
+{
+ my ($cmd) = @_;
+ my ($stdout, $stderr);
+ print("# Running: " . join(" ", @{$cmd}) . "\n");
+ my $result = IPC::Run::run $cmd, '>', \$stdout, '2>', \$stderr;
+ return $stdout;
+}
+
+my $node = PostgreSQL::Test::Cluster->new('main');
+$node->init;
+$node->start;
+my $pgdata = $node->data_dir;
+my $xlogfilename0 = $node->safe_psql('postgres',
+ "SELECT pg_walfile_name(pg_current_wal_lsn())");
+#$node->command_like(
+# [ 'pg_waldump', '-S', "$pgdata/pg_wal/$xlogfilename0" ],
+# qr/ZEROPAGE/,
+# 'pg_waldump prints start timestamp');
+my $wd_output = command_output(
+ [ 'pg_waldump', "$pgdata/pg_wal/$xlogfilename0" ]);
+ok($wd_output =~ qr/ZEROPAGE page 0/, "ZEROPAGE found");
+
+done_testing();
diff --git a/src/test/xid-64/t/007_first_multi.pl b/src/test/xid-64/t/007_first_multi.pl
new file mode 100644
index 0000000000..eca2c39af9
--- /dev/null
+++ b/src/test/xid-64/t/007_first_multi.pl
@@ -0,0 +1,83 @@
+# Test for pages with first tuple has xmax multi
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+sub test_multixact
+{
+ my ($primary, $standby, $test_name) = @_;
+
+ $primary->safe_psql('postgres', q{
+ CREATE TABLE t (id INT, data TEXT, CONSTRAINT t_id_pk PRIMARY KEY(id));
+ INSERT INTO t SELECT 1, repeat('a', 1000);
+ });
+
+ my %psql = (
+ '1' => $primary->background_psql('postgres'),
+ '2' => $primary->background_psql('postgres'),
+ );
+
+ # Lock tuples
+ $psql{1}->query_safe(q(
+ BEGIN;
+ SELECT * FROM t FOR KEY SHARE;
+ ));
+
+ $psql{2}->query_safe(q(
+ BEGIN;
+ SELECT * FROM t FOR KEY SHARE;
+ ));
+
+ # Repeat update until we get a new page with one tuple
+ my $res;
+ my $guard = 0;
+
+ do {
+ $res = $primary->safe_psql('postgres', q{
+ UPDATE t SET data = repeat('a', 1000) RETURNING ctid;
+ });
+ # Fail if we already write around 64k and still have no new page.
+ fail("creating second page") if (++$guard == 64);
+ } until ($res eq "(1,1)");
+
+ $psql{1}->quit;
+ $psql{2}->quit;
+ $primary->wait_for_catchup($standby);
+
+ # Check results
+ my $query = q{
+ SELECT xmax, ctid, id, data = repeat('a', 1000) as data FROM t;
+ };
+ my $res_primary = $primary->safe_psql('postgres', $query);
+ my $res_standby = $standby->safe_psql('postgres', $query);
+
+ is($res_primary, $res_standby, "rows are the same in test $test_name");
+}
+
+# We should run test for full_page_writes on and off.
+foreach ('true', 'false') {
+ # Create primary
+ my $primary = PostgreSQL::Test::Cluster->new("master_$_");
+ $primary->init(allows_streaming => 1);
+ $primary->append_conf('postgresql.conf', "full_page_writes = $_");
+ $primary->start;
+
+ # Take backup
+ my $backup_name = "my_backup_$_";
+ $primary->backup($backup_name);
+
+ # Create standby from backup
+ my $standby = PostgreSQL::Test::Cluster->new("standby_$_");
+ $standby->init_from_backup($primary, $backup_name, has_streaming => 1);
+ $standby->start;
+
+ # Check
+ test_multixact($primary, $standby, "with FPW $_");
+
+ $standby->stop();
+ $primary->stop();
+}
+
+done_testing();
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 38a86575e1..194646ef2d 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3438,8 +3438,8 @@ intset_leaf_node
intset_node
intvKEY
io_stat_col
-itemIdCompact
-itemIdCompactData
+ItemIdCompact
+ItemIdCompactData
iterator
jmp_buf
join_search_hook_type
--
2.34.1