v47-0008-Use-64-bit-XIDs.patch
application/octet-stream
Filename: v47-0008-Use-64-bit-XIDs.patch
Type: application/octet-stream
Part: 7
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v47-0008
Subject: Use 64-bit XIDs
| File | + | − |
|---|---|---|
| contrib/amcheck/verify_heapam.c | 33 | 46 |
| contrib/amcheck/verify_nbtree.c | 1 | 1 |
| contrib/hstore/hstore_io.c | 2 | 0 |
| contrib/pageinspect/btreefuncs.c | 12 | 4 |
| contrib/pageinspect/expected/btree.out | 2 | 2 |
| contrib/pageinspect/expected/hash_1.out | 166 | 0 |
| contrib/pageinspect/expected/oldextversions.out | 5 | 5 |
| contrib/pageinspect/expected/page.out | 14 | 14 |
| contrib/pageinspect/heapfuncs.c | 6 | 3 |
| contrib/pageinspect/Makefile | 2 | 1 |
| contrib/pageinspect/meson.build | 1 | 0 |
| contrib/pageinspect/pageinspect--1.10--1.11.sql | 145 | 0 |
| contrib/pageinspect/pageinspect--1.5.sql | 2 | 0 |
| contrib/pageinspect/pageinspect.control | 1 | 1 |
| contrib/pageinspect/rawpage.c | 31 | 4 |
| contrib/pageinspect/sql/btree.sql | 2 | 1 |
| contrib/pgrowlocks/pgrowlocks.c | 1 | 1 |
| contrib/pgstattuple/pgstatapprox.c | 2 | 0 |
| contrib/pgstattuple/pgstatindex.c | 1 | 1 |
| contrib/pg_surgery/heap_surgery.c | 20 | 2 |
| contrib/pg_visibility/expected/pg_visibility.out | 17 | 0 |
| contrib/pg_visibility/pg_visibility.c | 5 | 2 |
| contrib/pg_visibility/sql/pg_visibility.sql | 18 | 0 |
| contrib/postgres_fdw/expected/postgres_fdw.out | 36 | 19 |
| contrib/postgres_fdw/postgres_fdw.c | 5 | 4 |
| contrib/postgres_fdw/sql/postgres_fdw.sql | 10 | 5 |
| src/backend/access/common/heaptuple.c | 6 | 2 |
| src/backend/access/common/reloptions.c | 60 | 58 |
| src/backend/access/hash/hashvalidate.c | 2 | 3 |
| src/backend/access/heap/heapam.c | 1122 | 134 |
| src/backend/access/heap/heapam_handler.c | 37 | 16 |
| src/backend/access/heap/heapam_visibility.c | 87 | 86 |
| src/backend/access/heap/heaptoast.c | 3 | 0 |
| src/backend/access/heap/hio.c | 42 | 4 |
| src/backend/access/heap/pruneheap.c | 73 | 19 |
| src/backend/access/heap/rewriteheap.c | 80 | 21 |
| src/backend/access/heap/vacuumlazy.c | 43 | 107 |
| src/backend/access/nbtree/nbtpage.c | 2 | 0 |
| src/backend/access/nbtree/nbtsplitloc.c | 15 | 1 |
| src/backend/access/nbtree/nbtxlog.c | 2 | 0 |
| src/backend/access/rmgrdesc/gistdesc.c | 2 | 2 |
| src/backend/access/rmgrdesc/heapdesc.c | 32 | 0 |
| src/backend/access/rmgrdesc/mxactdesc.c | 5 | 4 |
| src/backend/access/rmgrdesc/nbtdesc.c | 2 | 2 |
| src/backend/access/rmgrdesc/xactdesc.c | 4 | 2 |
| src/backend/access/rmgrdesc/xlogdesc.c | 3 | 3 |
| src/backend/access/transam/clog.c | 7 | 17 |
| src/backend/access/transam/commit_ts.c | 0 | 19 |
| src/backend/access/transam/multixact.c | 62 | 624 |
| src/backend/access/transam/slru.c | 7 | 6 |
| src/backend/access/transam/subtrans.c | 5 | 4 |
| src/backend/access/transam/transam.c | 9 | 9 |
| src/backend/access/transam/twophase.c | 7 | 8 |
| src/backend/access/transam/varsup.c | 8 | 153 |
| src/backend/access/transam/xact.c | 24 | 12 |
| src/backend/access/transam/xlog.c | 5 | 5 |
| src/backend/access/transam/xloginsert.c | 7 | 0 |
| src/backend/access/transam/xlogreader.c | 0 | 34 |
| src/backend/access/transam/xlogrecovery.c | 1 | 1 |
| src/backend/bootstrap/bootstrap.c | 10 | 16 |
| src/backend/catalog/heap.c | 4 | 4 |
| src/backend/catalog/pg_inherits.c | 1 | 1 |
| src/backend/commands/async.c | 1 | 1 |
| src/backend/commands/dbcommands.c | 6 | 3 |
| src/backend/commands/indexcmds.c | 3 | 3 |
| src/backend/commands/sequence.c | 21 | 4 |
| src/backend/commands/vacuum.c | 27 | 18 |
| src/backend/executor/execExprInterp.c | 1 | 0 |
| src/backend/executor/execUtils.c | 1 | 0 |
| src/backend/executor/nodeModifyTable.c | 1 | 0 |
| src/backend/executor/spi.c | 1 | 0 |
| src/backend/nodes/gen_node_support.pl | 3 | 3 |
| src/backend/nodes/outfuncs.c | 0 | 1 |
| src/backend/optimizer/util/plancat.c | 1 | 1 |
| src/backend/postmaster/autovacuum.c | 38 | 34 |
| src/backend/replication/logical/decode.c | 15 | 3 |
| src/backend/replication/logical/proto.c | 25 | 25 |
| src/backend/replication/logical/reorderbuffer.c | 12 | 5 |
| src/backend/replication/logical/snapbuild.c | 5 | 4 |
| src/backend/replication/logical/worker.c | 1 | 1 |
| src/backend/replication/pgoutput/pgoutput.c | 2 | 1 |
| src/backend/replication/walreceiver.c | 4 | 24 |
| src/backend/replication/walsender.c | 8 | 65 |
| src/backend/statistics/extended_stats.c | 1 | 0 |
| src/backend/storage/buffer/bufmgr.c | 130 | 4 |
| src/backend/storage/buffer/heap_convert.c | 546 | 0 |
| src/backend/storage/buffer/Makefile | 2 | 1 |
| src/backend/storage/buffer/meson.build | 1 | 0 |
| src/backend/storage/ipc/procarray.c | 68 | 115 |
| src/backend/storage/ipc/sinvaladt.c | 2 | 2 |
| src/backend/storage/ipc/standby.c | 3 | 3 |
| src/backend/storage/lmgr/lmgr.c | 12 | 4 |
| src/backend/storage/lmgr/lock.c | 2 | 2 |
| src/backend/storage/lmgr/predicate.c | 3 | 3 |
| src/backend/storage/lmgr/proc.c | 6 | 5 |
| src/backend/storage/page/bufpage.c | 237 | 17 |
| src/backend/tcop/postgres.c | 9 | 16 |
| src/backend/utils/adt/enum.c | 1 | 1 |
| src/backend/utils/adt/jsonfuncs.c | 1 | 0 |
| src/backend/utils/adt/lockfuncs.c | 6 | 3 |
| src/backend/utils/adt/pgstatfuncs.c | 1 | 0 |
| src/backend/utils/adt/rowtypes.c | 12 | 0 |
| src/backend/utils/adt/xid8funcs.c | 21 | 62 |
| src/backend/utils/adt/xid.c | 24 | 13 |
| src/backend/utils/cache/catcache.c | 1 | 0 |
| src/backend/utils/cache/relcache.c | 1 | 2 |
| src/backend/utils/fmgr/fmgr.c | 2 | 2 |
| src/backend/utils/misc/guc.c | 11 | 0 |
| src/backend/utils/misc/guc_tables.c | 90 | 90 |
| src/backend/utils/misc/help_config.c | 7 | 1 |
| src/backend/utils/misc/pg_controldata.c | 1 | 1 |
| src/backend/utils/misc/postgresql.conf.sample | 2 | 2 |
| src/backend/utils/sort/tuplesortvariants.c | 12 | 2 |
| src/backend/utils/time/combocid.c | 10 | 8 |
| src/backend/utils/time/snapmgr.c | 13 | 12 |
| src/bin/initdb/initdb.c | 30 | 30 |
| src/bin/initdb/t/001_initdb.pl | 6 | 6 |
| src/bin/pg_amcheck/t/004_verify_heapam.pl | 171 | 39 |
| src/bin/pg_controldata/pg_controldata.c | 1 | 1 |
| src/bin/pg_dump/pg_dump.c | 17 | 10 |
| src/bin/pg_dump/pg_dump.h | 4 | 4 |
| src/bin/pg_resetwal/pg_resetwal.c | 13 | 43 |
| src/bin/pg_upgrade/check.c | 148 | 1 |
| src/bin/pg_upgrade/controldata.c | 9 | 8 |
| src/bin/pg_upgrade/file.c | 98 | 1 |
| src/bin/pg_upgrade/Makefile | 1 | 0 |
| src/bin/pg_upgrade/meson.build | 1 | 0 |
| src/bin/pg_upgrade/pg_upgrade.c | 117 | 28 |
| src/bin/pg_upgrade/pg_upgrade.h | 26 | 8 |
| src/bin/pg_upgrade/relfilenumber.c | 28 | 6 |
| src/bin/pg_upgrade/segresize.c | 586 | 0 |
| src/bin/pg_upgrade/t/002_pg_upgrade.pl | 19 | 1 |
| src/bin/pg_upgrade/version.c | 86 | 18 |
| src/bin/pg_verifybackup/t/003_corruption.pl | 1 | 1 |
| src/bin/pg_waldump/pg_waldump.c | 1 | 1 |
| src/include/access/clog.h | 1 | 1 |
| src/include/access/ginblock.h | 9 | 2 |
| src/include/access/gist.h | 1 | 1 |
| src/include/access/heapam.h | 13 | 7 |
| src/include/access/heapam_xlog.h | 33 | 5 |
| src/include/access/heaptoast.h | 10 | 1 |
| src/include/access/htup_details.h | 201 | 36 |
| src/include/access/htup.h | 13 | 5 |
| src/include/access/multixact.h | 5 | 6 |
| src/include/access/nbtree.h | 10 | 0 |
| src/include/access/rewriteheap.h | 2 | 2 |
| src/include/access/rmgrlist.h | 1 | 0 |
| src/include/access/slru.h | 1 | 9 |
| src/include/access/tableam.h | 1 | 1 |
| src/include/access/transam.h | 54 | 35 |
| src/include/access/tupmacs.h | 2 | 1 |
| src/include/access/xact.h | 9 | 4 |
| src/include/access/xloginsert.h | 1 | 0 |
| src/include/access/xlogreader.h | 0 | 4 |
| src/include/access/xlogrecord.h | 2 | 3 |
| src/include/catalog/catversion.h | 2 | 1 |
| src/include/catalog/pg_amproc.dat | 2 | 2 |
| src/include/catalog/pg_control.h | 6 | 0 |
| src/include/catalog/pg_operator.dat | 4 | 4 |
| src/include/catalog/pg_proc.dat | 6 | 6 |
| src/include/catalog/pg_type.dat | 2 | 2 |
| src/include/catalog/pg_type.h | 5 | 0 |
| src/include/c.h | 20 | 7 |
| src/include/commands/vacuum.h | 15 | 15 |
| src/include/fmgr.h | 2 | 0 |
| src/include/nodes/pg_list.h | 4 | 0 |
| src/include/pg_config.h.in | 3 | 0 |
| src/include/port/pg_lfind.h | 107 | 56 |
| src/include/postgres.h | 6 | 3 |
| src/include/postmaster/autovacuum.h | 2 | 2 |
| src/include/storage/buf_internals.h | 3 | 2 |
| src/include/storage/bufmgr.h | 6 | 0 |
| src/include/storage/bufpage.h | 219 | 13 |
| src/include/storage/itemid.h | 2 | 0 |
| src/include/storage/lock.h | 7 | 7 |
| src/include/storage/proc.h | 4 | 3 |
| src/include/storage/standby.h | 1 | 1 |
| src/include/utils/combocid.h | 1 | 1 |
| src/include/utils/rel.h | 6 | 6 |
| src/include/utils/xid8.h | 2 | 2 |
| src/pl/plperl/plperl.c | 2 | 2 |
| src/pl/plpgsql/src/pl_comp.c | 2 | 2 |
| src/pl/plpgsql/src/pl_exec.c | 2 | 0 |
| src/pl/plpython/plpy_procedure.c | 2 | 2 |
| src/pl/tcl/pltcl.c | 2 | 2 |
| src/test/Makefile | 2 | 1 |
| src/test/meson.build | 1 | 0 |
| src/test/modules/test_lfind/test_lfind.c | 15 | 15 |
| src/test/perl/PostgreSQL/Test/Cluster.pm | 3 | 1 |
| src/test/recovery/t/003_recovery_targets.pl | 1 | 1 |
| src/test/regress/expected/indirect_toast.out | 8 | 0 |
| src/test/regress/expected/insert.out | 15 | 1 |
| src/test/regress/expected/opr_sanity.out | 3 | 3 |
| src/test/regress/expected/select_views.out | 35 | 35 |
| src/test/regress/expected/txid.out | 5 | 3 |
| src/test/regress/expected/type_sanity.out | 3 | 2 |
| src/test/regress/expected/xid64.out | 92 | 0 |
| src/test/regress/expected/xid.out | 8 | 6 |
| src/test/regress/parallel_schedule | 1 | 1 |
| src/test/regress/pg_regress.c | 1 | 1 |
| src/test/regress/regress.c | 291 | 0 |
| src/test/regress/sql/indirect_toast.sql | 11 | 0 |
| src/test/regress/sql/insert.sql | 16 | 1 |
| src/test/regress/sql/select_views.sql | 1 | 1 |
| src/test/regress/sql/type_sanity.sql | 3 | 2 |
| src/test/regress/sql/xid64.sql | 84 | 0 |
| src/test/xid-64/Makefile | 22 | 0 |
| src/test/xid-64/meson.build | 15 | 0 |
| src/test/xid-64/README | 16 | 0 |
| src/test/xid-64/t/001_test_large_xids.pl | 54 | 0 |
| src/test/xid-64/t/002_test_gucs.pl | 79 | 0 |
| src/test/xid-64/t/003_test_integrity.pl | 58 | 0 |
| src/test/xid-64/t/004_test_relminmxid.pl | 90 | 0 |
| src/test/xid-64/t/005_stream_subxact.pl | 100 | 0 |
| src/test/xid-64/t/006_zeropage.pl | 33 | 0 |
| src/tools/msvc/Solution.pm | 1 | 0 |
| src/tools/pgindent/typedefs.list | 2 | 2 |
From 3140bbb4fa49c5d6f14f99b9910dea3e7fb0d970 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <m.orlov@postgrespro.ru>
Date: Fri, 11 Mar 2022 11:37:29 +0300
Subject: [PATCH v47 8/8] Use 64-bit XIDs
- change TransactionId to 64bit
- disk tuple format (HeapTupleHeader) is (almost) unchanged: xmin and xmax remain 32bit
-- now 32bit xid is named ShortTransactionId
- heap page format is changed to contain xid and multixact base values; a tuple's
xmin and xmax are stored as offsets from these bases.
-- xid_base and multi_base are stored as page special data. PageHeader remains unmodified.
- in-memory tuple (HeapTuple) is enriched with precalculated 64bit xmin/xmax.
Authors:
- Alexander Korotkov <aekorotkov@gmail.com>
- Teodor Sigaev <teodor@sigaev.ru>
- Nikita Glukhov <n.gluhov@postgrespro.ru>
- Maxim Orlov <orlovmg@gmail.com>
- Pavel Borisov <pashkin.elfe@gmail.com>
- Yura Sokolov <y.sokolov@postgrespro.ru> <funny.falcon@gmail.com>
- Aleksander Alekseev <aleksander@timescale.com>
Discussion: https://postgr.es/m/CACG%3DezZe1NQSCnfHOr78AtAZxJZeCvxrts0ygrxYwe%3DpyyjVWA%40mail.gmail.com
Discussion: https://postgr.es/m/CAJ7c6TPDOYBYrnCAeyndkBktO0WG2xSdYduTF0nxq%2BvfkmTF5Q%40mail.gmail.com
---
contrib/amcheck/verify_heapam.c | 79 +-
contrib/amcheck/verify_nbtree.c | 2 +-
contrib/hstore/hstore_io.c | 2 +
contrib/pageinspect/Makefile | 3 +-
contrib/pageinspect/btreefuncs.c | 16 +-
contrib/pageinspect/expected/btree.out | 4 +-
contrib/pageinspect/expected/hash_1.out | 166 +++
.../pageinspect/expected/oldextversions.out | 10 +-
contrib/pageinspect/expected/page.out | 28 +-
contrib/pageinspect/heapfuncs.c | 9 +-
contrib/pageinspect/meson.build | 1 +
.../pageinspect/pageinspect--1.10--1.11.sql | 145 ++
contrib/pageinspect/pageinspect--1.5.sql | 2 +
contrib/pageinspect/pageinspect.control | 2 +-
contrib/pageinspect/rawpage.c | 35 +-
contrib/pageinspect/sql/btree.sql | 3 +-
contrib/pg_surgery/heap_surgery.c | 22 +-
.../pg_visibility/expected/pg_visibility.out | 17 +
contrib/pg_visibility/pg_visibility.c | 7 +-
contrib/pg_visibility/sql/pg_visibility.sql | 18 +
contrib/pgrowlocks/pgrowlocks.c | 2 +-
contrib/pgstattuple/pgstatapprox.c | 2 +
contrib/pgstattuple/pgstatindex.c | 2 +-
.../postgres_fdw/expected/postgres_fdw.out | 55 +-
contrib/postgres_fdw/postgres_fdw.c | 9 +-
contrib/postgres_fdw/sql/postgres_fdw.sql | 15 +-
src/backend/access/common/heaptuple.c | 8 +-
src/backend/access/common/reloptions.c | 118 +-
src/backend/access/hash/hashvalidate.c | 5 +-
src/backend/access/heap/heapam.c | 1256 +++++++++++++++--
src/backend/access/heap/heapam_handler.c | 53 +-
src/backend/access/heap/heapam_visibility.c | 173 +--
src/backend/access/heap/heaptoast.c | 3 +
src/backend/access/heap/hio.c | 46 +-
src/backend/access/heap/pruneheap.c | 92 +-
src/backend/access/heap/rewriteheap.c | 101 +-
src/backend/access/heap/vacuumlazy.c | 150 +-
src/backend/access/nbtree/nbtpage.c | 2 +
src/backend/access/nbtree/nbtsplitloc.c | 16 +-
src/backend/access/nbtree/nbtxlog.c | 2 +
src/backend/access/rmgrdesc/gistdesc.c | 4 +-
src/backend/access/rmgrdesc/heapdesc.c | 32 +
src/backend/access/rmgrdesc/mxactdesc.c | 9 +-
src/backend/access/rmgrdesc/nbtdesc.c | 4 +-
src/backend/access/rmgrdesc/xactdesc.c | 6 +-
src/backend/access/rmgrdesc/xlogdesc.c | 6 +-
src/backend/access/transam/clog.c | 24 +-
src/backend/access/transam/commit_ts.c | 19 -
src/backend/access/transam/multixact.c | 686 +--------
src/backend/access/transam/slru.c | 13 +-
src/backend/access/transam/subtrans.c | 9 +-
src/backend/access/transam/transam.c | 18 +-
src/backend/access/transam/twophase.c | 15 +-
src/backend/access/transam/varsup.c | 161 +--
src/backend/access/transam/xact.c | 36 +-
src/backend/access/transam/xlog.c | 10 +-
src/backend/access/transam/xloginsert.c | 7 +
src/backend/access/transam/xlogreader.c | 34 -
src/backend/access/transam/xlogrecovery.c | 2 +-
src/backend/bootstrap/bootstrap.c | 26 +-
src/backend/catalog/heap.c | 8 +-
src/backend/catalog/pg_inherits.c | 2 +-
src/backend/commands/async.c | 2 +-
src/backend/commands/dbcommands.c | 9 +-
src/backend/commands/indexcmds.c | 6 +-
src/backend/commands/sequence.c | 25 +-
src/backend/commands/vacuum.c | 45 +-
src/backend/executor/execExprInterp.c | 1 +
src/backend/executor/execUtils.c | 1 +
src/backend/executor/nodeModifyTable.c | 1 +
src/backend/executor/spi.c | 1 +
src/backend/nodes/gen_node_support.pl | 6 +-
src/backend/nodes/outfuncs.c | 1 -
src/backend/optimizer/util/plancat.c | 2 +-
src/backend/postmaster/autovacuum.c | 72 +-
src/backend/replication/logical/decode.c | 18 +-
src/backend/replication/logical/proto.c | 50 +-
.../replication/logical/reorderbuffer.c | 17 +-
src/backend/replication/logical/snapbuild.c | 9 +-
src/backend/replication/logical/worker.c | 2 +-
src/backend/replication/pgoutput/pgoutput.c | 3 +-
src/backend/replication/walreceiver.c | 28 +-
src/backend/replication/walsender.c | 73 +-
src/backend/statistics/extended_stats.c | 1 +
src/backend/storage/buffer/Makefile | 3 +-
src/backend/storage/buffer/bufmgr.c | 134 +-
src/backend/storage/buffer/heap_convert.c | 546 +++++++
src/backend/storage/buffer/meson.build | 1 +
src/backend/storage/ipc/procarray.c | 183 +--
src/backend/storage/ipc/sinvaladt.c | 4 +-
src/backend/storage/ipc/standby.c | 6 +-
src/backend/storage/lmgr/lmgr.c | 16 +-
src/backend/storage/lmgr/lock.c | 4 +-
src/backend/storage/lmgr/predicate.c | 6 +-
src/backend/storage/lmgr/proc.c | 11 +-
src/backend/storage/page/bufpage.c | 254 +++-
src/backend/tcop/postgres.c | 25 +-
src/backend/utils/adt/enum.c | 2 +-
src/backend/utils/adt/jsonfuncs.c | 1 +
src/backend/utils/adt/lockfuncs.c | 9 +-
src/backend/utils/adt/pgstatfuncs.c | 1 +
src/backend/utils/adt/rowtypes.c | 12 +
src/backend/utils/adt/xid.c | 37 +-
src/backend/utils/adt/xid8funcs.c | 83 +-
src/backend/utils/cache/catcache.c | 1 +
src/backend/utils/cache/relcache.c | 3 +-
src/backend/utils/fmgr/fmgr.c | 4 +-
src/backend/utils/misc/guc.c | 11 +
src/backend/utils/misc/guc_tables.c | 180 +--
src/backend/utils/misc/help_config.c | 8 +-
src/backend/utils/misc/pg_controldata.c | 2 +-
src/backend/utils/misc/postgresql.conf.sample | 4 +-
src/backend/utils/sort/tuplesortvariants.c | 14 +-
src/backend/utils/time/combocid.c | 18 +-
src/backend/utils/time/snapmgr.c | 25 +-
src/bin/initdb/initdb.c | 60 +-
src/bin/initdb/t/001_initdb.pl | 12 +-
src/bin/pg_amcheck/t/004_verify_heapam.pl | 210 ++-
src/bin/pg_controldata/pg_controldata.c | 2 +-
src/bin/pg_dump/pg_dump.c | 27 +-
src/bin/pg_dump/pg_dump.h | 8 +-
src/bin/pg_resetwal/pg_resetwal.c | 56 +-
src/bin/pg_upgrade/Makefile | 1 +
src/bin/pg_upgrade/check.c | 149 +-
src/bin/pg_upgrade/controldata.c | 17 +-
src/bin/pg_upgrade/file.c | 99 +-
src/bin/pg_upgrade/meson.build | 1 +
src/bin/pg_upgrade/pg_upgrade.c | 145 +-
src/bin/pg_upgrade/pg_upgrade.h | 34 +-
src/bin/pg_upgrade/relfilenumber.c | 34 +-
src/bin/pg_upgrade/segresize.c | 586 ++++++++
src/bin/pg_upgrade/t/002_pg_upgrade.pl | 20 +-
src/bin/pg_upgrade/version.c | 104 +-
src/bin/pg_verifybackup/t/003_corruption.pl | 2 +-
src/bin/pg_waldump/pg_waldump.c | 2 +-
src/include/access/clog.h | 2 +-
src/include/access/ginblock.h | 11 +-
src/include/access/gist.h | 2 +-
src/include/access/heapam.h | 20 +-
src/include/access/heapam_xlog.h | 38 +-
src/include/access/heaptoast.h | 11 +-
src/include/access/htup.h | 18 +-
src/include/access/htup_details.h | 237 +++-
src/include/access/multixact.h | 11 +-
src/include/access/nbtree.h | 10 +
src/include/access/rewriteheap.h | 4 +-
src/include/access/rmgrlist.h | 1 +
src/include/access/slru.h | 10 +-
src/include/access/tableam.h | 2 +-
src/include/access/transam.h | 89 +-
src/include/access/tupmacs.h | 3 +-
src/include/access/xact.h | 13 +-
src/include/access/xloginsert.h | 1 +
src/include/access/xlogreader.h | 4 -
src/include/access/xlogrecord.h | 5 +-
src/include/c.h | 27 +-
src/include/catalog/catversion.h | 3 +-
src/include/catalog/pg_amproc.dat | 4 +-
src/include/catalog/pg_control.h | 6 +
src/include/catalog/pg_operator.dat | 8 +-
src/include/catalog/pg_proc.dat | 12 +-
src/include/catalog/pg_type.dat | 4 +-
src/include/catalog/pg_type.h | 5 +
src/include/commands/vacuum.h | 30 +-
src/include/fmgr.h | 2 +
src/include/nodes/pg_list.h | 4 +
src/include/pg_config.h.in | 3 +
src/include/port/pg_lfind.h | 163 ++-
src/include/postgres.h | 9 +-
src/include/postmaster/autovacuum.h | 4 +-
src/include/storage/buf_internals.h | 5 +-
src/include/storage/bufmgr.h | 6 +
src/include/storage/bufpage.h | 232 ++-
src/include/storage/itemid.h | 2 +
src/include/storage/lock.h | 14 +-
src/include/storage/proc.h | 7 +-
src/include/storage/standby.h | 2 +-
src/include/utils/combocid.h | 2 +-
src/include/utils/rel.h | 12 +-
src/include/utils/xid8.h | 4 +-
src/pl/plperl/plperl.c | 4 +-
src/pl/plpgsql/src/pl_comp.c | 4 +-
src/pl/plpgsql/src/pl_exec.c | 2 +
src/pl/plpython/plpy_procedure.c | 4 +-
src/pl/tcl/pltcl.c | 4 +-
src/test/Makefile | 3 +-
src/test/meson.build | 1 +
src/test/modules/test_lfind/test_lfind.c | 30 +-
src/test/perl/PostgreSQL/Test/Cluster.pm | 4 +-
src/test/recovery/t/003_recovery_targets.pl | 2 +-
src/test/regress/expected/indirect_toast.out | 8 +
src/test/regress/expected/insert.out | 16 +-
src/test/regress/expected/opr_sanity.out | 6 +-
src/test/regress/expected/select_views.out | 70 +-
src/test/regress/expected/txid.out | 8 +-
src/test/regress/expected/type_sanity.out | 5 +-
src/test/regress/expected/xid.out | 14 +-
src/test/regress/expected/xid64.out | 92 ++
src/test/regress/parallel_schedule | 2 +-
src/test/regress/pg_regress.c | 2 +-
src/test/regress/regress.c | 291 ++++
src/test/regress/sql/indirect_toast.sql | 11 +
src/test/regress/sql/insert.sql | 17 +-
src/test/regress/sql/select_views.sql | 2 +-
src/test/regress/sql/type_sanity.sql | 5 +-
src/test/regress/sql/xid64.sql | 84 ++
src/test/xid-64/Makefile | 22 +
src/test/xid-64/README | 16 +
src/test/xid-64/meson.build | 15 +
src/test/xid-64/t/001_test_large_xids.pl | 54 +
src/test/xid-64/t/002_test_gucs.pl | 79 ++
src/test/xid-64/t/003_test_integrity.pl | 58 +
src/test/xid-64/t/004_test_relminmxid.pl | 90 ++
src/test/xid-64/t/005_stream_subxact.pl | 100 ++
src/test/xid-64/t/006_zeropage.pl | 33 +
src/tools/msvc/Solution.pm | 1 +
src/tools/pgindent/typedefs.list | 4 +-
217 files changed, 6981 insertions(+), 2606 deletions(-)
create mode 100644 contrib/pageinspect/expected/hash_1.out
create mode 100644 contrib/pageinspect/pageinspect--1.10--1.11.sql
create mode 100644 src/backend/storage/buffer/heap_convert.c
create mode 100644 src/bin/pg_upgrade/segresize.c
create mode 100644 src/test/regress/expected/xid64.out
create mode 100644 src/test/regress/sql/xid64.sql
create mode 100644 src/test/xid-64/Makefile
create mode 100644 src/test/xid-64/README
create mode 100644 src/test/xid-64/meson.build
create mode 100644 src/test/xid-64/t/001_test_large_xids.pl
create mode 100644 src/test/xid-64/t/002_test_gucs.pl
create mode 100644 src/test/xid-64/t/003_test_integrity.pl
create mode 100644 src/test/xid-64/t/004_test_relminmxid.pl
create mode 100644 src/test/xid-64/t/005_stream_subxact.pl
create mode 100644 src/test/xid-64/t/006_zeropage.pl
diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c
index c9e71e4e50..63e043e6dd 100644
--- a/contrib/amcheck/verify_heapam.c
+++ b/contrib/amcheck/verify_heapam.c
@@ -17,6 +17,7 @@
#include "access/multixact.h"
#include "access/toast_internals.h"
#include "access/visibilitymap.h"
+#include "catalog/catalog.h"
#include "catalog/pg_am.h"
#include "funcapi.h"
#include "miscadmin.h"
@@ -85,7 +86,7 @@ typedef struct HeapCheckContext
* from them.
*/
FullTransactionId next_fxid; /* ShmemVariableCache->nextXid */
- TransactionId next_xid; /* 32-bit version of next_fxid */
+ TransactionId next_xid; /* 64-bit version of next_fxid */
TransactionId oldest_xid; /* ShmemVariableCache->oldestXid */
FullTransactionId oldest_fxid; /* 64-bit version of oldest_xid, computed
* relative to next_fxid */
@@ -126,6 +127,7 @@ typedef struct HeapCheckContext
uint16 lp_len;
uint16 lp_off;
HeapTupleHeader tuphdr;
+ HeapTupleData tuple;
int natts;
/* Values for iterating over attributes within the tuple */
@@ -165,8 +167,6 @@ static bool check_tuple_visibility(HeapCheckContext *ctx);
static void report_corruption(HeapCheckContext *ctx, char *msg);
static void report_toast_corruption(HeapCheckContext *ctx,
ToastedAttribute *ta, char *msg);
-static FullTransactionId FullTransactionIdFromXidAndCtx(TransactionId xid,
- const HeapCheckContext *ctx);
static void update_cached_xid_range(HeapCheckContext *ctx);
static void update_cached_mxid_range(HeapCheckContext *ctx);
static XidBoundsViolation check_mxid_in_range(MultiXactId mxid,
@@ -390,7 +390,7 @@ verify_heapam(PG_FUNCTION_ARGS)
update_cached_xid_range(&ctx);
update_cached_mxid_range(&ctx);
ctx.relfrozenxid = ctx.rel->rd_rel->relfrozenxid;
- ctx.relfrozenfxid = FullTransactionIdFromXidAndCtx(ctx.relfrozenxid, &ctx);
+ ctx.relfrozenfxid = FullTransactionIdFromXid(ctx.relfrozenxid);
ctx.relminmxid = ctx.rel->rd_rel->relminmxid;
if (TransactionIdIsNormal(ctx.relfrozenxid))
@@ -505,6 +505,12 @@ verify_heapam(PG_FUNCTION_ARGS)
ctx.tuphdr = (HeapTupleHeader) PageGetItem(ctx.page, ctx.itemid);
ctx.natts = HeapTupleHeaderGetNatts(ctx.tuphdr);
+ ctx.tuple.t_data = ctx.tuphdr;
+ ctx.tuple.t_len = ItemIdGetLength(ctx.itemid);
+ ctx.tuple.t_tableOid = RelationGetRelid(ctx.rel);
+ HeapTupleCopyBaseFromPage(ctx.buffer, &ctx.tuple, ctx.page,
+ IsToastRelation(ctx.rel));
+
/* Ok, ready to check this next tuple */
check_tuple(&ctx);
}
@@ -728,12 +734,13 @@ check_tuple_visibility(HeapCheckContext *ctx)
XidCommitStatus xmin_status;
XidCommitStatus xvac_status;
XidCommitStatus xmax_status;
+ HeapTuple tuple = &ctx->tuple;
HeapTupleHeader tuphdr = ctx->tuphdr;
ctx->tuple_could_be_pruned = true; /* have not yet proven otherwise */
/* If xmin is normal, it should be within valid range */
- xmin = HeapTupleHeaderGetXmin(tuphdr);
+ xmin = HeapTupleGetXmin(tuple);
switch (get_xid_status(xmin, ctx, &xmin_status))
{
case XID_INVALID:
@@ -743,19 +750,19 @@ check_tuple_visibility(HeapCheckContext *ctx)
report_corruption(ctx,
psprintf("xmin %llu equals or exceeds next valid transaction ID %llu",
(unsigned long long) xmin,
- (unsigned long long) U64FromFullTransactionId(ctx->next_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->next_fxid)));
return false;
case XID_PRECEDES_CLUSTERMIN:
report_corruption(ctx,
psprintf("xmin %llu precedes oldest valid transaction ID %llu",
(unsigned long long) xmin,
- (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid)));
return false;
case XID_PRECEDES_RELMIN:
report_corruption(ctx,
psprintf("xmin %llu precedes relation freeze threshold %llu",
(unsigned long long) xmin,
- (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid)));
return false;
}
@@ -781,19 +788,19 @@ check_tuple_visibility(HeapCheckContext *ctx)
report_corruption(ctx,
psprintf("old-style VACUUM FULL transaction ID %llu for moved off tuple equals or exceeds next valid transaction ID %llu",
(unsigned long long) xvac,
- (unsigned long long) U64FromFullTransactionId(ctx->next_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->next_fxid)));
return false;
case XID_PRECEDES_RELMIN:
report_corruption(ctx,
psprintf("old-style VACUUM FULL transaction ID %llu for moved off tuple precedes relation freeze threshold %llu",
(unsigned long long) xvac,
- (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid)));
return false;
case XID_PRECEDES_CLUSTERMIN:
report_corruption(ctx,
psprintf("old-style VACUUM FULL transaction ID %llu for moved off tuple precedes oldest valid transaction ID %llu",
(unsigned long long) xvac,
- (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid)));
return false;
case XID_BOUNDS_OK:
break;
@@ -847,19 +854,19 @@ check_tuple_visibility(HeapCheckContext *ctx)
report_corruption(ctx,
psprintf("old-style VACUUM FULL transaction ID %llu for moved in tuple equals or exceeds next valid transaction ID %llu",
(unsigned long long) xvac,
- (unsigned long long) U64FromFullTransactionId(ctx->next_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->next_fxid)));
return false;
case XID_PRECEDES_RELMIN:
report_corruption(ctx,
psprintf("old-style VACUUM FULL transaction ID %llu for moved in tuple precedes relation freeze threshold %llu",
(unsigned long long) xvac,
- (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid)));
return false;
case XID_PRECEDES_CLUSTERMIN:
report_corruption(ctx,
psprintf("old-style VACUUM FULL transaction ID %llu for moved in tuple precedes oldest valid transaction ID %llu",
(unsigned long long) xvac,
- (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid)));
return false;
case XID_BOUNDS_OK:
break;
@@ -936,7 +943,7 @@ check_tuple_visibility(HeapCheckContext *ctx)
* HEAP_XMAX_IS_LOCKED_ONLY is true, but for now we err on the side of
* avoiding possibly-bogus complaints about missing TOAST entries.
*/
- xmax = HeapTupleHeaderGetRawXmax(tuphdr);
+ xmax = HeapTupleGetRawXmax(tuple);
switch (check_mxid_valid_in_rel(xmax, ctx))
{
case XID_INVALID:
@@ -995,7 +1002,7 @@ check_tuple_visibility(HeapCheckContext *ctx)
* We already checked above that this multixact is within limits for
* this table. Now check the update xid from this multixact.
*/
- xmax = HeapTupleGetUpdateXid(tuphdr);
+ xmax = HeapTupleGetUpdateXid(tuple);
switch (get_xid_status(xmax, ctx, &xmax_status))
{
case XID_INVALID:
@@ -1007,19 +1014,19 @@ check_tuple_visibility(HeapCheckContext *ctx)
report_corruption(ctx,
psprintf("update xid %llu equals or exceeds next valid transaction ID %llu",
(unsigned long long) xmax,
- (unsigned long long) U64FromFullTransactionId(ctx->next_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->next_fxid)));
return true;
case XID_PRECEDES_RELMIN:
report_corruption(ctx,
psprintf("update xid %llu precedes relation freeze threshold %llu",
(unsigned long long) xmax,
- (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid)));
return true;
case XID_PRECEDES_CLUSTERMIN:
report_corruption(ctx,
psprintf("update xid %llu precedes oldest valid transaction ID %llu",
(unsigned long long) xmax,
- (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid)));
return true;
case XID_BOUNDS_OK:
break;
@@ -1059,26 +1066,26 @@ check_tuple_visibility(HeapCheckContext *ctx)
}
/* xmax is an XID, not a MXID. Sanity check it. */
- xmax = HeapTupleHeaderGetRawXmax(tuphdr);
+ xmax = HeapTupleGetRawXmax(tuple);
switch (get_xid_status(xmax, ctx, &xmax_status))
{
case XID_IN_FUTURE:
report_corruption(ctx,
psprintf("xmax %llu equals or exceeds next valid transaction ID %llu",
(unsigned long long) xmax,
- (unsigned long long) U64FromFullTransactionId(ctx->next_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->next_fxid)));
return false; /* corrupt */
case XID_PRECEDES_RELMIN:
report_corruption(ctx,
psprintf("xmax %llu precedes relation freeze threshold %llu",
(unsigned long long) xmax,
- (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid)));
return false; /* corrupt */
case XID_PRECEDES_CLUSTERMIN:
report_corruption(ctx,
psprintf("xmax %llu precedes oldest valid transaction ID %llu",
(unsigned long long) xmax,
- (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid)));
+ (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid)));
return false; /* corrupt */
case XID_BOUNDS_OK:
case XID_INVALID:
@@ -1553,24 +1560,6 @@ check_tuple(HeapCheckContext *ctx)
ctx->attnum = -1;
}
-/*
- * Convert a TransactionId into a FullTransactionId using our cached values of
- * the valid transaction ID range. It is the caller's responsibility to have
- * already updated the cached values, if necessary.
- */
-static FullTransactionId
-FullTransactionIdFromXidAndCtx(TransactionId xid, const HeapCheckContext *ctx)
-{
- uint32 epoch;
-
- if (!TransactionIdIsNormal(xid))
- return FullTransactionIdFromEpochAndXid(0, xid);
- epoch = EpochFromFullTransactionId(ctx->next_fxid);
- if (xid > ctx->next_xid)
- epoch--;
- return FullTransactionIdFromEpochAndXid(epoch, xid);
-}
-
/*
* Update our cached range of valid transaction IDs.
*/
@@ -1584,7 +1573,7 @@ update_cached_xid_range(HeapCheckContext *ctx)
LWLockRelease(XidGenLock);
/* And compute alternate versions of the same */
- ctx->oldest_fxid = FullTransactionIdFromXidAndCtx(ctx->oldest_xid, ctx);
+ ctx->oldest_fxid = FullTransactionIdFromXid(ctx->oldest_xid);
ctx->next_xid = XidFromFullTransactionId(ctx->next_fxid);
}
@@ -1684,7 +1673,7 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx,
}
/* Check if the xid is within bounds */
- fxid = FullTransactionIdFromXidAndCtx(xid, ctx);
+ fxid = FullTransactionIdFromXid(xid);
if (!fxid_in_cached_range(fxid, ctx))
{
/*
@@ -1693,7 +1682,6 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx,
* performed the full xid conversion, reconvert.
*/
update_cached_xid_range(ctx);
- fxid = FullTransactionIdFromXidAndCtx(xid, ctx);
}
if (FullTransactionIdPrecedesOrEquals(ctx->next_fxid, fxid))
@@ -1717,8 +1705,7 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx,
*status = XID_COMMITTED;
LWLockAcquire(XactTruncationLock, LW_SHARED);
clog_horizon =
- FullTransactionIdFromXidAndCtx(ShmemVariableCache->oldestClogXid,
- ctx);
+ FullTransactionIdFromXid(ShmemVariableCache->oldestClogXid);
if (FullTransactionIdPrecedesOrEquals(clog_horizon, fxid))
{
if (TransactionIdIsCurrentTransactionId(xid))
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index 9021d156eb..d2720124d7 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -526,7 +526,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
* avoid this.
*/
if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
- !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
+ !TransactionIdPrecedes(HeapTupleGetXmin(rel->rd_indextuple),
snapshot->xmin))
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c
index 6161df2790..ed4dcbe9a3 100644
--- a/contrib/hstore/hstore_io.c
+++ b/contrib/hstore/hstore_io.c
@@ -853,6 +853,7 @@ hstore_from_record(PG_FUNCTION_ARGS)
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
tuple.t_data = rec;
+ HeapTupleSetZeroBase(&tuple);
values = (Datum *) palloc(ncolumns * sizeof(Datum));
nulls = (bool *) palloc(ncolumns * sizeof(bool));
@@ -1006,6 +1007,7 @@ hstore_populate_record(PG_FUNCTION_ARGS)
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
tuple.t_data = rec;
+ HeapTupleSetZeroBase(&tuple);
}
/*
diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile
index 5c0736564a..5ca80c9d76 100644
--- a/contrib/pageinspect/Makefile
+++ b/contrib/pageinspect/Makefile
@@ -13,7 +13,8 @@ OBJS = \
rawpage.o
EXTENSION = pageinspect
-DATA = pageinspect--1.9--1.10.sql pageinspect--1.8--1.9.sql \
+DATA = pageinspect--1.10--1.11.sql \
+ pageinspect--1.9--1.10.sql pageinspect--1.8--1.9.sql \
pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \
pageinspect--1.5.sql pageinspect--1.5--1.6.sql \
pageinspect--1.4--1.5.sql pageinspect--1.3--1.4.sql \
diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c
index b18aa0af7f..7b3a57dcf5 100644
--- a/contrib/pageinspect/btreefuncs.c
+++ b/contrib/pageinspect/btreefuncs.c
@@ -105,6 +105,9 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
stat->page_size = PageGetPageSize(page);
+ stat->btpo_prev = opaque->btpo_prev;
+ stat->btpo_level = opaque->btpo_level;
+
/* page type (flags) */
if (P_ISDELETED(opaque))
{
@@ -126,11 +129,18 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
FullTransactionId safexid = BTPageGetDeleteXid(page);
elog(DEBUG2, "deleted page from block %u has safexid %llu",
- blkno, (unsigned long long) U64FromFullTransactionId(safexid));
+ blkno, (unsigned long long) XidFromFullTransactionId(safexid));
}
else
+ {
+ ShortTransactionId safexid = BTP_GET_XACT(opaque);
+
+ stat->btpo_prev = 0;
+ stat->btpo_level = 0;
+
elog(DEBUG2, "deleted page from block %u has safexid %u",
- blkno, opaque->btpo_level);
+ blkno, safexid);
+ }
/* Don't interpret BTDeletedPageData as index tuples */
maxoff = InvalidOffsetNumber;
@@ -145,9 +155,7 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
stat->type = 'i';
/* btpage opaque data */
- stat->btpo_prev = opaque->btpo_prev;
stat->btpo_next = opaque->btpo_next;
- stat->btpo_level = opaque->btpo_level;
stat->btpo_flags = opaque->btpo_flags;
stat->btpo_cycleid = opaque->btpo_cycleid;
diff --git a/contrib/pageinspect/expected/btree.out b/contrib/pageinspect/expected/btree.out
index 035a81a759..5fb9122466 100644
--- a/contrib/pageinspect/expected/btree.out
+++ b/contrib/pageinspect/expected/btree.out
@@ -94,8 +94,8 @@ SELECT bt_page_items('aaa'::bytea);
ERROR: invalid page size
-- invalid special area size
CREATE INDEX test1_a_brin ON test1 USING brin(a);
-SELECT bt_page_items(get_raw_page('test1', 0));
-ERROR: input page is not a valid btree page
+-- XXX: false positive in 64xids due to equal sizes of BTPageOpaque and HeapPageSpecialData
+-- SELECT bt_page_items(get_raw_page('test1', 0));
SELECT bt_page_items(get_raw_page('test1_a_brin', 0));
ERROR: input page is not a valid btree page
\set VERBOSITY default
diff --git a/contrib/pageinspect/expected/hash_1.out b/contrib/pageinspect/expected/hash_1.out
new file mode 100644
index 0000000000..5e64eb9260
--- /dev/null
+++ b/contrib/pageinspect/expected/hash_1.out
@@ -0,0 +1,166 @@
+CREATE TABLE test_hash (a int, b text);
+INSERT INTO test_hash VALUES (1, 'one');
+CREATE INDEX test_hash_a_idx ON test_hash USING hash (a);
+\x
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 0));
+-[ RECORD 1 ]--+---------
+hash_page_type | metapage
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 1));
+-[ RECORD 1 ]--+-------
+hash_page_type | bucket
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 2));
+-[ RECORD 1 ]--+-------
+hash_page_type | bucket
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 3));
+-[ RECORD 1 ]--+-------
+hash_page_type | bucket
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 4));
+-[ RECORD 1 ]--+-------
+hash_page_type | bucket
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 5));
+-[ RECORD 1 ]--+-------
+hash_page_type | bitmap
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 6));
+ERROR: block number 6 is out of range for relation "test_hash_a_idx"
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', -1);
+ERROR: invalid block number
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 0);
+ERROR: invalid overflow block number 0
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 1);
+ERROR: invalid overflow block number 1
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 2);
+ERROR: invalid overflow block number 2
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 3);
+ERROR: invalid overflow block number 3
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 4);
+ERROR: invalid overflow block number 4
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 5);
+ERROR: invalid overflow block number 5
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 6);
+ERROR: block number 6 is out of range for relation "test_hash_a_idx"
+SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
+lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
+hash_metapage_info(get_raw_page('test_hash_a_idx', 0));
+-[ RECORD 1 ]------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------
+magic | 105121344
+version | 4
+ntuples | 1
+bsize | 8156
+bmsize | 4096
+bmshift | 15
+maxbucket | 3
+highmask | 7
+lowmask | 3
+ovflpoint | 2
+firstfree | 0
+nmaps | 1
+procid | 450
+spares | {0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
+mapp | {5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
+
+SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
+lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
+hash_metapage_info(get_raw_page('test_hash_a_idx', 1));
+ERROR: page is not a hash meta page
+SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
+lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
+hash_metapage_info(get_raw_page('test_hash_a_idx', 2));
+ERROR: page is not a hash meta page
+SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
+lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
+hash_metapage_info(get_raw_page('test_hash_a_idx', 3));
+ERROR: page is not a hash meta page
+SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
+lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
+hash_metapage_info(get_raw_page('test_hash_a_idx', 4));
+ERROR: page is not a hash meta page
+SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
+lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
+hash_metapage_info(get_raw_page('test_hash_a_idx', 5));
+ERROR: page is not a hash meta page
+SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
+hasho_bucket, hasho_flag, hasho_page_id FROM
+hash_page_stats(get_raw_page('test_hash_a_idx', 0));
+ERROR: page is not a hash bucket or overflow page
+SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
+hasho_bucket, hasho_flag, hasho_page_id FROM
+hash_page_stats(get_raw_page('test_hash_a_idx', 1));
+-[ RECORD 1 ]---+-----------
+live_items | 0
+dead_items | 0
+page_size | 8192
+hasho_prevblkno | 3
+hasho_nextblkno | 4294967295
+hasho_bucket | 0
+hasho_flag | 2
+hasho_page_id | 65408
+
+SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
+hasho_bucket, hasho_flag, hasho_page_id FROM
+hash_page_stats(get_raw_page('test_hash_a_idx', 2));
+-[ RECORD 1 ]---+-----------
+live_items | 0
+dead_items | 0
+page_size | 8192
+hasho_prevblkno | 3
+hasho_nextblkno | 4294967295
+hasho_bucket | 1
+hasho_flag | 2
+hasho_page_id | 65408
+
+SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
+hasho_bucket, hasho_flag, hasho_page_id FROM
+hash_page_stats(get_raw_page('test_hash_a_idx', 3));
+-[ RECORD 1 ]---+-----------
+live_items | 1
+dead_items | 0
+page_size | 8192
+hasho_prevblkno | 3
+hasho_nextblkno | 4294967295
+hasho_bucket | 2
+hasho_flag | 2
+hasho_page_id | 65408
+
+SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
+hasho_bucket, hasho_flag, hasho_page_id FROM
+hash_page_stats(get_raw_page('test_hash_a_idx', 4));
+-[ RECORD 1 ]---+-----------
+live_items | 0
+dead_items | 0
+page_size | 8192
+hasho_prevblkno | 3
+hasho_nextblkno | 4294967295
+hasho_bucket | 3
+hasho_flag | 2
+hasho_page_id | 65408
+
+SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
+hasho_bucket, hasho_flag, hasho_page_id FROM
+hash_page_stats(get_raw_page('test_hash_a_idx', 5));
+ERROR: page is not a hash bucket or overflow page
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 0));
+ERROR: page is not a hash bucket or overflow page
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 1));
+(0 rows)
+
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 2));
+(0 rows)
+
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 3));
+-[ RECORD 1 ]----------
+itemoffset | 1
+ctid | (0,1)
+data | 2389907270
+
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 4));
+(0 rows)
+
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 5));
+ERROR: page is not a hash bucket or overflow page
+DROP TABLE test_hash;
diff --git a/contrib/pageinspect/expected/oldextversions.out b/contrib/pageinspect/expected/oldextversions.out
index f5c4b61bd7..00323d392d 100644
--- a/contrib/pageinspect/expected/oldextversions.out
+++ b/contrib/pageinspect/expected/oldextversions.out
@@ -40,16 +40,16 @@ SELECT * FROM bt_page_items('test1_a_idx', 1);
-- pagesize in pageinspect >= 1.10.
ALTER EXTENSION pageinspect UPDATE TO '1.9';
\df page_header
- List of functions
- Schema | Name | Result data type | Argument data types | Type
---------+-------------+------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------
- public | page_header | record | page bytea, OUT lsn pg_lsn, OUT checksum smallint, OUT flags smallint, OUT lower smallint, OUT upper smallint, OUT special smallint, OUT pagesize smallint, OUT version smallint, OUT prune_xid xid | func
+ List of functions
+ Schema | Name | Result data type | Argument data types | Type
+--------+-------------+------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------
+ public | page_header | record | page bytea, OUT lsn pg_lsn, OUT checksum smallint, OUT flags smallint, OUT lower smallint, OUT upper smallint, OUT special smallint, OUT pagesize smallint, OUT version smallint, OUT xid_base xid, OUT multi_base xid, OUT prune_xid xid | func
(1 row)
SELECT pagesize, version FROM page_header(get_raw_page('test1', 0));
pagesize | version
----------+---------
- 8192 | 4
+ 8192 | 5
(1 row)
DROP TABLE test1;
diff --git a/contrib/pageinspect/expected/page.out b/contrib/pageinspect/expected/page.out
index 3bdc37bbf5..5ca00378df 100644
--- a/contrib/pageinspect/expected/page.out
+++ b/contrib/pageinspect/expected/page.out
@@ -48,7 +48,7 @@ SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0);
SELECT pagesize, version FROM page_header(get_raw_page('test1', 0));
pagesize | version
----------+---------
- 8192 | 4
+ 8192 | 5
(1 row)
SELECT page_checksum(get_raw_page('test1', 0), 0) IS NOT NULL AS silly_checksum_test;
@@ -69,19 +69,19 @@ SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bi
SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0));
fsm_page_contents
-------------------
- 0: 254 +
- 1: 254 +
- 3: 254 +
- 7: 254 +
- 15: 254 +
- 31: 254 +
- 63: 254 +
- 127: 254 +
- 255: 254 +
- 511: 254 +
- 1023: 254 +
- 2047: 254 +
- 4095: 254 +
+ 0: 253 +
+ 1: 253 +
+ 3: 253 +
+ 7: 253 +
+ 15: 253 +
+ 31: 253 +
+ 63: 253 +
+ 127: 253 +
+ 255: 253 +
+ 511: 253 +
+ 1023: 253 +
+ 2047: 253 +
+ 4095: 253 +
fp_next_slot: 0 +
(1 row)
diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
index aed2753253..c65f7099b7 100644
--- a/contrib/pageinspect/heapfuncs.c
+++ b/contrib/pageinspect/heapfuncs.c
@@ -163,7 +163,7 @@ heap_page_items(PG_FUNCTION_ARGS)
inter_call_data->tupd = tupdesc;
inter_call_data->offset = FirstOffsetNumber;
- inter_call_data->page = VARDATA(raw_page);
+ inter_call_data->page = get_page_from_raw(raw_page);
fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
fctx->user_fctx = inter_call_data;
@@ -211,6 +211,7 @@ heap_page_items(PG_FUNCTION_ARGS)
lp_offset == MAXALIGN(lp_offset) &&
lp_offset + lp_len <= raw_page_size)
{
+ HeapTupleData tup;
HeapTupleHeader tuphdr;
bytea *tuple_data_bytea;
int tuple_data_len;
@@ -218,9 +219,11 @@ heap_page_items(PG_FUNCTION_ARGS)
/* Extract information from the tuple header */
tuphdr = (HeapTupleHeader) PageGetItem(page, id);
+ tup.t_data = tuphdr;
+ HeapTupleCopyBaseFromPage(InvalidBuffer, &tup, page, false);
- values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr));
- values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
+ values[4] = TransactionIdGetDatum(HeapTupleGetXmin(&tup));
+ values[5] = TransactionIdGetDatum(HeapTupleGetRawXmax(&tup));
/* shared with xvac */
values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr));
values[7] = PointerGetDatum(&tuphdr->t_ctid);
diff --git a/contrib/pageinspect/meson.build b/contrib/pageinspect/meson.build
index 4af8153e4f..8af29aa7fb 100644
--- a/contrib/pageinspect/meson.build
+++ b/contrib/pageinspect/meson.build
@@ -27,6 +27,7 @@ install_data(
'pageinspect--1.7--1.8.sql',
'pageinspect--1.8--1.9.sql',
'pageinspect--1.9--1.10.sql',
+ 'pageinspect--1.10--1.11.sql',
'pageinspect.control',
kwargs: contrib_data_args,
)
diff --git a/contrib/pageinspect/pageinspect--1.10--1.11.sql b/contrib/pageinspect/pageinspect--1.10--1.11.sql
new file mode 100644
index 0000000000..236f18aa2f
--- /dev/null
+++ b/contrib/pageinspect/pageinspect--1.10--1.11.sql
@@ -0,0 +1,145 @@
+/* contrib/pageinspect/pageinspect--1.10--1.11.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pageinspect UPDATE TO '1.11'" to load this file. \quit
+
+--
+-- gist_page_opaque_info()
+--
+DROP FUNCTION gist_page_opaque_info(bytea);
+CREATE FUNCTION gist_page_opaque_info(IN page bytea,
+ OUT lsn pg_lsn,
+ OUT nsn pg_lsn,
+ OUT rightlink bigint,
+ OUT flags text[])
+AS 'MODULE_PATHNAME', 'gist_page_opaque_info'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+
+--
+-- gist_page_items_bytea()
+--
+DROP FUNCTION gist_page_items_bytea(bytea);
+CREATE FUNCTION gist_page_items_bytea(IN page bytea,
+ OUT itemoffset smallint,
+ OUT ctid tid,
+ OUT itemlen smallint,
+ OUT dead boolean,
+ OUT key_data bytea)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'gist_page_items_bytea'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- gist_page_items()
+--
+DROP FUNCTION gist_page_items(bytea, regclass);
+CREATE FUNCTION gist_page_items(IN page bytea,
+ IN index_oid regclass,
+ OUT itemoffset smallint,
+ OUT ctid tid,
+ OUT itemlen smallint,
+ OUT dead boolean,
+ OUT keys text)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'gist_page_items'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- get_raw_page()
+--
+DROP FUNCTION get_raw_page(text, int8);
+DROP FUNCTION IF EXISTS get_raw_page(text, int4);
+CREATE FUNCTION get_raw_page(text, int8)
+RETURNS bytea
+AS 'MODULE_PATHNAME', 'get_raw_page_1_9'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+DROP FUNCTION get_raw_page(text, text, int8);
+DROP FUNCTION IF EXISTS get_raw_page(text, text, int4);
+CREATE FUNCTION get_raw_page(text, text, int8)
+RETURNS bytea
+AS 'MODULE_PATHNAME', 'get_raw_page_fork_1_9'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- page_checksum()
+--
+DROP FUNCTION page_checksum(IN page bytea, IN blkno int8);
+DROP FUNCTION IF EXISTS page_checksum(IN page bytea, IN blkno int4);
+CREATE FUNCTION page_checksum(IN page bytea, IN blkno int8)
+RETURNS smallint
+AS 'MODULE_PATHNAME', 'page_checksum_1_9'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- bt_metap()
+--
+DROP FUNCTION bt_metap(text);
+CREATE FUNCTION bt_metap(IN relname text,
+ OUT magic int4,
+ OUT version int4,
+ OUT root int8,
+ OUT level int8,
+ OUT fastroot int8,
+ OUT fastlevel int8,
+ OUT last_cleanup_num_delpages int8,
+ OUT last_cleanup_num_tuples float8,
+ OUT allequalimage boolean)
+AS 'MODULE_PATHNAME', 'bt_metap'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- bt_page_stats()
+--
+DROP FUNCTION bt_page_stats(text, int8);
+DROP FUNCTION IF EXISTS bt_page_stats(text, int4);
+CREATE FUNCTION bt_page_stats(IN relname text, IN blkno int8,
+ OUT blkno int8,
+ OUT type "char",
+ OUT live_items int4,
+ OUT dead_items int4,
+ OUT avg_item_size int4,
+ OUT page_size int4,
+ OUT free_size int4,
+ OUT btpo_prev int8,
+ OUT btpo_next int8,
+ OUT btpo_level int8,
+ OUT btpo_flags int4)
+AS 'MODULE_PATHNAME', 'bt_page_stats_1_9'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- bt_page_items()
+--
+DROP FUNCTION bt_page_items(text, int8);
+DROP FUNCTION IF EXISTS bt_page_items(text, int4);
+CREATE FUNCTION bt_page_items(IN relname text, IN blkno int8,
+ OUT itemoffset smallint,
+ OUT ctid tid,
+ OUT itemlen smallint,
+ OUT nulls bool,
+ OUT vars bool,
+ OUT data text,
+ OUT dead boolean,
+ OUT htid tid,
+ OUT tids tid[])
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'bt_page_items_1_9'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- brin_page_items()
+--
+DROP FUNCTION brin_page_items(IN page bytea, IN index_oid regclass);
+CREATE FUNCTION brin_page_items(IN page bytea, IN index_oid regclass,
+ OUT itemoffset int,
+ OUT blknum int8,
+ OUT attnum int,
+ OUT allnulls bool,
+ OUT hasnulls bool,
+ OUT placeholder bool,
+ OUT value text)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'brin_page_items'
+LANGUAGE C STRICT PARALLEL SAFE;
diff --git a/contrib/pageinspect/pageinspect--1.5.sql b/contrib/pageinspect/pageinspect--1.5.sql
index 1e40c3c97e..fdbd2995a2 100644
--- a/contrib/pageinspect/pageinspect--1.5.sql
+++ b/contrib/pageinspect/pageinspect--1.5.sql
@@ -28,6 +28,8 @@ CREATE FUNCTION page_header(IN page bytea,
OUT special smallint,
OUT pagesize smallint,
OUT version smallint,
+ OUT xid_base xid,
+ OUT multi_base xid,
OUT prune_xid xid)
AS 'MODULE_PATHNAME', 'page_header'
LANGUAGE C STRICT PARALLEL SAFE;
diff --git a/contrib/pageinspect/pageinspect.control b/contrib/pageinspect/pageinspect.control
index 7cdf37913d..f277413dd8 100644
--- a/contrib/pageinspect/pageinspect.control
+++ b/contrib/pageinspect/pageinspect.control
@@ -1,5 +1,5 @@
# pageinspect extension
comment = 'inspect the contents of database pages at a low level'
-default_version = '1.10'
+default_version = '1.11'
module_pathname = '$libdir/pageinspect'
relocatable = true
diff --git a/contrib/pageinspect/rawpage.c b/contrib/pageinspect/rawpage.c
index 90942be71e..6d50940955 100644
--- a/contrib/pageinspect/rawpage.c
+++ b/contrib/pageinspect/rawpage.c
@@ -17,6 +17,7 @@
#include "access/htup_details.h"
#include "access/relation.h"
#include "catalog/namespace.h"
#include "catalog/pg_type.h"
+#include "commands/sequence.h"
#include "funcapi.h"
@@ -251,8 +252,9 @@ page_header(PG_FUNCTION_ARGS)
Datum result;
HeapTuple tuple;
- Datum values[9];
- bool nulls[9];
+ Datum values[11];
+ bool nulls[11];
+ bool is_toast;
Page page;
PageHeader pageheader;
@@ -314,12 +316,37 @@ page_header(PG_FUNCTION_ARGS)
}
values[7] = UInt16GetDatum(PageGetPageLayoutVersion(page));
- values[8] = TransactionIdGetDatum(pageheader->pd_prune_xid);
+ is_toast = PageGetSpecialSize(page) ==
+ MAXALIGN(sizeof(ToastPageSpecialData));
+ values[8] = TransactionIdGetDatum(HeapPageGetPruneXidNoAssert((Page) page,
+ is_toast));
/* Build and return the tuple. */
-
memset(nulls, 0, sizeof(nulls));
+ if (PageGetSpecialSize(page) == MAXALIGN(sizeof(HeapPageSpecialData)))
+ {
+ /* Heap page */
+ HeapPageSpecial pageSpecial = HeapPageGetSpecial((Page) page);
+
+ values[9] = TransactionIdGetDatum(pageSpecial->pd_xid_base);
+ values[10] = TransactionIdGetDatum(pageSpecial->pd_multi_base);
+ }
+ else if (PageGetSpecialSize(page) == MAXALIGN(sizeof(ToastPageSpecialData)))
+ {
+ /* TOAST page */
+ ToastPageSpecial pageSpecial = ToastPageGetSpecial((Page) page);
+
+ values[9] = TransactionIdGetDatum(pageSpecial->pd_xid_base);
+ nulls[10] = true;
+ }
+ else
+ {
+ /* Double xmax page */
+ nulls[9] = true;
+ nulls[10] = true;
+ }
+
tuple = heap_form_tuple(tupdesc, values, nulls);
result = HeapTupleGetDatum(tuple);
diff --git a/contrib/pageinspect/sql/btree.sql b/contrib/pageinspect/sql/btree.sql
index 1f554f0f67..538d71d23a 100644
--- a/contrib/pageinspect/sql/btree.sql
+++ b/contrib/pageinspect/sql/btree.sql
@@ -40,7 +40,8 @@ SELECT bt_page_items(get_raw_page('test1_b_gist', 0));
SELECT bt_page_items('aaa'::bytea);
-- invalid special area size
CREATE INDEX test1_a_brin ON test1 USING brin(a);
-SELECT bt_page_items(get_raw_page('test1', 0));
+-- XXX: false positive in 64xids due to equal sizes of BTPageOpaque and HeapPageSpecialData
+-- SELECT bt_page_items(get_raw_page('test1', 0));
SELECT bt_page_items(get_raw_page('test1_a_brin', 0));
\set VERBOSITY default
diff --git a/contrib/pg_surgery/heap_surgery.c b/contrib/pg_surgery/heap_surgery.c
index 8a2ad9773d..439ebaf8e2 100644
--- a/contrib/pg_surgery/heap_surgery.c
+++ b/contrib/pg_surgery/heap_surgery.c
@@ -15,6 +15,7 @@
#include "access/heapam.h"
#include "access/visibilitymap.h"
#include "access/xloginsert.h"
+#include "catalog/catalog.h"
#include "catalog/pg_am_d.h"
#include "catalog/pg_proc_d.h"
#include "miscadmin.h"
@@ -272,11 +273,18 @@ heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt)
else
{
HeapTupleHeader htup;
+ HeapTupleData tuple;
Assert(heap_force_opt == HEAP_FORCE_FREEZE);
htup = (HeapTupleHeader) PageGetItem(page, itemid);
+ tuple.t_data = htup;
+ tuple.t_len = ItemIdGetLength(itemid);
+ tuple.t_tableOid = RelationGetRelid(rel);
+ HeapTupleCopyBaseFromPage(buf, &tuple, page,
+ IsToastRelation(rel));
+
/*
* Reset all visibility-related fields of the tuple. This
* logic should mimic heap_execute_freeze_tuple(), but we
@@ -284,8 +292,18 @@ heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt)
* potentially-garbled data is left behind.
*/
ItemPointerSet(&htup->t_ctid, blkno, curoff);
- HeapTupleHeaderSetXmin(htup, FrozenTransactionId);
- HeapTupleHeaderSetXmax(htup, InvalidTransactionId);
+ if (IsToastRelation(rel))
+ {
+ ToastTupleHeaderSetXmin(page, &tuple);
+ ToastTupleHeaderSetXmax(page, &tuple);
+ }
+ else
+ {
+ HeapTupleHeaderSetXmin(page, &tuple);
+ HeapTupleHeaderSetXmax(page, &tuple);
+ }
+ HeapTupleSetXmin(&tuple, FrozenTransactionId);
+ HeapTupleSetXmax(&tuple, InvalidTransactionId);
if (htup->t_infomask & HEAP_MOVED)
{
if (htup->t_infomask & HEAP_MOVED_OFF)
diff --git a/contrib/pg_visibility/expected/pg_visibility.out b/contrib/pg_visibility/expected/pg_visibility.out
index 9de54db2a2..d3c893b4e3 100644
--- a/contrib/pg_visibility/expected/pg_visibility.out
+++ b/contrib/pg_visibility/expected/pg_visibility.out
@@ -267,6 +267,22 @@ select * from pg_check_frozen('copyfreeze');
--------
(0 rows)
+create table vacuum_test as select 42 i;
+vacuum (disable_page_skipping) vacuum_test;
+-- pg_check_visible() can report false positive due to autovacuum activity.
+-- To workaround this issue, repeat the call.
+do $$
+declare
+ non_visible_count bigint;
+ i integer;
+begin
+ for i in 1 .. 10 loop
+ if i > 1 then perform pg_sleep(1); end if;
+ select count(*) from pg_check_visible('vacuum_test') into non_visible_count;
+ if non_visible_count = 0 then exit; end if;
+ end loop;
+ if non_visible_count > 0 then raise exception 'The visibility map is corrupt.'; end if;
+end $$;
-- cleanup
drop table test_partitioned;
drop view test_view;
@@ -277,3 +293,4 @@ drop foreign data wrapper dummy;
drop materialized view matview_visibility_test;
drop table regular_table;
drop table copyfreeze;
+drop table vacuum_test;
diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index a95f73ec79..d2296c2d02 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -14,6 +14,7 @@
#include "access/htup_details.h"
#include "access/visibilitymap.h"
#include "access/xloginsert.h"
+#include "catalog/catalog.h"
#include "catalog/pg_type.h"
#include "catalog/storage_xlog.h"
#include "funcapi.h"
@@ -652,6 +653,8 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
tuple.t_len = ItemIdGetLength(itemid);
tuple.t_tableOid = relid;
+ HeapTupleCopyBaseFromPage(buffer, &tuple, page,
+ IsToastRelation(rel));
/*
* If we're checking whether the page is all-visible, we expect
@@ -695,7 +698,7 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
*/
if (check_frozen)
{
- if (heap_tuple_needs_eventual_freeze(tuple.t_data))
+ if (heap_tuple_needs_eventual_freeze(&tuple))
record_corrupt_item(items, &tuple.t_self);
}
}
@@ -758,7 +761,7 @@ tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer)
* be set here. So just check the xmin.
*/
- xmin = HeapTupleHeaderGetXmin(tup->t_data);
+ xmin = HeapTupleGetXmin(tup);
if (!TransactionIdPrecedes(xmin, OldestXmin))
return false; /* xmin not old enough for all to see */
diff --git a/contrib/pg_visibility/sql/pg_visibility.sql b/contrib/pg_visibility/sql/pg_visibility.sql
index ff3538f996..a0d9525df9 100644
--- a/contrib/pg_visibility/sql/pg_visibility.sql
+++ b/contrib/pg_visibility/sql/pg_visibility.sql
@@ -170,6 +170,23 @@ commit;
select * from pg_visibility_map('copyfreeze');
select * from pg_check_frozen('copyfreeze');
+create table vacuum_test as select 42 i;
+vacuum (disable_page_skipping) vacuum_test;
+-- pg_check_visible() can report false positive due to autovacuum activity.
+-- To workaround this issue, repeat the call.
+do $$
+declare
+ non_visible_count bigint;
+ i integer;
+begin
+ for i in 1 .. 10 loop
+ if i > 1 then perform pg_sleep(1); end if;
+ select count(*) from pg_check_visible('vacuum_test') into non_visible_count;
+ if non_visible_count = 0 then exit; end if;
+ end loop;
+ if non_visible_count > 0 then raise exception 'The visibility map is corrupt.'; end if;
+end $$;
+
-- cleanup
drop table test_partitioned;
drop view test_view;
@@ -180,3 +197,4 @@ drop foreign data wrapper dummy;
drop materialized view matview_visibility_test;
drop table regular_table;
drop table copyfreeze;
+drop table vacuum_test;
diff --git a/contrib/pgrowlocks/pgrowlocks.c b/contrib/pgrowlocks/pgrowlocks.c
index ef89b84ec3..0abf1ea21a 100644
--- a/contrib/pgrowlocks/pgrowlocks.c
+++ b/contrib/pgrowlocks/pgrowlocks.c
@@ -130,7 +130,7 @@ pgrowlocks(PG_FUNCTION_ARGS)
htsu = HeapTupleSatisfiesUpdate(tuple,
GetCurrentCommandId(false),
hscan->rs_cbuf);
- xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
+ xmax = HeapTupleGetRawXmax(tuple);
infomask = tuple->t_data->t_infomask;
/*
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
index b827728326..5bf73251b7 100644
--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -19,6 +19,7 @@
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
+#include "catalog/catalog.h"
#include "catalog/namespace.h"
#include "catalog/pg_am_d.h"
#include "commands/vacuum.h"
@@ -153,6 +154,7 @@ statapprox_heap(Relation rel, output_type *stat)
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
tuple.t_len = ItemIdGetLength(itemid);
tuple.t_tableOid = RelationGetRelid(rel);
+ HeapTupleCopyBaseFromPage(buf, &tuple, page, IsToastRelation(rel));
/*
* We follow VACUUM's lead in counting INSERT_IN_PROGRESS tuples
diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c
index d69ac1c93d..4d045b2764 100644
--- a/contrib/pgstattuple/pgstatindex.c
+++ b/contrib/pgstattuple/pgstatindex.c
@@ -605,7 +605,7 @@ pgstathashindex(PG_FUNCTION_ARGS)
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
metap = HashPageGetMeta(BufferGetPage(metabuf));
stats.version = metap->hashm_version;
- stats.space_per_page = metap->hashm_bsize;
+ stats.space_per_page = BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(HashPageOpaqueData));
_hash_relbuf(rel, metabuf);
/* Get the current relation length */
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index cc9e39c4a5..69bd2c929f 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -4599,16 +4599,24 @@ UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3;
UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3;
EXPLAIN (verbose, costs off)
-UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; -- can be pushed down
- QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------
- Update on public.ft2
- Output: c1, c2, c3, c4, c5, c6, c7, c8
- -> Foreign Update on public.ft2
- Remote SQL: UPDATE "S 1"."T 1" SET c2 = (c2 + 400), c3 = (c3 || '_update7') WHERE ((("C 1" % 10) = 7)) RETURNING "C 1", c2, c3, c4, c5, c6, c7, c8
-(4 rows)
+WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *)
+SELECT * FROM t ORDER BY c1; -- can be pushed down
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Sort
+ Output: t.c1, t.c2, t.c3, t.c4, t.c5, t.c6, t.c7, t.c8
+ Sort Key: t.c1
+ CTE t
+ -> Update on public.ft2
+ Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8
+ -> Foreign Update on public.ft2
+ Remote SQL: UPDATE "S 1"."T 1" SET c2 = (c2 + 400), c3 = (c3 || '_update7') WHERE ((("C 1" % 10) = 7)) RETURNING "C 1", c2, c3, c4, c5, c6, c7, c8
+ -> CTE Scan on t
+ Output: t.c1, t.c2, t.c3, t.c4, t.c5, t.c6, t.c7, t.c8
+(10 rows)
-UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *;
+WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *)
+SELECT * FROM t ORDER BY c1;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
------+-----+--------------------+------------------------------+--------------------------+----+------------+-----
7 | 407 | 00007_update7 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo
@@ -4728,16 +4736,24 @@ UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT
UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT
FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9;
EXPLAIN (verbose, costs off)
- DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; -- can be pushed down
- QUERY PLAN
---------------------------------------------------------------------------------------------
- Delete on public.ft2
- Output: c1, c4
- -> Foreign Delete on public.ft2
- Remote SQL: DELETE FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 5)) RETURNING "C 1", c4
-(4 rows)
+ WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4)
+ SELECT * FROM t ORDER BY c1; -- can be pushed down
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------
+ Sort
+ Output: t.c1, t.c4
+ Sort Key: t.c1
+ CTE t
+ -> Delete on public.ft2
+ Output: ft2.c1, ft2.c4
+ -> Foreign Delete on public.ft2
+ Remote SQL: DELETE FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 5)) RETURNING "C 1", c4
+ -> CTE Scan on t
+ Output: t.c1, t.c4
+(10 rows)
-DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4;
+WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4)
+SELECT * FROM t ORDER BY c1;
c1 | c4
------+------------------------------
5 | Tue Jan 06 00:00:00 1970 PST
@@ -5998,7 +6014,8 @@ INSERT INTO ft2 (c1,c2,c3,c6) VALUES (1218, 818, 'ggg', '(--;') RETURNING *;
1218 | 818 | ggg_trig_update | | | (--; | ft2 |
(1 row)
-UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *;
+WITH t AS (UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *)
+SELECT * FROM t ORDER BY c1;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
------+-----+------------------------+------------------------------+--------------------------+----+------------+-----
8 | 608 | 00008_trig_update | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index dd858aba03..b54fbeccca 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -4814,8 +4814,8 @@ apply_returning_filter(PgFdwDirectModifyState *dmstate,
* Note: no need to care about tableoid here because it will be
* initialized in ExecProcessReturning().
*/
- HeapTupleHeaderSetXmin(resultTup->t_data, InvalidTransactionId);
- HeapTupleHeaderSetXmax(resultTup->t_data, InvalidTransactionId);
+ HeapTupleSetXmin(resultTup, InvalidTransactionId);
+ HeapTupleSetXmax(resultTup, InvalidTransactionId);
HeapTupleHeaderSetCmin(resultTup->t_data, InvalidTransactionId);
}
@@ -7328,6 +7328,7 @@ make_tuple_from_result_row(PGresult *res,
*/
if (ctid)
tuple->t_self = tuple->t_data->t_ctid = *ctid;
+ HeapTupleSetZeroBase(tuple);
/*
* Stomp on the xmin, xmax, and cmin fields from the tuple created by
@@ -7337,8 +7338,8 @@ make_tuple_from_result_row(PGresult *res,
* assumption. If we don't do this then, for example, the tuple length
* ends up in the xmin field, which isn't what we want.
*/
- HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
- HeapTupleHeaderSetXmin(tuple->t_data, InvalidTransactionId);
+ HeapTupleSetXmax(tuple, InvalidTransactionId);
+ HeapTupleSetXmin(tuple, InvalidTransactionId);
HeapTupleHeaderSetCmin(tuple->t_data, InvalidTransactionId);
/* Clean up */
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index e48ccd286b..b2f035b50f 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -1285,16 +1285,20 @@ EXPLAIN (verbose, costs off)
UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; -- can be pushed down
UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3;
EXPLAIN (verbose, costs off)
-UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; -- can be pushed down
-UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *;
+WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *)
+SELECT * FROM t ORDER BY c1; -- can be pushed down
+WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *)
+SELECT * FROM t ORDER BY c1;
EXPLAIN (verbose, costs off)
UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT
FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9; -- can be pushed down
UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT
FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9;
EXPLAIN (verbose, costs off)
- DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; -- can be pushed down
-DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4;
+ WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4)
+ SELECT * FROM t ORDER BY c1; -- can be pushed down
+WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4)
+SELECT * FROM t ORDER BY c1;
EXPLAIN (verbose, costs off)
DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 2; -- can be pushed down
DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 2;
@@ -1401,7 +1405,8 @@ CREATE TRIGGER t1_br_insert BEFORE INSERT OR UPDATE
INSERT INTO ft2 (c1,c2,c3) VALUES (1208, 818, 'fff') RETURNING *;
INSERT INTO ft2 (c1,c2,c3,c6) VALUES (1218, 818, 'ggg', '(--;') RETURNING *;
-UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *;
+WITH t AS (UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *)
+SELECT * FROM t ORDER BY c1;
-- Test errors thrown on remote side during update
ALTER TABLE "S 1"."T 1" ADD CONSTRAINT c2positive CHECK (c2 >= 0);
diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c
index 7e355585a0..9ce239bbbf 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -640,10 +640,10 @@ heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
result = PointerGetDatum(&(tup->t_self));
break;
case MinTransactionIdAttributeNumber:
- result = TransactionIdGetDatum(HeapTupleHeaderGetRawXmin(tup->t_data));
+ result = TransactionIdGetDatum(HeapTupleGetRawXmin(tup));
break;
case MaxTransactionIdAttributeNumber:
- result = TransactionIdGetDatum(HeapTupleHeaderGetRawXmax(tup->t_data));
+ result = TransactionIdGetDatum(HeapTupleGetRawXmax(tup));
break;
case MinCommandIdAttributeNumber:
case MaxCommandIdAttributeNumber:
@@ -688,6 +688,7 @@ heap_copytuple(HeapTuple tuple)
newTuple->t_len = tuple->t_len;
newTuple->t_self = tuple->t_self;
newTuple->t_tableOid = tuple->t_tableOid;
+ HeapTupleCopyBase(newTuple, tuple);
newTuple->t_data = (HeapTupleHeader) ((char *) newTuple + HEAPTUPLESIZE);
memcpy((char *) newTuple->t_data, (char *) tuple->t_data, tuple->t_len);
return newTuple;
@@ -714,6 +715,7 @@ heap_copytuple_with_tuple(HeapTuple src, HeapTuple dest)
dest->t_len = src->t_len;
dest->t_self = src->t_self;
dest->t_tableOid = src->t_tableOid;
+ HeapTupleCopyBase(dest, src);
dest->t_data = (HeapTupleHeader) palloc(src->t_len);
memcpy((char *) dest->t_data, (char *) src->t_data, src->t_len);
}
@@ -1161,6 +1163,7 @@ heap_modify_tuple(HeapTuple tuple,
newTuple->t_data->t_ctid = tuple->t_data->t_ctid;
newTuple->t_self = tuple->t_self;
newTuple->t_tableOid = tuple->t_tableOid;
+ HeapTupleCopyBase(newTuple, tuple);
return newTuple;
}
@@ -1224,6 +1227,7 @@ heap_modify_tuple_by_cols(HeapTuple tuple,
newTuple->t_data->t_ctid = tuple->t_data->t_ctid;
newTuple->t_self = tuple->t_self;
newTuple->t_tableOid = tuple->t_tableOid;
+ HeapTupleCopyBase(newTuple, tuple);
return newTuple;
}
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index b0993f37d4..1da7f572c3 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -260,58 +260,6 @@ static relopt_int intRelOpts[] =
},
-1, 1, 10000
},
- {
- {
- "autovacuum_freeze_min_age",
- "Minimum age at which VACUUM should freeze a table row, for autovacuum",
- RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
- ShareUpdateExclusiveLock
- },
- -1, 0, 1000000000
- },
- {
- {
- "autovacuum_multixact_freeze_min_age",
- "Minimum multixact age at which VACUUM should freeze a row multixact's, for autovacuum",
- RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
- ShareUpdateExclusiveLock
- },
- -1, 0, 1000000000
- },
- {
- {
- "autovacuum_freeze_max_age",
- "Age at which to autovacuum a table to prevent transaction ID wraparound",
- RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
- ShareUpdateExclusiveLock
- },
- -1, 100000, 2000000000
- },
- {
- {
- "autovacuum_multixact_freeze_max_age",
- "Multixact age at which to autovacuum a table to prevent multixact wraparound",
- RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
- ShareUpdateExclusiveLock
- },
- -1, 10000, 2000000000
- },
- {
- {
- "autovacuum_freeze_table_age",
- "Age at which VACUUM should perform a full table sweep to freeze row versions",
- RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
- ShareUpdateExclusiveLock
- }, -1, 0, 2000000000
- },
- {
- {
- "autovacuum_multixact_freeze_table_age",
- "Age of multixact at which VACUUM should perform a full table sweep to freeze row versions",
- RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
- ShareUpdateExclusiveLock
- }, -1, 0, 2000000000
- },
{
{
"log_autovacuum_min_duration",
@@ -388,6 +336,60 @@ static relopt_int intRelOpts[] =
static relopt_int64 int64RelOpts[] =
{
+ {
+ {
+ "autovacuum_freeze_min_age",
+ "Minimum age at which VACUUM should freeze a table row, for autovacuum",
+ RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
+ ShareUpdateExclusiveLock
+ },
+ INT64CONST(-1), INT64CONST(0), INT64CONST(1000000000)
+ },
+ {
+ {
+ "autovacuum_multixact_freeze_min_age",
+ "Minimum multixact age at which VACUUM should freeze a row multixact's, for autovacuum",
+ RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
+ ShareUpdateExclusiveLock
+ },
+ INT64CONST(-1), INT64CONST(0), INT64CONST(1000000000)
+ },
+ {
+ {
+ "autovacuum_freeze_max_age",
+ "Age at which to autovacuum a table to prevent transaction ID wraparound",
+ RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
+ ShareUpdateExclusiveLock
+ },
+ INT64CONST(-1), INT64CONST(100000), INT64CONST(2000000000)
+ },
+ {
+ {
+ "autovacuum_multixact_freeze_max_age",
+ "Multixact age at which to autovacuum a table to prevent multixact wraparound",
+ RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
+ ShareUpdateExclusiveLock
+ },
+ INT64CONST(-1), INT64CONST(10000), INT64CONST(2000000000)
+ },
+ {
+ {
+ "autovacuum_freeze_table_age",
+ "Age at which VACUUM should perform a full table sweep to freeze row versions",
+ RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
+ ShareUpdateExclusiveLock
+ },
+ INT64CONST(-1), INT64CONST(0), INT64CONST(2000000000)
+ },
+ {
+ {
+ "autovacuum_multixact_freeze_table_age",
+ "Age of multixact at which VACUUM should perform a full table sweep to freeze row versions",
+ RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
+ ShareUpdateExclusiveLock
+ },
+ INT64CONST(-1), INT64CONST(0), INT64CONST(2000000000)
+ },
/* list terminator */
{{NULL}}
};
@@ -1920,17 +1922,17 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, analyze_threshold)},
{"autovacuum_vacuum_cost_limit", RELOPT_TYPE_INT,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_cost_limit)},
- {"autovacuum_freeze_min_age", RELOPT_TYPE_INT,
+ {"autovacuum_freeze_min_age", RELOPT_TYPE_INT64,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_min_age)},
- {"autovacuum_freeze_max_age", RELOPT_TYPE_INT,
+ {"autovacuum_freeze_max_age", RELOPT_TYPE_INT64,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_max_age)},
- {"autovacuum_freeze_table_age", RELOPT_TYPE_INT,
+ {"autovacuum_freeze_table_age", RELOPT_TYPE_INT64,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_table_age)},
- {"autovacuum_multixact_freeze_min_age", RELOPT_TYPE_INT,
+ {"autovacuum_multixact_freeze_min_age", RELOPT_TYPE_INT64,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_min_age)},
- {"autovacuum_multixact_freeze_max_age", RELOPT_TYPE_INT,
+ {"autovacuum_multixact_freeze_max_age", RELOPT_TYPE_INT64,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_max_age)},
- {"autovacuum_multixact_freeze_table_age", RELOPT_TYPE_INT,
+ {"autovacuum_multixact_freeze_table_age", RELOPT_TYPE_INT64,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_table_age)},
{"log_autovacuum_min_duration", RELOPT_TYPE_INT,
offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, log_min_duration)},
diff --git a/src/backend/access/hash/hashvalidate.c b/src/backend/access/hash/hashvalidate.c
index 10bf26ce7c..83e033b93c 100644
--- a/src/backend/access/hash/hashvalidate.c
+++ b/src/backend/access/hash/hashvalidate.c
@@ -317,11 +317,10 @@ check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype)
* INTERNAL and allowing any such function seems too scary.
*/
if ((funcid == F_HASHINT4 || funcid == F_HASHINT4EXTENDED) &&
- (argtype == DATEOID ||
- argtype == XIDOID || argtype == CIDOID))
+ (argtype == DATEOID || argtype == CIDOID))
/* okay, allowed use of hashint4() */ ;
else if ((funcid == F_HASHINT8 || funcid == F_HASHINT8EXTENDED) &&
- (argtype == XID8OID))
+ (argtype == XID8OID || argtype == XIDOID))
/* okay, allowed use of hashint8() */ ;
else if ((funcid == F_TIMESTAMP_HASH ||
funcid == F_TIMESTAMP_HASH_EXTENDED) &&
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 685200d154..5bc35a28d8 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -52,10 +52,14 @@
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/catalog.h"
+#include "catalog/index.h"
+#include "catalog/namespace.h"
+#include "commands/vacuum.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "port/pg_bitutils.h"
+#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
@@ -73,7 +77,7 @@
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
- TransactionId xid, CommandId cid, int options);
+ CommandId cid, int options);
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
Buffer newbuf, HeapTuple oldtup,
HeapTuple newtup, HeapTuple old_key_tuple,
@@ -110,6 +114,8 @@ static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate);
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup);
static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
bool *copy);
+static bool heap_page_prepare_for_xid(Relation relation, Buffer buffer,
+ TransactionId xid, bool multi);
/*
@@ -460,6 +466,8 @@ heapgetpage(TableScanDesc sscan, BlockNumber page)
loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd);
loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
loctup.t_len = ItemIdGetLength(lpp);
+ HeapTupleCopyBaseFromPage(buffer, &loctup, dp,
+ IsToastRelation(scan->rs_base.rs_rd));
ItemPointerSet(&(loctup.t_self), page, lineoff);
if (all_visible)
@@ -676,6 +684,8 @@ heapgettup(HeapScanDesc scan,
tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
tuple->t_len = ItemIdGetLength(lpp);
+ HeapTupleCopyBaseFromPage(scan->rs_cbuf, tuple, dp,
+ IsToastRelation(scan->rs_base.rs_rd));
return;
}
@@ -702,6 +712,8 @@ heapgettup(HeapScanDesc scan,
tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
tuple->t_len = ItemIdGetLength(lpp);
+ HeapTupleCopyBaseFromPage(scan->rs_cbuf, tuple, dp,
+ IsToastRelation(scan->rs_base.rs_rd));
ItemPointerSet(&(tuple->t_self), page, lineoff);
/*
@@ -1001,6 +1013,8 @@ heapgettup_pagemode(HeapScanDesc scan,
tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
tuple->t_len = ItemIdGetLength(lpp);
+ HeapTupleCopyBaseFromPage(InvalidBuffer, tuple, dp,
+ IsToastRelation(scan->rs_base.rs_rd));
/* check that rs_cindex is in sync */
Assert(scan->rs_cindex < scan->rs_ntuples);
@@ -1023,6 +1037,8 @@ heapgettup_pagemode(HeapScanDesc scan,
tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
tuple->t_len = ItemIdGetLength(lpp);
+ HeapTupleCopyBaseFromPage(InvalidBuffer, tuple, dp,
+ IsToastRelation(scan->rs_base.rs_rd));
ItemPointerSet(&(tuple->t_self), page, lineoff);
/*
@@ -1614,6 +1630,7 @@ heap_fetch(Relation relation,
tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
tuple->t_len = ItemIdGetLength(lp);
tuple->t_tableOid = RelationGetRelid(relation);
+ HeapTupleCopyBaseFromPage(buffer, tuple, page, IsToastRelation(relation));
/*
* check tuple visibility, then release lock
@@ -1622,7 +1639,7 @@ heap_fetch(Relation relation,
if (valid)
PredicateLockTID(relation, &(tuple->t_self), snapshot,
- HeapTupleHeaderGetXmin(tuple->t_data));
+ HeapTupleGetXmin(tuple));
HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
@@ -1699,6 +1716,8 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
Assert(TransactionIdIsValid(RecentXmin));
Assert(BufferGetBlockNumber(buffer) == blkno);
+ heapTuple->t_self = *tid;
+
/* Scan through possible multiple members of HOT-chain */
for (;;)
{
@@ -1734,6 +1753,8 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp);
heapTuple->t_len = ItemIdGetLength(lp);
heapTuple->t_tableOid = RelationGetRelid(relation);
+ HeapTupleCopyBaseFromPage(buffer, heapTuple, dp,
+ IsToastRelation(relation));
ItemPointerSet(&heapTuple->t_self, blkno, offnum);
/*
@@ -1748,7 +1769,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
*/
if (TransactionIdIsValid(prev_xmax) &&
!TransactionIdEquals(prev_xmax,
- HeapTupleHeaderGetXmin(heapTuple->t_data)))
+ HeapTupleGetXmin(heapTuple)))
break;
/*
@@ -1769,7 +1790,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
{
ItemPointerSetOffsetNumber(tid, offnum);
PredicateLockTID(relation, &heapTuple->t_self, snapshot,
- HeapTupleHeaderGetXmin(heapTuple->t_data));
+ HeapTupleGetXmin(heapTuple));
if (all_dead)
*all_dead = false;
return true;
@@ -1804,7 +1825,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
blkno);
offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
at_chain_start = false;
- prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
+ prev_xmax = HeapTupleGetUpdateXidAny(heapTuple);
}
else
break; /* end of chain */
@@ -1891,13 +1912,14 @@ heap_get_latest_tid(TableScanDesc sscan,
tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
tp.t_len = ItemIdGetLength(lp);
tp.t_tableOid = RelationGetRelid(relation);
+ HeapTupleCopyBaseFromPage(buffer, &tp, page, IsToastRelation(relation));
/*
* After following a t_ctid link, we might arrive at an unrelated
* tuple. Check for XMIN match.
*/
if (TransactionIdIsValid(priorXmax) &&
- !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
+ !TransactionIdEquals(priorXmax, HeapTupleGetXmin(&tp)))
{
UnlockReleaseBuffer(buffer);
break;
@@ -1916,7 +1938,7 @@ heap_get_latest_tid(TableScanDesc sscan,
* If there's a valid t_ctid link, follow it, else we're done.
*/
if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
- HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
+ HeapTupleIsOnlyLocked(&tp) ||
HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) ||
ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
{
@@ -1925,7 +1947,7 @@ heap_get_latest_tid(TableScanDesc sscan,
}
ctid = tp.t_data->t_ctid;
- priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
+ priorXmax = HeapTupleGetUpdateXidAny(&tp);
UnlockReleaseBuffer(buffer);
} /* end of loop */
}
@@ -1950,7 +1972,7 @@ heap_get_latest_tid(TableScanDesc sscan,
static void
UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
{
- Assert(TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple), xid));
+ Assert(TransactionIdEquals(HeapTupleHeaderGetRawXmax(BufferGetPage(buffer), tuple), xid));
Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI));
if (!(tuple->t_infomask & (HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID)))
@@ -2042,7 +2064,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
* Note: below this point, heaptup is the data we actually intend to store
* into the relation; tup is the caller's original untoasted data.
*/
- heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
+ heaptup = heap_prepare_insert(relation, tup, cid, options);
/*
* Find buffer to insert this tuple into. If the page is all visible,
@@ -2069,6 +2091,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
*/
CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
+ heap_page_prepare_for_xid(relation, buffer, xid, false);
+ HeapTupleSetXmin(heaptup, xid);
+
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
@@ -2144,12 +2169,23 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
{
xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
bufflags |= REGBUF_KEEP_DATA;
-
- if (IsToastRelation(relation))
- xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION;
}
+ if (IsToastRelation(relation))
+ xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION;
+
XLogBeginInsert();
+
+ if (info & XLOG_HEAP_INIT_PAGE)
+ {
+ char *base;
+
+ base = IsToastRelation(relation) ?
+ (char *) &ToastPageGetSpecial(page)->pd_xid_base :
+ (char *) &HeapPageGetSpecial(page)->pd_xid_base;
+ XLogRegisterData(base, sizeof(TransactionId));
+ }
+
XLogRegisterData((char *) &xlrec, SizeOfHeapInsert);
xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
@@ -2204,6 +2240,535 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
}
}
+/*
+ * Fold one short xid into a running minimum/maximum pair.
+ *
+ * While *found is still false the pair is considered empty: xid seeds both
+ * *min and *max and *found is set.  On later calls the range is only widened
+ * when xid falls outside of it.
+ */
+static void
+xid_min_max(ShortTransactionId *min, ShortTransactionId *max,
+			ShortTransactionId xid,
+			bool *found)
+{
+	Assert(TransactionIdIsNormal(xid));
+	Assert(xid <= MaxShortTransactionId);
+
+	if (*found)
+	{
+		/* Range already seeded: stretch whichever end xid lies beyond. */
+		if (xid < *min)
+			*min = xid;
+		if (xid > *max)
+			*max = xid;
+		return;
+	}
+
+	/* First value seen: it is both the minimum and the maximum. */
+	*max = xid;
+	*min = *max;
+	*found = true;
+}
+
+/*
+ * Report the smallest and largest short (page-relative) xid values stored in
+ * the normal items of a heap page.
+ *
+ * "multi" selects which values are examined:
+ *	false - raw xmin values that are normal and not frozen, the update xid
+ *			carried by an updating (not lock-only) multixact xmax, and raw
+ *			xmax values that are not multixacts;
+ *	true  - raw xmax values that are multixacts.
+ *
+ * Returns true and fills *min / *max when at least one value was found.
+ * Returns false otherwise, in which case *min / *max are left untouched.
+ */
+static bool
+heap_page_xid_min_max(Page page, bool multi,
+					  ShortTransactionId *min, ShortTransactionId *max,
+					  bool is_toast)
+{
+	bool		found = false;
+	OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+	OffsetNumber offnum;
+
+	/* Callers must not ask for multixacts on a toast page. */
+	Assert(!multi || !is_toast);
+
+	for (offnum = FirstOffsetNumber;
+		 offnum <= maxoff;
+		 offnum = OffsetNumberNext(offnum))
+	{
+		ItemId		lp = PageGetItemId(page, offnum);
+		HeapTupleHeader htup;
+		bool		xmax_is_multi;
+
+		/* Only normal line pointers have a tuple header to look at. */
+		if (!ItemIdIsNormal(lp))
+			continue;
+
+		htup = (HeapTupleHeader) PageGetItem(page, lp);
+		xmax_is_multi = (htup->t_infomask & HEAP_XMAX_IS_MULTI) != 0;
+
+		if (!multi)
+		{
+			/*
+			 * For non multi transactions we should see inside the tuple for
+			 * update transaction.
+			 */
+			Assert(!is_toast || !xmax_is_multi);
+
+			/* xmin counts unless it is special or already frozen */
+			if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmin) &&
+				!HeapTupleHeaderXminFrozen(htup))
+				xid_min_max(min, max, htup->t_choice.t_heap.t_xmin, &found);
+
+			/* an updating multixact hides a regular xid: account for it */
+			if (xmax_is_multi &&
+				!(htup->t_infomask & HEAP_XMAX_LOCK_ONLY))
+			{
+				TransactionId updater;
+				ShortTransactionId short_updater;
+
+				Assert(!is_toast);
+				updater = MultiXactIdGetUpdateXid(HeapTupleHeaderGetRawXmax(page, htup),
+												  htup->t_infomask);
+				short_updater = NormalTransactionIdToShort(HeapPageGetSpecial(page)->pd_xid_base,
+														   updater);
+
+				xid_min_max(min, max, short_updater, &found);
+			}
+		}
+
+		/* raw xmax counts only when it is normal and of the requested kind */
+		if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax) &&
+			multi == xmax_is_multi)
+			xid_min_max(min, max, htup->t_choice.t_heap.t_xmax, &found);
+	}
+
+	Assert(!found || (*min > InvalidTransactionId && *max <= MaxShortTransactionId));
+
+	return found;
+}
+
+/*
+ * Shift the xid base (or the multixact base) of a heap page by "delta".
+ *
+ * Each affected short value stored in a tuple header is decreased by delta
+ * and the corresponding base in the page's special area is increased by
+ * delta, so the sum of base and stored value is preserved.
+ *
+ *	relation	owning relation; if non-NULL and RelationNeedsWAL() holds, an
+ *				XLOG_HEAP3_BASE_SHIFT record is written for the page
+ *	buffer		buffer holding the page; marked dirty when valid
+ *	page		the page to rewrite
+ *	multi		false: shift normal, non-frozen xmin and non-multixact xmax
+ *				against pd_xid_base; true: shift multixact xmax against
+ *				pd_multi_base
+ *	delta		signed amount added to the selected base
+ *	is_toast	the page uses the toast special area, which provides only
+ *				pd_xid_base here (multi must be false)
+ *
+ * NOTE(review): MarkBufferDirty() is guarded by BufferIsValid(), but the
+ * lock assertion and XLogRegisterBuffer() use the buffer unconditionally;
+ * presumably callers always pass a valid buffer -- confirm.
+ */
+static void
+heap_page_shift_base(Relation relation, Buffer buffer, Page page,
+					 bool multi, int64 delta, bool is_toast)
+{
+	TransactionId *xid_base,
+			   *multi_base;
+	OffsetNumber offnum,
+				maxoff;
+	ItemId		itemid;
+	HeapTupleHeader htup;
+
+	Assert(IsBufferLockedExclusive(buffer));
+
+	xid_base = multi_base = NULL;
+
+	/* Tuple rewrite, base update and WAL insertion must happen atomically. */
+	START_CRIT_SECTION();
+
+	if (is_toast)
+	{
+		Assert(!multi);
+		xid_base = &ToastPageGetSpecial(page)->pd_xid_base;
+	}
+	else
+	{
+		HeapPageSpecial special;
+
+		special = HeapPageGetSpecial(page);
+		xid_base = &special->pd_xid_base;
+		multi_base = &special->pd_multi_base;
+	}
+
+	/* Iterate over page items */
+	maxoff = PageGetMaxOffsetNumber(page);
+	for (offnum = FirstOffsetNumber;
+		 offnum <= maxoff;
+		 offnum = OffsetNumberNext(offnum))
+	{
+		itemid = PageGetItemId(page, offnum);
+
+		if (!ItemIdIsNormal(itemid))
+			continue;
+
+		htup = (HeapTupleHeader) PageGetItem(page, itemid);
+
+		/* Apply xid shift to heap tuple */
+		if (!multi)
+		{
+			/* shift xmin */
+			if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmin) &&
+				!HeapTupleHeaderXminFrozen(htup))
+			{
+				Assert(htup->t_choice.t_heap.t_xmin - delta >= FirstNormalTransactionId);
+				Assert(htup->t_choice.t_heap.t_xmin - delta <= MaxShortTransactionId);
+				htup->t_choice.t_heap.t_xmin -= delta;
+			}
+		}
+
+		/* shift xmax */
+		if (!TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax))
+			continue;
+
+		/*
+		 * Only touch xmax values of the requested kind.  The masked flag is
+		 * compared against zero rather than cast to bool: where bool is a
+		 * plain integer type instead of C99 _Bool, such a cast could truncate
+		 * the flag bit to 0.  This is also the form heap_page_xid_min_max()
+		 * uses for the same test.
+		 */
+		if (multi != ((htup->t_infomask & HEAP_XMAX_IS_MULTI) != 0))
+			continue;
+
+		Assert(htup->t_choice.t_heap.t_xmax - delta >= FirstNormalTransactionId);
+		Assert(htup->t_choice.t_heap.t_xmax - delta <= MaxShortTransactionId);
+		htup->t_choice.t_heap.t_xmax -= delta;
+	}
+
+	/* Apply xid shift to base as well */
+	if (!multi)
+		*xid_base += delta;
+	else
+		*multi_base += delta;
+
+	if (BufferIsValid(buffer))
+		MarkBufferDirty(buffer);
+
+	/* Write WAL record if needed */
+	if (relation && RelationNeedsWAL(relation))
+	{
+		XLogRecPtr	recptr;
+		xl_heap_base_shift xlrec;
+
+		xlrec.delta = delta;
+		xlrec.multi = multi;
+		xlrec.flags = 0;
+		if (IsToastRelation(relation))
+			xlrec.flags |= XLH_BASE_SHIFT_ON_TOAST_RELATION;
+
+		XLogBeginInsert();
+		XLogRegisterData((char *) &xlrec, SizeOfHeapBaseShift);
+		XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+
+		recptr = XLogInsert(RM_HEAP3_ID, XLOG_HEAP3_BASE_SHIFT);
+
+		PageSetLSN(page, recptr);
+	}
+
+	END_CRIT_SECTION();
+}
+
+/*
+ * Freeze xids in the single heap page. Useful when we can't fit new xid even
+ * with base shift.
+ *
+ * The page is first pruned with heap_page_prune().  Then every normal item
+ * is offered to heap_prepare_freeze_tuple(), and the collected freeze plans
+ * are applied (and WAL-logged if the relation needs WAL) inside one critical
+ * section.  Cutoffs come from vacuum_set_xid_limits() called with 0 for all
+ * four age arguments.
+ *
+ *	relation	relation owning the page
+ *	buffer		buffer holding the page; the loop comment below states the
+ *				caller already holds an exclusive lock on it
+ */
+static void
+freeze_single_heap_page(Relation relation, Buffer buffer)
+{
+	Page		page = BufferGetPage(buffer);
+	OffsetNumber offnum,
+				maxoff;
+	HeapTupleData tuple;
+	int			nfrozen = 0;
+	xl_heap_freeze_tuple *frozen;
+	TransactionId OldestXmin,
+				FreezeXid;
+	MultiXactId OldestMxact,
+				MultiXactCutoff;
+	GlobalVisState *vistest;
+	ItemId		itemid;
+	bool		tuple_totally_frozen;
+	int			ndeleted,
+				nnewlpdead;
+
+	/* Only FreezeXid and MultiXactCutoff are consumed below. */
+	vacuum_set_xid_limits(relation, 0, 0, 0, 0, &OldestMxact,
+						  &OldestXmin, &FreezeXid, &MultiXactCutoff);
+
+	vistest = GlobalVisTestFor(relation);
+
+	ndeleted = heap_page_prune(relation, buffer, vistest, InvalidTransactionId, 0,
+							   &nnewlpdead, &offnum, false);
+	if (ndeleted > nnewlpdead)
+		pgstat_update_heap_dead_tuples(relation,
+									   ndeleted - nnewlpdead);
+
+	/*
+	 * Now scan the page and collect a freeze plan for every tuple that
+	 * requires freezing.
+	 */
+	maxoff = PageGetMaxOffsetNumber(page);
+	frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage);
+
+	/*
+	 * Note: If you change anything in the loop below, also look at
+	 * heap_page_is_all_visible to see if that needs to be changed.
+	 */
+	for (offnum = FirstOffsetNumber;
+		 offnum <= maxoff;
+		 offnum = OffsetNumberNext(offnum))
+	{
+		/*
+		 * Tracker outputs required by heap_prepare_freeze_tuple(); their
+		 * values are not used here.  NOTE(review): they are passed
+		 * uninitialized -- confirm the callee never reads them before
+		 * writing.
+		 */
+		TransactionId NewRelfrozenXid;
+		MultiXactId NewRelminMxid;
+
+		itemid = PageGetItemId(page, offnum);
+
+		if (!ItemIdIsNormal(itemid))
+			continue;
+
+		tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+		tuple.t_len = ItemIdGetLength(itemid);
+		tuple.t_tableOid = RelationGetRelid(relation);
+		HeapTupleCopyBaseFromPage(buffer, &tuple, page,
+								  IsToastRelation(relation));
+
+		/*
+		 * Each non-removable tuple must be checked to see if it needs
+		 * freezing.  Note we already have exclusive buffer lock.
+		 */
+		if (heap_prepare_freeze_tuple(&tuple,
+									  relation->rd_rel->relfrozenxid,
+									  relation->rd_rel->relminmxid,
+									  FreezeXid, MultiXactCutoff,
+									  &frozen[nfrozen], &tuple_totally_frozen,
+									  &NewRelfrozenXid, &NewRelminMxid))
+			frozen[nfrozen++].offset = offnum;
+	}
+
+	/*
+	 * If we froze any tuples, mark the buffer dirty, and write a WAL record
+	 * recording the changes.  We must log the changes to be crash-safe
+	 * against future truncation of CLOG.
+	 */
+	if (nfrozen > 0)
+	{
+		int			i;
+		HeapTupleHeader htup;
+
+		START_CRIT_SECTION();
+
+		MarkBufferDirty(buffer);
+
+		/*
+		 * execute collected freezes; the function-level itemid is reused
+		 * here instead of declaring a second one that would shadow it
+		 */
+		for (i = 0; i < nfrozen; i++)
+		{
+			itemid = PageGetItemId(page, frozen[i].offset);
+			htup = (HeapTupleHeader) PageGetItem(page, itemid);
+			heap_execute_freeze_tuple_page(page, htup, &frozen[i],
+										   IsToastRelation(relation));
+		}
+
+		/* Now WAL-log freezing if necessary */
+		if (RelationNeedsWAL(relation))
+		{
+			XLogRecPtr	recptr;
+
+			recptr = log_heap_freeze(relation, buffer, FreezeXid,
+									 frozen, nfrozen);
+			PageSetLSN(page, recptr);
+		}
+
+		END_CRIT_SECTION();
+	}
+
+	pfree(frozen);
+}
+
+/*
+ * Would xid still be representable on a page once its base is moved from
+ * "base" by "delta"?
+ *
+ * True when xid lies in the inclusive range
+ * [base + delta + FirstNormalTransactionId,
+ *  base + delta + MaxShortTransactionId].
+ */
+static inline bool
+is_delta_fits_heap_page(TransactionId xid, TransactionId base, int64 delta)
+{
+	/* Below the lowest xid the shifted base can express. */
+	if (xid < base + delta + FirstNormalTransactionId)
+		return false;
+
+	/* Otherwise it fits unless it is above the highest one. */
+	return xid <= base + delta + MaxShortTransactionId;
+}
+
+/*
+ * Is xid representable on a page with the given base?
+ *
+ * True when xid lies in the inclusive range
+ * [base + FirstNormalTransactionId, base + MaxShortTransactionId].
+ */
+static inline bool
+is_xid_fits_heap_page(TransactionId xid, TransactionId base)
+{
+	return !(xid < base + FirstNormalTransactionId ||
+			 xid > base + MaxShortTransactionId);
+}
+
+/*
+ * Sanity-check that xid fits on a page once its base is shifted by delta.
+ *
+ * Returns normally when the delta fits.  When it does not fit and a valid
+ * buffer was supplied, reports the inconsistency with elog(FATAL), including
+ * the relation path and block number taken from the buffer descriptor.
+ *
+ * freeDelta and requiredDelta must be either both NULL or both non-NULL; when
+ * given, their values are added to the error message.  min and max are only
+ * used for the error message.
+ *
+ * NOTE(review): with buffer == InvalidBuffer the function returns silently
+ * even if the delta does not fit, so such callers get no check at all.
+ */
+static void
+heap_page_check_delta(Buffer buffer,
+					  TransactionId xid, TransactionId base,
+					  ShortTransactionId min, ShortTransactionId max,
+					  int64 delta, int64 *freeDelta, int64 *requiredDelta)
+{
+	BufferDesc *bufHdr;
+	BackendId owner;
+	char *relpath;
+
+	Assert((freeDelta == NULL) == (requiredDelta == NULL));
+
+	/*
+	 * Nothing to report if the delta fits, or if there is no buffer whose
+	 * descriptor could be used to build the error message.
+	 */
+	if (is_delta_fits_heap_page(xid, base, delta) || buffer == InvalidBuffer)
+		return;
+
+	/* Find the descriptor: local buffers are addressed by negative numbers. */
+	if (!BufferIsLocal(buffer))
+	{
+		bufHdr = GetBufferDescriptor(buffer - 1);
+		owner = InvalidBackendId;
+	}
+	else
+	{
+		bufHdr = GetLocalBufferDescriptor(-buffer - 1);
+		owner = MyBackendId;
+	}
+
+	relpath = relpathbackend(BufTagGetRelFileLocator(&bufHdr->tag), owner,
+							 bufHdr->tag.forkNum);
+
+	if (freeDelta != NULL)
+		elog(FATAL, "Fatal xid base calculation error: xid = %llu, base = %llu, min = %u, max = %u, freeDelta = %lld, requiredDelta = %lld, delta = %lld (rel=%s, blockNum=%u)",
+			 (unsigned long long) xid, (unsigned long long) base,
+			 min, max,
+			 (long long) *freeDelta, (long long) *requiredDelta,
+			 (long long) delta,
+			 relpath, bufHdr->tag.blockNum);
+	else
+		elog(FATAL, "Fatal xid base calculation error: xid = %llu, base = %llu, min = %u, max = %u, delta = %lld (rel=%s, blockNum=%u)",
+			 (unsigned long long) xid, (unsigned long long) base,
+			 min, max,
+			 (long long) delta,
+			 relpath, bufHdr->tag.blockNum);
+}
+
+/*
+ * Shift the page's base by delta via heap_page_shift_base().
+ *
+ * "multi" is forwarded to heap_page_shift_base(); in the assertion below it
+ * also selects pd_multi_base rather than pd_xid_base.  xid and base are used
+ * only by the assertions: delta must make xid fit before the shift, and xid
+ * must fit the base re-read from the page after it.
+ */
+static void
+heap_page_apply_delta(Relation relation, Buffer buffer, Page page,
+					  TransactionId xid, bool multi,
+					  TransactionId base, int64 delta, bool is_toast)
+{
+	Assert(is_delta_fits_heap_page(xid, base, delta));
+
+	heap_page_shift_base(relation, buffer, page, multi, delta, is_toast);
+
+#ifdef USE_ASSERT_CHECKING
+	/* Re-read the base from the page and verify xid fits it. */
+	if (!is_toast)
+	{
+		HeapPageSpecial special = HeapPageGetSpecial(page);
+
+		base = multi ? special->pd_multi_base : special->pd_xid_base;
+	}
+	else
+	{
+		Assert(!multi);
+		base = ToastPageGetSpecial(page)->pd_xid_base;
+	}
+
+	Assert(is_xid_fits_heap_page(xid, base));
+#endif							/* USE_ASSERT_CHECKING */
+}
+
+/*
+ * Try to make xid fit on a page by shifting the page's base.
+ *
+ * "multi" selects pd_multi_base instead of pd_xid_base; it must be false for
+ * toast pages.
+ *
+ * Returns 0 when xid already fits or the base was shifted so that it fits,
+ * and -1 when the xids already on the page leave too little room for the
+ * required shift.
+ */
+static int
+heap_page_try_prepare_for_xid(Relation relation, Buffer buffer, Page page,
+							  TransactionId xid, bool multi, bool is_toast)
+{
+	TransactionId base;
+	ShortTransactionId page_min = InvalidTransactionId,
+				page_max = InvalidTransactionId;
+	int64 shift,
+				room,
+				needed;
+
+	/* Fetch the base that applies to this kind of xid and page. */
+	if (!is_toast)
+	{
+		HeapPageSpecial special = HeapPageGetSpecial(page);
+
+		base = multi ? special->pd_multi_base : special->pd_xid_base;
+	}
+	else
+	{
+		Assert(!multi);
+		base = ToastPageGetSpecial(page)->pd_xid_base;
+	}
+
+	/* If xid fits the page no action needed. */
+	if (is_xid_fits_heap_page(xid, base))
+		return 0;
+
+	/*
+	 * heap_page_xid_min_max() found nothing to report: pick the base that
+	 * puts xid at the very bottom of the representable range.
+	 */
+	if (!heap_page_xid_min_max(page, multi, &page_min, &page_max, is_toast))
+	{
+		shift = (int64) (xid - FirstNormalTransactionId) - (int64) base;
+		heap_page_check_delta(buffer, xid, base, page_min, page_max, shift,
+							  NULL, NULL);
+		heap_page_apply_delta(relation, buffer, page, xid, multi, base, shift,
+							  is_toast);
+		return 0;
+	}
+
+	/* Can we just shift base on the page? */
+	if (xid >= base + FirstNormalTransactionId)
+	{
+		/* xid is above the range: the base has to move up. */
+		room = page_min - FirstNormalTransactionId;
+		needed = xid - (base + MaxShortTransactionId);
+
+		if (needed > room)
+			return -1;
+
+		shift = (room + needed) / 2;
+	}
+	else
+	{
+		/* xid is below the range: the base has to move down. */
+		room = MaxShortTransactionId - page_max;
+		needed = (base + FirstNormalTransactionId) - xid;
+		/* Shouldn't consider setting base less than 0 */
+		room = Min(room, base);
+
+		if (needed > room)
+			return -1;
+
+		shift = -(room + needed) / 2;
+	}
+
+	heap_page_check_delta(buffer, xid, base, page_min, page_max,
+						  shift, &room, &needed);
+	heap_page_apply_delta(relation, buffer, page, xid, multi, base,
+						  shift, is_toast);
+
+	return 0;
+}
+
+/*
+ * Ensure that the xmin and xmax of the given tuple fit the base of the given
+ * page.
+ *
+ * For each of the tuple's xmin and raw xmax that is a normal xid, calls
+ * heap_page_try_prepare_for_xid() with no relation and InvalidBuffer, and
+ * raises ERROR if it reports that the xid cannot be fitted.  The xmax is
+ * treated as a multixact when HEAP_XMAX_IS_MULTI is set in the tuple's
+ * infomask.
+ */
+void
+rewrite_page_prepare_for_xid(Page page, HeapTuple tup, bool is_toast)
+{
+	TransactionId xid;
+	bool xmax_is_multi;
+	int res;
+
+	/* xmin */
+	xid = HeapTupleGetXmin(tup);
+	if (TransactionIdIsNormal(xid))
+	{
+		res = heap_page_try_prepare_for_xid(NULL, InvalidBuffer, page, xid,
+											false, is_toast);
+		if (res == -1)
+			elog(ERROR, "could not fit xid into page");
+	}
+
+	/* xmax */
+	xid = HeapTupleGetRawXmax(tup);
+	if (TransactionIdIsNormal(xid))
+	{
+		/*
+		 * Turn the flag test into a real boolean before passing it as a bool
+		 * argument: if bool is not C99 _Bool, the raw masked value could be
+		 * truncated to false.
+		 */
+		xmax_is_multi = (tup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) != 0;
+
+		res = heap_page_try_prepare_for_xid(NULL, InvalidBuffer, page, xid,
+											xmax_is_multi, is_toast);
+		if (res == -1)
+			elog(ERROR, "could not fit xid into page");
+	}
+}
+
+
/*
* Subroutine for heap_insert(). Prepares a tuple for insertion. This sets the
* tuple header fields and toasts the tuple if necessary. Returns a toasted
@@ -2211,7 +2776,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
* that in any case, the header fields are also set in the original tuple.
*/
static HeapTuple
-heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
+heap_prepare_insert(Relation relation, HeapTuple tup,
CommandId cid, int options)
{
/*
@@ -2228,12 +2793,12 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid,
tup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
tup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
tup->t_data->t_infomask |= HEAP_XMAX_INVALID;
- HeapTupleHeaderSetXmin(tup->t_data, xid);
+ HeapTupleSetXmin(tup, InvalidTransactionId);
if (options & HEAP_INSERT_FROZEN)
HeapTupleHeaderSetXminFrozen(tup->t_data);
HeapTupleHeaderSetCmin(tup->t_data, cid);
- HeapTupleHeaderSetXmax(tup->t_data, 0); /* for cleanliness */
+ HeapTupleSetXmax(tup, 0); /* for cleanliness */
tup->t_tableOid = RelationGetRelid(relation);
/*
@@ -2296,8 +2861,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
slots[i]->tts_tableOid = RelationGetRelid(relation);
tuple->t_tableOid = slots[i]->tts_tableOid;
- heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
- options);
+ heaptuples[i] = heap_prepare_insert(relation, tuple, cid, options);
}
/*
@@ -2353,6 +2917,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
all_frozen_set = true;
+ heap_page_prepare_for_xid(relation, buffer, xid, false);
+
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
@@ -2360,6 +2926,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
* RelationGetBufferForTuple has ensured that the first tuple fits.
* Put that on the page, and then as many other tuples as fit.
*/
+ HeapTupleSetXmin(heaptuples[ndone], xid);
RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
/*
@@ -2376,6 +2943,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
break;
+ HeapTupleSetXmin(heaptup, xid);
RelationPutHeapTuple(relation, buffer, heaptup, false);
/*
@@ -2511,6 +3079,17 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
bufflags |= REGBUF_KEEP_DATA;
XLogBeginInsert();
+
+ if (info & XLOG_HEAP_INIT_PAGE)
+ {
+ char *base;
+
+ base = IsToastRelation(relation) ?
+ (char *) &ToastPageGetSpecial(page)->pd_xid_base :
+ (char *) &HeapPageGetSpecial(page)->pd_xid_base;
+ XLogRegisterData(base, sizeof(TransactionId));
+ }
+
XLogRegisterData((char *) xlrec, tupledata - scratch.data);
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
@@ -2718,6 +3297,7 @@ heap_delete(Relation relation, ItemPointer tid,
tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
tp.t_len = ItemIdGetLength(lp);
tp.t_self = *tid;
+ HeapTupleCopyBaseFromPage(buffer, &tp, page, IsToastRelation(relation));
l1:
/*
@@ -2748,7 +3328,7 @@ l1:
uint16 infomask;
/* must copy state data before unlocking buffer */
- xwait = HeapTupleHeaderGetRawXmax(tp.t_data);
+ xwait = HeapTupleGetRawXmax(&tp);
infomask = tp.t_data->t_infomask;
/*
@@ -2787,6 +3367,10 @@ l1:
NULL);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ /* Copy possibly updated xid base after relocking */
+ HeapTupleCopyBaseFromPage(buffer, &tp, page,
+ IsToastRelation(relation));
+
/*
* If xwait had just locked the tuple then some other xact
* could update this tuple before we get to this point. Check
@@ -2797,7 +3381,7 @@ l1:
*/
if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
- !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data),
+ !TransactionIdEquals(HeapTupleGetRawXmax(&tp),
xwait))
goto l1;
}
@@ -2824,6 +3408,10 @@ l1:
XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ /* Copy possibly updated xid base after relocking */
+ HeapTupleCopyBaseFromPage(buffer, &tp, page,
+ IsToastRelation(relation));
+
/*
* xwait is done, but if xwait had just locked the tuple then some
* other xact could update this tuple before we get to this point.
@@ -2834,7 +3422,7 @@ l1:
*/
if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
- !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data),
+ !TransactionIdEquals(HeapTupleGetRawXmax(&tp),
xwait))
goto l1;
@@ -2848,7 +3436,7 @@ l1:
*/
if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
HEAP_XMAX_IS_LOCKED_ONLY(tp.t_data->t_infomask) ||
- HeapTupleHeaderIsOnlyLocked(tp.t_data))
+ HeapTupleIsOnlyLocked(&tp))
result = TM_Ok;
else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
result = TM_Updated;
@@ -2873,9 +3461,9 @@ l1:
Assert(result != TM_Updated ||
!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid));
tmfd->ctid = tp.t_data->t_ctid;
- tmfd->xmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
+ tmfd->xmax = HeapTupleGetUpdateXidAny(&tp);
if (result == TM_SelfModified)
- tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
+ tmfd->cmax = HeapTupleGetCmax(&tp);
else
tmfd->cmax = InvalidCommandId;
UnlockReleaseBuffer(buffer);
@@ -2898,7 +3486,7 @@ l1:
CheckForSerializableConflictIn(relation, tid, BufferGetBlockNumber(buffer));
/* replace cid with a combo CID if necessary */
- HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);
+ HeapTupleHeaderAdjustCmax(&tp, &cid, &iscombo);
/*
* Compute replica identity tuple before entering the critical section so
@@ -2916,11 +3504,20 @@ l1:
*/
MultiXactIdSetOldestMember();
- compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(tp.t_data),
+ compute_new_xmax_infomask(HeapTupleGetRawXmax(&tp),
tp.t_data->t_infomask, tp.t_data->t_infomask2,
xid, LockTupleExclusive, true,
&new_xmax, &new_infomask, &new_infomask2);
+#ifdef USE_ASSERT_CHECKING
+ if (IsToastRelation(relation))
+ Assert((new_infomask & HEAP_XMAX_IS_MULTI) == false);
+#endif
+
+ heap_page_prepare_for_xid(relation, buffer, new_xmax,
+ (new_infomask & HEAP_XMAX_IS_MULTI) ? true : false);
+ HeapTupleCopyBaseFromPage(buffer, &tp, page, IsToastRelation(relation));
+
START_CRIT_SECTION();
/*
@@ -2930,7 +3527,7 @@ l1:
* the subsequent page pruning will be a no-op and the hint will be
* cleared.
*/
- PageSetPrunable(page, xid);
+ PageSetPrunable(page, xid, IsToastRelation(relation));
if (PageIsAllVisible(page))
{
@@ -2946,10 +3543,15 @@ l1:
tp.t_data->t_infomask |= new_infomask;
tp.t_data->t_infomask2 |= new_infomask2;
HeapTupleHeaderClearHotUpdated(tp.t_data);
- HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
+ HeapTupleSetXmax(&tp, new_xmax);
+ if (IsToastRelation(relation))
+ ToastTupleHeaderSetXmax(page, &tp);
+ else
+ HeapTupleHeaderSetXmax(page, &tp);
HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
/* Make sure there is no forward chain link in t_ctid */
tp.t_data->t_ctid = tp.t_self;
+ HeapTupleCopyBaseFromPage(buffer, &tp, page, IsToastRelation(relation));
/* Signal that this is actually a move into another partition */
if (changingPart)
@@ -2985,6 +3587,8 @@ l1:
tp.t_data->t_infomask2);
xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
xlrec.xmax = new_xmax;
+ if (IsToastRelation(relation))
+ xlrec.flags |= XLH_DELETE_PAGE_ON_TOAST_RELATION;
if (old_key_tuple != NULL)
{
@@ -3140,7 +3744,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
HeapTuple heaptup;
HeapTuple old_key_tuple = NULL;
bool old_key_copied = false;
- Page page;
+ Page page,
+ newpage;
BlockNumber block;
MultiXactStatus mxact_status;
Buffer buffer,
@@ -3233,6 +3838,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
oldtup.t_len = ItemIdGetLength(lp);
oldtup.t_self = *otid;
+ HeapTupleCopyBaseFromPage(buffer, &oldtup, page, IsToastRelation(relation));
/* the new tuple is ready, except for this: */
newtup->t_tableOid = RelationGetRelid(relation);
@@ -3326,7 +3932,7 @@ l2:
*/
/* must copy state data before unlocking buffer */
- xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
+ xwait = HeapTupleGetRawXmax(&oldtup);
infomask = oldtup.t_data->t_infomask;
/*
@@ -3377,6 +3983,8 @@ l2:
checked_lockers = true;
locker_remains = remain != 0;
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyBaseFromPage(buffer, &oldtup, page,
+ IsToastRelation(relation));
/*
* If xwait had just locked the tuple then some other xact
@@ -3385,7 +3993,7 @@ l2:
*/
if (xmax_infomask_changed(oldtup.t_data->t_infomask,
infomask) ||
- !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data),
+ !TransactionIdEquals(HeapTupleGetRawXmax(&oldtup),
xwait))
goto l2;
}
@@ -3411,7 +4019,7 @@ l2:
* subxact aborts.
*/
if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask))
- update_xact = HeapTupleGetUpdateXid(oldtup.t_data);
+ update_xact = HeapTupleGetUpdateXid(&oldtup);
else
update_xact = InvalidTransactionId;
@@ -3459,6 +4067,9 @@ l2:
checked_lockers = true;
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyBaseFromPage(buffer, &oldtup, page,
+ IsToastRelation(relation));
+
/*
* xwait is done, but if xwait had just locked the tuple then some
* other xact could update this tuple before we get to this point.
@@ -3466,7 +4077,7 @@ l2:
*/
if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) ||
!TransactionIdEquals(xwait,
- HeapTupleHeaderGetRawXmax(oldtup.t_data)))
+ HeapTupleGetRawXmax(&oldtup)))
goto l2;
/* Otherwise check if it committed or aborted */
@@ -3503,9 +4114,9 @@ l2:
Assert(result != TM_Updated ||
!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid));
tmfd->ctid = oldtup.t_data->t_ctid;
- tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
+ tmfd->xmax = HeapTupleGetUpdateXidAny(&oldtup);
if (result == TM_SelfModified)
- tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
+ tmfd->cmax = HeapTupleGetCmax(&oldtup);
else
tmfd->cmax = InvalidCommandId;
UnlockReleaseBuffer(buffer);
@@ -3535,6 +4146,8 @@ l2:
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
visibilitymap_pin(relation, block, &vmbuffer);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyBaseFromPage(buffer, &oldtup, page,
+ IsToastRelation(relation));
goto l2;
}
@@ -3544,7 +4157,7 @@ l2:
* If the tuple we're updating is locked, we need to preserve the locking
* info in the old tuple's Xmax. Prepare a new Xmax value for this.
*/
- compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
+ compute_new_xmax_infomask(HeapTupleGetRawXmax(&oldtup),
oldtup.t_data->t_infomask,
oldtup.t_data->t_infomask2,
xid, *lockmode, true,
@@ -3563,7 +4176,7 @@ l2:
(checked_lockers && !locker_remains))
xmax_new_tuple = InvalidTransactionId;
else
- xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data);
+ xmax_new_tuple = HeapTupleGetRawXmax(&oldtup);
if (!TransactionIdIsValid(xmax_new_tuple))
{
@@ -3596,17 +4209,15 @@ l2:
*/
newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
- HeapTupleHeaderSetXmin(newtup->t_data, xid);
HeapTupleHeaderSetCmin(newtup->t_data, cid);
newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple;
newtup->t_data->t_infomask2 |= infomask2_new_tuple;
- HeapTupleHeaderSetXmax(newtup->t_data, xmax_new_tuple);
/*
* Replace cid with a combo CID if necessary. Note that we already put
* the plain cid into the new tuple.
*/
- HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo);
+ HeapTupleHeaderAdjustCmax(&oldtup, &cid, &iscombo);
/*
* If the toaster needs to be activated, OR if the new tuple will not fit
@@ -3636,7 +4247,7 @@ l2:
newtupsize = MAXALIGN(newtup->t_len);
- if (need_toast || newtupsize > pagefree)
+ if (need_toast || newtupsize > pagefree || HeapPageIsDoubleXmax(page))
{
TransactionId xmax_lock_old_tuple;
uint16 infomask_lock_old_tuple,
@@ -3661,7 +4272,7 @@ l2:
* updating, because the potentially created multixact would otherwise
* be wrong.
*/
- compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
+ compute_new_xmax_infomask(HeapTupleGetRawXmax(&oldtup),
oldtup.t_data->t_infomask,
oldtup.t_data->t_infomask2,
xid, *lockmode, false,
@@ -3670,6 +4281,16 @@ l2:
Assert(HEAP_XMAX_IS_LOCKED_ONLY(infomask_lock_old_tuple));
+#ifdef USE_ASSERT_CHECKING
+ if (IsToastRelation(relation))
+ Assert((infomask_lock_old_tuple & HEAP_XMAX_IS_MULTI) == false);
+#endif
+
+ heap_page_prepare_for_xid(relation, buffer, xmax_lock_old_tuple,
+ (infomask_lock_old_tuple & HEAP_XMAX_IS_MULTI) ? true : false);
+ HeapTupleCopyBaseFromPage(buffer, &oldtup, page,
+ IsToastRelation(relation));
+
START_CRIT_SECTION();
/* Clear obsolete visibility flags ... */
@@ -3678,10 +4299,13 @@ l2:
HeapTupleClearHotUpdated(&oldtup);
/* ... and store info about transaction updating this tuple */
Assert(TransactionIdIsValid(xmax_lock_old_tuple));
- HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple);
oldtup.t_data->t_infomask |= infomask_lock_old_tuple;
oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple;
+ HeapTupleSetXmax(&oldtup, xmax_lock_old_tuple);
+ HeapTupleHeaderSetXmax(page, &oldtup);
HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
+ HeapTupleCopyBaseFromPage(buffer, &oldtup, page,
+ IsToastRelation(relation));
/* temporarily make it look not-updated, but locked */
oldtup.t_data->t_ctid = oldtup.t_self;
@@ -3763,7 +4387,11 @@ l2:
*/
for (;;)
{
- if (newtupsize > pagefree)
+ /*
+ * We can't fit new tuple to "double xmax" page, since it's
+ * impossible to set xmin there.
+ */
+ if (newtupsize > pagefree || HeapPageIsDoubleXmax(page))
{
/* It doesn't fit, must use RelationGetBufferForTuple. */
newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
@@ -3796,6 +4424,10 @@ l2:
break;
}
}
+
+ /* Copy possibly updated xid base to old tuple after relocking */
+ HeapTupleCopyBaseFromPage(buffer, &oldtup, page,
+ IsToastRelation(relation));
}
else
{
@@ -3855,6 +4487,48 @@ l2:
id_has_external,
&old_key_copied);
+ newpage = BufferGetPage(newbuf);
+
+ /*
+ * Prepare pages for the current xid, which is written to the new tuple's
+ * Xmin and the old page's pd_prune_xid.
+ */
+ heap_page_prepare_for_xid(relation, buffer, xid, false);
+ if (newbuf != buffer)
+ heap_page_prepare_for_xid(relation, newbuf, xid, false);
+
+#ifdef USE_ASSERT_CHECKING
+ if (IsToastRelation(relation))
+ {
+ Assert((infomask_old_tuple & HEAP_XMAX_IS_MULTI) == false);
+ Assert((heaptup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) == false);
+ }
+#endif
+
+ /* Prepare pages for tuple's Xmax */
+ heap_page_prepare_for_xid(relation, buffer, xmax_old_tuple,
+ (infomask_old_tuple & HEAP_XMAX_IS_MULTI) ? true : false);
+ heap_page_prepare_for_xid(relation, newbuf, xmax_new_tuple,
+ (heaptup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ? true : false);
+
+ /* Copy possibly updated Xid bases to the both tuples. */
+ HeapTupleCopyBaseFromPage(buffer, &oldtup, page,
+ IsToastRelation(relation));
+
+ /*
+ * Set new tuple's Xmin/Xmax, old tuple's Xmin/Xmax were already shifted.
+ */
+ HeapTupleSetXmin(heaptup, xid);
+ if (IsToastRelation(relation))
+ ToastTupleHeaderSetXmin(newpage, heaptup);
+ else
+ HeapTupleHeaderSetXmin(newpage, heaptup);
+ HeapTupleSetXmax(heaptup, xmax_new_tuple);
+ if (IsToastRelation(relation))
+ ToastTupleHeaderSetXmax(newpage, heaptup);
+ else
+ HeapTupleHeaderSetXmax(newpage, heaptup);
+
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
@@ -3870,7 +4544,9 @@ l2:
* not to optimize for aborts. Note that heap_xlog_update must be kept in
* sync if this decision changes.
*/
- PageSetPrunable(page, xid);
+
+ Assert(!IsToastRelation(relation));
+ PageSetPrunable(page, xid, false);
if (use_hot_update)
{
@@ -3897,10 +4573,12 @@ l2:
oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
/* ... and store info about transaction updating this tuple */
Assert(TransactionIdIsValid(xmax_old_tuple));
- HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple);
oldtup.t_data->t_infomask |= infomask_old_tuple;
oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
+ HeapTupleSetXmax(&oldtup, xmax_old_tuple);
+ HeapTupleHeaderSetXmax(page, &oldtup);
HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
+ HeapTupleCopyBaseFromPage(buffer, &oldtup, page, IsToastRelation(relation));
/* record address of new tuple in t_ctid of old one */
oldtup.t_data->t_ctid = heaptup->t_self;
@@ -3954,6 +4632,20 @@ l2:
END_CRIT_SECTION();
+ if (newtup != heaptup)
+ {
+ /*
+ * Set new tuple's Xmin/Xmax only after both xid base preparations.
+ * Old tuple's Xmin/Xmax were already shifted because old tuple is on
+ * the page.
+ */
+ HeapTupleCopyBase(newtup, heaptup);
+ HeapTupleSetXmin(newtup, xid);
+ HeapTupleHeaderSetXmin(newpage, newtup);
+ HeapTupleSetXmax(newtup, xmax_new_tuple);
+ HeapTupleHeaderSetXmax(newpage, newtup);
+ }
+
if (newbuf != buffer)
LockBuffer(newbuf, BUFFER_LOCK_UNLOCK);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
@@ -4292,6 +4984,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
tuple->t_len = ItemIdGetLength(lp);
tuple->t_tableOid = RelationGetRelid(relation);
+ HeapTupleCopyBaseFromPage(*buffer, tuple, page, IsToastRelation(relation));
l3:
result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
@@ -4318,7 +5011,7 @@ l3:
ItemPointerData t_ctid;
/* must copy state data before unlocking buffer */
- xwait = HeapTupleHeaderGetRawXmax(tuple->t_data);
+ xwait = HeapTupleGetRawXmax(tuple);
infomask = tuple->t_data->t_infomask;
infomask2 = tuple->t_data->t_infomask2;
ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
@@ -4476,11 +5169,15 @@ l3:
result = res;
/* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyBaseFromPage(*buffer, tuple, page,
+ IsToastRelation(relation));
goto failed;
}
}
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyBaseFromPage(*buffer, tuple, page,
+ IsToastRelation(relation));
/*
* Make sure it's still an appropriate lock, else start over.
@@ -4489,7 +5186,7 @@ l3:
* now need to follow the update chain to lock the new
* versions.
*/
- if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
+ if (!HeapTupleIsOnlyLocked(tuple) &&
((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
!updated))
goto l3;
@@ -4516,6 +5213,8 @@ l3:
!HEAP_XMAX_IS_EXCL_LOCKED(infomask))
{
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyBaseFromPage(*buffer, tuple, page,
+ IsToastRelation(relation));
/*
* Make sure it's still an appropriate lock, else start over.
@@ -4544,8 +5243,11 @@ l3:
* meantime, start over.
*/
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyBaseFromPage(*buffer, tuple, page,
+ IsToastRelation(relation));
+
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
- !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+ !TransactionIdEquals(HeapTupleGetRawXmax(tuple),
xwait))
goto l3;
@@ -4556,10 +5258,12 @@ l3:
else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
{
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyBaseFromPage(*buffer, tuple, page,
+ IsToastRelation(relation));
/* if the xmax changed in the meantime, start over */
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
- !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+ !TransactionIdEquals(HeapTupleGetRawXmax(tuple),
xwait))
goto l3;
/* otherwise, we're good */
@@ -4584,8 +5288,11 @@ l3:
{
/* ... but if the xmax changed in the meantime, start over */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyBaseFromPage(*buffer, tuple, page,
+ IsToastRelation(relation));
+
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
- !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+ !TransactionIdEquals(HeapTupleGetRawXmax(tuple),
xwait))
goto l3;
Assert(HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask));
@@ -4606,6 +5313,8 @@ l3:
if (require_sleep && (result == TM_Updated || result == TM_Deleted))
{
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyBaseFromPage(*buffer, tuple, page,
+ IsToastRelation(relation));
goto failed;
}
else if (require_sleep)
@@ -4631,6 +5340,8 @@ l3:
result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyBaseFromPage(*buffer, tuple, page,
+ IsToastRelation(relation));
goto failed;
}
@@ -4657,6 +5368,8 @@ l3:
result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyBaseFromPage(*buffer, tuple, page,
+ IsToastRelation(relation));
goto failed;
}
break;
@@ -4697,6 +5410,8 @@ l3:
result = TM_WouldBlock;
/* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyBaseFromPage(*buffer, tuple, page,
+ IsToastRelation(relation));
goto failed;
}
break;
@@ -4723,11 +5438,15 @@ l3:
result = res;
/* recovery code expects to have buffer lock held */
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyBaseFromPage(*buffer, tuple, page,
+ IsToastRelation(relation));
goto failed;
}
}
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyBaseFromPage(*buffer, tuple, page,
+ IsToastRelation(relation));
/*
* xwait is done, but if xwait had just locked the tuple then some
@@ -4735,7 +5454,7 @@ l3:
* Check for xmax change, and start over if so.
*/
if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
- !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+ !TransactionIdEquals(HeapTupleGetRawXmax(tuple),
xwait))
goto l3;
@@ -4763,7 +5482,7 @@ l3:
if (!require_sleep ||
(tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
- HeapTupleHeaderIsOnlyLocked(tuple->t_data))
+ HeapTupleIsOnlyLocked(tuple))
result = TM_Ok;
else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
result = TM_Updated;
@@ -4789,9 +5508,9 @@ failed:
Assert(result != TM_Updated ||
!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
tmfd->ctid = tuple->t_data->t_ctid;
- tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
+ tmfd->xmax = HeapTupleGetUpdateXidAny(tuple);
if (result == TM_SelfModified)
- tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
+ tmfd->cmax = HeapTupleGetCmax(tuple);
else
tmfd->cmax = InvalidCommandId;
goto out_locked;
@@ -4811,10 +5530,12 @@ failed:
LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
visibilitymap_pin(relation, block, &vmbuffer);
LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ HeapTupleCopyBaseFromPage(*buffer, tuple, page,
+ IsToastRelation(relation));
goto l3;
}
- xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
+ xmax = HeapTupleGetRawXmax(tuple);
old_infomask = tuple->t_data->t_infomask;
/*
@@ -4836,6 +5557,15 @@ failed:
GetCurrentTransactionId(), mode, false,
&xid, &new_infomask, &new_infomask2);
+#ifdef USE_ASSERT_CHECKING
+ if (IsToastRelation(relation))
+ Assert((new_infomask & HEAP_XMAX_IS_MULTI) == false);
+#endif
+
+ heap_page_prepare_for_xid(relation, *buffer, xid,
+ (new_infomask & HEAP_XMAX_IS_MULTI) ? true : false);
+ HeapTupleCopyBaseFromPage(*buffer, tuple, page, IsToastRelation(relation));
+
START_CRIT_SECTION();
/*
@@ -4854,7 +5584,8 @@ failed:
tuple->t_data->t_infomask2 |= new_infomask2;
if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
HeapTupleHeaderClearHotUpdated(tuple->t_data);
- HeapTupleHeaderSetXmax(tuple->t_data, xid);
+ HeapTupleSetXmax(tuple, xid);
+ HeapTupleHeaderSetXmax(page, tuple);
/*
* Make sure there is no forward chain link in t_ctid. Note that in the
@@ -5448,12 +6179,19 @@ l4:
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
}
+ /*
+ * Copy xid base after buffer relocking, it could have changed since
+ * heap_fetch().
+ */
+ HeapTupleCopyBaseFromPage(buf, &mytup, BufferGetPage(buf),
+ IsToastRelation(rel));
+
/*
* Check the tuple XMIN against prior XMAX, if any. If we reached the
* end of the chain, we're done, so return success.
*/
if (TransactionIdIsValid(priorXmax) &&
- !TransactionIdEquals(HeapTupleHeaderGetXmin(mytup.t_data),
+ !TransactionIdEquals(HeapTupleGetXmin(&mytup),
priorXmax))
{
result = TM_Ok;
@@ -5465,7 +6203,7 @@ l4:
* (sub)transaction, then we already locked the last live one in the
* chain, thus we're done, so return success.
*/
- if (TransactionIdDidAbort(HeapTupleHeaderGetXmin(mytup.t_data)))
+ if (TransactionIdDidAbort(HeapTupleGetXmin(&mytup)))
{
result = TM_Ok;
goto out_locked;
@@ -5473,7 +6211,7 @@ l4:
old_infomask = mytup.t_data->t_infomask;
old_infomask2 = mytup.t_data->t_infomask2;
- xmax = HeapTupleHeaderGetRawXmax(mytup.t_data);
+ xmax = HeapTupleGetRawXmax(&mytup);
/*
* If this tuple version has been updated or locked by some concurrent
@@ -5486,7 +6224,7 @@ l4:
TransactionId rawxmax;
bool needwait;
- rawxmax = HeapTupleHeaderGetRawXmax(mytup.t_data);
+ rawxmax = HeapTupleGetRawXmax(&mytup);
if (old_infomask & HEAP_XMAX_IS_MULTI)
{
int nmembers;
@@ -5627,14 +6365,25 @@ l4:
VISIBILITYMAP_ALL_FROZEN))
cleared_all_frozen = true;
+#ifdef USE_ASSERT_CHECKING
+ if (IsToastRelation(rel))
+ Assert((new_infomask & HEAP_XMAX_IS_MULTI) == false);
+#endif
+
+ heap_page_prepare_for_xid(rel, buf, new_xmax,
+ (new_infomask & HEAP_XMAX_IS_MULTI) ? true : false);
+ HeapTupleCopyBaseFromPage(buf, &mytup, BufferGetPage(buf),
+ IsToastRelation(rel));
+
START_CRIT_SECTION();
/* ... and set them */
- HeapTupleHeaderSetXmax(mytup.t_data, new_xmax);
mytup.t_data->t_infomask &= ~HEAP_XMAX_BITS;
mytup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
mytup.t_data->t_infomask |= new_infomask;
mytup.t_data->t_infomask2 |= new_infomask2;
+ HeapTupleSetXmax(&mytup, new_xmax);
+ HeapTupleHeaderSetXmax(BufferGetPage(buf), &mytup);
MarkBufferDirty(buf);
@@ -5668,14 +6417,14 @@ next:
if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID ||
HeapTupleHeaderIndicatesMovedPartitions(mytup.t_data) ||
ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) ||
- HeapTupleHeaderIsOnlyLocked(mytup.t_data))
+ HeapTupleIsOnlyLocked(&mytup))
{
result = TM_Ok;
goto out_locked;
}
/* tail recursion */
- priorXmax = HeapTupleHeaderGetUpdateXid(mytup.t_data);
+ priorXmax = HeapTupleGetUpdateXidAny(&mytup);
ItemPointerCopy(&(mytup.t_data->t_ctid), &tupid);
UnlockReleaseBuffer(buf);
}
@@ -5882,12 +6631,13 @@ heap_abort_speculative(Relation relation, ItemPointer tid)
tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
tp.t_len = ItemIdGetLength(lp);
tp.t_self = *tid;
+ HeapTupleCopyBaseFromPage(buffer, &tp, page, IsToastRelation(relation));
/*
* Sanity check that the tuple really is a speculatively inserted tuple,
* inserted by us.
*/
- if (tp.t_data->t_choice.t_heap.t_xmin != xid)
+ if (HeapTupleGetRawXmin(&tp) != xid)
elog(ERROR, "attempted to kill a tuple inserted by another transaction");
if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
elog(ERROR, "attempted to kill a non-speculative tuple");
@@ -5916,7 +6666,9 @@ heap_abort_speculative(Relation relation, ItemPointer tid)
prune_xid = relation->rd_rel->relfrozenxid;
else
prune_xid = TransactionXmin;
- PageSetPrunable(page, prune_xid);
+ Assert(TransactionIdIsValid(prune_xid));
+ heap_page_prepare_for_xid(relation, buffer, prune_xid, false);
+ PageSetPrunable(page, prune_xid, IsToastRelation(relation));
/* store transaction information of xact deleting the tuple */
tp.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
@@ -5925,9 +6677,15 @@ heap_abort_speculative(Relation relation, ItemPointer tid)
/*
* Set the tuple header xmin to InvalidTransactionId. This makes the
* tuple immediately invisible everyone. (In particular, to any
- * transactions waiting on the speculative token, woken up later.)
+ * transactions waiting on the speculative token, woken up later.) Don't
+ * need to reload xid base from page because InvalidTransactionId doesn't
+ * require xid base to be valid.
*/
- HeapTupleHeaderSetXmin(tp.t_data, InvalidTransactionId);
+ HeapTupleSetXmin(&tp, InvalidTransactionId);
+ if (IsToastRelation(relation))
+ ToastTupleHeaderSetXmin(page, &tp);
+ else
+ HeapTupleHeaderSetXmin(page, &tp);
/* Clear the speculative insertion token too */
tp.t_data->t_ctid = tp.t_self;
@@ -5946,6 +6704,8 @@ heap_abort_speculative(Relation relation, ItemPointer tid)
XLogRecPtr recptr;
xlrec.flags = XLH_DELETE_IS_SUPER;
+ if (IsToastRelation(relation))
+ xlrec.flags |= XLH_DELETE_PAGE_ON_TOAST_RELATION;
xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
tp.t_data->t_infomask2);
xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
@@ -6275,7 +7035,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
* individual members might even show that we don't need to keep anything.
*/
nnewmembers = 0;
- newmembers = palloc(sizeof(MultiXactMember) * nmembers);
+ newmembers = palloc0(sizeof(MultiXactMember) * nmembers);
has_lockers = false;
update_xid = InvalidTransactionId;
update_committed = false;
@@ -6475,7 +7235,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
* The *frz WAL record we output completely removes all old XIDs during REDO.
*/
bool
-heap_prepare_freeze_tuple(HeapTupleHeader tuple,
+heap_prepare_freeze_tuple(HeapTuple htup,
TransactionId relfrozenxid, TransactionId relminmxid,
TransactionId cutoff_xid, TransactionId cutoff_multi,
xl_heap_freeze_tuple *frz, bool *totally_frozen,
@@ -6487,11 +7247,12 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
bool xmin_frozen;
bool freeze_xmax;
TransactionId xid;
+ HeapTupleHeader tuple = htup->t_data;
frz->frzflags = 0;
frz->t_infomask2 = tuple->t_infomask2;
frz->t_infomask = tuple->t_infomask;
- frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
+ frz->xmax = HeapTupleGetRawXmax(htup);
/*
* Process xmin. xmin_frozen has two slightly different meanings: in the
@@ -6503,7 +7264,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
* handling, since either way the tuple's xmin will be a permanent value
* once we're done with it.
*/
- xid = HeapTupleHeaderGetXmin(tuple);
+ xid = HeapTupleGetXmin(htup);
if (!TransactionIdIsNormal(xid))
xmin_frozen = true;
else
@@ -6545,7 +7306,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
*
* Make sure to keep heap_tuple_would_freeze in sync with this.
*/
- xid = HeapTupleHeaderGetRawXmax(tuple);
+ xid = HeapTupleGetRawXmax(htup);
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
{
@@ -6645,6 +7406,15 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
Assert(!TransactionIdIsValid(newxmax));
}
}
+ else if ((tuple->t_infomask & HEAP_XMAX_INVALID) &&
+ TransactionIdIsNormal(xid))
+ {
+ /*
+ * xmax is already marked invalid, so reset it without reading clog.
+ * This prevents excess growth of xmax.
+ */
+ freeze_xmax = true;
+ }
else if (TransactionIdIsNormal(xid))
{
if (TransactionIdPrecedes(xid, relfrozenxid))
@@ -6679,7 +7449,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
}
}
else if ((tuple->t_infomask & HEAP_XMAX_INVALID) ||
- !TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
+ !TransactionIdIsValid(HeapTupleGetRawXmax(htup)))
{
freeze_xmax = false;
xmax_already_frozen = true;
@@ -6775,18 +7545,35 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
* NB: All code in here must be safe to execute during crash recovery!
*/
void
-heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *frz)
+heap_execute_freeze_tuple(HeapTuple htup, xl_heap_freeze_tuple *frz)
{
- HeapTupleHeaderSetXmax(tuple, frz->xmax);
+ HeapTupleHeader tuple = htup->t_data;
+
+ tuple->t_infomask = frz->t_infomask;
+ tuple->t_infomask2 = frz->t_infomask2;
+
+ HeapTupleSetXmax(htup, frz->xmax);
if (frz->frzflags & XLH_FREEZE_XVAC)
HeapTupleHeaderSetXvac(tuple, FrozenTransactionId);
if (frz->frzflags & XLH_INVALID_XVAC)
HeapTupleHeaderSetXvac(tuple, InvalidTransactionId);
+}
- tuple->t_infomask = frz->t_infomask;
- tuple->t_infomask2 = frz->t_infomask2;
+void
+heap_execute_freeze_tuple_page(Page page, HeapTupleHeader htup,
+ xl_heap_freeze_tuple *frz, bool is_toast)
+{
+ HeapTupleData tuple; /* stack wrapper so the HeapTuple-based helpers can be used on a bare header */
+
+ tuple.t_data = htup; /* t_data is the only field assigned before the calls below */
+ heap_execute_freeze_tuple(&tuple, frz); /* applies frz's infomask, xmax and xvac changes */
+
+ if (is_toast) /* choose the xmax setter that matches the page kind */
+ ToastTupleHeaderSetXmax(page, &tuple);
+ else
+ HeapTupleHeaderSetXmax(page, &tuple);
}
/*
@@ -6796,7 +7583,7 @@ heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *frz)
* Useful for callers like CLUSTER that perform their own WAL logging.
*/
bool
-heap_freeze_tuple(HeapTupleHeader tuple,
+heap_freeze_tuple(HeapTuple tuple,
TransactionId relfrozenxid, TransactionId relminmxid,
TransactionId cutoff_xid, TransactionId cutoff_multi)
{
@@ -6963,10 +7750,10 @@ MultiXactIdGetUpdateXid(TransactionId xmax, uint16 t_infomask)
* checking the hint bits.
*/
TransactionId
-HeapTupleGetUpdateXid(HeapTupleHeader tuple)
+HeapTupleGetUpdateXid(HeapTuple tuple)
{
- return MultiXactIdGetUpdateXid(HeapTupleHeaderGetRawXmax(tuple),
- tuple->t_infomask);
+ return MultiXactIdGetUpdateXid(HeapTupleGetRawXmax(tuple),
+ tuple->t_data->t_infomask);
}
/*
@@ -7192,15 +7979,18 @@ ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status,
* will eventually require freezing (if tuple isn't removed by pruning first).
*/
bool
-heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
+heap_tuple_needs_eventual_freeze(HeapTuple htup)
{
TransactionId xid;
+ HeapTupleHeader tuple;
+
+ tuple = htup->t_data;
/*
* If xmin is a normal transaction ID, this tuple is definitely not
* frozen.
*/
- xid = HeapTupleHeaderGetXmin(tuple);
+ xid = HeapTupleGetXmin(htup);
if (TransactionIdIsNormal(xid))
return true;
@@ -7211,13 +8001,13 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
{
MultiXactId multi;
- multi = HeapTupleHeaderGetRawXmax(tuple);
+ multi = HeapTupleGetRawXmax(htup);
if (MultiXactIdIsValid(multi))
return true;
}
else
{
- xid = HeapTupleHeaderGetRawXmax(tuple);
+ xid = HeapTupleGetRawXmax(htup);
if (TransactionIdIsNormal(xid))
return true;
}
@@ -7245,7 +8035,7 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
* never freeze here, which makes tracking the oldest extant XID/MXID simple.
*/
bool
-heap_tuple_would_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
+heap_tuple_would_freeze(HeapTuple htup, TransactionId cutoff_xid,
MultiXactId cutoff_multi,
TransactionId *relfrozenxid_out,
MultiXactId *relminmxid_out)
@@ -7253,9 +8043,10 @@ heap_tuple_would_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
TransactionId xid;
MultiXactId multi;
bool would_freeze = false;
+ HeapTupleHeader tuple = htup->t_data;
/* First deal with xmin */
- xid = HeapTupleHeaderGetXmin(tuple);
+ xid = HeapTupleGetXmin(htup);
if (TransactionIdIsNormal(xid))
{
if (TransactionIdPrecedes(xid, *relfrozenxid_out))
@@ -7268,9 +8059,9 @@ heap_tuple_would_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
xid = InvalidTransactionId;
multi = InvalidMultiXactId;
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
- multi = HeapTupleHeaderGetRawXmax(tuple);
+ multi = HeapTupleGetRawXmax(htup);
else
- xid = HeapTupleHeaderGetRawXmax(tuple);
+ xid = HeapTupleGetRawXmax(htup);
if (TransactionIdIsNormal(xid))
{
@@ -7343,14 +8134,14 @@ heap_tuple_would_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
* with queries.
*/
void
-HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple,
+HeapTupleHeaderAdvanceLatestRemovedXid(HeapTuple tuple,
TransactionId *latestRemovedXid)
{
- TransactionId xmin = HeapTupleHeaderGetXmin(tuple);
- TransactionId xmax = HeapTupleHeaderGetUpdateXid(tuple);
- TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
+ TransactionId xmin = HeapTupleGetXmin(tuple);
+ TransactionId xmax = HeapTupleGetUpdateXidAny(tuple);
+ TransactionId xvac = HeapTupleHeaderGetXvac(tuple->t_data);
- if (tuple->t_infomask & HEAP_MOVED)
+ if (tuple->t_data->t_infomask & HEAP_MOVED)
{
if (TransactionIdPrecedes(*latestRemovedXid, xvac))
*latestRemovedXid = xvac;
@@ -7362,8 +8153,8 @@ HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple,
*
* Look for a committed hint bit, or if no xmin bit is set, check clog.
*/
- if (HeapTupleHeaderXminCommitted(tuple) ||
- (!HeapTupleHeaderXminInvalid(tuple) && TransactionIdDidCommit(xmin)))
+ if (HeapTupleHeaderXminCommitted(tuple->t_data) ||
+ (!HeapTupleHeaderXminInvalid(tuple->t_data) && TransactionIdDidCommit(xmin)))
{
if (xmax != xmin &&
TransactionIdFollows(xmax, *latestRemovedXid))
@@ -7713,7 +8504,7 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
for (;;)
{
ItemId lp;
- HeapTupleHeader htup;
+ HeapTupleData htup;
/* Sanity check (pure paranoia) */
if (offnum < FirstOffsetNumber)
@@ -7750,16 +8541,18 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
if (!ItemIdIsNormal(lp))
break;
- htup = (HeapTupleHeader) PageGetItem(page, lp);
+ htup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
+ htup.t_len = ItemIdGetLength(lp);
+ HeapTupleCopyBaseFromPage(buf, &htup, page, IsToastRelation(rel));
/*
* Check the tuple XMIN against prior XMAX, if any
*/
if (TransactionIdIsValid(priorXmax) &&
- !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
+ !TransactionIdEquals(HeapTupleGetXmin(&htup), priorXmax))
break;
- HeapTupleHeaderAdvanceLatestRemovedXid(htup, &latestRemovedXid);
+ HeapTupleHeaderAdvanceLatestRemovedXid(&htup, &latestRemovedXid);
/*
* If the tuple is not HOT-updated, then we are at the end of this
@@ -7767,13 +8560,13 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate)
* chain (they get their own index entries) -- just move on to
* next htid from index AM caller.
*/
- if (!HeapTupleHeaderIsHotUpdated(htup))
+ if (!HeapTupleHeaderIsHotUpdated(htup.t_data))
break;
/* Advance to next HOT chain member */
- Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
- offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
- priorXmax = HeapTupleHeaderGetUpdateXid(htup);
+ Assert(ItemPointerGetBlockNumber(&htup.t_data->t_ctid) == blkno);
+ offnum = ItemPointerGetOffsetNumber(&htup.t_data->t_ctid);
+ priorXmax = HeapTupleGetUpdateXidAny(&htup);
}
/* Enable further/final shrinking of deltids for caller */
@@ -8179,6 +8972,9 @@ log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid,
xlrec.cutoff_xid = cutoff_xid;
xlrec.ntuples = ntuples;
+ xlrec.flags = 0;
+ if (IsToastRelation(reln))
+ xlrec.flags |= XLH_FREEZE_PAGE_ON_TOAST_RELATION;
XLogBeginInsert();
XLogRegisterData((char *) &xlrec, SizeOfHeapFreezePage);
@@ -8353,13 +9149,13 @@ log_heap_update(Relation reln, Buffer oldbuf,
/* Prepare WAL data for the old page */
xlrec.old_offnum = ItemPointerGetOffsetNumber(&oldtup->t_self);
- xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data);
+ xlrec.old_xmax = HeapTupleGetRawXmax(oldtup);
xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask,
oldtup->t_data->t_infomask2);
/* Prepare WAL data for the new page */
xlrec.new_offnum = ItemPointerGetOffsetNumber(&newtup->t_self);
- xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data);
+ xlrec.new_xmax = HeapTupleGetRawXmax(newtup);
bufflags = REGBUF_STANDARD;
if (init)
@@ -8371,6 +9167,10 @@ log_heap_update(Relation reln, Buffer oldbuf,
if (oldbuf != newbuf)
XLogRegisterBuffer(1, oldbuf, REGBUF_STANDARD);
+ if (info & XLOG_HEAP_INIT_PAGE)
+ XLogRegisterData((char *) &HeapPageGetSpecial(page)->pd_xid_base,
+ sizeof(TransactionId));
+
XLogRegisterData((char *) &xlrec, SizeOfHeapUpdate);
/*
@@ -8483,8 +9283,8 @@ log_heap_new_cid(Relation relation, HeapTuple tup)
{
Assert(!(hdr->t_infomask & HEAP_XMAX_INVALID));
Assert(!HeapTupleHeaderXminInvalid(hdr));
- xlrec.cmin = HeapTupleHeaderGetCmin(hdr);
- xlrec.cmax = HeapTupleHeaderGetCmax(hdr);
+ xlrec.cmin = HeapTupleGetCmin(tup);
+ xlrec.cmax = HeapTupleGetCmax(tup);
xlrec.combocid = HeapTupleHeaderGetRawCommandId(hdr);
}
/* No combo CID, so only cmin or cmax can be set by this TX */
@@ -8686,7 +9486,9 @@ heap_xlog_prune(XLogReaderState *record)
heap_page_prune_execute(buffer,
redirected, nredirected,
nowdead, ndead,
- nowunused, nunused);
+ nowunused, nunused,
+ xlrec->flags & XLH_PRUNE_REPAIR_FRAGMENTATION,
+ xlrec->flags & XLH_PRUNE_ON_TOAST_RELATION);
/*
* Note: we don't worry about updating the page's prunability hints.
@@ -8978,7 +9780,8 @@ heap_xlog_freeze_page(XLogReaderState *record)
lp = PageGetItemId(page, xlrec_tp->offset); /* offsets are one-based */
tuple = (HeapTupleHeader) PageGetItem(page, lp);
- heap_execute_freeze_tuple(tuple, xlrec_tp);
+ heap_execute_freeze_tuple_page(page, tuple, xlrec_tp,
+ xlrec->flags & XLH_FREEZE_PAGE_ON_TOAST_RELATION);
}
PageSetLSN(page, lsn);
@@ -9049,6 +9852,8 @@ heap_xlog_delete(XLogReaderState *record)
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
+ HeapTupleData tuple;
+
page = BufferGetPage(buffer);
if (PageGetMaxOffsetNumber(page) >= xlrec->offnum)
@@ -9064,14 +9869,29 @@ heap_xlog_delete(XLogReaderState *record)
HeapTupleHeaderClearHotUpdated(htup);
fix_infomask_from_infobits(xlrec->infobits_set,
&htup->t_infomask, &htup->t_infomask2);
+ tuple.t_data = htup;
+
if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
- HeapTupleHeaderSetXmax(htup, xlrec->xmax);
+ {
+ HeapTupleSetXmax(&tuple, xlrec->xmax);
+ if (xlrec->flags & XLH_DELETE_PAGE_ON_TOAST_RELATION)
+ ToastTupleHeaderSetXmax(page, &tuple);
+ else
+ HeapTupleHeaderSetXmax(page, &tuple);
+ }
else
- HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
+ {
+ HeapTupleSetXmin(&tuple, InvalidTransactionId);
+ if (xlrec->flags & XLH_DELETE_PAGE_ON_TOAST_RELATION)
+ ToastTupleHeaderSetXmin(page, &tuple);
+ else
+ HeapTupleHeaderSetXmin(page, &tuple);
+ }
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
/* Mark the page as a candidate for pruning */
- PageSetPrunable(page, XLogRecGetXid(record));
+ PageSetPrunable(page, XLogRecGetXid(record),
+ xlrec->flags & XLH_DELETE_PAGE_ON_TOAST_RELATION);
if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
@@ -9092,7 +9912,7 @@ static void
heap_xlog_insert(XLogReaderState *record)
{
XLogRecPtr lsn = record->EndRecPtr;
- xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record);
+ xl_heap_insert *xlrec;
Buffer buffer;
Page page;
union
@@ -9108,6 +9928,17 @@ heap_xlog_insert(XLogReaderState *record)
BlockNumber blkno;
ItemPointerData target_tid;
XLogRedoAction action;
+ bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
+ TransactionId pd_xid_base = InvalidTransactionId;
+ Pointer rec_data = (Pointer) XLogRecGetData(record);
+
+ if (isinit)
+ {
+ pd_xid_base = *((TransactionId *) rec_data);
+ rec_data += sizeof(TransactionId);
+ }
+
+ xlrec = (xl_heap_insert *) rec_data;
XLogRecGetBlockTag(record, 0, &target_locator, NULL, &blkno);
ItemPointerSetBlockNumber(&target_tid, blkno);
@@ -9132,11 +9963,24 @@ heap_xlog_insert(XLogReaderState *record)
* If we inserted the first and only tuple on the page, re-initialize the
* page from scratch.
*/
- if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
+ if (isinit)
{
buffer = XLogInitBufferForRedo(record, 0);
page = BufferGetPage(buffer);
- PageInit(page, BufferGetPageSize(buffer), 0);
+
+ if (xlrec->flags & XLH_INSERT_ON_TOAST_RELATION)
+ {
+ PageInit(page, BufferGetPageSize(buffer),
+ sizeof(ToastPageSpecialData));
+ ToastPageGetSpecial(page)->pd_xid_base = pd_xid_base;
+ }
+ else
+ {
+ PageInit(page, BufferGetPageSize(buffer),
+ sizeof(HeapPageSpecialData));
+ HeapPageGetSpecial(page)->pd_xid_base = pd_xid_base;
+ }
+
action = BLK_NEEDS_REDO;
}
else
@@ -9145,6 +9989,7 @@ heap_xlog_insert(XLogReaderState *record)
{
Size datalen;
char *data;
+ HeapTupleData tuple;
page = BufferGetPage(buffer);
@@ -9168,7 +10013,12 @@ heap_xlog_insert(XLogReaderState *record)
htup->t_infomask2 = xlhdr.t_infomask2;
htup->t_infomask = xlhdr.t_infomask;
htup->t_hoff = xlhdr.t_hoff;
- HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
+ tuple.t_data = htup;
+ HeapTupleSetXmin(&tuple, XLogRecGetXid(record));
+ if (xlrec->flags & XLH_INSERT_ON_TOAST_RELATION)
+ ToastTupleHeaderSetXmin(page, &tuple);
+ else
+ HeapTupleHeaderSetXmin(page, &tuple);
HeapTupleHeaderSetCmin(htup, FirstCommandId);
htup->t_ctid = target_tid;
@@ -9189,6 +10039,7 @@ heap_xlog_insert(XLogReaderState *record)
MarkBufferDirty(buffer);
}
+
if (BufferIsValid(buffer))
UnlockReleaseBuffer(buffer);
@@ -9228,12 +10079,19 @@ heap_xlog_multi_insert(XLogReaderState *record)
int i;
bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
XLogRedoAction action;
+ TransactionId pd_xid_base = InvalidTransactionId;
+ Pointer rec_data = (Pointer) XLogRecGetData(record);
/*
* Insertion doesn't overwrite MVCC data, so no conflict processing is
* required.
*/
- xlrec = (xl_heap_multi_insert *) XLogRecGetData(record);
+ if (isinit)
+ {
+ pd_xid_base = *((TransactionId *) rec_data);
+ rec_data += sizeof(TransactionId);
+ }
+ xlrec = (xl_heap_multi_insert *) rec_data;
XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno);
@@ -9260,7 +10118,18 @@ heap_xlog_multi_insert(XLogReaderState *record)
{
buffer = XLogInitBufferForRedo(record, 0);
page = BufferGetPage(buffer);
- PageInit(page, BufferGetPageSize(buffer), 0);
+
+ if (xlrec->flags & XLH_INSERT_ON_TOAST_RELATION)
+ {
+ PageInit(page, BufferGetPageSize(buffer), sizeof(ToastPageSpecialData));
+ ToastPageGetSpecial(page)->pd_xid_base = pd_xid_base;
+ }
+ else
+ {
+ PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData));
+ HeapPageGetSpecial(page)->pd_xid_base = pd_xid_base;
+ }
+
action = BLK_NEEDS_REDO;
}
else
@@ -9281,6 +10150,7 @@ heap_xlog_multi_insert(XLogReaderState *record)
{
OffsetNumber offnum;
xl_multi_insert_tuple *xlhdr;
+ HeapTupleData tuple;
/*
* If we're reinitializing the page, the tuples are stored in
@@ -9311,7 +10181,9 @@ heap_xlog_multi_insert(XLogReaderState *record)
htup->t_infomask2 = xlhdr->t_infomask2;
htup->t_infomask = xlhdr->t_infomask;
htup->t_hoff = xlhdr->t_hoff;
- HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
+ tuple.t_data = htup;
+ HeapTupleSetXmin(&tuple, XLogRecGetXid(record));
+ HeapTupleHeaderSetXmin(page, &tuple);
HeapTupleHeaderSetCmin(htup, FirstCommandId);
ItemPointerSetBlockNumber(&htup->t_ctid, blkno);
ItemPointerSetOffsetNumber(&htup->t_ctid, offnum);
@@ -9359,8 +10231,8 @@ static void
heap_xlog_update(XLogReaderState *record, bool hot_update)
{
XLogRecPtr lsn = record->EndRecPtr;
- xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record);
RelFileLocator rlocator;
+ xl_heap_update *xlrec;
BlockNumber oldblk;
BlockNumber newblk;
ItemPointerData newtid;
@@ -9384,6 +10256,17 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
Size freespace = 0;
XLogRedoAction oldaction;
XLogRedoAction newaction;
+ bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0;
+ TransactionId pd_xid_base = InvalidTransactionId;
+ Pointer rec_data = (Pointer) XLogRecGetData(record);
+
+ if (isinit)
+ {
+ pd_xid_base = *((TransactionId *) rec_data);
+ rec_data += sizeof(TransactionId);
+ }
+
+ xlrec = (xl_heap_update *) rec_data;
/* initialize to keep the compiler quiet */
oldtup.t_data = NULL;
@@ -9430,6 +10313,8 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
&obuffer);
if (oldaction == BLK_NEEDS_REDO)
{
+ HeapTupleData tuple;
+
page = BufferGetPage(obuffer);
offnum = xlrec->old_offnum;
if (PageGetMaxOffsetNumber(page) >= offnum)
@@ -9442,6 +10327,8 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
oldtup.t_data = htup;
oldtup.t_len = ItemIdGetLength(lp);
+ /* Toast tuples are never updated. */
+ HeapTupleCopyBaseFromPage(obuffer, &oldtup, page, false);
htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
@@ -9451,13 +10338,16 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
HeapTupleHeaderClearHotUpdated(htup);
fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask,
&htup->t_infomask2);
- HeapTupleHeaderSetXmax(htup, xlrec->old_xmax);
+ tuple.t_data = htup;
+ HeapTupleSetXmax(&tuple, xlrec->old_xmax);
+ HeapTupleHeaderSetXmax(page, &tuple);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
/* Set forward chain link in t_ctid */
htup->t_ctid = newtid;
/* Mark the page as a candidate for pruning */
- PageSetPrunable(page, XLogRecGetXid(record));
+ /* Toast tuples are never updated. */
+ PageSetPrunable(page, XLogRecGetXid(record), false);
if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
@@ -9474,11 +10364,15 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
nbuffer = obuffer;
newaction = oldaction;
}
- else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE)
+ else if (isinit)
{
nbuffer = XLogInitBufferForRedo(record, 0);
page = (Page) BufferGetPage(nbuffer);
- PageInit(page, BufferGetPageSize(nbuffer), 0);
+
+ /* Toast tuples are never updated. */
+ PageInit(page, BufferGetPageSize(nbuffer), sizeof(HeapPageSpecialData));
+
+ HeapPageGetSpecial(page)->pd_xid_base = pd_xid_base;
newaction = BLK_NEEDS_REDO;
}
else
@@ -9506,6 +10400,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
char *recdata_end;
Size datalen;
Size tuplen;
+ HeapTupleData tuple;
recdata = XLogRecGetBlockData(record, 0, &datalen);
recdata_end = recdata + datalen;
@@ -9584,9 +10479,12 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
htup->t_infomask = xlhdr.t_infomask;
htup->t_hoff = xlhdr.t_hoff;
- HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record));
+ tuple.t_data = htup;
+ HeapTupleSetXmin(&tuple, XLogRecGetXid(record));
+ HeapTupleHeaderSetXmin(page, &tuple);
HeapTupleHeaderSetCmin(htup, FirstCommandId);
- HeapTupleHeaderSetXmax(htup, xlrec->new_xmax);
+ HeapTupleSetXmax(&tuple, xlrec->new_xmax);
+ HeapTupleHeaderSetXmax(page, &tuple);
/* Make sure there is no forward chain link in t_ctid */
htup->t_ctid = newtid;
@@ -9697,6 +10595,8 @@ heap_xlog_lock(XLogReaderState *record)
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
+ HeapTupleData tuple;
+
page = (Page) BufferGetPage(buffer);
offnum = xlrec->offnum;
@@ -9725,7 +10625,10 @@ heap_xlog_lock(XLogReaderState *record)
BufferGetBlockNumber(buffer),
offnum);
}
- HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
+
+ tuple.t_data = htup;
+ HeapTupleSetXmax(&tuple, xlrec->locking_xid);
+ HeapTupleHeaderSetXmax(page, &tuple);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
@@ -9770,6 +10673,8 @@ heap_xlog_lock_updated(XLogReaderState *record)
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
{
+ HeapTupleData tuple;
+
page = BufferGetPage(buffer);
offnum = xlrec->offnum;
@@ -9785,7 +10690,9 @@ heap_xlog_lock_updated(XLogReaderState *record)
htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask,
&htup->t_infomask2);
- HeapTupleHeaderSetXmax(htup, xlrec->xmax);
+ tuple.t_data = htup;
+ HeapTupleSetXmax(&tuple, xlrec->xmax);
+ HeapTupleHeaderSetXmax(page, &tuple);
PageSetLSN(page, lsn);
MarkBufferDirty(buffer);
@@ -9835,6 +10742,33 @@ heap_xlog_inplace(XLogReaderState *record)
UnlockReleaseBuffer(buffer);
}
+static void
+heap_xlog_base_shift(XLogReaderState *record)
+{
+ XLogRecPtr lsn = record->EndRecPtr;
+ xl_heap_base_shift *xlrec = (xl_heap_base_shift *) XLogRecGetData(record);
+ Buffer buffer;
+ Page page;
+ BlockNumber blkno; /* filled by XLogRecGetBlockTag; not read afterwards in this function */
+ RelFileLocator target_node; /* filled by XLogRecGetBlockTag; not read afterwards in this function */
+
+ XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno);
+
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
+ {
+ page = BufferGetPage(buffer); /* the shift is replayed on the page; NULL/InvalidBuffer are passed for relation and buffer */
+ heap_page_shift_base(NULL, InvalidBuffer, page, xlrec->multi,
+ xlrec->delta,
+ xlrec->flags & XLH_BASE_SHIFT_ON_TOAST_RELATION);
+ PageSetLSN(page, lsn); /* stamp the page with this record's end LSN */
+ MarkBufferDirty(buffer);
+ }
+
+ if (BufferIsValid(buffer)) /* release the buffer whether or not redo was needed */
+ UnlockReleaseBuffer(buffer);
+}
+
+
void
heap_redo(XLogReaderState *record)
{
@@ -9921,6 +10855,21 @@ heap2_redo(XLogReaderState *record)
}
}
+void
+heap3_redo(XLogReaderState *record)
+{
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; /* strip the XLR_INFO_MASK bits from the info byte */
+
+ switch (info & XLOG_HEAP_OPMASK)
+ {
+ case XLOG_HEAP3_BASE_SHIFT: /* the only opcode dispatched by this routine */
+ heap_xlog_base_shift(record);
+ break;
+ default: /* any other opcode is reported at PANIC level */
+ elog(PANIC, "heap3_redo: unknown op code %u", info);
+ }
+}
+
/*
* Mask a heap page before performing consistency checks on it.
*/
@@ -9933,6 +10882,10 @@ heap_mask(char *pagedata, BlockNumber blkno)
mask_page_lsn_and_checksum(page);
mask_page_hint_bits(page);
+
+ /* Ignore prune_xid (it's like a hint-bit) */
+ HeapPageSetPruneXid(page, InvalidTransactionId, false);
+
mask_unused_space(page);
for (off = 1; off <= PageGetMaxOffsetNumber(page); off++)
@@ -10048,14 +11001,14 @@ HeapCheckForSerializableConflictOut(bool visible, Relation relation,
case HEAPTUPLE_LIVE:
if (visible)
return;
- xid = HeapTupleHeaderGetXmin(tuple->t_data);
+ xid = HeapTupleGetXmin(tuple);
break;
case HEAPTUPLE_RECENTLY_DEAD:
case HEAPTUPLE_DELETE_IN_PROGRESS:
if (visible)
- xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
+ xid = HeapTupleGetUpdateXidAny(tuple);
else
- xid = HeapTupleHeaderGetXmin(tuple->t_data);
+ xid = HeapTupleGetXmin(tuple);
if (TransactionIdPrecedes(xid, TransactionXmin))
{
@@ -10065,7 +11018,7 @@ HeapCheckForSerializableConflictOut(bool visible, Relation relation,
}
break;
case HEAPTUPLE_INSERT_IN_PROGRESS:
- xid = HeapTupleHeaderGetXmin(tuple->t_data);
+ xid = HeapTupleGetXmin(tuple);
break;
case HEAPTUPLE_DEAD:
Assert(!visible);
@@ -10103,3 +11056,38 @@ HeapCheckForSerializableConflictOut(bool visible, Relation relation,
CheckForSerializableConflictOut(relation, xid, snapshot);
}
+
+/*
+ * Ensure that the given xid fits the xid base of the given page, freezing the page and retrying once if it does not.
+ */
+static bool
+heap_page_prepare_for_xid(Relation relation, Buffer buffer,
+ TransactionId xid, bool multi)
+{
+ Page page = BufferGetPage(buffer);
+ int res; /* result of heap_page_try_prepare_for_xid; -1 triggers the retry path */
+
+ /* "Double xmax" page format doesn't require any preparation */
+ if (HeapPageIsDoubleXmax(page))
+ return false;
+
+ if (!TransactionIdIsNormal(xid)) /* non-normal xids need no preparation either */
+ return false;
+
+ res = heap_page_try_prepare_for_xid(relation, buffer, page, xid, multi,
+ IsToastRelation(relation));
+ if (res != -1)
+ return res == 1; /* NOTE(review): presumably 1 means the page was modified -- confirm against the helper */
+
+ /* Have to try freezing the page... */
+ freeze_single_heap_page(relation, buffer);
+
+ res = heap_page_try_prepare_for_xid(relation, buffer, page, xid, multi,
+ IsToastRelation(relation));
+ if (res != -1)
+ return res == 1;
+
+ elog(ERROR, "could not fit xid into page"); /* second attempt failed as well */
+
+ return false; /* presumably unreachable: elog(ERROR) is expected not to return */
+}
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 335abea67c..ffbfae09dd 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -413,7 +413,7 @@ tuple_lock_retry:
* changes in an existing tuple, except to invalid or
* frozen, and neither of those can match priorXmax.)
*/
- if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
+ if (!TransactionIdEquals(HeapTupleGetXmin(tuple),
priorXmax))
{
ReleaseBuffer(buffer);
@@ -473,7 +473,7 @@ tuple_lock_retry:
* variable instead of doing HeapTupleHeaderGetXmin again.
*/
if (TransactionIdIsCurrentTransactionId(priorXmax) &&
- HeapTupleHeaderGetCmin(tuple->t_data) >= cid)
+ HeapTupleGetCmin(tuple) >= cid)
{
tmfd->xmax = priorXmax;
@@ -481,7 +481,7 @@ tuple_lock_retry:
* Cmin is the problematic value, so store that. See
* above.
*/
- tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data);
+ tmfd->cmax = HeapTupleGetCmin(tuple);
ReleaseBuffer(buffer);
return TM_SelfModified;
}
@@ -507,7 +507,7 @@ tuple_lock_retry:
/*
* As above, if xmin isn't what we're expecting, do nothing.
*/
- if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data),
+ if (!TransactionIdEquals(HeapTupleGetXmin(tuple),
priorXmax))
{
ReleaseBuffer(buffer);
@@ -538,7 +538,7 @@ tuple_lock_retry:
/* updated, so look at the updated row */
*tid = tuple->t_data->t_ctid;
/* updated row should have xmin matching this xmax */
- priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
+ priorXmax = HeapTupleGetUpdateXidAny(tuple);
ReleaseBuffer(buffer);
/* loop back to fetch next in chain */
}
@@ -858,7 +858,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
* case we had better copy it.
*/
if (!is_system_catalog &&
- !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
+ !TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(tuple)))
elog(WARNING, "concurrent insert in progress within table \"%s\"",
RelationGetRelationName(OldHeap));
/* treat as live */
@@ -870,7 +870,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
* Similar situation to INSERT_IN_PROGRESS case.
*/
if (!is_system_catalog &&
- !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
+ !TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(tuple)))
elog(WARNING, "concurrent delete in progress within table \"%s\"",
RelationGetRelationName(OldHeap));
/* treat as recently dead */
@@ -1055,6 +1055,8 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
targtuple->t_len = ItemIdGetLength(itemid);
+ HeapTupleCopyBaseFromPage(hscan->rs_cbuf, targtuple, targpage,
+ IsToastRelation(scan->rs_rd));
switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
hscan->rs_cbuf))
@@ -1090,7 +1092,7 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
* numbers we report to the cumulative stats system to make
* this come out right.)
*/
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(targtuple)))
{
sample_it = true;
*liverows += 1;
@@ -1121,7 +1123,7 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
* but not the post-image. We also get sane results if the
* concurrent transaction never commits.
*/
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(targtuple)))
*deadrows += 1;
else
{
@@ -1370,7 +1372,8 @@ heapam_index_build_range_scan(Relation heapRelation,
Page page = BufferGetPage(hscan->rs_cbuf);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
- heap_get_root_tuples(page, root_offsets);
+ heap_get_root_tuples(heapRelation, hscan->rs_cbuf, page,
+ root_offsets);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
root_blkno = hscan->rs_cblock;
@@ -1463,7 +1466,7 @@ heapam_index_build_range_scan(Relation heapRelation,
* before commit there. Give a warning if neither case
* applies.
*/
- xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
+ xwait = HeapTupleGetXmin(heapTuple);
if (!TransactionIdIsCurrentTransactionId(xwait))
{
if (!is_system_catalog)
@@ -1522,7 +1525,7 @@ heapam_index_build_range_scan(Relation heapRelation,
break;
}
- xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
+ xwait = HeapTupleGetUpdateXidAny(heapTuple);
if (!TransactionIdIsCurrentTransactionId(xwait))
{
if (!is_system_catalog)
@@ -1667,7 +1670,8 @@ heapam_index_build_range_scan(Relation heapRelation,
Page page = BufferGetPage(hscan->rs_cbuf);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
- heap_get_root_tuples(page, root_offsets);
+ heap_get_root_tuples(heapRelation, hscan->rs_cbuf, page,
+ root_offsets);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
}
@@ -1833,7 +1837,8 @@ heapam_index_validate_scan(Relation heapRelation,
Page page = BufferGetPage(hscan->rs_cbuf);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
- heap_get_root_tuples(page, root_offsets);
+ heap_get_root_tuples(heapRelation, hscan->rs_cbuf, page,
+ root_offsets);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
memset(in_index, 0, sizeof(in_index));
@@ -2200,13 +2205,15 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
loctup.t_len = ItemIdGetLength(lp);
loctup.t_tableOid = scan->rs_rd->rd_id;
+ HeapTupleCopyBaseFromPage(hscan->rs_cbuf, &loctup, dp,
+ IsToastRelation(scan->rs_rd));
ItemPointerSet(&loctup.t_self, page, offnum);
valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
if (valid)
{
hscan->rs_vistuples[ntup++] = offnum;
PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
- HeapTupleHeaderGetXmin(loctup.t_data));
+ HeapTupleGetXmin(&loctup));
}
HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
buffer, snapshot);
@@ -2221,6 +2228,13 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
return ntup > 0;
}
+/*
+ * HeapTupleSetInvalid
+ *		Mark the xmin/xmax values cached in the HeapTupleData struct itself
+ *		(t_xmin and t_xmax) as InvalidTransactionId.
+ *
+ * Only the in-memory HeapTupleData fields are touched; the on-page tuple
+ * header reachable through tuple->t_data is not modified.
+ *
+ * NOTE(review): the callers in this file use this where the tuple comes from
+ * a page-at-a-time scan instead of calling HeapTupleCopyBaseFromPage();
+ * presumably the cached XIDs are not consulted on those paths -- confirm.
+ */
+static inline void
+HeapTupleSetInvalid(HeapTuple tuple)
+{
+	tuple->t_xmax = tuple->t_xmin = InvalidTransactionId;
+}
+
static bool
heapam_scan_bitmap_next_tuple(TableScanDesc scan,
TBMIterateResult *tbmres,
@@ -2245,6 +2259,7 @@ heapam_scan_bitmap_next_tuple(TableScanDesc scan,
hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
hscan->rs_ctup.t_len = ItemIdGetLength(lp);
hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
+ HeapTupleSetInvalid(&hscan->rs_ctup);
ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset);
pgstat_count_heap_fetch(scan->rs_rd);
@@ -2385,8 +2400,14 @@ heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
tuple->t_len = ItemIdGetLength(itemid);
- ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
+ if (pagemode)
+ HeapTupleSetInvalid(tuple);
+ else
+ HeapTupleCopyBaseFromPage(hscan->rs_cbuf, tuple, page,
+ IsToastRelation(scan->rs_rd));
+
+ ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
if (all_visible)
visible = true;
diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c
index 6e33d1c881..370a8a67d8 100644
--- a/src/backend/access/heap/heapam_visibility.c
+++ b/src/backend/access/heap/heapam_visibility.c
@@ -217,7 +217,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
}
}
}
- else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup)))
{
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
return true;
@@ -229,7 +229,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
{
TransactionId xmax;
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
/* not LOCKED_ONLY, so it has to have an xmax */
Assert(TransactionIdIsValid(xmax));
@@ -241,7 +241,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
return false;
}
- if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup)))
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
@@ -251,11 +251,11 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
return false;
}
- else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup)))
return false;
- else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
- HeapTupleHeaderGetRawXmin(tuple));
+ HeapTupleGetRawXmin(htup));
else
{
/* it must have aborted or crashed */
@@ -284,7 +284,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return true;
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
/* not LOCKED_ONLY, so it has to have an xmax */
Assert(TransactionIdIsValid(xmax));
@@ -299,17 +299,17 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
return true;
}
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup)))
{
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return true;
return false;
}
- if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup)))
return true;
- if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup)))
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
@@ -327,7 +327,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
}
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
- HeapTupleHeaderGetRawXmax(tuple));
+ HeapTupleGetRawXmax(htup));
return false;
}
@@ -416,7 +416,7 @@ HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot,
* is canceled by super-deleting the tuple. This also applies to
* TOAST tuples created during speculative insertion.
*/
- else if (!TransactionIdIsValid(HeapTupleHeaderGetXmin(tuple)))
+ else if (!TransactionIdIsValid(HeapTupleGetXmin(htup)))
return false;
}
@@ -506,9 +506,9 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
}
}
}
- else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup)))
{
- if (HeapTupleHeaderGetCmin(tuple) >= curcid)
+ if (HeapTupleGetCmin(htup) >= curcid)
return TM_Invisible; /* inserted after scan started */
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
@@ -518,7 +518,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
{
TransactionId xmax;
- xmax = HeapTupleHeaderGetRawXmax(tuple);
+ xmax = HeapTupleGetRawXmax(htup);
/*
* Careful here: even though this tuple was created by our own
@@ -549,7 +549,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
{
TransactionId xmax;
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
/* not LOCKED_ONLY, so it has to have an xmax */
Assert(TransactionIdIsValid(xmax));
@@ -557,21 +557,21 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
/* deleting subtransaction must have aborted */
if (!TransactionIdIsCurrentTransactionId(xmax))
{
- if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple),
+ if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup),
false))
return TM_BeingModified;
return TM_Ok;
}
else
{
- if (HeapTupleHeaderGetCmax(tuple) >= curcid)
+ if (HeapTupleGetCmax(htup) >= curcid)
return TM_SelfModified; /* updated after scan started */
else
return TM_Invisible; /* updated before scan started */
}
}
- if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup)))
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
@@ -579,16 +579,16 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
return TM_Ok;
}
- if (HeapTupleHeaderGetCmax(tuple) >= curcid)
+ if (HeapTupleGetCmax(htup) >= curcid)
return TM_SelfModified; /* updated after scan started */
else
return TM_Invisible; /* updated before scan started */
}
- else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup)))
return TM_Invisible;
- else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
- HeapTupleHeaderGetRawXmin(tuple));
+ HeapTupleGetRawXmin(htup));
else
{
/* it must have aborted or crashed */
@@ -622,17 +622,17 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
{
- if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), true))
+ if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), true))
return TM_BeingModified;
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
return TM_Ok;
}
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
if (!TransactionIdIsValid(xmax))
{
- if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
+ if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false))
return TM_BeingModified;
}
@@ -641,13 +641,13 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
if (TransactionIdIsCurrentTransactionId(xmax))
{
- if (HeapTupleHeaderGetCmax(tuple) >= curcid)
+ if (HeapTupleGetCmax(htup) >= curcid)
return TM_SelfModified; /* updated after scan started */
else
return TM_Invisible; /* updated before scan started */
}
- if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
+ if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false))
return TM_BeingModified;
if (TransactionIdDidCommit(xmax))
@@ -663,7 +663,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
* what about the other members?
*/
- if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
+ if (!MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false))
{
/*
* There's no member, even just a locker, alive anymore, so we can
@@ -680,20 +680,20 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
}
}
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup)))
{
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return TM_BeingModified;
- if (HeapTupleHeaderGetCmax(tuple) >= curcid)
+ if (HeapTupleGetCmax(htup) >= curcid)
return TM_SelfModified; /* updated after scan started */
else
return TM_Invisible; /* updated before scan started */
}
- if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup)))
return TM_BeingModified;
- if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup)))
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
@@ -711,7 +711,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
}
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
- HeapTupleHeaderGetRawXmax(tuple));
+ HeapTupleGetRawXmax(htup));
if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
return TM_Updated; /* updated by other */
else
@@ -794,7 +794,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
}
}
}
- else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup)))
{
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
return true;
@@ -806,7 +806,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
{
TransactionId xmax;
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
/* not LOCKED_ONLY, so it has to have an xmax */
Assert(TransactionIdIsValid(xmax));
@@ -818,7 +818,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
return false;
}
- if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup)))
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
@@ -828,7 +828,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
return false;
}
- else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup)))
{
/*
* Return the speculative token to caller. Caller can worry about
@@ -844,13 +844,13 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
Assert(snapshot->speculativeToken != 0);
}
- snapshot->xmin = HeapTupleHeaderGetRawXmin(tuple);
+ snapshot->xmin = HeapTupleGetRawXmin(htup);
/* XXX shouldn't we fall through to look at xmax? */
return true; /* in insertion by other */
}
- else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
- HeapTupleHeaderGetRawXmin(tuple));
+ HeapTupleGetRawXmin(htup));
else
{
/* it must have aborted or crashed */
@@ -879,7 +879,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return true;
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
/* not LOCKED_ONLY, so it has to have an xmax */
Assert(TransactionIdIsValid(xmax));
@@ -897,21 +897,21 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
return true;
}
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup)))
{
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
return true;
return false;
}
- if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup)))
{
if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
- snapshot->xmax = HeapTupleHeaderGetRawXmax(tuple);
+ snapshot->xmax = HeapTupleGetRawXmax(htup);
return true;
}
- if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup)))
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
@@ -929,7 +929,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
}
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
- HeapTupleHeaderGetRawXmax(tuple));
+ HeapTupleGetRawXmax(htup));
return false; /* updated by other */
}
@@ -1008,9 +1008,9 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
}
}
}
- else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup)))
{
- if (HeapTupleHeaderGetCmin(tuple) >= snapshot->curcid)
+ if (HeapTupleGetCmin(htup) >= snapshot->curcid)
return false; /* inserted after scan started */
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
@@ -1023,7 +1023,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
{
TransactionId xmax;
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
/* not LOCKED_ONLY, so it has to have an xmax */
Assert(TransactionIdIsValid(xmax));
@@ -1031,13 +1031,13 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
/* updating subtransaction must have aborted */
if (!TransactionIdIsCurrentTransactionId(xmax))
return true;
- else if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
+ else if (HeapTupleGetCmax(htup) >= snapshot->curcid)
return true; /* updated after scan started */
else
return false; /* updated before scan started */
}
- if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup)))
{
/* deleting subtransaction must have aborted */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
@@ -1045,16 +1045,16 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
return true;
}
- if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
+ if (HeapTupleGetCmax(htup) >= snapshot->curcid)
return true; /* deleted after scan started */
else
return false; /* deleted before scan started */
}
- else if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot))
+ else if (XidInMVCCSnapshot(HeapTupleGetRawXmin(htup), snapshot))
return false;
- else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
- HeapTupleHeaderGetRawXmin(tuple));
+ HeapTupleGetRawXmin(htup));
else
{
/* it must have aborted or crashed */
@@ -1067,7 +1067,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
{
/* xmin is committed, but maybe not according to our snapshot */
if (!HeapTupleHeaderXminFrozen(tuple) &&
- XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot))
+ XidInMVCCSnapshot(HeapTupleGetRawXmin(htup), snapshot))
return false; /* treat as still in progress */
}
@@ -1086,14 +1086,14 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
/* already checked above */
Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
/* not LOCKED_ONLY, so it has to have an xmax */
Assert(TransactionIdIsValid(xmax));
if (TransactionIdIsCurrentTransactionId(xmax))
{
- if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
+ if (HeapTupleGetCmax(htup) >= snapshot->curcid)
return true; /* deleted after scan started */
else
return false; /* deleted before scan started */
@@ -1108,18 +1108,18 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
{
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup)))
{
- if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
+ if (HeapTupleGetCmax(htup) >= snapshot->curcid)
return true; /* deleted after scan started */
else
return false; /* deleted before scan started */
}
- if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot))
+ if (XidInMVCCSnapshot(HeapTupleGetRawXmax(htup), snapshot))
return true;
- if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup)))
{
/* it must have aborted or crashed */
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
@@ -1129,12 +1129,12 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
/* xmax transaction committed */
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
- HeapTupleHeaderGetRawXmax(tuple));
+ HeapTupleGetRawXmax(htup));
}
else
{
/* xmax is committed, but maybe not according to our snapshot */
- if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot))
+ if (XidInMVCCSnapshot(HeapTupleGetRawXmax(htup), snapshot))
return true; /* treat as still in progress */
}
@@ -1249,21 +1249,21 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de
return HEAPTUPLE_DEAD;
}
}
- else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup)))
{
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
return HEAPTUPLE_INSERT_IN_PROGRESS;
/* only locked? run infomask-only check first, for performance */
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) ||
- HeapTupleHeaderIsOnlyLocked(tuple))
+ HeapTupleIsOnlyLocked(htup))
return HEAPTUPLE_INSERT_IN_PROGRESS;
/* inserted and then deleted by same xact */
- if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple)))
+ if (TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(htup)))
return HEAPTUPLE_DELETE_IN_PROGRESS;
/* deleting subtransaction must have aborted */
return HEAPTUPLE_INSERT_IN_PROGRESS;
}
- else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup)))
{
/*
* It'd be possible to discern between INSERT/DELETE in progress
@@ -1275,9 +1275,9 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de
*/
return HEAPTUPLE_INSERT_IN_PROGRESS;
}
- else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
+ else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup)))
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
- HeapTupleHeaderGetRawXmin(tuple));
+ HeapTupleGetRawXmin(htup));
else
{
/*
@@ -1319,14 +1319,14 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de
* possibly be running; otherwise have to check.
*/
if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) &&
- MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple),
+ MultiXactIdIsRunning(HeapTupleGetRawXmax(htup),
true))
return HEAPTUPLE_LIVE;
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
}
else
{
- if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup)))
return HEAPTUPLE_LIVE;
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
InvalidTransactionId);
@@ -1344,7 +1344,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
{
- TransactionId xmax = HeapTupleGetUpdateXid(tuple);
+ TransactionId xmax = HeapTupleGetUpdateXid(htup);
/* already checked above */
Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
@@ -1367,7 +1367,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de
*dead_after = xmax;
return HEAPTUPLE_RECENTLY_DEAD;
}
- else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
+ else if (!MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false))
{
/*
* Not in Progress, Not Committed, so either Aborted or crashed.
@@ -1381,11 +1381,11 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de
if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
{
- if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
+ if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup)))
return HEAPTUPLE_DELETE_IN_PROGRESS;
- else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
+ else if (TransactionIdDidCommit(HeapTupleGetRawXmax(htup)))
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
- HeapTupleHeaderGetRawXmax(tuple));
+ HeapTupleGetRawXmax(htup));
else
{
/*
@@ -1407,7 +1407,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de
* Deleter committed, allow caller to check if it was recent enough that
* some open transactions could still see the tuple.
*/
- *dead_after = HeapTupleHeaderGetRawXmax(tuple);
+ *dead_after = HeapTupleGetRawXmax(htup);
return HEAPTUPLE_RECENTLY_DEAD;
}
@@ -1503,7 +1503,7 @@ HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
/* Deleter committed, so tuple is dead if the XID is old enough. */
return GlobalVisTestIsRemovableXid(vistest,
- HeapTupleHeaderGetRawXmax(tuple));
+ HeapTupleGetRawXmax(htup));
}
/*
@@ -1516,8 +1516,9 @@ HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
* at the top of this file.
*/
bool
-HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
+HeapTupleIsOnlyLocked(HeapTuple htup)
{
+ HeapTupleHeader tuple = htup->t_data;
TransactionId xmax;
/* if there's no valid Xmax, then there's obviously no update either */
@@ -1528,7 +1529,7 @@ HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
return true;
/* invalid xmax means no update */
- if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
+ if (!TransactionIdIsValid(HeapTupleGetRawXmax(htup)))
return true;
/*
@@ -1539,7 +1540,7 @@ HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
return false;
/* ... but if it's a multi, then perhaps the updating Xid aborted. */
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
/* not LOCKED_ONLY, so it has to have an xmax */
Assert(TransactionIdIsValid(xmax));
@@ -1587,8 +1588,8 @@ HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot,
Buffer buffer)
{
HeapTupleHeader tuple = htup->t_data;
- TransactionId xmin = HeapTupleHeaderGetXmin(tuple);
- TransactionId xmax = HeapTupleHeaderGetRawXmax(tuple);
+ TransactionId xmin = HeapTupleGetXmin(htup);
+ TransactionId xmax = HeapTupleGetRawXmax(htup);
Assert(ItemPointerIsValid(&htup->t_self));
Assert(htup->t_tableOid != InvalidOid);
@@ -1688,7 +1689,7 @@ HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot,
*/
else if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
{
- xmax = HeapTupleGetUpdateXid(tuple);
+ xmax = HeapTupleGetUpdateXid(htup);
}
/* check if it's one of our txids, toplevel is also in there */
diff --git a/src/backend/access/heap/heaptoast.c b/src/backend/access/heap/heaptoast.c
index 1575a81b01..252e57cc1d 100644
--- a/src/backend/access/heap/heaptoast.c
+++ b/src/backend/access/heap/heaptoast.c
@@ -307,6 +307,7 @@ heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
result_tuple->t_len = new_tuple_len;
result_tuple->t_self = newtup->t_self;
result_tuple->t_tableOid = newtup->t_tableOid;
+ HeapTupleCopyBase(result_tuple, newtup);
new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE);
result_tuple->t_data = new_data;
@@ -395,6 +396,7 @@ toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc)
*/
new_tuple->t_self = tup->t_self;
new_tuple->t_tableOid = tup->t_tableOid;
+ HeapTupleCopyBase(new_tuple, tup);
new_tuple->t_data->t_choice = tup->t_data->t_choice;
new_tuple->t_data->t_ctid = tup->t_data->t_ctid;
@@ -467,6 +469,7 @@ toast_flatten_tuple_to_datum(HeapTupleHeader tup,
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
tmptup.t_data = tup;
+ HeapTupleSetZeroBase(&tmptup);
/*
* Break down the tuple into fields.
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index b0ece66629..49a91b727b 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -19,6 +19,7 @@
#include "access/hio.h"
#include "access/htup_details.h"
#include "access/visibilitymap.h"
+#include "catalog/catalog.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
@@ -59,6 +60,19 @@ RelationPutHeapTuple(Relation relation,
/* Add the tuple to the page */
pageHeader = BufferGetPage(buffer);
+ HeapTupleSetXmin(tuple, tuple->t_xmin);
+ if (IsToastRelation(relation))
+ {
+ ToastTupleHeaderSetXmin(pageHeader, tuple);
+ ToastTupleHeaderSetXmax(pageHeader, tuple);
+ }
+ else
+ {
+ HeapTupleHeaderSetXmin(pageHeader, tuple);
+ HeapTupleHeaderSetXmax(pageHeader, tuple);
+ }
+ HeapTupleSetXmax(tuple, tuple->t_xmax);
+
offnum = PageAddItem(pageHeader, (Item) tuple->t_data,
tuple->t_len, InvalidOffsetNumber, false, true);
@@ -243,7 +257,7 @@ RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
/* we'll need this info below */
blockNum = BufferGetBlockNumber(buffer);
- freespace = BufferGetPageSize(buffer) - SizeOfPageHeaderData;
+ freespace = BufferGetPageSize(buffer) - SizeOfPageHeaderData - MAXALIGN(sizeof(HeapPageSpecialData));
UnlockReleaseBuffer(buffer);
@@ -514,6 +528,9 @@ loop:
/*
* Now we can check to see if there's enough free space here. If so,
* we're done.
+ *
+ * "Double xmax" page is not suitable for any new tuple, since xmin
+ * can't be set there.
*/
page = BufferGetPage(buffer);
@@ -525,12 +542,23 @@ loop:
*/
if (PageIsNew(page))
{
- PageInit(page, BufferGetPageSize(buffer), 0);
+ if (IsToastRelation(relation))
+ {
+ PageInit(page, BufferGetPageSize(buffer), sizeof(ToastPageSpecialData));
+ ToastPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId;
+ }
+ else
+ {
+ PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData));
+ HeapPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId;
+ }
+
MarkBufferDirty(buffer);
}
pageFreeSpace = PageGetHeapFreeSpace(page);
- if (targetFreeSpace <= pageFreeSpace)
+ if (targetFreeSpace <= pageFreeSpace &&
+ !HeapPageIsDoubleXmax(page))
{
/* use this page as future insert target, too */
RelationSetTargetBlock(relation, targetBlock);
@@ -635,7 +663,17 @@ loop:
BufferGetBlockNumber(buffer),
RelationGetRelationName(relation));
- PageInit(page, BufferGetPageSize(buffer), 0);
+ if (IsToastRelation(relation))
+ {
+ PageInit(page, BufferGetPageSize(buffer), sizeof(ToastPageSpecialData));
+ ToastPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId;
+ }
+ else
+ {
+ PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData));
+ HeapPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId;
+ }
+
MarkBufferDirty(buffer);
/*
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 9f43bbe25f..b1eff5a068 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -91,6 +91,17 @@ static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
static void page_verify_redirects(Page page);
+/*
+ * XidFitsPage
+ *		Report whether 'xid' lies inside the window of XIDs representable
+ *		relative to this page's pd_xid_base, i.e. within
+ *		[base + FirstNormalTransactionId, base + MaxShortTransactionId].
+ *
+ * 'is_toast' selects which special-area layout (toast or regular heap) the
+ * base is read from.
+ *
+ * NOTE(review): special XIDs such as InvalidTransactionId presumably fall
+ * below the lower bound and are therefore reported as not fitting; callers
+ * that gate a "clear the prune XID" store on this result should confirm
+ * that is intended.
+ */
+static inline bool
+XidFitsPage(Page page, TransactionId xid, bool is_toast)
+{
+	TransactionId xid_base;
+
+	if (is_toast)
+		xid_base = ToastPageGetSpecial(page)->pd_xid_base;
+	else
+		xid_base = HeapPageGetSpecial(page)->pd_xid_base;
+
+	/* below the smallest XID the page can encode */
+	if (xid < xid_base + FirstNormalTransactionId)
+		return false;
+
+	/* otherwise it fits iff it does not exceed the largest encodable XID */
+	return xid <= xid_base + MaxShortTransactionId;
+}
/*
* Optionally prune and repair fragmentation in the specified page.
@@ -136,7 +147,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
* determining the appropriate horizon is a waste if there's no prune_xid
* (i.e. no updates/deletes left potentially dead tuples around).
*/
- prune_xid = ((PageHeader) page)->pd_prune_xid;
+ prune_xid = HeapPageGetPruneXidNoAssert(page, IsToastRelation(relation));
+
if (!TransactionIdIsValid(prune_xid))
return;
@@ -207,7 +219,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
nnewlpdead;
ndeleted = heap_page_prune(relation, buffer, vistest, limited_xmin,
- limited_ts, &nnewlpdead, NULL);
+ limited_ts, &nnewlpdead, NULL, true);
/*
* Report the number of tuples reclaimed to pgstats. This is
@@ -268,7 +280,8 @@ heap_page_prune(Relation relation, Buffer buffer,
TransactionId old_snap_xmin,
TimestampTz old_snap_ts,
int *nnewlpdead,
- OffsetNumber *off_loc)
+ OffsetNumber *off_loc,
+ bool repairFragmentation)
{
int ndeleted = 0;
Page page = BufferGetPage(buffer);
@@ -339,6 +352,8 @@ heap_page_prune(Relation relation, Buffer buffer,
htup = (HeapTupleHeader) PageGetItem(page, itemid);
tup.t_data = htup;
tup.t_len = ItemIdGetLength(itemid);
+ HeapTupleCopyBaseFromPage(buffer, &tup, page,
+ IsToastRelation(relation));
ItemPointerSet(&(tup.t_self), blockno, offnum);
/*
@@ -393,13 +408,17 @@ heap_page_prune(Relation relation, Buffer buffer,
heap_page_prune_execute(buffer,
prstate.redirected, prstate.nredirected,
prstate.nowdead, prstate.ndead,
- prstate.nowunused, prstate.nunused);
+ prstate.nowunused, prstate.nunused,
+ repairFragmentation,
+ IsToastRelation(relation));
/*
* Update the page's pd_prune_xid field to either zero, or the lowest
* XID of any soon-prunable tuple.
*/
- ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
+ if (XidFitsPage(page, prstate.new_prune_xid, IsToastRelation(relation)))
+ HeapPageSetPruneXid(page, prstate.new_prune_xid,
+ IsToastRelation(relation));
/*
* Also clear the "page is full" flag, since there's no point in
@@ -421,6 +440,13 @@ heap_page_prune(Relation relation, Buffer buffer,
xlrec.latestRemovedXid = prstate.latestRemovedXid;
xlrec.nredirected = prstate.nredirected;
xlrec.ndead = prstate.ndead;
+ xlrec.flags = 0;
+
+ if (IsToastRelation(relation))
+ xlrec.flags |= XLH_PRUNE_ON_TOAST_RELATION;
+
+ if (repairFragmentation)
+ xlrec.flags |= XLH_PRUNE_REPAIR_FRAGMENTATION;
XLogBeginInsert();
XLogRegisterData((char *) &xlrec, SizeOfHeapPrune);
@@ -461,10 +487,12 @@ heap_page_prune(Relation relation, Buffer buffer,
* point in repeating the prune/defrag process until something else
* happens to the page.
*/
- if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
+ bool is_toast = IsToastRelation(relation);
+
+ if (HeapPageGetPruneXid(page, is_toast) != prstate.new_prune_xid ||
PageIsFull(page))
{
- ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
+ HeapPageSetPruneXid(page, prstate.new_prune_xid, is_toast);
PageClearFull(page);
MarkBufferDirtyHint(buffer, true);
}
@@ -601,6 +629,9 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
OffsetNumber chainitems[MaxHeapTuplesPerPage];
int nchain = 0,
i;
+ HeapTupleData tup;
+
+ tup.t_tableOid = RelationGetRelid(prstate->rel);
rootlp = PageGetItemId(dp, rootoffnum);
@@ -612,6 +643,12 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
Assert(prstate->htsv[rootoffnum] != -1);
htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
+ tup.t_data = htup;
+ tup.t_len = ItemIdGetLength(rootlp);
+ ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), rootoffnum);
+ HeapTupleCopyBaseFromPage(buffer, &tup, dp,
+ IsToastRelation(prstate->rel));
+
if (HeapTupleHeaderIsHeapOnly(htup))
{
/*
@@ -636,7 +673,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
!HeapTupleHeaderIsHotUpdated(htup))
{
heap_prune_record_unused(prstate, rootoffnum);
- HeapTupleHeaderAdvanceLatestRemovedXid(htup,
+ HeapTupleHeaderAdvanceLatestRemovedXid(&tup,
&prstate->latestRemovedXid);
ndeleted++;
}
@@ -703,11 +740,17 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
Assert(prstate->htsv[offnum] != -1);
htup = (HeapTupleHeader) PageGetItem(dp, lp);
+ tup.t_data = htup;
+ tup.t_len = ItemIdGetLength(lp);
+ HeapTupleCopyBaseFromPage(buffer, &tup, dp,
+ IsToastRelation(prstate->rel));
+ ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum);
+
/*
* Check the tuple XMIN against prior XMAX, if any
*/
if (TransactionIdIsValid(priorXmax) &&
- !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
+ !TransactionIdEquals(HeapTupleGetXmin(&tup), priorXmax))
break;
/*
@@ -734,7 +777,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
* that the page is reconsidered for pruning in future.
*/
heap_prune_record_prunable(prstate,
- HeapTupleHeaderGetUpdateXid(htup));
+ HeapTupleGetUpdateXidAny(&tup));
break;
case HEAPTUPLE_DELETE_IN_PROGRESS:
@@ -744,7 +787,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
* that the page is reconsidered for pruning in future.
*/
heap_prune_record_prunable(prstate,
- HeapTupleHeaderGetUpdateXid(htup));
+ HeapTupleGetUpdateXidAny(&tup));
break;
case HEAPTUPLE_LIVE:
@@ -773,7 +816,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
if (tupdead)
{
latestdead = offnum;
- HeapTupleHeaderAdvanceLatestRemovedXid(htup,
+ HeapTupleHeaderAdvanceLatestRemovedXid(&tup,
&prstate->latestRemovedXid);
}
else if (!recent_dead)
@@ -795,7 +838,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
Assert(ItemPointerGetBlockNumber(&htup->t_ctid) ==
BufferGetBlockNumber(buffer));
offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
- priorXmax = HeapTupleHeaderGetUpdateXid(htup);
+ priorXmax = HeapTupleGetUpdateXidAny(&tup);
}
/*
@@ -912,7 +955,9 @@ void
heap_page_prune_execute(Buffer buffer,
OffsetNumber *redirected, int nredirected,
OffsetNumber *nowdead, int ndead,
- OffsetNumber *nowunused, int nunused)
+ OffsetNumber *nowunused, int nunused,
+ bool repairFragmentation,
+ bool is_toast)
{
Page page = (Page) BufferGetPage(buffer);
OffsetNumber *offnum;
@@ -1036,7 +1081,8 @@ heap_page_prune_execute(Buffer buffer,
* Finally, repair any fragmentation, and update the page's hint bit about
* whether it has free pointers.
*/
- PageRepairFragmentation(page);
+ if (repairFragmentation)
+ PageRepairFragmentation(page, is_toast);
/*
* Now that the page has been modified, assert that redirect items still
@@ -1108,7 +1154,8 @@ page_verify_redirects(Page page)
* and reused by a completely unrelated tuple.
*/
void
-heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
+heap_get_root_tuples(Relation relation, Buffer buffer, Page page,
+ OffsetNumber *root_offsets)
{
OffsetNumber offnum,
maxoff;
@@ -1123,6 +1170,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
HeapTupleHeader htup;
OffsetNumber nextoffnum;
TransactionId priorXmax;
+ HeapTupleData tup;
/* skip unused and dead items */
if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
@@ -1131,6 +1179,9 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
if (ItemIdIsNormal(lp))
{
htup = (HeapTupleHeader) PageGetItem(page, lp);
+ tup.t_data = htup;
+ HeapTupleCopyBaseFromPage(buffer, &tup, page,
+ IsToastRelation(relation));
/*
* Check if this tuple is part of a HOT-chain rooted at some other
@@ -1152,7 +1203,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
/* Set up to scan the HOT-chain */
nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
- priorXmax = HeapTupleHeaderGetUpdateXid(htup);
+ priorXmax = HeapTupleGetUpdateXidAny(&tup);
}
else
{
@@ -1191,9 +1242,12 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
break;
htup = (HeapTupleHeader) PageGetItem(page, lp);
+ tup.t_data = htup;
+ HeapTupleCopyBaseFromPage(buffer, &tup, page,
+ IsToastRelation(relation));
if (TransactionIdIsValid(priorXmax) &&
- !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
+ !TransactionIdEquals(priorXmax, HeapTupleGetXmin(&tup)))
break;
/* Remember the root line pointer for this item */
@@ -1207,7 +1261,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
- priorXmax = HeapTupleHeaderGetUpdateXid(htup);
+ priorXmax = HeapTupleGetUpdateXidAny(&tup);
}
}
}
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index b01b39b008..3060a7fdf8 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -380,6 +380,7 @@ rewrite_heap_tuple(RewriteState state,
&old_tuple->t_data->t_choice.t_heap,
sizeof(HeapTupleFields));
+ HeapTupleCopyBase(new_tuple, old_tuple);
new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK;
new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK;
new_tuple->t_data->t_infomask |=
@@ -389,7 +390,7 @@ rewrite_heap_tuple(RewriteState state,
* While we have our hands on the tuple, we may as well freeze any
* eligible xmin or xmax, so that future VACUUM effort can be saved.
*/
- heap_freeze_tuple(new_tuple->t_data,
+ heap_freeze_tuple(new_tuple,
state->rs_old_rel->rd_rel->relfrozenxid,
state->rs_old_rel->rd_rel->relminmxid,
state->rs_freeze_xid,
@@ -405,7 +406,7 @@ rewrite_heap_tuple(RewriteState state,
* If the tuple has been updated, check the old-to-new mapping hash table.
*/
if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
- HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) &&
+ HeapTupleIsOnlyLocked(old_tuple)) &&
!HeapTupleHeaderIndicatesMovedPartitions(old_tuple->t_data) &&
!(ItemPointerEquals(&(old_tuple->t_self),
&(old_tuple->t_data->t_ctid))))
@@ -413,7 +414,7 @@ rewrite_heap_tuple(RewriteState state,
OldToNewMapping mapping;
memset(&hashkey, 0, sizeof(hashkey));
- hashkey.xmin = HeapTupleHeaderGetUpdateXid(old_tuple->t_data);
+ hashkey.xmin = HeapTupleGetUpdateXidAny(old_tuple);
hashkey.tid = old_tuple->t_data->t_ctid;
mapping = (OldToNewMapping)
@@ -486,7 +487,7 @@ rewrite_heap_tuple(RewriteState state,
* RECENTLY_DEAD if and only if the xmin is not before OldestXmin.
*/
if ((new_tuple->t_data->t_infomask & HEAP_UPDATED) &&
- !TransactionIdPrecedes(HeapTupleHeaderGetXmin(new_tuple->t_data),
+ !TransactionIdPrecedes(HeapTupleGetXmin(new_tuple),
state->rs_oldest_xmin))
{
/*
@@ -495,7 +496,7 @@ rewrite_heap_tuple(RewriteState state,
UnresolvedTup unresolved;
memset(&hashkey, 0, sizeof(hashkey));
- hashkey.xmin = HeapTupleHeaderGetXmin(new_tuple->t_data);
+ hashkey.xmin = HeapTupleGetXmin(new_tuple);
hashkey.tid = old_tid;
unresolved = hash_search(state->rs_unresolved_tups, &hashkey,
@@ -583,7 +584,7 @@ rewrite_heap_dead_tuple(RewriteState state, HeapTuple old_tuple)
bool found;
memset(&hashkey, 0, sizeof(hashkey));
- hashkey.xmin = HeapTupleHeaderGetXmin(old_tuple->t_data);
+ hashkey.xmin = HeapTupleGetXmin(old_tuple);
hashkey.tid = old_tuple->t_self;
unresolved = hash_search(state->rs_unresolved_tups, &hashkey,
@@ -619,6 +620,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
Size len;
OffsetNumber newoff;
HeapTuple heaptup;
+ TransactionId xmin;
+ bool immutable_tuple;
/*
* If the new tuple is too big for storage or contains already toasted
@@ -653,9 +656,19 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
len = MAXALIGN(heaptup->t_len); /* be conservative */
/*
- * If we're gonna fail for oversize tuple, do it right away
+ * With 64-bit XIDs the maximum plain tuple size decreased, because a
+ * PageSpecial area was added to each heap page. Pages with a tuple that became
+ * too large to fit should remain in Double Xmax format (read only). Inserting
+ * plain tuples larger than the new MaxHeapTupleSize is prohibited anyway, but
+ * VACUUM FULL will transfer such a page to the rebuilt relation unmodified.
*/
- if (len > MaxHeapTupleSize)
+ immutable_tuple = len <= MaxHeapTupleSize_32 && len > MaxHeapTupleSize;
+
+ /*
+ * If we're gonna fail for oversize tuple, do it right away. But still allow
+ * an immutable_tuple (see above) to be processed.
+ */
+ if (len > MaxHeapTupleSize && !immutable_tuple)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("row is too big: size %zu, maximum size %zu",
@@ -704,10 +717,42 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
if (!state->rs_buffer_valid)
{
/* Initialize a new empty page */
- PageInit(page, BLCKSZ, 0);
+ if (immutable_tuple)
+ /* Initialize DoubleXmax page */
+ PageInit(page, BLCKSZ, 0);
+ else
+ {
+ Size special_size;
+
+ special_size = IsToastRelation(state->rs_new_rel) ?
+ sizeof(ToastPageSpecialData) :
+ sizeof(HeapPageSpecialData);
+ PageInit(page, BLCKSZ, special_size);
+ }
state->rs_buffer_valid = true;
}
+ rewrite_page_prepare_for_xid(page, heaptup,
+ IsToastRelation(state->rs_new_rel));
+
+ /*
+ * A tuple with HEAP_XMIN_FROZEN in t_infomask should have its xmin set to
+ * FrozenTransactionId, so that it is not treated like a normal tuple.
+ */
+ xmin = HeapTupleGetXmin(heaptup);
+ HeapTupleSetXmin(heaptup, xmin);
+
+ /*
+ * Tuples on a DoubleXmax page cannot have been modified after they were
+ * frozen by pg_upgrade. Just check this to be safe.
+ */
+ Assert(!immutable_tuple || xmin == FrozenTransactionId);
+
+ if (!immutable_tuple)
+ HeapTupleHeaderSetXmin(page, heaptup);
+
+ HeapTupleHeaderSetXmax(page, heaptup);
+
/* And now we can insert the tuple into the page */
newoff = PageAddItem(page, (Item) heaptup->t_data, heaptup->t_len,
InvalidOffsetNumber, false, true);
@@ -988,19 +1033,24 @@ logical_rewrite_log_mapping(RewriteState state, TransactionId xid,
*/
if (!found)
{
- char path[MAXPGPATH];
- Oid dboid;
+ char path[MAXPGPATH];
+ Oid dboid;
+ TransactionId current_xid;
if (state->rs_old_rel->rd_rel->relisshared)
dboid = InvalidOid;
else
dboid = MyDatabaseId;
+ current_xid = GetCurrentTransactionId();
snprintf(path, MAXPGPATH,
"pg_logical/mappings/" LOGICAL_REWRITE_FORMAT,
dboid, relid,
LSN_FORMAT_ARGS(state->rs_begin_lsn),
- xid, GetCurrentTransactionId());
+ (uint32) (xid >> 32),
+ (uint32) xid,
+ (uint32) (current_xid >> 32),
+ (uint32) current_xid);
dlist_init(&src->mappings);
src->num_mappings = 0;
@@ -1049,9 +1099,9 @@ logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid,
if (!state->rs_logical_rewrite)
return;
- xmin = HeapTupleHeaderGetXmin(new_tuple->t_data);
+ xmin = HeapTupleGetXmin(new_tuple);
/* use *GetUpdateXid to correctly deal with multixacts */
- xmax = HeapTupleHeaderGetUpdateXid(new_tuple->t_data);
+ xmax = HeapTupleGetUpdateXidAny(new_tuple);
/*
* Log the mapping iff the tuple has been created recently.
@@ -1115,14 +1165,19 @@ heap_xlog_logical_rewrite(XLogReaderState *r)
xl_heap_rewrite_mapping *xlrec;
uint32 len;
char *data;
+ TransactionId xid;
xlrec = (xl_heap_rewrite_mapping *) XLogRecGetData(r);
+ xid = XLogRecGetXid(r);
snprintf(path, MAXPGPATH,
"pg_logical/mappings/" LOGICAL_REWRITE_FORMAT,
xlrec->mapped_db, xlrec->mapped_rel,
LSN_FORMAT_ARGS(xlrec->start_lsn),
- xlrec->mapped_xid, XLogRecGetXid(r));
+ (uint32) (xlrec->mapped_xid >> 32),
+ (uint32) xlrec->mapped_xid,
+ (uint32) (xid >> 32),
+ (uint32) xid);
fd = OpenTransientFile(path,
O_CREAT | O_WRONLY | PG_BINARY);
@@ -1217,10 +1272,12 @@ CheckPointLogicalRewriteHeap(void)
Oid dboid;
Oid relid;
XLogRecPtr lsn;
- TransactionId rewrite_xid;
- TransactionId create_xid;
- uint32 hi,
- lo;
+ uint32 lsn_hi,
+ lsn_lo,
+ rewrite_xid_hi,
+ rewrite_xid_lo,
+ create_xid_hi,
+ create_xid_lo;
PGFileType de_type;
if (strcmp(mapping_de->d_name, ".") == 0 ||
@@ -1238,10 +1295,12 @@ CheckPointLogicalRewriteHeap(void)
continue;
if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT,
- &dboid, &relid, &hi, &lo, &rewrite_xid, &create_xid) != 6)
+ &dboid, &relid, &lsn_hi, &lsn_lo,
+ &rewrite_xid_hi, &rewrite_xid_lo,
+ &create_xid_hi, &create_xid_lo) != 8)
elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name);
- lsn = ((uint64) hi) << 32 | lo;
+ lsn = ((uint64) lsn_hi) << 32 | lsn_lo;
if (lsn < cutoff || cutoff == InvalidXLogRecPtr)
{
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 864876adf7..5ff535d2a8 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -46,6 +46,7 @@
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "catalog/index.h"
+#include "catalog/catalog.h"
#include "catalog/storage.h"
#include "commands/dbcommands.h"
#include "commands/progress.h"
@@ -267,7 +268,6 @@ static bool lazy_vacuum_all_indexes(LVRelState *vacrel);
static void lazy_vacuum_heap_rel(LVRelState *vacrel);
static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno,
Buffer buffer, int index, Buffer *vmbuffer);
-static bool lazy_check_wraparound_failsafe(LVRelState *vacrel);
static void lazy_cleanup_all_indexes(LVRelState *vacrel);
static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel,
IndexBulkDeleteResult *istat,
@@ -528,7 +528,6 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
* ensure that parallel VACUUM won't be attempted at all when relfrozenxid
* is already dangerously old.)
*/
- lazy_check_wraparound_failsafe(vacrel);
dead_items_alloc(vacrel, params->nworkers);
/*
@@ -645,7 +644,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
WalUsage walusage;
StringInfoData buf;
char *msgfmt;
- int32 diff;
+ int64 diff;
int64 PageHitOp = VacuumPageHit - StartPageHit,
PageMissOp = VacuumPageMiss - StartPageMiss,
PageDirtyOp = VacuumPageDirty - StartPageDirty;
@@ -698,32 +697,35 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
orig_rel_pages == 0 ? 100.0 :
100.0 * vacrel->scanned_pages / orig_rel_pages);
appendStringInfo(&buf,
- _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
+ _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable, oldest xmin: %llu\n"),
(long long) vacrel->tuples_deleted,
(long long) vacrel->new_rel_tuples,
- (long long) vacrel->recently_dead_tuples);
+ (long long) vacrel->recently_dead_tuples,
+ (unsigned long long) OldestXmin);
if (vacrel->missed_dead_tuples > 0)
appendStringInfo(&buf,
_("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
(long long) vacrel->missed_dead_tuples,
vacrel->missed_dead_pages);
- diff = (int32) (ReadNextTransactionId() - OldestXmin);
+ diff = (int64) (ReadNextTransactionId() - OldestXmin);
appendStringInfo(&buf,
- _("removable cutoff: %llu, which was %d XIDs old when operation ended\n"),
- (unsigned long long) OldestXmin, diff);
+ _("removable cutoff: %llu, which was %lld XIDs old when operation ended\n"),
+ (unsigned long long) OldestXmin, (long long) diff);
if (frozenxid_updated)
{
- diff = (int32) (vacrel->NewRelfrozenXid - vacrel->relfrozenxid);
+ diff = (int64) (vacrel->NewRelfrozenXid - vacrel->relfrozenxid);
appendStringInfo(&buf,
- _("new relfrozenxid: %llu, which is %d XIDs ahead of previous value\n"),
- (unsigned long long) vacrel->NewRelfrozenXid, diff);
+ _("new relfrozenxid: %llu, which is %lld XIDs ahead of previous value\n"),
+ (unsigned long long) vacrel->NewRelfrozenXid,
+ (long long) diff);
}
if (minmulti_updated)
{
- diff = (int32) (vacrel->NewRelminMxid - vacrel->relminmxid);
+ diff = (int64) (vacrel->NewRelminMxid - vacrel->relminmxid);
appendStringInfo(&buf,
- _("new relminmxid: %llu, which is %d MXIDs ahead of previous value\n"),
- (unsigned long long) vacrel->NewRelminMxid, diff);
+ _("new relminmxid: %llu, which is %lld MXIDs ahead of previous value\n"),
+ (unsigned long long) vacrel->NewRelminMxid,
+ (long long) diff);
}
appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
vacrel->frozen_pages,
@@ -932,7 +934,6 @@ lazy_scan_heap(LVRelState *vacrel)
*/
if (blkno - next_failsafe_block >= FAILSAFE_EVERY_PAGES)
{
- lazy_check_wraparound_failsafe(vacrel);
next_failsafe_block = blkno;
}
@@ -1452,7 +1453,14 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0)
{
- freespace = BLCKSZ - SizeOfPageHeaderData;
+ Size special_size;
+
+ special_size = IsToastRelation(vacrel->rel) ?
+ sizeof(ToastPageSpecialData) :
+ sizeof(HeapPageSpecialData);
+ freespace = BufferGetPageSize(buf)
+ - SizeOfPageHeaderData
+ - special_size;
RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
}
@@ -1556,6 +1564,7 @@ lazy_scan_prune(LVRelState *vacrel,
maxoff;
ItemId itemid;
HeapTupleData tuple;
+ HeapTupleHeader htup;
HTSV_Result res;
int tuples_deleted,
tuples_frozen,
@@ -1599,7 +1608,7 @@ retry:
*/
tuples_deleted = heap_page_prune(rel, buf, vacrel->vistest,
InvalidTransactionId, 0, &nnewlpdead,
- &vacrel->offnum);
+ &vacrel->offnum, true);
/*
* Now scan the page to collect LP_DEAD items and check for tuples
@@ -1664,6 +1673,7 @@ retry:
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
tuple.t_len = ItemIdGetLength(itemid);
tuple.t_tableOid = RelationGetRelid(rel);
+ HeapTupleCopyBaseFromPage(buf, &tuple, page, IsToastRelation(rel));
/*
* DEAD tuples are almost always pruned into LP_DEAD line pointers by
@@ -1727,7 +1737,7 @@ retry:
* The inserter definitely committed. But is it old enough
* that everyone sees it as committed?
*/
- xmin = HeapTupleHeaderGetXmin(tuple.t_data);
+ xmin = HeapTupleGetXmin(&tuple);
if (!TransactionIdPrecedes(xmin, vacrel->OldestXmin))
{
prunestate->all_visible = false;
@@ -1783,7 +1793,7 @@ retry:
* now.
*/
prunestate->hastup = true; /* page makes rel truncation unsafe */
- if (heap_prepare_freeze_tuple(tuple.t_data,
+ if (heap_prepare_freeze_tuple(&tuple,
vacrel->relfrozenxid,
vacrel->relminmxid,
vacrel->FreezeLimit,
@@ -1840,12 +1850,10 @@ retry:
/* execute collected freezes */
for (int i = 0; i < tuples_frozen; i++)
{
- HeapTupleHeader htup;
-
itemid = PageGetItemId(page, frozen[i].offset);
htup = (HeapTupleHeader) PageGetItem(page, itemid);
-
- heap_execute_freeze_tuple(htup, &frozen[i]);
+ heap_execute_freeze_tuple_page(page, htup, &frozen[i],
+ IsToastRelation(vacrel->rel));
}
/* Now WAL-log freezing if necessary */
@@ -1965,7 +1973,6 @@ lazy_scan_noprune(LVRelState *vacrel,
live_tuples,
recently_dead_tuples,
missed_dead_tuples;
- HeapTupleHeader tupleheader;
TransactionId NewRelfrozenXid = vacrel->NewRelfrozenXid;
MultiXactId NewRelminMxid = vacrel->NewRelminMxid;
OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
@@ -2011,8 +2018,14 @@ lazy_scan_noprune(LVRelState *vacrel,
}
*hastup = true; /* page prevents rel truncation */
- tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
- if (heap_tuple_would_freeze(tupleheader,
+ ItemPointerSet(&(tuple.t_self), blkno, offnum);
+ tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+ tuple.t_len = ItemIdGetLength(itemid);
+ tuple.t_tableOid = RelationGetRelid(vacrel->rel);
+ HeapTupleCopyBaseFromPage(buf, &tuple, page,
+ IsToastRelation(vacrel->rel));
+
+ if (heap_tuple_would_freeze(&tuple,
vacrel->FreezeLimit,
vacrel->MultiXactCutoff,
&NewRelfrozenXid, &NewRelminMxid))
@@ -2045,11 +2058,6 @@ lazy_scan_noprune(LVRelState *vacrel,
*/
}
- ItemPointerSet(&(tuple.t_self), blkno, offnum);
- tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
- tuple.t_len = ItemIdGetLength(itemid);
- tuple.t_tableOid = RelationGetRelid(vacrel->rel);
-
switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->OldestXmin, buf))
{
case HEAPTUPLE_DELETE_IN_PROGRESS:
@@ -2319,13 +2327,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
Assert(vacrel->do_index_vacuuming);
Assert(vacrel->do_index_cleanup);
- /* Precheck for XID wraparound emergencies */
- if (lazy_check_wraparound_failsafe(vacrel))
- {
- /* Wraparound emergency -- don't even start an index scan */
- return false;
- }
-
/* Report that we are now vacuuming indexes */
pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
PROGRESS_VACUUM_PHASE_VACUUM_INDEX);
@@ -2340,13 +2341,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
vacrel->indstats[idx] =
lazy_vacuum_one_index(indrel, istat, vacrel->old_live_tuples,
vacrel);
-
- if (lazy_check_wraparound_failsafe(vacrel))
- {
- /* Wraparound emergency -- end current index scan */
- allindexes = false;
- break;
- }
}
}
else
@@ -2354,13 +2348,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
/* Outsource everything to parallel variant */
parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, vacrel->old_live_tuples,
vacrel->num_index_scans);
-
- /*
- * Do a postcheck to consider applying wraparound failsafe now. Note
- * that parallel VACUUM only gets the precheck and this postcheck.
- */
- if (lazy_check_wraparound_failsafe(vacrel))
- allindexes = false;
}
/*
@@ -2606,58 +2593,6 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
return index;
}
-/*
- * Trigger the failsafe to avoid wraparound failure when vacrel table has a
- * relfrozenxid and/or relminmxid that is dangerously far in the past.
- * Triggering the failsafe makes the ongoing VACUUM bypass any further index
- * vacuuming and heap vacuuming. Truncating the heap is also bypassed.
- *
- * Any remaining work (work that VACUUM cannot just bypass) is typically sped
- * up when the failsafe triggers. VACUUM stops applying any cost-based delay
- * that it started out with.
- *
- * Returns true when failsafe has been triggered.
- */
-static bool
-lazy_check_wraparound_failsafe(LVRelState *vacrel)
-{
- Assert(TransactionIdIsNormal(vacrel->relfrozenxid));
- Assert(MultiXactIdIsValid(vacrel->relminmxid));
-
- /* Don't warn more than once per VACUUM */
- if (vacrel->failsafe_active)
- return true;
-
- if (unlikely(vacuum_xid_failsafe_check(vacrel->relfrozenxid,
- vacrel->relminmxid)))
- {
- vacrel->failsafe_active = true;
-
- /* Disable index vacuuming, index cleanup, and heap rel truncation */
- vacrel->do_index_vacuuming = false;
- vacrel->do_index_cleanup = false;
- vacrel->do_rel_truncate = false;
-
- ereport(WARNING,
- (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans",
- get_database_name(MyDatabaseId),
- vacrel->relnamespace,
- vacrel->relname,
- vacrel->num_index_scans),
- errdetail("The table's relfrozenxid or relminmxid is too far in the past."),
- errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n"
- "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs.")));
-
- /* Stop applying cost limits from this point on */
- VacuumCostActive = false;
- VacuumCostBalance = 0;
-
- return true;
- }
-
- return false;
-}
-
/*
* lazy_cleanup_all_indexes() -- cleanup all indexes of relation.
*/
@@ -3285,7 +3220,8 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
tuple.t_len = ItemIdGetLength(itemid);
tuple.t_tableOid = RelationGetRelid(vacrel->rel);
-
+ HeapTupleCopyBaseFromPage(buf, &tuple, page,
+ IsToastRelation(vacrel->rel));
switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->OldestXmin, buf))
{
case HEAPTUPLE_LIVE:
@@ -3304,7 +3240,7 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
* The inserter definitely committed. But is it old enough
* that everyone sees it as committed?
*/
- xmin = HeapTupleHeaderGetXmin(tuple.t_data);
+ xmin = HeapTupleGetXmin(&tuple);
if (!TransactionIdPrecedes(xmin, vacrel->OldestXmin))
{
all_visible = false;
@@ -3318,7 +3254,7 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf,
/* Check whether this tuple is already frozen or not */
if (all_visible && *all_frozen &&
- heap_tuple_needs_eventual_freeze(tuple.t_data))
+ heap_tuple_needs_eventual_freeze(&tuple))
*all_frozen = false;
}
break;
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 8b96708b3e..6c4b1f0f50 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -546,6 +546,7 @@ _bt_getroot(Relation rel, int access)
rootblkno = rootopaque->btpo_next;
}
+ /* Note: can't check btpo_level on deleted pages */
if (rootopaque->btpo_level != rootlevel)
elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u",
rootblkno, RelationGetRelationName(rel),
@@ -649,6 +650,7 @@ _bt_gettrueroot(Relation rel)
rootblkno = rootopaque->btpo_next;
}
+ /* Note: can't check btpo_level on deleted pages */
if (rootopaque->btpo_level != rootlevel)
elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u",
rootblkno, RelationGetRelationName(rel),
diff --git a/src/backend/access/nbtree/nbtsplitloc.c b/src/backend/access/nbtree/nbtsplitloc.c
index 241e26d338..c712ee645f 100644
--- a/src/backend/access/nbtree/nbtsplitloc.c
+++ b/src/backend/access/nbtree/nbtsplitloc.c
@@ -140,6 +140,7 @@ _bt_findsplitloc(Relation rel,
olddataitemstoleft,
perfectpenalty,
leaffillfactor;
+ int maxTupleEnd PG_USED_FOR_ASSERTS_ONLY;
FindSplitData state;
FindSplitStrat strategy;
ItemId itemid;
@@ -153,6 +154,7 @@ _bt_findsplitloc(Relation rel,
opaque = BTPageGetOpaque(origpage);
maxoff = PageGetMaxOffsetNumber(origpage);
+ maxTupleEnd = ItemIdGetTupleEnd(PageGetItemId(origpage, P_HIKEY));
/* Total free space available on a btree page, after fixed overhead */
leftspace = rightspace =
@@ -214,6 +216,18 @@ _bt_findsplitloc(Relation rel,
itemid = PageGetItemId(origpage, offnum);
itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
+#ifdef USE_ASSERT_CHECKING
+
+ /*
+ * The end of the rightmost tuple on a page can be shifted relative to the
+ * left boundary of BTPageOpaqueData due to conversion from EE96, which
+ * used a different BTPageOpaqueData layout. It is only checked in the
+ * assert below.
+ */
+ if (maxTupleEnd < ItemIdGetTupleEnd(itemid))
+ maxTupleEnd = ItemIdGetTupleEnd(itemid);
+#endif
+
/*
* When item offset number is not newitemoff, neither side of the
* split can be newitem. Record a split after the previous data item
@@ -248,7 +262,7 @@ _bt_findsplitloc(Relation rel,
* (Though only when it's possible that newitem will end up alone on new
* right page.)
*/
- Assert(olddataitemstoleft == olddataitemstotal);
+ Assert(olddataitemstoleft + ((PageHeader) origpage)->pd_special - maxTupleEnd == olddataitemstotal);
if (newitemoff > maxoff)
_bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0);
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index ad489e33b3..2c33033441 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -15,6 +15,8 @@
#include "postgres.h"
#include "access/bufmask.h"
+#include "access/heapam_xlog.h"
+#include "access/htup_details.h"
#include "access/nbtree.h"
#include "access/nbtxlog.h"
#include "access/transam.h"
diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c
index ad855894be..818c8c27ee 100644
--- a/src/backend/access/rmgrdesc/gistdesc.c
+++ b/src/backend/access/rmgrdesc/gistdesc.c
@@ -29,7 +29,7 @@ out_gistxlogPageReuse(StringInfo buf, gistxlogPageReuse *xlrec)
appendStringInfo(buf, "rel %u/%u/%u; blk %u; latestRemovedXid %llu",
xlrec->locator.spcOid, xlrec->locator.dbOid,
xlrec->locator.relNumber, xlrec->block,
- (unsigned long long) U64FromFullTransactionId(xlrec->latestRemovedFullXid));
+ (unsigned long long) XidFromFullTransactionId(xlrec->latestRemovedFullXid));
}
static void
@@ -51,7 +51,7 @@ static void
out_gistxlogPageDelete(StringInfo buf, gistxlogPageDelete *xlrec)
{
appendStringInfo(buf, "deleteXid %llu; downlink %u",
- (unsigned long long) U64FromFullTransactionId(xlrec->deleteXid),
+ (unsigned long long) XidFromFullTransactionId(xlrec->deleteXid),
xlrec->downlinkOffset);
}
diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c
index 503808cf0a..407987d373 100644
--- a/src/backend/access/rmgrdesc/heapdesc.c
+++ b/src/backend/access/rmgrdesc/heapdesc.c
@@ -182,6 +182,23 @@ heap2_desc(StringInfo buf, XLogReaderState *record)
}
}
+void
+heap3_desc(StringInfo buf, XLogReaderState *record) /* append a description of the record to buf; only XLOG_HEAP3_BASE_SHIFT produces output */
+{
+ char *rec = XLogRecGetData(record); /* record payload as returned by XLogRecGetData */
+ uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; /* clear the XLR_INFO_MASK bits */
+
+ info &= XLOG_HEAP_OPMASK; /* mask with XLOG_HEAP_OPMASK before comparing against the opcode */
+ if (info == XLOG_HEAP3_BASE_SHIFT)
+ {
+ xl_heap_base_shift *xlrec = (xl_heap_base_shift *) rec; /* payload is interpreted as xl_heap_base_shift */
+
+ appendStringInfo(buf, "%s delta %lld ", /* note: the format leaves a trailing space after the delta */
+ xlrec->multi ? "MultiXactId" : "XactId", /* label chosen by xlrec->multi -- presumably which base was shifted; confirm */
+ (long long) xlrec->delta); /* cast so the argument matches %lld */
+ }
+}
+
const char *
heap_identify(uint8 info)
{
@@ -265,3 +282,18 @@ heap2_identify(uint8 info)
return id;
}
+
+const char *
+heap3_identify(uint8 info) /* map an info byte to a record name; returns NULL when no case matches */
+{
+ const char *id = NULL; /* stays NULL unless a case below assigns a name */
+
+ switch (info & ~XLR_INFO_MASK) /* NOTE(review): unlike heap3_desc, XLOG_HEAP_OPMASK is not applied here -- confirm that is intended */
+ {
+ case XLOG_HEAP3_BASE_SHIFT:
+ id = "BASE_SHIFT";
+ break;
+ }
+
+ return id;
+}
diff --git a/src/backend/access/rmgrdesc/mxactdesc.c b/src/backend/access/rmgrdesc/mxactdesc.c
index 950f9269f7..b9c1826770 100644
--- a/src/backend/access/rmgrdesc/mxactdesc.c
+++ b/src/backend/access/rmgrdesc/mxactdesc.c
@@ -65,9 +65,9 @@ multixact_desc(StringInfo buf, XLogReaderState *record)
xl_multixact_create *xlrec = (xl_multixact_create *) rec;
int i;
- appendStringInfo(buf, "%llu offset %u nmembers %d: ",
+ appendStringInfo(buf, "%llu offset %llu nmembers %d: ",
(unsigned long long) xlrec->mid,
- xlrec->moff, xlrec->nmembers);
+ (unsigned long long) xlrec->moff, xlrec->nmembers);
for (i = 0; i < xlrec->nmembers; i++)
out_member(buf, &xlrec->members[i]);
}
@@ -75,10 +75,11 @@ multixact_desc(StringInfo buf, XLogReaderState *record)
{
xl_multixact_truncate *xlrec = (xl_multixact_truncate *) rec;
- appendStringInfo(buf, "offsets [%llu, %llu), members [%u, %u)",
+ appendStringInfo(buf, "offsets [%llu, %llu), members [%llu, %llu)",
(unsigned long long) xlrec->startTruncOff,
(unsigned long long) xlrec->endTruncOff,
- xlrec->startTruncMemb, xlrec->endTruncMemb);
+ (unsigned long long) xlrec->startTruncMemb,
+ (unsigned long long) xlrec->endTruncMemb);
}
}
diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c
index bf25c941e4..c739b65942 100644
--- a/src/backend/access/rmgrdesc/nbtdesc.c
+++ b/src/backend/access/rmgrdesc/nbtdesc.c
@@ -83,7 +83,7 @@ btree_desc(StringInfo buf, XLogReaderState *record)
appendStringInfo(buf, "left %u; right %u; level %u; safexid %llu; ",
xlrec->leftsib, xlrec->rightsib, xlrec->level,
- (unsigned long long) U64FromFullTransactionId(xlrec->safexid));
+ (unsigned long long) XidFromFullTransactionId(xlrec->safexid));
appendStringInfo(buf, "leafleft %u; leafright %u; leaftopparent %u",
xlrec->leafleftsib, xlrec->leafrightsib,
xlrec->leaftopparent);
@@ -103,7 +103,7 @@ btree_desc(StringInfo buf, XLogReaderState *record)
appendStringInfo(buf, "rel %u/%u/%u; latestRemovedXid %llu",
xlrec->locator.spcOid, xlrec->locator.dbOid,
xlrec->locator.relNumber,
- (unsigned long long) U64FromFullTransactionId(xlrec->latestRemovedFullXid));
+ (unsigned long long) XidFromFullTransactionId(xlrec->latestRemovedFullXid));
break;
}
case XLOG_BTREE_META_CLEANUP:
diff --git a/src/backend/access/rmgrdesc/xactdesc.c b/src/backend/access/rmgrdesc/xactdesc.c
index b930943bb0..f97996d8e7 100644
--- a/src/backend/access/rmgrdesc/xactdesc.c
+++ b/src/backend/access/rmgrdesc/xactdesc.c
@@ -110,7 +110,8 @@ ParseCommitRecord(uint8 info, xl_xact_commit *xlrec, xl_xact_parsed_commit *pars
{
xl_xact_twophase *xl_twophase = (xl_xact_twophase *) data;
- parsed->twophase_xid = xl_twophase->xid;
+ parsed->twophase_xid =
+ ((uint64) xl_twophase->xid_hi << 32) | xl_twophase->xid_lo;
data += sizeof(xl_xact_twophase);
@@ -205,7 +206,8 @@ ParseAbortRecord(uint8 info, xl_xact_abort *xlrec, xl_xact_parsed_abort *parsed)
{
xl_xact_twophase *xl_twophase = (xl_xact_twophase *) data;
- parsed->twophase_xid = xl_twophase->xid;
+ parsed->twophase_xid =
+ ((uint64) xl_twophase->xid_hi << 32) | xl_twophase->xid_lo;
data += sizeof(xl_xact_twophase);
diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c
index 647d64dc9a..4c70c125b2 100644
--- a/src/backend/access/rmgrdesc/xlogdesc.c
+++ b/src/backend/access/rmgrdesc/xlogdesc.c
@@ -45,7 +45,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
CheckPoint *checkpoint = (CheckPoint *) rec;
appendStringInfo(buf, "redo %X/%X; "
- "tli %u; prev tli %u; fpw %s; xid %llu; oid %u; multi %llu; offset %u; "
+ "tli %u; prev tli %u; fpw %s; xid %llu; oid %u; multi %llu; offset %llu; "
"oldest xid %llu in DB %u; oldest multi %llu in DB %u; "
"oldest/newest commit timestamp xid: %llu/%llu; "
"oldest running xid %llu; %s",
@@ -53,10 +53,10 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
checkpoint->ThisTimeLineID,
checkpoint->PrevTimeLineID,
checkpoint->fullPageWrites ? "true" : "false",
- (unsigned long long) U64FromFullTransactionId(checkpoint->nextXid),
+ (unsigned long long) XidFromFullTransactionId(checkpoint->nextXid),
checkpoint->nextOid,
(unsigned long long) checkpoint->nextMulti,
- checkpoint->nextMultiOffset,
+ (unsigned long long) checkpoint->nextMultiOffset,
(unsigned long long) checkpoint->oldestXid,
checkpoint->oldestXidDB,
(unsigned long long) checkpoint->oldestMulti,
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index afbb5d6b11..9ae0ccbd56 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -294,7 +294,7 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
* sub-XIDs and all of the XIDs for which we're adjusting clog should be
* on the same page. Check those conditions, too.
*/
- if (all_xact_same_page && xid == MyProc->xid &&
+ if (all_xact_same_page && xid == pg_atomic_read_u64(&MyProc->xid) &&
nsubxids <= THRESHOLD_SUBTRANS_CLOG_OPT &&
nsubxids == MyProc->subxidStatus.count &&
(nsubxids == 0 ||
@@ -713,7 +713,7 @@ void
BootStrapCLOG(void)
{
int slotno;
- int pageno;
+ int64 pageno;
LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
@@ -727,7 +727,10 @@ BootStrapCLOG(void)
pageno = TransactionIdToPage(XidFromFullTransactionId(ShmemVariableCache->nextXid));
if (pageno != 0)
{
+ /* Create and zero the commit log page that contains nextXid */
slotno = ZeroCLOGPage(pageno, false);
+
+ /* Make sure it's written out */
SimpleLruWritePage(XactCtl, slotno);
Assert(!XactCtl->shared->page_dirty[slotno]);
}
@@ -921,24 +924,11 @@ TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid)
SimpleLruTruncate(XactCtl, cutoffPage);
}
-
/*
* Decide whether a CLOG page number is "older" for truncation purposes.
*
- * We need to use comparison of TransactionIds here in order to do the right
- * thing with wraparound XID arithmetic. However, TransactionIdPrecedes()
- * would get weird about permanent xact IDs. So, offset both such that xid1,
- * xid2, and xid2 + CLOG_XACTS_PER_PAGE - 1 are all normal XIDs; this offset
- * is relevant to page 0 and to the page preceding page 0.
- *
- * The page containing oldestXact-2^31 is the important edge case. The
- * portion of that page equaling or following oldestXact-2^31 is expendable,
- * but the portion preceding oldestXact-2^31 is not. When oldestXact-2^31 is
- * the first XID of a page and segment, the entire page and segment is
- * expendable, and we could truncate the segment. Recognizing that case would
- * require making oldestXact, not just the page containing oldestXact,
- * available to this callback. The benefit would be rare and small, so we
- * don't optimize that edge case.
+ * With 64-bit XIDs this comparison is just "<", but we keep it as a function
+ * so that its call sites stay the same as in vanilla PostgreSQL.
*/
static bool
CLOGPagePrecedes(int64 page1, int64 page2)
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index b91097ce0e..7ca59025a7 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -916,25 +916,6 @@ AdvanceOldestCommitTsXid(TransactionId oldestXact)
/*
* Decide whether a commitTS page number is "older" for truncation purposes.
* Analogous to CLOGPagePrecedes().
- *
- * At default BLCKSZ, (1 << 31) % COMMIT_TS_XACTS_PER_PAGE == 128. This
- * introduces differences compared to CLOG and the other SLRUs having (1 <<
- * 31) % per_page == 0. This function never tests exactly
- * TransactionIdPrecedes(x-2^31, x). When the system reaches xidStopLimit,
- * there are two possible counts of page boundaries between oldestXact and the
- * latest XID assigned, depending on whether oldestXact is within the first
- * 128 entries of its page. Since this function doesn't know the location of
- * oldestXact within page2, it returns false for one page that actually is
- * expendable. This is a wider (yet still negligible) version of the
- * truncation opportunity that CLOGPagePrecedes() cannot recognize.
- *
- * For the sake of a worked example, number entries with decimal values such
- * that page1==1 entries range from 1.0 to 1.999. Let N+0.15 be the number of
- * pages that 2^31 entries will span (N is an integer). If oldestXact=N+2.1,
- * then the final safe XID assignment leaves newestXact=1.95. We keep page 2,
- * because entry=2.85 is the border that toggles whether entries precede the
- * last entry of the oldestXact page. While page 2 is expendable at
- * oldestXact=N+2.1, it would be precious at oldestXact=N+2.9.
*/
static bool
CommitTsPagePrecedes(int64 page1, int64 page2)
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 35b90229a2..e9a6964ef2 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -112,15 +112,15 @@
((xid) / (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)
#define MultiXactIdToOffsetEntry(xid) \
((xid) % (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)
-#define MultiXactIdToOffsetSegment(xid) (MultiXactIdToOffsetPage(xid) / SLRU_PAGES_PER_SEGMENT)
+#define MultiXactIdToOffsetSegment(xid) ((uint64)(MultiXactIdToOffsetPage(xid) / SLRU_PAGES_PER_SEGMENT))
/*
* The situation for members is a bit more complex: we store one byte of
* additional flag bits for each TransactionId. To do this without getting
- * into alignment issues, we store four bytes of flags, and then the
- * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and
- * are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups
- * per page. This wastes 12 bytes per page, but that's OK -- simplicity (and
+ * into alignment issues, we store eight bytes of flags, and then the
+ * corresponding 8 Xids. Each such set of nine 8-byte words (72 bytes) is
+ * called a "group" and is stored whole in a page. With 8kB BLCKSZ, a page
+ * holds 113 groups, wasting 56 bytes, but that's OK -- simplicity (and
* performance) trumps space efficiency here.
*
* Note that the "offset" macros work with byte offset, not array indexes, so
@@ -132,7 +132,7 @@
#define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
/* how many full bytes of flags are there in a group? */
-#define MULTIXACT_FLAGBYTES_PER_GROUP 4
+#define MULTIXACT_FLAGBYTES_PER_GROUP 8
#define MULTIXACT_MEMBERS_PER_MEMBERGROUP \
(MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
/* size in bytes of a complete group */
@@ -142,22 +142,9 @@
#define MULTIXACT_MEMBERS_PER_PAGE \
(MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
-/*
- * Because the number of items per page is not a divisor of the last item
- * number (member 0xFFFFFFFF), the last segment does not use the maximum number
- * of pages, and moreover the last used page therein does not use the same
- * number of items as previous pages. (Another way to say it is that the
- * 0xFFFFFFFF member is somewhere in the middle of the last page, so the page
- * has some empty space after that item.)
- *
- * This constant is the number of members in the last page of the last segment.
- */
-#define MAX_MEMBERS_IN_LAST_MEMBERS_PAGE \
- ((uint32) ((0xFFFFFFFF % MULTIXACT_MEMBERS_PER_PAGE) + 1))
-
/* page in which a member is to be found */
#define MXOffsetToMemberPage(xid) ((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)
-#define MXOffsetToMemberSegment(xid) (MXOffsetToMemberPage(xid) / SLRU_PAGES_PER_SEGMENT)
+#define MXOffsetToMemberSegment(xid) ((uint64)(MXOffsetToMemberPage(xid) / SLRU_PAGES_PER_SEGMENT))
/* Location (byte offset within page) of flag word for a given member */
#define MXOffsetToFlagsOffset(xid) \
@@ -216,22 +203,8 @@ typedef struct MultiXactStateData
MultiXactId oldestMultiXactId;
Oid oldestMultiXactDB;
- /*
- * Oldest multixact offset that is potentially referenced by a multixact
- * referenced by a relation. We don't always know this value, so there's
- * a flag here to indicate whether or not we currently do.
- */
- MultiXactOffset oldestOffset;
- bool oldestOffsetKnown;
-
/* support for anti-wraparound measures */
MultiXactId multiVacLimit;
- MultiXactId multiWarnLimit;
- MultiXactId multiStopLimit;
- MultiXactId multiWrapLimit;
-
- /* support for members anti-wraparound measures */
- MultiXactOffset offsetStopLimit; /* known if oldestOffsetKnown */
/*
* Per-backend data starts here. We have two arrays stored in the area
@@ -361,9 +334,6 @@ static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
MultiXactOffset offset2);
static void ExtendMultiXactOffset(MultiXactId multi);
static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
-static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary,
- MultiXactOffset start, uint32 distance);
-static bool SetOffsetVacuumLimit(bool is_startup);
static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
static void WriteMZeroPageXlogRec(int64 pageno, uint8 info);
static void WriteMTruncateXlogRec(Oid oldestMultiDB,
@@ -397,6 +367,9 @@ MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1,
/* MultiXactIdSetOldestMember() must have been called already. */
Assert(MultiXactIdIsValid(OldestMemberMXactId[MyBackendId]));
+ /* memset members array because with 64-bit xids it has a padding hole */
+ MemSet(members, 0, sizeof(members));
+
/*
* Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs
* are still running. In typical usage, xid2 will be our own XID and the
@@ -512,7 +485,7 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
* end of the loop.
*/
newMembers = (MultiXactMember *)
- palloc(sizeof(MultiXactMember) * (nmembers + 1));
+ palloc0(sizeof(MultiXactMember) * (nmembers + 1));
for (i = 0, j = 0; i < nmembers; i++)
{
@@ -527,7 +500,6 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
newMembers[j].xid = xid;
newMembers[j++].status = status;
-
newMulti = MultiXactIdCreateFromMembers(j, newMembers);
pfree(members);
@@ -903,8 +875,8 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
for (i = 0; i < nmembers; i++, offset++)
{
TransactionId *memberptr;
- uint32 *flagsptr;
- uint32 flagsval;
+ uint64 *flagsptr;
+ uint64 flagsval;
int bshift;
int flagsoff;
int memberoff;
@@ -927,12 +899,12 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
*memberptr = members[i].xid;
- flagsptr = (uint32 *)
+ flagsptr = (uint64 *)
(MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
flagsval = *flagsptr;
- flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
- flagsval |= (members[i].status << bshift);
+ flagsval &= ~((uint64) ((1ULL << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
+ flagsval |= ((uint64) members[i].status << bshift);
*flagsptr = flagsval;
MultiXactMemberCtl->shared->page_dirty[slotno] = true;
@@ -985,8 +957,6 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
* If we're past multiVacLimit or the safe threshold for member storage
* space, or we don't know what the safe threshold for member storage is,
* start trying to force autovacuum cycles.
- * If we're past multiWarnLimit, start issuing warnings.
- * If we're past multiStopLimit, refuse to create new MultiXactIds.
*
* Note these are pretty much the same protections in GetNewTransactionId.
*----------
@@ -1000,41 +970,9 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
* possibility of deadlock while doing get_database_name(). First,
* copy all the shared values we'll need in this path.
*/
- MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit;
- MultiXactId multiStopLimit = MultiXactState->multiStopLimit;
- MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit;
- Oid oldest_datoid = MultiXactState->oldestMultiXactDB;
LWLockRelease(MultiXactGenLock);
- if (IsUnderPostmaster &&
- !MultiXactIdPrecedes(result, multiStopLimit))
- {
- char *oldest_datname = get_database_name(oldest_datoid);
-
- /*
- * Immediately kick autovacuum into action as we're already in
- * ERROR territory.
- */
- SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-
- /* complain even if that DB has disappeared */
- if (oldest_datname)
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("database is not accepting commands that generate new MultiXactIds to avoid wraparound data loss in database \"%s\"",
- oldest_datname),
- errhint("Execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- else
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("database is not accepting commands that generate new MultiXactIds to avoid wraparound data loss in database with OID %u",
- oldest_datoid),
- errhint("Execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- }
-
/*
* To avoid swamping the postmaster with signals, we issue the autovac
* request only once per 64K multis generated. This still gives
@@ -1043,31 +981,6 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
if (IsUnderPostmaster && (result % 65536) == 0)
SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
- if (!MultiXactIdPrecedes(result, multiWarnLimit))
- {
- char *oldest_datname = get_database_name(oldest_datoid);
-
- /* complain even if that DB has disappeared */
- if (oldest_datname)
- ereport(WARNING,
- (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
- "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
- multiWrapLimit - result,
- oldest_datname,
- multiWrapLimit - result),
- errhint("Execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- else
- ereport(WARNING,
- (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
- "database with OID %u must be vacuumed before %u more MultiXactIds are used",
- multiWrapLimit - result,
- oldest_datoid,
- multiWrapLimit - result),
- errhint("Execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- }
-
/* Re-acquire lock and start over */
LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
result = MultiXactState->nextMXact;
@@ -1092,78 +1005,6 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
else
*offset = nextOffset;
- /*----------
- * Protect against overrun of the members space as well, with the
- * following rules:
- *
- * If we're past offsetStopLimit, refuse to generate more multis.
- * If we're close to offsetStopLimit, emit a warning.
- *
- * Arbitrarily, we start emitting warnings when we're 20 segments or less
- * from offsetStopLimit.
- *
- * Note we haven't updated the shared state yet, so if we fail at this
- * point, the multixact ID we grabbed can still be used by the next guy.
- *
- * Note that there is no point in forcing autovacuum runs here: the
- * multixact freeze settings would have to be reduced for that to have any
- * effect.
- *----------
- */
-#define OFFSET_WARN_SEGMENTS 20
- if (MultiXactState->oldestOffsetKnown &&
- MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset,
- nmembers))
- {
- /* see comment in the corresponding offsets wraparound case */
- SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("multixact \"members\" limit exceeded"),
- errdetail_plural("This command would create a multixact with %u members, but the remaining space is only enough for %u member.",
- "This command would create a multixact with %u members, but the remaining space is only enough for %u members.",
- MultiXactState->offsetStopLimit - nextOffset - 1,
- nmembers,
- MultiXactState->offsetStopLimit - nextOffset - 1),
- errhint("Execute a database-wide VACUUM in database with OID %u with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings.",
- MultiXactState->oldestMultiXactDB)));
- }
-
- /*
- * Check whether we should kick autovacuum into action, to prevent members
- * wraparound. NB we use a much larger window to trigger autovacuum than
- * just the warning limit. The warning is just a measure of last resort -
- * this is in line with GetNewTransactionId's behaviour.
- */
- if (!MultiXactState->oldestOffsetKnown ||
- (MultiXactState->nextOffset - MultiXactState->oldestOffset
- > MULTIXACT_MEMBER_SAFE_THRESHOLD))
- {
- /*
- * To avoid swamping the postmaster with signals, we issue the autovac
- * request only when crossing a segment boundary. With default
- * compilation settings that's roughly after 50k members. This still
- * gives plenty of chances before we get into real trouble.
- */
- if ((MXOffsetToMemberPage(nextOffset) / SLRU_PAGES_PER_SEGMENT) !=
- (MXOffsetToMemberPage(nextOffset + nmembers) / SLRU_PAGES_PER_SEGMENT))
- SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
- }
-
- if (MultiXactState->oldestOffsetKnown &&
- MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit,
- nextOffset,
- nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS))
- ereport(WARNING,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg_plural("database with OID %u must be vacuumed before %d more multixact member is used",
- "database with OID %u must be vacuumed before %d more multixact members are used",
- MultiXactState->offsetStopLimit - nextOffset + nmembers,
- MultiXactState->oldestMultiXactDB,
- MultiXactState->offsetStopLimit - nextOffset + nmembers),
- errhint("Execute a database-wide VACUUM in that database with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings.")));
-
ExtendMultiXactMember(nextOffset, nmembers);
/*
@@ -1192,8 +1033,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
LWLockRelease(MultiXactGenLock);
- debug_elog4(DEBUG2, "GetNew: returning %llu offset %u",
- (unsigned long long) result, *offset);
+ debug_elog4(DEBUG2, "GetNew: returning %llu offset %llu",
+ (unsigned long long) result, (unsigned long long) *offset);
return result;
}
@@ -1303,14 +1144,14 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
if (MultiXactIdPrecedes(multi, oldestMXact))
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
- errmsg("MultiXactId %u does no longer exist -- apparent wraparound",
- multi)));
+ errmsg("MultiXactId %llu does no longer exist -- apparent wraparound",
+ (unsigned long long) multi)));
if (!MultiXactIdPrecedes(multi, nextMXact))
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
- errmsg("MultiXactId %u has not been created yet -- apparent wraparound",
- multi)));
+ errmsg("MultiXactId %llu has not been created yet -- apparent wraparound",
+ (unsigned long long) multi)));
/*
* Find out the offset at which we need to start reading MultiXactMembers
@@ -1356,7 +1197,10 @@ retry:
offptr += entryno;
offset = *offptr;
- Assert(offset != 0);
+ if (offset == 0)
+ ereport(ERROR,
+ (errmsg("found invalid zero offset in multixact %llu",
+ (unsigned long long) multi)));
/*
* Use the same increment rule as GetNewMultiXactId(), that is, don't
@@ -1403,7 +1247,7 @@ retry:
LWLockRelease(MultiXactOffsetSLRULock);
- ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
+ ptr = (MultiXactMember *) palloc0(length * sizeof(MultiXactMember));
/* Now get the members themselves. */
LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE);
@@ -1413,7 +1257,7 @@ retry:
for (i = 0; i < length; i++, offset++)
{
TransactionId *xactptr;
- uint32 *flagsptr;
+ uint64 *flagsptr;
int flagsoff;
int bshift;
int memberoff;
@@ -1439,7 +1283,7 @@ retry:
flagsoff = MXOffsetToFlagsOffset(offset);
bshift = MXOffsetToFlagsBitShift(offset);
- flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
+ flagsptr = (uint64 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
ptr[truelength].xid = *xactptr;
ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
@@ -1903,7 +1747,7 @@ void
BootStrapMultiXact(void)
{
int slotno;
- int pageno;
+ int64 pageno;
LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE);
@@ -2224,8 +2068,9 @@ void
MultiXactSetNextMXact(MultiXactId nextMulti,
MultiXactOffset nextMultiOffset)
{
- debug_elog4(DEBUG2, "MultiXact: setting next multi to %llu offset %u",
- (unsigned long long) nextMulti, nextMultiOffset);
+ debug_elog4(DEBUG2, "MultiXact: setting next multi to %llu offset %llu",
+ (unsigned long long) nextMulti,
+ (unsigned long long) nextMultiOffset);
LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
MultiXactState->nextMXact = nextMulti;
MultiXactState->nextOffset = nextMultiOffset;
@@ -2259,47 +2104,9 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid,
bool is_startup)
{
MultiXactId multiVacLimit;
- MultiXactId multiWarnLimit;
- MultiXactId multiStopLimit;
- MultiXactId multiWrapLimit;
- MultiXactId curMulti;
- bool needs_offset_vacuum;
Assert(MultiXactIdIsValid(oldest_datminmxid));
- /*
- * We pretend that a wrap will happen halfway through the multixact ID
- * space, but that's not really true, because multixacts wrap differently
- * from transaction IDs. Note that, separately from any concern about
- * multixact IDs wrapping, we must ensure that multixact members do not
- * wrap. Limits for that are set in SetOffsetVacuumLimit, not here.
- */
- multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1);
- if (multiWrapLimit < FirstMultiXactId)
- multiWrapLimit += FirstMultiXactId;
-
- /*
- * We'll refuse to continue assigning MultiXactIds once we get within 3M
- * multi of data loss. See SetTransactionIdLimit.
- */
- multiStopLimit = multiWrapLimit - 3000000;
- if (multiStopLimit < FirstMultiXactId)
- multiStopLimit -= FirstMultiXactId;
-
- /*
- * We'll start complaining loudly when we get within 40M multis of data
- * loss. This is kind of arbitrary, but if you let your gas gauge get
- * down to 2% of full, would you be looking for the next gas station? We
- * need to be fairly liberal about this number because there are lots of
- * scenarios where most transactions are done by automatic clients that
- * won't pay attention to warnings. (No, we're not gonna make this
- * configurable. If you know enough to configure it, you know enough to
- * not get in this kind of trouble in the first place.)
- */
- multiWarnLimit = multiWrapLimit - 40000000;
- if (multiWarnLimit < FirstMultiXactId)
- multiWarnLimit -= FirstMultiXactId;
-
/*
* We'll start trying to force autovacuums when oldest_datminmxid gets to
* be more than autovacuum_multixact_freeze_max_age mxids old.
@@ -2309,25 +2116,14 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid,
* its value. See SetTransactionIdLimit.
*/
multiVacLimit = oldest_datminmxid + autovacuum_multixact_freeze_max_age;
- if (multiVacLimit < FirstMultiXactId)
- multiVacLimit += FirstMultiXactId;
/* Grab lock for just long enough to set the new limit values */
LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
MultiXactState->oldestMultiXactId = oldest_datminmxid;
MultiXactState->oldestMultiXactDB = oldest_datoid;
MultiXactState->multiVacLimit = multiVacLimit;
- MultiXactState->multiWarnLimit = multiWarnLimit;
- MultiXactState->multiStopLimit = multiStopLimit;
- MultiXactState->multiWrapLimit = multiWrapLimit;
- curMulti = MultiXactState->nextMXact;
LWLockRelease(MultiXactGenLock);
- /* Log the info */
- ereport(DEBUG1,
- (errmsg_internal("MultiXactId wrap limit is %u, limited by database with OID %u",
- multiWrapLimit, oldest_datoid)));
-
/*
* Computing the actual limits is only possible once the data directory is
* in a consistent state. There's no need to compute the limits while
@@ -2339,59 +2135,6 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid,
return;
Assert(!InRecovery);
-
- /* Set limits for offset vacuum. */
- needs_offset_vacuum = SetOffsetVacuumLimit(is_startup);
-
- /*
- * If past the autovacuum force point, immediately signal an autovac
- * request. The reason for this is that autovac only processes one
- * database per invocation. Once it's finished cleaning up the oldest
- * database, it'll call here, and we'll signal the postmaster to start
- * another iteration immediately if there are still any old databases.
- */
- if ((MultiXactIdPrecedes(multiVacLimit, curMulti) ||
- needs_offset_vacuum) && IsUnderPostmaster)
- SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-
- /* Give an immediate warning if past the wrap warn point */
- if (MultiXactIdPrecedes(multiWarnLimit, curMulti))
- {
- char *oldest_datname;
-
- /*
- * We can be called when not inside a transaction, for example during
- * StartupXLOG(). In such a case we cannot do database access, so we
- * must just report the oldest DB's OID.
- *
- * Note: it's also possible that get_database_name fails and returns
- * NULL, for example because the database just got dropped. We'll
- * still warn, even though the warning might now be unnecessary.
- */
- if (IsTransactionState())
- oldest_datname = get_database_name(oldest_datoid);
- else
- oldest_datname = NULL;
-
- if (oldest_datname)
- ereport(WARNING,
- (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used",
- "database \"%s\" must be vacuumed before %u more MultiXactIds are used",
- multiWrapLimit - curMulti,
- oldest_datname,
- multiWrapLimit - curMulti),
- errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- else
- ereport(WARNING,
- (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used",
- "database with OID %u must be vacuumed before %u more MultiXactIds are used",
- multiWrapLimit - curMulti,
- oldest_datoid,
- multiWrapLimit - curMulti),
- errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- }
}
/*
@@ -2416,8 +2159,8 @@ MultiXactAdvanceNextMXact(MultiXactId minMulti,
}
if (MultiXactOffsetPrecedes(MultiXactState->nextOffset, minMultiOffset))
{
- debug_elog3(DEBUG2, "MultiXact: setting next offset to %u",
- minMultiOffset);
+ debug_elog3(DEBUG2, "MultiXact: setting next offset to %llu",
+ (unsigned long long) minMultiOffset);
MultiXactState->nextOffset = minMultiOffset;
}
LWLockRelease(MultiXactGenLock);
@@ -2489,7 +2232,7 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
{
int flagsoff;
int flagsbit;
- uint32 difference;
+ uint64 difference;
/*
* Only zero when at first entry of a page.
@@ -2510,23 +2253,7 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
LWLockRelease(MultiXactMemberSLRULock);
}
- /*
- * Compute the number of items till end of current page. Careful: if
- * addition of unsigned ints wraps around, we're at the last page of
- * the last segment; since that page holds a different number of items
- * than other pages, we need to do it differently.
- */
- if (offset + MAX_MEMBERS_IN_LAST_MEMBERS_PAGE < offset)
- {
- /*
- * This is the last page of the last segment; we can compute the
- * number of items left to allocate in it without modulo
- * arithmetic.
- */
- difference = MaxMultiXactOffset - offset + 1;
- }
- else
- difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
+ difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
/*
* Advance to next page, taking care to properly handle the wraparound
@@ -2590,184 +2317,6 @@ GetOldestMultiXactId(void)
return oldestMXact;
}
-/*
- * Determine how aggressively we need to vacuum in order to prevent member
- * wraparound.
- *
- * To do so determine what's the oldest member offset and install the limit
- * info in MultiXactState, where it can be used to prevent overrun of old data
- * in the members SLRU area.
- *
- * The return value is true if emergency autovacuum is required and false
- * otherwise.
- */
-static bool
-SetOffsetVacuumLimit(bool is_startup)
-{
- MultiXactId oldestMultiXactId;
- MultiXactId nextMXact;
- MultiXactOffset oldestOffset = 0; /* placate compiler */
- MultiXactOffset prevOldestOffset;
- MultiXactOffset nextOffset;
- bool oldestOffsetKnown = false;
- bool prevOldestOffsetKnown;
- MultiXactOffset offsetStopLimit = 0;
- MultiXactOffset prevOffsetStopLimit;
-
- /*
- * NB: Have to prevent concurrent truncation, we might otherwise try to
- * lookup an oldestMulti that's concurrently getting truncated away.
- */
- LWLockAcquire(MultiXactTruncationLock, LW_SHARED);
-
- /* Read relevant fields from shared memory. */
- LWLockAcquire(MultiXactGenLock, LW_SHARED);
- oldestMultiXactId = MultiXactState->oldestMultiXactId;
- nextMXact = MultiXactState->nextMXact;
- nextOffset = MultiXactState->nextOffset;
- prevOldestOffsetKnown = MultiXactState->oldestOffsetKnown;
- prevOldestOffset = MultiXactState->oldestOffset;
- prevOffsetStopLimit = MultiXactState->offsetStopLimit;
- Assert(MultiXactState->finishedStartup);
- LWLockRelease(MultiXactGenLock);
-
- /*
- * Determine the offset of the oldest multixact. Normally, we can read
- * the offset from the multixact itself, but there's an important special
- * case: if there are no multixacts in existence at all, oldestMXact
- * obviously can't point to one. It will instead point to the multixact
- * ID that will be assigned the next time one is needed.
- */
- if (oldestMultiXactId == nextMXact)
- {
- /*
- * When the next multixact gets created, it will be stored at the next
- * offset.
- */
- oldestOffset = nextOffset;
- oldestOffsetKnown = true;
- }
- else
- {
- /*
- * Figure out where the oldest existing multixact's offsets are
- * stored. Due to bugs in early release of PostgreSQL 9.3.X and 9.4.X,
- * the supposedly-earliest multixact might not really exist. We are
- * careful not to fail in that case.
- */
- oldestOffsetKnown =
- find_multixact_start(oldestMultiXactId, &oldestOffset);
-
- if (oldestOffsetKnown)
- ereport(DEBUG1,
- (errmsg_internal("oldest MultiXactId member is at offset %llu",
- (unsigned long long) oldestOffset)));
- else
- ereport(LOG,
- (errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %llu does not exist on disk",
- (unsigned long long) oldestMultiXactId)));
- }
-
- LWLockRelease(MultiXactTruncationLock);
-
- /*
- * If we can, compute limits (and install them MultiXactState) to prevent
- * overrun of old data in the members SLRU area. We can only do so if the
- * oldest offset is known though.
- */
- if (oldestOffsetKnown)
- {
- /* move back to start of the corresponding segment */
- offsetStopLimit = oldestOffset - (oldestOffset %
- (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT));
-
- /* always leave one segment before the wraparound point */
- offsetStopLimit -= (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT);
-
- if (!prevOldestOffsetKnown && !is_startup)
- ereport(LOG,
- (errmsg("MultiXact member wraparound protections are now enabled")));
-
- ereport(DEBUG1,
- (errmsg_internal("MultiXact member stop limit is now %llu based on MultiXact %llu",
- (unsigned long long) offsetStopLimit,
- (unsigned long long) oldestMultiXactId)));
- }
- else if (prevOldestOffsetKnown)
- {
- /*
- * If we failed to get the oldest offset this time, but we have a
- * value from a previous pass through this function, use the old
- * values rather than automatically forcing an emergency autovacuum
- * cycle again.
- */
- oldestOffset = prevOldestOffset;
- oldestOffsetKnown = true;
- offsetStopLimit = prevOffsetStopLimit;
- }
-
- /* Install the computed values */
- LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
- MultiXactState->oldestOffset = oldestOffset;
- MultiXactState->oldestOffsetKnown = oldestOffsetKnown;
- MultiXactState->offsetStopLimit = offsetStopLimit;
- LWLockRelease(MultiXactGenLock);
-
- /*
- * Do we need an emergency autovacuum? If we're not sure, assume yes.
- */
- return !oldestOffsetKnown ||
- (nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD);
-}
-
-/*
- * Return whether adding "distance" to "start" would move past "boundary".
- *
- * We use this to determine whether the addition is "wrapping around" the
- * boundary point, hence the name. The reason we don't want to use the regular
- * 2^31-modulo arithmetic here is that we want to be able to use the whole of
- * the 2^32-1 space here, allowing for more multixacts than would fit
- * otherwise.
- */
-static bool
-MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start,
- uint32 distance)
-{
- MultiXactOffset finish;
-
- /*
- * Note that offset number 0 is not used (see GetMultiXactIdMembers), so
- * if the addition wraps around the UINT_MAX boundary, skip that value.
- */
- finish = start + distance;
- if (finish < start)
- finish++;
-
- /*-----------------------------------------------------------------------
- * When the boundary is numerically greater than the starting point, any
- * value numerically between the two is not wrapped:
- *
- * <----S----B---->
- * [---) = F wrapped past B (and UINT_MAX)
- * [---) = F not wrapped
- * [----] = F wrapped past B
- *
- * When the boundary is numerically less than the starting point (i.e. the
- * UINT_MAX wraparound occurs somewhere in between) then all values in
- * between are wrapped:
- *
- * <----B----S---->
- * [---) = F not wrapped past B (but wrapped past UINT_MAX)
- * [---) = F wrapped past B (and UINT_MAX)
- * [----] = F not wrapped
- *-----------------------------------------------------------------------
- */
- if (start < boundary)
- return finish >= boundary || finish < start;
- else
- return finish >= boundary && finish < start;
-}
-
/*
* Find the starting offset of the given MultiXactId.
*
@@ -2811,97 +2360,6 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
return true;
}
-/*
- * Determine how many multixacts, and how many multixact members, currently
- * exist. Return false if unable to determine.
- */
-static bool
-ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members)
-{
- MultiXactOffset nextOffset;
- MultiXactOffset oldestOffset;
- MultiXactId oldestMultiXactId;
- MultiXactId nextMultiXactId;
- bool oldestOffsetKnown;
-
- LWLockAcquire(MultiXactGenLock, LW_SHARED);
- nextOffset = MultiXactState->nextOffset;
- oldestMultiXactId = MultiXactState->oldestMultiXactId;
- nextMultiXactId = MultiXactState->nextMXact;
- oldestOffset = MultiXactState->oldestOffset;
- oldestOffsetKnown = MultiXactState->oldestOffsetKnown;
- LWLockRelease(MultiXactGenLock);
-
- if (!oldestOffsetKnown)
- return false;
-
- *members = nextOffset - oldestOffset;
- *multixacts = nextMultiXactId - oldestMultiXactId;
- return true;
-}
-
-/*
- * Multixact members can be removed once the multixacts that refer to them
- * are older than every datminmxid. autovacuum_multixact_freeze_max_age and
- * vacuum_multixact_freeze_table_age work together to make sure we never have
- * too many multixacts; we hope that, at least under normal circumstances,
- * this will also be sufficient to keep us from using too many offsets.
- * However, if the average multixact has many members, we might exhaust the
- * members space while still using few enough members that these limits fail
- * to trigger full table scans for relminmxid advancement. At that point,
- * we'd have no choice but to start failing multixact-creating operations
- * with an error.
- *
- * To prevent that, if more than a threshold portion of the members space is
- * used, we effectively reduce autovacuum_multixact_freeze_max_age and
- * to a value just less than the number of multixacts in use. We hope that
- * this will quickly trigger autovacuuming on the table or tables with the
- * oldest relminmxid, thus allowing datminmxid values to advance and removing
- * some members.
- *
- * As the fraction of the member space currently in use grows, we become
- * more aggressive in clamping this value. That not only causes autovacuum
- * to ramp up, but also makes any manual vacuums the user issues more
- * aggressive. This happens because vacuum_set_xid_limits() clamps the
- * freeze table and the minimum freeze age based on the effective
- * autovacuum_multixact_freeze_max_age this function returns. In the worst
- * case, we'll claim the freeze_max_age to zero, and every vacuum of any
- * table will try to freeze every multixact.
- *
- * It's possible that these thresholds should be user-tunable, but for now
- * we keep it simple.
- */
-int
-MultiXactMemberFreezeThreshold(void)
-{
- MultiXactOffset members;
- uint32 multixacts;
- uint32 victim_multixacts;
- double fraction;
-
- /* If we can't determine member space utilization, assume the worst. */
- if (!ReadMultiXactCounts(&multixacts, &members))
- return 0;
-
- /* If member space utilization is low, no special action is required. */
- if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD)
- return autovacuum_multixact_freeze_max_age;
-
- /*
- * Compute a target for relminmxid advancement. The number of multixacts
- * we try to eliminate from the system is based on how far we are past
- * MULTIXACT_MEMBER_SAFE_THRESHOLD.
- */
- fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD) /
- (MULTIXACT_MEMBER_DANGER_THRESHOLD - MULTIXACT_MEMBER_SAFE_THRESHOLD);
- victim_multixacts = multixacts * fraction;
-
- /* fraction could be > 1.0, but lowest possible freeze age is zero */
- if (victim_multixacts > multixacts)
- return 0;
- return multixacts - victim_multixacts;
-}
-
typedef struct mxtruncinfo
{
int64 earliestExistingPage;
@@ -2928,35 +2386,12 @@ SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int64 segpage, void *data
/*
* Delete members segments [oldest, newOldest)
- *
- * The members SLRU can, in contrast to the offsets one, be filled to almost
- * the full range at once. This means SimpleLruTruncate() can't trivially be
- * used - instead the to-be-deleted range is computed using the offsets
- * SLRU. C.f. TruncateMultiXact().
*/
static void
PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldestOffset)
{
- const int maxsegment = MXOffsetToMemberSegment(MaxMultiXactOffset);
- int startsegment = MXOffsetToMemberSegment(oldestOffset);
- int endsegment = MXOffsetToMemberSegment(newOldestOffset);
- int segment = startsegment;
-
- /*
- * Delete all the segments but the last one. The last segment can still
- * contain, possibly partially, valid data.
- */
- while (segment != endsegment)
- {
- elog(DEBUG2, "truncating multixact members segment %x", segment);
- SlruDeleteSegment(MultiXactMemberCtl, segment);
-
- /* move to next segment, handling wraparound correctly */
- if (segment == maxsegment)
- segment = 0;
- else
- segment += 1;
- }
+ SimpleLruTruncate(MultiXactMemberCtl,
+ MXOffsetToMemberPage(newOldestOffset));
}
/*
@@ -3075,7 +2510,8 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
{
ereport(LOG,
(errmsg("oldest MultiXact %llu not found, earliest MultiXact %llu, skipping truncation",
- (unsigned long long) oldestMulti, (unsigned long long) earliest)));
+ (unsigned long long) oldestMulti,
+ (unsigned long long) earliest)));
LWLockRelease(MultiXactTruncationLock);
return;
}
@@ -3099,14 +2535,14 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
}
elog(DEBUG1, "performing multixact truncation: "
- "offsets [%llu, %llu), offsets segments [%x, %x), "
- "members [%u, %u), members segments [%x, %x)",
+ "offsets [%llu, %llu), offsets segments [%012llx, %012llx), "
+ "members [%llu, %llu), members segments [%012llx, %012llx)",
(unsigned long long) oldestMulti, (unsigned long long) newOldestMulti,
- MultiXactIdToOffsetSegment(oldestMulti),
- MultiXactIdToOffsetSegment(newOldestMulti),
- oldestOffset, newOldestOffset,
- MXOffsetToMemberSegment(oldestOffset),
- MXOffsetToMemberSegment(newOldestOffset));
+ (unsigned long long) MultiXactIdToOffsetSegment(oldestMulti),
+ (unsigned long long) MultiXactIdToOffsetSegment(newOldestMulti),
+ (unsigned long long) oldestOffset, (unsigned long long) newOldestOffset,
+ (unsigned long long) MXOffsetToMemberSegment(oldestOffset),
+ (unsigned long long) MXOffsetToMemberSegment(newOldestOffset));
/*
* Do truncation, and the WAL logging of the truncation, in a critical
@@ -3180,7 +2616,7 @@ MultiXactOffsetPagePrecedes(int64 page1, int64 page2)
/*
* Decide whether a MultiXactMember page number is "older" for truncation
- * purposes. There is no "invalid offset number" so use the numbers verbatim.
+ * purposes. There is no "invalid offset number" so use the numbers verbatim.
*/
static bool
MultiXactMemberPagePrecedes(int64 page1, int64 page2)
@@ -3205,7 +2641,7 @@ MultiXactMemberPagePrecedes(int64 page1, int64 page2)
bool
MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
{
- int32 diff = (int32) (multi1 - multi2);
+ int64 diff = (int64) (multi1 - multi2);
return (diff < 0);
}
@@ -3219,7 +2655,7 @@ MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
bool
MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
{
- int32 diff = (int32) (multi1 - multi2);
+ int64 diff = (int64) (multi1 - multi2);
return (diff <= 0);
}
@@ -3231,7 +2667,7 @@ MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
static bool
MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
{
- int32 diff = (int32) (offset1 - offset2);
+ int64 diff = (int64) (offset1 - offset2);
return (diff < 0);
}
@@ -3355,15 +2791,16 @@ multixact_redo(XLogReaderState *record)
SizeOfMultiXactTruncate);
elog(DEBUG1, "replaying multixact truncation: "
- "offsets [%llu, %llu), offsets segments [%x, %x), "
- "members [%u, %u), members segments [%x, %x)",
+ "offsets [%llu, %llu), offsets segments [%012llx, %012llx), "
+ "members [%llu, %llu), members segments [%012llx, %012llx)",
(unsigned long long) xlrec.startTruncOff,
(unsigned long long) xlrec.endTruncOff,
- MultiXactIdToOffsetSegment(xlrec.startTruncOff),
- MultiXactIdToOffsetSegment(xlrec.endTruncOff),
- xlrec.startTruncMemb, xlrec.endTruncMemb,
- MXOffsetToMemberSegment(xlrec.startTruncMemb),
- MXOffsetToMemberSegment(xlrec.endTruncMemb));
+ (unsigned long long) MultiXactIdToOffsetSegment(xlrec.startTruncOff),
+ (unsigned long long) MultiXactIdToOffsetSegment(xlrec.endTruncOff),
+ (unsigned long long) xlrec.startTruncMemb,
+ (unsigned long long) xlrec.endTruncMemb,
+ (unsigned long long) MXOffsetToMemberSegment(xlrec.startTruncMemb),
+ (unsigned long long) MXOffsetToMemberSegment(xlrec.endTruncMemb));
/* should not be required, but more than cheap enough */
LWLockAcquire(MultiXactTruncationLock, LW_EXCLUSIVE);
@@ -3407,7 +2844,8 @@ pg_get_multixact_members(PG_FUNCTION_ARGS)
if (mxid < FirstMultiXactId)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("invalid MultiXactId: %llu", (unsigned long long) mxid)));
+ errmsg("invalid MultiXactId: %llu",
+ (unsigned long long) mxid)));
if (SRF_IS_FIRSTCALL())
{
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 9e765c6c28..c186e177ed 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -1428,7 +1428,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
* must not assign.
*/
lhs = per_page + offset; /* skip first page to avoid non-normal XIDs */
- rhs = lhs + (1U << 31);
+ rhs = lhs + (1ULL << 63);
Assert(TransactionIdPrecedes(lhs, rhs));
Assert(TransactionIdPrecedes(rhs, lhs));
Assert(!TransactionIdPrecedes(lhs - 1, rhs));
@@ -1444,13 +1444,14 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page));
Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page));
Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page)
- || (1U << 31) % per_page != 0); /* See CommitTsPagePrecedes() */
+ || (1ULL << 63) % per_page != 0); /* See CommitTsPagePrecedes() */
Assert(ctl->PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page)
- || (1U << 31) % per_page != 0);
+ || (1ULL << 63) % per_page != 0);
Assert(ctl->PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page));
Assert(ctl->PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page));
Assert(!ctl->PagePrecedes(rhs / per_page, (lhs + per_page) / per_page));
+
/*
* GetNewTransactionId() has assigned the last XID it can safely use, and
* that XID is in the *LAST* page of the second segment. We must not
@@ -1460,7 +1461,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
newestXact = newestPage * per_page + offset;
Assert(newestXact / per_page == newestPage);
oldestXact = newestXact + 1;
- oldestXact -= 1U << 31;
+ oldestXact -= 1ULL << 63;
oldestPage = oldestXact / per_page;
Assert(!SlruMayDeleteSegment(ctl,
(newestPage -
@@ -1476,7 +1477,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset)
newestXact = newestPage * per_page + offset;
Assert(newestXact / per_page == newestPage);
oldestXact = newestXact + 1;
- oldestXact -= 1U << 31;
+ oldestXact -= 1ULL << 63;
oldestPage = oldestXact / per_page;
Assert(!SlruMayDeleteSegment(ctl,
(newestPage -
@@ -1582,7 +1583,7 @@ SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
if ((len == 12 || len == 13 || len == 14) &&
strspn(clde->d_name, "0123456789ABCDEF") == len)
{
- segno = (int) strtol(clde->d_name, NULL, 16);
+ segno = strtoi64(clde->d_name, NULL, 16);
segpage = segno * SLRU_PAGES_PER_SEGMENT;
elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s",
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index e2e20ed06c..2d124d9600 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -212,11 +212,14 @@ void
BootStrapSUBTRANS(void)
{
int slotno;
+ int64 pageno;
+
+ pageno = TransactionIdToPage(XidFromFullTransactionId(ShmemVariableCache->nextXid));
LWLockAcquire(SubtransSLRULock, LW_EXCLUSIVE);
/* Create and zero the first page of the subtrans log */
- slotno = ZeroSUBTRANSPage(0);
+ slotno = ZeroSUBTRANSPage(pageno);
/* Make sure it's written out */
SimpleLruWritePage(SubTransCtl, slotno);
@@ -269,9 +272,6 @@ StartupSUBTRANS(TransactionId oldestActiveXID)
{
(void) ZeroSUBTRANSPage(startPage);
startPage++;
- /* must account for wraparound */
- if (startPage > TransactionIdToPage(MaxTransactionId))
- startPage = 0;
}
(void) ZeroSUBTRANSPage(startPage);
@@ -348,6 +348,7 @@ TruncateSUBTRANS(TransactionId oldestXact)
* a page and oldestXact == next XID. In that case, if we didn't subtract
* one, we'd trigger SimpleLruTruncate's wraparound detection.
*/
+
TransactionIdRetreat(oldestXact);
cutoffPage = TransactionIdToPage(oldestXact);
diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c
index 27410c4697..a2a41e58f2 100644
--- a/src/backend/access/transam/transam.c
+++ b/src/backend/access/transam/transam.c
@@ -274,14 +274,14 @@ TransactionIdPrecedes(TransactionId id1, TransactionId id2)
{
/*
* If either ID is a permanent XID then we can just do unsigned
- * comparison. If both are normal, do a modulo-2^32 comparison.
+ * comparison. If both are normal, do a modulo-2^64 comparison.
*/
- int32 diff;
+ int64 diff;
if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2))
return (id1 < id2);
- diff = (int32) (id1 - id2);
+ diff = (int64) (id1 - id2);
return (diff < 0);
}
@@ -291,12 +291,12 @@ TransactionIdPrecedes(TransactionId id1, TransactionId id2)
bool
TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
{
- int32 diff;
+ int64 diff;
if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2))
return (id1 <= id2);
- diff = (int32) (id1 - id2);
+ diff = (int64) (id1 - id2);
return (diff <= 0);
}
@@ -306,12 +306,12 @@ TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
bool
TransactionIdFollows(TransactionId id1, TransactionId id2)
{
- int32 diff;
+ int64 diff;
if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2))
return (id1 > id2);
- diff = (int32) (id1 - id2);
+ diff = (int64) (id1 - id2);
return (diff > 0);
}
@@ -321,12 +321,12 @@ TransactionIdFollows(TransactionId id1, TransactionId id2)
bool
TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
{
- int32 diff;
+ int64 diff;
if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2))
return (id1 >= id2);
- diff = (int32) (id1 - id2);
+ diff = (int64) (id1 - id2);
return (diff >= 0);
}
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 4b81dfee16..0732393539 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -476,8 +476,8 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
proc->lxid = xid;
proc->backendId = InvalidBackendId;
}
- proc->xid = xid;
- Assert(proc->xmin == InvalidTransactionId);
+ pg_atomic_write_u64(&proc->xid, xid);
+ Assert(pg_atomic_read_u64(&proc->xmin) == InvalidTransactionId);
proc->delayChkptFlags = 0;
proc->statusFlags = 0;
proc->pid = 0;
@@ -792,7 +792,7 @@ pg_prepared_xact(PG_FUNCTION_ARGS)
* Form tuple with appropriate data.
*/
- values[0] = TransactionIdGetDatum(proc->xid);
+ values[0] = TransactionIdGetDatum(pg_atomic_read_u64(&proc->xid));
values[1] = CStringGetTextDatum(gxact->gid);
values[2] = TimestampTzGetDatum(gxact->prepared_at);
values[3] = ObjectIdGetDatum(gxact->owner);
@@ -943,7 +943,7 @@ TwoPhaseGetDummyProc(TransactionId xid, bool lock_held)
/************************************************************************/
#define TwoPhaseFilePath(path, xid) \
- snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X", xid)
+ snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%016llX", (unsigned long long) xid)
/*
* 2PC state file format:
@@ -1882,13 +1882,13 @@ restoreTwoPhaseData(void)
cldir = AllocateDir(TWOPHASE_DIR);
while ((clde = ReadDir(cldir, TWOPHASE_DIR)) != NULL)
{
- if (strlen(clde->d_name) == 8 &&
- strspn(clde->d_name, "0123456789ABCDEF") == 8)
+ if (strlen(clde->d_name) == 16 &&
+ strspn(clde->d_name, "0123456789ABCDEF") == 16)
{
TransactionId xid;
char *buf;
- xid = (TransactionId) strtoul(clde->d_name, NULL, 16);
+ xid = (TransactionId) strtou64(clde->d_name, NULL, 16);
buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr,
true, false, false);
@@ -2220,7 +2220,6 @@ ProcessTwoPhaseBuffer(TransactionId xid,
if (fromdisk)
{
- /* Read and validate file */
buf = ReadTwoPhaseFile(xid, false);
}
else
diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c
index 849a7ce9d6..53c79d9a31 100644
--- a/src/backend/access/transam/varsup.c
+++ b/src/backend/access/transam/varsup.c
@@ -66,9 +66,9 @@ GetNewTransactionId(bool isSubXact)
if (IsBootstrapProcessingMode())
{
Assert(!isSubXact);
- MyProc->xid = BootstrapTransactionId;
- ProcGlobal->xids[MyProc->pgxactoff] = BootstrapTransactionId;
- return FullTransactionIdFromEpochAndXid(0, BootstrapTransactionId);
+ pg_atomic_write_u64(&MyProc->xid, BootstrapTransactionId);
+ pg_atomic_write_u64(&ProcGlobal->xids[MyProc->pgxactoff], BootstrapTransactionId);
+ return FullTransactionIdFromXid(BootstrapTransactionId);
}
/* safety check, we should never get this far in a HS standby */
@@ -102,11 +102,6 @@ GetNewTransactionId(bool isSubXact)
* possibility of deadlock while doing get_database_name(). First,
* copy all the shared values we'll need in this path.
*/
- TransactionId xidWarnLimit = ShmemVariableCache->xidWarnLimit;
- TransactionId xidStopLimit = ShmemVariableCache->xidStopLimit;
- TransactionId xidWrapLimit = ShmemVariableCache->xidWrapLimit;
- Oid oldest_datoid = ShmemVariableCache->oldestXidDB;
-
LWLockRelease(XidGenLock);
/*
@@ -117,48 +112,6 @@ GetNewTransactionId(bool isSubXact)
if (IsUnderPostmaster && (xid % 65536) == 0)
SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
- if (IsUnderPostmaster &&
- TransactionIdFollowsOrEquals(xid, xidStopLimit))
- {
- char *oldest_datname = get_database_name(oldest_datoid);
-
- /* complain even if that DB has disappeared */
- if (oldest_datname)
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("database is not accepting commands to avoid wraparound data loss in database \"%s\"",
- oldest_datname),
- errhint("Stop the postmaster and vacuum that database in single-user mode.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- else
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg("database is not accepting commands to avoid wraparound data loss in database with OID %u",
- oldest_datoid),
- errhint("Stop the postmaster and vacuum that database in single-user mode.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- }
- else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit))
- {
- char *oldest_datname = get_database_name(oldest_datoid);
-
- /* complain even if that DB has disappeared */
- if (oldest_datname)
- ereport(WARNING,
- (errmsg("database \"%s\" must be vacuumed within %u transactions",
- oldest_datname,
- xidWrapLimit - xid),
- errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- else
- ereport(WARNING,
- (errmsg("database with OID %u must be vacuumed within %u transactions",
- oldest_datoid,
- xidWrapLimit - xid),
- errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- }
-
/* Re-acquire lock and start over */
LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
full_xid = ShmemVariableCache->nextXid;
@@ -228,8 +181,8 @@ GetNewTransactionId(bool isSubXact)
Assert(!MyProc->subxidStatus.overflowed);
/* LWLockRelease acts as barrier */
- MyProc->xid = xid;
- ProcGlobal->xids[MyProc->pgxactoff] = xid;
+ pg_atomic_write_u64(&MyProc->xid, xid);
+ pg_atomic_write_u64(&ProcGlobal->xids[MyProc->pgxactoff], xid);
}
else
{
@@ -270,7 +223,7 @@ ReadNextFullTransactionId(void)
}
/*
- * Advance nextXid to the value after a given xid. The epoch is inferred.
+ * Advance nextXid to the value after a given xid.
* This must only be called during recovery or from two-phase start-up code.
*/
void
@@ -278,7 +231,6 @@ AdvanceNextFullTransactionIdPastXid(TransactionId xid)
{
FullTransactionId newNextFullXid;
TransactionId next_xid;
- uint32 epoch;
/*
* It is safe to read nextXid without a lock, because this is only called
@@ -292,19 +244,9 @@ AdvanceNextFullTransactionIdPastXid(TransactionId xid)
if (!TransactionIdFollowsOrEquals(xid, next_xid))
return;
- /*
- * Compute the FullTransactionId that comes after the given xid. To do
- * this, we preserve the existing epoch, but detect when we've wrapped
- * into a new epoch. This is necessary because WAL records and 2PC state
- * currently contain 32 bit xids. The wrap logic is safe in those cases
- * because the span of active xids cannot exceed one epoch at any given
- * point in the WAL stream.
- */
+ /* Compute the FullTransactionId that comes after the given xid. */
TransactionIdAdvance(xid);
- epoch = EpochFromFullTransactionId(ShmemVariableCache->nextXid);
- if (unlikely(xid < next_xid))
- ++epoch;
- newNextFullXid = FullTransactionIdFromEpochAndXid(epoch, xid);
+ newNextFullXid = FullTransactionIdFromXid(xid);
/*
* We still need to take a lock to modify the value when there are
@@ -345,54 +287,10 @@ void
SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
{
TransactionId xidVacLimit;
- TransactionId xidWarnLimit;
- TransactionId xidStopLimit;
- TransactionId xidWrapLimit;
TransactionId curXid;
Assert(TransactionIdIsNormal(oldest_datfrozenxid));
- /*
- * The place where we actually get into deep trouble is halfway around
- * from the oldest potentially-existing XID. (This calculation is
- * probably off by one or two counts, because the special XIDs reduce the
- * size of the loop a little bit. But we throw in plenty of slop below,
- * so it doesn't matter.)
- */
- xidWrapLimit = oldest_datfrozenxid + (MaxTransactionId >> 1);
- if (xidWrapLimit < FirstNormalTransactionId)
- xidWrapLimit += FirstNormalTransactionId;
-
- /*
- * We'll refuse to continue assigning XIDs in interactive mode once we get
- * within 3M transactions of data loss. This leaves lots of room for the
- * DBA to fool around fixing things in a standalone backend, while not
- * being significant compared to total XID space. (VACUUM requires an XID
- * if it truncates at wal_level!=minimal. "VACUUM (ANALYZE)", which a DBA
- * might do by reflex, assigns an XID. Hence, we had better be sure
- * there's lots of XIDs left...) Also, at default BLCKSZ, this leaves two
- * completely-idle segments. In the event of edge-case bugs involving
- * page or segment arithmetic, idle segments render the bugs unreachable
- * outside of single-user mode.
- */
- xidStopLimit = xidWrapLimit - 3000000;
- if (xidStopLimit < FirstNormalTransactionId)
- xidStopLimit -= FirstNormalTransactionId;
-
- /*
- * We'll start complaining loudly when we get within 40M transactions of
- * data loss. This is kind of arbitrary, but if you let your gas gauge
- * get down to 2% of full, would you be looking for the next gas station?
- * We need to be fairly liberal about this number because there are lots
- * of scenarios where most transactions are done by automatic clients that
- * won't pay attention to warnings. (No, we're not gonna make this
- * configurable. If you know enough to configure it, you know enough to
- * not get in this kind of trouble in the first place.)
- */
- xidWarnLimit = xidWrapLimit - 40000000;
- if (xidWarnLimit < FirstNormalTransactionId)
- xidWarnLimit -= FirstNormalTransactionId;
-
/*
* We'll start trying to force autovacuums when oldest_datfrozenxid gets
* to be more than autovacuum_freeze_max_age transactions old.
@@ -416,18 +314,10 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
ShmemVariableCache->oldestXid = oldest_datfrozenxid;
ShmemVariableCache->xidVacLimit = xidVacLimit;
- ShmemVariableCache->xidWarnLimit = xidWarnLimit;
- ShmemVariableCache->xidStopLimit = xidStopLimit;
- ShmemVariableCache->xidWrapLimit = xidWrapLimit;
ShmemVariableCache->oldestXidDB = oldest_datoid;
curXid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
LWLockRelease(XidGenLock);
- /* Log the info */
- ereport(DEBUG1,
- (errmsg_internal("transaction ID wrap limit is %u, limited by database with OID %u",
- xidWrapLimit, oldest_datoid)));
-
/*
* If past the autovacuum force point, immediately signal an autovac
* request. The reason for this is that autovac only processes one
@@ -438,41 +328,6 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
if (TransactionIdFollowsOrEquals(curXid, xidVacLimit) &&
IsUnderPostmaster && !InRecovery)
SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-
- /* Give an immediate warning if past the wrap warn point */
- if (TransactionIdFollowsOrEquals(curXid, xidWarnLimit) && !InRecovery)
- {
- char *oldest_datname;
-
- /*
- * We can be called when not inside a transaction, for example during
- * StartupXLOG(). In such a case we cannot do database access, so we
- * must just report the oldest DB's OID.
- *
- * Note: it's also possible that get_database_name fails and returns
- * NULL, for example because the database just got dropped. We'll
- * still warn, even though the warning might now be unnecessary.
- */
- if (IsTransactionState())
- oldest_datname = get_database_name(oldest_datoid);
- else
- oldest_datname = NULL;
-
- if (oldest_datname)
- ereport(WARNING,
- (errmsg("database \"%s\" must be vacuumed within %u transactions",
- oldest_datname,
- xidWrapLimit - curXid),
- errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- else
- ereport(WARNING,
- (errmsg("database with OID %u must be vacuumed within %u transactions",
- oldest_datoid,
- xidWrapLimit - curXid),
- errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
- "You might also need to commit or roll back old prepared transactions, or drop stale replication slots.")));
- }
}
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index a5116d10b1..8a741b48f8 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -402,7 +402,6 @@ IsAbortedTransactionBlockState(void)
return false;
}
-
/*
* GetTopTransactionId
*
@@ -5673,6 +5672,17 @@ XactLogCommitRecord(TimestampTz commit_time,
xl_subxacts.nsubxacts = nsubxacts;
}
+ if (TransactionIdIsValid(twophase_xid))
+ {
+ xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE;
+ xl_twophase.xid_lo = (uint32) (twophase_xid & 0xFFFFFFFF);
+ xl_twophase.xid_hi = (uint32) (twophase_xid >> 32);
+ Assert(twophase_gid != NULL);
+
+ if (XLogLogicalInfoActive())
+ xl_xinfo.xinfo |= XACT_XINFO_HAS_GID;
+ }
+
if (nrels > 0)
{
xl_xinfo.xinfo |= XACT_XINFO_HAS_RELFILELOCATORS;
@@ -5692,16 +5702,6 @@ XactLogCommitRecord(TimestampTz commit_time,
xl_invals.nmsgs = nmsgs;
}
- if (TransactionIdIsValid(twophase_xid))
- {
- xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE;
- xl_twophase.xid = twophase_xid;
- Assert(twophase_gid != NULL);
-
- if (XLogLogicalInfoActive())
- xl_xinfo.xinfo |= XACT_XINFO_HAS_GID;
- }
-
/* dump transaction origin information */
if (replorigin_session_origin != InvalidRepOriginId)
{
@@ -5838,7 +5838,8 @@ XactLogAbortRecord(TimestampTz abort_time,
if (TransactionIdIsValid(twophase_xid))
{
xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE;
- xl_twophase.xid = twophase_xid;
+ xl_twophase.xid_lo = (uint32) (twophase_xid & 0xFFFFFFFF);
+ xl_twophase.xid_hi = (uint32) (twophase_xid >> 32);
Assert(twophase_gid != NULL);
if (XLogLogicalInfoActive())
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 3c74f1502a..b3ac2393eb 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -4714,8 +4714,8 @@ BootStrapXLOG(void)
checkPoint.PrevTimeLineID = BootstrapTimeLineID;
checkPoint.fullPageWrites = fullPageWrites;
checkPoint.nextXid =
- FullTransactionIdFromEpochAndXid(0, Max(FirstNormalTransactionId,
- start_xid));
+ FullTransactionIdFromXid(Max(FirstNormalTransactionId,
+ start_xid));
checkPoint.nextOid = FirstGenbkiObjectId;
checkPoint.nextMulti = Max(FirstMultiXactId, start_mxid);
checkPoint.nextMultiOffset = start_mxoff;
@@ -6814,7 +6814,7 @@ CreateCheckPoint(int flags)
UpdateControlFile();
LWLockRelease(ControlFileLock);
- /* Update shared-memory copy of checkpoint XID/epoch */
+ /* Update shared-memory copy of checkpoint XID/base */
SpinLockAcquire(&XLogCtl->info_lck);
XLogCtl->ckptFullXid = checkPoint.nextXid;
SpinLockRelease(&XLogCtl->info_lck);
@@ -7840,7 +7840,7 @@ xlog_redo(XLogReaderState *record)
ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
LWLockRelease(ControlFileLock);
- /* Update shared-memory copy of checkpoint XID/epoch */
+ /* Update shared-memory copy of checkpoint XID/base */
SpinLockAcquire(&XLogCtl->info_lck);
XLogCtl->ckptFullXid = checkPoint.nextXid;
SpinLockRelease(&XLogCtl->info_lck);
@@ -7901,7 +7901,7 @@ xlog_redo(XLogReaderState *record)
ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
LWLockRelease(ControlFileLock);
- /* Update shared-memory copy of checkpoint XID/epoch */
+ /* Update shared-memory copy of checkpoint XID/base */
SpinLockAcquire(&XLogCtl->info_lck);
XLogCtl->ckptFullXid = checkPoint.nextXid;
SpinLockRelease(&XLogCtl->info_lck);
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index 5ca15ebbf2..fbeec030f9 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -260,6 +260,11 @@ XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
BufferGetTag(buffer, &regbuf->rlocator, &regbuf->forkno, &regbuf->block);
regbuf->page = BufferGetPage(buffer);
regbuf->flags = flags;
+ if (IsBufferConverted(buffer))
+ {
+ regbuf->flags |= REGBUF_CONVERTED;
+ MarkBufferConverted(buffer, false);
+ }
regbuf->rdata_tail = (XLogRecData *) &regbuf->rdata_head;
regbuf->rdata_len = 0;
@@ -583,6 +588,8 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
needs_backup = true;
else if (regbuf->flags & REGBUF_NO_IMAGE)
needs_backup = false;
+ else if (regbuf->flags & REGBUF_CONVERTED)
+ needs_backup = true;
else if (!doPageWrites)
needs_backup = false;
else
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 5a8fe81f82..f5f9f2cb18 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -2144,37 +2144,3 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
return true;
}
-
-#ifndef FRONTEND
-
-/*
- * Extract the FullTransactionId from a WAL record.
- */
-FullTransactionId
-XLogRecGetFullXid(XLogReaderState *record)
-{
- TransactionId xid,
- next_xid;
- uint32 epoch;
-
- /*
- * This function is only safe during replay, because it depends on the
- * replay state. See AdvanceNextFullTransactionIdPastXid() for more.
- */
- Assert(AmStartupProcess() || !IsUnderPostmaster);
-
- xid = XLogRecGetXid(record);
- next_xid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
- epoch = EpochFromFullTransactionId(ShmemVariableCache->nextXid);
-
- /*
- * If xid is numerically greater than next_xid, it has to be from the last
- * epoch.
- */
- if (unlikely(xid > next_xid))
- --epoch;
-
- return FullTransactionIdFromEpochAndXid(epoch, xid);
-}
-
-#endif
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index e83c7dded9..5a72d24f6f 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -816,7 +816,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
wasShutdown ? "true" : "false")));
ereport(DEBUG1,
(errmsg_internal("next transaction ID: " UINT64_FORMAT "; next OID: %u",
- U64FromFullTransactionId(checkPoint.nextXid),
+ XidFromFullTransactionId(checkPoint.nextXid),
checkPoint.nextOid)));
ereport(DEBUG1,
(errmsg_internal("next MultiXactId: %llu; next MultiXactOffset: %llu",
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index a6edfcda32..af486d324b 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -120,7 +120,7 @@ static const struct typinfo TypInfo[] = {
F_OIDIN, F_OIDOUT},
{"tid", TIDOID, 0, 6, false, TYPALIGN_SHORT, TYPSTORAGE_PLAIN, InvalidOid,
F_TIDIN, F_TIDOUT},
- {"xid", XIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
+ {"xid", XIDOID, 0, 8, FLOAT8PASSBYVAL, TYPALIGN_XID, TYPSTORAGE_PLAIN, InvalidOid,
F_XIDIN, F_XIDOUT},
{"cid", CIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
F_CIDIN, F_CIDOUT},
@@ -252,15 +252,13 @@ BootstrapModeMain(int argc, char *argv[], bool check_only)
break;
case 'm':
{
- unsigned long value;
- char *endptr;
+ char *endptr;
errno = 0;
- value = strtoul(optarg, &endptr, 0);
- start_mxid = value;
+ start_mxid = strtoull(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0 ||
- value != start_mxid) /* overflow */
+ !StartMultiXactIdIsValid(start_mxid))
{
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -270,15 +268,13 @@ BootstrapModeMain(int argc, char *argv[], bool check_only)
break;
case 'o':
{
- unsigned long value;
- char *endptr;
+ char *endptr;
errno = 0;
- value = strtoul(optarg, &endptr, 0);
- start_mxoff = value;
+ start_mxoff = strtoull(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0 ||
- value != start_mxoff) /* overflow */
+ !StartMultiXactOffsetIsValid(start_mxoff))
{
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -303,15 +299,13 @@ BootstrapModeMain(int argc, char *argv[], bool check_only)
break;
case 'x':
{
- unsigned long value;
- char *endptr;
+ char *endptr;
errno = 0;
- value = strtoul(optarg, &endptr, 0);
- start_xid = value;
+ start_xid = strtoull(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0 ||
- value != start_xid) /* overflow */
+ !StartTransactionIdIsValid(start_xid))
{
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 5b49cc5a09..694c2be546 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -159,8 +159,8 @@ static const FormData_pg_attribute a2 = {
.attnum = MinTransactionIdAttributeNumber,
.attcacheoff = -1,
.atttypmod = -1,
- .attbyval = true,
- .attalign = TYPALIGN_INT,
+ .attbyval = FLOAT8PASSBYVAL,
+ .attalign = TYPALIGN_XID,
.attstorage = TYPSTORAGE_PLAIN,
.attnotnull = true,
.attislocal = true,
@@ -187,8 +187,8 @@ static const FormData_pg_attribute a4 = {
.attnum = MaxTransactionIdAttributeNumber,
.attcacheoff = -1,
.atttypmod = -1,
- .attbyval = true,
- .attalign = TYPALIGN_INT,
+ .attbyval = FLOAT8PASSBYVAL,
+ .attalign = TYPALIGN_XID,
.attstorage = TYPSTORAGE_PLAIN,
.attnotnull = true,
.attislocal = true,
diff --git a/src/backend/catalog/pg_inherits.c b/src/backend/catalog/pg_inherits.c
index 92afbc2f25..3a1eda413f 100644
--- a/src/backend/catalog/pg_inherits.c
+++ b/src/backend/catalog/pg_inherits.c
@@ -146,7 +146,7 @@ find_inheritance_children_extended(Oid parentrelId, bool omit_detached,
TransactionId xmin;
Snapshot snap;
- xmin = HeapTupleHeaderGetXmin(inheritsTuple->t_data);
+ xmin = HeapTupleGetXmin(inheritsTuple);
snap = GetActiveSnapshot();
if (!XidInMVCCSnapshot(xmin, snap))
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
index 056dca8e47..56295b9aa6 100644
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -187,7 +187,7 @@ typedef struct AsyncQueueEntry
} AsyncQueueEntry;
-/* Currently, no field of AsyncQueueEntry requires more than int alignment */
-#define QUEUEALIGN(len) INTALIGN(len)
+/* Queue entries are aligned on 8-byte boundaries (int alignment no longer suffices) */
+#define QUEUEALIGN(len) TYPEALIGN(8, len)
#define AsyncQueueEntryEmptySize (offsetof(AsyncQueueEntry, data) + 2)
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 96b46cbc02..44fd9efb23 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -130,7 +130,8 @@ static void CreateDatabaseUsingWalLog(Oid src_dboid, Oid dst_dboid, Oid src_tsid
static List *ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath);
static List *ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid,
Oid dbid, char *srcpath,
- List *rlocatorlist, Snapshot snapshot);
+ List *rlocatorlist, Snapshot snapshot,
+ bool is_toast);
static CreateDBRelInfo *ScanSourceDatabasePgClassTuple(HeapTupleData *tuple,
Oid tbid, Oid dbid,
char *srcpath);
@@ -308,9 +309,10 @@ ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath)
}
/* Append relevant pg_class tuples for current page to rlocatorlist. */
+ /* pg_class is not a TOAST relation, so pass is_toast = false */
rlocatorlist = ScanSourceDatabasePgClassPage(page, buf, tbid, dbid,
srcpath, rlocatorlist,
- snapshot);
+ snapshot, false);
UnlockReleaseBuffer(buf);
}
@@ -328,7 +330,7 @@ ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath)
static List *
ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid,
char *srcpath, List *rlocatorlist,
- Snapshot snapshot)
+ Snapshot snapshot, bool is_toast)
{
BlockNumber blkno = BufferGetBlockNumber(buf);
OffsetNumber offnum;
@@ -358,6 +360,7 @@ ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid,
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
tuple.t_len = ItemIdGetLength(itemid);
tuple.t_tableOid = RelationRelationId;
+ HeapTupleCopyBaseFromPage(buf, &tuple, page, is_toast);
/* Skip tuples that are not visible to this snapshot. */
if (HeapTupleSatisfiesVisibility(&tuple, snapshot, buf))
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index fd56066c13..f3b19cf188 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1656,7 +1656,7 @@ DefineIndex(Oid relationId,
set_indexsafe_procflags();
/* We should now definitely not be advertising any xmin. */
- Assert(MyProc->xmin == InvalidTransactionId);
+ Assert(pg_atomic_read_u64(&MyProc->xmin) == InvalidTransactionId);
/*
* The index is now valid in the sense that it contains all currently
@@ -4339,8 +4339,8 @@ set_indexsafe_procflags(void)
* This should only be called before installing xid or xmin in MyProc;
* otherwise, concurrent processes could see an Xmin that moves backwards.
*/
- Assert(MyProc->xid == InvalidTransactionId &&
- MyProc->xmin == InvalidTransactionId);
+ Assert(pg_atomic_read_u64(&MyProc->xid) == InvalidTransactionId &&
+ pg_atomic_read_u64(&MyProc->xmin) == InvalidTransactionId);
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyProc->statusFlags |= PROC_IN_SAFE_IC;
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index 99c9f91cba..c72cab394f 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -48,6 +48,23 @@
#include "utils/syscache.h"
#include "utils/varlena.h"
+static inline void
+SeqTupleHeaderSetXmin(HeapTupleHeader htup, TransactionId xid)
+{
+ htup->t_choice.t_heap.t_xmin = xid;
+}
+
+static inline void
+SeqTupleHeaderSetXmax(HeapTupleHeader htup, TransactionId xid)
+{
+ htup->t_choice.t_heap.t_xmax = xid;
+}
+
+static inline TransactionId
+SeqTupleHeaderGetRawXmax(HeapTupleHeader htup)
+{
+ return htup->t_choice.t_heap.t_xmax;
+}
/*
* We don't want to log each fetching of a value from a sequence,
@@ -397,10 +414,10 @@ fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum)
* because if the current transaction aborts, no other xact will ever
* examine the sequence tuple anyway.
*/
- HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId);
+ SeqTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId);
HeapTupleHeaderSetXminFrozen(tuple->t_data);
HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId);
- HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
+ SeqTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId);
tuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber);
@@ -1232,9 +1249,9 @@ read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple)
* this again if the update gets lost.
*/
Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
- if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId)
+ if (SeqTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId)
{
- HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId);
+ SeqTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId);
seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
MarkBufferDirtyHint(*buf, true);
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 7ccde07de9..c3d7ae61a9 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -63,13 +63,12 @@
/*
* GUC parameters
*/
-int vacuum_freeze_min_age;
-int vacuum_freeze_table_age;
-int vacuum_multixact_freeze_min_age;
-int vacuum_multixact_freeze_table_age;
-int vacuum_failsafe_age;
-int vacuum_multixact_failsafe_age;
-
+int64 vacuum_freeze_min_age;
+int64 vacuum_freeze_table_age;
+int64 vacuum_multixact_freeze_min_age;
+int64 vacuum_multixact_freeze_table_age;
+int64 vacuum_failsafe_age;
+int64 vacuum_multixact_failsafe_age;
/* A few variables that don't seem worth passing around as parameters */
static MemoryContext vac_context = NULL;
@@ -955,10 +954,10 @@ get_all_vacuum_rels(int options)
*/
bool
vacuum_set_xid_limits(Relation rel,
- int freeze_min_age,
- int multixact_freeze_min_age,
- int freeze_table_age,
- int multixact_freeze_table_age,
+ int64 freeze_min_age,
+ int64 multixact_freeze_min_age,
+ int64 freeze_table_age,
+ int64 multixact_freeze_table_age,
TransactionId *oldestXmin,
MultiXactId *oldestMxact,
TransactionId *freezeLimit,
@@ -970,7 +969,7 @@ vacuum_set_xid_limits(Relation rel,
MultiXactId nextMXID,
safeOldestMxact,
aggressiveMXIDCutoff;
- int effective_multixact_freeze_max_age;
+ int64 effective_multixact_freeze_max_age;
/*
* Acquire oldestXmin.
@@ -1038,7 +1037,7 @@ vacuum_set_xid_limits(Relation rel,
* normally autovacuum_multixact_freeze_max_age, but may be less if we are
* short of multixact member space.
*/
- effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
+ effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age;
/*
* Determine the minimum multixact freeze age to use: as specified by
@@ -1065,11 +1064,13 @@ vacuum_set_xid_limits(Relation rel,
* held back to an unsafe degree in passing
*/
safeOldestXmin = nextXID - autovacuum_freeze_max_age;
- if (!TransactionIdIsNormal(safeOldestXmin))
+ if (nextXID < FirstNormalTransactionId + autovacuum_freeze_max_age)
safeOldestXmin = FirstNormalTransactionId;
+
safeOldestMxact = nextMXID - effective_multixact_freeze_max_age;
- if (safeOldestMxact < FirstMultiXactId)
+ if (nextMXID < FirstMultiXactId + effective_multixact_freeze_max_age)
safeOldestMxact = FirstMultiXactId;
+
if (TransactionIdPrecedes(*oldestXmin, safeOldestXmin))
ereport(WARNING,
(errmsg("cutoff for removing and freezing tuples is far in the past"),
futurexid = false;
if (frozenxid_updated)
*frozenxid_updated = false;
+
+ Assert(TransactionIdPrecedesOrEquals(frozenxid, ReadNextTransactionId()));
+
if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid)
{
bool update = false;
@@ -1401,6 +1405,9 @@ vac_update_relstats(Relation relation,
futuremxid = false;
if (minmulti_updated)
*minmulti_updated = false;
+
+ Assert(MultiXactIdPrecedesOrEquals(minmulti, ReadNextMultiXactId()));
+
if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti)
{
bool update = false;
@@ -1428,14 +1435,16 @@ vac_update_relstats(Relation relation,
if (futurexid)
ereport(WARNING,
(errcode(ERRCODE_DATA_CORRUPTED),
- errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"",
- oldfrozenxid, frozenxid,
+ errmsg_internal("overwrote invalid relfrozenxid value %llu with new value %llu for table \"%s\"",
+ (unsigned long long) oldfrozenxid,
+ (unsigned long long) frozenxid,
RelationGetRelationName(relation))));
if (futuremxid)
ereport(WARNING,
(errcode(ERRCODE_DATA_CORRUPTED),
- errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"",
- oldminmulti, minmulti,
+ errmsg_internal("overwrote invalid relminmxid value %llu with new value %llu for table \"%s\"",
+ (unsigned long long) oldminmulti,
+ (unsigned long long) minmulti,
RelationGetRelationName(relation))));
}
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 9b9bbf00a9..2f37cfed60 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -3180,6 +3180,7 @@ ExecEvalFieldStoreDeForm(ExprState *state, ExprEvalStep *op, ExprContext *econte
tmptup.t_len = HeapTupleHeaderGetDatumLength(tuphdr);
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
+ HeapTupleSetZeroBase(&tmptup);
tmptup.t_data = tuphdr;
heap_deform_tuple(&tmptup, tupDesc,
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index 9df1f81ea8..d8c92f4846 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -1070,6 +1070,7 @@ GetAttributeByName(HeapTupleHeader tuple, const char *attname, bool *isNull)
tmptup.t_len = HeapTupleHeaderGetDatumLength(tuple);
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
+ HeapTupleSetZeroBase(&tmptup);
tmptup.t_data = tuple;
result = heap_getattr(&tmptup,
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 04454ad6e6..bbb060fdb6 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -3678,6 +3678,7 @@ ExecModifyTable(PlanState *pstate)
HeapTupleHeaderGetDatumLength(oldtupdata.t_data);
ItemPointerSetInvalid(&(oldtupdata.t_self));
/* Historically, view triggers see invalid t_tableOid. */
+ HeapTupleCopyHeaderXids(&oldtupdata);
oldtupdata.t_tableOid =
(relkind == RELKIND_VIEW) ? InvalidOid :
RelationGetRelid(resultRelInfo->ri_RelationDesc);
diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c
index fd5796f1b9..26b60cc77e 100644
--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@@ -1154,6 +1154,7 @@ SPI_modifytuple(Relation rel, HeapTuple tuple, int natts, int *attnum,
mtuple->t_data->t_ctid = tuple->t_data->t_ctid;
mtuple->t_self = tuple->t_self;
mtuple->t_tableOid = tuple->t_tableOid;
+ HeapTupleCopyBase(mtuple, tuple);
}
else
{
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 81b8c184a9..39cf494372 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -955,13 +955,13 @@ _read${n}(void)
|| $t eq 'bits32'
|| $t eq 'AclMode'
|| $t eq 'BlockNumber'
- || $t eq 'Index'
- || $t eq 'SubTransactionId')
+ || $t eq 'Index')
{
print $off "\tWRITE_UINT_FIELD($f);\n";
print $rff "\tREAD_UINT_FIELD($f);\n" unless $no_read;
}
- elsif ($t eq 'uint64')
+ elsif ($t eq 'uint64'
+ || $t eq 'SubTransactionId')
{
print $off "\tWRITE_UINT64_FIELD($f);\n";
print $rff "\tREAD_UINT64_FIELD($f);\n" unless $no_read;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 27e5cdee6f..59f2be3be9 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -817,7 +817,6 @@ _outConstraint(StringInfo str, const Constraint *node)
}
}
-
/*
* outNode -
* converts a Node into ascii string and append it to 'str'
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 6d5718ee4c..54697ed00d 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -229,7 +229,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
* src/backend/access/heap/README.HOT for discussion.
*/
if (index->indcheckxmin &&
- !TransactionIdPrecedes(HeapTupleHeaderGetXmin(indexRelation->rd_indextuple->t_data),
+ !TransactionIdPrecedes(HeapTupleGetXmin(indexRelation->rd_indextuple),
TransactionXmin))
{
root->glob->transientPlan = true;
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 1e90b72b74..1cbdadf792 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -123,8 +123,8 @@ int autovacuum_vac_ins_thresh;
double autovacuum_vac_ins_scale;
int autovacuum_anl_thresh;
double autovacuum_anl_scale;
-int autovacuum_freeze_max_age;
-int autovacuum_multixact_freeze_max_age;
+int64 autovacuum_freeze_max_age;
+int64 autovacuum_multixact_freeze_max_age;
double autovacuum_vac_cost_delay;
int autovacuum_vac_cost_limit;
@@ -147,10 +147,10 @@ static TransactionId recentXid;
static MultiXactId recentMulti;
/* Default freeze ages to use for autovacuum (varies by database) */
-static int default_freeze_min_age;
-static int default_freeze_table_age;
-static int default_multixact_freeze_min_age;
-static int default_multixact_freeze_table_age;
+static int64 default_freeze_min_age;
+static int64 default_freeze_table_age;
+static int64 default_multixact_freeze_min_age;
+static int64 default_multixact_freeze_table_age;
/* Memory context for long-lived data */
static MemoryContext AutovacMemCxt;
@@ -326,15 +326,15 @@ static void FreeWorkerInfo(int code, Datum arg);
static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map,
TupleDesc pg_class_desc,
- int effective_multixact_freeze_max_age);
+ int64 effective_multixact_freeze_max_age);
static void recheck_relation_needs_vacanalyze(Oid relid, AutoVacOpts *avopts,
Form_pg_class classForm,
- int effective_multixact_freeze_max_age,
+ int64 effective_multixact_freeze_max_age,
bool *dovacuum, bool *doanalyze, bool *wraparound);
static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts,
Form_pg_class classForm,
PgStat_StatTabEntry *tabentry,
- int effective_multixact_freeze_max_age,
+ int64 effective_multixact_freeze_max_age,
bool *dovacuum, bool *doanalyze, bool *wraparound);
static void autovacuum_do_vac_analyze(autovac_table *tab,
@@ -1150,6 +1150,7 @@ do_start_worker(void)
ListCell *cell;
TransactionId xidForceLimit;
MultiXactId multiForceLimit;
+ int64 multiMembersThreshold;
bool for_xid_wrap;
bool for_multi_wrap;
avw_dbase *avdb;
@@ -1186,17 +1187,18 @@ do_start_worker(void)
* particular tables, but not loosened.)
*/
recentXid = ReadNextTransactionId();
- xidForceLimit = recentXid - autovacuum_freeze_max_age;
- /* ensure it's a "normal" XID, else TransactionIdPrecedes misbehaves */
- /* this can cause the limit to go backwards by 3, but that's OK */
- if (xidForceLimit < FirstNormalTransactionId)
- xidForceLimit -= FirstNormalTransactionId;
+ if (recentXid > FirstNormalTransactionId + autovacuum_freeze_max_age)
+ xidForceLimit = recentXid - autovacuum_freeze_max_age;
+ else
+ xidForceLimit = FirstNormalTransactionId;
/* Also determine the oldest datminmxid we will consider. */
recentMulti = ReadNextMultiXactId();
- multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold();
- if (multiForceLimit < FirstMultiXactId)
- multiForceLimit -= FirstMultiXactId;
+ multiMembersThreshold = autovacuum_multixact_freeze_max_age;
+ if (recentMulti > FirstMultiXactId + multiMembersThreshold)
+ multiForceLimit = recentMulti - multiMembersThreshold;
+ else
+ multiForceLimit = FirstMultiXactId;
/*
* Choose a database to connect to. We pick the database that was least
@@ -1969,7 +1971,7 @@ do_autovacuum(void)
BufferAccessStrategy bstrategy;
ScanKeyData key;
TupleDesc pg_class_desc;
- int effective_multixact_freeze_max_age;
+ int64 effective_multixact_freeze_max_age;
bool did_vacuum = false;
bool found_concurrent_worker = false;
int i;
@@ -1992,7 +1994,7 @@ do_autovacuum(void)
* normally autovacuum_multixact_freeze_max_age, but may be less if we are
* short of multixact member space.
*/
- effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
+ effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age;
/*
* Find the pg_database entry and select the default freeze ages. We use
@@ -2758,7 +2760,7 @@ extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc)
static autovac_table *
table_recheck_autovac(Oid relid, HTAB *table_toast_map,
TupleDesc pg_class_desc,
- int effective_multixact_freeze_max_age)
+ int64 effective_multixact_freeze_max_age)
{
Form_pg_class classForm;
HeapTuple classTup;
@@ -2797,10 +2799,10 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
/* OK, it needs something done */
if (doanalyze || dovacuum)
{
- int freeze_min_age;
- int freeze_table_age;
- int multixact_freeze_min_age;
- int multixact_freeze_table_age;
+ int64 freeze_min_age;
+ int64 freeze_table_age;
+ int64 multixact_freeze_min_age;
+ int64 multixact_freeze_table_age;
int vac_cost_limit;
double vac_cost_delay;
int log_min_duration;
@@ -2905,7 +2907,7 @@ static void
recheck_relation_needs_vacanalyze(Oid relid,
AutoVacOpts *avopts,
Form_pg_class classForm,
- int effective_multixact_freeze_max_age,
+ int64 effective_multixact_freeze_max_age,
bool *dovacuum,
bool *doanalyze,
bool *wraparound)
@@ -2967,7 +2969,7 @@ relation_needs_vacanalyze(Oid relid,
AutoVacOpts *relopts,
Form_pg_class classForm,
PgStat_StatTabEntry *tabentry,
- int effective_multixact_freeze_max_age,
+ int64 effective_multixact_freeze_max_age,
/* output params below */
bool *dovacuum,
bool *doanalyze,
@@ -2996,8 +2998,8 @@ relation_needs_vacanalyze(Oid relid,
anltuples;
/* freeze parameters */
- int freeze_max_age;
- int multixact_freeze_max_age;
+ int64 freeze_max_age;
+ int64 multixact_freeze_max_age;
TransactionId xidForceLimit;
MultiXactId multiForceLimit;
@@ -3047,17 +3049,19 @@ relation_needs_vacanalyze(Oid relid,
av_enabled = (relopts ? relopts->enabled : true);
/* Force vacuum if table is at risk of wraparound */
- xidForceLimit = recentXid - freeze_max_age;
- if (xidForceLimit < FirstNormalTransactionId)
- xidForceLimit -= FirstNormalTransactionId;
+ if (recentXid > FirstNormalTransactionId + freeze_max_age)
+ xidForceLimit = recentXid - freeze_max_age;
+ else
+ xidForceLimit = FirstNormalTransactionId;
force_vacuum = (TransactionIdIsNormal(classForm->relfrozenxid) &&
TransactionIdPrecedes(classForm->relfrozenxid,
xidForceLimit));
if (!force_vacuum)
{
- multiForceLimit = recentMulti - multixact_freeze_max_age;
- if (multiForceLimit < FirstMultiXactId)
- multiForceLimit -= FirstMultiXactId;
+ if (recentMulti > FirstMultiXactId + multixact_freeze_max_age)
+ multiForceLimit = recentMulti - multixact_freeze_max_age;
+ else
+ multiForceLimit = FirstMultiXactId;
force_vacuum = MultiXactIdIsValid(classForm->relminmxid) &&
MultiXactIdPrecedes(classForm->relminmxid, multiForceLimit);
}
diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c
index 2cc0ac9eb0..dd41f54049 100644
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -847,8 +847,12 @@ DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
xl_heap_insert *xlrec;
ReorderBufferChange *change;
RelFileLocator target_locator;
+ bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0;
+ Pointer rec_data = (Pointer) XLogRecGetData(r);
- xlrec = (xl_heap_insert *) XLogRecGetData(r);
+ if (isinit)
+ rec_data += sizeof(TransactionId);
+ xlrec = (xl_heap_insert *) rec_data;
/*
* Ignore insert records without new tuples (this does happen when
@@ -904,8 +908,12 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
ReorderBufferChange *change;
char *data;
RelFileLocator target_locator;
+ bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0;
+ Pointer rec_data = (Pointer) XLogRecGetData(r);
- xlrec = (xl_heap_update *) XLogRecGetData(r);
+ if (isinit)
+ rec_data += sizeof(TransactionId);
+ xlrec = (xl_heap_update *) rec_data;
/* only interested in our database */
XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
@@ -1065,8 +1073,12 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
char *tupledata;
Size tuplelen;
RelFileLocator rlocator;
+ bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0;
+ Pointer rec_data = (Pointer) XLogRecGetData(r);
- xlrec = (xl_heap_multi_insert *) XLogRecGetData(r);
+ if (isinit)
+ rec_data += sizeof(TransactionId);
+ xlrec = (xl_heap_multi_insert *) rec_data;
/*
* Ignore insert records without new tuples. This happens when a
diff --git a/src/backend/replication/logical/proto.c b/src/backend/replication/logical/proto.c
index ff8513e2d2..c1c4adad9d 100644
--- a/src/backend/replication/logical/proto.c
+++ b/src/backend/replication/logical/proto.c
@@ -64,7 +64,7 @@ logicalrep_write_begin(StringInfo out, ReorderBufferTXN *txn)
/* fixed fields */
pq_sendint64(out, txn->final_lsn);
pq_sendint64(out, txn->xact_time.commit_time);
- pq_sendint32(out, txn->xid);
+ pq_sendint64(out, txn->xid);
}
/*
@@ -78,7 +78,7 @@ logicalrep_read_begin(StringInfo in, LogicalRepBeginData *begin_data)
if (begin_data->final_lsn == InvalidXLogRecPtr)
elog(ERROR, "final_lsn not set in begin message");
begin_data->committime = pq_getmsgint64(in);
- begin_data->xid = pq_getmsgint(in, 4);
+ begin_data->xid = pq_getmsgint64(in);
}
@@ -132,7 +132,7 @@ logicalrep_write_begin_prepare(StringInfo out, ReorderBufferTXN *txn)
pq_sendint64(out, txn->final_lsn);
pq_sendint64(out, txn->end_lsn);
pq_sendint64(out, txn->xact_time.prepare_time);
- pq_sendint32(out, txn->xid);
+ pq_sendint64(out, txn->xid);
/* send gid */
pq_sendstring(out, txn->gid);
@@ -152,7 +152,7 @@ logicalrep_read_begin_prepare(StringInfo in, LogicalRepPreparedTxnData *begin_da
if (begin_data->end_lsn == InvalidXLogRecPtr)
elog(ERROR, "end_lsn not set in begin prepare message");
begin_data->prepare_time = pq_getmsgint64(in);
- begin_data->xid = pq_getmsgint(in, 4);
+ begin_data->xid = pq_getmsgint64(in);
/* read gid (copy it into a pre-allocated buffer) */
strlcpy(begin_data->gid, pq_getmsgstring(in), sizeof(begin_data->gid));
@@ -185,7 +185,7 @@ logicalrep_write_prepare_common(StringInfo out, LogicalRepMsgType type,
pq_sendint64(out, prepare_lsn);
pq_sendint64(out, txn->end_lsn);
pq_sendint64(out, txn->xact_time.prepare_time);
- pq_sendint32(out, txn->xid);
+ pq_sendint64(out, txn->xid);
/* send gid */
pq_sendstring(out, txn->gid);
@@ -224,7 +224,7 @@ logicalrep_read_prepare_common(StringInfo in, char *msgtype,
if (prepare_data->end_lsn == InvalidXLogRecPtr)
elog(ERROR, "end_lsn is not set in %s message", msgtype);
prepare_data->prepare_time = pq_getmsgint64(in);
- prepare_data->xid = pq_getmsgint(in, 4);
+ prepare_data->xid = pq_getmsgint64(in);
if (prepare_data->xid == InvalidTransactionId)
elog(ERROR, "invalid two-phase transaction ID in %s message", msgtype);
@@ -265,7 +265,7 @@ logicalrep_write_commit_prepared(StringInfo out, ReorderBufferTXN *txn,
pq_sendint64(out, commit_lsn);
pq_sendint64(out, txn->end_lsn);
pq_sendint64(out, txn->xact_time.commit_time);
- pq_sendint32(out, txn->xid);
+ pq_sendint64(out, txn->xid);
/* send gid */
pq_sendstring(out, txn->gid);
@@ -291,7 +291,7 @@ logicalrep_read_commit_prepared(StringInfo in, LogicalRepCommitPreparedTxnData *
if (prepare_data->end_lsn == InvalidXLogRecPtr)
elog(ERROR, "end_lsn is not set in commit prepared message");
prepare_data->commit_time = pq_getmsgint64(in);
- prepare_data->xid = pq_getmsgint(in, 4);
+ prepare_data->xid = pq_getmsgint64(in);
/* read gid (copy it into a pre-allocated buffer) */
strlcpy(prepare_data->gid, pq_getmsgstring(in), sizeof(prepare_data->gid));
@@ -323,7 +323,7 @@ logicalrep_write_rollback_prepared(StringInfo out, ReorderBufferTXN *txn,
pq_sendint64(out, txn->end_lsn);
pq_sendint64(out, prepare_time);
pq_sendint64(out, txn->xact_time.commit_time);
- pq_sendint32(out, txn->xid);
+ pq_sendint64(out, txn->xid);
/* send gid */
pq_sendstring(out, txn->gid);
@@ -351,7 +351,7 @@ logicalrep_read_rollback_prepared(StringInfo in,
elog(ERROR, "rollback_end_lsn is not set in rollback prepared message");
rollback_data->prepare_time = pq_getmsgint64(in);
rollback_data->rollback_time = pq_getmsgint64(in);
- rollback_data->xid = pq_getmsgint(in, 4);
+ rollback_data->xid = pq_getmsgint64(in);
/* read gid (copy it into a pre-allocated buffer) */
strlcpy(rollback_data->gid, pq_getmsgstring(in), sizeof(rollback_data->gid));
@@ -418,7 +418,7 @@ logicalrep_write_insert(StringInfo out, TransactionId xid, Relation rel,
/* transaction ID (if not valid, we're not streaming) */
if (TransactionIdIsValid(xid))
- pq_sendint32(out, xid);
+ pq_sendint64(out, xid);
/* use Oid as relation identifier */
pq_sendint32(out, RelationGetRelid(rel));
@@ -467,7 +467,7 @@ logicalrep_write_update(StringInfo out, TransactionId xid, Relation rel,
/* transaction ID (if not valid, we're not streaming) */
if (TransactionIdIsValid(xid))
- pq_sendint32(out, xid);
+ pq_sendint64(out, xid);
/* use Oid as relation identifier */
pq_sendint32(out, RelationGetRelid(rel));
@@ -541,7 +541,7 @@ logicalrep_write_delete(StringInfo out, TransactionId xid, Relation rel,
/* transaction ID (if not valid, we're not streaming) */
if (TransactionIdIsValid(xid))
- pq_sendint32(out, xid);
+ pq_sendint64(out, xid);
/* use Oid as relation identifier */
pq_sendint32(out, RelationGetRelid(rel));
@@ -595,7 +595,7 @@ logicalrep_write_truncate(StringInfo out,
/* transaction ID (if not valid, we're not streaming) */
if (TransactionIdIsValid(xid))
- pq_sendint32(out, xid);
+ pq_sendint64(out, xid);
pq_sendint32(out, nrelids);
@@ -653,7 +653,7 @@ logicalrep_write_message(StringInfo out, TransactionId xid, XLogRecPtr lsn,
/* transaction ID (if not valid, we're not streaming) */
if (TransactionIdIsValid(xid))
- pq_sendint32(out, xid);
+ pq_sendint64(out, xid);
pq_sendint8(out, flags);
pq_sendint64(out, lsn);
@@ -675,7 +675,7 @@ logicalrep_write_rel(StringInfo out, TransactionId xid, Relation rel,
/* transaction ID (if not valid, we're not streaming) */
if (TransactionIdIsValid(xid))
- pq_sendint32(out, xid);
+ pq_sendint64(out, xid);
/* use Oid as relation identifier */
pq_sendint32(out, RelationGetRelid(rel));
@@ -731,7 +731,7 @@ logicalrep_write_typ(StringInfo out, TransactionId xid, Oid typoid)
/* transaction ID (if not valid, we're not streaming) */
if (TransactionIdIsValid(xid))
- pq_sendint32(out, xid);
+ pq_sendint64(out, xid);
tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(basetypoid));
if (!HeapTupleIsValid(tup))
@@ -1079,7 +1079,7 @@ logicalrep_write_stream_start(StringInfo out,
Assert(TransactionIdIsValid(xid));
/* transaction ID (we're starting to stream, so must be valid) */
- pq_sendint32(out, xid);
+ pq_sendint64(out, xid);
/* 1 if this is the first streaming segment for this xid */
pq_sendbyte(out, first_segment ? 1 : 0);
@@ -1095,7 +1095,7 @@ logicalrep_read_stream_start(StringInfo in, bool *first_segment)
Assert(first_segment);
- xid = pq_getmsgint(in, 4);
+ xid = pq_getmsgint64(in);
*first_segment = (pq_getmsgbyte(in) == 1);
return xid;
@@ -1124,7 +1124,7 @@ logicalrep_write_stream_commit(StringInfo out, ReorderBufferTXN *txn,
Assert(TransactionIdIsValid(txn->xid));
/* transaction ID */
- pq_sendint32(out, txn->xid);
+ pq_sendint64(out, txn->xid);
/* send the flags field (unused for now) */
pq_sendbyte(out, flags);
@@ -1144,7 +1144,7 @@ logicalrep_read_stream_commit(StringInfo in, LogicalRepCommitData *commit_data)
TransactionId xid;
uint8 flags;
- xid = pq_getmsgint(in, 4);
+ xid = pq_getmsgint64(in);
/* read flags (unused for now) */
flags = pq_getmsgbyte(in);
@@ -1173,8 +1173,8 @@ logicalrep_write_stream_abort(StringInfo out, TransactionId xid,
Assert(TransactionIdIsValid(xid) && TransactionIdIsValid(subxid));
/* transaction ID */
- pq_sendint32(out, xid);
- pq_sendint32(out, subxid);
+ pq_sendint64(out, xid);
+ pq_sendint64(out, subxid);
}
/*
@@ -1186,8 +1186,8 @@ logicalrep_read_stream_abort(StringInfo in, TransactionId *xid,
{
Assert(xid && subxid);
- *xid = pq_getmsgint(in, 4);
- *subxid = pq_getmsgint(in, 4);
+ *xid = pq_getmsgint64(in);
+ *subxid = pq_getmsgint64(in);
}
/*
diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 9b20e158eb..e0259aaa33 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -5094,8 +5094,12 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
TransactionId f_mapped_xid;
TransactionId f_create_xid;
XLogRecPtr f_lsn;
- uint32 f_hi,
- f_lo;
+ uint32 f_lsn_hi,
+ f_lsn_lo,
+ f_mapped_xid_hi,
+ f_mapped_xid_lo,
+ f_create_xid_hi,
+ f_create_xid_lo;
RewriteMappingFile *f;
if (strcmp(mapping_de->d_name, ".") == 0 ||
@@ -5107,11 +5111,14 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
continue;
if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT,
- &f_dboid, &f_relid, &f_hi, &f_lo,
- &f_mapped_xid, &f_create_xid) != 6)
+ &f_dboid, &f_relid, &f_lsn_hi, &f_lsn_lo,
+ &f_mapped_xid_hi, &f_mapped_xid_lo,
+ &f_create_xid_hi, &f_create_xid_lo) != 8)
elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name);
- f_lsn = ((uint64) f_hi) << 32 | f_lo;
+ f_lsn = ((uint64) f_lsn_hi) << 32 | f_lsn_lo;
+ f_mapped_xid = ((uint64) f_mapped_xid_hi) << 32 | f_mapped_xid_lo;
+ f_create_xid = ((uint64) f_create_xid_hi) << 32 | f_create_xid_lo;
/* mapping for another database */
if (f_dboid != dboid)
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index d518746ddd..f0adcb4dd1 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -579,7 +579,7 @@ SnapBuildInitialSnapshot(SnapBuild *builder)
elog(ERROR, "cannot build an initial slot snapshot, not all transactions are monitored anymore");
/* so we don't overwrite the existing value */
- if (TransactionIdIsValid(MyProc->xmin))
+ if (TransactionIdIsValid(pg_atomic_read_u64(&MyProc->xmin)))
elog(ERROR, "cannot build an initial slot snapshot when MyProc->xmin already is valid");
snap = SnapBuildBuildSnapshot(builder);
@@ -601,7 +601,7 @@ SnapBuildInitialSnapshot(SnapBuild *builder)
}
#endif
- MyProc->xmin = snap->xmin;
+ pg_atomic_write_u64(&MyProc->xmin, snap->xmin);
/* allocate in transaction context */
newxip = (TransactionId *)
@@ -999,9 +999,10 @@ SnapBuildPurgeOlderTxn(SnapBuild *builder)
builder->catchange.xip = NULL;
}
- elog(DEBUG3, "purged catalog modifying transactions from %u to %u, xmin: %u, xmax: %u",
+ elog(DEBUG3, "purged catalog modifying transactions from %u to %u, xmin: %llu, xmax: %llu",
(uint32) builder->catchange.xcnt, (uint32) surviving_xids,
- builder->xmin, builder->xmax);
+ (unsigned long long) builder->xmin,
+ (unsigned long long) builder->xmax);
builder->catchange.xcnt = surviving_xids;
}
}
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index e62de43e09..4e01565fca 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -450,7 +450,7 @@ handle_streamed_transaction(LogicalRepMsgType action, StringInfo s)
* We should have received XID of the subxact as the first part of the
* message, so extract it.
*/
- xid = pq_getmsgint(s, 4);
+ xid = pq_getmsgint64(s);
if (!TransactionIdIsValid(xid))
ereport(ERROR,
diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c
index 2ecaa5b907..e7d2593cef 100644
--- a/src/backend/replication/pgoutput/pgoutput.c
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -584,7 +584,8 @@ pgoutput_commit_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
if (!sent_begin_txn)
{
- elog(DEBUG1, "skipped replication of an empty transaction with XID: %u", txn->xid);
+ elog(DEBUG1, "skipped replication of an empty transaction with XID: %llu",
+ (unsigned long long) txn->xid);
return;
}
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 927fd2dbe5..d53bc09a14 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -1143,10 +1143,6 @@ static void
XLogWalRcvSendHSFeedback(bool immed)
{
TimestampTz now;
- FullTransactionId nextFullXid;
- TransactionId nextXid;
- uint32 xmin_epoch,
- catalog_xmin_epoch;
TransactionId xmin,
catalog_xmin;
static TimestampTz sendTime = 0;
@@ -1203,31 +1199,15 @@ XLogWalRcvSendHSFeedback(bool immed)
catalog_xmin = InvalidTransactionId;
}
- /*
- * Get epoch and adjust if nextXid and oldestXmin are different sides of
- * the epoch boundary.
- */
- nextFullXid = ReadNextFullTransactionId();
- nextXid = XidFromFullTransactionId(nextFullXid);
- xmin_epoch = EpochFromFullTransactionId(nextFullXid);
- catalog_xmin_epoch = xmin_epoch;
- if (nextXid < xmin)
- xmin_epoch--;
- if (nextXid < catalog_xmin)
- catalog_xmin_epoch--;
-
- elog(DEBUG2, "sending hot standby feedback xmin %llu epoch %u catalog_xmin %llu catalog_xmin_epoch %u",
- (unsigned long long) xmin, xmin_epoch,
- (unsigned long long) catalog_xmin, catalog_xmin_epoch);
+ elog(DEBUG2, "sending hot standby feedback xmin %llu catalog_xmin %llu",
+ (unsigned long long) xmin, (unsigned long long) catalog_xmin);
/* Construct the message and send it. */
resetStringInfo(&reply_message);
pq_sendbyte(&reply_message, 'h');
pq_sendint64(&reply_message, GetCurrentTimestamp());
- pq_sendint32(&reply_message, xmin);
- pq_sendint32(&reply_message, xmin_epoch);
- pq_sendint32(&reply_message, catalog_xmin);
- pq_sendint32(&reply_message, catalog_xmin_epoch);
+ pq_sendint64(&reply_message, xmin);
+ pq_sendint64(&reply_message, catalog_xmin);
walrcv_send(wrconn, reply_message.data, reply_message.len);
if (TransactionIdIsValid(xmin) || TransactionIdIsValid(catalog_xmin))
primary_has_standby_xmin = true;
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 66cbec488c..d0a9bbb6c1 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -255,7 +255,6 @@ static void WalSndUpdateProgress(LogicalDecodingContext *ctx, XLogRecPtr lsn, Tr
static XLogRecPtr WalSndWaitForWal(XLogRecPtr loc);
static void LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time);
static TimeOffset LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now);
-static bool TransactionIdInRecentPast(TransactionId xid, uint32 epoch);
static void WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo,
TimeLineID *tli_p);
@@ -293,7 +292,7 @@ InitWalSender(void)
*/
if (MyDatabaseId == InvalidOid)
{
- Assert(MyProc->xmin == InvalidTransactionId);
+ Assert(pg_atomic_read_u64(&MyProc->xmin) == InvalidTransactionId);
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyProc->statusFlags |= PROC_AFFECTS_ALL_HORIZONS;
ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
@@ -2166,7 +2165,7 @@ PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin, TransactionId feedbac
ReplicationSlot *slot = MyReplicationSlot;
SpinLockAcquire(&slot->mutex);
- MyProc->xmin = InvalidTransactionId;
+ pg_atomic_write_u64(&MyProc->xmin, InvalidTransactionId);
/*
* For physical replication we don't need the interlock provided by xmin
@@ -2198,44 +2197,6 @@ PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin, TransactionId feedbac
}
}
-/*
- * Check that the provided xmin/epoch are sane, that is, not in the future
- * and not so far back as to be already wrapped around.
- *
- * Epoch of nextXid should be same as standby, or if the counter has
- * wrapped, then one greater than standby.
- *
- * This check doesn't care about whether clog exists for these xids
- * at all.
- */
-static bool
-TransactionIdInRecentPast(TransactionId xid, uint32 epoch)
-{
- FullTransactionId nextFullXid;
- TransactionId nextXid;
- uint32 nextEpoch;
-
- nextFullXid = ReadNextFullTransactionId();
- nextXid = XidFromFullTransactionId(nextFullXid);
- nextEpoch = EpochFromFullTransactionId(nextFullXid);
-
- if (xid <= nextXid)
- {
- if (epoch != nextEpoch)
- return false;
- }
- else
- {
- if (epoch + 1 != nextEpoch)
- return false;
- }
-
- if (!TransactionIdPrecedesOrEquals(xid, nextXid))
- return false; /* epoch OK, but it's wrapped around */
-
- return true;
-}
-
/*
* Hot Standby feedback
*/
@@ -2243,9 +2204,7 @@ static void
ProcessStandbyHSFeedbackMessage(void)
{
TransactionId feedbackXmin;
- uint32 feedbackEpoch;
TransactionId feedbackCatalogXmin;
- uint32 feedbackCatalogEpoch;
TimestampTz replyTime;
/*
@@ -2254,10 +2213,8 @@ ProcessStandbyHSFeedbackMessage(void)
* of this message.
*/
replyTime = pq_getmsgint64(&reply_message);
- feedbackXmin = pq_getmsgint(&reply_message, 4);
- feedbackEpoch = pq_getmsgint(&reply_message, 4);
- feedbackCatalogXmin = pq_getmsgint(&reply_message, 4);
- feedbackCatalogEpoch = pq_getmsgint(&reply_message, 4);
+ feedbackXmin = pq_getmsgint64(&reply_message);
+ feedbackCatalogXmin = pq_getmsgint64(&reply_message);
if (message_level_is_interesting(DEBUG2))
{
@@ -2266,11 +2223,9 @@ ProcessStandbyHSFeedbackMessage(void)
/* Copy because timestamptz_to_str returns a static buffer */
replyTimeStr = pstrdup(timestamptz_to_str(replyTime));
- elog(DEBUG2, "hot standby feedback xmin %llu epoch %u, catalog_xmin %llu epoch %u reply_time %s",
+ elog(DEBUG2, "hot standby feedback xmin %llu, catalog_xmin %llu reply_time %s",
(unsigned long long) feedbackXmin,
- feedbackEpoch,
(unsigned long long) feedbackCatalogXmin,
- feedbackCatalogEpoch,
replyTimeStr);
pfree(replyTimeStr);
@@ -2295,24 +2250,12 @@ ProcessStandbyHSFeedbackMessage(void)
if (!TransactionIdIsNormal(feedbackXmin)
&& !TransactionIdIsNormal(feedbackCatalogXmin))
{
- MyProc->xmin = InvalidTransactionId;
+ pg_atomic_write_u64(&MyProc->xmin, InvalidTransactionId);
if (MyReplicationSlot != NULL)
PhysicalReplicationSlotNewXmin(feedbackXmin, feedbackCatalogXmin);
return;
}
- /*
- * Check that the provided xmin/epoch are sane, that is, not in the future
- * and not so far back as to be already wrapped around. Ignore if not.
- */
- if (TransactionIdIsNormal(feedbackXmin) &&
- !TransactionIdInRecentPast(feedbackXmin, feedbackEpoch))
- return;
-
- if (TransactionIdIsNormal(feedbackCatalogXmin) &&
- !TransactionIdInRecentPast(feedbackCatalogXmin, feedbackCatalogEpoch))
- return;
-
/*
* Set the WalSender's xmin equal to the standby's requested xmin, so that
* the xmin will be taken into account by GetSnapshotData() /
@@ -2350,9 +2293,9 @@ ProcessStandbyHSFeedbackMessage(void)
{
if (TransactionIdIsNormal(feedbackCatalogXmin)
&& TransactionIdPrecedes(feedbackCatalogXmin, feedbackXmin))
- MyProc->xmin = feedbackCatalogXmin;
+ pg_atomic_write_u64(&MyProc->xmin, feedbackCatalogXmin);
else
- MyProc->xmin = feedbackXmin;
+ pg_atomic_write_u64(&MyProc->xmin, feedbackXmin);
}
}
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index ab97e71dd7..118ff24d1c 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -2481,6 +2481,7 @@ statext_expressions_load(Oid stxoid, bool inh, int idx)
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
tmptup.t_data = td;
+ HeapTupleCopyHeaderXids(&tmptup);
tup = heap_copytuple(&tmptup);
diff --git a/src/backend/storage/buffer/Makefile b/src/backend/storage/buffer/Makefile
index fd7c40dcb0..ffcc0fc290 100644
--- a/src/backend/storage/buffer/Makefile
+++ b/src/backend/storage/buffer/Makefile
@@ -17,6 +17,7 @@ OBJS = \
buf_table.o \
bufmgr.o \
freelist.o \
- localbuf.o
+ localbuf.o \
+ heap_convert.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 6b95381481..ba5611b008 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -459,7 +459,8 @@ ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref)
)
-static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence,
+static Buffer ReadBuffer_common(Relation reln,
+ SMgrRelation smgr, char relpersistence,
ForkNumber forkNum, BlockNumber blockNum,
ReadBufferMode mode, BufferAccessStrategy strategy,
bool *hit);
@@ -777,7 +778,8 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
* miss.
*/
pgstat_count_buffer_read(reln);
- buf = ReadBuffer_common(RelationGetSmgr(reln), reln->rd_rel->relpersistence,
+ buf = ReadBuffer_common(reln,
+ RelationGetSmgr(reln), reln->rd_rel->relpersistence,
forkNum, blockNum, mode, strategy, &hit);
if (hit)
pgstat_count_buffer_hit(reln);
@@ -804,7 +806,7 @@ ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum,
SMgrRelation smgr = smgropen(rlocator, InvalidBackendId);
- return ReadBuffer_common(smgr, permanent ? RELPERSISTENCE_PERMANENT :
+ return ReadBuffer_common(NULL, smgr, permanent ? RELPERSISTENCE_PERMANENT :
RELPERSISTENCE_UNLOGGED, forkNum, blockNum,
mode, strategy, &hit);
}
@@ -816,7 +818,8 @@ ReadBufferWithoutRelcache(RelFileLocator rlocator, ForkNumber forkNum,
* *hit is set to true if the request was satisfied from shared buffer cache.
*/
static Buffer
-ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
+ReadBuffer_common(Relation reln,
+ SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
BlockNumber blockNum, ReadBufferMode mode,
BufferAccessStrategy strategy, bool *hit)
{
@@ -1048,6 +1051,30 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
blockNum,
relpath(smgr->smgr_rlocator, forkNum))));
}
+
+ if (PageGetPageLayoutVersion(bufBlock) != PG_PAGE_LAYOUT_VERSION &&
+ !PageIsNew((Page) bufBlock))
+ {
+ Buffer buf = BufferDescriptorGetBuffer(bufHdr);
+
+ /*
+ * All the forks but MAIN_FORKNUM should be converted to the
+ * actual page layout version in pg_upgrade.
+ */
+ if (forkNum != MAIN_FORKNUM)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("invalid fork type (%d) in block %u of relation %s",
+ forkNum, blockNum,
+ relpath(smgr->smgr_rlocator, forkNum))));
+
+ LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE);
+ /* Check for no concurrent changes */
+ if (PageGetPageLayoutVersion(bufBlock) != PG_PAGE_LAYOUT_VERSION)
+ convert_page(reln, bufBlock, buf, blockNum);
+
+ LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
+ }
}
}
@@ -4131,6 +4158,64 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
}
}
+/*
+ * Set or clear the BM_CONVERTED flag on a shared buffer.  The flag records
+ * that only the page format was changed, without any logical change.
+ *
+ * A set flag overrides the `full_page_writes` GUC in XLogRecordAssemble.
+ */
+void
+MarkBufferConverted(Buffer buffer, bool converted)
+{
+	BufferDesc *desc;
+	uint32		state;
+
+	if (!BufferIsValid(buffer))
+		elog(ERROR, "bad buffer ID: %d", buffer);
+
+	Assert(!BufferIsLocal(buffer));
+
+	desc = GetBufferDescriptor(buffer - 1);
+
+	Assert(GetPrivateRefCount(buffer) > 0);
+
+	/* when setting the flag, either share or exclusive lock is OK */
+	Assert(!converted ||
+		   LWLockHeldByMe(BufferDescriptorGetContentLock(desc)));
+
+	/* unlocked pre-check: nothing to do if the flag already matches */
+	state = pg_atomic_read_u32(&desc->state);
+	if (((state & BM_CONVERTED) != 0) == converted)
+		return;
+
+	state = LockBufHdr(desc);
+	if (converted)
+		state |= BM_CONVERTED;
+	else
+		state &= ~BM_CONVERTED;
+	UnlockBufHdr(desc, state);
+}
+
+/*
+ * Report whether the shared buffer carries the BM_CONVERTED flag.
+ */
+bool
+IsBufferConverted(Buffer buffer)
+{
+	BufferDesc *desc;
+
+	if (!BufferIsValid(buffer))
+		elog(ERROR, "bad buffer ID: %d", buffer);
+
+	Assert(!BufferIsLocal(buffer));
+
+	desc = GetBufferDescriptor(buffer - 1);
+
+	Assert(GetPrivateRefCount(buffer) > 0);
+
+	return (pg_atomic_read_u32(&desc->state) & BM_CONVERTED) != 0;
+}
+
/*
* Release buffer content locks for shared buffers.
*
@@ -4165,6 +4250,47 @@ UnlockBuffers(void)
}
}
+/*
+ * Does this backend hold the content lock of the given buffer (in any mode)?
+ * InvalidBuffer and local buffers are reported as locked.
+ */
+bool
+IsBufferLocked(Buffer buffer)
+{
+	LWLock	   *content_lock;
+
+	if (buffer == InvalidBuffer)
+		return true;
+
+	Assert(BufferIsPinned(buffer));
+	if (BufferIsLocal(buffer))
+		return true;			/* local buffers need no lock */
+
+	content_lock = BufferDescriptorGetContentLock(GetBufferDescriptor(buffer - 1));
+	return LWLockHeldByMe(content_lock);
+}
+
+/*
+ * Does this backend hold the content lock of the given buffer exclusively?
+ * InvalidBuffer and local buffers are reported as locked.
+ */
+bool
+IsBufferLockedExclusive(Buffer buffer)
+{
+	LWLock	   *content_lock;
+
+	if (buffer == InvalidBuffer)
+		return true;
+
+	Assert(BufferIsPinned(buffer));
+	if (BufferIsLocal(buffer))
+		return true;			/* local buffers need no lock */
+
+	content_lock = BufferDescriptorGetContentLock(GetBufferDescriptor(buffer - 1));
+
+	return LWLockHeldByMeInMode(content_lock, LW_EXCLUSIVE);
+}
+
/*
* Acquire or release the content_lock for the buffer.
*/
diff --git a/src/backend/storage/buffer/heap_convert.c b/src/backend/storage/buffer/heap_convert.c
new file mode 100644
index 0000000000..e6abac9760
--- /dev/null
+++ b/src/backend/storage/buffer/heap_convert.c
@@ -0,0 +1,546 @@
+/*-------------------------------------------------------------------------
+ *
+ * heap_convert.c
+ * Heap page converter from 32bit to 64bit xid format
+ *
+ * Copyright (c) 2015-2022, Postgres Professional
+ *
+ * IDENTIFICATION
+ * src/backend/storage/buffer/heap_convert.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/generic_xlog.h"
+#include "access/heapam.h"
+#include "access/multixact.h"
+#include "catalog/catalog.h"
+#include "storage/bufmgr.h"
+#include "storage/checksum.h"
+
+static void repack_heap_tuples(Relation rel, Page page, Buffer buf,
+ BlockNumber blkno, bool double_xmax);
+
+/*
+ * itemoffcompare: sorting support, orders items by decreasing itemoff
+ */
+int
+itemoffcompare(const void *item1, const void *item2)
+{
+	const ItemIdCompactData *first = (const ItemIdCompactData *) item1;
+	const ItemIdCompactData *second = (const ItemIdCompactData *) item2;
+
+	return second->itemoff - first->itemoff;
+}
+
+/*
+ * Lazy page conversion from 32-bit to 64-bit XID format at first read.
+ */
+void
+convert_page(Relation rel, Page page, Buffer buf, BlockNumber blkno)
+{
+	static unsigned logcnt = 0;
+	bool		logit;
+	PageHeader	hdr = (PageHeader) page;
+	GenericXLogState *state = NULL;
+	bool		try_double_xmax;
+
+	/* Not during XLog replaying: a relation descriptor is required */
+	Assert(rel != NULL);
+
+	/* Verify the old checksum, if the page carries one */
+	if (hdr->pd_checksum)
+	{
+		uint16		checksum = pg_checksum_page((char *) page, blkno);
+
+		if (checksum != hdr->pd_checksum)
+			ereport(ERROR,
+					(errcode(ERRCODE_DATA_CORRUPTED),
+					 errmsg("page verification failed, calculated checksum %u but expected %u",
+							checksum, hdr->pd_checksum)));
+	}
+
+	/*
+	 * We occasionally force logging of page conversion, so never-changed
+	 * pages are converted in the end.  FORCE_LOG_EVERY is chosen arbitrarily
+	 * to log neither too much nor too little.
+	 */
+#define FORCE_LOG_EVERY 128
+	logit = !RecoveryInProgress() && XLogIsNeeded() && RelationNeedsWAL(rel);
+	logit = logit && (++logcnt % FORCE_LOG_EVERY) == 0;
+	if (logit)
+	{
+		state = GenericXLogStart(rel);
+		page = GenericXLogRegisterBuffer(state, buf,
+										 GENERIC_XLOG_FULL_IMAGE);
+		hdr = (PageHeader) page;
+	}
+#ifdef USE_ASSERT_CHECKING
+	else
+	{
+		/* Not already converted */
+		Assert(PageGetPageLayoutVersion(page) != PG_PAGE_LAYOUT_VERSION);
+		/* Page in 32-bit xid format should not have PageSpecial. */
+		Assert(PageGetSpecialSize(page) == 0);
+	}
+#endif
+
+	switch (rel->rd_rel->relkind)
+	{
+		case RELKIND_TOASTVALUE:
+			try_double_xmax = hdr->pd_upper - hdr->pd_lower <
+				MAXALIGN(sizeof(ToastPageSpecialData));
+			repack_heap_tuples(rel, page, buf, blkno, try_double_xmax);
+			break;
+		case RELKIND_RELATION:
+		case RELKIND_PARTITIONED_TABLE:
+		case RELKIND_MATVIEW:
+			try_double_xmax = hdr->pd_upper - hdr->pd_lower <
+				MAXALIGN(sizeof(HeapPageSpecialData));
+			repack_heap_tuples(rel, page, buf, blkno, try_double_xmax);
+			break;
+		case RELKIND_INDEX:
+			/* no need to convert index */
+		case RELKIND_SEQUENCE:
+			/* no real need to convert sequences */
+			break;
+		default:
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("conversion for relation \"%s\" cannot be done",
+							RelationGetRelationName(rel)),
+					 errdetail_relkind_not_supported(rel->rd_rel->relkind)));
+	}
+
+	/* Stamp the version first: the checksum must cover the final header */
+	PageSetPageSizeAndVersion(page, PageGetPageSize(page),
+							  PG_PAGE_LAYOUT_VERSION);
+	hdr->pd_checksum = pg_checksum_page((char *) page, blkno);
+
+	if (logit)
+	{
+		/*
+		 * Finish logging buffer conversion and mark buffer as dirty.
+		 */
+		Assert(state != NULL);
+		MarkBufferDirty(buf);
+		GenericXLogFinish(state);
+	}
+	else
+	{
+		/*
+		 * Otherwise, it will be logged with full-page-write record on first
+		 * actual change.
+		 */
+		MarkBufferConverted(buf, true);
+	}
+}
+
+/*
+ * Convert the 32-bit xmin and xmax of a tuple in place, relative to the
+ * given xid_base/multi_base.  This also handles the special cases: the
+ * "double xmax" page format, a multixact in xmax, and an invalid xmax.
+ */
+static void
+convert_heap_tuple_xids(HeapTupleHeader tuple, TransactionId xid_base,
+ MultiXactId multi_base, bool double_xmax)
+{
+ /* Convert xmin */
+ if (double_xmax)
+ {
+ /* "double xmax" page format: flag xmin as frozen and zero the field */
+ tuple->t_infomask |= HEAP_XMIN_FROZEN;
+ tuple->t_choice.t_heap.t_xmin = 0;
+ }
+ else
+ {
+ TransactionId xmin = tuple->t_choice.t_heap.t_xmin;
+
+ if (TransactionIdIsNormal(xmin))
+ {
+ if (HeapTupleHeaderXminFrozen(tuple))
+ tuple->t_choice.t_heap.t_xmin = FrozenTransactionId;
+ else if (HeapTupleHeaderXminInvalid(tuple))
+ tuple->t_choice.t_heap.t_xmin = InvalidTransactionId;
+ else
+ {
+ Assert(xmin >= xid_base + FirstNormalTransactionId);
+ /* Subtract xid_base from normal xmin */
+ tuple->t_choice.t_heap.t_xmin = xmin - xid_base;
+ }
+ }
+ }
+
+ /* xmax is a valid multixact: adjust for mxid wraparound, then store it */
+ if ((tuple->t_infomask & HEAP_XMAX_IS_MULTI) &&
+ !(tuple->t_infomask & HEAP_XMAX_INVALID))
+ {
+ MultiXactId mxid = tuple->t_choice.t_heap.t_xmax;
+
+ /* An mxid below multi_base is taken as wrapped around: shift it up */
+ if (mxid < multi_base)
+ {
+ mxid += ((MultiXactId) 1 << 32) - FirstMultiXactId;
+ Assert(mxid >= multi_base);
+ }
+
+ if (double_xmax)
+ {
+ /* Save converted mxid into "double xmax" format */
+ HeapTupleHeaderSetDoubleXmax(tuple, mxid);
+ }
+ else
+ {
+ /*
+ * Save converted mxid offset relative to (minmxid - 1), which
+ * will be page's mxid base.
+ */
+ Assert(mxid - multi_base + FirstMultiXactId <= PG_UINT32_MAX);
+ tuple->t_choice.t_heap.t_xmax =
+ (uint32) (mxid - multi_base + FirstMultiXactId);
+ }
+ }
+ /* Convert an xmax that is valid and not a multixact */
+ else if (!(tuple->t_infomask & HEAP_XMAX_INVALID))
+ {
+ TransactionId xmax = tuple->t_choice.t_heap.t_xmax;
+
+ if (double_xmax)
+ {
+ /* Save converted xmax into "double xmax" format */
+ HeapTupleHeaderSetDoubleXmax(tuple, xmax);
+ }
+ else if (TransactionIdIsNormal(xmax))
+ {
+ /* Subtract xid_base from normal xmax */
+ Assert(xmax >= xid_base + FirstNormalTransactionId);
+ tuple->t_choice.t_heap.t_xmax = xmax - xid_base;
+ }
+ }
+ else
+ {
+ if (double_xmax)
+ HeapTupleHeaderSetDoubleXmax(tuple, InvalidTransactionId);
+ else
+ tuple->t_choice.t_heap.t_xmax = InvalidTransactionId;
+ }
+}
+
+/*
+ * Widen the running xid and multixact ranges with the values of one tuple.
+ *
+ * Only normal xids are counted.  A multixact xmax smaller than multi_base
+ * is treated as wrapped around and is shifted up before it is counted.
+ */
+static void
+compute_xid_min_max(HeapTuple tuple, MultiXactId multi_base,
+					TransactionId *xid_min, TransactionId *xid_max,
+					MultiXactId *multi_min, MultiXactId *multi_max)
+{
+	HeapTupleHeader htup = tuple->t_data;
+	TransactionId xid;
+
+	/* xmin counts only when it is neither invalid nor frozen */
+	if (!(HeapTupleHeaderXminInvalid(htup) || HeapTupleHeaderXminFrozen(htup)))
+	{
+		xid = HeapTupleGetRawXmin(tuple);
+		if (TransactionIdIsNormal(xid))
+		{
+			*xid_max = Max(*xid_max, xid);
+			*xid_min = Min(*xid_min, xid);
+		}
+	}
+
+	/* nothing more to do without a valid xmax */
+	if (htup->t_infomask & HEAP_XMAX_INVALID)
+		return;
+
+	if (!(htup->t_infomask & HEAP_XMAX_IS_MULTI))
+		xid = HeapTupleGetRawXmax(tuple);
+	else
+	{
+		MultiXactId mxid = HeapTupleGetRawXmax(tuple);
+
+		Assert(MultiXactIdIsValid(mxid));
+
+		/* a multixact below multi_base is taken to have wrapped around */
+		if (mxid < multi_base)
+		{
+			mxid += ((MultiXactId) 1 << 32) - FirstMultiXactId;
+			Assert(mxid >= multi_base);
+		}
+
+		*multi_max = Max(*multi_max, mxid);
+		*multi_min = Min(*multi_min, mxid);
+
+		/*
+		 * Also take into account the hidden update xid, which can be
+		 * extracted by the vacuum; it is skipped for a lock-only xmax.
+		 */
+		if (htup->t_infomask & HEAP_XMAX_LOCK_ONLY)
+			xid = InvalidTransactionId;
+		else
+			xid = HeapTupleGetUpdateXid(tuple);
+	}
+
+	if (TransactionIdIsNormal(xid))
+	{
+		*xid_max = Max(*xid_max, xid);
+		*xid_min = Min(*xid_min, xid);
+	}
+}
+
+/*
+ * xids_fit_page
+ *		Can the given XID and MultiXactId ranges share a single page?
+ *
+ * Returns true if both:
+ * - [xid_min, xid_max]: the bounds of xmin's and xmax'es of all tuples on a
+ *   page
+ * - [multi_min, multi_max]: the bounds of multixact xmax'es of those tuples
+ * are either unset (upper bound is invalid) or narrow enough that every value
+ * can be expressed as a 32-bit number relative to the page's
+ * xid_base/multi_base.
+ *
+ * A true result effectively means that these tuples can be put directly on
+ * one page in 64-bit XID format.
+ */
+static inline bool
+xids_fit_page(TransactionId xid_min, TransactionId xid_max,
+			  MultiXactId multi_min, MultiXactId multi_max)
+{
+	bool		xids_ok;
+	bool		multis_ok;
+
+	/* An invalid upper bound means no value of that kind was collected */
+	xids_ok = (xid_max == InvalidTransactionId) ||
+		(xid_max - xid_min <= MaxShortTransactionId - FirstNormalTransactionId);
+
+	multis_ok = (multi_max == InvalidMultiXactId) ||
+		(multi_max - multi_min <= MaxShortTransactionId - FirstMultiXactId);
+
+	return xids_ok && multis_ok;
+}
+
+/*
+ * heap_page_set_base
+ *		Choose the XID/MultiXactId bases for a page in 64-bit XID format and
+ *		store them in the page special area.
+ *
+ * Each base is placed so that the smallest collected value maps onto the
+ * first normal short value (xid_min -> FirstNormalTransactionId, multi_min ->
+ * FirstMultiXactId).  If the corresponding upper bound is invalid, i.e. no
+ * value of that kind was collected, the base is set to the invalid value.
+ *
+ * The chosen bases are returned through *xid_base and *multi_base.  pd_special
+ * is moved to make room for the special area; toast pages get a special area
+ * that holds only the XID base.
+ *
+ * This should not be called for double xmax pages.  They do not have place
+ * for page special.
+ */
+static inline void
+heap_page_set_base(Page page,
+				   TransactionId xid_min, TransactionId xid_max,
+				   MultiXactId multi_min, MultiXactId multi_max,
+				   TransactionId *xid_base, MultiXactId *multi_base,
+				   bool is_toast)
+{
+	PageHeader	phdr = (PageHeader) page;
+	TransactionId new_xid_base = InvalidTransactionId;
+	MultiXactId new_multi_base = InvalidMultiXactId;
+
+	if (xid_max != InvalidTransactionId)
+		new_xid_base = xid_min - FirstNormalTransactionId;
+
+	if (multi_max != InvalidMultiXactId)
+		new_multi_base = multi_min - FirstMultiXactId;
+
+	/* Report the chosen bases back to the caller */
+	*xid_base = new_xid_base;
+	*multi_base = new_multi_base;
+
+	if (!is_toast)
+	{
+		HeapPageSpecial heap_special;
+
+		phdr->pd_special = BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData));
+		heap_special = HeapPageGetSpecial(page);
+		heap_special->pd_xid_base = new_xid_base;
+		heap_special->pd_multi_base = new_multi_base;
+	}
+	else
+	{
+		ToastPageSpecial toast_special;
+
+		phdr->pd_special = BLCKSZ - MAXALIGN(sizeof(ToastPageSpecialData));
+		toast_special = ToastPageGetSpecial(page);
+		toast_special->pd_xid_base = new_xid_base;
+	}
+}
+
+/*
+ * repack_heap_tuples
+ *		Convert heap page format reusing space of dead tuples
+ *
+ * The page is rebuilt in a scratch block: the header and line pointer array
+ * are copied over, every normal tuple is packed downwards from the special
+ * area, its XIDs are rewritten by convert_heap_tuple_xids(), and finally the
+ * scratch block is copied back over "page".
+ *
+ * rel				relation the page belongs to (used for relfrozenxid,
+ *					relminmxid, the toast check and messages)
+ * page, buf, blkno	the page to convert, its buffer and its block number
+ * try_double_xmax	when false, only a conversion with a page special area is
+ *					attempted; if that is impossible the function calls
+ *					itself again with true.  When true, tuples reported as
+ *					HEAPTUPLE_DEAD are marked dead (freeing their space),
+ *					surviving tuples get a frozen xmin, and the page is
+ *					written in "double xmax" format if it still cannot take a
+ *					special area.
+ *
+ * Raises ERROR on an item offset outside [pd_upper, pd_special) or if the
+ * repacked tuples do not fit.
+ */
+static void
+repack_heap_tuples(Relation rel, Page page, Buffer buf, BlockNumber blkno,
+				   bool try_double_xmax)
+{
+	ItemIdCompactData items[MaxHeapTuplesPerPage];
+	ItemIdCompact itemPtr = items;
+	int			nitems = 0,
+				maxoff = PageGetMaxOffsetNumber(page),
+				idx,
+				occupied_space = 0;
+	Offset		upper;
+	bool		double_xmax,
+				special_fits,
+				toast;
+	PageHeader	hdr = (PageHeader) page,
+				new_hdr;
+
+	/*
+	 * The scratch page is accessed through PageHeader/ItemId casts, so it
+	 * must be suitably aligned; a bare char array gives no such guarantee.
+	 */
+	PGAlignedBlock new_block;
+	char	   *new_page = new_block.data;
+	MultiXactId multi_base = rel->rd_rel->relminmxid,
+				multi_min = MaxMultiXactId,
+				multi_max = InvalidMultiXactId;
+	TransactionId xid_base = rel->rd_rel->relfrozenxid,
+				xid_min = MaxTransactionId,
+				xid_max = InvalidTransactionId;
+
+	memset(new_page, 0, BLCKSZ);
+
+	toast = IsToastRelation(rel);
+
+	/* pd_prune_xid must be representable relative to the new base as well */
+	if (TransactionIdIsNormal(hdr->pd_prune_xid))
+		xid_min = xid_max = hdr->pd_prune_xid;
+
+	for (idx = 0; idx < maxoff; idx++)
+	{
+		HeapTupleData tuple;
+		ItemId		lp;
+
+		lp = PageGetItemId(page, idx + 1);
+
+		/* Skip redirects and items without storage */
+		if (!ItemIdHasStorage(lp))
+			continue;
+
+		/* Build in-memory tuple representation */
+		tuple.t_tableOid = 1;	/* doesn't matter in this case */
+		tuple.t_data = (HeapTupleHeader) PageGetItem(page, lp);
+		HeapTupleCopyHeaderXids(&tuple);
+		tuple.t_len = ItemIdGetLength(lp);
+
+		/*
+		 * t_self wants the line pointer number (1-based), not the byte
+		 * offset of the tuple within the page.
+		 */
+		ItemPointerSet(&(tuple.t_self), blkno, idx + 1);
+
+		/*
+		 * This is only needed to determine whether tuple is HEAPTUPLE_DEAD or
+		 * HEAPTUPLE_RECENTLY_DEAD. And since this is the first time we read
+		 * page after pg_upgrade, it cannot be HEAPTUPLE_RECENTLY_DEAD. See
+		 * HeapTupleSatisfiesVacuum() for details
+		 */
+		if (try_double_xmax &&
+			HeapTupleSatisfiesVacuum(&tuple,
+									 (TransactionId) 1 << 32, buf) == HEAPTUPLE_DEAD)
+		{
+			ItemIdSetDead(lp);
+		}
+
+		/* Only items that are still normal are carried over */
+		if (ItemIdIsNormal(lp) && ItemIdHasStorage(lp))
+		{
+			itemPtr->offsetindex = idx;
+			itemPtr->itemoff = ItemIdGetOffset(lp);
+			if (unlikely(itemPtr->itemoff < hdr->pd_upper ||
+						 itemPtr->itemoff >= hdr->pd_special))
+			{
+				ereport(ERROR,
+						(errcode(ERRCODE_DATA_CORRUPTED),
+						 errmsg("corrupted item pointer: %u",
+								itemPtr->itemoff)));
+			}
+
+			itemPtr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
+			occupied_space += itemPtr->alignedlen;
+			nitems++;
+			itemPtr++;
+
+			/*
+			 * In the double xmax attempt xmin is marked frozen, so it no
+			 * longer contributes to the XID range computed below.
+			 */
+			if (try_double_xmax)
+			{
+				HeapTupleSetXmin(&tuple, FrozenTransactionId);
+				HeapTupleHeaderSetXminFrozen(tuple.t_data);
+			}
+
+			compute_xid_min_max(&tuple, multi_base,
+								&xid_min, &xid_max,
+								&multi_min, &multi_max);
+		}
+	}
+
+	/* Write new header */
+	new_hdr = (PageHeader) new_page;
+	*new_hdr = *hdr;
+	new_hdr->pd_lower = SizeOfPageHeaderData + maxoff * sizeof(ItemIdData);
+
+	/*
+	 * heap_page_set_base() reserves MAXALIGN(sizeof(special)) bytes, so test
+	 * for exactly that amount of free space.
+	 */
+	if (toast)
+		special_fits = BLCKSZ - new_hdr->pd_lower - occupied_space >=
+			MAXALIGN(sizeof(ToastPageSpecialData));
+	else
+		special_fits = BLCKSZ - new_hdr->pd_lower - occupied_space >=
+			MAXALIGN(sizeof(HeapPageSpecialData));
+
+	double_xmax = !special_fits ||
+		!xids_fit_page(xid_min, xid_max, multi_min, multi_max);
+
+	if (!double_xmax)
+	{
+		Assert(xid_max == InvalidTransactionId || xid_max >= xid_min);
+		Assert(multi_max == InvalidMultiXactId || multi_max >= multi_min);
+
+		heap_page_set_base(new_page,
+						   xid_min, xid_max,
+						   multi_min, multi_max,
+						   &xid_base, &multi_base,
+						   toast);
+
+		HeapPageSetPruneXid(new_page, new_hdr->pd_prune_xid, toast);
+	}
+	else
+	{
+		/* No space for special area, switch to "double xmax" format */
+		elog(DEBUG2, "convert heap page %u of relation \"%s\" to double xmax format",
+			 blkno, RelationGetRelationName(rel));
+
+		if (try_double_xmax)
+		{
+			xid_base = InvalidTransactionId;
+			multi_base = InvalidMultiXactId;
+		}
+		else
+		{
+			/* Start over, this time pruning dead tuples and freezing xmin */
+			repack_heap_tuples(rel, page, buf, blkno, true);
+			return;
+		}
+	}
+
+	/* Copy ItemIds with an offset */
+	memcpy((char *) new_page + SizeOfPageHeaderData,
+		   (char *) page + SizeOfPageHeaderData,
+		   hdr->pd_lower - SizeOfPageHeaderData);
+
+	/* Move live tuples */
+	upper = new_hdr->pd_special;
+	for (idx = 0; idx < nitems; idx++)
+	{
+		HeapTupleHeader tuple;
+		ItemId		lp;
+
+		itemPtr = &items[idx];
+		lp = PageGetItemId(new_page, itemPtr->offsetindex + 1);
+		upper -= itemPtr->alignedlen;
+		occupied_space -= itemPtr->alignedlen;
+
+		memcpy((char *) new_page + upper,
+			   (char *) page + itemPtr->itemoff,
+			   itemPtr->alignedlen);
+
+		tuple = (HeapTupleHeader) (((char *) new_page) + upper);
+
+		convert_heap_tuple_xids(tuple, xid_base, multi_base, double_xmax);
+
+		/* Point the copied line pointer at the tuple's new location */
+		lp->lp_off = upper;
+	}
+
+	Assert(occupied_space == 0);
+
+	new_hdr->pd_upper = upper;
+	if (new_hdr->pd_lower > new_hdr->pd_upper)
+		elog(ERROR, "cannot convert block %u of relation \"%s\"",
+			 blkno, RelationGetRelationName(rel));
+
+	memcpy(page, new_page, BLCKSZ);
+}
diff --git a/src/backend/storage/buffer/meson.build b/src/backend/storage/buffer/meson.build
index 56a59b5248..a099145872 100644
--- a/src/backend/storage/buffer/meson.build
+++ b/src/backend/storage/buffer/meson.build
@@ -3,5 +3,6 @@ backend_sources += files(
'buf_table.c',
'bufmgr.c',
'freelist.c',
+ 'heap_convert.c',
'localbuf.c',
)
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 67c159a492..47589ec53b 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -67,7 +67,7 @@
#include "utils/rel.h"
#include "utils/snapmgr.h"
-#define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var))))
+#define UINT64_ACCESS_ONCE(var) ((uint64)(*((volatile uint64 *)&(var))))
/* Our shared memory area */
typedef struct ProcArrayStruct
@@ -356,9 +356,6 @@ static inline void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId l
static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid);
static void MaintainLatestCompletedXid(TransactionId latestXid);
static void MaintainLatestCompletedXidRecovery(TransactionId latestXid);
-
-static inline FullTransactionId FullXidRelativeTo(FullTransactionId rel,
- TransactionId xid);
static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons);
/*
@@ -517,7 +514,8 @@ ProcArrayAdd(PGPROC *proc)
arrayP->pgprocnos[index] = proc->pgprocno;
proc->pgxactoff = index;
- ProcGlobal->xids[index] = proc->xid;
+ pg_atomic_write_u64(&ProcGlobal->xids[index],
+ pg_atomic_read_u64(&proc->xid));
ProcGlobal->subxidStates[index] = proc->subxidStatus;
ProcGlobal->statusFlags[index] = proc->statusFlags;
@@ -577,7 +575,7 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
if (TransactionIdIsValid(latestXid))
{
- Assert(TransactionIdIsValid(ProcGlobal->xids[myoff]));
+ Assert(TransactionIdIsValid(pg_atomic_read_u64(&ProcGlobal->xids[myoff])));
/* Advance global latestCompletedXid while holding the lock */
MaintainLatestCompletedXid(latestXid);
@@ -585,17 +583,17 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
/* Same with xactCompletionCount */
ShmemVariableCache->xactCompletionCount++;
- ProcGlobal->xids[myoff] = InvalidTransactionId;
+ pg_atomic_write_u64(&ProcGlobal->xids[myoff], InvalidTransactionId);
ProcGlobal->subxidStates[myoff].overflowed = false;
ProcGlobal->subxidStates[myoff].count = 0;
}
else
{
/* Shouldn't be trying to remove a live transaction here */
- Assert(!TransactionIdIsValid(ProcGlobal->xids[myoff]));
+ Assert(!TransactionIdIsValid(pg_atomic_read_u64(&(ProcGlobal->xids[myoff]))));
}
- Assert(!TransactionIdIsValid(ProcGlobal->xids[myoff]));
+ Assert(!TransactionIdIsValid(pg_atomic_read_u64(&(ProcGlobal->xids[myoff]))));
Assert(ProcGlobal->subxidStates[myoff].count == 0);
Assert(ProcGlobal->subxidStates[myoff].overflowed == false);
@@ -641,7 +639,6 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
LWLockRelease(ProcArrayLock);
}
-
/*
* ProcArrayEndTransaction -- mark a transaction as no longer running
*
@@ -666,7 +663,7 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
* else is taking a snapshot. See discussion in
* src/backend/access/transam/README.
*/
- Assert(TransactionIdIsValid(proc->xid));
+ Assert(TransactionIdIsValid(pg_atomic_read_u64(&proc->xid)));
/*
* If we can immediately acquire ProcArrayLock, we clear our own XID
@@ -688,12 +685,12 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
* anyone else's calculation of a snapshot. We might change their
* estimate of global xmin, but that's OK.
*/
- Assert(!TransactionIdIsValid(proc->xid));
+ Assert(!TransactionIdIsValid(pg_atomic_read_u64(&proc->xid)));
Assert(proc->subxidStatus.count == 0);
Assert(!proc->subxidStatus.overflowed);
proc->lxid = InvalidLocalTransactionId;
- proc->xmin = InvalidTransactionId;
+ pg_atomic_write_u64(&proc->xmin, InvalidTransactionId);
/* be sure this is cleared in abort */
proc->delayChkptFlags = 0;
@@ -729,13 +726,14 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
* processes' PGPROC entries.
*/
Assert(LWLockHeldByMeInMode(ProcArrayLock, LW_EXCLUSIVE));
- Assert(TransactionIdIsValid(ProcGlobal->xids[pgxactoff]));
- Assert(ProcGlobal->xids[pgxactoff] == proc->xid);
+ Assert(TransactionIdIsValid(pg_atomic_read_u64(&ProcGlobal->xids[pgxactoff])));
+ Assert(pg_atomic_read_u64(&ProcGlobal->xids[pgxactoff]) ==
+ pg_atomic_read_u64(&proc->xid));
- ProcGlobal->xids[pgxactoff] = InvalidTransactionId;
- proc->xid = InvalidTransactionId;
+ pg_atomic_write_u64(&ProcGlobal->xids[pgxactoff], InvalidTransactionId);
+ pg_atomic_write_u64(&proc->xid, InvalidTransactionId);
proc->lxid = InvalidLocalTransactionId;
- proc->xmin = InvalidTransactionId;
+ pg_atomic_write_u64(&proc->xmin, InvalidTransactionId);
/* be sure this is cleared in abort */
proc->delayChkptFlags = 0;
@@ -788,7 +786,7 @@ ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
uint32 wakeidx;
/* We should definitely have an XID to clear. */
- Assert(TransactionIdIsValid(proc->xid));
+ Assert(TransactionIdIsValid(pg_atomic_read_u64(&proc->xid)));
/* Add ourselves to the list of processes needing a group XID clear. */
proc->procArrayGroupMember = true;
@@ -917,11 +915,11 @@ ProcArrayClearTransaction(PGPROC *proc)
pgxactoff = proc->pgxactoff;
- ProcGlobal->xids[pgxactoff] = InvalidTransactionId;
- proc->xid = InvalidTransactionId;
+ pg_atomic_write_u64(&ProcGlobal->xids[pgxactoff], InvalidTransactionId);
+ pg_atomic_write_u64(&proc->xid, InvalidTransactionId);
proc->lxid = InvalidLocalTransactionId;
- proc->xmin = InvalidTransactionId;
+ pg_atomic_write_u64(&proc->xmin, InvalidTransactionId);
proc->recoveryConflictPending = false;
Assert(!(proc->statusFlags & PROC_VACUUM_STATE_MASK));
@@ -965,8 +963,7 @@ MaintainLatestCompletedXid(TransactionId latestXid)
if (TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
{
- ShmemVariableCache->latestCompletedXid =
- FullXidRelativeTo(cur_latest, latestXid);
+ ShmemVariableCache->latestCompletedXid = FullTransactionIdFromXid(latestXid);
}
Assert(IsBootstrapProcessingMode() ||
@@ -980,7 +977,6 @@ static void
MaintainLatestCompletedXidRecovery(TransactionId latestXid)
{
FullTransactionId cur_latest = ShmemVariableCache->latestCompletedXid;
- FullTransactionId rel;
Assert(AmStartupProcess() || !IsUnderPostmaster);
Assert(LWLockHeldByMe(ProcArrayLock));
@@ -990,14 +986,12 @@ MaintainLatestCompletedXidRecovery(TransactionId latestXid)
* latestCompletedXid to be initialized in recovery. But in recovery it's
* safe to access nextXid without a lock for the startup process.
*/
- rel = ShmemVariableCache->nextXid;
Assert(FullTransactionIdIsValid(ShmemVariableCache->nextXid));
if (!FullTransactionIdIsValid(cur_latest) ||
TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid))
{
- ShmemVariableCache->latestCompletedXid =
- FullXidRelativeTo(rel, latestXid);
+ ShmemVariableCache->latestCompletedXid = FullTransactionIdFromXid(latestXid);
}
Assert(FullTransactionIdIsNormal(ShmemVariableCache->latestCompletedXid));
@@ -1375,7 +1369,7 @@ bool
TransactionIdIsInProgress(TransactionId xid)
{
static TransactionId *xids = NULL;
- static TransactionId *other_xids;
+ static pg_atomic_uint64 *other_xids;
XidCacheStatus *other_subxidstates;
int nxids = 0;
ProcArrayStruct *arrayP = procArray;
@@ -1471,7 +1465,7 @@ TransactionIdIsInProgress(TransactionId xid)
continue;
/* Fetch xid just once - see GetNewTransactionId */
- pxid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
+ pxid = pg_atomic_read_u64(&(other_xids[pgxactoff]));
if (!TransactionIdIsValid(pxid))
continue;
@@ -1503,7 +1497,7 @@ TransactionIdIsInProgress(TransactionId xid)
for (j = pxids - 1; j >= 0; j--)
{
/* Fetch xid just once - see GetNewTransactionId */
- TransactionId cxid = UINT32_ACCESS_ONCE(proc->subxids.xids[j]);
+ TransactionId cxid = UINT64_ACCESS_ONCE(proc->subxids.xids[j]);
if (TransactionIdEquals(cxid, xid))
{
@@ -1588,7 +1582,7 @@ TransactionIdIsInProgress(TransactionId xid)
topxid = SubTransGetTopmostTransaction(xid);
Assert(TransactionIdIsValid(topxid));
if (!TransactionIdEquals(topxid, xid) &&
- pg_lfind32(topxid, xids, nxids))
+ pg_lfind64(topxid, xids, nxids))
return true;
cachedXidIsNotInProgress = xid;
@@ -1608,7 +1602,7 @@ TransactionIdIsActive(TransactionId xid)
{
bool result = false;
ProcArrayStruct *arrayP = procArray;
- TransactionId *other_xids = ProcGlobal->xids;
+ pg_atomic_uint64 *other_xids = ProcGlobal->xids;
int i;
/*
@@ -1627,7 +1621,7 @@ TransactionIdIsActive(TransactionId xid)
TransactionId pxid;
/* Fetch xid just once - see GetNewTransactionId */
- pxid = UINT32_ACCESS_ONCE(other_xids[i]);
+ pxid = pg_atomic_read_u64(&(other_xids[i]));
if (!TransactionIdIsValid(pxid))
continue;
@@ -1713,7 +1707,7 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
ProcArrayStruct *arrayP = procArray;
TransactionId kaxmin;
bool in_recovery = RecoveryInProgress();
- TransactionId *other_xids = ProcGlobal->xids;
+ pg_atomic_uint64 *other_xids = ProcGlobal->xids;
/* inferred after ProcArrayLock is released */
h->catalog_oldest_nonremovable = InvalidTransactionId;
@@ -1729,7 +1723,8 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
* additions.
*/
{
- TransactionId initial;
+ TransactionId initial,
+ xid;
initial = XidFromFullTransactionId(h->latest_completed);
Assert(TransactionIdIsValid(initial));
@@ -1751,8 +1746,9 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
* definition, can't be any newer changes in the temp table than
* latestCompletedXid.
*/
- if (TransactionIdIsValid(MyProc->xid))
- h->temp_oldest_nonremovable = MyProc->xid;
+ xid = pg_atomic_read_u64(&MyProc->xid);
+ if (TransactionIdIsValid(xid))
+ h->temp_oldest_nonremovable = xid;
else
h->temp_oldest_nonremovable = initial;
}
@@ -1774,8 +1770,8 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
TransactionId xmin;
/* Fetch xid just once - see GetNewTransactionId */
- xid = UINT32_ACCESS_ONCE(other_xids[index]);
- xmin = UINT32_ACCESS_ONCE(proc->xmin);
+ xid = pg_atomic_read_u64(&(other_xids[index]));
+ xmin = pg_atomic_read_u64(&proc->xmin);
/*
* Consider both the transaction's Xmin, and its Xid.
@@ -2150,8 +2146,8 @@ GetSnapshotDataReuse(Snapshot snapshot)
* requirement that concurrent GetSnapshotData() calls yield the same
* xmin.
*/
- if (!TransactionIdIsValid(MyProc->xmin))
- MyProc->xmin = TransactionXmin = snapshot->xmin;
+ if (!TransactionIdIsValid(pg_atomic_read_u64(&MyProc->xmin)))
+ pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = snapshot->xmin);
RecentXmin = snapshot->xmin;
Assert(TransactionIdPrecedesOrEquals(TransactionXmin, RecentXmin));
@@ -2203,7 +2199,7 @@ Snapshot
GetSnapshotData(Snapshot snapshot)
{
ProcArrayStruct *arrayP = procArray;
- TransactionId *other_xids = ProcGlobal->xids;
+ pg_atomic_uint64 *other_xids = ProcGlobal->xids;
TransactionId xmin;
TransactionId xmax;
int count = 0;
@@ -2266,8 +2262,8 @@ GetSnapshotData(Snapshot snapshot)
latest_completed = ShmemVariableCache->latestCompletedXid;
mypgxactoff = MyProc->pgxactoff;
- myxid = other_xids[mypgxactoff];
- Assert(myxid == MyProc->xid);
+ myxid = pg_atomic_read_u64(&other_xids[mypgxactoff]);
+ Assert(myxid == pg_atomic_read_u64(&MyProc->xid));
oldestxid = ShmemVariableCache->oldestXid;
curXactCompletionCount = ShmemVariableCache->xactCompletionCount;
@@ -2301,7 +2297,7 @@ GetSnapshotData(Snapshot snapshot)
for (int pgxactoff = 0; pgxactoff < numProcs; pgxactoff++)
{
/* Fetch xid just once - see GetNewTransactionId */
- TransactionId xid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]);
+ TransactionId xid = pg_atomic_read_u64(&(other_xids[pgxactoff]));
uint8 statusFlags;
Assert(allProcs[arrayP->pgprocnos[pgxactoff]].pgxactoff == pgxactoff);
@@ -2438,8 +2434,8 @@ GetSnapshotData(Snapshot snapshot)
replication_slot_xmin = procArray->replication_slot_xmin;
replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
- if (!TransactionIdIsValid(MyProc->xmin))
- MyProc->xmin = TransactionXmin = xmin;
+ if (!TransactionIdIsValid(pg_atomic_read_u64(&MyProc->xmin)))
+ pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = xmin);
LWLockRelease(ProcArrayLock);
@@ -2451,12 +2447,7 @@ GetSnapshotData(Snapshot snapshot)
FullTransactionId def_vis_fxid_data;
FullTransactionId oldestfxid;
- /*
- * Converting oldestXid is only safe when xid horizon cannot advance,
- * i.e. holding locks. While we don't hold the lock anymore, all the
- * necessary data has been gathered with lock held.
- */
- oldestfxid = FullXidRelativeTo(latest_completed, oldestxid);
+ oldestfxid = FullTransactionIdFromXid(oldestxid);
/* apply vacuum_defer_cleanup_age */
def_vis_xid_data =
@@ -2479,8 +2470,8 @@ GetSnapshotData(Snapshot snapshot)
def_vis_xid =
TransactionIdOlder(replication_slot_catalog_xmin, def_vis_xid);
- def_vis_fxid = FullXidRelativeTo(latest_completed, def_vis_xid);
- def_vis_fxid_data = FullXidRelativeTo(latest_completed, def_vis_xid_data);
+ def_vis_fxid = FullTransactionIdFromXid(def_vis_xid);
+ def_vis_fxid_data = FullTransactionIdFromXid(def_vis_xid_data);
/*
* Check if we can increase upper bound. As a previous
@@ -2499,7 +2490,7 @@ GetSnapshotData(Snapshot snapshot)
/* See temp_oldest_nonremovable computation in ComputeXidHorizons() */
if (TransactionIdIsNormal(myxid))
GlobalVisTempRels.definitely_needed =
- FullXidRelativeTo(latest_completed, myxid);
+ FullTransactionIdFromXid(myxid);
else
{
GlobalVisTempRels.definitely_needed = latest_completed;
@@ -2606,7 +2597,7 @@ ProcArrayInstallImportedXmin(TransactionId xmin,
/*
* Likewise, let's just make real sure its xmin does cover us.
*/
- xid = UINT32_ACCESS_ONCE(proc->xmin);
+ xid = pg_atomic_read_u64(&proc->xmin);
if (!TransactionIdIsNormal(xid) ||
!TransactionIdPrecedesOrEquals(xid, xmin))
continue;
@@ -2617,7 +2608,7 @@ ProcArrayInstallImportedXmin(TransactionId xmin,
* GetSnapshotData first, we'll be overwriting a valid xmin here, so
* we don't check that.)
*/
- MyProc->xmin = TransactionXmin = xmin;
+ pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = xmin);
result = true;
break;
@@ -2661,7 +2652,7 @@ ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
* can't go backwards. Also, make sure it's running in the same database,
* so that the per-database xmin cannot go backwards.
*/
- xid = UINT32_ACCESS_ONCE(proc->xmin);
+ xid = pg_atomic_read_u64(&proc->xmin);
if (proc->databaseId == MyDatabaseId &&
TransactionIdIsNormal(xid) &&
TransactionIdPrecedesOrEquals(xid, xmin))
@@ -2670,7 +2661,7 @@ ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
* Install xmin and propagate the statusFlags that affect how the
* value is interpreted by vacuum.
*/
- MyProc->xmin = TransactionXmin = xmin;
+ pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = xmin);
MyProc->statusFlags = (MyProc->statusFlags & ~PROC_XMIN_FLAGS) |
(proc->statusFlags & PROC_XMIN_FLAGS);
ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
@@ -2721,7 +2712,7 @@ GetRunningTransactionData(void)
static RunningTransactionsData CurrentRunningXactsData;
ProcArrayStruct *arrayP = procArray;
- TransactionId *other_xids = ProcGlobal->xids;
+ pg_atomic_uint64 *other_xids = ProcGlobal->xids;
RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
TransactionId latestCompletedXid;
TransactionId oldestRunningXid;
@@ -2780,7 +2771,7 @@ GetRunningTransactionData(void)
TransactionId xid;
/* Fetch xid just once - see GetNewTransactionId */
- xid = UINT32_ACCESS_ONCE(other_xids[index]);
+ xid = pg_atomic_read_u64(&(other_xids[index]));
/*
* We don't need to store transactions that don't have a TransactionId
@@ -2893,7 +2884,7 @@ TransactionId
GetOldestActiveTransactionId(void)
{
ProcArrayStruct *arrayP = procArray;
- TransactionId *other_xids = ProcGlobal->xids;
+ pg_atomic_uint64 *other_xids = ProcGlobal->xids;
TransactionId oldestRunningXid;
int index;
@@ -2919,7 +2910,7 @@ GetOldestActiveTransactionId(void)
TransactionId xid;
/* Fetch xid just once - see GetNewTransactionId */
- xid = UINT32_ACCESS_ONCE(other_xids[index]);
+ xid = pg_atomic_read_u64(&(other_xids[index]));
if (!TransactionIdIsNormal(xid))
continue;
@@ -3007,7 +2998,7 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly)
*/
if (!recovery_in_progress)
{
- TransactionId *other_xids = ProcGlobal->xids;
+ pg_atomic_uint64 *other_xids = ProcGlobal->xids;
/*
* Spin over procArray collecting min(ProcGlobal->xids[i])
@@ -3017,7 +3008,7 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly)
TransactionId xid;
/* Fetch xid just once - see GetNewTransactionId */
- xid = UINT32_ACCESS_ONCE(other_xids[index]);
+ xid = pg_atomic_read_u64(&(other_xids[index]));
if (!TransactionIdIsNormal(xid))
continue;
@@ -3212,7 +3203,7 @@ BackendXidGetPid(TransactionId xid)
{
int result = 0;
ProcArrayStruct *arrayP = procArray;
- TransactionId *other_xids = ProcGlobal->xids;
+ pg_atomic_uint64 *other_xids = ProcGlobal->xids;
int index;
if (xid == InvalidTransactionId) /* never match invalid xid */
@@ -3225,7 +3216,7 @@ BackendXidGetPid(TransactionId xid)
int pgprocno = arrayP->pgprocnos[index];
PGPROC *proc = &allProcs[pgprocno];
- if (other_xids[index] == xid)
+ if (pg_atomic_read_u64(&other_xids[index]) == xid)
{
result = proc->pid;
break;
@@ -3306,7 +3297,7 @@ GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0,
if (allDbs || proc->databaseId == MyDatabaseId)
{
/* Fetch xmin just once - might change on us */
- TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin);
+ TransactionId pxmin = pg_atomic_read_u64(&proc->xmin);
if (excludeXmin0 && !TransactionIdIsValid(pxmin))
continue;
@@ -3401,7 +3392,7 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
proc->databaseId == dbOid)
{
/* Fetch xmin just once - can't change on us, but good coding */
- TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin);
+ TransactionId pxmin = pg_atomic_read_u64(&proc->xmin);
/*
* We ignore an invalid pxmin because this means that backend has
@@ -3528,7 +3519,7 @@ MinimumActiveBackends(int min)
continue; /* do not count deleted entries */
if (proc == MyProc)
continue; /* do not count myself */
- if (proc->xid == InvalidTransactionId)
+ if (pg_atomic_read_u64(&proc->xid) == InvalidTransactionId)
continue; /* do not count if no XID assigned */
if (proc->pid == 0)
continue; /* do not count prepared xacts */
@@ -4108,17 +4099,13 @@ static void
GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons)
{
GlobalVisSharedRels.maybe_needed =
- FullXidRelativeTo(horizons->latest_completed,
- horizons->shared_oldest_nonremovable);
+ FullTransactionIdFromXid(horizons->shared_oldest_nonremovable);
GlobalVisCatalogRels.maybe_needed =
- FullXidRelativeTo(horizons->latest_completed,
- horizons->catalog_oldest_nonremovable);
+ FullTransactionIdFromXid(horizons->catalog_oldest_nonremovable);
GlobalVisDataRels.maybe_needed =
- FullXidRelativeTo(horizons->latest_completed,
- horizons->data_oldest_nonremovable);
+ FullTransactionIdFromXid(horizons->data_oldest_nonremovable);
GlobalVisTempRels.maybe_needed =
- FullXidRelativeTo(horizons->latest_completed,
- horizons->temp_oldest_nonremovable);
+ FullTransactionIdFromXid(horizons->temp_oldest_nonremovable);
/*
* In longer running transactions it's possible that transactions we
@@ -4207,15 +4194,7 @@ GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
{
FullTransactionId fxid;
- /*
- * Convert 32 bit argument to FullTransactionId. We can do so safely
- * because we know the xid has to, at the very least, be between
- * [oldestXid, nextXid), i.e. within 2 billion of xid. To avoid taking a
- * lock to determine either, we can just compare with
- * state->definitely_needed, which was based on those value at the time
- * the current snapshot was built.
- */
- fxid = FullXidRelativeTo(state->definitely_needed, xid);
+ fxid = FullTransactionIdFromXid(xid);
return GlobalVisTestIsRemovableFullXid(state, fxid);
}
@@ -4278,32 +4257,6 @@ GlobalVisCheckRemovableXid(Relation rel, TransactionId xid)
return GlobalVisTestIsRemovableXid(state, xid);
}
-/*
- * Convert a 32 bit transaction id into 64 bit transaction id, by assuming it
- * is within MaxTransactionId / 2 of XidFromFullTransactionId(rel).
- *
- * Be very careful about when to use this function. It can only safely be used
- * when there is a guarantee that xid is within MaxTransactionId / 2 xids of
- * rel. That e.g. can be guaranteed if the caller assures a snapshot is
- * held by the backend and xid is from a table (where vacuum/freezing ensures
- * the xid has to be within that range), or if xid is from the procarray and
- * prevents xid wraparound that way.
- */
-static inline FullTransactionId
-FullXidRelativeTo(FullTransactionId rel, TransactionId xid)
-{
- TransactionId rel_xid = XidFromFullTransactionId(rel);
-
- Assert(TransactionIdIsValid(xid));
- Assert(TransactionIdIsValid(rel_xid));
-
- /* not guaranteed to find issues, but likely to catch mistakes */
- AssertTransactionIdInAllowableRange(xid);
-
- return FullTransactionIdFromU64(U64FromFullTransactionId(rel)
- + (int32) (xid - rel_xid));
-}
-
/* ----------------------------------------------
* KnownAssignedTransactionIds sub-module
diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c
index 59310b708f..bf712ba7ee 100644
--- a/src/backend/storage/ipc/sinvaladt.c
+++ b/src/backend/storage/ipc/sinvaladt.c
@@ -426,8 +426,8 @@ BackendIdGetTransactionIds(int backendID, TransactionId *xid, TransactionId *xmi
if (proc != NULL)
{
- *xid = proc->xid;
- *xmin = proc->xmin;
+ *xid = pg_atomic_read_u64(&proc->xid);
+ *xmin = pg_atomic_read_u64(&proc->xmin);
}
}
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 99341c3e87..5f1bed8b56 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -37,7 +37,7 @@
#include "utils/timestamp.h"
/* User-settable GUC parameters */
-int vacuum_defer_cleanup_age;
+int64 vacuum_defer_cleanup_age;
int max_standby_archive_delay = 30 * 1000;
int max_standby_streaming_delay = 30 * 1000;
bool log_recovery_conflict_waits = false;
@@ -486,8 +486,8 @@ ResolveRecoveryConflictWithSnapshotFullXid(FullTransactionId latestRemovedFullXi
FullTransactionId nextXid = ReadNextFullTransactionId();
uint64 diff;
- diff = U64FromFullTransactionId(nextXid) -
- U64FromFullTransactionId(latestRemovedFullXid);
+ diff = XidFromFullTransactionId(nextXid) -
+ XidFromFullTransactionId(latestRemovedFullXid);
if (diff < MaxTransactionId / 2)
{
TransactionId latestRemovedXid;
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index 1043068bac..0aa3ae79ac 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -1163,10 +1163,18 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag)
tag->locktag_field1);
break;
case LOCKTAG_TRANSACTION:
- appendStringInfo(buf,
- _("transaction %u"),
- tag->locktag_field1);
- break;
+ {
+ char xid_str[32];
+
+ /* make translatable string */
+ snprintf(xid_str, sizeof(xid_str), "%llu",
+ (unsigned long long)
+ (TransactionId) tag->locktag_field1 |
+ ((TransactionId) tag->locktag_field2 << 32));
+
+ appendStringInfo(buf, _("transaction %s"), xid_str);
+ break;
+ }
case LOCKTAG_VIRTUALTRANSACTION:
appendStringInfo(buf,
_("virtual transaction %d/%u"),
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 3d1049cf75..68ce82ddf8 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -4060,7 +4060,7 @@ GetRunningTransactionLocks(int *nlocks)
{
PGPROC *proc = proclock->tag.myProc;
LOCK *lock = proclock->tag.myLock;
- TransactionId xid = proc->xid;
+ TransactionId xid = pg_atomic_read_u64(&proc->xid);
/*
* Don't record locks for transactions if we know they have
@@ -4689,7 +4689,7 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait)
* so we won't save an XID of a different VXID. It doesn't matter whether
* we save this before or after setting up the primary lock table entry.
*/
- xid = proc->xid;
+ xid = pg_atomic_read_u64(&proc->xid);
/* Done with proc->fpLockBits */
LWLockRelease(&proc->fpInfoLock);
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index b71a60952d..529e01eb8f 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -334,9 +334,9 @@ static SlruCtlData SerialSlruCtlData;
#define SerialValue(slotno, xid) (*((SerCommitSeqNo *) \
(SerialSlruCtl->shared->page_buffer[slotno] + \
- ((((uint32) (xid)) % SERIAL_ENTRIESPERPAGE) * SERIAL_ENTRYSIZE))))
+ ((((uint64) (xid)) % SERIAL_ENTRIESPERPAGE) * SERIAL_ENTRYSIZE))))
-#define SerialPage(xid) (((uint32) (xid)) / SERIAL_ENTRIESPERPAGE)
+#define SerialPage(xid) ((int64) (((uint64) (xid)) / SERIAL_ENTRIESPERPAGE))
typedef struct SerialControlData
{
@@ -4078,7 +4078,7 @@ XidIsConcurrent(TransactionId xid)
if (TransactionIdFollowsOrEquals(xid, snap->xmax))
return true;
- return pg_lfind32(xid, snap->xip, snap->xcnt);
+ return pg_lfind64(xid, snap->xip, snap->xcnt);
}
bool
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 13fa07b0ff..7c81b1aa6a 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -205,7 +205,7 @@ InitProcGlobal(void)
* how hotly they are accessed.
*/
ProcGlobal->xids =
- (TransactionId *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->xids));
+ (pg_atomic_uint64 *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->xids));
MemSet(ProcGlobal->xids, 0, TotalProcs * sizeof(*ProcGlobal->xids));
ProcGlobal->subxidStates = (XidCacheStatus *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->subxidStates));
MemSet(ProcGlobal->subxidStates, 0, TotalProcs * sizeof(*ProcGlobal->subxidStates));
@@ -214,6 +214,7 @@ InitProcGlobal(void)
for (i = 0; i < TotalProcs; i++)
{
+ pg_atomic_init_u64(&ProcGlobal->xids[i], 0);
/* Common initialization for all PGPROCs, regardless of type. */
/*
@@ -383,8 +384,8 @@ InitProcess(void)
MyProc->lxid = InvalidLocalTransactionId;
MyProc->fpVXIDLock = false;
MyProc->fpLocalTransactionId = InvalidLocalTransactionId;
- MyProc->xid = InvalidTransactionId;
- MyProc->xmin = InvalidTransactionId;
+ pg_atomic_init_u64(&MyProc->xid, InvalidTransactionId);
+ pg_atomic_init_u64(&MyProc->xmin, InvalidTransactionId);
MyProc->pid = MyProcPid;
/* backendId, databaseId and roleId will be filled in later */
MyProc->backendId = InvalidBackendId;
@@ -570,8 +571,8 @@ InitAuxiliaryProcess(void)
MyProc->lxid = InvalidLocalTransactionId;
MyProc->fpVXIDLock = false;
MyProc->fpLocalTransactionId = InvalidLocalTransactionId;
- MyProc->xid = InvalidTransactionId;
- MyProc->xmin = InvalidTransactionId;
+ pg_atomic_init_u64(&MyProc->xid, InvalidTransactionId);
+ pg_atomic_init_u64(&MyProc->xmin, InvalidTransactionId);
MyProc->backendId = InvalidBackendId;
MyProc->databaseId = InvalidOid;
MyProc->roleId = InvalidOid;
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index 8b617c7e79..198fdb87e6 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -21,11 +21,31 @@
#include "storage/checksum.h"
#include "utils/memdebug.h"
#include "utils/memutils.h"
+#include "utils/snapmgr.h"
/* GUC variable */
bool ignore_checksum_failure = false;
+/*
+ * HeapPageSpecialData used when pd_special == BLCKSZ. This is the special
+ * format used when a page with 32-bit xids has no room for HeapPageSpecialData.
+ * In that case all xmins are frozen (this can be done for all live tuples
+ * after pg_upgrade), while the 64-bit xmax is stored in both t_heap.t_xmin
+ * and t_heap.t_xmax. This is the so-called "double xmax" format.
+ */
+static HeapPageSpecialData heapDoubleXmaxSpecialData =
+{
+ .pd_xid_base = MaxTransactionId,
+ .pd_multi_base = MaxTransactionId
+};
+HeapPageSpecial heapDoubleXmaxSpecial = &heapDoubleXmaxSpecialData;
+
+static ToastPageSpecialData toastDoubleXmaxSpecialData =
+{
+ .pd_xid_base = MaxTransactionId
+};
+ToastPageSpecial toastDoubleXmaxSpecial = &toastDoubleXmaxSpecialData;
/* ----------------------------------------------------------------
* Page support functions
@@ -432,15 +452,144 @@ PageRestoreTempPage(Page tempPage, Page oldPage)
}
/*
- * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete
+ * Get minimum and maximum values of xid and multixact on "double xmax" page.
+ *
+ * Every normal line pointer on the page is visited and the tuple's xmax is
+ * read with HeapTupleHeaderGetDoubleXmax(). Values that are not normal
+ * transaction ids are ignored. Depending on HEAP_XMAX_IS_MULTI the value is
+ * folded into either *xid_min / *xid_max or *multi_min / *multi_max.
+ *
+ * The output arguments are written only when at least one matching tuple is
+ * found; otherwise they are left untouched, so the caller has to initialize
+ * them before the call.
*/
-typedef struct itemIdCompactData
+static void
+heap_page_double_xmax_get_min_max(Page page,
+ TransactionId *xid_min,
+ TransactionId *xid_max,
+ MultiXactId *multi_min,
+ MultiXactId *multi_max)
{
- uint16 offsetindex; /* linp array index */
- int16 itemoff; /* page offset of item data */
- uint16 alignedlen; /* MAXALIGN(item data len) */
-} itemIdCompactData;
-typedef itemIdCompactData *itemIdCompact;
+ bool xid_found = false,
+ multi_found = false;
+ OffsetNumber offnum,
+ maxoff;
+
+ maxoff = PageGetMaxOffsetNumber(page);
+
+ for (offnum = FirstOffsetNumber;
+ offnum <= maxoff;
+ offnum = OffsetNumberNext(offnum))
+ {
+ ItemId itemid;
+ HeapTupleHeader htup;
+ TransactionId xmax;
+
+ itemid = PageGetItemId(page, offnum);
+
+ /* Only line pointers with tuple storage carry an xmax */
+ if (!ItemIdIsNormal(itemid))
+ continue;
+
+ htup = (HeapTupleHeader) PageGetItem(page, itemid);
+
+ xmax = HeapTupleHeaderGetDoubleXmax(htup);
+
+ /* Non-normal values do not take part in the min/max computation */
+ if (!TransactionIdIsNormal(xmax))
+ continue;
+
+ if (!(htup->t_infomask & HEAP_XMAX_IS_MULTI))
+ {
+ /* xmax is a plain transaction id */
+ if (!xid_found)
+ {
+ *xid_min = *xid_max = xmax;
+ xid_found = true;
+ }
+ else
+ {
+ *xid_min = Min(*xid_min, xmax);
+ *xid_max = Max(*xid_max, xmax);
+ }
+ }
+ else
+ {
+ /* xmax is a multixact id */
+ if (!multi_found)
+ {
+ *multi_min = *multi_max = xmax;
+ multi_found = true;
+ }
+ else
+ {
+ *multi_min = Min(*multi_min, xmax);
+ *multi_max = Max(*multi_max, xmax);
+ }
+ }
+ }
+}
+
+/*
+ * Add special area to heap page, so convert from "double xmax" to normal
+ * format.
+ *
+ * itemidbase[0 .. nitems - 1] describes the tuples that have storage on
+ * 'page'; the array is sorted in place here. xid_base and multi_base are
+ * written into the new special area and are used to shorten each tuple's
+ * xmax. is_toast selects ToastPageSpecialData (xid base only) instead of
+ * HeapPageSpecialData.
+ *
+ * The converted page is assembled in a local scratch buffer and copied over
+ * 'page' at the end. This function does not check that the tuples and the
+ * special area fit on the page; PageRepairFragmentation only requests the
+ * conversion when the free space is at least the size of the special area.
+ */
+static void
+heap_page_add_special_area(ItemIdCompact itemidbase, int nitems, Page page,
+ TransactionId xid_base, MultiXactId multi_base,
+ bool is_toast)
+{
+ PGAlignedBlock newPageBuf; /* aligned, it is accessed as a PageHeader */
+ Page newPage = (Page) newPageBuf.data;
+ PageHeader phdr = (PageHeader) page;
+ PageHeader new_phdr = (PageHeader) newPage;
+ Offset upper;
+ int i;
+
+ /*
+ * Start from an all-zeroes buffer, so that the free space of the rebuilt
+ * page never contains leftover stack contents.
+ */
+ memset(newPage, 0, BLCKSZ);
+
+ /* Copy the page header and the line pointer array */
+ memcpy(newPage, page, phdr->pd_lower);
+
+ /* Add special area */
+ if (is_toast)
+ {
+ ToastPageSpecial special;
+
+ new_phdr->pd_special = PageGetPageSize(newPage) - sizeof(ToastPageSpecialData);
+ special = (ToastPageSpecial) ((Pointer) (newPage) + new_phdr->pd_special);
+ special->pd_xid_base = xid_base;
+ }
+ else
+ {
+ HeapPageSpecial special;
+
+ new_phdr->pd_special = PageGetPageSize(newPage) - sizeof(HeapPageSpecialData);
+ special = (HeapPageSpecial) ((Pointer) (newPage) + new_phdr->pd_special);
+ special->pd_xid_base = xid_base;
+ special->pd_multi_base = multi_base;
+ }
+
+ /* sort ItemIdCompactData array into decreasing itemoff order */
+ qsort((char *) itemidbase, nitems, sizeof(ItemIdCompactData),
+ itemoffcompare);
+
+ /* Lay the tuples out downwards, starting right below the special area */
+ upper = new_phdr->pd_special;
+ for (i = 0; i < nitems; i++)
+ {
+ ItemIdCompact itemidptr = &itemidbase[i];
+ ItemId lp;
+ HeapTupleHeader old_htup;
+ HeapTupleHeader new_htup;
+ TransactionId xmax;
+
+ lp = PageGetItemId(page, itemidptr->offsetindex + 1);
+ old_htup = (HeapTupleHeader) PageGetItem(page, lp);
+ upper -= itemidptr->alignedlen;
+ memcpy((Pointer) newPage + upper,
+ (Pointer) page + itemidptr->itemoff,
+ itemidptr->alignedlen);
+ lp = PageGetItemId(newPage, itemidptr->offsetindex + 1);
+ lp->lp_off = upper;
+ new_htup = (HeapTupleHeader) PageGetItem(newPage, lp);
+
+ /* Convert xmax value */
+ new_htup->t_choice.t_heap.t_xmin = FrozenTransactionId;
+ xmax = HeapTupleHeaderGetDoubleXmax(old_htup);
+ if (!TransactionIdIsNormal(xmax))
+ {
+ /*
+ * The bases were computed from normal xmax values only, so a
+ * special value (e.g. no xmax at all) must not be shifted by them.
+ * NOTE(review): assumes special values are kept unshifted in the
+ * short format -- confirm against NormalTransactionIdToShort().
+ */
+ new_htup->t_choice.t_heap.t_xmax = (uint32) xmax;
+ }
+ else if (!(new_htup->t_infomask & HEAP_XMAX_IS_MULTI))
+ new_htup->t_choice.t_heap.t_xmax = NormalTransactionIdToShort(xid_base, xmax);
+ else
+ new_htup->t_choice.t_heap.t_xmax = NormalTransactionIdToShort(multi_base, xmax);
+ }
+
+ new_phdr->pd_upper = upper;
+
+ /* Replace the original page with the converted image */
+ memcpy(page, newPage, PageGetPageSize(newPage));
+ elog(DEBUG2, "convert heap page from double xmax to normal format");
+}
/*
* After removing or marking some line pointers unused, move the tuples to
@@ -471,21 +620,47 @@ typedef itemIdCompactData *itemIdCompact;
* Callers must ensure that nitems is > 0
*/
static void
-compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted)
+compactify_tuples(ItemIdCompact itemidbase, int nitems, Page page,
+ bool presorted, bool addspecial, bool is_toast)
{
PageHeader phdr = (PageHeader) page;
Offset upper;
Offset copy_tail;
Offset copy_head;
- itemIdCompact itemidptr;
+ ItemIdCompact itemidptr;
int i;
/* Code within will not work correctly if nitems == 0 */
Assert(nitems > 0);
- if (presorted)
+ /* Add special area to the heap page if possible */
+ if (addspecial)
{
+ TransactionId xid_min = FirstNormalTransactionId,
+ xid_max = FirstNormalTransactionId;
+ MultiXactId multi_min = FirstNormalTransactionId,
+ multi_max = FirstNormalTransactionId;
+ Assert(phdr->pd_special == PageGetPageSize(page));
+
+ heap_page_double_xmax_get_min_max(page, &xid_min, &xid_max,
+ &multi_min, &multi_max);
+
+ if (xid_max - xid_min < (TransactionId) (MaxShortTransactionId - FirstNormalTransactionId) &&
+ multi_max - multi_min < (TransactionId) (MaxShortTransactionId - FirstNormalTransactionId))
+ {
+ Assert(xid_min >= FirstNormalTransactionId);
+ Assert(multi_min >= FirstNormalTransactionId);
+ heap_page_add_special_area(itemidbase, nitems, page,
+ xid_min - FirstNormalTransactionId,
+ multi_min - FirstNormalTransactionId,
+ is_toast);
+ return;
+ }
+ }
+
+ if (presorted)
+ {
#ifdef USE_ASSERT_CHECKING
{
/*
@@ -696,14 +871,14 @@ compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorte
* the line pointer array following array truncation.
*/
void
-PageRepairFragmentation(Page page)
+PageRepairFragmentation(Page page, bool is_toast)
{
Offset pd_lower = ((PageHeader) page)->pd_lower;
Offset pd_upper = ((PageHeader) page)->pd_upper;
Offset pd_special = ((PageHeader) page)->pd_special;
Offset last_offset;
- itemIdCompactData itemidbase[MaxHeapTuplesPerPage];
- itemIdCompact itemidptr;
+ ItemIdCompactData itemidbase[MaxHeapTuplesPerPage];
+ ItemIdCompact itemidptr;
ItemId lp;
int nline,
nstorage,
@@ -777,11 +952,30 @@ PageRepairFragmentation(Page page)
nstorage = itemidptr - itemidbase;
if (nstorage == 0)
{
+ if (pd_special == PageGetPageSize(page))
+ {
+ if (is_toast)
+ {
+ pd_special = PageGetPageSize(page) - sizeof(ToastPageSpecialData);
+ ((PageHeader) page)->pd_special = pd_special;
+ ToastPageGetSpecial(page)->pd_xid_base = 0;
+ }
+ else
+ {
+ pd_special = PageGetPageSize(page) - sizeof(HeapPageSpecialData);
+ ((PageHeader) page)->pd_special = pd_special;
+ HeapPageGetSpecial(page)->pd_xid_base = 0;
+ HeapPageGetSpecial(page)->pd_multi_base = 0;
+ }
+ }
+
/* Page is completely empty, so just reset it quickly */
((PageHeader) page)->pd_upper = pd_special;
}
else
{
+ bool addspecial = false;
+
/* Need to compact the page the hard way */
if (totallen > (Size) (pd_special - pd_lower))
ereport(ERROR,
@@ -789,7 +983,25 @@ PageRepairFragmentation(Page page)
errmsg("corrupted item lengths: total %u, available space %u",
(unsigned int) totallen, pd_special - pd_lower)));
- compactify_tuples(itemidbase, nstorage, page, presorted);
+ /*
+ * Try to add special area to the heap page if it has enough of free
+ * space.
+ */
+ if (pd_special == PageGetPageSize(page))
+ {
+ Size special_size,
+ actual_size;
+
+ special_size = is_toast ? sizeof(ToastPageSpecialData) :
+ sizeof(HeapPageSpecialData);
+ actual_size = (Size) (pd_special - pd_lower) - totallen;
+
+ if (actual_size >= special_size)
+ addspecial = true;
+ }
+
+ compactify_tuples(itemidbase, nstorage, page, presorted, addspecial,
+ is_toast);
}
if (finalusedlp != nline)
@@ -992,6 +1204,9 @@ PageGetHeapFreeSpace(Page page)
{
Size space;
+ if (HeapPageIsDoubleXmax(page))
+ return 0;
+
space = PageGetFreeSpace(page);
if (space > 0)
{
@@ -1165,9 +1380,9 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
Offset pd_upper = phdr->pd_upper;
Offset pd_special = phdr->pd_special;
Offset last_offset;
- itemIdCompactData itemidbase[MaxIndexTuplesPerPage];
+ ItemIdCompactData itemidbase[MaxIndexTuplesPerPage];
ItemIdData newitemids[MaxIndexTuplesPerPage];
- itemIdCompact itemidptr;
+ ItemIdCompact itemidptr;
ItemId lp;
int nline,
nused;
@@ -1275,7 +1490,12 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
/* and compactify the tuple data */
if (nused > 0)
- compactify_tuples(itemidbase, nused, page, presorted);
+ {
+ bool is_toast;
+
+ is_toast = BLCKSZ - pd_special == sizeof(ToastPageSpecialData);
+ compactify_tuples(itemidbase, nused, page, presorted, false, is_toast);
+ }
else
phdr->pd_upper = pd_special;
}
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 662b5b50db..9eebbe1d25 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3786,15 +3786,13 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx,
case 'm':
{
- unsigned long value;
- char *endptr;
+ char *endptr;
errno = 0;
- value = strtoul(optarg, &endptr, 0);
- start_mxid = value;
+ start_mxid = strtoull(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0 ||
- value != start_mxid) /* overflow */
+ !StartMultiXactIdIsValid(start_mxid))
{
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -3817,15 +3815,13 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx,
case 'o':
{
- unsigned long value;
- char *endptr;
+ char *endptr;
errno = 0;
- value = strtoul(optarg, &endptr, 0);
- start_mxoff = value;
+ start_mxoff = strtoull(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0 ||
- value != start_mxoff) /* overflow */
+ !StartMultiXactOffsetIsValid(start_mxoff))
{
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -3890,15 +3886,13 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx,
case 'x':
{
- unsigned long value;
- char *endptr;
+ char *endptr;
errno = 0;
- value = strtoul(optarg, &endptr, 0);
- start_xid = value;
+ start_xid = strtoull(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0 ||
- value != start_xid) /* overflow */
+ !StartTransactionIdIsValid(start_xid))
{
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -4085,7 +4079,6 @@ PostgresSingleUserMain(int argc, char *argv[],
PostgresMain(dbname, username);
}
-
/* ----------------------------------------------------------------
* PostgresMain
* postgres main loop -- all backends, interactive or otherwise loop here
diff --git a/src/backend/utils/adt/enum.c b/src/backend/utils/adt/enum.c
index 0cc7a6d8ad..04ca6ff16e 100644
--- a/src/backend/utils/adt/enum.c
+++ b/src/backend/utils/adt/enum.c
@@ -76,7 +76,7 @@ check_safe_enum_use(HeapTuple enumval_tup)
* Usually, a row would get hinted as committed when it's read or loaded
* into syscache; but just in case not, let's check the xmin directly.
*/
- xmin = HeapTupleHeaderGetXmin(enumval_tup->t_data);
+ xmin = HeapTupleGetXmin(enumval_tup);
if (!TransactionIdIsInProgress(xmin) &&
TransactionIdDidCommit(xmin))
return;
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index fd0d355789..78ffef1071 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -3661,6 +3661,7 @@ populate_recordset_record(PopulateRecordsetState *state, JsObject *obj)
tuple.t_len = HeapTupleHeaderGetDatumLength(tuphead);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
+ HeapTupleSetZeroBase(&tuple);
tuple.t_data = tuphead;
tuplestore_puttuple(state->tuple_store, &tuple);
diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c
index 14151bc81c..6fbdfdfb82 100644
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -77,7 +77,7 @@ VXIDGetDatum(BackendId bid, LocalTransactionId lxid)
* The representation is "<bid>/<lxid>", decimal and unsigned decimal
* respectively. Note that elog.c also knows how to format a vxid.
*/
- char vxidstr[32];
+ char vxidstr[64];
snprintf(vxidstr, sizeof(vxidstr), "%d/%llu", bid,
(unsigned long long) lxid);
@@ -291,7 +291,9 @@ pg_lock_status(PG_FUNCTION_ARGS)
break;
case LOCKTAG_TRANSACTION:
values[6] =
- TransactionIdGetDatum(instance->locktag.locktag_field1);
+ TransactionIdGetDatum(
+ (TransactionId) instance->locktag.locktag_field1 |
+ ((TransactionId) instance->locktag.locktag_field2 << 32));
nulls[1] = true;
nulls[2] = true;
nulls[3] = true;
@@ -303,7 +305,8 @@ pg_lock_status(PG_FUNCTION_ARGS)
break;
case LOCKTAG_VIRTUALTRANSACTION:
values[5] = VXIDGetDatum(instance->locktag.locktag_field1,
- instance->locktag.locktag_field2);
+ (TransactionId) instance->locktag.locktag_field2 |
+ ((TransactionId) instance->locktag.locktag_field3 << 32));
nulls[1] = true;
nulls[2] = true;
nulls[3] = true;
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index eadd8464ff..2f89562dc4 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -15,6 +15,7 @@
#include "postgres.h"
#include "access/htup_details.h"
+#include "access/xact.h"
#include "access/xlog.h"
#include "access/xlogprefetcher.h"
#include "catalog/pg_authid.h"
diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c
index db843a0fbf..5e08d02d64 100644
--- a/src/backend/utils/adt/rowtypes.c
+++ b/src/backend/utils/adt/rowtypes.c
@@ -327,6 +327,7 @@ record_out(PG_FUNCTION_ARGS)
tuple.t_len = HeapTupleHeaderGetDatumLength(rec);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
+ HeapTupleSetZeroBase(&tuple);
tuple.t_data = rec;
/*
@@ -694,6 +695,7 @@ record_send(PG_FUNCTION_ARGS)
tuple.t_len = HeapTupleHeaderGetDatumLength(rec);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
+ HeapTupleSetZeroBase(&tuple);
tuple.t_data = rec;
/*
@@ -844,10 +846,12 @@ record_cmp(FunctionCallInfo fcinfo)
tuple1.t_len = HeapTupleHeaderGetDatumLength(record1);
ItemPointerSetInvalid(&(tuple1.t_self));
tuple1.t_tableOid = InvalidOid;
+ HeapTupleSetZeroBase(&tuple1);
tuple1.t_data = record1;
tuple2.t_len = HeapTupleHeaderGetDatumLength(record2);
ItemPointerSetInvalid(&(tuple2.t_self));
tuple2.t_tableOid = InvalidOid;
+ HeapTupleSetZeroBase(&tuple2);
tuple2.t_data = record2;
/*
@@ -1089,10 +1093,12 @@ record_eq(PG_FUNCTION_ARGS)
ItemPointerSetInvalid(&(tuple1.t_self));
tuple1.t_tableOid = InvalidOid;
tuple1.t_data = record1;
+ HeapTupleSetZeroBase(&tuple1);
tuple2.t_len = HeapTupleHeaderGetDatumLength(record2);
ItemPointerSetInvalid(&(tuple2.t_self));
tuple2.t_tableOid = InvalidOid;
tuple2.t_data = record2;
+ HeapTupleSetZeroBase(&tuple2);
/*
* We arrange to look up the needed comparison info just once per series
@@ -1351,10 +1357,12 @@ record_image_cmp(FunctionCallInfo fcinfo)
ItemPointerSetInvalid(&(tuple1.t_self));
tuple1.t_tableOid = InvalidOid;
tuple1.t_data = record1;
+ HeapTupleSetZeroBase(&tuple1);
tuple2.t_len = HeapTupleHeaderGetDatumLength(record2);
ItemPointerSetInvalid(&(tuple2.t_self));
tuple2.t_tableOid = InvalidOid;
tuple2.t_data = record2;
+ HeapTupleSetZeroBase(&tuple2);
/*
* We arrange to look up the needed comparison info just once per series
@@ -1597,10 +1605,12 @@ record_image_eq(PG_FUNCTION_ARGS)
ItemPointerSetInvalid(&(tuple1.t_self));
tuple1.t_tableOid = InvalidOid;
tuple1.t_data = record1;
+ HeapTupleSetZeroBase(&tuple1);
tuple2.t_len = HeapTupleHeaderGetDatumLength(record2);
ItemPointerSetInvalid(&(tuple2.t_self));
tuple2.t_tableOid = InvalidOid;
tuple2.t_data = record2;
+ HeapTupleSetZeroBase(&tuple2);
/*
* We arrange to look up the needed comparison info just once per series
@@ -1800,6 +1810,7 @@ hash_record(PG_FUNCTION_ARGS)
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
tuple.t_data = record;
+ HeapTupleSetZeroBase(&tuple);
/*
* We arrange to look up the needed hashing info just once per series of
@@ -1921,6 +1932,7 @@ hash_record_extended(PG_FUNCTION_ARGS)
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
tuple.t_data = record;
+ HeapTupleSetZeroBase(&tuple);
/*
* We arrange to look up the needed hashing info just once per series of
diff --git a/src/backend/utils/adt/xid.c b/src/backend/utils/adt/xid.c
index e4b4952a28..056752cfaf 100644
--- a/src/backend/utils/adt/xid.c
+++ b/src/backend/utils/adt/xid.c
@@ -32,16 +32,16 @@ xidin(PG_FUNCTION_ARGS)
{
char *str = PG_GETARG_CSTRING(0);
- PG_RETURN_TRANSACTIONID((TransactionId) strtoul(str, NULL, 0));
+ PG_RETURN_TRANSACTIONID((TransactionId) strtou64(str, NULL, 0));
}
Datum
xidout(PG_FUNCTION_ARGS)
{
TransactionId transactionId = PG_GETARG_TRANSACTIONID(0);
- char *result = (char *) palloc(16);
+ char *result = (char *) palloc(32);
- snprintf(result, 16, "%lu", (unsigned long) transactionId);
+ snprintf(result, 32, "%llu", (unsigned long long) transactionId);
PG_RETURN_CSTRING(result);
}
@@ -52,8 +52,13 @@ Datum
xidrecv(PG_FUNCTION_ARGS)
{
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ uint32 lo,
+ hi;
+
+ /*
+ * The value arrives as two 32-bit words, low word first, which is what
+ * xidsend() writes. pq_getmsgint() cannot read an 8-byte integer, so the
+ * size passed must be that of one word, not sizeof(TransactionId).
+ */
+ lo = (uint32) pq_getmsgint(buf, sizeof(lo));
+ hi = (uint32) pq_getmsgint(buf, sizeof(hi));
- PG_RETURN_TRANSACTIONID((TransactionId) pq_getmsgint(buf, sizeof(TransactionId)));
+ PG_RETURN_TRANSACTIONID((uint64) lo + ((uint64) hi << 32));
}
/*
@@ -64,9 +69,15 @@ xidsend(PG_FUNCTION_ARGS)
{
TransactionId arg1 = PG_GETARG_TRANSACTIONID(0);
StringInfoData buf;
+ uint32 lo,
+ hi;
+
+ lo = (uint32) (arg1 & 0xFFFFFFFF);
+ hi = (uint32) (arg1 >> 32);
pq_begintypsend(&buf);
- pq_sendint32(&buf, arg1);
+ pq_sendint(&buf, lo, sizeof(lo));
+ pq_sendint(&buf, hi, sizeof(hi));
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
@@ -105,9 +116,9 @@ xid_age(PG_FUNCTION_ARGS)
/* Permanent XIDs are always infinitely old */
+ /* The age is returned as a 64-bit integer, so use the 64-bit maximum */
if (!TransactionIdIsNormal(xid))
- PG_RETURN_INT32(INT_MAX);
+ PG_RETURN_INT64(PG_INT64_MAX);
- PG_RETURN_INT32((int32) (now - xid));
+ PG_RETURN_INT64((int64) (now - xid));
}
/*
@@ -120,9 +131,9 @@ mxid_age(PG_FUNCTION_ARGS)
MultiXactId now = ReadNextMultiXactId();
+ /*
+ * Invalid multixact ids are reported with the largest possible age. The
+ * age is returned as a 64-bit integer, so use the 64-bit maximum.
+ */
if (!MultiXactIdIsValid(xid))
- PG_RETURN_INT32(INT_MAX);
+ PG_RETURN_INT64(PG_INT64_MAX);
- PG_RETURN_INT32((int32) (now - xid));
+ PG_RETURN_INT64((int64) (now - xid));
}
/*
@@ -184,7 +195,7 @@ xid8in(PG_FUNCTION_ARGS)
{
char *str = PG_GETARG_CSTRING(0);
- PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(strtou64(str, NULL, 0)));
+ PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromXid(strtou64(str, NULL, 0)));
}
Datum
@@ -193,7 +204,7 @@ xid8out(PG_FUNCTION_ARGS)
FullTransactionId fxid = PG_GETARG_FULLTRANSACTIONID(0);
char *result = (char *) palloc(21);
- snprintf(result, 21, UINT64_FORMAT, U64FromFullTransactionId(fxid));
+ snprintf(result, 21, UINT64_FORMAT, XidFromFullTransactionId(fxid));
PG_RETURN_CSTRING(result);
}
@@ -204,7 +215,7 @@ xid8recv(PG_FUNCTION_ARGS)
uint64 value;
value = (uint64) pq_getmsgint64(buf);
- PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(value));
+ PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromXid(value));
}
Datum
@@ -214,7 +225,7 @@ xid8send(PG_FUNCTION_ARGS)
StringInfoData buf;
pq_begintypsend(&buf);
- pq_sendint64(&buf, (uint64) U64FromFullTransactionId(arg1));
+ pq_sendint64(&buf, (uint64) XidFromFullTransactionId(arg1));
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
diff --git a/src/backend/utils/adt/xid8funcs.c b/src/backend/utils/adt/xid8funcs.c
index d8e40b3b96..6257d985d6 100644
--- a/src/backend/utils/adt/xid8funcs.c
+++ b/src/backend/utils/adt/xid8funcs.c
@@ -80,8 +80,7 @@ typedef struct
* It is an ERROR if the xid is in the future. Otherwise, returns true if
* the transaction is still new enough that we can determine whether it
* committed and false otherwise. If *extracted_xid is not NULL, it is set
- * to the low 32 bits of the transaction ID (i.e. the actual XID, without the
- * epoch).
+ * to the actual transaction ID.
*
* The caller must hold XactTruncationLock since it's dealing with arbitrary
* XIDs, and must continue to hold it until it's done with any clog lookups
@@ -90,15 +89,10 @@ typedef struct
static bool
TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid)
{
- uint32 xid_epoch = EpochFromFullTransactionId(fxid);
TransactionId xid = XidFromFullTransactionId(fxid);
- uint32 now_epoch;
- TransactionId now_epoch_next_xid;
FullTransactionId now_fullxid;
now_fullxid = ReadNextFullTransactionId();
- now_epoch_next_xid = XidFromFullTransactionId(now_fullxid);
- now_epoch = EpochFromFullTransactionId(now_fullxid);
if (extracted_xid != NULL)
*extracted_xid = xid;
@@ -115,7 +109,7 @@ TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("transaction ID %llu is in the future",
- (unsigned long long) U64FromFullTransactionId(fxid))));
+ (unsigned long long) XidFromFullTransactionId(fxid))));
/*
* ShmemVariableCache->oldestClogXid is protected by XactTruncationLock,
@@ -127,48 +121,15 @@ TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid)
Assert(LWLockHeldByMe(XactTruncationLock));
/*
- * If the transaction ID has wrapped around, it's definitely too old to
- * determine the commit status. Otherwise, we can compare it to
- * ShmemVariableCache->oldestClogXid to determine whether the relevant
- * CLOG entry is guaranteed to still exist.
+ * We compare xid to ShmemVariableCache->oldestClogXid to determine
+ * whether the relevant CLOG entry is guaranteed to still exist.
*/
- if (xid_epoch + 1 < now_epoch
- || (xid_epoch + 1 == now_epoch && xid < now_epoch_next_xid)
- || TransactionIdPrecedes(xid, ShmemVariableCache->oldestClogXid))
+ if (TransactionIdPrecedes(xid, ShmemVariableCache->oldestClogXid))
return false;
return true;
}
-/*
- * Convert a TransactionId obtained from a snapshot held by the caller to a
- * FullTransactionId. Use next_fxid as a reference FullTransactionId, so that
- * we can compute the high order bits. It must have been obtained by the
- * caller with ReadNextFullTransactionId() after the snapshot was created.
- */
-static FullTransactionId
-widen_snapshot_xid(TransactionId xid, FullTransactionId next_fxid)
-{
- TransactionId next_xid = XidFromFullTransactionId(next_fxid);
- uint32 epoch = EpochFromFullTransactionId(next_fxid);
-
- /* Special transaction ID. */
- if (!TransactionIdIsNormal(xid))
- return FullTransactionIdFromEpochAndXid(0, xid);
-
- /*
- * The 64 bit result must be <= next_fxid, since next_fxid hadn't been
- * issued yet when the snapshot was created. Every TransactionId in the
- * snapshot must therefore be from the same epoch as next_fxid, or the
- * epoch before. We know this because next_fxid is never allow to get
- * more than one epoch ahead of the TransactionIds in any snapshot.
- */
- if (xid > next_xid)
- epoch--;
-
- return FullTransactionIdFromEpochAndXid(epoch, xid);
-}
-
/*
* txid comparator for qsort/bsearch
*/
@@ -295,12 +256,12 @@ parse_snapshot(const char *str)
char *endp;
StringInfo buf;
- xmin = FullTransactionIdFromU64(strtou64(str, &endp, 10));
+ xmin = FullTransactionIdFromXid(strtou64(str, &endp, 10));
if (*endp != ':')
goto bad_format;
str = endp + 1;
- xmax = FullTransactionIdFromU64(strtou64(str, &endp, 10));
+ xmax = FullTransactionIdFromXid(strtou64(str, &endp, 10));
if (*endp != ':')
goto bad_format;
str = endp + 1;
@@ -318,7 +279,7 @@ parse_snapshot(const char *str)
while (*str != '\0')
{
/* read next value */
- val = FullTransactionIdFromU64(strtou64(str, &endp, 10));
+ val = FullTransactionIdFromXid(strtou64(str, &endp, 10));
str = endp;
/* require the input to be in order */
@@ -397,7 +358,6 @@ pg_current_snapshot(PG_FUNCTION_ARGS)
uint32 nxip,
i;
Snapshot cur;
- FullTransactionId next_fxid = ReadNextFullTransactionId();
cur = GetActiveSnapshot();
if (cur == NULL)
@@ -415,11 +375,11 @@ pg_current_snapshot(PG_FUNCTION_ARGS)
snap = palloc(PG_SNAPSHOT_SIZE(nxip));
/* fill */
- snap->xmin = widen_snapshot_xid(cur->xmin, next_fxid);
- snap->xmax = widen_snapshot_xid(cur->xmax, next_fxid);
+ snap->xmin = FullTransactionIdFromXid(cur->xmin);
+ snap->xmax = FullTransactionIdFromXid(cur->xmax);
snap->nxip = nxip;
for (i = 0; i < nxip; i++)
- snap->xip[i] = widen_snapshot_xid(cur->xip[i], next_fxid);
+ snap->xip[i] = FullTransactionIdFromXid(cur->xip[i]);
/*
* We want them guaranteed to be in ascending order. This also removes
@@ -467,16 +427,16 @@ pg_snapshot_out(PG_FUNCTION_ARGS)
initStringInfo(&str);
appendStringInfo(&str, UINT64_FORMAT ":",
- U64FromFullTransactionId(snap->xmin));
+ XidFromFullTransactionId(snap->xmin));
appendStringInfo(&str, UINT64_FORMAT ":",
- U64FromFullTransactionId(snap->xmax));
+ XidFromFullTransactionId(snap->xmax));
for (i = 0; i < snap->nxip; i++)
{
if (i > 0)
appendStringInfoChar(&str, ',');
appendStringInfo(&str, UINT64_FORMAT,
- U64FromFullTransactionId(snap->xip[i]));
+ XidFromFullTransactionId(snap->xip[i]));
}
PG_RETURN_CSTRING(str.data);
@@ -505,8 +465,8 @@ pg_snapshot_recv(PG_FUNCTION_ARGS)
if (nxip < 0 || nxip > PG_SNAPSHOT_MAX_NXIP)
goto bad_format;
- xmin = FullTransactionIdFromU64((uint64) pq_getmsgint64(buf));
- xmax = FullTransactionIdFromU64((uint64) pq_getmsgint64(buf));
+ xmin = FullTransactionIdFromXid((uint64) pq_getmsgint64(buf));
+ xmax = FullTransactionIdFromXid((uint64) pq_getmsgint64(buf));
if (!FullTransactionIdIsValid(xmin) ||
!FullTransactionIdIsValid(xmax) ||
FullTransactionIdPrecedes(xmax, xmin))
@@ -519,7 +479,7 @@ pg_snapshot_recv(PG_FUNCTION_ARGS)
for (i = 0; i < nxip; i++)
{
FullTransactionId cur =
- FullTransactionIdFromU64((uint64) pq_getmsgint64(buf));
+ FullTransactionIdFromXid((uint64) pq_getmsgint64(buf));
if (FullTransactionIdPrecedes(cur, last) ||
FullTransactionIdPrecedes(cur, xmin) ||
@@ -564,10 +524,10 @@ pg_snapshot_send(PG_FUNCTION_ARGS)
pq_begintypsend(&buf);
pq_sendint32(&buf, snap->nxip);
- pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xmin));
- pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xmax));
+ pq_sendint64(&buf, (int64) XidFromFullTransactionId(snap->xmin));
+ pq_sendint64(&buf, (int64) XidFromFullTransactionId(snap->xmax));
for (i = 0; i < snap->nxip; i++)
- pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xip[i]));
+ pq_sendint64(&buf, (int64) XidFromFullTransactionId(snap->xip[i]));
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
@@ -655,8 +615,7 @@ pg_snapshot_xip(PG_FUNCTION_ARGS)
* Report the status of a recent transaction ID, or null for wrapped,
* truncated away or otherwise too old XIDs.
*
- * The passed epoch-qualified xid is treated as a normal xid, not a
- * multixact id.
+ * The passed xid is treated as a normal xid, not a multixact id.
*
* If it points to a committed subxact the result is the subxact status even
* though the parent xact may still be in progress or may have aborted.
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 38e943fab2..b69aa01cf9 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -1839,6 +1839,7 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments,
memcpy((char *) ct->tuple.t_data,
(const char *) dtp->t_data,
dtp->t_len);
+ HeapTupleCopyBase(&ct->tuple, dtp);
MemoryContextSwitchTo(oldcxt);
if (dtp != ntp)
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 00dc0f2403..233530cf35 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -2307,8 +2307,7 @@ RelationReloadIndexInfo(Relation relation)
relation->rd_index->indislive = index->indislive;
/* Copy xmin too, as that is needed to make sense of indcheckxmin */
- HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data,
- HeapTupleHeaderGetXmin(tuple->t_data));
+ HeapTupleSetXmin(relation->rd_indextuple, HeapTupleGetXmin(tuple));
ReleaseSysCache(tuple);
}
diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index a9dd068095..34fe6bea83 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -540,7 +540,7 @@ lookup_C_func(HeapTuple procedureTuple)
NULL);
if (entry == NULL)
return NULL; /* no such entry */
- if (entry->fn_xmin == HeapTupleHeaderGetRawXmin(procedureTuple->t_data) &&
+ if (entry->fn_xmin == HeapTupleGetRawXmin(procedureTuple) &&
ItemPointerEquals(&entry->fn_tid, &procedureTuple->t_self))
return entry; /* OK */
return NULL; /* entry is out of date */
@@ -576,7 +576,7 @@ record_C_func(HeapTuple procedureTuple,
HASH_ENTER,
&found);
/* OID is already filled in */
- entry->fn_xmin = HeapTupleHeaderGetRawXmin(procedureTuple->t_data);
+ entry->fn_xmin = HeapTupleGetRawXmin(procedureTuple);
entry->fn_tid = procedureTuple->t_self;
entry->user_fn = user_fn;
entry->inforec = inforec;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 12a1f30f7c..1414b1aef2 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -857,6 +857,14 @@ build_guc_variables(void)
num_vars++;
}
+ for (i = 0; ConfigureNamesInt64[i].gen.name; i++)
+ {
+ struct config_int64 *conf = &ConfigureNamesInt64[i];
+
+ conf->gen.vartype = PGC_INT64;
+ num_vars++;
+ }
+
for (i = 0; ConfigureNamesReal[i].gen.name; i++)
{
struct config_real *conf = &ConfigureNamesReal[i];
@@ -897,6 +905,9 @@ build_guc_variables(void)
for (i = 0; ConfigureNamesInt[i].gen.name; i++)
guc_vars[num_vars++] = &ConfigureNamesInt[i].gen;
+ for (i = 0; ConfigureNamesInt64[i].gen.name; i++)
+ guc_vars[num_vars++] = &ConfigureNamesInt64[i].gen;
+
for (i = 0; ConfigureNamesReal[i].gen.name; i++)
guc_vars[num_vars++] = &ConfigureNamesReal[i].gen;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 8869eb4112..5a7ef089f4 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -2438,74 +2438,6 @@ struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
- {
- {"vacuum_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
- gettext_noop("Minimum age at which VACUUM should freeze a table row."),
- NULL
- },
- &vacuum_freeze_min_age,
- 50000000, 0, 1000000000,
- NULL, NULL, NULL
- },
-
- {
- {"vacuum_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
- gettext_noop("Age at which VACUUM should scan whole table to freeze tuples."),
- NULL
- },
- &vacuum_freeze_table_age,
- 150000000, 0, 2000000000,
- NULL, NULL, NULL
- },
-
- {
- {"vacuum_multixact_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
- gettext_noop("Minimum age at which VACUUM should freeze a MultiXactId in a table row."),
- NULL
- },
- &vacuum_multixact_freeze_min_age,
- 5000000, 0, 1000000000,
- NULL, NULL, NULL
- },
-
- {
- {"vacuum_multixact_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
- gettext_noop("Multixact age at which VACUUM should scan whole table to freeze tuples."),
- NULL
- },
- &vacuum_multixact_freeze_table_age,
- 150000000, 0, 2000000000,
- NULL, NULL, NULL
- },
-
- {
- {"vacuum_defer_cleanup_age", PGC_SIGHUP, REPLICATION_PRIMARY,
- gettext_noop("Number of transactions by which VACUUM and HOT cleanup should be deferred, if any."),
- NULL
- },
- &vacuum_defer_cleanup_age,
- 0, 0, 1000000, /* see ComputeXidHorizons */
- NULL, NULL, NULL
- },
- {
- {"vacuum_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
- gettext_noop("Age at which VACUUM should trigger failsafe to avoid a wraparound outage."),
- NULL
- },
- &vacuum_failsafe_age,
- 1600000000, 0, 2100000000,
- NULL, NULL, NULL
- },
- {
- {"vacuum_multixact_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
- gettext_noop("Multixact age at which VACUUM should trigger failsafe to avoid a wraparound outage."),
- NULL
- },
- &vacuum_multixact_failsafe_age,
- 1600000000, 0, 2100000000,
- NULL, NULL, NULL
- },
-
/*
* See also CheckRequiredParameterValues() if this parameter changes
*/
@@ -3116,28 +3048,6 @@ struct config_int ConfigureNamesInt[] =
50, 0, INT_MAX,
NULL, NULL, NULL
},
- {
- /* see varsup.c for why this is PGC_POSTMASTER not PGC_SIGHUP */
- {"autovacuum_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM,
- gettext_noop("Age at which to autovacuum a table to prevent transaction ID wraparound."),
- NULL
- },
- &autovacuum_freeze_max_age,
-
- /* see vacuum_failsafe_age if you change the upper-limit value. */
- 200000000, 100000, 2000000000,
- NULL, NULL, NULL
- },
- {
- /* see multixact.c for why this is PGC_POSTMASTER not PGC_SIGHUP */
- {"autovacuum_multixact_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM,
- gettext_noop("Multixact age at which to autovacuum a table to prevent multixact wraparound."),
- NULL
- },
- &autovacuum_multixact_freeze_max_age,
- 400000000, 10000, 2000000000,
- NULL, NULL, NULL
- },
{
/* see max_connections */
{"autovacuum_max_workers", PGC_POSTMASTER, AUTOVACUUM,
@@ -3415,6 +3325,96 @@ struct config_int ConfigureNamesInt[] =
struct config_int64 ConfigureNamesInt64[] =
{
+ {
+ {"vacuum_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("Minimum age at which VACUUM should freeze a table row."),
+ NULL
+ },
+ &vacuum_freeze_min_age,
+ INT64CONST(50000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF),
+ NULL, NULL, NULL
+ },
+
+ {
+ {"vacuum_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("Age at which VACUUM should scan whole table to freeze tuples."),
+ NULL
+ },
+ &vacuum_freeze_table_age,
+ INT64CONST(150000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF),
+ NULL, NULL, NULL
+ },
+
+ {
+ {"vacuum_multixact_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("Minimum age at which VACUUM should freeze a MultiXactId in a table row."),
+ NULL
+ },
+ &vacuum_multixact_freeze_min_age,
+ INT64CONST(5000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF),
+ NULL, NULL, NULL
+ },
+
+ {
+ {"vacuum_multixact_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("Multixact age at which VACUUM should scan whole table to freeze tuples."),
+ NULL
+ },
+ &vacuum_multixact_freeze_table_age,
+ INT64CONST(150000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF),
+ NULL, NULL, NULL
+ },
+
+ {
+ {"vacuum_defer_cleanup_age", PGC_SIGHUP, REPLICATION_PRIMARY,
+ gettext_noop("Number of transactions by which VACUUM and HOT cleanup should be deferred, if any."),
+ NULL
+ },
+ &vacuum_defer_cleanup_age,
+ INT64CONST(0), INT64CONST(0), INT64CONST(1000000), /* see ComputeXidHorizons */
+ NULL, NULL, NULL
+ },
+ {
+ {"vacuum_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("Age at which VACUUM should trigger failsafe to avoid a wraparound outage."),
+ NULL
+ },
+ &vacuum_failsafe_age,
+ INT64CONST(1600000000), INT64CONST(0), INT64CONST(2100000000),
+ NULL, NULL, NULL
+ },
+ {
+ {"vacuum_multixact_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("Multixact age at which VACUUM should trigger failsafe to avoid a wraparound outage."),
+ NULL
+ },
+ &vacuum_multixact_failsafe_age,
+ INT64CONST(1600000000), INT64CONST(0), INT64CONST(2100000000),
+ NULL, NULL, NULL
+ },
+ {
+ /* see varsup.c for why this is PGC_POSTMASTER not PGC_SIGHUP */
+ {"autovacuum_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM,
+ gettext_noop("Age at which to autovacuum a table to prevent transaction ID wraparound."),
+ NULL
+ },
+ &autovacuum_freeze_max_age,
+
+ /* see vacuum_failsafe_age if you change the upper-limit value. */
+ INT64CONST(10000000000), INT64CONST(100000), INT64CONST(0x7FFFFFFFFFFFFFFF),
+ NULL, NULL, NULL
+ },
+ {
+ /* see multixact.c for why this is PGC_POSTMASTER not PGC_SIGHUP */
+ {"autovacuum_multixact_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM,
+ gettext_noop("Multixact age at which to autovacuum a table to prevent multixact wraparound."),
+ NULL
+ },
+ &autovacuum_multixact_freeze_max_age,
+ INT64CONST(20000000000), INT64CONST(10000), INT64CONST(0x7FFFFFFFFFFFFFFF),
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/help_config.c b/src/backend/utils/misc/help_config.c
index 61c83f3590..19a316ec99 100644
--- a/src/backend/utils/misc/help_config.c
+++ b/src/backend/utils/misc/help_config.c
@@ -33,6 +33,7 @@ typedef union
struct config_bool _bool;
struct config_real real;
struct config_int integer;
+ struct config_int64 integer8;
struct config_string string;
struct config_enum _enum;
} mixedStruct;
@@ -107,7 +108,12 @@ printMixedStruct(mixedStruct *structToPrint)
structToPrint->integer.min,
structToPrint->integer.max);
break;
-
+ case PGC_INT64:
+ printf("INT64\t%lld\t%lld\t%lld\t",
+ (long long) structToPrint->integer8.reset_val,
+ (long long) structToPrint->integer8.min,
+ (long long) structToPrint->integer8.max);
+ break;
case PGC_REAL:
printf("REAL\t%g\t%g\t%g\t",
structToPrint->real.reset_val,
diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c
index 4ab4a0a701..ffffdeccbb 100644
--- a/src/backend/utils/misc/pg_controldata.c
+++ b/src/backend/utils/misc/pg_controldata.c
@@ -165,7 +165,7 @@ pg_control_checkpoint(PG_FUNCTION_ARGS)
nulls[5] = false;
values[6] = CStringGetTextDatum(psprintf("%llu",
- (unsigned long long) U64FromFullTransactionId(ControlFile->checkPointCopy.nextXid)));
+ (unsigned long long) XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid)));
nulls[6] = false;
values[7] = ObjectIdGetDatum(ControlFile->checkPointCopy.nextOid);
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 868d21c351..d51f1d1863 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -647,9 +647,9 @@
#autovacuum_vacuum_insert_scale_factor = 0.2 # fraction of inserts over table
# size before insert vacuum
#autovacuum_analyze_scale_factor = 0.1 # fraction of table size before analyze
-#autovacuum_freeze_max_age = 200000000 # maximum XID age before forced vacuum
+#autovacuum_freeze_max_age = 10000000000 # maximum XID age before forced vacuum
# (change requires restart)
-#autovacuum_multixact_freeze_max_age = 400000000 # maximum multixact age
+#autovacuum_multixact_freeze_max_age = 20000000000 # maximum multixact age
# before forced vacuum
# (change requires restart)
#autovacuum_vacuum_cost_delay = 2ms # default vacuum cost delay for
diff --git a/src/backend/utils/sort/tuplesortvariants.c b/src/backend/utils/sort/tuplesortvariants.c
index afa5bdbf04..3ee2d869e6 100644
--- a/src/backend/utils/sort/tuplesortvariants.c
+++ b/src/backend/utils/sort/tuplesortvariants.c
@@ -1163,11 +1163,16 @@ writetup_cluster(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
HeapTuple tuple = (HeapTuple) stup->tuple;
- unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int);
+ unsigned int tuplen = tuple->t_len +
+ sizeof(ItemPointerData) +
+ 2 * sizeof(TransactionId) + /* tuple xmin, xmax */
+ sizeof(int);
/* We need to store t_self, but not other fields of HeapTupleData */
LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
LogicalTapeWrite(tape, &tuple->t_self, sizeof(ItemPointerData));
+ LogicalTapeWrite(tape, &tuple->t_xmin, sizeof(TransactionId));
+ LogicalTapeWrite(tape, &tuple->t_xmax, sizeof(TransactionId));
LogicalTapeWrite(tape, tuple->t_data, tuple->t_len);
if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
@@ -1179,7 +1184,10 @@ readtup_cluster(Tuplesortstate *state, SortTuple *stup,
{
TuplesortPublic *base = TuplesortstateGetPublic(state);
TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
- unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int);
+ unsigned int t_len = tuplen -
+ sizeof(ItemPointerData) -
+ 2 * sizeof(TransactionId) - /* tuple xmin, xmax */
+ sizeof(int);
HeapTuple tuple = (HeapTuple) tuplesort_readtup_alloc(state,
t_len + HEAPTUPLESIZE);
@@ -1187,6 +1195,8 @@ readtup_cluster(Tuplesortstate *state, SortTuple *stup,
tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
tuple->t_len = t_len;
LogicalTapeReadExact(tape, &tuple->t_self, sizeof(ItemPointerData));
+ LogicalTapeReadExact(tape, &tuple->t_xmin, sizeof(TransactionId));
+ LogicalTapeReadExact(tape, &tuple->t_xmax, sizeof(TransactionId));
/* We don't currently bother to reconstruct t_tableOid */
tuple->t_tableOid = InvalidOid;
/* Read in the tuple body */
diff --git a/src/backend/utils/time/combocid.c b/src/backend/utils/time/combocid.c
index 6613dc0534..f673624f68 100644
--- a/src/backend/utils/time/combocid.c
+++ b/src/backend/utils/time/combocid.c
@@ -101,12 +101,13 @@ static CommandId GetRealCmax(CommandId combocid);
*/
CommandId
-HeapTupleHeaderGetCmin(HeapTupleHeader tup)
+HeapTupleGetCmin(HeapTuple tuple)
{
+ HeapTupleHeader tup = tuple->t_data;
CommandId cid = HeapTupleHeaderGetRawCommandId(tup);
Assert(!(tup->t_infomask & HEAP_MOVED));
- Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tup)));
+ Assert(TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(tuple)));
if (tup->t_infomask & HEAP_COMBOCID)
return GetRealCmin(cid);
@@ -115,8 +116,9 @@ HeapTupleHeaderGetCmin(HeapTupleHeader tup)
}
CommandId
-HeapTupleHeaderGetCmax(HeapTupleHeader tup)
+HeapTupleGetCmax(HeapTuple tuple)
{
+ HeapTupleHeader tup = tuple->t_data;
CommandId cid = HeapTupleHeaderGetRawCommandId(tup);
Assert(!(tup->t_infomask & HEAP_MOVED));
@@ -128,7 +130,7 @@ HeapTupleHeaderGetCmax(HeapTupleHeader tup)
* things too much.
*/
Assert(CritSectionCount > 0 ||
- TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tup)));
+ TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(tuple)));
if (tup->t_infomask & HEAP_COMBOCID)
return GetRealCmax(cid);
@@ -150,7 +152,7 @@ HeapTupleHeaderGetCmax(HeapTupleHeader tup)
* changes the tuple in shared buffers.
*/
void
-HeapTupleHeaderAdjustCmax(HeapTupleHeader tup,
+HeapTupleHeaderAdjustCmax(HeapTuple tup,
CommandId *cmax,
bool *iscombo)
{
@@ -160,10 +162,10 @@ HeapTupleHeaderAdjustCmax(HeapTupleHeader tup,
* Test for HeapTupleHeaderXminCommitted() first, because it's cheaper
* than a TransactionIdIsCurrentTransactionId call.
*/
- if (!HeapTupleHeaderXminCommitted(tup) &&
- TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tup)))
+ if (!HeapTupleHeaderXminCommitted(tup->t_data) &&
+ TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(tup)))
{
- CommandId cmin = HeapTupleHeaderGetCmin(tup);
+ CommandId cmin = HeapTupleGetCmin(tup);
*cmax = GetComboCommandId(cmin, *cmax);
*iscombo = true;
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index f76bab65cb..2be450529f 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -950,15 +950,15 @@ SnapshotResetXmin(void)
if (pairingheap_is_empty(&RegisteredSnapshots))
{
- MyProc->xmin = InvalidTransactionId;
+ pg_atomic_write_u64(&MyProc->xmin, InvalidTransactionId);
return;
}
minSnapshot = pairingheap_container(SnapshotData, ph_node,
pairingheap_first(&RegisteredSnapshots));
- if (TransactionIdPrecedes(MyProc->xmin, minSnapshot->xmin))
- MyProc->xmin = minSnapshot->xmin;
+ if (TransactionIdPrecedes(pg_atomic_read_u64(&MyProc->xmin), minSnapshot->xmin))
+ pg_atomic_write_u64(&MyProc->xmin, minSnapshot->xmin);
}
/*
@@ -1111,7 +1111,7 @@ AtEOXact_Snapshot(bool isCommit, bool resetXmin)
if (resetXmin)
SnapshotResetXmin();
- Assert(resetXmin || MyProc->xmin == 0);
+ Assert(resetXmin || pg_atomic_read_u64(&MyProc->xmin) == 0);
}
@@ -1176,8 +1176,9 @@ ExportSnapshot(Snapshot snapshot)
* Generate file path for the snapshot. We start numbering of snapshots
* inside the transaction from 1.
*/
- snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d",
- MyProc->backendId, MyProc->lxid, list_length(exportedSnapshots) + 1);
+ snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X%08X-%d",
+ MyProc->backendId, (uint32) (MyProc->lxid >> 32),
+ (uint32) MyProc->lxid, list_length(exportedSnapshots) + 1);
/*
* Copy the snapshot into TopTransactionContext, add it to the
@@ -1353,7 +1354,7 @@ parseXidFromText(const char *prefix, char **s, const char *filename)
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid snapshot data in file \"%s\"", filename)));
ptr += prefixlen;
- if (sscanf(ptr, "%u", &val) != 1)
+ if (sscanf(ptr, "%" INT64_MODIFIER "u", &val) != 1)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid snapshot data in file \"%s\"", filename)));
@@ -1378,7 +1379,7 @@ parseVxidFromText(const char *prefix, char **s, const char *filename,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid snapshot data in file \"%s\"", filename)));
ptr += prefixlen;
- if (sscanf(ptr, "%d/%u", &vxid->backendId, &vxid->localTransactionId) != 2)
+ if (sscanf(ptr, "%d/%" INT64_MODIFIER "u", &vxid->backendId, &vxid->localTransactionId) != 2)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid snapshot data in file \"%s\"", filename)));
@@ -1837,7 +1838,7 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
*/
if (old_snapshot_threshold == 0)
{
- if (TransactionIdPrecedes(latest_xmin, MyProc->xmin)
+ if (TransactionIdPrecedes(latest_xmin, pg_atomic_read_u64(&MyProc->xmin))
&& TransactionIdFollows(latest_xmin, xlimit))
xlimit = latest_xmin;
@@ -2321,7 +2322,7 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
if (!snapshot->suboverflowed)
{
/* we have full data, so search subxip */
- if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
+ if (pg_lfind64(xid, snapshot->subxip, snapshot->subxcnt))
return true;
/* not there, fall through to search xip[] */
@@ -2343,7 +2344,7 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
return false;
}
- if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt))
+ if (pg_lfind64(xid, snapshot->xip, snapshot->xcnt))
return true;
}
else
@@ -2377,7 +2378,7 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
* indeterminate xid. We don't know whether it's top level or subxact
* but it doesn't matter. If it's present, the xid is visible.
*/
- if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt))
+ if (pg_lfind64(xid, snapshot->subxip, snapshot->subxcnt))
return true;
}
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index c56e1ac86c..f2812e1a1e 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -1305,7 +1305,7 @@ bootstrap_template1(void)
escape_quotes_bki(username));
/* relfrozenxid must not be less than FirstNormalTransactionId */
- sprintf(buf, "%u", Max(start_xid, 3));
+ sprintf(buf, "%llu", (unsigned long long) Max(start_xid, 3));
bki_lines = replace_token(bki_lines, "RECENTXMIN",
buf);
@@ -1328,13 +1328,13 @@ bootstrap_template1(void)
unsetenv("PGCLIENTENCODING");
snprintf(cmd, sizeof(cmd),
- "\"%s\" --boot -X %d %s %s %u %s %u %s %u %s %s %s",
+ "\"%s\" --boot -X %d %s %s %llu %s %llu %s %llu %s %s %s",
backend_exec,
wal_segment_size_mb * (1024 * 1024),
data_checksums ? "-k" : "",
- "-m", start_mxid,
- "-o", start_mxoff,
- "-x", start_xid,
+ "-m", (unsigned long long) start_mxid,
+ "-o", (unsigned long long) start_mxoff,
+ "-x", (unsigned long long) start_xid,
boot_options, extra_options,
debug ? "-d 5" : "");
@@ -2177,15 +2177,18 @@ usage(const char *progname)
printf(_(" --discard-caches set debug_discard_caches=1\n"));
printf(_(" -L DIRECTORY where to find the input files\n"));
printf(_(" -m, --multixact-id=START_MXID\n"
- " set initial database cluster multixact id\n"));
+ " set initial database cluster multixact id\n"
+ " max value is 2^62-1\n"));
printf(_(" -n, --no-clean do not clean up after errors\n"));
printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n"));
printf(_(" --no-instructions do not print instructions for next steps\n"));
printf(_(" -o, --multixact-offset=START_MXOFF\n"
- " set initial database cluster multixact offset\n"));
+ " set initial database cluster multixact offset\n"
+ " max value is 2^62-1\n"));
printf(_(" -s, --show show internal settings\n"));
printf(_(" -S, --sync-only only sync database files to disk, then exit\n"));
- printf(_(" -x, --xid=START_XID set initial database cluster xid\n"));
+ printf(_(" -x, --xid=START_XID set initial database cluster xid\n"
+ " max value is 2^62-1\n"));
printf(_("\nOther options:\n"));
printf(_(" -V, --version output version information, then exit\n"));
printf(_(" -?, --help show this help, then exit\n"));
@@ -2723,13 +2726,16 @@ initialize_data_directory(void)
setup_config();
if (start_mxid != 0)
- printf(_("selecting initial multixact id ... %u\n"), start_mxid);
+ printf(_("selecting initial multixact id ... %llu\n"),
+ (unsigned long long) start_mxid);
if (start_mxoff != 0)
- printf(_("selecting initial multixact offset ... %u\n"), start_mxoff);
+ printf(_("selecting initial multixact offset ... %llu\n"),
+ (unsigned long long) start_mxoff);
if (start_xid != 0)
- printf(_("selecting initial xid ... %u\n"), start_xid);
+ printf(_("selecting initial xid ... %llu\n"),
+ (unsigned long long) start_xid);
/* Bootstrap template1 */
bootstrap_template1();
@@ -2747,11 +2753,11 @@ initialize_data_directory(void)
fflush(stdout);
snprintf(cmd, sizeof(cmd),
- "\"%s\" %s %s %s %u %s %u %s %u template1 >%s",
+ "\"%s\" %s %s %s %llu %s %llu %s %llu template1 >%s",
backend_exec, backend_options, extra_options,
- "-m", start_mxid,
- "-o", start_mxoff,
- "-x", start_xid,
+ "-m", (unsigned long long) start_mxid,
+ "-o", (unsigned long long) start_mxoff,
+ "-x", (unsigned long long) start_xid,
DEVNULL);
PG_CMD_OPEN;
@@ -2918,15 +2924,13 @@ main(int argc, char *argv[])
break;
case 'm':
{
- unsigned long value;
- char *endptr;
+ char *endptr;
errno = 0;
- value = strtoul(optarg, &endptr, 0);
- start_mxid = value;
+ start_mxid = strtoull(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0 ||
- value != start_mxid) /* overflow */
+ !StartMultiXactIdIsValid(start_mxid))
{
pg_log_error("invalid initial database cluster multixact id");
exit(1);
@@ -2951,15 +2955,13 @@ main(int argc, char *argv[])
break;
case 'o':
{
- unsigned long value;
- char *endptr;
+ char *endptr;
errno = 0;
- value = strtoul(optarg, &endptr, 0);
- start_mxoff = value;
+ start_mxoff = strtoull(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0 ||
- value != start_mxoff) /* overflow */
+ !StartMultiXactOffsetIsValid(start_mxoff))
{
pg_log_error("invalid initial database cluster multixact offset");
exit(1);
@@ -3038,15 +3040,13 @@ main(int argc, char *argv[])
break;
case 'x':
{
- unsigned long value;
- char *endptr;
+ char *endptr;
errno = 0;
- value = strtoul(optarg, &endptr, 0);
- start_xid = value;
+ start_xid = strtoull(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0 ||
- value != start_xid) /* overflow */
+ !StartTransactionIdIsValid(start_xid))
{
pg_log_error("invalid value for initial database cluster xid");
exit(1);
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index 57a68091d4..d595e45bd3 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -149,28 +149,28 @@ command_fails(
# Set non-standard initial mxid/mxoff/xid.
command_fails_like(
- [ 'initdb', '-m', '4294967296', $datadir ],
+ [ 'initdb', '-m', '9223372036854775807', $datadir ],
qr/initdb: error: invalid initial database cluster multixact id/,
'fails for invalid initial database cluster multixact id');
command_fails_like(
- [ 'initdb', '-o', '4294967296', $datadir ],
+ [ 'initdb', '-o', '9223372036854775807', $datadir ],
qr/initdb: error: invalid initial database cluster multixact offset/,
'fails for invalid initial database cluster multixact offset');
command_fails_like(
- [ 'initdb', '-x', '4294967296', $datadir ],
+ [ 'initdb', '-x', '9223372036854775807', $datadir ],
qr/initdb: error: invalid value for initial database cluster xid/,
'fails for invalid initial database cluster xid');
command_fails_like(
- [ 'initdb', '-m', '0x100000000', $datadir ],
+ [ 'initdb', '-m', '0x10000000000000000', $datadir ],
qr/initdb: error: invalid initial database cluster multixact id/,
'fails for invalid initial database cluster multixact id');
command_fails_like(
- [ 'initdb', '-o', '0x100000000', $datadir ],
+ [ 'initdb', '-o', '0x10000000000000000', $datadir ],
qr/initdb: error: invalid initial database cluster multixact offset/,
'fails for invalid initial database cluster multixact offset');
command_fails_like(
- [ 'initdb', '-x', '0x100000000', $datadir ],
+ [ 'initdb', '-x', '0x10000000000000000', $datadir ],
qr/initdb: error: invalid value for initial database cluster xid/,
'fails for invalid initial database cluster xid');
diff --git a/src/bin/pg_amcheck/t/004_verify_heapam.pl b/src/bin/pg_amcheck/t/004_verify_heapam.pl
index 8050811105..6563eb798b 100644
--- a/src/bin/pg_amcheck/t/004_verify_heapam.pl
+++ b/src/bin/pg_amcheck/t/004_verify_heapam.pl
@@ -9,6 +9,8 @@ use PostgreSQL::Test::Utils;
use Test::More;
+use Data::Dumper;
+
# This regression test demonstrates that the pg_amcheck binary correctly
# identifies specific kinds of corruption within pages. To test this, we need
# a mechanism to create corrupt pages with predictable, repeatable corruption.
@@ -85,6 +87,62 @@ use Test::More;
use constant HEAPTUPLE_PACK_CODE => 'LLLSSSSSCCLLCCCCCCCCCCllLL';
use constant HEAPTUPLE_PACK_LENGTH => 58; # Total size
+use constant HEAPPAGE_SPECIAL_PACK_CODE => 'QQ';
+use constant HEAPPAGE_SPECIAL_PACK_LENGTH => 16;
+use constant HEAPPAGE_SIZE => 8192;
+
+# Some #define constants from access/htup_details.h for use while corrupting.
+use constant HEAP_HASNULL => 0x0001;
+use constant HEAP_XMAX_LOCK_ONLY => 0x0080;
+use constant HEAP_XMIN_COMMITTED => 0x0100;
+use constant HEAP_XMIN_INVALID => 0x0200;
+use constant HEAP_XMAX_COMMITTED => 0x0400;
+use constant HEAP_XMAX_INVALID => 0x0800;
+use constant HEAP_NATTS_MASK => 0x07FF;
+use constant HEAP_XMAX_IS_MULTI => 0x1000;
+use constant HEAP_KEYS_UPDATED => 0x2000;
+
+use constant FIRST_NORMAL_TRANSACTION_ID => 3;
+
+# Read the last HEAPPAGE_SPECIAL_PACK_LENGTH bytes of the page containing $offset; returns a hashref with keys pd_xid_base and pd_multi_base.
+sub read_special_data
+{
+ my ($fh, $offset) = @_;
+ my ($buffer, %special);
+
+ $offset -= $offset % HEAPPAGE_SIZE;    # round down to the first byte of the page
+ $offset += HEAPPAGE_SIZE - HEAPPAGE_SPECIAL_PACK_LENGTH;    # then forward to the trailing bytes of that page
+
+ sysseek($fh, $offset, 0)
+ or BAIL_OUT("sysseek failed: $!");
+ defined(sysread($fh, $buffer, HEAPPAGE_SPECIAL_PACK_LENGTH))    # only an undef result counts as failure; a short read is not detected
+ or BAIL_OUT("sysread failed: $!");
+
+ @_ = unpack(HEAPPAGE_SPECIAL_PACK_CODE, $buffer);    # reuse @_ so the bare "shift" calls below consume the unpacked values in order
+ %special = (
+ pd_xid_base => shift,
+ pd_multi_base => shift);
+ return \%special;
+}
+
+# Write $special->{pd_xid_base} and $special->{pd_multi_base} into the last HEAPPAGE_SPECIAL_PACK_LENGTH bytes of the page containing $offset.
+sub write_special_data
+{
+ my ($fh, $offset, $special) = @_;
+
+ $offset -= $offset % HEAPPAGE_SIZE;    # round down to the first byte of the page
+ $offset += HEAPPAGE_SIZE - HEAPPAGE_SPECIAL_PACK_LENGTH;    # then forward to the trailing bytes of that page
+
+ my $buffer = pack(
+ HEAPPAGE_SPECIAL_PACK_CODE,
+ $special->{pd_xid_base}, $special->{pd_multi_base});    # same field order as read_special_data unpacks
+
+ sysseek($fh, $offset, 0)
+ or BAIL_OUT("sysseek failed: $!");
+ defined(syswrite($fh, $buffer, HEAPPAGE_SPECIAL_PACK_LENGTH))    # only an undef result counts as failure; a short write is not detected
+ or BAIL_OUT("syswrite failed: $!");
+ return;
+}
# Read a tuple of our table from a heap page.
#
@@ -96,8 +154,9 @@ use constant HEAPTUPLE_PACK_LENGTH => 58; # Total size
#
sub read_tuple
{
- my ($fh, $offset) = @_;
+ my ($fh, $offset, $raw) = @_;
my ($buffer, %tup);
+
sysseek($fh, $offset, 0)
or BAIL_OUT("sysseek failed: $!");
defined(sysread($fh, $buffer, HEAPTUPLE_PACK_LENGTH))
@@ -133,6 +192,18 @@ sub read_tuple
c_va_toastrelid => shift);
# Stitch together the text for column 'b'
$tup{b} = join('', map { chr($tup{"b_body$_"}) } (1 .. 7));
+
+ if (!$raw)
+ {
+ my $special = read_special_data($fh, $offset);
+
+ $tup{t_xmin} += $special->{pd_xid_base};
+ my $is_multi = $tup{t_infomask} & HEAP_XMAX_IS_MULTI;
+ $tup{t_xmax} += !$is_multi ?
+ $special->{pd_xid_base} :
+ $special->{pd_multi_base};
+ }
+
return \%tup;
}
@@ -148,7 +219,33 @@ sub read_tuple
#
sub write_tuple
{
- my ($fh, $offset, $tup) = @_;
+ my ($fh, $offset, $tup, $raw) = @_;
+
+ if (!$raw)    # caller passed full values: convert them to offsets from the page's bases before packing
+ {
+ my $special = read_special_data($fh, $offset);
+
+ my $xmin = $tup->{t_xmin} - $special->{pd_xid_base};
+ die "tuple x_min $tup->{t_xmin} is too small for pd_xid_base $special->{pd_xid_base}"
+ if $xmin < 3;    # 3 is the value of FIRST_NORMAL_TRANSACTION_ID
+ $tup->{t_xmin} = $xmin;    # NOTE: this overwrites the caller's hash entry with the page-relative value
+
+ if (($tup->{t_infomask} & HEAP_XMAX_IS_MULTI) == 0)    # plain xmax is relative to pd_xid_base
+ {
+ my $xmax = $tup->{t_xmax} - $special->{pd_xid_base};
+ die "tuple x_max $tup->{t_xmax} is too small for pd_xid_base $special->{pd_xid_base}"
+ if $xmax < 3;
+ $tup->{t_xmax} = $xmax;
+ }
+ else    # multixact xmax is relative to pd_multi_base
+ {
+ my $xmax = $tup->{t_xmax} - $special->{pd_multi_base};
+ die "tuple multi x_max $tup->{t_xmax} is too small for pd_multi_base $special->{pd_multi_base}"
+ if $xmax < 3;
+ $tup->{t_xmax} = $xmax;
+ }
+ }
+
my $buffer = pack(
HEAPTUPLE_PACK_CODE,
$tup->{t_xmin}, $tup->{t_xmax},
@@ -171,6 +268,41 @@ sub write_tuple
return;
}
+# Rebase page $page onto lower pd_xid_base/pd_multi_base values, shifting the stored xmin/xmax of each tuple in @$lp_off on that page so that stored value + base is unchanged.
+sub fixup_page
+{
+ my ($fh, $page, $xid_base, $multi_base, $lp_off) = @_;
+ my $offset = $page * HEAPPAGE_SIZE;    # file offset of the first byte of the page
+ my $special = read_special_data($fh, $offset);
+
+ die "xid_base $xid_base should be lesser than existed $special->{pd_xid_base}"
+ if ($xid_base > $special->{pd_xid_base});
+ die "multi_base $multi_base should be lesser than existed $special->{pd_multi_base}"
+ if ($multi_base > $special->{pd_multi_base} && $special->{pd_multi_base} != 0);
+ return if ($xid_base == $special->{pd_xid_base} &&
+ $multi_base == $special->{pd_multi_base});    # nothing to move
+
+ my $xid_delta = $special->{pd_xid_base} - $xid_base;    # amount added to stored xids to compensate for the lower base
+ my $multi_delta = $special->{pd_multi_base} - $multi_base;
+
+ for my $off (@$lp_off)
+ {
+ # change only tuples on this page: skip offsets outside [$offset, $offset + HEAPPAGE_SIZE).
+ next if ($off < $offset || $off >= $offset + HEAPPAGE_SIZE);
+
+ my $tup = read_tuple($fh, $off, 1);    # raw mode: stored values, no base added
+ $tup->{t_xmin} += $xid_delta;
+ my $is_multi = $tup->{t_infomask} & HEAP_XMAX_IS_MULTI;
+ $tup->{t_xmax} += !$is_multi ? $xid_delta : $multi_delta;
+ write_tuple($fh, $off, $tup, 1);    # raw mode: write back without subtracting a base
+ }
+
+ $special->{pd_xid_base} = $xid_base;
+ $special->{pd_multi_base} = $multi_base;
+
+ write_special_data($fh, $offset, $special);
+}
+
# Set umask so test directories and files are created with default permissions
umask(0077);
@@ -233,6 +365,10 @@ my $relfrozenxid = $node->safe_psql('postgres',
q(select relfrozenxid from pg_class where relname = 'test'));
my $datfrozenxid = $node->safe_psql('postgres',
q(select datfrozenxid from pg_database where datname = 'postgres'));
+my $datminmxid = $node->safe_psql('postgres',
+ q(select datminmxid from pg_database where datname = 'postgres'));
+my $txid_current = $node->safe_psql('postgres',
+ q(select txid_current()));
# Sanity check that our 'test' table has a relfrozenxid newer than the
# datfrozenxid for the database, and that the datfrozenxid is greater than the
@@ -263,7 +399,7 @@ select lp_off from heap_page_items(get_raw_page('test', 'main', 0))
$node->stop;
my $file;
open($file, '+<', $relpath)
- or BAIL_OUT("open failed: $!");
+ or BAIL_OUT("open failed: $!");
binmode $file;
my $ENDIANNESS;
@@ -291,8 +427,13 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
# Determine endianness of current platform from the 1-byte varlena header
$ENDIANNESS = $tup->{b_header} == 0x11 ? "little" : "big";
}
+
+# Move the page's 64-bit xid bases slightly into the past, so that the tests
+# below can set xmin/xmax to values that are slightly in the past as well.
+fixup_page($file, 0, $datfrozenxid - 100, $datminmxid - 100, \@lp_off);
+
close($file)
- or BAIL_OUT("close failed: $!");
+ or BAIL_OUT("close failed: $!");
$node->start;
# Ok, Xids and page layout look ok. We can run corruption tests.
@@ -308,17 +449,6 @@ $node->command_ok([ 'pg_amcheck', '-p', $port, 'postgres' ],
$node->stop;
-# Some #define constants from access/htup_details.h for use while corrupting.
-use constant HEAP_HASNULL => 0x0001;
-use constant HEAP_XMAX_LOCK_ONLY => 0x0080;
-use constant HEAP_XMIN_COMMITTED => 0x0100;
-use constant HEAP_XMIN_INVALID => 0x0200;
-use constant HEAP_XMAX_COMMITTED => 0x0400;
-use constant HEAP_XMAX_INVALID => 0x0800;
-use constant HEAP_NATTS_MASK => 0x07FF;
-use constant HEAP_XMAX_IS_MULTI => 0x1000;
-use constant HEAP_KEYS_UPDATED => 0x2000;
-
# Helper function to generate a regular expression matching the header we
# expect verify_heapam() to return given which fields we expect to be non-null.
sub header
@@ -342,7 +472,7 @@ sub header
#
my @expected;
open($file, '+<', $relpath)
- or BAIL_OUT("open failed: $!");
+ or BAIL_OUT("open failed: $!");
binmode $file;
for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
@@ -367,7 +497,7 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
if ($offnum == 2)
{
# Corruptly set xmin < datfrozenxid
- my $xmin = 3;
+ my $xmin = $datfrozenxid - 12;
$tup->{t_xmin} = $xmin;
$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
$tup->{t_infomask} &= ~HEAP_XMIN_INVALID;
@@ -377,24 +507,24 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
}
elsif ($offnum == 3)
{
- # Corruptly set xmin < datfrozenxid, further back, noting circularity
- # of xid comparison. For a new cluster with epoch = 0, the corrupt
- # xmin will be interpreted as in the future
- $tup->{t_xmin} = 4026531839;
+ # Corruptly set xmin > next transaction id.
+ my $xmin = $relfrozenxid + 1000000;
+ $tup->{t_xmin} = $xmin;
$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
$tup->{t_infomask} &= ~HEAP_XMIN_INVALID;
push @expected,
- qr/${$header}xmin 4026531839 equals or exceeds next valid transaction ID \d+/;
+ qr/${$header}xmin $xmin equals or exceeds next valid transaction ID \d+/;
}
elsif ($offnum == 4)
{
- # Corruptly set xmax < relminmxid;
- $tup->{t_xmax} = 4026531839;
+ # Corruptly set xmax > next transaction id.
+ my $xmax = $relfrozenxid + 1000000;
+ $tup->{t_xmax} = $xmax;
$tup->{t_infomask} &= ~HEAP_XMAX_INVALID;
push @expected,
- qr/${$header}xmax 4026531839 equals or exceeds next valid transaction ID \d+/;
+ qr/${$header}xmax $xmax equals or exceeds next valid transaction ID \d+/;
}
elsif ($offnum == 5)
{
@@ -402,8 +532,8 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
$tup->{t_hoff} += 128;
push @expected,
- qr/${$header}data begins at offset 152 beyond the tuple length 58/,
- qr/${$header}tuple data should begin at byte 24, but actually begins at byte 152 \(3 attributes, no nulls\)/;
+ qr/${$header}data begins at offset 152 beyond the tuple length 58/,
+ qr/${$header}tuple data should begin at byte 24, but actually begins at byte 152 \(3 attributes, no nulls\)/;
}
elsif ($offnum == 6)
{
@@ -411,7 +541,7 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
$tup->{t_hoff} += 3;
push @expected,
- qr/${$header}tuple data should begin at byte 24, but actually begins at byte 27 \(3 attributes, no nulls\)/;
+ qr/${$header}tuple data should begin at byte 24, but actually begins at byte 27 \(3 attributes, no nulls\)/;
}
elsif ($offnum == 7)
{
@@ -419,7 +549,7 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
$tup->{t_hoff} -= 8;
push @expected,
- qr/${$header}tuple data should begin at byte 24, but actually begins at byte 16 \(3 attributes, no nulls\)/;
+ qr/${$header}tuple data should begin at byte 24, but actually begins at byte 16 \(3 attributes, no nulls\)/;
}
elsif ($offnum == 8)
{
@@ -427,7 +557,7 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
$tup->{t_hoff} -= 3;
push @expected,
- qr/${$header}tuple data should begin at byte 24, but actually begins at byte 21 \(3 attributes, no nulls\)/;
+ qr/${$header}tuple data should begin at byte 24, but actually begins at byte 21 \(3 attributes, no nulls\)/;
}
elsif ($offnum == 9)
{
@@ -435,7 +565,7 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
$tup->{t_infomask2} |= HEAP_NATTS_MASK;
push @expected,
- qr/${$header}number of attributes 2047 exceeds maximum expected for table 3/;
+ qr/${$header}number of attributes 2047 exceeds maximum expected for table 3/;
}
elsif ($offnum == 10)
{
@@ -447,7 +577,7 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
$tup->{t_bits} = 0xAA;
push @expected,
- qr/${$header}tuple data should begin at byte 280, but actually begins at byte 24 \(2047 attributes, has nulls\)/;
+ qr/${$header}tuple data should begin at byte 280, but actually begins at byte 24 \(2047 attributes, has nulls\)/;
}
elsif ($offnum == 11)
{
@@ -458,7 +588,7 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
$tup->{t_hoff} = 32;
push @expected,
- qr/${$header}number of attributes 67 exceeds maximum expected for table 3/;
+ qr/${$header}number of attributes 67 exceeds maximum expected for table 3/;
}
elsif ($offnum == 12)
{
@@ -482,7 +612,7 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
$header = header(0, $offnum, 1);
push @expected,
- qr/${header}attribute with length \d+ ends at offset \d+ beyond total tuple length \d+/;
+ qr/${header}attribute with length \d+ ends at offset \d+ beyond total tuple length \d+/;
}
elsif ($offnum == 13)
{
@@ -497,25 +627,27 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
# Set both HEAP_XMAX_COMMITTED and HEAP_XMAX_IS_MULTI
$tup->{t_infomask} |= HEAP_XMAX_COMMITTED;
$tup->{t_infomask} |= HEAP_XMAX_IS_MULTI;
- $tup->{t_xmax} = 4;
+ my $xmax = $datminmxid + 1000000;
+ $tup->{t_xmax} = $xmax;
push @expected,
- qr/${header}multitransaction ID 4 equals or exceeds next valid multitransaction ID 1/;
+ qr/${header}multitransaction ID $xmax equals or exceeds next valid multitransaction ID \d+/;
}
elsif ($offnum == 15) # Last offnum must equal ROWCOUNT
{
# Set both HEAP_XMAX_COMMITTED and HEAP_XMAX_IS_MULTI
$tup->{t_infomask} |= HEAP_XMAX_COMMITTED;
$tup->{t_infomask} |= HEAP_XMAX_IS_MULTI;
- $tup->{t_xmax} = 4000000000;
+ my $xmax = $datminmxid - 10;
+ $tup->{t_xmax} = $xmax;
push @expected,
- qr/${header}multitransaction ID 4000000000 precedes relation minimum multitransaction ID threshold 1/;
+ qr/${header}multitransaction ID $xmax precedes relation minimum multitransaction ID threshold \d+/;
}
write_tuple($file, $offset, $tup);
}
close($file)
- or BAIL_OUT("close failed: $!");
+ or BAIL_OUT("close failed: $!");
$node->start;
# Run pg_amcheck against the corrupt table with epoch=0, comparing actual
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index a8a46d5bf0..ffc89b3184 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -248,7 +248,7 @@ main(int argc, char *argv[])
printf(_("Latest checkpoint's full_page_writes: %s\n"),
ControlFile->checkPointCopy.fullPageWrites ? _("on") : _("off"));
printf(_("Latest checkpoint's NextXID: %llu\n"),
- (unsigned long long) U64FromFullTransactionId(ControlFile->checkPointCopy.nextXid));
+ (unsigned long long) XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid));
printf(_("Latest checkpoint's NextOID: %u\n"),
ControlFile->checkPointCopy.nextOid);
printf(_("Latest checkpoint's NextMultiXactId: %llu\n"),
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index dcb3b11d57..bab5773b2e 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -41,6 +41,7 @@
#include "access/attnum.h"
#include "access/sysattr.h"
#include "access/transam.h"
+#include "c.h"
#include "catalog/pg_aggregate_d.h"
#include "catalog/pg_am_d.h"
#include "catalog/pg_attribute_d.h"
@@ -2830,7 +2831,7 @@ dumpDatabase(Archive *fout)
*datistemplate,
*datconnlimit,
*tablespace;
- uint32 frozenxid,
+ uint64 frozenxid,
minmxid;
char *qdatname;
@@ -2891,8 +2892,8 @@ dumpDatabase(Archive *fout)
iculocale = PQgetvalue(res, 0, i_daticulocale);
else
iculocale = NULL;
- frozenxid = atooid(PQgetvalue(res, 0, i_frozenxid));
- minmxid = atooid(PQgetvalue(res, 0, i_minmxid));
+ frozenxid = strtou64(PQgetvalue(res, 0, i_frozenxid), NULL, 0);
+ minmxid = strtou64(PQgetvalue(res, 0, i_minmxid), NULL, 0);
dbdacl.acl = PQgetvalue(res, 0, i_datacl);
dbdacl.acldefault = PQgetvalue(res, 0, i_acldefault);
datistemplate = PQgetvalue(res, 0, i_datistemplate);
@@ -3178,10 +3179,16 @@ dumpDatabase(Archive *fout)
RelFileNumber relfilenumber;
appendPQExpBuffer(loHorizonQry, "UPDATE pg_catalog.pg_class\n"
- "SET relfrozenxid = '%u', relminmxid = '%u'\n"
+ "SET relfrozenxid = '%llu', relminmxid = '%llu'\n"
"WHERE oid = %u;\n",
- atooid(PQgetvalue(lo_res, i, ii_relfrozenxid)),
- atooid(PQgetvalue(lo_res, i, ii_relminmxid)),
+ (unsigned long long) strtou64(PQgetvalue(lo_res,
+ i,
+ ii_relfrozenxid),
+ NULL, 0),
+ (unsigned long long) strtou64(PQgetvalue(lo_res,
+ i,
+ ii_relminmxid),
+ NULL, 0),
atooid(PQgetvalue(lo_res, i, ii_oid)));
oid = atooid(PQgetvalue(lo_res, i, ii_oid));
@@ -6461,11 +6468,11 @@ getTables(Archive *fout, int *numTables)
tblinfo[i].relreplident = *(PQgetvalue(res, i, i_relreplident));
tblinfo[i].rowsec = (strcmp(PQgetvalue(res, i, i_relrowsec), "t") == 0);
tblinfo[i].forcerowsec = (strcmp(PQgetvalue(res, i, i_relforcerowsec), "t") == 0);
- tblinfo[i].frozenxid = atooid(PQgetvalue(res, i, i_relfrozenxid));
- tblinfo[i].toast_frozenxid = atooid(PQgetvalue(res, i, i_toastfrozenxid));
+ tblinfo[i].frozenxid = strtou64(PQgetvalue(res, i, i_relfrozenxid), NULL, 0);
+ tblinfo[i].toast_frozenxid = strtou64(PQgetvalue(res, i, i_toastfrozenxid), NULL, 0);
tblinfo[i].toast_oid = atooid(PQgetvalue(res, i, i_toastoid));
- tblinfo[i].minmxid = atooid(PQgetvalue(res, i, i_relminmxid));
- tblinfo[i].toast_minmxid = atooid(PQgetvalue(res, i, i_toastminmxid));
+ tblinfo[i].minmxid = strtou64(PQgetvalue(res, i, i_relminmxid), NULL, 0);
+ tblinfo[i].toast_minmxid = strtou64(PQgetvalue(res, i, i_toastminmxid), NULL, 0);
tblinfo[i].reloptions = pg_strdup(PQgetvalue(res, i, i_reloptions));
if (PQgetisnull(res, i, i_checkoption))
tblinfo[i].checkoption = NULL;
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 427f5d45f6..f47a403b15 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -298,11 +298,11 @@ typedef struct _tableInfo
bool rowsec; /* is row security enabled? */
bool forcerowsec; /* is row security forced? */
bool hasoids; /* does it have OIDs? */
- uint32 frozenxid; /* table's relfrozenxid */
- uint32 minmxid; /* table's relminmxid */
+ uint64 frozenxid; /* table's relfrozenxid */
+ uint64 minmxid; /* table's relminmxid */
Oid toast_oid; /* toast table's OID, or 0 if none */
- uint32 toast_frozenxid; /* toast table's relfrozenxid, if any */
- uint32 toast_minmxid; /* toast table's relminmxid */
+ uint64 toast_frozenxid; /* toast table's relfrozenxid, if any */
+ uint64 toast_minmxid; /* toast table's relminmxid */
int ncheck; /* # of CHECK expressions */
Oid reltype; /* OID of table's composite type, if any */
Oid reloftype; /* underlying type for typed table */
diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c
index 977491b875..24967f4e71 100644
--- a/src/bin/pg_resetwal/pg_resetwal.c
+++ b/src/bin/pg_resetwal/pg_resetwal.c
@@ -63,7 +63,6 @@ static ControlFileData ControlFile; /* pg_control values */
static XLogSegNo newXlogSegNo; /* new XLOG segment # */
static bool guessed = false; /* T if we had to guess at any values */
static const char *progname;
-static uint32 set_xid_epoch = (uint32) -1;
static TransactionId set_oldest_xid = 0;
static TransactionId set_xid = 0;
static TransactionId set_oldest_commit_ts_xid = 0;
@@ -95,7 +94,6 @@ main(int argc, char *argv[])
static struct option long_options[] = {
{"commit-timestamp-ids", required_argument, NULL, 'c'},
{"pgdata", required_argument, NULL, 'D'},
- {"epoch", required_argument, NULL, 'e'},
{"force", no_argument, NULL, 'f'},
{"next-wal-file", required_argument, NULL, 'l'},
{"multixact-ids", required_argument, NULL, 'm'},
@@ -137,7 +135,7 @@ main(int argc, char *argv[])
}
- while ((c = getopt_long(argc, argv, "c:D:e:fl:m:no:O:u:x:", long_options, NULL)) != -1)
+ while ((c = getopt_long(argc, argv, "c:D:fl:m:no:O:u:x:", long_options, NULL)) != -1)
{
switch (c)
{
@@ -153,24 +151,9 @@ main(int argc, char *argv[])
noupdate = true;
break;
- case 'e':
- errno = 0;
- set_xid_epoch = strtoul(optarg, &endptr, 0);
- if (endptr == optarg || *endptr != '\0' || errno != 0)
- {
- /*------
- translator: the second %s is a command line argument (-e, etc) */
- pg_log_error("invalid argument for option %s", "-e");
- pg_log_error_hint("Try \"%s --help\" for more information.", progname);
- exit(1);
- }
- if (set_xid_epoch == -1)
- pg_fatal("transaction ID epoch (-e) must not be -1");
- break;
-
case 'u':
errno = 0;
- set_oldest_xid = strtoul(optarg, &endptr, 0);
+ set_oldest_xid = strtou64(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0)
{
pg_log_error("invalid argument for option %s", "-u");
@@ -184,7 +167,7 @@ main(int argc, char *argv[])
case 'x':
errno = 0;
- set_xid = strtoul(optarg, &endptr, 0);
+ set_xid = strtou64(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0)
{
pg_log_error("invalid argument for option %s", "-x");
@@ -198,14 +181,14 @@ main(int argc, char *argv[])
case 'c':
errno = 0;
- set_oldest_commit_ts_xid = strtoul(optarg, &endptr, 0);
+ set_oldest_commit_ts_xid = strtou64(optarg, &endptr, 0);
if (endptr == optarg || *endptr != ',' || errno != 0)
{
pg_log_error("invalid argument for option %s", "-c");
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
exit(1);
}
- set_newest_commit_ts_xid = strtoul(endptr + 1, &endptr2, 0);
+ set_newest_commit_ts_xid = strtou64(endptr + 1, &endptr2, 0);
if (endptr2 == endptr + 1 || *endptr2 != '\0' || errno != 0)
{
pg_log_error("invalid argument for option %s", "-c");
@@ -237,7 +220,7 @@ main(int argc, char *argv[])
case 'm':
errno = 0;
- set_mxid = strtoul(optarg, &endptr, 0);
+ set_mxid = strtou64(optarg, &endptr, 0);
if (endptr == optarg || *endptr != ',' || errno != 0)
{
pg_log_error("invalid argument for option %s", "-m");
@@ -245,7 +228,7 @@ main(int argc, char *argv[])
exit(1);
}
- set_oldestmxid = strtoul(endptr + 1, &endptr2, 0);
+ set_oldestmxid = strtou64(endptr + 1, &endptr2, 0);
if (endptr2 == endptr + 1 || *endptr2 != '\0' || errno != 0)
{
pg_log_error("invalid argument for option %s", "-m");
@@ -265,7 +248,7 @@ main(int argc, char *argv[])
case 'O':
errno = 0;
- set_mxoff = strtoul(optarg, &endptr, 0);
+ set_mxoff = strtou64(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0' || errno != 0)
{
pg_log_error("invalid argument for option %s", "-O");
@@ -408,11 +391,6 @@ main(int argc, char *argv[])
* Adjust fields if required by switches. (Do this now so that printout,
* if any, includes these values.)
*/
- if (set_xid_epoch != -1)
- ControlFile.checkPointCopy.nextXid =
- FullTransactionIdFromEpochAndXid(set_xid_epoch,
- XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid));
-
if (set_oldest_xid != 0)
{
ControlFile.checkPointCopy.oldestXid = set_oldest_xid;
@@ -420,9 +398,7 @@ main(int argc, char *argv[])
}
if (set_xid != 0)
- ControlFile.checkPointCopy.nextXid =
- FullTransactionIdFromEpochAndXid(EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid),
- set_xid);
+ ControlFile.checkPointCopy.nextXid = FullTransactionIdFromXid(set_xid);
if (set_oldest_commit_ts_xid != 0)
ControlFile.checkPointCopy.oldestCommitTsXid = set_oldest_commit_ts_xid;
@@ -655,7 +631,7 @@ GuessControlValues(void)
ControlFile.checkPointCopy.PrevTimeLineID = 1;
ControlFile.checkPointCopy.fullPageWrites = false;
ControlFile.checkPointCopy.nextXid =
- FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
+ FullTransactionIdFromXid(FirstNormalTransactionId);
ControlFile.checkPointCopy.nextOid = FirstGenbkiObjectId;
ControlFile.checkPointCopy.nextMulti = FirstMultiXactId;
ControlFile.checkPointCopy.nextMultiOffset = 0;
@@ -706,6 +682,8 @@ GuessControlValues(void)
*
* NB: this display should be just those fields that will not be
* reset by RewriteControlFile().
+ *
+ * Special macros help to make translatable strings.
*/
static void
PrintControlValues(bool guessed)
@@ -725,8 +703,7 @@ PrintControlValues(bool guessed)
ControlFile.checkPointCopy.ThisTimeLineID);
printf(_("Latest checkpoint's full_page_writes: %s\n"),
ControlFile.checkPointCopy.fullPageWrites ? _("on") : _("off"));
- printf(_("Latest checkpoint's NextXID: %u:%llu\n"),
- EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid),
+ printf(_("Latest checkpoint's NextXID: %llu\n"),
(unsigned long long) XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid));
printf(_("Latest checkpoint's NextOID: %u\n"),
ControlFile.checkPointCopy.nextOid);
@@ -824,12 +801,6 @@ PrintNewControlValues(void)
ControlFile.checkPointCopy.oldestXidDB);
}
- if (set_xid_epoch != -1)
- {
- printf(_("NextXID epoch: %u\n"),
- EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid));
- }
-
if (set_oldest_commit_ts_xid != 0)
{
printf(_("oldestCommitTsXid: %llu\n"),
@@ -1133,7 +1104,6 @@ usage(void)
" set oldest and newest transactions bearing\n"
" commit timestamp (zero means no change)\n"));
printf(_(" [-D, --pgdata=]DATADIR data directory\n"));
- printf(_(" -e, --epoch=XIDEPOCH set next transaction ID epoch\n"));
printf(_(" -f, --force force update to be done\n"));
printf(_(" -l, --next-wal-file=WALFILE set minimum starting location for new WAL\n"));
printf(_(" -m, --multixact-ids=MXID,MXID set next and oldest multitransaction ID\n"));
diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile
index 7f8042f34a..1d47abd975 100644
--- a/src/bin/pg_upgrade/Makefile
+++ b/src/bin/pg_upgrade/Makefile
@@ -20,6 +20,7 @@ OBJS = \
parallel.o \
pg_upgrade.o \
relfilenumber.o \
+ segresize.o \
server.o \
tablespace.o \
util.o \
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index f1bc1e6886..3ff1951185 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -33,6 +33,8 @@ static void check_for_pg_role_prefix(ClusterInfo *cluster);
static void check_for_new_tablespace_dir(ClusterInfo *new_cluster);
static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster);
static char *get_canonical_locale_name(int category, const char *locale);
+static void check_for_32bit_xid_usage(ClusterInfo *cluster);
+static bool is_xid_wraparound(ClusterInfo *cluster);
/*
@@ -82,7 +84,7 @@ output_check_banner(bool live_check)
void
-check_and_dump_old_cluster(bool live_check)
+check_and_dump_old_cluster(bool live_check, bool *is_wraparound)
{
/* -- OLD -- */
@@ -168,6 +170,17 @@ check_and_dump_old_cluster(bool live_check)
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 903)
old_9_3_check_for_line_data_type_usage(&old_cluster);
+ /* Prepare for 64bit xid */
+ if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER)
+ {
+ /* Check if 32-bit xid type is used in tables */
+ check_for_32bit_xid_usage(&old_cluster);
+ /* Check indexes to be upgraded */
+ invalidate_spgist_indexes(&old_cluster, true);
+ invalidate_gin_indexes(&old_cluster, true);
+ invalidate_external_indexes(&old_cluster, true);
+ }
+
/*
* While not a check option, we do this now because this is the only time
* the old server is running.
@@ -175,6 +188,8 @@ check_and_dump_old_cluster(bool live_check)
if (!user_opts.check)
generate_old_dump();
+ *is_wraparound = is_xid_wraparound(&old_cluster);
+
if (!live_check)
stop_postmaster(false);
}
@@ -244,6 +259,17 @@ issue_warnings_and_set_wal_level(void)
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 906)
old_9_6_invalidate_hash_indexes(&new_cluster, false);
+ /* Reindex for 64bit xid; mirrors the old_9_6 hash index call above */
+ if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER)
+ {
+ /*
+ * Old server is stopped and was already checked: act on new cluster.
+ */
+ invalidate_spgist_indexes(&new_cluster, false);
+ invalidate_gin_indexes(&new_cluster, false);
+ invalidate_external_indexes(&new_cluster, false);
+ }
+
report_extension_updates(&new_cluster);
stop_postmaster(false);
@@ -1502,3 +1528,124 @@ get_canonical_locale_name(int category, const char *locale)
return res;
}
+
+/*
+ * check_for_32bit_xid_usage()
+ * The xid storage format changes to 64-bit.  Write every non-system column
+ * of type xid to tables_using_xid.txt and fail the upgrade if any exist.
+ * Like the other pg_fatal() calls, messages here do not end in a newline.
+ */
+static void
+check_for_32bit_xid_usage(ClusterInfo *cluster)
+{
+ int dbnum;
+ FILE *script = NULL;
+ bool found = false;
+ char output_path[MAXPGPATH];
+
+ prep_status("Checking for incompatible \"xid\" data type");
+
+ snprintf(output_path, sizeof(output_path), "tables_using_xid.txt");
+
+ for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
+ {
+ PGresult *res;
+ bool db_used = false;
+ int ntups;
+ int rowno;
+ int i_nspname,
+ i_relname,
+ i_attname;
+ DbInfo *active_db = &cluster->dbarr.dbs[dbnum];
+ PGconn *conn = connectToServer(cluster, active_db->db_name);
+
+ /*
+ * While several relkinds don't store any data, e.g. views, they can
+ * be used to define data types of other columns, so we check all
+ * relkinds.
+ */
+ res = executeQueryOrDie(conn,
+ "SELECT n.nspname, c.relname, a.attname "
+ "FROM pg_catalog.pg_class c, "
+ " pg_catalog.pg_namespace n, "
+ " pg_catalog.pg_attribute a "
+ "WHERE c.oid = a.attrelid AND "
+ " a.attnum >= 1 AND "
+ " a.atttypid = 'pg_catalog.xid'::pg_catalog.regtype AND "
+ " c.relnamespace = n.oid AND "
+ /* exclude possible orphaned temp tables */
+ " n.nspname !~ '^pg_temp_' AND "
+ " n.nspname NOT IN ('pg_catalog', 'information_schema')");
+
+ ntups = PQntuples(res);
+ i_nspname = PQfnumber(res, "nspname");
+ i_relname = PQfnumber(res, "relname");
+ i_attname = PQfnumber(res, "attname");
+ for (rowno = 0; rowno < ntups; rowno++)
+ {
+ found = true;
+ if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL)
+ pg_fatal("could not open file \"%s\": %s",
+ output_path, strerror(errno));
+ if (!db_used)
+ {
+ fprintf(script, "Database: %s\n", active_db->db_name);
+ db_used = true;
+ }
+ fprintf(script, " %s.%s.%s\n",
+ PQgetvalue(res, rowno, i_nspname),
+ PQgetvalue(res, rowno, i_relname),
+ PQgetvalue(res, rowno, i_attname));
+ }
+
+ PQclear(res);
+
+ PQfinish(conn);
+ }
+
+ if (script)
+ fclose(script);
+
+ if (found)
+ {
+ pg_log(PG_REPORT, "fatal");
+ pg_fatal("Your installation contains the \"xid\" data type in user tables.\n"
+ "The internal format of \"xid\" changed in Postgres Pro Enterprise so this cluster\n"
+ "cannot currently be upgraded. Note that even dropped attributes cause a problem.\n"
+ "You can remove the problem tables and restart the upgrade.\n"
+ "A list of the problem columns is in the file:\n"
+ " %s", output_path);
+ }
+ else
+ check_ok();
+}
+
+/*
+ * is_xid_wraparound()
+ * Return true if the old 32-bit xid cluster has wrapped around, i.e. some
+ * database's datfrozenxid is numerically >= the current 32-bit xid.
+ */
+static bool
+is_xid_wraparound(ClusterInfo *cluster)
+{
+ PGconn *conn;
+ PGresult *res;
+ bool is_wraparound;
+
+ conn = connectToServer(cluster, "template1");
+
+ /*
+ * txid_current() is extended with an "epoch" counter in its high bits;
+ * reduce it modulo 2^32 (4294967296) to get the bare 32-bit xid.
+ */
+ res = executeQueryOrDie(conn,
+ "SELECT 1 "
+ "FROM pg_catalog.pg_database, txid_current() tx "
+ "WHERE (tx %% 4294967296)::bigint <= datfrozenxid::text::bigint "
+ "LIMIT 1");
+ is_wraparound = (PQntuples(res) > 0);
+ PQclear(res);
+ PQfinish(conn);
+
+ return is_wraparound;
+}
diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c
index a5b4a77570..5021f0112b 100644
--- a/src/bin/pg_upgrade/controldata.c
+++ b/src/bin/pg_upgrade/controldata.c
@@ -284,6 +284,8 @@ get_control_data(ClusterInfo *cluster, bool live_check)
xid.value = strtou64(p, NULL, 10);
/*
+ * Try to read 32-bit XID format 'epoch:xid'.
+ *
* Delimiter changed from '/' to ':' in 9.6. We don't test for
* the catalog version of the change because the catalog version
* is pulled from pg_controldata too, and it isn't worth adding an
@@ -299,8 +301,7 @@ get_control_data(ClusterInfo *cluster, bool live_check)
if (p == NULL)
{
/* FullTransactionId representation */
- cluster->controldata.chkpnt_nxtxid = XidFromFullTransactionId(xid);
- cluster->controldata.chkpnt_nxtepoch = EpochFromFullTransactionId(xid);
+ cluster->controldata.chkpnt_nxtxid = xid.value;
}
else
{
@@ -309,8 +310,8 @@ get_control_data(ClusterInfo *cluster, bool live_check)
/* Epoch:Xid representation */
p++; /* remove '/' or ':' char */
- cluster->controldata.chkpnt_nxtxid = str2uint(p);
- cluster->controldata.chkpnt_nxtepoch = (TransactionId) XidFromFullTransactionId(xid);
+ cluster->controldata.chkpnt_nxtxid = (XidFromFullTransactionId(xid)) << 32 |
+ (TransactionId) str2uint(p);
}
got_xid = true;
@@ -334,7 +335,7 @@ get_control_data(ClusterInfo *cluster, bool live_check)
pg_fatal("%d: controldata retrieval problem", __LINE__);
p++; /* remove ':' char */
- cluster->controldata.chkpnt_nxtmulti = str2uint(p);
+ cluster->controldata.chkpnt_nxtmulti = strtou64(p, NULL, 10);
got_multi = true;
}
else if ((p = strstr(bufin, "Latest checkpoint's oldestXID:")) != NULL)
@@ -345,7 +346,7 @@ get_control_data(ClusterInfo *cluster, bool live_check)
pg_fatal("%d: controldata retrieval problem", __LINE__);
p++; /* remove ':' char */
- cluster->controldata.chkpnt_oldstxid = str2uint(p);
+ cluster->controldata.chkpnt_oldstxid = strtou64(p, NULL, 10);
got_oldestxid = true;
}
else if ((p = strstr(bufin, "Latest checkpoint's oldestMultiXid:")) != NULL)
@@ -356,7 +357,7 @@ get_control_data(ClusterInfo *cluster, bool live_check)
pg_fatal("%d: controldata retrieval problem", __LINE__);
p++; /* remove ':' char */
- cluster->controldata.chkpnt_oldstMulti = str2uint(p);
+ cluster->controldata.chkpnt_oldstMulti = strtou64(p, NULL, 10);
got_oldestmulti = true;
}
else if ((p = strstr(bufin, "Latest checkpoint's NextMultiOffset:")) != NULL)
@@ -367,7 +368,7 @@ get_control_data(ClusterInfo *cluster, bool live_check)
pg_fatal("%d: controldata retrieval problem", __LINE__);
p++; /* remove ':' char */
- cluster->controldata.chkpnt_nxtmxoff = str2uint(p);
+ cluster->controldata.chkpnt_nxtmxoff = strtou64(p, NULL, 10);
got_mxoff = true;
}
else if ((p = strstr(bufin, "First log segment after reset:")) != NULL)
diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c
index 079fbda838..dedd8ad2b7 100644
--- a/src/bin/pg_upgrade/file.c
+++ b/src/bin/pg_upgrade/file.c
@@ -174,7 +174,8 @@ linkFile(const char *src, const char *dst,
*/
void
rewriteVisibilityMap(const char *fromfile, const char *tofile,
- const char *schemaName, const char *relName)
+ const char *schemaName, const char *relName,
+ bool update_version)
{
int src_fd;
int dst_fd;
@@ -290,6 +291,11 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
if (old_lastpart && empty)
break;
+ if (update_version)
+ PageSetPageSizeAndVersion((Page) new_vmbuf.data,
+ PageGetPageSize((Page) new_vmbuf.data),
+ PG_PAGE_LAYOUT_VERSION);
+
/* Set new checksum for visibility map page, if enabled */
if (new_cluster.controldata.data_checksum_version != 0)
((PageHeader) new_vmbuf.data)->pd_checksum =
@@ -316,6 +322,97 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
close(src_fd);
}
+/*
+ * updateSegmentPagesVersion()
+ *
+ * Copy a segment file from fromfile to tofile, one BLCKSZ page at a time.
+ * schemaName/relName are relation's SQL name (used for error messages only).
+ *
+ * Read segment pages one by one and set version to PG_PAGE_LAYOUT_VERSION.
+ *
+ * Although the FSM and VM formats do not change with the switch to 64-bit
+ * XIDs, we must bump the page version to avoid lazy conversion on first read.
+ */
+void
+updateSegmentPagesVersion(const char *fromfile, const char *tofile,
+ const char *schemaName, const char *relName)
+{
+ int src_fd;
+ int dst_fd;
+ struct stat statbuf;
+ ssize_t src_filesize;
+ ssize_t totalBytesRead;
+ ssize_t bytesRead;
+ BlockNumber blkno;
+ PGAlignedBlock buf;
+
+ if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0)
+ pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s",
+ schemaName, relName, fromfile, strerror(errno));
+
+ if (fstat(src_fd, &statbuf) != 0)
+ pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %s",
+ schemaName, relName, fromfile, strerror(errno));
+
+ if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+ pg_file_create_mode)) < 0)
+ pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s",
+ schemaName, relName, tofile, strerror(errno));
+
+ /* Remember the source size; the loop below stops once it is consumed */
+ src_filesize = statbuf.st_size;
+ totalBytesRead = 0;
+ blkno = 0;
+
+ while (totalBytesRead < src_filesize)
+ {
+ errno = 0;
+ if ((bytesRead = read(src_fd, buf.data, BLCKSZ)) != BLCKSZ)
+ {
+ if (bytesRead < 0)
+ pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s",
+ schemaName, relName, fromfile, strerror(errno));
+ else
+ pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"",
+ schemaName, relName, fromfile);
+ }
+
+ totalBytesRead += BLCKSZ;
+ PageSetPageSizeAndVersion((Page) buf.data,
+ PageGetPageSize((Page) buf.data),
+ PG_PAGE_LAYOUT_VERSION);
+
+ /* Set new checksum for page, if enabled */
+ if (new_cluster.controldata.data_checksum_version != 0)
+ ((PageHeader) buf.data)->pd_checksum =
+ pg_checksum_page(buf.data, blkno);
+
+ /*
+ * Only FSM and VM pages are expected here; reject any other layout.
+ */
+ if (((PageHeader) buf.data)->pd_lower != SizeOfPageHeaderData ||
+ ((PageHeader) buf.data)->pd_upper != BLCKSZ)
+ pg_fatal("error while copying relation \"%s.%s\": unknown page format found in file \"%s\"",
+ schemaName, relName, fromfile);
+
+ errno = 0;
+ if (write(dst_fd, buf.data, BLCKSZ) != BLCKSZ)
+ {
+ /* if write didn't set errno, assume problem is no disk space */
+ if (errno == 0)
+ errno = ENOSPC;
+ pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s",
+ schemaName, relName, tofile, strerror(errno));
+ }
+
+ blkno++;
+ }
+
+ /* Clean up */
+ close(dst_fd);
+ close(src_fd);
+}
+
void
check_file_clone(void)
{
diff --git a/src/bin/pg_upgrade/meson.build b/src/bin/pg_upgrade/meson.build
index 02f030e0cc..8822377e07 100644
--- a/src/bin/pg_upgrade/meson.build
+++ b/src/bin/pg_upgrade/meson.build
@@ -10,6 +10,7 @@ pg_upgrade_sources = files(
'parallel.c',
'pg_upgrade.c',
'relfilenumber.c',
+ 'segresize.c',
'server.c',
'tablespace.c',
'util.c',
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index a8b1e0ed84..326dc9c8d5 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -44,6 +44,9 @@
#include <langinfo.h>
#endif
+#include "access/multixact.h"
+#include "access/transam.h"
+#include "access/xlog_internal.h"
#include "catalog/pg_class_d.h"
#include "common/file_perm.h"
#include "common/logging.h"
@@ -53,7 +56,7 @@
static void prepare_new_cluster(void);
static void prepare_new_globals(void);
-static void create_new_objects(void);
+static void create_new_objects(bool is_wraparound);
static void copy_xact_xlog_xid(void);
static void set_frozenxids(bool minmxid_only);
static void make_outputdirs(char *pgdata);
@@ -80,6 +83,7 @@ main(int argc, char **argv)
{
char *deletion_script_file_name = NULL;
bool live_check = false;
+ bool is_wraparound = false;
/*
* pg_upgrade doesn't currently use common/logging.c, but initialize it
@@ -125,7 +129,7 @@ main(int argc, char **argv)
check_cluster_compatibility(live_check);
- check_and_dump_old_cluster(live_check);
+ check_and_dump_old_cluster(live_check, &is_wraparound);
/* -- NEW -- */
@@ -156,7 +160,7 @@ main(int argc, char **argv)
prepare_new_globals();
- create_new_objects();
+ create_new_objects(is_wraparound);
stop_postmaster(false);
@@ -365,7 +369,6 @@ setup(char *argv0, bool *live_check)
}
}
-
static void
prepare_new_cluster(void)
{
@@ -419,7 +422,7 @@ prepare_new_globals(void)
static void
-create_new_objects(void)
+create_new_objects(bool is_wraparound)
{
int dbnum;
@@ -511,11 +514,23 @@ create_new_objects(void)
check_ok();
/*
- * We don't have minmxids for databases or relations in pre-9.3 clusters,
- * so set those after we have restored the schema.
+ * Fix up datfrozenxid and datminmxid
*/
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 902)
set_frozenxids(true);
+ else if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER &&
+ new_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER)
+ {
+ /*
+ * During upgrade from 32-bit to 64-bit xids save relfrozenxids if
+ * there was no wraparound in old cluster. Otherwise, reset them to
+ * FirstNormalTransactionId value.
+ */
+ if (is_wraparound)
+ set_frozenxids(false);
+ else
+ set_frozenxids(true);
+ }
/* update new_cluster info now that we have objects in the databases */
get_db_and_rel_infos(&new_cluster);
@@ -569,14 +584,37 @@ copy_subdir_files(const char *old_subdir, const char *new_subdir)
static void
copy_xact_xlog_xid(void)
{
- /*
- * Copy old commit logs to new data dir. pg_clog has been renamed to
- * pg_xact in post-10 clusters.
- */
- copy_subdir_files(GET_MAJOR_VERSION(old_cluster.major_version) <= 906 ?
- "pg_clog" : "pg_xact",
- GET_MAJOR_VERSION(new_cluster.major_version) <= 906 ?
- "pg_clog" : "pg_xact");
+ TransactionId next_xid;
+
+#define GetClogDirName(cluster) \
+ GET_MAJOR_VERSION(cluster.major_version) <= 906 ? "pg_clog" : "pg_xact"
+
+ /* Set next xid to 2^32 if we're upgrading from 32 bit postgres */
+ if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER &&
+ new_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER)
+ next_xid = ((TransactionId) 1 << 32);
+ else
+ next_xid = old_cluster.controldata.chkpnt_nxtxid;
+
+ if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER &&
+ new_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER)
+ {
+ /* Convert commit logs and copy to the new data dir */
+ prep_status("Transforming commit log segments");
+ convert_xact(psprintf("%s/%s", old_cluster.pgdata, GetClogDirName(old_cluster)),
+ psprintf("%s/%s", new_cluster.pgdata, GetClogDirName(new_cluster)));
+ check_ok();
+ }
+ else
+ {
+ /*
+ * Copy old commit logs to new data dir. pg_clog has been renamed to
+ * pg_xact in post-10 clusters.
+ */
+ prep_status("Copying commit log segments");
+ copy_subdir_files(GetClogDirName(old_cluster), GetClogDirName(new_cluster));
+ check_ok();
+ }
prep_status("Setting oldest XID for new cluster");
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
@@ -590,19 +628,20 @@ copy_xact_xlog_xid(void)
prep_status("Setting next transaction ID and epoch for new cluster");
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/pg_resetwal\" -f -x %llu \"%s\"",
- new_cluster.bindir,
- (unsigned long long) old_cluster.controldata.chkpnt_nxtxid,
+ new_cluster.bindir, (unsigned long long) next_xid,
new_cluster.pgdata);
+#ifdef NOT_USED
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/pg_resetwal\" -f -e %u \"%s\"",
new_cluster.bindir, old_cluster.controldata.chkpnt_nxtepoch,
new_cluster.pgdata);
+#endif
/* must reset commit timestamp limits also */
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/pg_resetwal\" -f -c %llu,%llu \"%s\"",
new_cluster.bindir,
- (unsigned long long) old_cluster.controldata.chkpnt_nxtxid,
- (unsigned long long) old_cluster.controldata.chkpnt_nxtxid,
+ (unsigned long long) next_xid,
+ (unsigned long long) next_xid,
new_cluster.pgdata);
check_ok();
@@ -615,8 +654,48 @@ copy_xact_xlog_xid(void)
if (old_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER &&
new_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER)
{
- copy_subdir_files("pg_multixact/offsets", "pg_multixact/offsets");
- copy_subdir_files("pg_multixact/members", "pg_multixact/members");
+ uint64 oldest_mxid = old_cluster.controldata.chkpnt_oldstMulti;
+ uint64 next_mxid = old_cluster.controldata.chkpnt_nxtmulti;
+ uint64 next_mxoff = old_cluster.controldata.chkpnt_nxtmxoff;
+
+ if (old_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER)
+ {
+ copy_subdir_files("pg_multixact/offsets", "pg_multixact/offsets");
+ copy_subdir_files("pg_multixact/members", "pg_multixact/members");
+ }
+ else
+ {
+ MultiXactOffset oldest_mxoff;
+
+ remove_new_subdir("pg_multixact/offsets", false);
+ oldest_mxoff = convert_multixact_offsets("pg_multixact/offsets", "pg_multixact/offsets");
+
+ remove_new_subdir("pg_multixact/members", false);
+ convert_multixact_members("pg_multixact/members", "pg_multixact/members", oldest_mxoff);
+
+ /*
+ * Handle wraparound if we're upgrading from 32 bit postgres.
+ * Invalid 0 mxids/offsets are skipped, so 1 becomes 2^32.
+ */
+ if (oldest_mxoff)
+ {
+ if (next_mxid < oldest_mxid)
+ next_mxid += ((MultiXactId) 1 << 32) - FirstMultiXactId;
+
+ if (next_mxoff < oldest_mxoff)
+ next_mxoff += ((MultiXactOffset) 1 << 32) - 1;
+
+ /* Offsets and members were rewritten, oldest_mxoff = 1 */
+ next_mxoff -= oldest_mxoff - 1;
+ oldest_mxoff = 1;
+
+ /*
+ * Save converted next_mxid for possible usage in
+ * set_frozenxids()
+ */
+ old_cluster.controldata.chkpnt_nxtmulti = next_mxid;
+ }
+ }
prep_status("Setting next multixact ID and offset for new cluster");
@@ -627,9 +706,9 @@ copy_xact_xlog_xid(void)
exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/pg_resetwal\" -O %llu -m %llu,%llu \"%s\"",
new_cluster.bindir,
- (unsigned long long) old_cluster.controldata.chkpnt_nxtmxoff,
- (unsigned long long) old_cluster.controldata.chkpnt_nxtmulti,
- (unsigned long long) old_cluster.controldata.chkpnt_oldstMulti,
+ (unsigned long long) next_mxoff,
+ (unsigned long long) next_mxid,
+ (unsigned long long) oldest_mxid,
new_cluster.pgdata);
check_ok();
}
@@ -703,6 +782,8 @@ set_frozenxids(bool minmxid_only)
int ntups;
int i_datname;
int i_datallowconn;
+ TransactionId frozen_xid;
+ MultiXactId minmxid;
if (!minmxid_only)
prep_status("Setting frozenxid and minmxid counters in new cluster");
@@ -711,18 +792,26 @@ set_frozenxids(bool minmxid_only)
conn_template1 = connectToServer(&new_cluster, "template1");
+ if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER &&
+ new_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER)
+ frozen_xid = FirstNormalTransactionId;
+ else
+ frozen_xid = old_cluster.controldata.chkpnt_nxtxid;
+
+ minmxid = old_cluster.controldata.chkpnt_nxtmulti;
+
if (!minmxid_only)
/* set pg_database.datfrozenxid */
PQclear(executeQueryOrDie(conn_template1,
"UPDATE pg_catalog.pg_database "
"SET datfrozenxid = '%llu'",
- (unsigned long long) old_cluster.controldata.chkpnt_nxtxid));
+ (unsigned long long) frozen_xid));
/* set pg_database.datminmxid */
PQclear(executeQueryOrDie(conn_template1,
"UPDATE pg_catalog.pg_database "
"SET datminmxid = '%llu'",
- (unsigned long long) old_cluster.controldata.chkpnt_nxtmulti));
+ (unsigned long long) minmxid));
/* get database names */
dbres = executeQueryOrDie(conn_template1,
@@ -762,7 +851,7 @@ set_frozenxids(bool minmxid_only)
CppAsString2(RELKIND_RELATION) ", "
CppAsString2(RELKIND_MATVIEW) ", "
CppAsString2(RELKIND_TOASTVALUE) ")",
- (unsigned long long) old_cluster.controldata.chkpnt_nxtxid));
+ (unsigned long long) frozen_xid));
/* set pg_class.relminmxid */
PQclear(executeQueryOrDie(conn,
@@ -773,7 +862,7 @@ set_frozenxids(bool minmxid_only)
CppAsString2(RELKIND_RELATION) ", "
CppAsString2(RELKIND_MATVIEW) ", "
CppAsString2(RELKIND_TOASTVALUE) ")",
- (unsigned long long) old_cluster.controldata.chkpnt_nxtmulti));
+ (unsigned long long) minmxid));
PQfinish(conn);
/* Reset datallowconn flag */
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 31589b0fdc..538994d5e5 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -115,6 +115,11 @@ extern char *output_files[];
*/
#define MULTIXACT_FORMATCHANGE_CAT_VER 201301231
+/*
+ * xid format changed from 32-bit to 64-bit.
+ */
+#define XID_FORMATCHANGE_CAT_VER 999999999
+
/*
* large object chunk size added to pg_controldata,
* commit 5f93c37805e7485488480916b4585e098d3cc883
@@ -199,13 +204,13 @@ typedef struct
uint32 ctrl_ver;
uint32 cat_ver;
char nextxlogfile[25];
- uint32 chkpnt_nxtxid;
- uint32 chkpnt_nxtepoch;
+ uint64 chkpnt_nxtxid;
+ uint32 chkpnt_nxtepoch; /* for 32bit xids only */
uint32 chkpnt_nxtoid;
- uint32 chkpnt_nxtmulti;
- uint32 chkpnt_nxtmxoff;
- uint32 chkpnt_oldstMulti;
- uint32 chkpnt_oldstxid;
+ uint64 chkpnt_nxtmulti;
+ uint64 chkpnt_nxtmxoff;
+ uint64 chkpnt_oldstMulti;
+ uint64 chkpnt_oldstxid;
uint32 align;
uint32 blocksz;
uint32 largesz;
@@ -333,7 +338,7 @@ extern OSInfo os_info;
/* check.c */
void output_check_banner(bool live_check);
-void check_and_dump_old_cluster(bool live_check);
+void check_and_dump_old_cluster(bool live_check, bool *is_wraparound);
void check_new_cluster(void);
void report_clusters_compatible(void);
void issue_warnings_and_set_wal_level(void);
@@ -374,7 +379,10 @@ void copyFile(const char *src, const char *dst,
void linkFile(const char *src, const char *dst,
const char *schemaName, const char *relName);
void rewriteVisibilityMap(const char *fromfile, const char *tofile,
- const char *schemaName, const char *relName);
+ const char *schemaName, const char *relName,
+ bool update_version);
+void updateSegmentPagesVersion(const char *fromfile, const char *tofile,
+ const char *schemaName, const char *relName);
void check_file_clone(void);
void check_hard_link(void);
@@ -456,6 +464,10 @@ void old_9_6_invalidate_hash_indexes(ClusterInfo *cluster,
void old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster);
void report_extension_updates(ClusterInfo *cluster);
+void invalidate_spgist_indexes(ClusterInfo *cluster, bool check_mode);
+void invalidate_gin_indexes(ClusterInfo *cluster, bool check_mode);
+void invalidate_external_indexes(ClusterInfo *cluster, bool check_mode);
+
/* parallel.c */
void parallel_exec_prog(const char *log_file, const char *opt_log_file,
const char *fmt,...) pg_attribute_printf(3, 4);
@@ -463,3 +475,9 @@ void parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr
char *old_pgdata, char *new_pgdata,
char *old_tablespace);
bool reap_child(bool wait_for_child);
+
+/* segresize.c */
+void convert_xact(const char *olddir, const char *newdir);
+MultiXactOffset convert_multixact_offsets(const char *olddir, const char *newdir);
+void convert_multixact_members(const char *olddir, const char *newdir,
+ MultiXactOffset oldest_mxoff);
diff --git a/src/bin/pg_upgrade/relfilenumber.c b/src/bin/pg_upgrade/relfilenumber.c
index c3f3d6bc0a..678e74051e 100644
--- a/src/bin/pg_upgrade/relfilenumber.c
+++ b/src/bin/pg_upgrade/relfilenumber.c
@@ -16,7 +16,8 @@
#include "pg_upgrade.h"
static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace);
-static void transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_frozenbit);
+static void transfer_relfile(FileNameMap *map, const char *type_suffix,
+ bool vm_must_add_frozenbit, bool update_version);
/*
@@ -136,6 +137,7 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
{
int mapnum;
bool vm_must_add_frozenbit = false;
+ bool update_version = false;
/*
* Do we need to rewrite visibilitymap?
@@ -144,19 +146,28 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
new_cluster.controldata.cat_ver >= VISIBILITY_MAP_FROZEN_BIT_CAT_VER)
vm_must_add_frozenbit = true;
+ /*
+ * Need to update FSM and VM pages version to avoid lazy conversion.
+ */
+ if (old_cluster.controldata.cat_ver < new_cluster.controldata.cat_ver)
+ update_version = true;
+
for (mapnum = 0; mapnum < size; mapnum++)
{
if (old_tablespace == NULL ||
strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0)
{
/* transfer primary file */
- transfer_relfile(&maps[mapnum], "", vm_must_add_frozenbit);
+ transfer_relfile(&maps[mapnum], "", vm_must_add_frozenbit,
+ update_version);
/*
* Copy/link any fsm and vm files, if they exist
*/
- transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit);
- transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit);
+ transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit,
+ update_version);
+ transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit,
+ update_version);
}
}
}
@@ -170,7 +181,8 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace)
* mode.
*/
static void
-transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_frozenbit)
+transfer_relfile(FileNameMap *map, const char *type_suffix,
+ bool vm_must_add_frozenbit, bool update_version)
{
char old_file[MAXPGPATH];
char new_file[MAXPGPATH];
@@ -235,7 +247,17 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
/* Need to rewrite visibility map format */
pg_log(PG_VERBOSE, "rewriting \"%s\" to \"%s\"",
old_file, new_file);
- rewriteVisibilityMap(old_file, new_file, map->nspname, map->relname);
+ rewriteVisibilityMap(old_file, new_file, map->nspname,
+ map->relname, update_version);
+ }
+ else if ((update_version && strcmp(type_suffix, "_vm") == 0) ||
+ (update_version && strcmp(type_suffix, "_fsm") == 0))
+ {
+ /* Need to update pages version */
+ pg_log(PG_VERBOSE, "rewriting \"%s\" to \"%s\"",
+ old_file, new_file);
+ updateSegmentPagesVersion(old_file, new_file, map->nspname,
+ map->relname);
}
else
switch (user_opts.transfer_mode)
diff --git a/src/bin/pg_upgrade/segresize.c b/src/bin/pg_upgrade/segresize.c
new file mode 100644
index 0000000000..99e2c5ecde
--- /dev/null
+++ b/src/bin/pg_upgrade/segresize.c
@@ -0,0 +1,586 @@
+/*-------------------------------------------------------------------------
+ *
+ * segresize.c
+ * SLRU segment resize utility from 32bit to 64bit xid format
+ *
+ * Copyright (c) 2015-2022, Postgres Professional
+ *
+ * IDENTIFICATION
+ * src/bin/pg_upgrade/segresize.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#include "pg_upgrade.h"
+#include "access/multixact.h"
+#include "access/transam.h"
+
+/* Number of pages per SLRU segment file: old (source) and new (target) value. */
+#define SLRU_PAGES_PER_SEGMENT_OLD 32
+#define SLRU_PAGES_PER_SEGMENT 32 /* Should be equal to value from slru.h */
+
+/* Commit log geometry: 2 status bits per transaction, hence 4 per byte. */
+#define CLOG_BITS_PER_XACT 2
+#define CLOG_XACTS_PER_BYTE 4
+#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE)
+
+/* Fixed 32-bit id/offset types, used here for data read from the old files. */
+typedef uint32 MultiXactId32;
+typedef uint32 MultiXactOffset32;
+typedef uint32 TransactionId32;
+
+/* Largest values representable in the 32-bit types above. */
+#define MaxTransactionId32 ((TransactionId32) 0xFFFFFFFF)
+#define MaxMultiXactId32 ((MultiXactId32) 0xFFFFFFFF)
+#define MaxMultiXactOffset32 ((MultiXactOffset32) 0xFFFFFFFF)
+
+/* Offset entries per page, sized by the 32-bit and the current offset type. */
+#define MULTIXACT_OFFSETS_PER_PAGE_OLD (BLCKSZ / sizeof(MultiXactOffset32))
+#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
+
+/* One flag byte is stored per multixact member. */
+#define MXACT_MEMBER_FLAGS_PER_BYTE 1
+
+/* 64xid: a member group is 8 flag bytes followed by 8 TransactionId values */
+#define MULTIXACT_FLAGBYTES_PER_GROUP 8
+#define MULTIXACT_MEMBERS_PER_MEMBERGROUP \
+ (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
+/* size in bytes of a complete group */
+#define MULTIXACT_MEMBERGROUP_SIZE \
+ (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
+#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
+
+/* 32xid: a member group is 4 flag bytes followed by 4 TransactionId32 values */
+#define MULTIXACT_FLAGBYTES_PER_GROUP_OLD 4
+#define MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD \
+ (MULTIXACT_FLAGBYTES_PER_GROUP_OLD * MXACT_MEMBER_FLAGS_PER_BYTE)
+/* size in bytes of a complete group */
+#define MULTIXACT_MEMBERGROUP_SIZE_OLD \
+ (sizeof(TransactionId32) * MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD + MULTIXACT_FLAGBYTES_PER_GROUP_OLD)
+#define MULTIXACT_MEMBERGROUPS_PER_PAGE_OLD (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE_OLD)
+#define MULTIXACT_MEMBERS_PER_PAGE_OLD \
+ (MULTIXACT_MEMBERGROUPS_PER_PAGE_OLD * MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD)
+
+/*
+ * Cursor over the segment files of one SLRU directory. The read and write
+ * helpers below advance pageno/segno after every page they process.
+ */
+typedef struct SLRUSegmentState
+{
+ const char *dir; /* directory path used to build segment file names */
+ FILE *file; /* currently open segment file, or NULL */
+ int64 segno; /* number of the current segment */
+ int64 pageno; /* next page to process within the current segment */
+ bool is_empty_segment; /* maintained by read_old_segment_page() */
+} SLRUSegmentState;
+
+/*
+ * Build the old-style path of a segment file: "<path>/" followed by the
+ * segment number as at least four upper-case hex digits. The result is
+ * allocated by psprintf().
+ */
+static char *
+slru_filename_old(const char *path, int64 segno)
+{
+	int			short_segno;
+
+	/* old-style names are formatted from an int, so the value must fit */
+	Assert(segno <= PG_INT32_MAX);
+	short_segno = (int) segno;
+
+	return psprintf("%s/%04X", path, short_segno);
+}
+
+/*
+ * Build the new-style path of a segment file: "<path>/" followed by the
+ * segment number as at least twelve upper-case hex digits. The result is
+ * allocated by psprintf().
+ */
+static char *
+slru_filename_new(const char *path, int64 segno)
+{
+	long long	wide_segno = (long long) segno;
+
+	return psprintf("%s/%012llX", path, wide_segno);
+}
+
+/*
+ * Open the segment file that "state" currently points at.
+ *
+ * filename_fn turns (state->dir, state->segno) into a path, mode is handed
+ * to fopen(), and fatal_msg is the pg_fatal() format (taking the file name)
+ * used when the file cannot be opened. Returns the opened stream.
+ */
+static inline FILE *
+open_file(SLRUSegmentState *state,
+		  char * (filename_fn)(const char *path, int64 segno),
+		  char *mode, char *fatal_msg)
+{
+	char	   *segpath;
+	FILE	   *stream;
+
+	segpath = filename_fn(state->dir, state->segno);
+	stream = fopen(segpath, mode);
+
+	if (stream == NULL)
+		pg_fatal(fatal_msg, segpath);
+
+	/* the name was only needed for opening and error reporting */
+	pfree(segpath);
+
+	return stream;
+}
+
+/*
+ * Close the segment file held in "state", if there is one, and clear the
+ * handle. A failing fclose() is fatal; filename_fn is used only to name the
+ * file in that error message.
+ */
+static void
+close_file(SLRUSegmentState *state,
+		   char * (filename_fn)(const char *path, int64 segno))
+{
+	/* nothing to do when no file is open */
+	if (state->file == NULL)
+		return;
+
+	if (fclose(state->file) != 0)
+		pg_fatal("could not close file \"%s\": %m",
+				 filename_fn(state->dir, state->segno));
+
+	state->file = NULL;
+}
+
+/*
+ * Read up to BLCKSZ bytes from the open old segment file into buf.
+ *
+ * Returns the number of bytes read. When nothing could be read, a stream
+ * error or any state other than end-of-file is fatal; at end-of-file the
+ * segment file is closed and 0 is returned.
+ */
+static inline int
+read_file(SLRUSegmentState *state, void *buf)
+{
+	size_t		nread;
+
+	nread = fread(buf, sizeof(char), BLCKSZ, state->file);
+
+	if (nread == 0)
+	{
+		if (ferror(state->file))
+			pg_fatal("could not read file \"%s\": %m",
+					 slru_filename_old(state->dir, state->segno));
+
+		if (!feof(state->file))
+			pg_fatal("unknown file read state \"%s\": %m",
+					 slru_filename_old(state->dir, state->segno));
+
+		/* plain end of file: this segment is finished */
+		close_file(state, slru_filename_old);
+	}
+
+	return nread;
+}
+
+/*
+ * Read the next page of the old SLRU described by "state" into buf (which
+ * must be BLCKSZ bytes long) and advance the cursor by one page, moving on
+ * to the next segment after SLRU_PAGES_PER_SEGMENT_OLD pages.
+ *
+ * Returns the number of bytes read from the file; when no file is open the
+ * result is BLCKSZ if the segment is considered empty and 0 otherwise.
+ * *is_empty is set to true when no data was read for this page.
+ *
+ * Whatever part of buf was not filled from the file is zeroed, so a page
+ * reported as empty never carries data left over from an earlier call.
+ */
+static int
+read_old_segment_page(SLRUSegmentState *state, void *buf, bool *is_empty)
+{
+	int			n;
+
+	/* Open next segment file, if needed */
+	if (!state->file)
+	{
+		state->file = open_file(state, slru_filename_old, "rb",
+								"could not open source file \"%s\": %m");
+
+		/* Set position to the needed page */
+		if (fseek(state->file, state->pageno * BLCKSZ, SEEK_SET))
+			close_file(state, slru_filename_old);
+
+		/*
+		 * Skip segment conversion if segment file doesn't exist.
+		 * First segment file should exist in any case.
+		 *
+		 * NOTE(review): open_file() calls pg_fatal() when fopen() fails, so
+		 * a missing file never gets here; state->file can only be NULL at
+		 * this point after a failed fseek(). Confirm that is intended.
+		 */
+		if (state->segno != 0)
+			state->is_empty_segment = true;
+	}
+
+	if (state->file)
+	{
+		/* Segment file does exist, read page from it */
+		state->is_empty_segment = false;
+
+		/* Try to read BLCKSZ bytes */
+		n = read_file(state, buf);
+		*is_empty = (n == 0);
+
+		/*
+		 * Zero the part of buf that was not read. This includes the whole
+		 * buffer when nothing was read, so that callers which still write
+		 * the page out do not emit the previous page's contents.
+		 */
+		memset((char *) buf + n, 0, BLCKSZ - n);
+	}
+	else
+	{
+		n = state->is_empty_segment ?
+			BLCKSZ :			/* Skip empty block at the end of segment */
+			0;					/* We reached the last segment */
+		*is_empty = true;
+
+		/* No data for this page at all: hand back an all-zero buffer */
+		memset((char *) buf, 0, BLCKSZ);
+	}
+
+	state->pageno++;
+
+	if (state->pageno >= SLRU_PAGES_PER_SEGMENT_OLD)
+	{
+		/* Start new segment */
+		state->segno++;
+		state->pageno = 0;
+		close_file(state, slru_filename_old);
+	}
+
+	return n;
+}
+
+/*
+ * Append one BLCKSZ page to the new SLRU described by "state" and advance
+ * the cursor by one page, moving on to the next segment after
+ * SLRU_PAGES_PER_SEGMENT pages.
+ *
+ * The segment file is created lazily, on the first page passed with
+ * is_empty == false, so segments consisting only of empty pages are never
+ * created. Pages skipped before the file was created are written as zeroes
+ * at that moment. Once the file exists every page is written, empty or not.
+ */
+static void
+write_new_segment_page(SLRUSegmentState *state, void *buf, bool is_empty)
+{
+	if (state->file == NULL && !is_empty)
+	{
+		int64		skipped = state->pageno;
+
+		state->file = open_file(state, slru_filename_new, "wb",
+								"could not open target file \"%s\": %m");
+
+		/* Fill in the pages that were skipped while no file existed */
+		if (skipped > 0)
+		{
+			char		zeropage[BLCKSZ] = {0};
+
+			while (skipped-- > 0)
+			{
+				if (fwrite(zeropage, sizeof(char), BLCKSZ, state->file) != BLCKSZ)
+					pg_fatal("could not write file \"%s\": %m",
+							 slru_filename_new(state->dir, state->segno));
+			}
+		}
+	}
+
+	/* Emit the page itself, provided the segment file exists */
+	if (state->file != NULL &&
+		fwrite(buf, sizeof(char), BLCKSZ, state->file) != BLCKSZ)
+		pg_fatal("could not write file \"%s\": %m",
+				 slru_filename_new(state->dir, state->segno));
+
+	state->pageno++;
+
+	/* Still room in this segment? Then we are done. */
+	if (state->pageno < SLRU_PAGES_PER_SEGMENT)
+		return;
+
+	/* Segment is full: close it, the next call starts a new one */
+	state->segno++;
+	state->pageno = 0;
+	close_file(state, slru_filename_new);
+}
+
+/*
+ * Convert pg_xact segments.
+ *
+ * Copies the commit log pages covering the old cluster's xid range, from
+ * the page holding chkpnt_oldstxid up to the page holding chkpnt_nxtxid - 1,
+ * out of old_subdir into new_subdir. Pages are written as read (short pages
+ * are zero-padded by the reader); segment files get new-style names.
+ *
+ * old_subdir and new_subdir are used as-is as the directories in which the
+ * segment file names are built.
+ */
+void
+convert_xact(const char *old_subdir, const char *new_subdir)
+{
+	SLRUSegmentState oldseg = {0};
+	SLRUSegmentState newseg = {0};
+	TransactionId oldest_xid = old_cluster.controldata.chkpnt_oldstxid;
+	TransactionId next_xid = old_cluster.controldata.chkpnt_nxtxid;
+	TransactionId xid;
+	int64		pageno;
+	char		buf[BLCKSZ] = {0};
+
+	oldseg.dir = old_subdir;
+	newseg.dir = new_subdir;
+
+	/* Both cursors start at the page containing the oldest xid */
+	pageno = oldest_xid / CLOG_XACTS_PER_PAGE;
+
+	oldseg.segno = pageno / SLRU_PAGES_PER_SEGMENT_OLD;
+	oldseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT_OLD;
+
+	newseg.segno = pageno / SLRU_PAGES_PER_SEGMENT;
+	newseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT;
+
+	if (next_xid < oldest_xid)
+		next_xid += (TransactionId) 1 << 32;	/* wraparound */
+
+	/* Copy xid flags reading only needed segment pages */
+	for (xid = oldest_xid & ~(CLOG_XACTS_PER_PAGE - 1);
+		 xid <= ((next_xid - 1) & ~(CLOG_XACTS_PER_PAGE - 1));
+		 xid += CLOG_XACTS_PER_PAGE)
+	{
+		bool		is_empty;
+
+		/* Handle possible segment wraparound */
+		if (oldseg.segno > MaxTransactionId32 / CLOG_XACTS_PER_PAGE / SLRU_PAGES_PER_SEGMENT_OLD)
+		{
+			/*
+			 * Widen before adding 1: MaxTransactionId32 is a 32-bit
+			 * unsigned value, so adding 1 in that type would wrap to 0 and
+			 * the segment number checks below would compare against 0.
+			 */
+			pageno = ((int64) MaxTransactionId32 + 1) / CLOG_XACTS_PER_PAGE;
+
+			Assert(oldseg.segno == pageno / SLRU_PAGES_PER_SEGMENT_OLD);
+			Assert(!oldseg.pageno);
+			Assert(!oldseg.file);
+			oldseg.segno = 0;
+
+			Assert(newseg.segno == pageno / SLRU_PAGES_PER_SEGMENT);
+			Assert(!newseg.pageno);
+			Assert(!newseg.file);
+			newseg.segno = 0;
+		}
+
+		read_old_segment_page(&oldseg, buf, &is_empty);
+		write_new_segment_page(&newseg, buf, is_empty);
+	}
+
+	/* Release resources */
+	close_file(&oldseg, slru_filename_old);
+	close_file(&newseg, slru_filename_new);
+}
+
+/*
+ * Build a segment cursor positioned on the page that holds entry "mxid",
+ * given the number of entries per page and of pages per segment. No file
+ * is opened; "dir" is stored in the result as given.
+ */
+static inline SLRUSegmentState
+create_slru_segment_state(MultiXactId mxid,
+						  int offsets_per_page,
+						  int pages_per_segment,
+						  char *dir)
+{
+	SLRUSegmentState result = {0};
+	int64		abs_page;
+
+	/* page number counted from the start of the SLRU */
+	abs_page = mxid / offsets_per_page;
+
+	result.dir = dir;
+	result.segno = abs_page / pages_per_segment;
+	result.pageno = abs_page % pages_per_segment;
+
+	return result;
+}
+
+/*
+ * Convert pg_multixact/offsets segments and return oldest mxid offset.
+ *
+ * Reads the 32-bit offset entries of the old cluster for every multixact
+ * from chkpnt_oldstMulti up to (not including) chkpnt_nxtmulti and writes
+ * them as MultiXactOffset entries into the new cluster. Each offset is
+ * rebased so that the oldest multixact's offset becomes 1.
+ *
+ * old_subdir and new_subdir are relative to the old and new data
+ * directories respectively.
+ *
+ * The returned value is the old offset stored for the oldest multixact, or
+ * the old cluster's next offset when there are no multixacts to convert.
+ */
+MultiXactOffset
+convert_multixact_offsets(const char *old_subdir, const char *new_subdir)
+{
+	SLRUSegmentState oldseg,
+				newseg;
+	MultiXactOffset32 oldbuf[MULTIXACT_OFFSETS_PER_PAGE_OLD] = {0};
+	MultiXactOffset newbuf[MULTIXACT_OFFSETS_PER_PAGE] = {0};
+	MultiXactOffset32 oldest_mxoff = 0;
+	MultiXactId oldest_mxid,
+				next_mxid,
+				mxid;
+	uint64		old_entry,
+				new_entry;
+	bool		oldest_mxoff_known = false;
+
+	StaticAssertStmt((sizeof(oldbuf) == BLCKSZ && sizeof(newbuf) == BLCKSZ),
+					 "buf should be BLCKSZ");
+
+	oldest_mxid = old_cluster.controldata.chkpnt_oldstMulti;
+
+	oldseg = create_slru_segment_state(oldest_mxid,
+									   MULTIXACT_OFFSETS_PER_PAGE_OLD,
+									   SLRU_PAGES_PER_SEGMENT_OLD,
+									   psprintf("%s/%s", old_cluster.pgdata,
+												old_subdir));
+
+	newseg = create_slru_segment_state(oldest_mxid,
+									   MULTIXACT_OFFSETS_PER_PAGE,
+									   SLRU_PAGES_PER_SEGMENT,
+									   psprintf("%s/%s", new_cluster.pgdata,
+												new_subdir));
+
+	/* Position of the oldest multixact inside its old and new page */
+	old_entry = oldest_mxid % MULTIXACT_OFFSETS_PER_PAGE_OLD;
+	new_entry = oldest_mxid % MULTIXACT_OFFSETS_PER_PAGE;
+
+	next_mxid = old_cluster.controldata.chkpnt_nxtmulti;
+	if (next_mxid < oldest_mxid)
+		next_mxid += (MultiXactId) 1 << 32; /* wraparound */
+
+	prep_status("Converting old %s to new format", old_subdir);
+
+	/* Copy mxid offsets reading only needed segment pages */
+	for (mxid = oldest_mxid; mxid < next_mxid; old_entry = 0)
+	{
+		int			oldlen;
+		bool		is_empty;
+		int64		read_segno;
+		int64		read_pageno;
+
+		/* Handle possible segment wraparound */
+		if (oldseg.segno > MaxMultiXactId32 / MULTIXACT_OFFSETS_PER_PAGE_OLD / SLRU_PAGES_PER_SEGMENT_OLD)	/* 0xFFFF */
+			oldseg.segno = 0;
+
+		/*
+		 * Remember which page is about to be read: the read advances the
+		 * cursor, so afterwards oldseg no longer identifies this page.
+		 */
+		read_segno = oldseg.segno;
+		read_pageno = oldseg.pageno;
+
+		oldlen = read_old_segment_page(&oldseg, oldbuf, &is_empty);
+
+		if (oldlen == 0 || is_empty)
+			pg_fatal("cannot read page %lld from segment: %s\n",
+					 (long long) read_pageno,
+					 slru_filename_old(oldseg.dir, read_segno));
+
+		/* Save oldest mxid offset */
+		if (!oldest_mxoff_known)
+		{
+			oldest_mxoff = oldbuf[old_entry];
+			oldest_mxoff_known = true;
+		}
+
+		/* Skip wrapped-around invalid MultiXactIds */
+		if (mxid == (MultiXactId) 1 << 32)
+		{
+			Assert(oldseg.segno == 0);
+			Assert(oldseg.pageno == 1);
+			Assert(old_entry == 0);
+			mxid += FirstMultiXactId;
+			old_entry = FirstMultiXactId;
+		}
+
+		/* Copy entries to the new page */
+		for (; mxid < next_mxid && old_entry < MULTIXACT_OFFSETS_PER_PAGE_OLD;
+			 mxid++, old_entry++)
+		{
+			MultiXactOffset mxoff = oldbuf[old_entry];
+
+			/* Handle possible offset wraparound (1 becomes 2^32) */
+			if (mxoff < oldest_mxoff)
+				mxoff += ((MultiXactOffset) 1 << 32) - 1;
+
+			/* Subtract oldest_mxoff, so new offsets will start from 1 */
+			newbuf[new_entry++] = mxoff - oldest_mxoff + 1;
+
+			if (new_entry >= MULTIXACT_OFFSETS_PER_PAGE)
+			{
+				/* Write new page */
+				write_new_segment_page(&newseg, newbuf, false);
+				new_entry = 0;
+			}
+		}
+	}
+
+	/* Write the last incomplete page */
+	if (new_entry > 0 || oldest_mxid == next_mxid)
+	{
+		memset(&newbuf[new_entry], 0,
+			   sizeof(newbuf[0]) * (MULTIXACT_OFFSETS_PER_PAGE - new_entry));
+		write_new_segment_page(&newseg, newbuf, false);
+	}
+
+	/* Use next_mxoff as oldest_mxoff, if oldest_mxid == next_mxid */
+	if (!oldest_mxoff_known)
+	{
+		Assert(oldest_mxid == next_mxid);
+		oldest_mxoff = (MultiXactOffset) old_cluster.controldata.chkpnt_nxtmxoff;
+	}
+
+	/* Release resources */
+	close_file(&oldseg, slru_filename_old);
+	close_file(&newseg, slru_filename_new);
+
+	pfree((char *) oldseg.dir);
+	pfree((char *) newseg.dir);
+
+	check_ok();
+
+	return oldest_mxoff;
+}
+
+/*
+ * Convert pg_multixact/members segments, offsets will start from 1.
+ *
+ * Walks the old member slots from oldest_mxoff up to (not including) the
+ * old cluster's next member offset and copies each member's flag byte and
+ * xid into the new group layout, widening the xid to TransactionId. The
+ * new files are written from segment 0, page 0, with slot 0 left unused.
+ *
+ * old_subdir and new_subdir are relative to the old and new data
+ * directories respectively; oldest_mxoff is the old offset of the first
+ * member to keep.
+ */
+void
+convert_multixact_members(const char *old_subdir, const char *new_subdir,
+						  MultiXactOffset oldest_mxoff)
+{
+	MultiXactOffset next_mxoff,
+				mxoff;
+	SLRUSegmentState oldseg,
+				newseg;
+	char		oldbuf[BLCKSZ] = {0},
+				newbuf[BLCKSZ] = {0};
+	int			newgroup,
+				newmember;
+	char	   *newflag = newbuf;
+	TransactionId *newxid;
+	int			oldidx,
+				newidx;
+
+	prep_status("Converting old %s to new format", old_subdir);
+
+	next_mxoff = (MultiXactOffset) old_cluster.controldata.chkpnt_nxtmxoff;
+	if (next_mxoff < oldest_mxoff)
+		next_mxoff += (MultiXactOffset) 1 << 32;
+
+	/* In a new group the xids follow the group's flag bytes */
+	newxid = (TransactionId *) (newflag + MXACT_MEMBER_FLAGS_PER_BYTE * MULTIXACT_MEMBERS_PER_MEMBERGROUP);
+
+	/* Initialize old starting position */
+	oldidx = oldest_mxoff % MULTIXACT_MEMBERS_PER_PAGE_OLD;
+	oldseg = create_slru_segment_state(oldest_mxoff,
+									   MULTIXACT_MEMBERS_PER_PAGE_OLD,
+									   SLRU_PAGES_PER_SEGMENT_OLD,
+									   psprintf("%s/%s", old_cluster.pgdata,
+												old_subdir));
+
+	/* Initialize empty new segment */
+	newseg = create_slru_segment_state(0, 1, 1,
+									   psprintf("%s/%s", new_cluster.pgdata,
+												new_subdir));
+
+	/* Initialize new starting position (skip invalid zero offset) */
+	newgroup = 0;
+	newidx = 1;
+	newmember = 1;
+	newflag++;
+	newxid++;
+
+	/* Iterate through the original directory */
+	for (mxoff = oldest_mxoff; mxoff < next_mxoff; oldidx = 0)
+	{
+		bool		old_is_empty;
+		int			oldlen;
+		int			ngroups;
+		int			oldgroup;
+		int			oldmember;
+		int64		read_segno = oldseg.segno;
+		int64		read_pageno = oldseg.pageno;
+
+		/*
+		 * The read advances the cursor, so the position saved above is what
+		 * identifies the page in the error message.
+		 */
+		oldlen = read_old_segment_page(&oldseg, oldbuf, &old_is_empty);
+
+		if (oldlen == 0 || old_is_empty)
+			pg_fatal("cannot read page %lld from segment: %s\n",
+					 (long long) read_pageno,
+					 slru_filename_old(oldseg.dir, read_segno));
+
+		ngroups = oldlen / MULTIXACT_MEMBERGROUP_SIZE_OLD;
+
+		/* Iterate through old member groups */
+		for (oldgroup = oldidx / MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD,
+			 oldmember = oldidx % MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD;
+			 oldgroup < ngroups && mxoff < next_mxoff;
+			 oldgroup++, oldmember = 0)
+		{
+			char	   *oldflag = (char *) oldbuf + oldgroup * MULTIXACT_MEMBERGROUP_SIZE_OLD;
+			TransactionId32 *oldxid = (TransactionId32 *) (oldflag + MULTIXACT_FLAGBYTES_PER_GROUP_OLD);
+
+			oldxid += oldmember;
+			oldflag += oldmember;
+
+			/*
+			 * Iterate through old members. Start counting at oldmember:
+			 * the pointers above already skip that many members, so only
+			 * the remaining members of this group may be consumed.
+			 */
+			for (int i = oldmember;
+				 i < MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD && mxoff < next_mxoff;
+				 i++)
+			{
+				/* Copy member's xid and flags to the new page */
+				*newflag++ = *oldflag++;
+				*newxid++ = (TransactionId) * oldxid++;
+
+				newidx++;
+				oldidx++;
+				mxoff++;
+
+				if (++newmember >= MULTIXACT_MEMBERS_PER_MEMBERGROUP)
+				{
+					/* Start next member group */
+					newmember = 0;
+
+					if (++newgroup >= MULTIXACT_MEMBERGROUPS_PER_PAGE)
+					{
+						/* Write current page and start new */
+						newgroup = 0;
+						newidx = 0;
+						write_new_segment_page(&newseg, newbuf, false);
+						memset(newbuf, 0, BLCKSZ);
+					}
+
+					newflag = (char *) newbuf + newgroup * MULTIXACT_MEMBERGROUP_SIZE;
+					newxid = (TransactionId *) (newflag + MXACT_MEMBER_FLAGS_PER_BYTE * MULTIXACT_MEMBERS_PER_MEMBERGROUP);
+				}
+
+				/*
+				 * Handle offset wraparound
+				 *
+				 * NOTE(review): this branch needs another look. The
+				 * condition stays true for every member after the wrap,
+				 * the enclosing loops keep walking the already-read oldbuf
+				 * after the switch to segment 0000, the outer loop resets
+				 * oldidx to 0 (overwriting the 1 set below), and
+				 * oldseg.pageno has already been advanced by
+				 * read_old_segment_page() when the Asserts run.
+				 */
+				if (mxoff > MaxMultiXactOffset32)
+				{
+					Assert(mxoff == (MultiXactOffset) 1 << 32);
+					Assert(oldseg.segno == MaxMultiXactOffset32 / MULTIXACT_MEMBERS_PER_PAGE_OLD / SLRU_PAGES_PER_SEGMENT_OLD);
+					Assert(oldseg.pageno == MaxMultiXactOffset32 / MULTIXACT_MEMBERS_PER_PAGE_OLD % SLRU_PAGES_PER_SEGMENT_OLD);
+					Assert(oldmember == MaxMultiXactOffset32 % MULTIXACT_MEMBERS_PER_PAGE_OLD);
+
+					/* Switch to segment 0000 */
+					close_file(&oldseg, slru_filename_old);
+					oldseg.segno = 0;
+					oldseg.pageno = 0;
+
+					oldidx = 1; /* skip invalid zero mxid offset */
+				}
+			}
+		}
+	}
+
+	/* Write last page, unless it is empty */
+	if (newflag > (char *) newbuf || oldest_mxoff == next_mxoff)
+		write_new_segment_page(&newseg, newbuf, false);
+
+	/* Release resources */
+	close_file(&oldseg, slru_filename_old);
+	close_file(&newseg, slru_filename_new);
+
+	pfree((char *) oldseg.dir);
+	pfree((char *) newseg.dir);
+
+	check_ok();
+}
diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
index add6ea9c34..9ebb8d1063 100644
--- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl
+++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
@@ -84,7 +84,7 @@ my $oldnode =
# increasing test runtime, run these tests with a custom setting.
# --allow-group-access and --wal-segsize have been added in v11.
my %node_params = ();
-$node_params{extra} = [ '--wal-segsize', '1', '--allow-group-access' ]
+# NOTE(review): '-x' is now passed to the old node's initdb whenever that node
+# is v11 or newer; confirm every such initdb accepts this option.
+$node_params{extra} = [ '--wal-segsize', '1', '--allow-group-access', '-x', '21000000000' ]
if $oldnode->pg_version >= 11;
$oldnode->init(%node_params);
$oldnode->start;
@@ -178,6 +178,14 @@ if (defined($ENV{oldinstall}))
'ran adapt script');
}
+# Create and fill a table in the old cluster, then record its relfrozenxid
+# and relminmxid so they can be compared with the values after the upgrade.
+$oldnode->safe_psql('regression',
+ "CREATE TABLE t1 (id SERIAL NOT NULL PRIMARY KEY, plt text, pln NUMERIC(8, 4));
+ INSERT INTO t1 (plt, pln) SELECT md5(random()::text), random() * 9999 FROM generate_series(1, 1000);");
+my $relfrozenxid = $oldnode->safe_psql('regression',
+ "SELECT relfrozenxid FROM pg_class WHERE relname = 't1';");
+my $relminmxid = $oldnode->safe_psql('regression',
+ "SELECT relminmxid FROM pg_class WHERE relname = 't1';");
+
# Take a dump before performing the upgrade as a base comparison. Note
# that we need to use pg_dumpall from the new node here.
my @dump_command = (
@@ -290,6 +298,16 @@ ok( !-d $newnode->data_dir . "/pg_upgrade_output.d",
$newnode->start;
+# Table t1 must report the same relfrozenxid in the upgraded cluster as it
+# did in the old one.
+my $relfrozenxid_new = $newnode->safe_psql('regression',
+ "SELECT relfrozenxid FROM pg_class WHERE relname = 't1';");
+
+is($relfrozenxid_new, $relfrozenxid, 'old and new relfrozenxid match after pg_upgrade');
+
+# The same holds for relminmxid.
+my $relminmxid_new = $newnode->safe_psql('regression',
+ "SELECT relminmxid FROM pg_class WHERE relname = 't1';");
+
+is($relminmxid_new, $relminmxid, 'old and new relminmxid match after pg_upgrade');
+
# Check if there are any logs coming from pg_upgrade, that would only be
# retained on failure.
my $log_path = $newnode->data_dir . "/pg_upgrade_output.d";
diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c
index dc19fc6ec8..20763af6d2 100644
--- a/src/bin/pg_upgrade/version.c
+++ b/src/bin/pg_upgrade/version.c
@@ -9,6 +9,7 @@
#include "postgres_fe.h"
+#include "access/transam.h"
#include "catalog/pg_class_d.h"
#include "fe_utils/string_utils.h"
#include "pg_upgrade.h"
@@ -242,19 +243,21 @@ old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster)
}
/*
- * old_9_6_invalidate_hash_indexes()
- * 9.6 -> 10
- * Hash index binary format has changed from 9.6->10.0
+ * invalidate_indexes()
+ * Invalidates all indexes satisfying given predicate.
*/
-void
-old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
+static void
+invalidate_indexes(ClusterInfo *cluster, bool check_mode,
+ const char *name, const char *pred)
{
int dbnum;
FILE *script = NULL;
bool found = false;
- char *output_path = "reindex_hash.sql";
+ char output_path[MAXPGPATH];
+
+ snprintf(output_path, sizeof(output_path), "reindex_%s.sql", name);
- prep_status("Checking for hash indexes");
+ prep_status("Checking for %s indexes", name);
for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
{
@@ -267,9 +270,16 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
DbInfo *active_db = &cluster->dbarr.dbs[dbnum];
PGconn *conn = connectToServer(cluster, active_db->db_name);
- /* find hash indexes */
- res = executeQueryOrDie(conn,
- "SELECT n.nspname, c.relname "
+
+ /*
+ * Find indexes satisfying predicate.
+ *
+ * System indexes (with oids < FirstNormalObjectId) are excluded from
+ * the search as they are recreated in the new cluster during initdb.
+ */
+ res = executeQueryOrDie(
+ conn,
+ "SELECT n.nspname, c.relname, i.indexrelid "
"FROM pg_catalog.pg_class c, "
" pg_catalog.pg_index i, "
" pg_catalog.pg_am a, "
@@ -277,8 +287,11 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
"WHERE i.indexrelid = c.oid AND "
" c.relam = a.oid AND "
" c.relnamespace = n.oid AND "
- " a.amname = 'hash'"
- );
+ " i.indexrelid >= '%u'::pg_catalog.oid AND "
+ " %s "
+ "ORDER BY i.indexrelid ASC",
+ FirstNormalObjectId,
+ pred);
ntups = PQntuples(res);
i_nspname = PQfnumber(res, "nspname");
@@ -311,8 +324,14 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
if (!check_mode && db_used)
{
- /* mark hash indexes as invalid */
- PQclear(executeQueryOrDie(conn,
+ /*
+ * Mark indexes satisfying predicate as invalid.
+ *
+ * System indexes (with oids < FirstNormalObjectId) are excluded
+ * from the search (see above).
+ */
+ PQclear(executeQueryOrDie(
+ conn,
"UPDATE pg_catalog.pg_index i "
"SET indisvalid = false "
"FROM pg_catalog.pg_class c, "
@@ -321,7 +340,10 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
"WHERE i.indexrelid = c.oid AND "
" c.relam = a.oid AND "
" c.relnamespace = n.oid AND "
- " a.amname = 'hash'"));
+ " i.indexrelid >= '%u'::pg_catalog.oid AND "
+ " %s",
+ FirstNormalObjectId,
+ pred));
}
PQfinish(conn);
@@ -335,24 +357,37 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
report_status(PG_WARNING, "warning");
if (check_mode)
pg_log(PG_WARNING, "\n"
- "Your installation contains hash indexes. These indexes have different\n"
+ "Your installation contains %s indexes. These indexes have different\n"
"internal formats between your old and new clusters, so they must be\n"
"reindexed with the REINDEX command. After upgrading, you will be given\n"
- "REINDEX instructions.");
+ "REINDEX instructions.",
+ name);
else
pg_log(PG_WARNING, "\n"
- "Your installation contains hash indexes. These indexes have different\n"
+ "Your installation contains %s indexes. These indexes have different\n"
"internal formats between your old and new clusters, so they must be\n"
"reindexed with the REINDEX command. The file\n"
" %s\n"
"when executed by psql by the database superuser will recreate all invalid\n"
"indexes; until then, none of these indexes will be used.",
+ name,
output_path);
}
else
check_ok();
}
+/*
+ * old_9_6_invalidate_hash_indexes()
+ * 9.6 -> 10
+ * Hash index binary format has changed from 9.6->10.0
+ */
+void
+old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode)
+{
+ invalidate_indexes(cluster, check_mode, "hash", "a.amname = 'hash'");
+}
+
/*
* old_11_check_for_sql_identifier_data_type_usage()
* 11 -> 12
@@ -459,3 +494,36 @@ report_extension_updates(ClusterInfo *cluster)
else
check_ok();
}
+
+/*
+ * invalidate_spgist_indexes()
+ * 32bit -> 64bit
+ * SP-GiST indexes contain xids.
+ */
+void
+invalidate_spgist_indexes(ClusterInfo *cluster, bool check_mode)
+{
+ invalidate_indexes(cluster, check_mode, "spgist", "a.amname = 'spgist'");
+}
+
+/*
+ * invalidate_gin_indexes()
+ * 32bit -> 64bit
+ * GIN indexes contain xids in deleted pages.
+ */
+void
+invalidate_gin_indexes(ClusterInfo *cluster, bool check_mode)
+{
+ invalidate_indexes(cluster, check_mode, "gin", "a.amname = 'gin'");
+}
+
+/*
+ * invalidate_external_indexes()
+ * Generate script to REINDEX non-standard external indexes (like RUM, etc.)
+ */
+void
+invalidate_external_indexes(ClusterInfo *cluster, bool check_mode)
+{
+ invalidate_indexes(cluster, check_mode, "external",
+ "NOT a.amname IN ('btree', 'hash', 'gist', 'gin', 'spgist', 'brin')");
+}
diff --git a/src/bin/pg_verifybackup/t/003_corruption.pl b/src/bin/pg_verifybackup/t/003_corruption.pl
index f1ceb4a4bd..f4109471ac 100644
--- a/src/bin/pg_verifybackup/t/003_corruption.pl
+++ b/src/bin/pg_verifybackup/t/003_corruption.pl
@@ -174,7 +174,7 @@ sub mutilate_extra_tablespace_file
sub mutilate_missing_file
{
my ($backup_path) = @_;
- my $pathname = "$backup_path/pg_xact/000000000000";
+ my $pathname = "$backup_path/pg_xact/000000123000";
unlink($pathname) || die "$pathname: $!";
return;
}
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index 5a82cfdab2..cb11ca27f5 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -919,7 +919,7 @@ main(int argc, char **argv)
config.filter_by_fpw = true;
break;
case 'x':
- if (sscanf(optarg, "%u", &config.filter_by_xid) != 1)
+ if (sscanf(optarg, "%" INT64_MODIFIER "u", &config.filter_by_xid) != 1)
{
pg_log_error("invalid transaction ID specification: \"%s\"",
optarg);
diff --git a/src/include/access/clog.h b/src/include/access/clog.h
index 543f2e2643..73bc172309 100644
--- a/src/include/access/clog.h
+++ b/src/include/access/clog.h
@@ -31,7 +31,7 @@ typedef int XidStatus;
typedef struct xl_clog_truncate
{
- int pageno;
+ int64 pageno;
TransactionId oldestXact;
Oid oldestXactDb;
} xl_clog_truncate;
diff --git a/src/include/access/ginblock.h b/src/include/access/ginblock.h
index 9347f464f3..4db042c319 100644
--- a/src/include/access/ginblock.h
+++ b/src/include/access/ginblock.h
@@ -133,8 +133,15 @@ typedef struct GinMetaPageData
* We should reclaim deleted page only once every transaction started before
* its deletion is over.
*/
-#define GinPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid )
-#define GinPageSetDeleteXid(page, xid) ( ((PageHeader) (page))->pd_prune_xid = xid)
+#define GinPageGetDeleteXid(page) ( \
+ (((PageHeader) (page))->pd_upper == BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId)) ? \
+ *((TransactionId *) ((char *) (page) + BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId))) : \
+ InvalidTransactionId )
+#define GinPageSetDeleteXid(page, xid) \
+ do { \
+ ((PageHeader) (page))->pd_upper = BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId); \
+ *((TransactionId *) ((char *) (page) + BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId))) = xid; \
+ } while (false)
extern bool GinPageIsRecyclable(Page page);
/*
diff --git a/src/include/access/gist.h b/src/include/access/gist.h
index a3337627b8..41de3052fd 100644
--- a/src/include/access/gist.h
+++ b/src/include/access/gist.h
@@ -223,7 +223,7 @@ GistPageGetDeleteXid(Page page)
return ((GISTDeletedPageContents *) PageGetContents(page))->deleteXid;
}
else
- return FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
+ return FullTransactionIdFromXid(FirstNormalTransactionId);
}
/*
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 9dab35551e..ed1e7e44f3 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -146,6 +146,8 @@ extern void ReleaseBulkInsertStatePin(BulkInsertState bistate);
extern void heap_insert(Relation relation, HeapTuple tup, CommandId cid,
int options, BulkInsertState bistate);
+extern void rewrite_page_prepare_for_xid(Page page, HeapTuple tup,
+ bool is_toast);
extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots,
int ntuples, CommandId cid, int options,
BulkInsertState bistate);
@@ -164,14 +166,14 @@ extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
Buffer *buffer, struct TM_FailureData *tmfd);
extern void heap_inplace_update(Relation relation, HeapTuple tuple);
-extern bool heap_freeze_tuple(HeapTupleHeader tuple,
+extern bool heap_freeze_tuple(HeapTuple tuple,
TransactionId relfrozenxid, TransactionId relminmxid,
TransactionId cutoff_xid, TransactionId cutoff_multi);
-extern bool heap_tuple_would_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
+extern bool heap_tuple_would_freeze(HeapTuple htup, TransactionId cutoff_xid,
MultiXactId cutoff_multi,
TransactionId *relfrozenxid_out,
MultiXactId *relminmxid_out);
-extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple);
+extern bool heap_tuple_needs_eventual_freeze(HeapTuple htup);
extern void simple_heap_insert(Relation relation, HeapTuple tup);
extern void simple_heap_delete(Relation relation, ItemPointer tid);
@@ -189,12 +191,16 @@ extern int heap_page_prune(Relation relation, Buffer buffer,
TransactionId old_snap_xmin,
TimestampTz old_snap_ts,
int *nnewlpdead,
- OffsetNumber *off_loc);
+ OffsetNumber *off_loc,
+ bool repairFragmentation);
extern void heap_page_prune_execute(Buffer buffer,
OffsetNumber *redirected, int nredirected,
OffsetNumber *nowdead, int ndead,
- OffsetNumber *nowunused, int nunused);
-extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
+ OffsetNumber *nowunused, int nunused,
+ bool repairFragmentation,
+ bool is_toast);
+extern void heap_get_root_tuples(Relation relation, Buffer buffer, Page page,
+ OffsetNumber *root_offsets);
/* in heap/vacuumlazy.c */
struct VacuumParams;
@@ -212,7 +218,7 @@ extern HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer
TransactionId *dead_after);
extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
uint16 infomask, TransactionId xid);
-extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple);
+extern bool HeapTupleIsOnlyLocked(HeapTuple htup);
extern bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot);
extern bool HeapTupleIsSurelyDead(HeapTuple htup,
struct GlobalVisState *vistest);
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index 34220d93cf..54e3e3759a 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -59,6 +59,8 @@
#define XLOG_HEAP2_LOCK_UPDATED 0x60
#define XLOG_HEAP2_NEW_CID 0x70
+#define XLOG_HEAP3_BASE_SHIFT 0x00
+
/*
* xl_heap_insert/xl_heap_multi_insert flag values, 8 bits are available.
*/
@@ -98,6 +100,7 @@
#define XLH_DELETE_CONTAINS_OLD_KEY (1<<2)
#define XLH_DELETE_IS_SUPER (1<<3)
#define XLH_DELETE_IS_PARTITION_MOVE (1<<4)
+#define XLH_DELETE_PAGE_ON_TOAST_RELATION (1<<5)
/* convenience macro for checking whether any form of old tuple was logged */
#define XLH_DELETE_CONTAINS_OLD \
@@ -240,15 +243,19 @@ typedef struct xl_heap_update
*
* Acquires a full cleanup lock.
*/
+#define XLH_PRUNE_ON_TOAST_RELATION 0x01
+#define XLH_PRUNE_REPAIR_FRAGMENTATION 0x02
+
typedef struct xl_heap_prune
{
TransactionId latestRemovedXid;
uint16 nredirected;
uint16 ndead;
+ uint8 flags;
/* OFFSET NUMBERS are in the block reference 0 */
} xl_heap_prune;
-#define SizeOfHeapPrune (offsetof(xl_heap_prune, ndead) + sizeof(uint16))
+#define SizeOfHeapPrune (offsetof(xl_heap_prune, flags) + sizeof(uint8))
/*
* The vacuum page record is similar to the prune record, but can only mark
@@ -336,13 +343,16 @@ typedef struct xl_heap_freeze_tuple
* Backup block 0's data contains an array of xl_heap_freeze_tuple structs,
* one for each tuple.
*/
+#define XLH_FREEZE_PAGE_ON_TOAST_RELATION 0x01
+
typedef struct xl_heap_freeze_page
{
TransactionId cutoff_xid;
uint16 ntuples;
+ uint8 flags;
} xl_heap_freeze_page;
-#define SizeOfHeapFreezePage (offsetof(xl_heap_freeze_page, ntuples) + sizeof(uint16))
+#define SizeOfHeapFreezePage (offsetof(xl_heap_freeze_page, flags) + sizeof(uint8))
/*
* This is what we need to know about setting a visibility map bit
@@ -389,7 +399,19 @@ typedef struct xl_heap_rewrite_mapping
XLogRecPtr start_lsn; /* Insert LSN at begin of rewrite */
} xl_heap_rewrite_mapping;
-extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple,
+#define XLH_BASE_SHIFT_ON_TOAST_RELATION 0x01
+
+/* shift the base of xids on heap page */
+typedef struct xl_heap_base_shift
+{
+ int64 delta; /* delta value to shift the base */
+ bool multi; /* true to shift multixact base */
+ uint8 flags;
+} xl_heap_base_shift;
+
+#define SizeOfHeapBaseShift (offsetof(xl_heap_base_shift, flags) + sizeof(uint8))
+
+extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTuple tuple,
TransactionId *latestRemovedXid);
extern void heap_redo(XLogReaderState *record);
@@ -399,12 +421,15 @@ extern void heap_mask(char *pagedata, BlockNumber blkno);
extern void heap2_redo(XLogReaderState *record);
extern void heap2_desc(StringInfo buf, XLogReaderState *record);
extern const char *heap2_identify(uint8 info);
+extern void heap3_redo(XLogReaderState *record);
+extern void heap3_desc(StringInfo buf, XLogReaderState *record);
+extern const char *heap3_identify(uint8 info);
extern void heap_xlog_logical_rewrite(XLogReaderState *r);
extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer,
TransactionId cutoff_xid, xl_heap_freeze_tuple *tuples,
int ntuples);
-extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
+extern bool heap_prepare_freeze_tuple(HeapTuple htup,
TransactionId relfrozenxid,
TransactionId relminmxid,
TransactionId cutoff_xid,
@@ -413,8 +438,11 @@ extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
bool *totally_frozen,
TransactionId *relfrozenxid_out,
MultiXactId *relminmxid_out);
-extern void heap_execute_freeze_tuple(HeapTupleHeader tuple,
+extern void heap_execute_freeze_tuple(HeapTuple tuple,
xl_heap_freeze_tuple *frz);
+extern void heap_execute_freeze_tuple_page(Page page, HeapTupleHeader htup,
+ xl_heap_freeze_tuple *xlrec_tp,
+ bool is_toast);
extern XLogRecPtr log_heap_visible(RelFileLocator rlocator, Buffer heap_buffer,
Buffer vm_buffer, TransactionId cutoff_xid, uint8 vmflags);
diff --git a/src/include/access/heaptoast.h b/src/include/access/heaptoast.h
index a75699054a..3916a5f05e 100644
--- a/src/include/access/heaptoast.h
+++ b/src/include/access/heaptoast.h
@@ -20,10 +20,19 @@
/*
* Find the maximum size of a tuple if there are to be N tuples per page.
*/
+#if MAXIMUM_ALIGNOF == 8
#define MaximumBytesPerTuple(tuplesPerPage) \
MAXALIGN_DOWN((BLCKSZ - \
- MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData))) \
+ MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData)) - MAXALIGN(sizeof(HeapPageSpecialData))) \
/ (tuplesPerPage))
+#elif MAXIMUM_ALIGNOF == 4
+#define MaximumBytesPerTuple(tuplesPerPage) \
+ MAXALIGN_DOWN((BLCKSZ - \
+ MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData)) - MAXALIGN(sizeof(ToastPageSpecialData))) \
+ / (tuplesPerPage))
+#else
+#error "unknown arch bitness"
+#endif
/*
* These symbols control toaster activation. If a tuple is larger than
diff --git a/src/include/access/htup.h b/src/include/access/htup.h
index a4bc7256ed..ae61f92471 100644
--- a/src/include/access/htup.h
+++ b/src/include/access/htup.h
@@ -54,6 +54,12 @@ typedef MinimalTupleData *MinimalTuple;
* this can't be told apart from case #1 by inspection; code setting up
* or destroying this representation has to know what it's doing.
*
+ * t_xmin and t_xmax hold the calculated TransactionId values of the tuple's
+ * xmin and xmax. Normally they are calculated from the ShortTransactionId-sized
+ * on-disk tuple xmin/xmax representation:
+ * t_data->t_choice.t_heap.t_xmin/t_data->t_choice.t_heap.t_xmax
+ * and the pd_xid_base and pd_multi_base values common to all tuples on a page.
+ *
* t_len should always be valid, except in the pointer-to-nothing case.
* t_self and t_tableOid should be valid if the HeapTupleData points to
* a disk buffer, or if it represents a copy of a tuple on disk. They
@@ -61,10 +67,12 @@ typedef MinimalTupleData *MinimalTuple;
*/
typedef struct HeapTupleData
{
+ TransactionId t_xmin; /* calculated tuple xmin */
+ TransactionId t_xmax; /* calculated tuple xmax */
uint32 t_len; /* length of *t_data */
ItemPointerData t_self; /* SelfItemPointer */
Oid t_tableOid; /* table the tuple came from */
-#define FIELDNO_HEAPTUPLEDATA_DATA 3
+#define FIELDNO_HEAPTUPLEDATA_DATA 5
HeapTupleHeader t_data; /* -> tuple header and data */
} HeapTupleData;
@@ -78,12 +86,12 @@ typedef HeapTupleData *HeapTuple;
#define HeapTupleIsValid(tuple) PointerIsValid(tuple)
/* HeapTupleHeader functions implemented in utils/time/combocid.c */
-extern CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup);
-extern CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup);
-extern void HeapTupleHeaderAdjustCmax(HeapTupleHeader tup,
+extern CommandId HeapTupleGetCmin(HeapTuple tup);
+extern CommandId HeapTupleGetCmax(HeapTuple tup);
+extern void HeapTupleHeaderAdjustCmax(HeapTuple tup,
CommandId *cmax, bool *iscombo);
/* Prototype for HeapTupleHeader accessors in heapam.c */
-extern TransactionId HeapTupleGetUpdateXid(HeapTupleHeader tuple);
+extern TransactionId HeapTupleGetUpdateXid(HeapTuple tuple);
#endif /* HTUP_H */
diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h
index 9561c835f2..8bd9cfb82c 100644
--- a/src/include/access/htup_details.h
+++ b/src/include/access/htup_details.h
@@ -19,6 +19,7 @@
#include "access/tupdesc.h"
#include "access/tupmacs.h"
#include "storage/bufpage.h"
+#include "storage/bufmgr.h"
/*
* MaxTupleAttributeNumber limits the number of (user) columns in a tuple.
@@ -120,13 +121,13 @@
typedef struct HeapTupleFields
{
- TransactionId t_xmin; /* inserting xact ID */
- TransactionId t_xmax; /* deleting or locking xact ID */
+ ShortTransactionId t_xmin; /* inserting xact ID */
+ ShortTransactionId t_xmax; /* deleting or locking xact ID */
union
{
CommandId t_cid; /* inserting or deleting command ID, or both */
- TransactionId t_xvac; /* old-style VACUUM FULL xact ID */
+ ShortTransactionId t_xvac; /* old-style VACUUM FULL xact ID */
} t_field3;
} HeapTupleFields;
@@ -222,7 +223,7 @@ struct HeapTupleHeaderData
* HEAP_XMAX_LOCK_ONLY bit is set; or, for pg_upgrade's sake, if the Xmax is
* not a multi and the EXCL_LOCK bit is set.
*
- * See also HeapTupleHeaderIsOnlyLocked, which also checks for a possible
+ * See also HeapTupleIsOnlyLocked, which also checks for a possible
* aborted updater transaction.
*
* Beware of multiple evaluations of the argument.
@@ -298,27 +299,85 @@ struct HeapTupleHeaderData
*/
/*
- * HeapTupleHeaderGetRawXmin returns the "raw" xmin field, which is the xid
+ * Copy base values for xid and multixacts from one heap tuple to heap tuple.
+ * Should be called on tuple copy or making desc tuple on the base on src tuple
+ * saving visibility information.
+ */
+#define HeapTupleCopyBase(dest, src) \
+{ \
+ (dest)->t_xmin = (src)->t_xmin; \
+ (dest)->t_xmax = (src)->t_xmax; \
+}
+
+/*
+ * Set base values for tuple xids/multixacts to zero. Used when visibility
+ * infromation is negligible or will be set later.
+ */
+#define HeapTupleSetZeroBase(tup) \
+{ \
+ (tup)->t_xmin = 0; \
+ (tup)->t_xmax = 0; \
+}
+
+/*
+ * Copy HeapTupleHeader xmin/xmax in raw way ???
+ */
+#define HeapTupleCopyHeaderXids(tup) \
+{ \
+ (tup)->t_xmin = (tup)->t_data->t_choice.t_heap.t_xmin; \
+ (tup)->t_xmax = (tup)->t_data->t_choice.t_heap.t_xmax; \
+}
+
+/*
+ * Macros for accessing "double xmax". On pg_upgraded instances, it might
+ * happen that we can't fit the new special area on the page. But we still
+ * might need to write xmax of tuples for updates and deletes. The trick is
+ * that we actually don't need the xmin field. After pg_upgrade (which implies
+ * a restart) no insertions have gone to this page yet (otherwise the special
+ * area could fit). So, if a tuple is visible (otherwise it would be deleted),
+ * then it's visible to everybody. Thus, t_xmin isn't needed. Therefore, we can
+ * use both t_xmin and t_xmax to store a 64-bit xmax.
+ *
+ * See heap_convert.c for details.
+ */
+#define HeapTupleHeaderGetDoubleXmax(tup) \
+ ((TransactionId)(tup)->t_choice.t_heap.t_xmax + \
+ ((TransactionId)(tup)->t_choice.t_heap.t_xmin << 32))
+
+#define HeapTupleHeaderSetDoubleXmax(tup, xid) \
+do { \
+ (tup)->t_choice.t_heap.t_xmax = (TransactionId) (xid) & 0xFFFFFFFF; \
+ (tup)->t_choice.t_heap.t_xmin = ((TransactionId) (xid) >> 32) & 0xFFFFFFFF; \
+} while (0)
+
+/*
+ * HeapTupleGetRawXmin returns the "raw" xmin field, which is the xid
* originally used to insert the tuple. However, the tuple might actually
* be frozen (via HeapTupleHeaderSetXminFrozen) in which case the tuple's xmin
* is visible to every snapshot. Prior to PostgreSQL 9.4, we actually changed
* the xmin to FrozenTransactionId, and that value may still be encountered
* on disk.
*/
-#define HeapTupleHeaderGetRawXmin(tup) \
+#define HeapTupleGetRawXmin(tup) ((tup)->t_xmin)
+
+#define HeapTupleGetXmin(tup) \
( \
- (tup)->t_choice.t_heap.t_xmin \
+ HeapTupleHeaderXminFrozen((tup)->t_data) ? \
+ FrozenTransactionId : HeapTupleGetRawXmin(tup) \
)
-#define HeapTupleHeaderGetXmin(tup) \
+#define HeapTupleSetXmin(tup, xid) ((tup)->t_xmin = (xid))
+
+#define HeapTupleHeaderSetXmin(page, tup) \
( \
- HeapTupleHeaderXminFrozen(tup) ? \
- FrozenTransactionId : HeapTupleHeaderGetRawXmin(tup) \
+ AssertMacro(!HeapPageIsDoubleXmax(page)), \
+ (tup)->t_data->t_choice.t_heap.t_xmin = NormalTransactionIdToShort(HeapPageGetSpecial(page)->pd_xid_base, (tup)->t_xmin) \
)
-#define HeapTupleHeaderSetXmin(tup, xid) \
+#define ToastTupleHeaderSetXmin(page, tup) \
( \
- (tup)->t_choice.t_heap.t_xmin = (xid) \
+ AssertMacro(!HeapPageIsDoubleXmax(page)), \
+ (tup)->t_data->t_choice.t_heap.t_xmin = NormalTransactionIdToShort(ToastPageGetSpecial(page)->pd_xid_base, (tup)->t_xmin) \
)
#define HeapTupleHeaderXminCommitted(tup) \
@@ -337,18 +396,6 @@ struct HeapTupleHeaderData
((tup)->t_infomask & (HEAP_XMIN_FROZEN)) == HEAP_XMIN_FROZEN \
)
-#define HeapTupleHeaderSetXminCommitted(tup) \
-( \
- AssertMacro(!HeapTupleHeaderXminInvalid(tup)), \
- ((tup)->t_infomask |= HEAP_XMIN_COMMITTED) \
-)
-
-#define HeapTupleHeaderSetXminInvalid(tup) \
-( \
- AssertMacro(!HeapTupleHeaderXminCommitted(tup)), \
- ((tup)->t_infomask |= HEAP_XMIN_INVALID) \
-)
-
#define HeapTupleHeaderSetXminFrozen(tup) \
( \
AssertMacro(!HeapTupleHeaderXminInvalid(tup)), \
@@ -362,30 +409,67 @@ struct HeapTupleHeaderData
* to resolve the MultiXactId if necessary. This might involve multixact I/O,
* so it should only be used if absolutely necessary.
*/
-#define HeapTupleHeaderGetUpdateXid(tup) \
+#define HeapTupleGetUpdateXidAny(tup) \
( \
- (!((tup)->t_infomask & HEAP_XMAX_INVALID) && \
- ((tup)->t_infomask & HEAP_XMAX_IS_MULTI) && \
- !((tup)->t_infomask & HEAP_XMAX_LOCK_ONLY)) ? \
+ (!((tup)->t_data->t_infomask & HEAP_XMAX_INVALID) && \
+ ((tup)->t_data->t_infomask & HEAP_XMAX_IS_MULTI) && \
+ !((tup)->t_data->t_infomask & HEAP_XMAX_LOCK_ONLY)) ? \
HeapTupleGetUpdateXid(tup) \
: \
- HeapTupleHeaderGetRawXmax(tup) \
+ HeapTupleGetRawXmax(tup) \
)
-#define HeapTupleHeaderGetRawXmax(tup) \
+#define HeapTupleGetRawXmax(tup) ((tup)->t_xmax)
+
+#define HeapTupleHeaderGetRawXmax(page, tup) \
( \
- (tup)->t_choice.t_heap.t_xmax \
+ HeapPageIsDoubleXmax(page) ? \
+ HeapTupleHeaderGetDoubleXmax(tup) : \
+ ShortTransactionIdToNormal( \
+ ((tup)->t_infomask & HEAP_XMAX_IS_MULTI) ? HeapPageGetSpecial(page)->pd_multi_base : HeapPageGetSpecial(page)->pd_xid_base, \
+ (tup)->t_choice.t_heap.t_xmax) \
)
-#define HeapTupleHeaderSetXmax(tup, xid) \
+#define ToastTupleHeaderGetRawXmax(page, tup) \
( \
- (tup)->t_choice.t_heap.t_xmax = (xid) \
+ HeapPageIsDoubleXmax(page) ? \
+ HeapTupleHeaderGetDoubleXmax(tup) : \
+ ShortTransactionIdToNormal( \
+ ToastPageGetSpecial(page)->pd_xid_base, \
+ (tup)->t_choice.t_heap.t_xmax) \
)
+#define HeapTupleSetXmax(tup, xid) \
+do { \
+ (tup)->t_xmax = (xid); \
+} while (0)
+
+#define HeapTupleHeaderSetXmax(page, tup) \
+do { \
+ if (HeapPageIsDoubleXmax(page)) \
+ HeapTupleHeaderSetDoubleXmax((tup)->t_data, (tup)->t_xmax); \
+ else \
+ (tup)->t_data->t_choice.t_heap.t_xmax = \
+ NormalTransactionIdToShort( \
+ ((tup)->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ? HeapPageGetSpecial(page)->pd_multi_base : HeapPageGetSpecial(page)->pd_xid_base, \
+ ((tup)->t_xmax)); \
+} while (0)
+
+#define ToastTupleHeaderSetXmax(page, tup) \
+do { \
+ if (HeapPageIsDoubleXmax(page)) \
+ HeapTupleHeaderSetDoubleXmax((tup)->t_data, (tup)->t_xmax); \
+ else \
+ (tup)->t_data->t_choice.t_heap.t_xmax = \
+ NormalTransactionIdToShort( \
+ ToastPageGetSpecial(page)->pd_xid_base, \
+ ((tup)->t_xmax)); \
+} while (0)
+
/*
* HeapTupleHeaderGetRawCommandId will give you what's in the header whether
- * it is useful or not. Most code should use HeapTupleHeaderGetCmin or
- * HeapTupleHeaderGetCmax instead, but note that those Assert that you can
+ * it is useful or not. Most code should use HeapTupleGetCmin or
+ * HeapTupleGetCmax instead, but note that those Assert that you can
* get a legitimate result, ie you are in the originating transaction!
*/
#define HeapTupleHeaderGetRawCommandId(tup) \
@@ -555,8 +639,16 @@ do { \
* an otherwise-empty page can indeed hold a tuple of this size. Because
* ItemIds and tuples have different alignment requirements, don't assume that
* you can, say, fit 2 tuples of size MaxHeapTupleSize/2 on the same page.
+ *
+ * On shift to 64-bit XIDs MaxHeapTupleSize decreased by sizeof(HeapPageSpecialData).
+ * Extant tuples with length over new MaxHeapTupleSize are inherited on DoubleXmax
+ * pages. They could be read, but can not be updated unless their length decreases
+ * to fit MaxHeapTupleSize. Vacuum full will also copy these double xmax pages
+ * without change.
*/
-#define MaxHeapTupleSize (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData)))
+
+#define MaxHeapTupleSize (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData)) - MAXALIGN(sizeof(HeapPageSpecialData)))
+#define MaxHeapTupleSize_32 (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData)))
#define MinHeapTupleSize MAXALIGN(SizeofHeapTupleHeader)
/*
@@ -690,6 +782,79 @@ struct MinimalTupleData
#define HeapTupleClearHeapOnly(tuple) \
HeapTupleHeaderClearHeapOnly((tuple)->t_data)
+static inline void
+HeapTupleCopyXminFromPage(HeapTuple tup, Page page, bool is_toast)
+{
+ TransactionId base;
+ ShortTransactionId xmin; /* short xmin from tuple header */
+
+ if (HeapTupleHeaderXminFrozen(tup->t_data))
+ {
+ tup->t_xmin = FrozenTransactionId;
+ return;
+ }
+
+ xmin = tup->t_data->t_choice.t_heap.t_xmin;
+
+ if (!TransactionIdIsNormal(xmin))
+ base = 0;
+ else if (is_toast)
+ base = ToastPageGetSpecial(page)->pd_xid_base;
+ else
+ base = HeapPageGetSpecial(page)->pd_xid_base;
+
+ tup->t_xmin = ShortTransactionIdToNormal(base, xmin);
+}
+
+static inline void
+HeapTupleCopyXmaxFromPage(HeapTuple tup, Page page, bool is_toast)
+{
+ TransactionId base;
+ ShortTransactionId xmax; /* short xmax from tuple header */
+
+ xmax = tup->t_data->t_choice.t_heap.t_xmax;
+
+ if (!TransactionIdIsNormal(xmax))
+ base = 0;
+ else if (is_toast)
+ /*
+ * Toast page is not expected to have multixacts in chunks and
+ * has shorter special.
+ */
+ base = ToastPageGetSpecial(page)->pd_xid_base;
+ else if (tup->t_data->t_infomask & HEAP_XMAX_IS_MULTI)
+ base = HeapPageGetSpecial(page)->pd_multi_base;
+ else
+ base = HeapPageGetSpecial(page)->pd_xid_base;
+
+ tup->t_xmax = ShortTransactionIdToNormal(base, xmax);
+}
+
+/*
+ * Copy base values for xid and multixacts from page to heap tuple. Should be
+ * called each time tuple is read from page. Otherwise, it would be impossible
+ * to correctly read tuple xmin and xmax.
+ */
+static inline void
+HeapTupleCopyBaseFromPage(Buffer buffer, HeapTuple tup, Page page,
+ bool is_toast)
+{
+ Assert(IsBufferLocked(buffer));
+
+ if (HeapPageIsDoubleXmax(page))
+ {
+ /*
+ * On double xmax pages, xmax is extracted from tuple header.
+ */
+ tup->t_xmin = FrozenTransactionId;
+ tup->t_xmax = HeapTupleHeaderGetDoubleXmax(tup->t_data);
+ return;
+ }
+
+ HeapTupleCopyXminFromPage(tup, page, is_toast);
+ HeapTupleCopyXmaxFromPage(tup, page, is_toast);
+}
+
/* prototypes for functions in common/heaptuple.c */
extern Size heap_compute_data_size(TupleDesc tupleDesc,
Datum *values, bool *isnull);
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 4cbe17de7b..980f8bb747 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -18,16 +18,16 @@
/*
* The first two MultiXactId values are reserved to store the truncation Xid
- * and epoch of the first segment, so we start assigning multixact values from
+ * and base of the first segment, so we start assigning multixact values from
* 2.
*/
-#define InvalidMultiXactId ((MultiXactId) 0)
-#define FirstMultiXactId ((MultiXactId) 1)
-#define MaxMultiXactId ((MultiXactId) 0xFFFFFFFF)
+#define InvalidMultiXactId UINT64CONST(0)
+#define FirstMultiXactId UINT64CONST(1)
+#define MaxMultiXactId UINT64CONST(0xFFFFFFFFFFFFFFFF)
#define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId)
-#define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF)
+#define MaxMultiXactOffset UINT64CONST(0xFFFFFFFFFFFFFFFF)
/* Number of SLRU buffers to use for multixact */
#define NUM_MULTIXACTOFFSET_BUFFERS 8
@@ -147,7 +147,6 @@ extern void MultiXactSetNextMXact(MultiXactId nextMulti,
extern void MultiXactAdvanceNextMXact(MultiXactId minMulti,
MultiXactOffset minMultiOffset);
extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB);
-extern int MultiXactMemberFreezeThreshold(void);
extern void multixact_twophase_recover(TransactionId xid, uint16 info,
void *recdata, uint32 len);
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 8e4f6864e5..72b4ff5c52 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -62,8 +62,10 @@ typedef uint16 BTCycleId;
typedef struct BTPageOpaqueData
{
BlockNumber btpo_prev; /* left sibling, or P_NONE if leftmost */
+ /* ... or next transaction ID (upper part) */
BlockNumber btpo_next; /* right sibling, or P_NONE if rightmost */
uint32 btpo_level; /* tree level --- zero for leaf pages */
+ /* ... or next transaction ID (lower part) */
uint16 btpo_flags; /* flag bits, see below */
BTCycleId btpo_cycleid; /* vacuum cycle ID of latest split */
} BTPageOpaqueData;
@@ -92,6 +94,14 @@ typedef BTPageOpaqueData *BTPageOpaque;
*/
#define MAX_BT_CYCLE_ID 0xFF7F
+/* Get/set the 64-bit xact kept in btpo_prev (high 32 bits) and btpo_level (low 32 bits) */
+#define BTP_GET_XACT(opaque) (((uint64) ((BTPageOpaque) (opaque))->btpo_prev << 32) | \
+	(uint64) ((BTPageOpaque) (opaque))->btpo_level)
+#define BTP_SET_XACT(opaque, xact) \
+do { \
+	((BTPageOpaque) (opaque))->btpo_prev = (uint32) ((xact) >> 32); \
+	((BTPageOpaque) (opaque))->btpo_level = (uint32) (xact); \
+} while (0)
/*
* The Meta page is always the first page in the btree index.
diff --git a/src/include/access/rewriteheap.h b/src/include/access/rewriteheap.h
index 5cc04756a5..cd7913858f 100644
--- a/src/include/access/rewriteheap.h
+++ b/src/include/access/rewriteheap.h
@@ -51,7 +51,7 @@ typedef struct LogicalRewriteMappingData
* 6) xid of the xact performing the mapping
* ---
*/
-#define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x-%x"
-extern void CheckPointLogicalRewriteHeap(void);
+#define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x_%x-%x_%x"
+extern void CheckPointLogicalRewriteHeap(void);
#endif /* REWRITE_HEAP_H */
diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h
index 000bcbfdaf..8200ccff3e 100644
--- a/src/include/access/rmgrlist.h
+++ b/src/include/access/rmgrlist.h
@@ -47,3 +47,4 @@ PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_i
PG_RMGR(RM_REPLORIGIN_ID, "ReplicationOrigin", replorigin_redo, replorigin_desc, replorigin_identify, NULL, NULL, NULL, NULL)
PG_RMGR(RM_GENERIC_ID, "Generic", generic_redo, generic_desc, generic_identify, NULL, NULL, generic_mask, NULL)
PG_RMGR(RM_LOGICALMSG_ID, "LogicalMessage", logicalmsg_redo, logicalmsg_desc, logicalmsg_identify, NULL, NULL, NULL, logicalmsg_decode)
+PG_RMGR(RM_HEAP3_ID, "Heap3", heap3_redo, heap3_desc, heap3_identify, NULL, NULL, heap_mask, NULL)
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index 4f5a324da2..767854419d 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -21,15 +21,7 @@
/*
* Define SLRU segment size. A page is the same BLCKSZ as is used everywhere
* else in Postgres. The segment size can be chosen somewhat arbitrarily;
- * we make it 32 pages by default, or 256Kb, i.e. 1M transactions for CLOG
- * or 64K transactions for SUBTRANS.
- *
- * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF,
- * page numbering also wraps around at 0xFFFFFFFF/xxxx_XACTS_PER_PAGE (where
- * xxxx is CLOG or SUBTRANS, respectively), and segment numbering at
- * 0xFFFFFFFF/xxxx_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need
- * take no explicit notice of that fact in slru.c, except when comparing
- * segment and page numbers in SimpleLruTruncate (see PagePrecedes()).
+ * we make it 32 pages by default.
*/
#define SLRU_PAGES_PER_SEGMENT 32
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index e45d73eae3..6f89426e57 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -118,7 +118,7 @@ typedef enum TM_Result
* cmax is the outdating command's CID, but only when the failure code is
* TM_SelfModified (i.e., something in the current transaction outdated the
* tuple); otherwise cmax is zero. (We make this restriction because
- * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other
+ * HeapTupleGetCmax doesn't work for tuples outdated in other
* transactions.)
*/
typedef struct TM_FailureData
diff --git a/src/include/access/transam.h b/src/include/access/transam.h
index 775471d2a7..e7cf1206df 100644
--- a/src/include/access/transam.h
+++ b/src/include/access/transam.h
@@ -17,6 +17,10 @@
#include "access/xlogdefs.h"
+#ifndef FRONTEND
+#include "utils/elog.h"
+#endif
+
/* ----------------
* Special transaction ID values
*
@@ -28,11 +32,12 @@
* Note: if you need to change it, you must change pg_class.h as well.
* ----------------
*/
-#define InvalidTransactionId ((TransactionId) 0)
-#define BootstrapTransactionId ((TransactionId) 1)
-#define FrozenTransactionId ((TransactionId) 2)
-#define FirstNormalTransactionId ((TransactionId) 3)
-#define MaxTransactionId ((TransactionId) 0xFFFFFFFF)
+#define InvalidTransactionId UINT64CONST(0)
+#define BootstrapTransactionId UINT64CONST(1)
+#define FrozenTransactionId UINT64CONST(2)
+#define FirstNormalTransactionId UINT64CONST(3)
+#define MaxTransactionId UINT64CONST(0xFFFFFFFFFFFFFFFF)
+#define MaxShortTransactionId ((TransactionId) 0x7FFFFFFF)
/* ----------------
* transaction ID manipulation macros
@@ -44,17 +49,48 @@
#define TransactionIdStore(xid, dest) (*(dest) = (xid))
#define StoreInvalidTransactionId(dest) (*(dest) = InvalidTransactionId)
-#define EpochFromFullTransactionId(x) ((uint32) ((x).value >> 32))
-#define XidFromFullTransactionId(x) ((uint32) (x).value)
-#define U64FromFullTransactionId(x) ((x).value)
+/*
+ * Convert a short xid from/to a full xid. An assertion fails if the full xid
+ * does not fit within the range addressable from the xid base.
+ */
+static inline TransactionId
+ShortTransactionIdToNormal(TransactionId base, ShortTransactionId xid)
+{
+ if (!TransactionIdIsNormal(xid))
+ return (TransactionId) xid;
+
+#ifndef FRONTEND
+ /* xid + base should not overflow TransactionId */
+ Assert(xid + base >= base);
+#endif
+
+ return (TransactionId) (xid + base);
+}
+
+static inline ShortTransactionId
+NormalTransactionIdToShort(TransactionId base, TransactionId xid)
+{
+ if (!TransactionIdIsNormal(xid))
+ return (ShortTransactionId) (xid);
+
+#ifndef FRONTEND
+ /* xid should fit ShortTransactionId */
+ Assert(xid >= base + FirstNormalTransactionId &&
+ xid <= base + MaxShortTransactionId);
+#endif
+
+ return (ShortTransactionId) (xid - base);
+}
+
+#define XidFromFullTransactionId(x) ((x).value)
#define FullTransactionIdEquals(a, b) ((a).value == (b).value)
#define FullTransactionIdPrecedes(a, b) ((a).value < (b).value)
#define FullTransactionIdPrecedesOrEquals(a, b) ((a).value <= (b).value)
#define FullTransactionIdFollows(a, b) ((a).value > (b).value)
#define FullTransactionIdFollowsOrEquals(a, b) ((a).value >= (b).value)
#define FullTransactionIdIsValid(x) TransactionIdIsValid(XidFromFullTransactionId(x))
-#define InvalidFullTransactionId FullTransactionIdFromEpochAndXid(0, InvalidTransactionId)
-#define FirstNormalFullTransactionId FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId)
+#define InvalidFullTransactionId FullTransactionIdFromXid(InvalidTransactionId)
+#define FirstNormalFullTransactionId FullTransactionIdFromXid(FirstNormalTransactionId)
#define FullTransactionIdIsNormal(x) FullTransactionIdFollowsOrEquals(x, FirstNormalFullTransactionId)
/*
@@ -68,21 +104,11 @@ typedef struct FullTransactionId
} FullTransactionId;
static inline FullTransactionId
-FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
-{
- FullTransactionId result;
-
- result.value = ((uint64) epoch) << 32 | xid;
-
- return result;
-}
-
-static inline FullTransactionId
-FullTransactionIdFromU64(uint64 value)
+FullTransactionIdFromXid(TransactionId xid)
{
FullTransactionId result;
- result.value = value;
+ result.value = xid;
return result;
}
@@ -91,8 +117,7 @@ FullTransactionIdFromU64(uint64 value)
#define TransactionIdAdvance(dest) \
do { \
(dest)++; \
- if ((dest) < FirstNormalTransactionId) \
- (dest) = FirstNormalTransactionId; \
+ Assert(TransactionIdIsNormal(dest)); \
} while(0)
/*
@@ -140,18 +165,19 @@ FullTransactionIdAdvance(FullTransactionId *dest)
/* back up a transaction ID variable, handling wraparound correctly */
#define TransactionIdRetreat(dest) \
do { \
+ Assert(TransactionIdIsNormal(dest)); \
(dest)--; \
- } while ((dest) < FirstNormalTransactionId)
+ } while(0)
/* compare two XIDs already known to be normal; this is a macro for speed */
#define NormalTransactionIdPrecedes(id1, id2) \
(AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \
- (int32) ((id1) - (id2)) < 0)
+ (int64) ((id1) - (id2)) < 0)
/* compare two XIDs already known to be normal; this is a macro for speed */
#define NormalTransactionIdFollows(id1, id2) \
(AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \
- (int32) ((id1) - (id2)) > 0)
+ (int64) ((id1) - (id2)) > 0)
/* ----------
* Object ID (OID) zero is InvalidOid.
@@ -221,9 +247,6 @@ typedef struct VariableCacheData
TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */
TransactionId xidVacLimit; /* start forcing autovacuums here */
- TransactionId xidWarnLimit; /* start complaining here */
- TransactionId xidStopLimit; /* refuse to advance nextXid beyond here */
- TransactionId xidWrapLimit; /* where the world ends */
Oid oldestXidDB; /* database with minimum datfrozenxid */
/*
@@ -276,10 +299,6 @@ extern bool TransactionIdDidAbort(TransactionId transactionId);
extern void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids);
extern void TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids, XLogRecPtr lsn);
extern void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids);
-extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2);
-extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2);
-extern bool TransactionIdFollows(TransactionId id1, TransactionId id2);
-extern bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2);
extern TransactionId TransactionIdLatest(TransactionId mainxid,
int nxids, const TransactionId *xids);
extern XLogRecPtr TransactionIdGetCommitLSN(TransactionId xid);
@@ -319,7 +338,7 @@ ReadNextTransactionId(void)
/* return transaction ID backed up by amount, handling wraparound correctly */
static inline TransactionId
-TransactionIdRetreatedBy(TransactionId xid, uint32 amount)
+TransactionIdRetreatedBy(TransactionId xid, uint64 amount)
{
xid -= amount;
diff --git a/src/include/access/tupmacs.h b/src/include/access/tupmacs.h
index 8b24cd3658..db8dae182d 100644
--- a/src/include/access/tupmacs.h
+++ b/src/include/access/tupmacs.h
@@ -131,10 +131,11 @@ fetch_att(const void *T, bool attbyval, int attlen)
((attalign) == TYPALIGN_INT) ? INTALIGN(cur_offset) : \
(((attalign) == TYPALIGN_CHAR) ? (uintptr_t) (cur_offset) : \
(((attalign) == TYPALIGN_DOUBLE) ? DOUBLEALIGN(cur_offset) : \
+ (((attalign) == TYPALIGN_XID) ? MAXALIGN(cur_offset) : \
( \
AssertMacro((attalign) == TYPALIGN_SHORT), \
SHORTALIGN(cur_offset) \
- ))) \
+ )))) \
)
/*
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index c604ee11f8..3e39b01527 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -242,7 +242,7 @@ typedef struct xl_xact_xinfo
* Commit records can be large, so copying large portions isn't
* attractive.
*/
- uint32 xinfo;
+ uint64 xinfo;
} xl_xact_xinfo;
typedef struct xl_xact_dbinfo
@@ -295,7 +295,12 @@ typedef struct xl_xact_invals
typedef struct xl_xact_twophase
{
- TransactionId xid;
+ /*
+ * TransactionId is split into 32-bit parts because xl_xact_twophase is
+ * only int-aligned.
+ */
+ uint32 xid_lo;
+ uint32 xid_hi;
} xl_xact_twophase;
typedef struct xl_xact_origin
@@ -314,7 +319,7 @@ typedef struct xl_xact_commit
/* xl_xact_relfilelocators follows if XINFO_HAS_RELFILELOCATORS */
/* xl_xact_stats_items follows if XINFO_HAS_DROPPED_STATS */
/* xl_xact_invals follows if XINFO_HAS_INVALS */
- /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */
+ /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE (xid is int-aligned!) */
/* twophase_gid follows if XINFO_HAS_GID. As a null-terminated string. */
/* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! */
} xl_xact_commit;
@@ -330,7 +335,7 @@ typedef struct xl_xact_abort
/* xl_xact_relfilelocators follows if XINFO_HAS_RELFILELOCATORS */
/* xl_xact_stats_items follows if XINFO_HAS_DROPPED_STATS */
/* No invalidation messages needed. */
- /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */
+ /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE (xid is int-aligned!) */
/* twophase_gid follows if XINFO_HAS_GID. As a null-terminated string. */
/* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! */
} xl_xact_abort;
diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h
index 001ff2f521..6694551c71 100644
--- a/src/include/access/xloginsert.h
+++ b/src/include/access/xloginsert.h
@@ -37,6 +37,7 @@
* will be skipped) */
#define REGBUF_KEEP_DATA 0x10 /* include data even if a full-page image
* is taken */
+#define REGBUF_CONVERTED 0x20 /* buffer had format conversion */
/* prototypes for public functions in xloginsert.c: */
extern void XLogBeginInsert(void);
diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h
index e87f91316a..222c15dc24 100644
--- a/src/include/access/xlogreader.h
+++ b/src/include/access/xlogreader.h
@@ -426,10 +426,6 @@ extern bool DecodeXLogRecord(XLogReaderState *state,
#define XLogRecBlockImageApply(decoder, block_id) \
((decoder)->record->blocks[block_id].apply_image)
-#ifndef FRONTEND
-extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record);
-#endif
-
extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page);
extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len);
extern void XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h
index 835151ec92..9eab0f53eb 100644
--- a/src/include/access/xlogrecord.h
+++ b/src/include/access/xlogrecord.h
@@ -41,18 +41,17 @@
typedef struct XLogRecord
{
uint32 xl_tot_len; /* total len of entire record */
+ pg_crc32c xl_crc; /* CRC for this record */
TransactionId xl_xid; /* xact id */
XLogRecPtr xl_prev; /* ptr to previous record in log */
uint8 xl_info; /* flag bits, see below */
RmgrId xl_rmid; /* resource manager for this record */
- /* 2 bytes of padding here, initialize to zero */
- pg_crc32c xl_crc; /* CRC for this record */
/* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */
} XLogRecord;
-#define SizeOfXLogRecord (offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32c))
+#define SizeOfXLogRecord (offsetof(XLogRecord, xl_rmid) + sizeof(RmgrId))
/*
* The high 4 bits in xl_info may be used freely by rmgr. The
diff --git a/src/include/c.h b/src/include/c.h
index c8f72e44d8..2aea4dadb3 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -75,6 +75,10 @@
#include <libintl.h>
#endif
+#if HAVE_INTTYPES_H
+#include "inttypes.h"
+#endif
+
/* ----------------------------------------------------------------
* Section 1: compiler characteristics
@@ -585,19 +589,29 @@ typedef double float8;
typedef Oid regproc;
typedef regproc RegProcedure;
-typedef uint32 TransactionId;
+typedef uint64 TransactionId;
-typedef uint32 LocalTransactionId;
+extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2);
+extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2);
+extern bool TransactionIdFollows(TransactionId id1, TransactionId id2);
+extern bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2);
-typedef uint32 SubTransactionId;
+typedef uint32 ShortTransactionId;
+typedef uint64 LocalTransactionId;
+typedef uint64 SubTransactionId;
-#define InvalidSubTransactionId ((SubTransactionId) 0)
-#define TopSubTransactionId ((SubTransactionId) 1)
+#define InvalidSubTransactionId ((SubTransactionId) 0)
+#define TopSubTransactionId ((SubTransactionId) 1)
/* MultiXactId must be equivalent to TransactionId, to fit in t_xmax */
typedef TransactionId MultiXactId;
-typedef uint32 MultiXactOffset;
+typedef uint64 MultiXactOffset;
+
+#define MAX_START_XID UINT64CONST(0x3FFFFFFFFFFFFFFF) /* 2^62 - 1 */
+#define StartTransactionIdIsValid(xid) ((xid) <= MAX_START_XID)
+#define StartMultiXactIdIsValid(mxid) ((mxid) <= MAX_START_XID)
+#define StartMultiXactOffsetIsValid(mxoff) ((mxoff) <= MAX_START_XID)
typedef uint32 CommandId;
@@ -771,7 +785,6 @@ typedef NameData *Name;
/* we don't currently need wider versions of the other ALIGN macros */
#define MAXALIGN64(LEN) TYPEALIGN64(MAXIMUM_ALIGNOF, (LEN))
-
/* ----------------------------------------------------------------
* Section 6: assertions
* ----------------------------------------------------------------
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index c1af6eaf5f..c89ba99071 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,7 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202209291
+/* XXX: should be changed to actual version on commit */
+#define CATALOG_VERSION_NO 999999999
#endif
diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat
index 4cc129bebd..4f20977635 100644
--- a/src/include/catalog/pg_amproc.dat
+++ b/src/include/catalog/pg_amproc.dat
@@ -403,9 +403,9 @@
amprocrighttype => 'bytea', amprocnum => '2',
amproc => 'hashvarlenaextended' },
{ amprocfamily => 'hash/xid_ops', amproclefttype => 'xid',
- amprocrighttype => 'xid', amprocnum => '1', amproc => 'hashint4' },
+ amprocrighttype => 'xid', amprocnum => '1', amproc => 'hashint8' },
{ amprocfamily => 'hash/xid_ops', amproclefttype => 'xid',
- amprocrighttype => 'xid', amprocnum => '2', amproc => 'hashint4extended' },
+ amprocrighttype => 'xid', amprocnum => '2', amproc => 'hashint8extended' },
{ amprocfamily => 'hash/xid8_ops', amproclefttype => 'xid8',
amprocrighttype => 'xid8', amprocnum => '1', amproc => 'hashint8' },
{ amprocfamily => 'hash/xid8_ops', amproclefttype => 'xid8',
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h
index 06368e2366..ab02a0896f 100644
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -247,4 +247,10 @@ typedef struct ControlFileData
*/
#define PG_CONTROL_FILE_SIZE 8192
+#define CONTROLFILE_GET_OLDEDITION(control) \
+ ((control)->pg_old_version >> 16)
+
+#define CONTROLFILE_SET_OLDEDITION(control, v) \
+ (control)->pg_old_version = ((v) << 16)
+
#endif /* PG_CONTROL_H */
diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat
index bc5f8213f3..912ab48ecf 100644
--- a/src/include/catalog/pg_operator.dat
+++ b/src/include/catalog/pg_operator.dat
@@ -183,16 +183,16 @@
oprresult => 'bool', oprcom => '=(xid,xid)', oprnegate => '<>(xid,xid)',
oprcode => 'xideq', oprrest => 'eqsel', oprjoin => 'eqjoinsel' },
{ oid => '353', descr => 'equal',
- oprname => '=', oprleft => 'xid', oprright => 'int4', oprresult => 'bool',
- oprnegate => '<>(xid,int4)', oprcode => 'xideqint4', oprrest => 'eqsel',
+ oprname => '=', oprleft => 'xid', oprright => 'int8', oprresult => 'bool',
+ oprnegate => '<>(xid,int8)', oprcode => 'xideqint8', oprrest => 'eqsel',
oprjoin => 'eqjoinsel' },
{ oid => '3315', descr => 'not equal',
oprname => '<>', oprleft => 'xid', oprright => 'xid', oprresult => 'bool',
oprcom => '<>(xid,xid)', oprnegate => '=(xid,xid)', oprcode => 'xidneq',
oprrest => 'neqsel', oprjoin => 'neqjoinsel' },
{ oid => '3316', descr => 'not equal',
- oprname => '<>', oprleft => 'xid', oprright => 'int4', oprresult => 'bool',
- oprnegate => '=(xid,int4)', oprcode => 'xidneqint4', oprrest => 'neqsel',
+ oprname => '<>', oprleft => 'xid', oprright => 'int8', oprresult => 'bool',
+ oprnegate => '=(xid,int8)', oprcode => 'xidneqint8', oprrest => 'neqsel',
oprjoin => 'neqjoinsel' },
{ oid => '5068', descr => 'equal',
oprname => '=', oprcanmerge => 't', oprcanhash => 't', oprleft => 'xid8',
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 68bb032d3e..58e6ceb80a 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -2369,10 +2369,10 @@
{ oid => '1181',
descr => 'age of a transaction ID, in transactions before current transaction',
proname => 'age', provolatile => 's', proparallel => 'r',
- prorettype => 'int4', proargtypes => 'xid', prosrc => 'xid_age' },
+ prorettype => 'int8', proargtypes => 'xid', prosrc => 'xid_age' },
{ oid => '3939',
descr => 'age of a multi-transaction ID, in multi-transactions before current multi-transaction',
- proname => 'mxid_age', provolatile => 's', prorettype => 'int4',
+ proname => 'mxid_age', provolatile => 's', prorettype => 'int8',
proargtypes => 'xid', prosrc => 'mxid_age' },
{ oid => '1188',
@@ -2707,11 +2707,11 @@
prosrc => 'bpcharlen' },
{ oid => '1319',
- proname => 'xideqint4', proleakproof => 't', prorettype => 'bool',
- proargtypes => 'xid int4', prosrc => 'xideq' },
+ proname => 'xideqint8', proleakproof => 't', prorettype => 'bool',
+ proargtypes => 'xid int8', prosrc => 'xideq' },
{ oid => '3309',
- proname => 'xidneqint4', proleakproof => 't', prorettype => 'bool',
- proargtypes => 'xid int4', prosrc => 'xidneq' },
+ proname => 'xidneqint8', proleakproof => 't', prorettype => 'bool',
+ proargtypes => 'xid int8', prosrc => 'xidneq' },
{ oid => '1326',
proname => 'interval_div', prorettype => 'interval',
diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat
index df45879463..9ecd608aa9 100644
--- a/src/include/catalog/pg_type.dat
+++ b/src/include/catalog/pg_type.dat
@@ -95,9 +95,9 @@
typinput => 'tidin', typoutput => 'tidout', typreceive => 'tidrecv',
typsend => 'tidsend', typalign => 's' },
{ oid => '28', array_type_oid => '1011', descr => 'transaction id',
- typname => 'xid', typlen => '4', typbyval => 't', typcategory => 'U',
+ typname => 'xid', typlen => '8', typbyval => 'FLOAT8PASSBYVAL', typcategory => 'U',
typinput => 'xidin', typoutput => 'xidout', typreceive => 'xidrecv',
- typsend => 'xidsend', typalign => 'i' },
+ typsend => 'xidsend', typalign => 'x' },
{ oid => '29', array_type_oid => '1012',
descr => 'command identifier type, sequence in transaction id',
typname => 'cid', typlen => '4', typbyval => 't', typcategory => 'U',
diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h
index 48a2559137..71f5f547f4 100644
--- a/src/include/catalog/pg_type.h
+++ b/src/include/catalog/pg_type.h
@@ -300,6 +300,11 @@ DECLARE_UNIQUE_INDEX(pg_type_typname_nsp_index, 2704, TypeNameNspIndexId, on pg_
#define TYPALIGN_SHORT 's' /* short alignment (typically 2 bytes) */
#define TYPALIGN_INT 'i' /* int alignment (typically 4 bytes) */
#define TYPALIGN_DOUBLE 'd' /* double alignment (often 8 bytes) */
+/*
+ * We need to use alignment suitable for 8-byte XID values.
+ * On systems like AIX double alignment (4 bytes) is not enough.
+ */
+#define TYPALIGN_XID 'x'
#define TYPSTORAGE_PLAIN 'p' /* type not prepared for toasting */
#define TYPSTORAGE_EXTERNAL 'e' /* toastable, don't try to compress */
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 5d816ba7f4..7255b2eeda 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -213,12 +213,12 @@ typedef enum VacOptValue
*/
typedef struct VacuumParams
{
- bits32 options; /* bitmask of VACOPT_* */
- int freeze_min_age; /* min freeze age, -1 to use default */
- int freeze_table_age; /* age at which to scan whole table */
- int multixact_freeze_min_age; /* min multixact freeze age, -1 to
+ bits32 options; /* bitmask of VACOPT_* */
+ int64 freeze_min_age; /* min freeze age, -1 to use default */
+ int64 freeze_table_age; /* age at which to scan whole table */
+ int64 multixact_freeze_min_age; /* min multixact freeze age, -1 to
* use default */
- int multixact_freeze_table_age; /* multixact age at which to scan
+ int64 multixact_freeze_table_age; /* multixact age at which to scan
* whole table */
bool is_wraparound; /* force a for-wraparound vacuum */
int log_min_duration; /* minimum execution threshold in ms at
@@ -252,12 +252,12 @@ typedef struct VacDeadItems
/* GUC parameters */
extern PGDLLIMPORT int default_statistics_target; /* PGDLLIMPORT for PostGIS */
-extern PGDLLIMPORT int vacuum_freeze_min_age;
-extern PGDLLIMPORT int vacuum_freeze_table_age;
-extern PGDLLIMPORT int vacuum_multixact_freeze_min_age;
-extern PGDLLIMPORT int vacuum_multixact_freeze_table_age;
-extern PGDLLIMPORT int vacuum_failsafe_age;
-extern PGDLLIMPORT int vacuum_multixact_failsafe_age;
+extern PGDLLIMPORT int64 vacuum_freeze_min_age;
+extern PGDLLIMPORT int64 vacuum_freeze_table_age;
+extern PGDLLIMPORT int64 vacuum_multixact_freeze_min_age;
+extern PGDLLIMPORT int64 vacuum_multixact_freeze_table_age;
+extern PGDLLIMPORT int64 vacuum_failsafe_age;
+extern PGDLLIMPORT int64 vacuum_multixact_failsafe_age;
/* Variables for cost-based parallel vacuum */
extern PGDLLIMPORT pg_atomic_uint32 *VacuumSharedCostBalance;
@@ -287,10 +287,10 @@ extern void vac_update_relstats(Relation relation,
bool *minmulti_updated,
bool in_outer_xact);
extern bool vacuum_set_xid_limits(Relation rel,
- int freeze_min_age,
- int multixact_freeze_min_age,
- int freeze_table_age,
- int multixact_freeze_table_age,
+ int64 freeze_min_age,
+ int64 multixact_freeze_min_age,
+ int64 freeze_table_age,
+ int64 multixact_freeze_table_age,
TransactionId *oldestXmin,
MultiXactId *oldestMxact,
TransactionId *freezeLimit,
diff --git a/src/include/fmgr.h b/src/include/fmgr.h
index 380a82b9de..3711cac3da 100644
--- a/src/include/fmgr.h
+++ b/src/include/fmgr.h
@@ -281,6 +281,7 @@ extern struct varlena *pg_detoast_datum_packed(struct varlena *datum);
#define PG_GETARG_FLOAT4(n) DatumGetFloat4(PG_GETARG_DATUM(n))
#define PG_GETARG_FLOAT8(n) DatumGetFloat8(PG_GETARG_DATUM(n))
#define PG_GETARG_INT64(n) DatumGetInt64(PG_GETARG_DATUM(n))
+#define PG_GETARG_TRANSACTIONID(n) DatumGetTransactionId(PG_GETARG_DATUM(n))
/* use this if you want the raw, possibly-toasted input datum: */
#define PG_GETARG_RAW_VARLENA_P(n) ((struct varlena *) PG_GETARG_POINTER(n))
/* use this if you want the input datum de-toasted: */
@@ -367,6 +368,7 @@ extern struct varlena *pg_detoast_datum_packed(struct varlena *datum);
#define PG_RETURN_FLOAT8(x) return Float8GetDatum(x)
#define PG_RETURN_INT64(x) return Int64GetDatum(x)
#define PG_RETURN_UINT64(x) return UInt64GetDatum(x)
+#define PG_RETURN_TRANSACTIONID(x) return TransactionIdGetDatum(x)
/* RETURN macros for other pass-by-ref types will typically look like this: */
#define PG_RETURN_BYTEA_P(x) PG_RETURN_POINTER(x)
#define PG_RETURN_TEXT_P(x) PG_RETURN_POINTER(x)
diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h
index dc991626ba..78d4d08c4a 100644
--- a/src/include/nodes/pg_list.h
+++ b/src/include/nodes/pg_list.h
@@ -44,6 +44,7 @@ typedef union ListCell
{
void *ptr_value;
int int_value;
+ int64 int64_value;
Oid oid_value;
TransactionId xid_value;
} ListCell;
@@ -169,6 +170,7 @@ list_length(const List *l)
*/
#define lfirst(lc) ((lc)->ptr_value)
#define lfirst_int(lc) ((lc)->int_value)
+#define lfirst_int64(lc) ((lc)->int64_value)
#define lfirst_oid(lc) ((lc)->oid_value)
#define lfirst_xid(lc) ((lc)->xid_value)
#define lfirst_node(type,lc) castNode(type, lfirst(lc))
@@ -195,6 +197,7 @@ list_length(const List *l)
#define llast(l) lfirst(list_last_cell(l))
#define llast_int(l) lfirst_int(list_last_cell(l))
+#define llast_int64(l) lfirst_int64(list_last_cell(l))
#define llast_oid(l) lfirst_oid(list_last_cell(l))
#define llast_xid(l) lfirst_xid(list_last_cell(l))
#define llast_node(type,l) castNode(type, llast(l))
@@ -557,6 +560,7 @@ extern List *list_make5_impl(NodeTag t, ListCell datum1, ListCell datum2,
extern pg_nodiscard List *lappend(List *list, void *datum);
extern pg_nodiscard List *lappend_int(List *list, int datum);
+extern pg_nodiscard List *lappend_int64(List *list, int64 datum);
extern pg_nodiscard List *lappend_oid(List *list, Oid datum);
extern pg_nodiscard List *lappend_xid(List *list, TransactionId datum);
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index c5a80b829e..c1b49b018d 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -775,6 +775,9 @@
# endif
#endif
+/* Postgres Pro uses 64-bit XIDs */
+#undef XID_IS_64BIT
+
/* Size of a WAL file block. This need have no particular relation to BLCKSZ.
XLOG_BLCKSZ must be a power of 2, and if your system supports O_DIRECT I/O,
XLOG_BLCKSZ must be a multiple of the alignment requirement for direct-I/O
diff --git a/src/include/port/pg_lfind.h b/src/include/port/pg_lfind.h
index 0625cac6b5..40e0b1bd78 100644
--- a/src/include/port/pg_lfind.h
+++ b/src/include/port/pg_lfind.h
@@ -81,35 +81,21 @@ pg_lfind8_le(uint8 key, uint8 *base, uint32 nelem)
}
/*
- * pg_lfind32
+ * pg_lfind64
*
* Return true if there is an element in 'base' that equals 'key', otherwise
* return false.
*/
static inline bool
-pg_lfind32(uint32 key, uint32 *base, uint32 nelem)
+pg_lfind64(uint64 key, uint64 *base, uint32 nelem)
{
- uint32 i = 0;
-
-#ifndef USE_NO_SIMD
-
- /*
- * For better instruction-level parallelism, each loop iteration operates
- * on a block of four registers. Testing for SSE2 has showed this is ~40%
- * faster than using a block of two registers.
- */
- const Vector32 keys = vector32_broadcast(key); /* load copies of key */
- const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32);
- const uint32 nelem_per_iteration = 4 * nelem_per_vector;
-
- /* round down to multiple of elements per iteration */
- const uint32 tail_idx = nelem & ~(nelem_per_iteration - 1);
-
+ uint32 i,
+ iterations;
#if defined(USE_ASSERT_CHECKING)
bool assert_result = false;
/* pre-compute the result for assert checking */
- for (i = 0; i < nelem; i++)
+ for (i = 0; i < nelem; ++i)
{
if (key == base[i])
{
@@ -119,62 +105,127 @@ pg_lfind32(uint32 key, uint32 *base, uint32 nelem)
}
#endif
- for (i = 0; i < tail_idx; i += nelem_per_iteration)
+#define UNROLL_FACTOR 8
+ StaticAssertStmt((UNROLL_FACTOR & (UNROLL_FACTOR - 1)) == 0,
+ "Loop unroll factor must be power of 2");
+ iterations = nelem & ~(UNROLL_FACTOR - 1);
+ for (i = 0; i < iterations; i += UNROLL_FACTOR)
{
- Vector32 vals1,
- vals2,
- vals3,
- vals4,
- result1,
- result2,
- result3,
- result4,
- tmp1,
- tmp2,
- result;
-
- /* load the next block into 4 registers */
- vector32_load(&vals1, &base[i]);
- vector32_load(&vals2, &base[i + nelem_per_vector]);
- vector32_load(&vals3, &base[i + nelem_per_vector * 2]);
- vector32_load(&vals4, &base[i + nelem_per_vector * 3]);
-
- /* compare each value to the key */
- result1 = vector32_eq(keys, vals1);
- result2 = vector32_eq(keys, vals2);
- result3 = vector32_eq(keys, vals3);
- result4 = vector32_eq(keys, vals4);
-
- /* combine the results into a single variable */
- tmp1 = vector32_or(result1, result2);
- tmp2 = vector32_or(result3, result4);
- result = vector32_or(tmp1, tmp2);
-
- /* see if there was a match */
- if (vector32_is_highbit_set(result))
+ if (base[0] == key || base[1] == key || base[2] == key ||
+ base[3] == key || base[4] == key || base[5] == key ||
+ base[6] == key || base[7] == key)
{
+#if defined(USE_ASSERT_CHECKING)
Assert(assert_result == true);
+#endif
return true;
}
+ base += UNROLL_FACTOR;
}
-#endif /* ! USE_NO_SIMD */
/* Process the remaining elements one at a time. */
- for (; i < nelem; i++)
+ iterations = nelem & (UNROLL_FACTOR - 1);
+ for (i = 0; i < iterations; ++i)
{
- if (key == base[i])
+ if (key == *base++)
{
-#ifndef USE_NO_SIMD
+#if defined(USE_ASSERT_CHECKING)
Assert(assert_result == true);
#endif
return true;
}
}
-#ifndef USE_NO_SIMD
+#if defined(USE_ASSERT_CHECKING)
Assert(assert_result == false);
#endif
return false;
+// uint32 i = 0;
+//
+//#ifndef USE_NO_SIMD
+//
+// /*
+// * For better instruction-level parallelism, each loop iteration operates
+// * on a block of four registers. Testing for SSE2 has showed this is ~40%
+// * faster than using a block of two registers.
+// */
+// const Vector32 keys = vector32_broadcast(key); /* load copies of key */
+// const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32);
+// const uint32 nelem_per_iteration = 4 * nelem_per_vector;
+//
+// /* round down to multiple of elements per iteration */
+// const uint32 tail_idx = nelem & ~(nelem_per_iteration - 1);
+//
+//#if defined(USE_ASSERT_CHECKING)
+// bool assert_result = false;
+//
+// /* pre-compute the result for assert checking */
+// for (i = 0; i < nelem; i++)
+// {
+// if (key == base[i])
+// {
+// assert_result = true;
+// break;
+// }
+// }
+//#endif
+//
+// for (i = 0; i < tail_idx; i += nelem_per_iteration)
+// {
+// Vector32 vals1,
+// vals2,
+// vals3,
+// vals4,
+// result1,
+// result2,
+// result3,
+// result4,
+// tmp1,
+// tmp2,
+// result;
+//
+// /* load the next block into 4 registers */
+// vector32_load(&vals1, &base[i]);
+// vector32_load(&vals2, &base[i + nelem_per_vector]);
+// vector32_load(&vals3, &base[i + nelem_per_vector * 2]);
+// vector32_load(&vals4, &base[i + nelem_per_vector * 3]);
+//
+// /* compare each value to the key */
+// result1 = vector32_eq(keys, vals1);
+// result2 = vector32_eq(keys, vals2);
+// result3 = vector32_eq(keys, vals3);
+// result4 = vector32_eq(keys, vals4);
+//
+// /* combine the results into a single variable */
+// tmp1 = vector32_or(result1, result2);
+// tmp2 = vector32_or(result3, result4);
+// result = vector32_or(tmp1, tmp2);
+//
+// /* see if there was a match */
+// if (vector32_is_highbit_set(result))
+// {
+// Assert(assert_result == true);
+// return true;
+// }
+// }
+//#endif /* ! USE_NO_SIMD */
+//
+// /* Process the remaining elements one at a time. */
+// for (; i < nelem; i++)
+// {
+// if (key == base[i])
+// {
+//#ifndef USE_NO_SIMD
+// Assert(assert_result == true);
+//#endif
+// return true;
+// }
+// }
+//
+//#ifndef USE_NO_SIMD
+// Assert(assert_result == false);
+//#endif
+// return false;
}
#endif /* PG_LFIND_H */
diff --git a/src/include/postgres.h b/src/include/postgres.h
index 5f6a1e3d5a..cf46515829 100644
--- a/src/include/postgres.h
+++ b/src/include/postgres.h
@@ -428,6 +428,9 @@ typedef struct NullableDatum
#define SIZEOF_DATUM SIZEOF_VOID_P
+static uint64 DatumGetUInt64(Datum X);
+static Datum UInt64GetDatum(uint64 X);
+
/*
* DatumGetBool
* Returns boolean value of a datum.
@@ -609,7 +612,7 @@ ObjectIdGetDatum(Oid X)
static inline TransactionId
DatumGetTransactionId(Datum X)
{
- return (TransactionId) X;
+ return DatumGetUInt64(X);
}
/*
@@ -619,7 +622,7 @@ DatumGetTransactionId(Datum X)
static inline Datum
TransactionIdGetDatum(TransactionId X)
{
- return (Datum) X;
+ return UInt64GetDatum(X);
}
/*
@@ -629,7 +632,7 @@ TransactionIdGetDatum(TransactionId X)
static inline Datum
MultiXactIdGetDatum(MultiXactId X)
{
- return (Datum) X;
+ return UInt64GetDatum(X);
}
/*
diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h
index 9d40fd6d54..03024361ea 100644
--- a/src/include/postmaster/autovacuum.h
+++ b/src/include/postmaster/autovacuum.h
@@ -37,8 +37,8 @@ extern PGDLLIMPORT int autovacuum_vac_ins_thresh;
extern PGDLLIMPORT double autovacuum_vac_ins_scale;
extern PGDLLIMPORT int autovacuum_anl_thresh;
extern PGDLLIMPORT double autovacuum_anl_scale;
-extern PGDLLIMPORT int autovacuum_freeze_max_age;
-extern PGDLLIMPORT int autovacuum_multixact_freeze_max_age;
+extern PGDLLIMPORT int64 autovacuum_freeze_max_age;
+extern PGDLLIMPORT int64 autovacuum_multixact_freeze_max_age;
extern PGDLLIMPORT double autovacuum_vac_cost_delay;
extern PGDLLIMPORT int autovacuum_vac_cost_limit;
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 406db6be78..df62ffa605 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -40,10 +40,10 @@
*/
#define BUF_REFCOUNT_ONE 1
#define BUF_REFCOUNT_MASK ((1U << 18) - 1)
-#define BUF_USAGECOUNT_MASK 0x003C0000U
+#define BUF_USAGECOUNT_MASK 0x001C0000U
#define BUF_USAGECOUNT_ONE (1U << 18)
#define BUF_USAGECOUNT_SHIFT 18
-#define BUF_FLAG_MASK 0xFFC00000U
+#define BUF_FLAG_MASK 0xFFE00000U
/* Get refcount and usagecount from buffer state */
#define BUF_STATE_GET_REFCOUNT(state) ((state) & BUF_REFCOUNT_MASK)
@@ -55,6 +55,7 @@
* Note: BM_TAG_VALID essentially means that there is a buffer hashtable
* entry associated with the buffer's tag.
*/
+#define BM_CONVERTED (1U << 21) /* buffer was converted to 64-bit xids */
#define BM_LOCKED (1U << 22) /* buffer header is locked */
#define BM_DIRTY (1U << 23) /* data needs writing */
#define BM_VALID (1U << 24) /* data is valid */
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 6f4dfa0960..fc9d1af846 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -157,8 +157,12 @@ extern void BufferGetTag(Buffer buffer, RelFileLocator *rlocator,
ForkNumber *forknum, BlockNumber *blknum);
extern void MarkBufferDirtyHint(Buffer buffer, bool buffer_std);
+extern void MarkBufferConverted(Buffer buffer, bool converted);
+extern bool IsBufferConverted(Buffer buffer);
extern void UnlockBuffers(void);
+extern bool IsBufferLocked(Buffer buffer);
+extern bool IsBufferLockedExclusive(Buffer buffer);
extern void LockBuffer(Buffer buffer, int mode);
extern bool ConditionalLockBuffer(Buffer buffer);
extern void LockBufferForCleanup(Buffer buffer);
@@ -184,6 +188,8 @@ extern void AtProcExit_LocalBuffers(void);
extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype);
extern void FreeAccessStrategy(BufferAccessStrategy strategy);
+/* old tuple format support */
+extern void convert_page(Relation rel, Page orig_page, Buffer buf, BlockNumber blkno);
/* inline functions */
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 2708c4b683..87375bbf79 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -14,10 +14,13 @@
#ifndef BUFPAGE_H
#define BUFPAGE_H
+#include "access/transam.h"
#include "access/xlogdefs.h"
#include "storage/block.h"
#include "storage/item.h"
#include "storage/off.h"
+#include "postgres.h"
+#include "utils/rel.h"
/*
* A postgres disk page is an abstraction layered on top of a postgres
@@ -163,12 +166,41 @@ typedef struct PageHeaderData
LocationIndex pd_upper; /* offset to end of free space */
LocationIndex pd_special; /* offset to start of special space */
uint16 pd_pagesize_version;
- TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
+ ShortTransactionId pd_prune_xid; /* oldest prunable XID, or zero if
+ * none */
ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
} PageHeaderData;
typedef PageHeaderData *PageHeader;
+
+/*
+ * HeapPageSpecialData -- data that is stored at the end of each heap page.
+ *
+ * pd_xid_base - base value for transaction IDs on page
+ * pd_multi_base - base value for multixact IDs on page
+ *
+ * pd_xid_base and pd_multi_base are base values for calculation of transaction
+ * identifiers from t_xmin and t_xmax in each heap tuple header on the page.
+ */
+typedef struct HeapPageSpecialData
+{
+ TransactionId pd_xid_base; /* base value for transaction IDs on page */
+ TransactionId pd_multi_base; /* base value for multixact IDs on page */
+} HeapPageSpecialData;
+
+typedef HeapPageSpecialData *HeapPageSpecial;
+
+typedef struct ToastPageSpecialData
+{
+ TransactionId pd_xid_base; /* base value for transaction IDs on page */
+} ToastPageSpecialData;
+
+typedef ToastPageSpecialData *ToastPageSpecial;
+
+extern PGDLLIMPORT HeapPageSpecial heapDoubleXmaxSpecial;
+extern PGDLLIMPORT ToastPageSpecial toastDoubleXmaxSpecial;
+
/*
* pd_flags contains the following flag bits. Undefined bits are initialized
* to zero and may be used in the future.
@@ -200,7 +232,7 @@ typedef PageHeaderData *PageHeader;
* As of Release 9.3, the checksum version must also be considered when
* handling pages.
*/
-#define PG_PAGE_LAYOUT_VERSION 4
+#define PG_PAGE_LAYOUT_VERSION 5
#define PG_DATA_CHECKSUM_VERSION 1
/* ----------------------------------------------------------------
@@ -440,18 +472,177 @@ PageClearAllVisible(Page page)
}
/*
- * These two require "access/transam.h", so left as macros.
+ * Check if page is in "double xmax" format.
*/
-#define PageSetPrunable(page, xid) \
-do { \
- Assert(TransactionIdIsNormal(xid)); \
- if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
- TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
- ((PageHeader) (page))->pd_prune_xid = (xid); \
-} while (0)
-#define PageClearPrunable(page) \
- (((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)
+static inline bool
+HeapPageIsDoubleXmax(Page page)
+{
+ return ((PageHeader) (page))->pd_special == BLCKSZ;
+}
+/*
+ * Get pointer to HeapPageSpecialData.
+ *
+ * Can be used for non-consistent reads from non-locked pages.
+ *
+ * Returns heapDoubleXmaxSpecial when pd_special == BLCKSZ (i.e. "double xmax"
+ * format).
+ */
+static inline HeapPageSpecial
+HeapPageGetSpecialNoAssert(Page page)
+{
+ if (HeapPageIsDoubleXmax(page))
+ return heapDoubleXmaxSpecial;
+
+ return (HeapPageSpecial) ((char *) page +
+ ((PageHeader) page)->pd_special);
+}
+
+/*
+ * Get pointer to ToastPageSpecialData.
+ *
+ * Can be used for non-consistent reads from non-locked pages.
+ *
+ * Returns toastDoubleXmaxSpecial when pd_special == BLCKSZ (i.e. "double xmax"
+ * format).
+ */
+static inline ToastPageSpecial
+ToastPageGetSpecialNoAssert(Page page)
+{
+ if (HeapPageIsDoubleXmax(page))
+ return toastDoubleXmaxSpecial;
+
+ return (ToastPageSpecial) ((char *) page +
+ ((PageHeader) page)->pd_special);
+}
+
+/*
+ * Asserting variant of HeapPageGetSpecialNoAssert, for general use.
+ */
+static inline HeapPageSpecial
+HeapPageGetSpecial(Page page)
+{
+ if (HeapPageIsDoubleXmax(page))
+ return heapDoubleXmaxSpecial;
+
+ Assert(((PageHeader) page)->pd_special ==
+ BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData)));
+
+ return (HeapPageSpecial) ((char *) page +
+ ((PageHeader) page)->pd_special);
+}
+
+/*
+ * Asserting variant of ToastPageGetSpecialNoAssert, for general use.
+ */
+static inline ToastPageSpecial
+ToastPageGetSpecial(Page page)
+{
+ if (HeapPageIsDoubleXmax(page))
+ return toastDoubleXmaxSpecial;
+
+ Assert(((PageHeader) page)->pd_special ==
+ BLCKSZ - MAXALIGN(sizeof(ToastPageSpecialData)));
+
+ return (ToastPageSpecial) ((char *) page +
+ ((PageHeader) page)->pd_special);
+}
+
+/*
+ * Set pd_prune_xid.
+ */
+static inline void
+HeapPageSetPruneXid(Page page, TransactionId xid, bool is_toast)
+{
+ TransactionId base;
+
+ if (HeapPageIsDoubleXmax(page))
+ return;
+
+ if (!TransactionIdIsNormal(xid))
+ {
+ ((PageHeader) (page))->pd_prune_xid = xid;
+ return;
+ }
+
+ base = is_toast ? ToastPageGetSpecial(page)->pd_xid_base :
+ HeapPageGetSpecial(page)->pd_xid_base;
+
+ ((PageHeader) (page))->pd_prune_xid = NormalTransactionIdToShort(base, xid);
+ Assert(((PageHeader) (page))->pd_prune_xid <= MaxShortTransactionId);
+}
+
+static inline void
+ToastPageSetPruneXid(Page page, TransactionId xid)
+{
+ if (HeapPageIsDoubleXmax(page))
+ return;
+
+ if (!TransactionIdIsNormal(xid))
+ {
+ ((PageHeader) (page))->pd_prune_xid = xid;
+ return;
+ }
+
+ ((PageHeader) (page))->pd_prune_xid =
+ NormalTransactionIdToShort(ToastPageGetSpecial(page)->pd_xid_base, (xid));
+
+ Assert(((PageHeader) (page))->pd_prune_xid <= MaxShortTransactionId);
+}
+
+/*
+ * Get pd_prune_xid from locked page.
+ */
+static inline TransactionId
+HeapPageGetPruneXid(Page page, bool is_toast)
+{
+ TransactionId base;
+
+ if (HeapPageIsDoubleXmax(page))
+ return ((PageHeader) (page))->pd_prune_xid;
+
+ base = is_toast ? ToastPageGetSpecial(page)->pd_xid_base :
+ HeapPageGetSpecial(page)->pd_xid_base;
+
+ return ShortTransactionIdToNormal(base,
+ ((PageHeader) (page))->pd_prune_xid);
+}
+
+static inline void
+PageSetPrunable(Page page, TransactionId xid, bool is_toast)
+{
+ TransactionId prune_xid;
+
+ Assert(TransactionIdIsNormal(xid));
+
+ if (HeapPageIsDoubleXmax(page))
+ return;
+
+ prune_xid = HeapPageGetPruneXid(page, is_toast);
+ if ((!TransactionIdIsValid(prune_xid) ||
+ TransactionIdPrecedes(xid, prune_xid)))
+ {
+ HeapPageSetPruneXid(page, xid, is_toast);
+ }
+}
+
+/*
+ * Get pd_prune_xid from non-locked page. May return an invalid value, but
+ * doesn't cause assert failures.
+ */
+static inline TransactionId
+HeapPageGetPruneXidNoAssert(Page page, bool is_toast)
+{
+ TransactionId base;
+
+ if (HeapPageIsDoubleXmax(page))
+ return ((PageHeader) (page))->pd_prune_xid;
+
+ base = is_toast ? ToastPageGetSpecialNoAssert(page)->pd_xid_base :
+ HeapPageGetSpecialNoAssert(page)->pd_xid_base;
+ return ShortTransactionIdToNormal(base,
+ ((PageHeader) (page))->pd_prune_xid);
+}
/* ----------------------------------------------------------------
* extern declarations
@@ -485,6 +676,21 @@ do { \
StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)),
"BLCKSZ has to be a multiple of sizeof(size_t)");
+/*
+ * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete
+ */
+typedef struct ItemIdCompactData
+{
+ uint16 offsetindex; /* linp array index */
+ int16 itemoff; /* page offset of item data */
+ uint16 alignedlen; /* MAXALIGN(item data len) */
+} ItemIdCompactData;
+
+typedef ItemIdCompactData *ItemIdCompact;
+typedef RelationData *Relation;
+
+extern int itemoffcompare(const void *item1, const void *item2);
+
extern void PageInit(Page page, Size pageSize, Size specialSize);
extern bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags);
extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size,
@@ -493,7 +699,7 @@ extern Page PageGetTempPage(Page page);
extern Page PageGetTempPageCopy(Page page);
extern Page PageGetTempPageCopySpecial(Page page);
extern void PageRestoreTempPage(Page tempPage, Page oldPage);
-extern void PageRepairFragmentation(Page page);
+extern void PageRepairFragmentation(Page page, bool is_toast);
extern void PageTruncateLinePointerArray(Page page);
extern Size PageGetFreeSpace(Page page);
extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups);
diff --git a/src/include/storage/itemid.h b/src/include/storage/itemid.h
index e33637ff21..442a72d658 100644
--- a/src/include/storage/itemid.h
+++ b/src/include/storage/itemid.h
@@ -78,6 +78,8 @@ typedef uint16 ItemLength;
#define ItemIdGetRedirect(itemId) \
((itemId)->lp_off)
+#define ItemIdGetTupleEnd(itemId) \
+ (MAXALIGN(ItemIdGetLength((itemId))) + ItemIdGetOffset((itemId)))
/*
* ItemIdIsValid
* True iff item identifier is valid.
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index e4e1495b24..0e0cd79bb0 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -227,8 +227,8 @@ typedef struct LOCKTAG
/* ID info for a transaction is its TransactionId */
#define SET_LOCKTAG_TRANSACTION(locktag,xid) \
- ((locktag).locktag_field1 = (xid), \
- (locktag).locktag_field2 = 0, \
+ ((locktag).locktag_field1 = (uint32)((xid) & 0xFFFFFFFF), \
+ (locktag).locktag_field2 = (uint32)((xid) >> 32), \
(locktag).locktag_field3 = 0, \
(locktag).locktag_field4 = 0, \
(locktag).locktag_type = LOCKTAG_TRANSACTION, \
@@ -237,8 +237,8 @@ typedef struct LOCKTAG
/* ID info for a virtual transaction is its VirtualTransactionId */
#define SET_LOCKTAG_VIRTUALTRANSACTION(locktag,vxid) \
((locktag).locktag_field1 = (vxid).backendId, \
- (locktag).locktag_field2 = (vxid).localTransactionId, \
- (locktag).locktag_field3 = 0, \
+ (locktag).locktag_field2 = (uint32)((vxid).localTransactionId & 0xFFFFFFFF), \
+ (locktag).locktag_field3 = (uint32)((vxid).localTransactionId >> 32), \
(locktag).locktag_field4 = 0, \
(locktag).locktag_type = LOCKTAG_VIRTUALTRANSACTION, \
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
@@ -248,9 +248,9 @@ typedef struct LOCKTAG
* its speculative insert counter.
*/
#define SET_LOCKTAG_SPECULATIVE_INSERTION(locktag,xid,token) \
- ((locktag).locktag_field1 = (xid), \
- (locktag).locktag_field2 = (token), \
- (locktag).locktag_field3 = 0, \
+ ((locktag).locktag_field1 = (uint32)((xid) & 0xFFFFFFFF), \
+ (locktag).locktag_field2 = (uint32)((xid) >> 32), \
+ (locktag).locktag_field3 = (token), \
(locktag).locktag_field4 = 0, \
(locktag).locktag_type = LOCKTAG_SPECULATIVE_TOKEN, \
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index fdc18f8180..4ff7353e8c 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -17,6 +17,7 @@
#include "access/clog.h"
#include "access/xlogdefs.h"
#include "lib/ilist.h"
+#include "port/atomics.h"
#include "storage/latch.h"
#include "storage/lock.h"
#include "storage/pg_sema.h"
@@ -176,12 +177,12 @@ struct PGPROC
Latch procLatch; /* generic latch for process */
- TransactionId xid; /* id of top-level transaction currently being
+ pg_atomic_uint64 xid; /* id of top-level transaction currently being
* executed by this proc, if running and XID
* is assigned; else InvalidTransactionId.
* mirrored in ProcGlobal->xids[pgxactoff] */
- TransactionId xmin; /* minimal running XID as it was when we were
+ pg_atomic_uint64 xmin; /* minimal running XID as it was when we were
* starting our xact, excluding LAZY VACUUM:
* vacuum must not remove tuples deleted by
* xid >= xmin ! */
@@ -364,7 +365,7 @@ typedef struct PROC_HDR
PGPROC *allProcs;
/* Array mirroring PGPROC.xid for each PGPROC currently in the procarray */
- TransactionId *xids;
+ pg_atomic_uint64 *xids;
/*
* Array mirroring PGPROC.subxidStatus for each PGPROC currently in the
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h
index f5da98dc73..209615f3cc 100644
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -21,7 +21,7 @@
#include "storage/standbydefs.h"
/* User-settable GUC parameters */
-extern PGDLLIMPORT int vacuum_defer_cleanup_age;
+extern PGDLLIMPORT int64 vacuum_defer_cleanup_age;
extern PGDLLIMPORT int max_standby_archive_delay;
extern PGDLLIMPORT int max_standby_streaming_delay;
extern PGDLLIMPORT bool log_recovery_conflict_waits;
diff --git a/src/include/utils/combocid.h b/src/include/utils/combocid.h
index 80fe6d2cea..8465768b6f 100644
--- a/src/include/utils/combocid.h
+++ b/src/include/utils/combocid.h
@@ -15,7 +15,7 @@
#define COMBOCID_H
/*
- * HeapTupleHeaderGetCmin and HeapTupleHeaderGetCmax function prototypes
+ * HeapTupleGetCmin and HeapTupleGetCmax function prototypes
* are in access/htup.h, because that's where the macro definitions that
* those functions replaced used to be.
*/
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 7dc401cf0d..3e1e5a5e4a 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -308,12 +308,12 @@ typedef struct AutoVacOpts
int vacuum_ins_threshold;
int analyze_threshold;
int vacuum_cost_limit;
- int freeze_min_age;
- int freeze_max_age;
- int freeze_table_age;
- int multixact_freeze_min_age;
- int multixact_freeze_max_age;
- int multixact_freeze_table_age;
+ int64 freeze_min_age;
+ int64 freeze_max_age;
+ int64 freeze_table_age;
+ int64 multixact_freeze_min_age;
+ int64 multixact_freeze_max_age;
+ int64 multixact_freeze_table_age;
int log_min_duration;
float8 vacuum_cost_delay;
float8 vacuum_scale_factor;
diff --git a/src/include/utils/xid8.h b/src/include/utils/xid8.h
index 9c5ce241db..1fdd1e86c1 100644
--- a/src/include/utils/xid8.h
+++ b/src/include/utils/xid8.h
@@ -17,13 +17,13 @@
static inline FullTransactionId
DatumGetFullTransactionId(Datum X)
{
- return FullTransactionIdFromU64(DatumGetUInt64(X));
+ return FullTransactionIdFromXid(DatumGetUInt64(X));
}
static inline Datum
FullTransactionIdGetDatum(FullTransactionId X)
{
- return UInt64GetDatum(U64FromFullTransactionId(X));
+ return UInt64GetDatum(XidFromFullTransactionId(X));
}
#define PG_GETARG_FULLTRANSACTIONID(X) DatumGetFullTransactionId(PG_GETARG_DATUM(X))
diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c
index 8f21e0d701..6772f8c360 100644
--- a/src/pl/plperl/plperl.c
+++ b/src/pl/plperl/plperl.c
@@ -2664,7 +2664,7 @@ validate_plperl_function(plperl_proc_ptr *proc_ptr, HeapTuple procTup)
* This is needed because CREATE OR REPLACE FUNCTION can modify the
* function's pg_proc entry without changing its OID.
************************************************************/
- uptodate = (prodesc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) &&
+ uptodate = (prodesc->fn_xmin == HeapTupleGetRawXmin(procTup) &&
ItemPointerEquals(&prodesc->fn_tid, &procTup->t_self));
if (uptodate)
@@ -2788,7 +2788,7 @@ compile_plperl_function(Oid fn_oid, bool is_trigger, bool is_event_trigger)
MemoryContextSetIdentifier(proc_cxt, prodesc->proname);
prodesc->fn_cxt = proc_cxt;
prodesc->fn_refcount = 0;
- prodesc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
+ prodesc->fn_xmin = HeapTupleGetRawXmin(procTup);
prodesc->fn_tid = procTup->t_self;
prodesc->nargs = procStruct->pronargs;
prodesc->arg_out_func = (FmgrInfo *) palloc0(prodesc->nargs * sizeof(FmgrInfo));
diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c
index b286f2a50c..61db613c2f 100644
--- a/src/pl/plpgsql/src/pl_comp.c
+++ b/src/pl/plpgsql/src/pl_comp.c
@@ -171,7 +171,7 @@ recheck:
if (function)
{
/* We have a compiled function, but is it still valid? */
- if (function->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) &&
+ if (function->fn_xmin == HeapTupleGetRawXmin(procTup) &&
ItemPointerEquals(&function->fn_tid, &procTup->t_self))
function_valid = true;
else
@@ -348,7 +348,7 @@ do_compile(FunctionCallInfo fcinfo,
function->fn_signature = format_procedure(fcinfo->flinfo->fn_oid);
MemoryContextSetIdentifier(func_cxt, function->fn_signature);
function->fn_oid = fcinfo->flinfo->fn_oid;
- function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
+ function->fn_xmin = HeapTupleGetRawXmin(procTup);
function->fn_tid = procTup->t_self;
function->fn_input_collation = fcinfo->fncollation;
function->fn_cxt = func_cxt;
diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c
index a647342948..6763b6a4b7 100644
--- a/src/pl/plpgsql/src/pl_exec.c
+++ b/src/pl/plpgsql/src/pl_exec.c
@@ -7376,6 +7376,7 @@ deconstruct_composite_datum(Datum value, HeapTupleData *tmptup)
tmptup->t_len = HeapTupleHeaderGetDatumLength(td);
ItemPointerSetInvalid(&(tmptup->t_self));
tmptup->t_tableOid = InvalidOid;
+ HeapTupleSetZeroBase(tmptup);
tmptup->t_data = td;
/* Extract rowtype info and find a tupdesc */
@@ -7550,6 +7551,7 @@ exec_move_row_from_datum(PLpgSQL_execstate *estate,
tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
+ HeapTupleSetZeroBase(&tmptup);
tmptup.t_data = td;
/* Extract rowtype info */
diff --git a/src/pl/plpython/plpy_procedure.c b/src/pl/plpython/plpy_procedure.c
index 494f109b32..9884f74fa7 100644
--- a/src/pl/plpython/plpy_procedure.c
+++ b/src/pl/plpython/plpy_procedure.c
@@ -178,7 +178,7 @@ PLy_procedure_create(HeapTuple procTup, Oid fn_oid, bool is_trigger)
proc->proname = pstrdup(NameStr(procStruct->proname));
MemoryContextSetIdentifier(cxt, proc->proname);
proc->pyname = pstrdup(procName);
- proc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
+ proc->fn_xmin = HeapTupleGetRawXmin(procTup);
proc->fn_tid = procTup->t_self;
proc->fn_readonly = (procStruct->provolatile != PROVOLATILE_VOLATILE);
proc->is_setof = procStruct->proretset;
@@ -419,7 +419,7 @@ PLy_procedure_valid(PLyProcedure *proc, HeapTuple procTup)
return false;
/* If the pg_proc tuple has changed, it's not valid */
- if (!(proc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) &&
+ if (!(proc->fn_xmin == HeapTupleGetRawXmin(procTup) &&
ItemPointerEquals(&proc->fn_tid, &procTup->t_self)))
return false;
diff --git a/src/pl/tcl/pltcl.c b/src/pl/tcl/pltcl.c
index eaa98d42c2..bd5b071c13 100644
--- a/src/pl/tcl/pltcl.c
+++ b/src/pl/tcl/pltcl.c
@@ -1428,7 +1428,7 @@ compile_pltcl_function(Oid fn_oid, Oid tgreloid,
* function's pg_proc entry without changing its OID.
************************************************************/
if (prodesc != NULL &&
- prodesc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) &&
+ prodesc->fn_xmin == HeapTupleGetRawXmin(procTup) &&
ItemPointerEquals(&prodesc->fn_tid, &procTup->t_self))
{
/* It's still up-to-date, so we can use it */
@@ -1495,7 +1495,7 @@ compile_pltcl_function(Oid fn_oid, Oid tgreloid,
prodesc->internal_proname = pstrdup(internal_proname);
prodesc->fn_cxt = proc_cxt;
prodesc->fn_refcount = 0;
- prodesc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
+ prodesc->fn_xmin = HeapTupleGetRawXmin(procTup);
prodesc->fn_tid = procTup->t_self;
prodesc->nargs = procStruct->pronargs;
prodesc->arg_out_func = (FmgrInfo *) palloc0(prodesc->nargs * sizeof(FmgrInfo));
diff --git a/src/test/Makefile b/src/test/Makefile
index dbd3192874..8e0f39289e 100644
--- a/src/test/Makefile
+++ b/src/test/Makefile
@@ -12,7 +12,8 @@ subdir = src/test
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
-SUBDIRS = perl regress isolation modules authentication recovery subscription
+SUBDIRS = perl regress isolation modules authentication recovery subscription \
+ xid-64
ifeq ($(with_icu),yes)
SUBDIRS += icu
diff --git a/src/test/meson.build b/src/test/meson.build
index 241d9d48aa..650936bd66 100644
--- a/src/test/meson.build
+++ b/src/test/meson.build
@@ -5,6 +5,7 @@ subdir('authentication')
subdir('recovery')
subdir('subscription')
subdir('modules')
+subdir('xid-64')
if ssl.found()
subdir('ssl')
diff --git a/src/test/modules/test_lfind/test_lfind.c b/src/test/modules/test_lfind/test_lfind.c
index 82673d54c6..e4fd52c134 100644
--- a/src/test/modules/test_lfind/test_lfind.c
+++ b/src/test/modules/test_lfind/test_lfind.c
@@ -120,29 +120,29 @@ Datum
test_lfind32(PG_FUNCTION_ARGS)
{
#define TEST_ARRAY_SIZE 135
- uint32 test_array[TEST_ARRAY_SIZE] = {0};
+ uint64 test_array[TEST_ARRAY_SIZE] = {0};
test_array[8] = 1;
test_array[64] = 2;
test_array[TEST_ARRAY_SIZE - 1] = 3;
- if (pg_lfind32(1, test_array, 4))
- elog(ERROR, "pg_lfind32() found nonexistent element");
- if (!pg_lfind32(1, test_array, TEST_ARRAY_SIZE))
- elog(ERROR, "pg_lfind32() did not find existing element");
+ if (pg_lfind64(1, test_array, 4))
+ elog(ERROR, "pg_lfind64() found nonexistent element");
+ if (!pg_lfind64(1, test_array, TEST_ARRAY_SIZE))
+ elog(ERROR, "pg_lfind64() did not find existing element");
- if (pg_lfind32(2, test_array, 32))
- elog(ERROR, "pg_lfind32() found nonexistent element");
- if (!pg_lfind32(2, test_array, TEST_ARRAY_SIZE))
- elog(ERROR, "pg_lfind32() did not find existing element");
+ if (pg_lfind64(2, test_array, 32))
+ elog(ERROR, "pg_lfind64() found nonexistent element");
+ if (!pg_lfind64(2, test_array, TEST_ARRAY_SIZE))
+ elog(ERROR, "pg_lfind64() did not find existing element");
- if (pg_lfind32(3, test_array, 96))
- elog(ERROR, "pg_lfind32() found nonexistent element");
- if (!pg_lfind32(3, test_array, TEST_ARRAY_SIZE))
- elog(ERROR, "pg_lfind32() did not find existing element");
+ if (pg_lfind64(3, test_array, 96))
+ elog(ERROR, "pg_lfind64() found nonexistent element");
+ if (!pg_lfind64(3, test_array, TEST_ARRAY_SIZE))
+ elog(ERROR, "pg_lfind64() did not find existing element");
- if (pg_lfind32(4, test_array, TEST_ARRAY_SIZE))
- elog(ERROR, "pg_lfind32() found nonexistent element");
+ if (pg_lfind64(4, test_array, TEST_ARRAY_SIZE))
+ elog(ERROR, "pg_lfind64() found nonexistent element");
PG_RETURN_VOID();
}
diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm
index 4fef9c12e6..cb535c0906 100644
--- a/src/test/perl/PostgreSQL/Test/Cluster.pm
+++ b/src/test/perl/PostgreSQL/Test/Cluster.pm
@@ -476,7 +476,9 @@ sub init
mkdir $self->archive_dir;
PostgreSQL::Test::Utils::system_or_bail('initdb', '-D', $pgdata, '-A',
- 'trust', '-N', @{ $params{extra} });
+ 'trust', '-N',
+ '-x', '1249835483136', '-m', '2422361554944', '-o', '3594887626752',
+ @{ $params{extra} });
PostgreSQL::Test::Utils::system_or_bail($ENV{PG_REGRESS},
'--config-auth', $pgdata, @{ $params{auth_extra} });
diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl
index e8e1a420bc..8329d2ff7e 100644
--- a/src/test/recovery/t/003_recovery_targets.pl
+++ b/src/test/recovery/t/003_recovery_targets.pl
@@ -57,7 +57,7 @@ $node_primary->init(has_archiving => 1, allows_streaming => 1);
# Bump the transaction ID epoch. This is useful to stress the portability
# of recovery_target_xid parsing.
-system_or_bail('pg_resetwal', '--epoch', '1', $node_primary->data_dir);
+system_or_bail('pg_resetwal', $node_primary->data_dir);
# Start it
$node_primary->start;
diff --git a/src/test/regress/expected/indirect_toast.out b/src/test/regress/expected/indirect_toast.out
index 44b54dc37f..313482b866 100644
--- a/src/test/regress/expected/indirect_toast.out
+++ b/src/test/regress/expected/indirect_toast.out
@@ -161,6 +161,14 @@ SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest;
("one-toasted,one-null, via indirect",0,1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890
(5 rows)
+create or replace function random_string(len integer) returns text as $$
+select substr((select string_agg(r,'') from (select random()::text as r from generate_series(1,(len+15)/16)) s1), 1, len);
+$$ language sql;
+create table toasttest_main(t text);
+alter table toasttest_main alter column t set storage main;
+insert into toasttest_main (select random_string(len) from generate_series(7000,8000) len);
DROP TABLE indtoasttest;
+DROP TABLE toasttest_main;
DROP FUNCTION update_using_indirect();
+DROP FUNCTION random_string(integer);
RESET default_toast_compression;
diff --git a/src/test/regress/expected/insert.out b/src/test/regress/expected/insert.out
index dd4354fc7d..d52545b443 100644
--- a/src/test/regress/expected/insert.out
+++ b/src/test/regress/expected/insert.out
@@ -100,7 +100,7 @@ SELECT pg_size_pretty(pg_relation_size('large_tuple_test'::regclass, 'main'));
INSERT INTO large_tuple_test (select 3, NULL);
-- now this tuple won't fit on the second page, but the insert should
-- still succeed by extending the relation
-INSERT INTO large_tuple_test (select 4, repeat('a', 8126));
+INSERT INTO large_tuple_test (select 4, repeat('a', 8112));
DROP TABLE large_tuple_test;
--
-- check indirection (field/array assignment), cf bug #14265
@@ -980,3 +980,17 @@ insert into returningwrtest values (2, 'foo') returning returningwrtest;
(1 row)
drop table returningwrtest;
+-- Check for MaxHeapTupleSize
+create table maxheaptuplesize_test(value text);
+alter table maxheaptuplesize_test alter column value set storage external;
+insert into maxheaptuplesize_test values (repeat('x', 8104));
+insert into maxheaptuplesize_test values (repeat('x', 8112));
+insert into maxheaptuplesize_test values (repeat('x', 8120));
+insert into maxheaptuplesize_test values (repeat('x', 8128));
+insert into maxheaptuplesize_test values (repeat('x', 8136));
+insert into maxheaptuplesize_test values (repeat('x', 8144));
+insert into maxheaptuplesize_test values (repeat('x', 8152));
+insert into maxheaptuplesize_test values (repeat('x', 8160));
+insert into maxheaptuplesize_test values (repeat('x', 8168));
+insert into maxheaptuplesize_test values (repeat('x', 8176));
+drop table maxheaptuplesize_test;
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 330eb0f765..ce4a2ab432 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -197,7 +197,7 @@ WHERE p1.oid != p2.oid AND
ORDER BY 1, 2;
proargtypes | proargtypes
-----------------------------+--------------------------
- integer | xid
+ bigint | xid
timestamp without time zone | timestamp with time zone
bit | bit varying
txid_snapshot | pg_snapshot
@@ -705,7 +705,7 @@ int8(oid)
tideq(tid,tid)
timestamptz_cmp(timestamp with time zone,timestamp with time zone)
interval_cmp(interval,interval)
-xideqint4(xid,integer)
+xideqint8(xid,bigint)
timetz_eq(time with time zone,time with time zone)
timetz_ne(time with time zone,time with time zone)
timetz_lt(time with time zone,time with time zone)
@@ -819,7 +819,7 @@ pg_lsn_gt(pg_lsn,pg_lsn)
pg_lsn_ne(pg_lsn,pg_lsn)
pg_lsn_cmp(pg_lsn,pg_lsn)
xidneq(xid,xid)
-xidneqint4(xid,integer)
+xidneqint8(xid,bigint)
sha224(bytea)
sha256(bytea)
sha384(bytea)
diff --git a/src/test/regress/expected/select_views.out b/src/test/regress/expected/select_views.out
index 1aeed8452b..d3be84754c 100644
--- a/src/test/regress/expected/select_views.out
+++ b/src/test/regress/expected/select_views.out
@@ -2,9 +2,22 @@
-- SELECT_VIEWS
-- test the views defined in CREATE_VIEWS
--
-SELECT * FROM street;
+SELECT * FROM street ORDER BY name COLLATE "C", thepath::text COLLATE "C";
name | thepath | cname
------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------
+ 100th Ave | [(-122.1657,37.429),(-122.1647,37.432)] | Oakland
+ 107th Ave | [(-122.1555,37.403),(-122.1531,37.41)] | Oakland
+ 14th St | [(-122.299,37.147),(-122.3,37.148)] | Lafayette
+ 19th Ave | [(-122.2366,37.897),(-122.2359,37.905)] | Berkeley
+ 1st St | [(-121.75508,37.89294),(-121.753581,37.90031)] | Oakland
+ 5th St | [(-122.278,37),(-122.2792,37.005),(-122.2803,37.009)] | Lafayette
+ 5th St | [(-122.296,37.615),(-122.2953,37.598)] | Berkeley
+ 82nd Ave | [(-122.1695,37.596),(-122.1681,37.603)] | Berkeley
+ 85th Ave | [(-122.1877,37.466),(-122.186,37.476)] | Oakland
+ 89th Ave | [(-122.1822,37.459),(-122.1803,37.471)] | Oakland
+ 98th Ave | [(-122.1568,37.498),(-122.1558,37.502)] | Oakland
+ 98th Ave | [(-122.1693,37.438),(-122.1682,37.444)] | Oakland
+ 98th Ave | [(-122.2001,37.258),(-122.1974,37.27)] | Lafayette
Access Rd 25 | [(-121.9283,37.894),(-121.9283,37.9)] | Oakland
Ada St | [(-122.2487,37.398),(-122.2496,37.401)] | Lafayette
Agua Fria Creek | [(-121.9254,37.922),(-121.9281,37.889)] | Oakland
@@ -22,8 +35,8 @@ SELECT * FROM street;
Arroyo Las Positas | [(-121.7973,37.997),(-121.7957,37.005)] | Oakland
Arroyo Seco | [(-121.7073,37.766),(-121.6997,37.729)] | Oakland
Ash St | [(-122.0408,37.31),(-122.04,37.292)] | Oakland
- Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Oakland
Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Berkeley
+ Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Oakland
Avenue 140th | [(-122.1656,37.003),(-122.1691,37.988)] | Oakland
Avenue 140th | [(-122.1656,37.003),(-122.1691,37.988)] | Berkeley
Avenue D | [(-122.298,37.848),(-122.3024,37.849)] | Berkeley
@@ -37,14 +50,14 @@ SELECT * FROM street;
Broadmore Ave | [(-122.095,37.522),(-122.0936,37.497)] | Oakland
Broadway | [(-122.2409,37.586),(-122.2395,37.601)] | Berkeley
Buckingham Blvd | [(-122.2231,37.59),(-122.2214,37.606)] | Berkeley
+ Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Berkeley
Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Oakland
Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Oakland
- Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Berkeley
C St | [(-122.1768,37.46),(-122.1749,37.435)] | Oakland
Calaveras Creek | [(-121.8203,37.035),(-121.8207,37.931)] | Oakland
Calaveras Creek | [(-121.8203,37.035),(-121.8207,37.931)] | Oakland
- California St | [(-122.2032,37.005),(-122.2016,37.996)] | Berkeley
California St | [(-122.2032,37.005),(-122.2016,37.996)] | Lafayette
+ California St | [(-122.2032,37.005),(-122.2016,37.996)] | Berkeley
Cameron Ave | [(-122.1316,37.502),(-122.1327,37.481)] | Oakland
Campus Dr | [(-122.1704,37.905),(-122.1678,37.868),(-122.1671,37.865)] | Berkeley
Capricorn Ave | [(-122.2176,37.404),(-122.2164,37.384)] | Lafayette
@@ -55,8 +68,8 @@ SELECT * FROM street;
Central Ave | [(-122.2343,37.602),(-122.2331,37.595)] | Berkeley
Chambers Dr | [(-122.2004,37.352),(-122.1972,37.368)] | Lafayette
Chambers Lane | [(-122.2001,37.359),(-122.1975,37.371)] | Lafayette
- Champion St | [(-122.214,37.991),(-122.2147,37.002)] | Berkeley
Champion St | [(-122.214,37.991),(-122.2147,37.002)] | Lafayette
+ Champion St | [(-122.214,37.991),(-122.2147,37.002)] | Berkeley
Chapman Dr | [(-122.0421,37.504),(-122.0414,37.498)] | Oakland
Charles St | [(-122.0255,37.505),(-122.0252,37.499)] | Oakland
Cherry St | [(-122.0437,37.42),(-122.0434,37.413)] | Oakland
@@ -77,9 +90,9 @@ SELECT * FROM street;
Cull Canyon Road | [(-122.0536,37.435),(-122.0499,37.315)] | Oakland
Cull Creek | [(-122.0624,37.875),(-122.0582,37.527)] | Berkeley
D St | [(-122.1811,37.505),(-122.1805,37.497)] | Oakland
+ Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Berkeley
Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Oakland
Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Oakland
- Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Berkeley
Deering St | [(-122.2146,37.904),(-122.2126,37.897)] | Berkeley
Dimond Ave | [(-122.2167,37.994),(-122.2162,37.006)] | Berkeley
Dimond Ave | [(-122.2167,37.994),(-122.2162,37.006)] | Lafayette
@@ -117,12 +130,12 @@ SELECT * FROM street;
I- 580 | [(-121.9322,37.989),(-121.9243,37.006),(-121.9217,37.014)] | Oakland
I- 580 | [(-122.018,37.019),(-122.0009,37.032),(-121.9787,37.983),(-121.958,37.984),(-121.9571,37.986)] | Oakland
I- 580 | [(-122.018,37.019),(-122.0009,37.032),(-121.9787,37.983),(-121.958,37.984),(-121.9571,37.986)] | Oakland
- I- 580 | [(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Oakland
I- 580 | [(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Berkeley
+ I- 580 | [(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Oakland
I- 580 | [(-122.1543,37.703),(-122.1535,37.694),(-122.1512,37.655),(-122.1475,37.603),(-122.1468,37.583),(-122.1472,37.569),(-122.149044,37.54874),(-122.1493,37.546),(-122.1501,37.532),(-122.1506,37.509),(-122.1495,37.482),(-122.1487,37.467),(-122.1477,37.447),(-122.1414,37.383),(-122.1404,37.376),(-122.1398,37.372),(-122.139,37.356),(-122.1388,37.353),(-122.1385,37.34),(-122.1382,37.33),(-122.1378,37.316)] | Oakland
I- 580 | [(-122.1543,37.703),(-122.1535,37.694),(-122.1512,37.655),(-122.1475,37.603),(-122.1468,37.583),(-122.1472,37.569),(-122.149044,37.54874),(-122.1493,37.546),(-122.1501,37.532),(-122.1506,37.509),(-122.1495,37.482),(-122.1487,37.467),(-122.1477,37.447),(-122.1414,37.383),(-122.1404,37.376),(-122.1398,37.372),(-122.139,37.356),(-122.1388,37.353),(-122.1385,37.34),(-122.1382,37.33),(-122.1378,37.316)] | Berkeley
- I- 580 | [(-122.2197,37.99),(-122.22,37.99),(-122.222092,37.99523),(-122.2232,37.998),(-122.224146,37.99963),(-122.2261,37.003),(-122.2278,37.007),(-122.2302,37.026),(-122.2323,37.043),(-122.2344,37.059),(-122.235405,37.06427),(-122.2365,37.07)] | Berkeley
I- 580 | [(-122.2197,37.99),(-122.22,37.99),(-122.222092,37.99523),(-122.2232,37.998),(-122.224146,37.99963),(-122.2261,37.003),(-122.2278,37.007),(-122.2302,37.026),(-122.2323,37.043),(-122.2344,37.059),(-122.235405,37.06427),(-122.2365,37.07)] | Lafayette
+ I- 580 | [(-122.2197,37.99),(-122.22,37.99),(-122.222092,37.99523),(-122.2232,37.998),(-122.224146,37.99963),(-122.2261,37.003),(-122.2278,37.007),(-122.2302,37.026),(-122.2323,37.043),(-122.2344,37.059),(-122.235405,37.06427),(-122.2365,37.07)] | Berkeley
I- 580 Ramp | [(-121.8521,37.011),(-121.8479,37.999),(-121.8476,37.999),(-121.8456,37.01),(-121.8455,37.011)] | Oakland
I- 580 Ramp | [(-121.8521,37.011),(-121.8479,37.999),(-121.8476,37.999),(-121.8456,37.01),(-121.8455,37.011)] | Oakland
I- 580 Ramp | [(-121.8743,37.014),(-121.8722,37.999),(-121.8714,37.999)] | Oakland
@@ -136,8 +149,8 @@ SELECT * FROM street;
I- 580 Ramp | [(-122.0941,37.897),(-122.0943,37.902)] | Berkeley
I- 580 Ramp | [(-122.096,37.888),(-122.0962,37.891),(-122.0964,37.9)] | Berkeley
I- 580 Ramp | [(-122.101,37.898),(-122.1005,37.902),(-122.0989,37.911)] | Berkeley
- I- 580 Ramp | [(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Oakland
I- 580 Ramp | [(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Berkeley
+ I- 580 Ramp | [(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Oakland
I- 580 Ramp | [(-122.1414,37.383),(-122.1407,37.376),(-122.1403,37.372),(-122.139,37.356)] | Oakland
I- 580/I-680 Ramp | ((-121.9207,37.988),(-121.9192,37.016)) | Oakland
I- 580/I-680 Ramp | ((-121.9207,37.988),(-121.9192,37.016)) | Oakland
@@ -158,16 +171,16 @@ SELECT * FROM street;
I- 880 | ((-121.9669,37.075),(-121.9663,37.071),(-121.9656,37.065),(-121.9618,37.037),(-121.95689,37),(-121.948,37.933)) | Oakland
I- 880 | [(-121.948,37.933),(-121.9471,37.925),(-121.9467,37.923),(-121.946,37.918),(-121.9452,37.912),(-121.937,37.852)] | Oakland
I- 880 | [(-122.0219,37.466),(-122.0205,37.447),(-122.020331,37.44447),(-122.020008,37.43962),(-122.0195,37.432),(-122.0193,37.429),(-122.0164,37.393),(-122.010219,37.34771),(-122.0041,37.313)] | Oakland
- I- 880 | [(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Oakland
I- 880 | [(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Berkeley
- I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Oakland
+ I- 880 | [(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Oakland
I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Oakland
I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Berkeley
+ I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Oakland
I- 880 | [(-122.0831,37.312),(-122.0819,37.296),(-122.081,37.285),(-122.0786,37.248),(-122.078,37.24),(-122.077642,37.23496),(-122.076983,37.22567),(-122.076599,37.22026),(-122.076229,37.21505),(-122.0758,37.209)] | Oakland
I- 880 | [(-122.0978,37.528),(-122.096,37.496),(-122.0931,37.453),(-122.09277,37.4496),(-122.090189,37.41442),(-122.0896,37.405),(-122.085,37.34)] | Oakland
I- 880 | [(-122.1365,37.902),(-122.1358,37.898),(-122.1333,37.881),(-122.1323,37.874),(-122.1311,37.866),(-122.1308,37.865),(-122.1307,37.864),(-122.1289,37.851),(-122.1277,37.843),(-122.1264,37.834),(-122.1231,37.812),(-122.1165,37.766),(-122.1104,37.72),(-122.109695,37.71094),(-122.109,37.702),(-122.108312,37.69168),(-122.1076,37.681)] | Berkeley
- I- 880 | [(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Oakland
I- 880 | [(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Berkeley
+ I- 880 | [(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Oakland
I- 880 | [(-122.2214,37.711),(-122.2202,37.699),(-122.2199,37.695),(-122.219,37.682),(-122.2184,37.672),(-122.2173,37.652),(-122.2159,37.638),(-122.2144,37.616),(-122.2138,37.612),(-122.2135,37.609),(-122.212,37.592),(-122.2116,37.586),(-122.2111,37.581)] | Berkeley
I- 880 | [(-122.2707,37.975),(-122.2693,37.972),(-122.2681,37.966),(-122.267,37.962),(-122.2659,37.957),(-122.2648,37.952),(-122.2636,37.946),(-122.2625,37.935),(-122.2617,37.927),(-122.2607,37.921),(-122.2593,37.916),(-122.258,37.911),(-122.2536,37.898),(-122.2432,37.858),(-122.2408,37.845),(-122.2386,37.827),(-122.2374,37.811)] | Berkeley
I- 880 Ramp | [(-122.0019,37.301),(-122.002,37.293)] | Oakland
@@ -202,28 +215,28 @@ SELECT * FROM street;
Laguna Ave | [(-122.2099,37.989),(-122.2089,37)] | Berkeley
Laguna Ave | [(-122.2099,37.989),(-122.2089,37)] | Lafayette
Lakehurst Cir | [(-122.284729,37.89025),(-122.286096,37.90364)] | Berkeley
- Lakeshore Ave | [(-122.2586,37.99),(-122.2556,37.006)] | Berkeley
Lakeshore Ave | [(-122.2586,37.99),(-122.2556,37.006)] | Lafayette
+ Lakeshore Ave | [(-122.2586,37.99),(-122.2556,37.006)] | Berkeley
Las Positas Road | [(-121.764488,37.99199),(-121.75569,37.02022)] | Oakland
Las Positas Road | [(-121.764488,37.99199),(-121.75569,37.02022)] | Oakland
- Linden St | [(-122.2867,37.998),(-122.2864,37.008)] | Berkeley
Linden St | [(-122.2867,37.998),(-122.2864,37.008)] | Lafayette
+ Linden St | [(-122.2867,37.998),(-122.2864,37.008)] | Berkeley
Livermore Ave | [(-121.7687,37.448),(-121.769,37.375)] | Oakland
Livermore Ave | [(-121.7687,37.448),(-121.769,37.375)] | Oakland
Livermore Ave | [(-121.772719,37.99085),(-121.7728,37.001)] | Oakland
Livermore Ave | [(-121.772719,37.99085),(-121.7728,37.001)] | Oakland
- Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Oakland
Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Berkeley
+ Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Oakland
Logan Ct | [(-122.0053,37.492),(-122.0061,37.484)] | Oakland
Magnolia St | [(-122.0971,37.5),(-122.0962,37.484)] | Oakland
Mandalay Road | [(-122.2322,37.397),(-122.2321,37.403)] | Lafayette
Marin Ave | [(-122.2741,37.894),(-122.272,37.901)] | Berkeley
Martin Luther King Jr Way | [(-122.2712,37.608),(-122.2711,37.599)] | Berkeley
Mattos Dr | [(-122.0005,37.502),(-122.000898,37.49683)] | Oakland
- Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Oakland
Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Berkeley
- McClure Ave | [(-122.1431,37.001),(-122.1436,37.998)] | Oakland
+ Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Oakland
McClure Ave | [(-122.1431,37.001),(-122.1436,37.998)] | Berkeley
+ McClure Ave | [(-122.1431,37.001),(-122.1436,37.998)] | Oakland
Medlar Dr | [(-122.0627,37.378),(-122.0625,37.375)] | Oakland
Mildred Ct | [(-122.0002,37.388),(-121.9998,37.386)] | Oakland
Miller Road | [(-122.0902,37.645),(-122.0865,37.545)] | Berkeley
@@ -242,8 +255,8 @@ SELECT * FROM street;
Parkridge Dr | [(-122.1438,37.884),(-122.1428,37.9)] | Berkeley
Parkside Dr | [(-122.0475,37.603),(-122.0443,37.596)] | Berkeley
Paseo Padre Pkwy | [(-121.9143,37.005),(-121.913522,37)] | Oakland
- Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Oakland
Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Berkeley
+ Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Oakland
Pearl St | [(-122.2383,37.594),(-122.2366,37.615)] | Berkeley
Periwinkle Road | [(-122.0451,37.301),(-122.044758,37.29844)] | Oakland
Pimlico Dr | [(-121.8616,37.998),(-121.8618,37.008)] | Oakland
@@ -254,11 +267,11 @@ SELECT * FROM street;
Railroad Ave | [(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Oakland
Railroad Ave | [(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Berkeley
Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Oakland
- Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Oakland
Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Berkeley
+ Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Oakland
Redding St | [(-122.1978,37.901),(-122.1975,37.895)] | Berkeley
- Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Oakland
Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Berkeley
+ Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Oakland
Roca Dr | [(-122.0335,37.609),(-122.0314,37.599)] | Berkeley
Rosedale Ct | [(-121.9232,37.9),(-121.924,37.897)] | Oakland
Sacramento St | [(-122.2799,37.606),(-122.2797,37.597)] | Berkeley
@@ -266,8 +279,8 @@ SELECT * FROM street;
Saginaw Ct | [(-121.8803,37.898),(-121.8806,37.901)] | Oakland
San Andreas Dr | [(-122.0609,37.9),(-122.0614,37.895)] | Berkeley
Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Oakland
- Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Oakland
Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Berkeley
+ Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Oakland
Shattuck Ave | [(-122.2686,37.904),(-122.2686,37.897)] | Berkeley
Sheridan Road | [(-122.2279,37.425),(-122.2253,37.411),(-122.2223,37.377)] | Lafayette
Shoreline Dr | [(-122.2657,37.603),(-122.2648,37.6)] | Berkeley
@@ -317,27 +330,14 @@ SELECT * FROM street;
Welch Creek Road | [(-121.7695,37.386),(-121.7737,37.413)] | Oakland
West Loop Road | [(-122.0576,37.604),(-122.0602,37.586)] | Berkeley
Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Oakland
- Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Oakland
Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Berkeley
+ Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Oakland
Whitlock Creek | [(-121.74683,37.91276),(-121.733107,37)] | Oakland
Whitlock Creek | [(-121.74683,37.91276),(-121.733107,37)] | Oakland
Willimet Way | [(-122.0964,37.517),(-122.0949,37.493)] | Oakland
Wisconsin St | [(-122.1994,37.017),(-122.1975,37.998),(-122.1971,37.994)] | Oakland
Wisconsin St | [(-122.1994,37.017),(-122.1975,37.998),(-122.1971,37.994)] | Berkeley
Wp Railroad | [(-122.254,37.902),(-122.2506,37.891)] | Berkeley
- 100th Ave | [(-122.1657,37.429),(-122.1647,37.432)] | Oakland
- 107th Ave | [(-122.1555,37.403),(-122.1531,37.41)] | Oakland
- 14th St | [(-122.299,37.147),(-122.3,37.148)] | Lafayette
- 19th Ave | [(-122.2366,37.897),(-122.2359,37.905)] | Berkeley
- 1st St | [(-121.75508,37.89294),(-121.753581,37.90031)] | Oakland
- 5th St | [(-122.278,37),(-122.2792,37.005),(-122.2803,37.009)] | Lafayette
- 5th St | [(-122.296,37.615),(-122.2953,37.598)] | Berkeley
- 82nd Ave | [(-122.1695,37.596),(-122.1681,37.603)] | Berkeley
- 85th Ave | [(-122.1877,37.466),(-122.186,37.476)] | Oakland
- 89th Ave | [(-122.1822,37.459),(-122.1803,37.471)] | Oakland
- 98th Ave | [(-122.1568,37.498),(-122.1558,37.502)] | Oakland
- 98th Ave | [(-122.1693,37.438),(-122.1682,37.444)] | Oakland
- 98th Ave | [(-122.2001,37.258),(-122.1974,37.27)] | Lafayette
(333 rows)
SELECT name, #thepath FROM iexit ORDER BY name COLLATE "C", 2;
diff --git a/src/test/regress/expected/txid.out b/src/test/regress/expected/txid.out
index 95ba66e95e..2ea4434f51 100644
--- a/src/test/regress/expected/txid.out
+++ b/src/test/regress/expected/txid.out
@@ -238,9 +238,11 @@ SELECT txid_snapshot '1:9223372036854775807:3';
(1 row)
SELECT txid_snapshot '1:9223372036854775808:3';
-ERROR: invalid input syntax for type pg_snapshot: "1:9223372036854775808:3"
-LINE 1: SELECT txid_snapshot '1:9223372036854775808:3';
- ^
+ txid_snapshot
+-------------------------
+ 1:9223372036854775808:3
+(1 row)
+
-- test txid_current_if_assigned
BEGIN;
SELECT txid_current_if_assigned() IS NULL;
diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out
index d3ac08c9ee..952019b2e2 100644
--- a/src/test/regress/expected/type_sanity.out
+++ b/src/test/regress/expected/type_sanity.out
@@ -19,7 +19,7 @@ WHERE t1.typnamespace = 0 OR
(t1.typlen <= 0 AND t1.typlen != -1 AND t1.typlen != -2) OR
(t1.typtype not in ('b', 'c', 'd', 'e', 'm', 'p', 'r')) OR
NOT t1.typisdefined OR
- (t1.typalign not in ('c', 's', 'i', 'd')) OR
+ (t1.typalign not in ('c', 's', 'i', 'd', 'x')) OR
(t1.typstorage not in ('p', 'x', 'e', 'm'));
oid | typname
-----+---------
@@ -32,7 +32,8 @@ WHERE t1.typbyval AND
(t1.typlen != 1 OR t1.typalign != 'c') AND
(t1.typlen != 2 OR t1.typalign != 's') AND
(t1.typlen != 4 OR t1.typalign != 'i') AND
- (t1.typlen != 8 OR t1.typalign != 'd');
+ (t1.typlen != 8 OR t1.typalign != 'd') AND
+ (t1.typlen != 8 OR t1.typalign != 'x');
oid | typname
-----+---------
(0 rows)
diff --git a/src/test/regress/expected/xid.out b/src/test/regress/expected/xid.out
index d8e76f3321..3252bdc28d 100644
--- a/src/test/regress/expected/xid.out
+++ b/src/test/regress/expected/xid.out
@@ -8,9 +8,9 @@ select '010'::xid,
'42'::xid8,
'0xffffffffffffffff'::xid8,
'-1'::xid8;
- xid | xid | xid | xid | xid8 | xid8 | xid8 | xid8
------+-----+------------+------------+------+------+----------------------+----------------------
- 8 | 42 | 4294967295 | 4294967295 | 8 | 42 | 18446744073709551615 | 18446744073709551615
+ xid | xid | xid | xid | xid8 | xid8 | xid8 | xid8
+-----+-----+------------+----------------------+------+------+----------------------+----------------------
+ 8 | 42 | 4294967295 | 18446744073709551615 | 8 | 42 | 18446744073709551615 | 18446744073709551615
(1 row)
-- garbage values are not yet rejected (perhaps they should be)
@@ -381,9 +381,11 @@ SELECT pg_snapshot '1:9223372036854775807:3';
(1 row)
SELECT pg_snapshot '1:9223372036854775808:3';
-ERROR: invalid input syntax for type pg_snapshot: "1:9223372036854775808:3"
-LINE 1: SELECT pg_snapshot '1:9223372036854775808:3';
- ^
+ pg_snapshot
+-------------------------
+ 1:9223372036854775808:3
+(1 row)
+
-- test pg_current_xact_id_if_assigned
BEGIN;
SELECT pg_current_xact_id_if_assigned() IS NULL;
diff --git a/src/test/regress/expected/xid64.out b/src/test/regress/expected/xid64.out
new file mode 100644
index 0000000000..c30c5b5739
--- /dev/null
+++ b/src/test/regress/expected/xid64.out
@@ -0,0 +1,92 @@
+---
+--- Unit test for xid64 functions
+---
+-- directory paths and dlsuffix are passed to us in environment variables
+\getenv libdir PG_LIBDIR
+\getenv dlsuffix PG_DLSUFFIX
+\set regresslib :libdir '/regress' :dlsuffix
+CREATE FUNCTION xid64_test_1(rel regclass) RETURNS VOID
+ AS :'regresslib', 'xid64_test_1' LANGUAGE C STRICT;
+CREATE FUNCTION xid64_test_2(rel regclass) RETURNS VOID
+ AS :'regresslib', 'xid64_test_2' LANGUAGE C STRICT;
+CREATE FUNCTION xid64_test_double_xmax(rel regclass) RETURNS VOID
+ AS :'regresslib', 'xid64_test_double_xmax' LANGUAGE C STRICT;
+---
+--- Check page consistency after conversion
+---
+CREATE UNLOGGED TABLE test_xid64_table(a int);
+ALTER TABLE test_xid64_table SET (autovacuum_enabled = false);
+INSERT INTO test_xid64_table(a) SELECT a FROM generate_series(1, 1000) AS a;
+SELECT xid64_test_1('test_xid64_table');
+INFO: test 1: page is converted to xid64 format
+ xid64_test_1
+--------------
+
+(1 row)
+
+DROP TABLE test_xid64_table;
+---
+--- Check tuples consistency after conversion
+---
+CREATE UNLOGGED TABLE test_xid64_table(s serial, i int, t text);
+ALTER TABLE test_xid64_table SET (autovacuum_enabled = false);
+DO $$
+BEGIN
+ FOR j IN 1..20 LOOP
+ INSERT INTO test_xid64_table(i, t) VALUES (random()::int, md5(random()::text));
+ COMMIT;
+ END LOOP;
+END $$;
+DO $$
+BEGIN
+ FOR j IN 1..10 LOOP
+ DELETE FROM test_xid64_table WHERE ctid IN (SELECT ctid FROM test_xid64_table TABLESAMPLE BERNOULLI (5));
+ COMMIT;
+ END LOOP;
+END $$;
+SELECT xid64_test_2('test_xid64_table');
+ xid64_test_2
+--------------
+
+(1 row)
+
+DROP TABLE test_xid64_table;
+---
+--- Check tuples consistency after conversion to double xmax (on full page)
+---
+CREATE UNLOGGED TABLE test_xid64_table(i int);
+DO $$
+BEGIN
+ FOR j IN 1..40 LOOP
+ INSERT INTO test_xid64_table SELECT i FROM generate_series(1, 100) AS i;
+ COMMIT;
+ END LOOP;
+END $$;
+SELECT xid64_test_2('test_xid64_table');
+ xid64_test_2
+--------------
+
+(1 row)
+
+DROP TABLE test_xid64_table;
+CREATE UNLOGGED TABLE test_xid64_table(i text);
+INSERT INTO test_xid64_table(i) VALUES ('NNBABCDSDFGHJKLP');
+DO $$
+BEGIN
+ FOR j IN 1..40 LOOP
+ INSERT INTO test_xid64_table(i) SELECT 'A' FROM generate_series(1, 100) AS i;
+ COMMIT;
+ END LOOP;
+END $$;
+SELECT xid64_test_double_xmax('test_xid64_table');
+INFO: test double xmax: page 0 is converted into double xmax format
+INFO: test double xmax: end
+ xid64_test_double_xmax
+------------------------
+
+(1 row)
+
+DROP TABLE test_xid64_table;
+DROP FUNCTION xid64_test_1(rel regclass);
+DROP FUNCTION xid64_test_2(rel regclass);
+DROP FUNCTION xid64_test_double_xmax(rel regclass);
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 9f644a0c1b..2344cb38b3 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -33,7 +33,7 @@ test: strings numerology point lseg line box path polygon circle date time timet
# geometry depends on point, lseg, line, box, path, polygon, circle
# horology depends on date, time, timetz, timestamp, timestamptz, interval
# ----------
-test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc
+test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid xid64 mvcc
# ----------
# Load huge amounts of data
diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c
index dda076847a..b6ede1f800 100644
--- a/src/test/regress/pg_regress.c
+++ b/src/test/regress/pg_regress.c
@@ -2252,7 +2252,7 @@ regression_main(int argc, char *argv[],
/* initdb */
header(_("initializing database system"));
snprintf(buf, sizeof(buf),
- "\"%s%sinitdb\" -D \"%s/data\" --no-clean --no-sync%s%s > \"%s/log/initdb.log\" 2>&1",
+ "\"%s%sinitdb\" -D \"%s/data\" -x 1249835483136 -m 2422361554944 -o 3594887626752 --no-clean --no-sync%s%s > \"%s/log/initdb.log\" 2>&1",
bindir ? bindir : "",
bindir ? "/" : "",
temp_instance,
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index 548afb4438..fd6dd77ce5 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -23,6 +23,7 @@
#include "access/htup_details.h"
#include "access/transam.h"
#include "access/xact.h"
+#include "catalog/catalog.h"
#include "catalog/namespace.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
@@ -1257,3 +1258,293 @@ get_columns_length(PG_FUNCTION_ARGS)
PG_RETURN_INT32(column_offset);
}
+
+#include "access/hio.h"
+#include "access/relation.h"
+#include "storage/bufmgr.h"
+#include "utils/rel.h"
+
+/*
+ * Report on the layout of a page that has just gone through convert_page().
+ *
+ * msg  - prefix used in every message emitted here
+ * page - the page to inspect
+ *
+ * Raises ERROR when the page layout version is not PG_PAGE_LAYOUT_VERSION.
+ * Otherwise emits an INFO line saying whether the page carries a special area
+ * of MAXALIGN(sizeof(HeapPageSpecialData)) bytes or is in double xmax format;
+ * a page that is neither is reported with ERROR.
+ */
+static void
+CheckNewPage(char *msg, Page page)
+{
+	const Size	wanted = MAXALIGN(sizeof(HeapPageSpecialData));
+	int			version = PageGetPageLayoutVersion(page);
+	uint16		special;
+
+	if (version != PG_PAGE_LAYOUT_VERSION)
+		elog(ERROR, "%s: page version is %d, expected %d ",
+			 msg, version, PG_PAGE_LAYOUT_VERSION);
+
+	special = PageGetSpecialSize(page);
+
+	/* Regular xid64 page: special area has exactly the expected size. */
+	if (special == wanted)
+	{
+		elog(INFO, "%s: page is converted to xid64 format", msg);
+		return;
+	}
+
+	/* Only consulted when the special size did not match. */
+	if (HeapPageIsDoubleXmax(page))
+	{
+		elog(INFO, "%s: page is converted into double xmax format", msg);
+		return;
+	}
+
+	elog(ERROR, "%s: converted page has pageSpecial size %u, expected %llu",
+		 msg, special, (unsigned long long) wanted);
+}
+
+/*
+ * xid64_test_1(rel regclass)
+ *
+ * Takes block 0 of the relation, verifies that it currently has the new page
+ * layout, then rewrites its header so that it looks like a 32-bit xid page
+ * (no special area, previous layout version).  The page is handed to
+ * convert_page() and the result is validated with CheckNewPage().
+ *
+ * The relation is held under AccessExclusiveLock and the buffer under an
+ * exclusive content lock for the duration of the test.
+ */
+PG_FUNCTION_INFO_V1(xid64_test_1);
+Datum
+xid64_test_1(PG_FUNCTION_ARGS)
+{
+	Oid			reloid = PG_GETARG_OID(0);
+	Relation	relation = relation_open(reloid, AccessExclusiveLock);
+	Buffer		buffer = ReadBuffer(relation, 0);
+	Page		pg;
+	PageHeader	phdr;
+
+	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+	pg = BufferGetPage(buffer);
+	phdr = (PageHeader) pg;
+
+	/* The starting point must be a page in the current format. */
+	if (PageGetSpecialSize(pg) != MAXALIGN(sizeof(HeapPageSpecialData)))
+		elog(ERROR, "page expected in new format");
+
+	if (PageGetPageLayoutVersion(pg) != PG_PAGE_LAYOUT_VERSION)
+		elog(ERROR, "unknown page version (%u)",
+			 PageGetPageLayoutVersion(pg));
+
+	/* Drop the special area and step the layout version back by one. */
+	phdr->pd_special = BLCKSZ;
+	PageSetPageSizeAndVersion(pg, BLCKSZ, PG_PAGE_LAYOUT_VERSION - 1);
+
+	/* Convert block 0 back and check what came out. */
+	convert_page(relation, pg, buffer, 0);
+	CheckNewPage("test 1", pg);
+
+	UnlockReleaseBuffer(buffer);
+	relation_close(relation, AccessExclusiveLock);
+
+	PG_RETURN_VOID();
+}
+
+/* Raw xmin/xmax pair recorded for one tuple by FillRelCheckValues(). */
+typedef struct TupleCheckValues
+{
+ TransactionId xmin;
+ TransactionId xmax;
+} TupleCheckValues;
+
+/* Array of per-tuple check values gathered from one page. */
+typedef struct RelCheckValues
+{
+ TupleCheckValues *tcv; /* palloc'd array of entries */
+ Size ntuples; /* number of entries filled in tcv */
+} RelCheckValues;
+
+/*
+ * Collect the raw xmin/xmax of every tuple stored on 'page'.
+ *
+ * rel    - relation the page belongs to (supplies t_tableOid and tells
+ *          whether the relation is a TOAST relation)
+ * buffer - buffer holding the page
+ * page   - the page to scan
+ *
+ * When HeapPageGetSpecial() yields heapDoubleXmaxSpecial the values are read
+ * straight from the tuple header; otherwise the page bases are copied into
+ * the tuple first and the values are obtained through HeapTupleGetRawXmin()
+ * and HeapTupleGetRawXmax().
+ *
+ * Line pointers that are not LP_NORMAL (unused, dead or redirected) have no
+ * tuple to inspect and PageGetItem() must not be called on them, so they are
+ * skipped.
+ *
+ * Returns a RelCheckValues whose tcv array is palloc'd; the caller is
+ * responsible for pfree'ing it.
+ */
+static RelCheckValues
+FillRelCheckValues(Relation rel, Buffer buffer, Page page)
+{
+	RelCheckValues set;
+	Size		n;
+	OffsetNumber maxoff,
+				offnum;
+
+#define DEFAULT_SET_SIZE 64
+	n = DEFAULT_SET_SIZE;
+	set.ntuples = 0;
+	set.tcv = palloc(sizeof(set.tcv[0]) * n);
+
+	maxoff = PageGetMaxOffsetNumber(page);
+
+	for (offnum = FirstOffsetNumber;
+		 offnum <= maxoff;
+		 offnum = OffsetNumberNext(offnum))
+	{
+		ItemId		itemid = PageGetItemId(page, offnum);
+		HeapTupleHeader tuphdr;
+		HeapTupleData tuple;
+		TransactionId xmin,
+					xmax;
+
+		/* Only normal line pointers reference a tuple on the page. */
+		if (!ItemIdIsNormal(itemid))
+			continue;
+
+		tuphdr = (HeapTupleHeader) PageGetItem(page, itemid);
+		tuple.t_data = tuphdr;
+		tuple.t_len = ItemIdGetLength(itemid);
+		tuple.t_tableOid = RelationGetRelid(rel);
+
+		if (HeapPageGetSpecial(page) == heapDoubleXmaxSpecial)
+		{
+			/* No per-page bases available: take the header fields as is. */
+			xmin = tuphdr->t_choice.t_heap.t_xmin;
+			xmax = tuphdr->t_choice.t_heap.t_xmax;
+		}
+		else
+		{
+			HeapTupleCopyBaseFromPage(buffer, &tuple, page,
+									  IsToastRelation(rel));
+
+			xmin = HeapTupleGetRawXmin(&tuple);
+			xmax = HeapTupleGetRawXmax(&tuple);
+		}
+
+		/* Double the array when it is full. */
+		if (set.ntuples == n)
+		{
+			n *= 2;
+			set.tcv = repalloc(set.tcv, sizeof(set.tcv[0]) * n);
+		}
+
+		set.tcv[set.ntuples].xmin = xmin;
+		set.tcv[set.ntuples].xmax = xmax;
+		set.ntuples++;
+	}
+
+	return set;
+}
+
+/*
+ * Test the xmin/xmax invariant when converting pages from 32-bit xid format
+ * to 64-bit xid format.
+ *
+ * For every page of the relation:
+ * - force the page header to look like a 32-bit xid page, discarding
+ *   pd_xid_base and pd_multi_base
+ * - record xmin/xmax of all tuples on the page
+ * - convert the page into 64-bit xid format
+ * - record xmin/xmax of all tuples again
+ * - compare the old and new values
+ *
+ * NOTE: the initial xid value does not affect the test, as
+ * pd_xid_base/pd_multi_base are discarded.
+ *
+ * Raises ERROR if the relation has no pages (nothing would be verified), if
+ * the number of tuples changes, or if any xmin/xmax differs after conversion.
+ */
+PG_FUNCTION_INFO_V1(xid64_test_2);
+Datum
+xid64_test_2(PG_FUNCTION_ARGS)
+{
+	Oid			relid;
+	Relation	rel;
+	RelCheckValues before,
+				after;
+	BlockNumber pageno,
+				npages;
+	Size		i;
+
+	relid = PG_GETARG_OID(0);
+	rel = relation_open(relid, AccessExclusiveLock);
+	npages = RelationGetNumberOfBlocks(rel);
+
+	/* An empty relation would let the test pass without checking anything. */
+	if (npages == 0)
+		elog(ERROR, "relation has no pages to check");
+
+	for (pageno = 0; pageno != npages; ++pageno)
+	{
+		Buffer		buf;
+		Page		page;
+		PageHeader	hdr;
+
+		/* get page */
+		buf = ReadBuffer(rel, pageno);
+		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+		page = BufferGetPage(buf);
+		hdr = (PageHeader) page;
+
+		/* make page look like 32-bit xid page */
+		hdr->pd_special = BLCKSZ;
+		PageSetPageSizeAndVersion(page, BLCKSZ, PG_PAGE_LAYOUT_VERSION - 1);
+
+		before = FillRelCheckValues(rel, buf, page);
+		convert_page(rel, page, buf, pageno);
+		after = FillRelCheckValues(rel, buf, page);
+
+		/* check */
+		if (before.ntuples != after.ntuples)
+			elog(ERROR, "number of tuples must be equal");
+
+		for (i = 0; i != before.ntuples; ++i)
+		{
+			/*
+			 * A zero xmin after conversion is accepted even if it differs
+			 * from the old value.  NOTE(review): presumably convert_page()
+			 * may reset xmin for some tuples -- confirm.
+			 */
+			if (before.tcv[i].xmin != after.tcv[i].xmin && after.tcv[i].xmin)
+				elog(ERROR, "old and new xmin does not match (%llu != %llu)",
+					 (unsigned long long) before.tcv[i].xmin,
+					 (unsigned long long) after.tcv[i].xmin);
+
+			if (before.tcv[i].xmax != after.tcv[i].xmax)
+				elog(ERROR, "old and new xmax does not match (%llu != %llu)",
+					 (unsigned long long) before.tcv[i].xmax,
+					 (unsigned long long) after.tcv[i].xmax);
+		}
+
+		pfree(before.tcv);
+		pfree(after.tcv);
+
+		UnlockReleaseBuffer(buf);
+	}
+
+	relation_close(rel, AccessExclusiveLock);
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * xid64_test_double_xmax(rel regclass)
+ *
+ * For every page of the relation: mark all tuples as HEAP_XMIN_COMMITTED,
+ * rewrite the page header so that it looks like a 32-bit xid page (same
+ * header changes as in xid64_test_2), and run convert_page() on it.  On
+ * page 0 the first line pointer's length is additionally enlarged by 16
+ * bytes beforehand.
+ *
+ * Emits an INFO line for each page that HeapPageIsDoubleXmax() reports as
+ * double xmax after conversion, and raises ERROR if no page ended up in that
+ * format.
+ *
+ * The statement order below is deliberate; the page is modified in place.
+ */
+PG_FUNCTION_INFO_V1(xid64_test_double_xmax);
+Datum
+xid64_test_double_xmax(PG_FUNCTION_ARGS)
+{
+ Oid relid;
+ Relation rel;
+ BlockNumber pageno,
+ npages;
+ bool found;
+
+ relid = PG_GETARG_OID(0);
+ rel = relation_open(relid, AccessExclusiveLock);
+ npages = RelationGetNumberOfBlocks(rel);
+ found = false;
+
+ for (pageno = 0; pageno != npages; ++pageno)
+ {
+ Buffer buf;
+ Page page;
+ PageHeader hdr;
+ ItemId itemid;
+ OffsetNumber offnum;
+ HeapTupleHeader tuphdr;
+
+ buf = ReadBuffer(rel, pageno);
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ page = BufferGetPage(buf);
+ hdr = (PageHeader) page;
+
+ /*
+ * Only on the first page: grow the recorded length of the first item.
+ * NOTE(review): presumably this leaves no room for a regular special
+ * area, pushing convert_page() towards double xmax -- confirm.
+ */
+ if (pageno == 0)
+ {
+ itemid = PageGetItemId(page, FirstOffsetNumber);
+ itemid->lp_len += 16; /* Move to overlap special */
+ }
+
+ /* Flag every tuple on the page as having a committed xmin. */
+ for (offnum = FirstOffsetNumber;
+ offnum <= PageGetMaxOffsetNumber(page);
+ offnum = OffsetNumberNext(offnum))
+ {
+ itemid = PageGetItemId(page, offnum);
+ tuphdr = (HeapTupleHeader) PageGetItem(page, itemid);
+ tuphdr->t_infomask |= HEAP_XMIN_COMMITTED;
+ }
+
+ /* Drop the special area and step the layout version back by one. */
+ hdr->pd_special = BLCKSZ;
+ PageSetPageSizeAndVersion(page, BLCKSZ, PG_PAGE_LAYOUT_VERSION - 1);
+
+ convert_page(rel, page, buf, pageno);
+
+ if (HeapPageIsDoubleXmax(page))
+ {
+ found = true;
+ elog(INFO, "test double xmax: page %u is converted into double xmax format",
+ pageno);
+ }
+
+ UnlockReleaseBuffer(buf);
+ }
+
+ if (!found)
+ elog(ERROR, "test double xmax: failed, no double xmax");
+
+ /* Already implied here: with npages == 0, found stays false and we error out above. */
+ Assert(npages != 0);
+ elog(INFO, "test double xmax: end");
+
+ relation_close(rel, AccessExclusiveLock);
+
+ PG_RETURN_VOID();
+}
diff --git a/src/test/regress/sql/indirect_toast.sql b/src/test/regress/sql/indirect_toast.sql
index 3e2f6c0237..ea087b5128 100644
--- a/src/test/regress/sql/indirect_toast.sql
+++ b/src/test/regress/sql/indirect_toast.sql
@@ -76,7 +76,18 @@ SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest;
VACUUM FREEZE indtoasttest;
SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest;
+create or replace function random_string(len integer) returns text as $$
+select substr((select string_agg(r,'') from (select random()::text as r from generate_series(1,(len+15)/16)) s1), 1, len);
+$$ language sql;
+
+create table toasttest_main(t text);
+alter table toasttest_main alter column t set storage main;
+
+insert into toasttest_main (select random_string(len) from generate_series(7000,8000) len);
+
DROP TABLE indtoasttest;
+DROP TABLE toasttest_main;
DROP FUNCTION update_using_indirect();
+DROP FUNCTION random_string(integer);
RESET default_toast_compression;
diff --git a/src/test/regress/sql/insert.sql b/src/test/regress/sql/insert.sql
index bdcffd0314..7ada0801eb 100644
--- a/src/test/regress/sql/insert.sql
+++ b/src/test/regress/sql/insert.sql
@@ -55,7 +55,7 @@ INSERT INTO large_tuple_test (select 3, NULL);
-- now this tuple won't fit on the second page, but the insert should
-- still succeed by extending the relation
-INSERT INTO large_tuple_test (select 4, repeat('a', 8126));
+INSERT INTO large_tuple_test (select 4, repeat('a', 8112));
DROP TABLE large_tuple_test;
@@ -597,3 +597,18 @@ alter table returningwrtest2 drop c;
alter table returningwrtest attach partition returningwrtest2 for values in (2);
insert into returningwrtest values (2, 'foo') returning returningwrtest;
drop table returningwrtest;
+
+-- Check for MaxHeapTupleSize
+create table maxheaptuplesize_test(value text);
+alter table maxheaptuplesize_test alter column value set storage external;
+insert into maxheaptuplesize_test values (repeat('x', 8104));
+insert into maxheaptuplesize_test values (repeat('x', 8112));
+insert into maxheaptuplesize_test values (repeat('x', 8120));
+insert into maxheaptuplesize_test values (repeat('x', 8128));
+insert into maxheaptuplesize_test values (repeat('x', 8136));
+insert into maxheaptuplesize_test values (repeat('x', 8144));
+insert into maxheaptuplesize_test values (repeat('x', 8152));
+insert into maxheaptuplesize_test values (repeat('x', 8160));
+insert into maxheaptuplesize_test values (repeat('x', 8168));
+insert into maxheaptuplesize_test values (repeat('x', 8176));
+drop table maxheaptuplesize_test;
diff --git a/src/test/regress/sql/select_views.sql b/src/test/regress/sql/select_views.sql
index e742f13699..70e663e350 100644
--- a/src/test/regress/sql/select_views.sql
+++ b/src/test/regress/sql/select_views.sql
@@ -3,7 +3,7 @@
-- test the views defined in CREATE_VIEWS
--
-SELECT * FROM street;
+SELECT * FROM street ORDER BY name COLLATE "C", thepath::text COLLATE "C";
SELECT name, #thepath FROM iexit ORDER BY name COLLATE "C", 2;
diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql
index 5edc1f1f6e..2b6c4aff6c 100644
--- a/src/test/regress/sql/type_sanity.sql
+++ b/src/test/regress/sql/type_sanity.sql
@@ -22,7 +22,7 @@ WHERE t1.typnamespace = 0 OR
(t1.typlen <= 0 AND t1.typlen != -1 AND t1.typlen != -2) OR
(t1.typtype not in ('b', 'c', 'd', 'e', 'm', 'p', 'r')) OR
NOT t1.typisdefined OR
- (t1.typalign not in ('c', 's', 'i', 'd')) OR
+ (t1.typalign not in ('c', 's', 'i', 'd', 'x')) OR
(t1.typstorage not in ('p', 'x', 'e', 'm'));
-- Look for "pass by value" types that can't be passed by value.
@@ -33,7 +33,8 @@ WHERE t1.typbyval AND
(t1.typlen != 1 OR t1.typalign != 'c') AND
(t1.typlen != 2 OR t1.typalign != 's') AND
(t1.typlen != 4 OR t1.typalign != 'i') AND
- (t1.typlen != 8 OR t1.typalign != 'd');
+ (t1.typlen != 8 OR t1.typalign != 'd') AND
+ (t1.typlen != 8 OR t1.typalign != 'x');
-- Look for "toastable" types that aren't varlena.
diff --git a/src/test/regress/sql/xid64.sql b/src/test/regress/sql/xid64.sql
new file mode 100644
index 0000000000..caa97a0ed9
--- /dev/null
+++ b/src/test/regress/sql/xid64.sql
@@ -0,0 +1,84 @@
+---
+--- Unit test for xid64 functions
+---
+
+-- directory paths and dlsuffix are passed to us in environment variables
+\getenv libdir PG_LIBDIR
+\getenv dlsuffix PG_DLSUFFIX
+
+\set regresslib :libdir '/regress' :dlsuffix
+
+CREATE FUNCTION xid64_test_1(rel regclass) RETURNS VOID
+ AS :'regresslib', 'xid64_test_1' LANGUAGE C STRICT;
+CREATE FUNCTION xid64_test_2(rel regclass) RETURNS VOID
+ AS :'regresslib', 'xid64_test_2' LANGUAGE C STRICT;
+CREATE FUNCTION xid64_test_double_xmax(rel regclass) RETURNS VOID
+ AS :'regresslib', 'xid64_test_double_xmax' LANGUAGE C STRICT;
+
+---
+--- Check page consistency after conversion
+---
+CREATE UNLOGGED TABLE test_xid64_table(a int);
+ALTER TABLE test_xid64_table SET (autovacuum_enabled = false);
+INSERT INTO test_xid64_table(a) SELECT a FROM generate_series(1, 1000) AS a;
+SELECT xid64_test_1('test_xid64_table');
+DROP TABLE test_xid64_table;
+
+---
+--- Check tuples consistency after conversion
+---
+CREATE UNLOGGED TABLE test_xid64_table(s serial, i int, t text);
+ALTER TABLE test_xid64_table SET (autovacuum_enabled = false);
+
+DO $$
+BEGIN
+ FOR j IN 1..20 LOOP
+ INSERT INTO test_xid64_table(i, t) VALUES (random()::int, md5(random()::text));
+ COMMIT;
+ END LOOP;
+END $$;
+
+DO $$
+BEGIN
+ FOR j IN 1..10 LOOP
+ DELETE FROM test_xid64_table WHERE ctid IN (SELECT ctid FROM test_xid64_table TABLESAMPLE BERNOULLI (5));
+ COMMIT;
+ END LOOP;
+END $$;
+
+SELECT xid64_test_2('test_xid64_table');
+DROP TABLE test_xid64_table;
+
+---
+--- Check tuples consistency after conversion to double xmax (on full page)
+---
+CREATE UNLOGGED TABLE test_xid64_table(i int);
+
+DO $$
+BEGIN
+ FOR j IN 1..40 LOOP
+ INSERT INTO test_xid64_table SELECT i FROM generate_series(1, 100) AS i;
+ COMMIT;
+ END LOOP;
+END $$;
+
+SELECT xid64_test_2('test_xid64_table');
+DROP TABLE test_xid64_table;
+
+CREATE UNLOGGED TABLE test_xid64_table(i text);
+INSERT INTO test_xid64_table(i) VALUES ('NNBABCDSDFGHJKLP');
+
+DO $$
+BEGIN
+ FOR j IN 1..40 LOOP
+ INSERT INTO test_xid64_table(i) SELECT 'A' FROM generate_series(1, 100) AS i;
+ COMMIT;
+ END LOOP;
+END $$;
+
+SELECT xid64_test_double_xmax('test_xid64_table');
+DROP TABLE test_xid64_table;
+
+DROP FUNCTION xid64_test_1(rel regclass);
+DROP FUNCTION xid64_test_2(rel regclass);
+DROP FUNCTION xid64_test_double_xmax(rel regclass);
diff --git a/src/test/xid-64/Makefile b/src/test/xid-64/Makefile
new file mode 100644
index 0000000000..3b1e50dfc0
--- /dev/null
+++ b/src/test/xid-64/Makefile
@@ -0,0 +1,22 @@
+#-------------------------------------------------------------------------
+#
+# Makefile for src/test/xid-64
+#
+# Copyright (c) 2018, Postgres Professional
+#
+# src/test/xid-64/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/test/xid-64
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+check:
+ $(prove_check)
+
+installcheck:
+ $(prove_installcheck)
+
+clean distclean maintainer-clean:
+ rm -rf tmp_check
diff --git a/src/test/xid-64/README b/src/test/xid-64/README
new file mode 100644
index 0000000000..01c0a1a1f7
--- /dev/null
+++ b/src/test/xid-64/README
@@ -0,0 +1,16 @@
+src/test/xid-64/README
+
+Regression tests for 64-bit XIDs
+=============================================
+
+This directory contains a test suite for 64-bit xids.
+
+Running the tests
+=================
+
+ make check
+
+NOTE: This creates a temporary installation, and some tests may
+create one or multiple nodes.
+
+NOTE: This requires the --enable-tap-tests argument to configure.
diff --git a/src/test/xid-64/meson.build b/src/test/xid-64/meson.build
new file mode 100644
index 0000000000..f55ebdf41c
--- /dev/null
+++ b/src/test/xid-64/meson.build
@@ -0,0 +1,15 @@
+tests += {
+ 'name': 'xid-64',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'tap': {
+ 'tests': [
+ 't/001_test_large_xids.pl',
+ 't/002_test_gucs.pl',
+ 't/003_test_integrity.pl',
+ 't/004_test_relminmxid.pl',
+ 't/005_stream_subxact.pl',
+ 't/006_zeropage.pl',
+ ],
+ },
+}
diff --git a/src/test/xid-64/t/001_test_large_xids.pl b/src/test/xid-64/t/001_test_large_xids.pl
new file mode 100644
index 0000000000..4c7dbc6cb1
--- /dev/null
+++ b/src/test/xid-64/t/001_test_large_xids.pl
@@ -0,0 +1,54 @@
+# Tests for large xid values
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use bigint;
+
+sub command_output
+{
+ my ($cmd) = @_;
+ my ($stdout, $stderr);
+ print("# Running: " . join(" ", @{$cmd}) . "\n");
+ my $result = IPC::Run::run $cmd, '>', \$stdout, '2>', \$stderr;
+ ok($result, "@$cmd exit code 0");
+ is($stderr, '', "@$cmd no stderr");
+ return $stdout;
+}
+
+my $START_VAL = 2**32;
+my $MAX_VAL = 2**62;
+
+my $ixid = $START_VAL + int(rand($MAX_VAL - $START_VAL));
+my $imxid = $START_VAL + int(rand($MAX_VAL - $START_VAL));
+my $imoff = $START_VAL + int(rand($MAX_VAL - $START_VAL));
+
+# Initialize master node with the random xid-related parameters
+my $node = PostgreSQL::Test::Cluster->new('master');
+$node->init(extra => [ "--xid=$ixid", "--multixact-id=$imxid", "--multixact-offset=$imoff" ]);
+$node->start;
+
+# Check the xid-related parameters reported by pg_controldata
+my $pgcd_output = command_output(
+ [ 'pg_controldata', '-D', $node->data_dir ] );
+# Echo the raw pg_controldata output, followed by a line break
+print($pgcd_output, "\n");
+ok($pgcd_output =~ qr/Latest checkpoint's NextXID:\s*(\d+)/, "XID found");
+my ($nextxid) = ($1);
+ok($nextxid >= $ixid && $nextxid < $ixid + 1000,
+ "Latest checkpoint's NextXID ($nextxid) is close to the initial xid ($ixid).");
+ok($pgcd_output =~ qr/Latest checkpoint's NextMultiXactId:\s*(\d+)/, "MultiXactId found");
+my ($nextmxid) = ($1);
+ok($nextmxid >= $imxid && $nextmxid < $imxid + 1000,
+ "Latest checkpoint's NextMultiXactId ($nextmxid) is close to the initial multiXactId ($imxid).");
+ok($pgcd_output =~ qr/Latest checkpoint's NextMultiOffset:\s*(\d+)/, "MultiOffset found");
+my ($nextmoff) = ($1);
+ok($nextmoff >= $imoff && $nextmoff < $imoff + 1000,
+ "Latest checkpoint's NextMultiOffset ($nextmoff) is close to the initial multiOffset ($imoff).");
+
+# Run pgbench to check whether the database is working properly
+$node->command_ok(
+ [ qw(pgbench --initialize --no-vacuum --scale=10) ],
+ 'pgbench finished without errors');
+
+done_testing();
\ No newline at end of file
diff --git a/src/test/xid-64/t/002_test_gucs.pl b/src/test/xid-64/t/002_test_gucs.pl
new file mode 100644
index 0000000000..ff9f2f3052
--- /dev/null
+++ b/src/test/xid-64/t/002_test_gucs.pl
@@ -0,0 +1,79 @@
+# Tests for guc boundary values
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use bigint;
+
+sub command_output
+{
+ my ($cmd) = @_;
+ my ($stdout, $stderr);
+ print("# Running: " . join(" ", @{$cmd}) . "\n");
+ my $result = IPC::Run::run $cmd, '>', \$stdout, '2>', \$stderr;
+ ok($result, "@$cmd exit code 0");
+ is($stderr, '', "@$cmd no stderr");
+ return $stdout;
+}
+
+sub set_guc
+{
+ my ($node, $guc, $val) = @_;
+ print("SET $guc = $val\n");
+ $node->safe_psql('postgres', "ALTER SYSTEM SET $guc = $val");
+ $node->restart();
+}
+
+sub test_pgbench
+{
+ my ($node) = @_;
+ $node->command_ok(
+ [ qw(pgbench --progress=5 --transactions=1000 --jobs=5 --client=5) ],
+ 'pgbench finished without errors');
+}
+
+my @guc_vals = (
+ [ "autovacuum_freeze_max_age", 100000, 2**63 - 1 ],
+ [ "autovacuum_multixact_freeze_max_age", 10000, 2**63 - 1 ],
+ [ "vacuum_freeze_min_age", 0, 2**63 - 1 ],
+ [ "vacuum_freeze_table_age", 0, 2**63 - 1 ],
+ [ "vacuum_multixact_freeze_min_age", 0, 2**63 - 1 ],
+ [ "vacuum_multixact_freeze_table_age", 0, 2**63 -1 ]
+);
+
+my $START_VAL = 2**32;
+my $MAX_VAL = 2**62;
+
+my $ixid = $START_VAL + int(rand($MAX_VAL - $START_VAL));
+my $imxid = $START_VAL + int(rand($MAX_VAL - $START_VAL));
+my $imoff = $START_VAL + int(rand($MAX_VAL - $START_VAL));
+
+# Initialize master node
+my $node = PostgreSQL::Test::Cluster->new('master');
+$node->init(extra => [ "--xid=$ixid", "--multixact-id=$imxid", "--multixact-offset=$imoff" ]);
+# Disable logging of all statements to avoid log bloat during pgbench
+$node->append_conf('postgresql.conf', "log_statement = none");
+$node->start;
+
+# Fill the test database with the pgbench data
+$node->command_ok(
+ [ qw(pgbench --initialize --scale=10) ],
+ 'pgbench finished without errors');
+
+# Exercise every GUC at its minimum, at its maximum and at a random value
+# in between, running pgbench after each change of the setting.
+foreach my $guc_spec (@guc_vals) {
+ my ($guc, $minval, $maxval) = @{$guc_spec};
+ print("$guc\n");
+ # lower boundary
+ set_guc($node, $guc, $minval);
+ test_pgbench($node);
+ # upper boundary
+ set_guc($node, $guc, $maxval);
+ test_pgbench($node);
+ # random value between the two boundaries
+ my $midval = $minval + int(rand($maxval - $minval));
+ set_guc($node, $guc, $midval);
+ test_pgbench($node);
+}
+
+done_testing();
\ No newline at end of file
diff --git a/src/test/xid-64/t/003_test_integrity.pl b/src/test/xid-64/t/003_test_integrity.pl
new file mode 100644
index 0000000000..ca079f11cb
--- /dev/null
+++ b/src/test/xid-64/t/003_test_integrity.pl
@@ -0,0 +1,58 @@
+# Check integrity after dump/restore with different xids
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use File::Compare;
+
+my $tempdir = PostgreSQL::Test::Utils::tempdir;
+use bigint;
+
+my $START_VAL = 2**32;
+my $MAX_VAL = 2**62;
+
+my $ixid = $START_VAL + int(rand($MAX_VAL - $START_VAL));
+my $imxid = $START_VAL + int(rand($MAX_VAL - $START_VAL));
+my $imoff = $START_VAL + int(rand($MAX_VAL - $START_VAL));
+
+# Initialize master node
+my $node = PostgreSQL::Test::Cluster->new('master');
+$node->init();
+$node->start;
+
+# Create a database and fill it with the pgbench data
+$node->safe_psql('postgres', "CREATE DATABASE pgbench_db");
+$node->command_ok(
+ [ qw(pgbench --initialize --scale=2 pgbench_db) ],
+ 'pgbench finished without errors');
+# Dump the database (cluster the main table to put data in a determined order)
+$node->safe_psql('pgbench_db', qq(
+ CREATE INDEX pa_aid_idx ON pgbench_accounts (aid);
+ CLUSTER pgbench_accounts USING pa_aid_idx));
+$node->command_ok(
+ [ "pg_dump", "-w", "--inserts", "--file=$tempdir/pgbench.sql", "pgbench_db" ],
+ 'pgdump finished without errors');
+$node->stop('fast');
+
+# Initialize second node
+my $node2 = PostgreSQL::Test::Cluster->new('master2');
+$node2->init(extra => [ "--xid=$ixid", "--multixact-id=$imxid", "--multixact-offset=$imoff" ]);
+# Disable logging of all statements to avoid log bloat during restore
+$node2->append_conf('postgresql.conf', "log_statement = none");
+$node2->start;
+
+# Create a database and restore the previous dump
+$node2->safe_psql('postgres', "CREATE DATABASE pgbench_db");
+my $txid0 = $node2->safe_psql('pgbench_db', 'SELECT txid_current()');
+print("# Initial txid_current: $txid0\n");
+$node2->command_ok(["psql", "-q", "-f", "$tempdir/pgbench.sql", "pgbench_db"]);
+
+# Dump the database and compare the dumped content with the previous one
+$node2->safe_psql('pgbench_db', 'CLUSTER pgbench_accounts');
+$node2->command_ok(
+ [ "pg_dump", "-w", "--inserts", "--file=$tempdir/pgbench2.sql", "pgbench_db" ],
+ 'pgdump finished without errors');
+ok(File::Compare::compare_text("$tempdir/pgbench.sql", "$tempdir/pgbench2.sql") == 0, "no differences detected");
+
+done_testing();
\ No newline at end of file
diff --git a/src/test/xid-64/t/004_test_relminmxid.pl b/src/test/xid-64/t/004_test_relminmxid.pl
new file mode 100644
index 0000000000..e1f6e556e5
--- /dev/null
+++ b/src/test/xid-64/t/004_test_relminmxid.pl
@@ -0,0 +1,90 @@
+# Check that VACUUM reports an intentionally broken relminmxid and works again once it is fixed
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use bigint;
+
+# The node is shared by the helper subs defined below.
+my $node = PostgreSQL::Test::Cluster->new('master');
+$node->init(extra => [ "--xid=3", "--multixact-id=1", "--multixact-offset=0" ]);
+# gen_multixact() uses PREPARE TRANSACTION, so prepared transactions must be enabled
+$node->append_conf('postgresql.conf', 'max_prepared_transactions = 2');
+$node->start;
+
+sub relminmxid
+{
+ my $rmm = $node->safe_psql("postgres", qq(
+ SELECT relminmxid
+ FROM pg_class
+ WHERE relname = 'foo';));
+ return $rmm + 0;
+}
+
+sub vacuum
+{
+ my ($rc, $stdout, $stderr) = $node->psql("postgres", "VACUUM foo;");
+ return $stdout.$stderr;
+}
+
+sub gen_multixact
+{
+ $node->safe_psql("postgres", qq(
+ BEGIN;
+ SELECT * FROM foo FOR KEY SHARE;
+ PREPARE TRANSACTION 'fooshare';
+ ));
+
+ my $xmax = $node->safe_psql("postgres", qq(
+ SELECT xmax FROM foo;
+ ));
+ isnt($xmax + 0, 0, "xmax not empty");
+
+ $node->safe_psql("postgres", qq(
+ BEGIN;
+ SELECT * FROM foo FOR KEY SHARE;
+ COMMIT;
+ COMMIT PREPARED 'fooshare';
+ ));
+
+ my $mxact = $node->safe_psql("postgres", qq(
+ SELECT xmax FROM foo;
+ ));
+ isnt($mxact + 0, 0, "mxact not empty");
+ cmp_ok($xmax, '>', $mxact, "xmax is greater than mxact");
+}
+
+# Create the test table and populate it with a single row
+$node->safe_psql("postgres", "CREATE TABLE foo (a int); INSERT INTO foo VALUES (1);");
+
+is(relminmxid(), 1, "relminmxid is default");
+
+vacuum();
+is(relminmxid(), 1, "relminmxid is still default");
+
+gen_multixact();
+is(relminmxid(), 1, "relminmxid is still still default");
+
+unlike(vacuum(), qr/multixact.*before relminmxid/, "no relminmxid error");
+
+# Now intentionally break relminmxid
+$node->safe_psql("postgres", qq(
+ UPDATE pg_class SET relminmxid = ((1::int8<<62) + 1)::text::xid
+ WHERE relname = 'foo'
+));
+cmp_ok(relminmxid(), '>', 2**62, "relminmxid broken (intentionally)");
+
+gen_multixact();
+like(vacuum(), qr/multixact.*before relminmxid/, "got relminmxid error");
+cmp_ok(relminmxid(), '>', 2**62, "relminmxid broken (still)");
+
+# Fix relminmxid by setting to default
+$node->safe_psql("postgres", qq(
+ UPDATE pg_class SET relminmxid = '1'
+ WHERE relname = 'foo'
+));
+is(relminmxid(), 1, "relminmxid is default again");
+
+unlike(vacuum(), qr/multixact.*before relminmxid/, "no relminmxid error again");
+
+done_testing();
diff --git a/src/test/xid-64/t/005_stream_subxact.pl b/src/test/xid-64/t/005_stream_subxact.pl
new file mode 100644
index 0000000000..1379af6816
--- /dev/null
+++ b/src/test/xid-64/t/005_stream_subxact.pl
@@ -0,0 +1,100 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# Test xids streaming of large transaction containing large subtransactions
+# near 32-bit boundary.
+#
+# Mostly it is a copy of 016_stream_subxact.pl, but with publisher xid inited
+# just before 32-bit boundary, so if xids are replicated as 32-bit values,
+# subscriber will get 0 xid value.
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Create publisher node
+my $node_publisher = PostgreSQL::Test::Cluster->new('publisher');
+$node_publisher->init(allows_streaming => 'logical', extra => ['-x', '4294966545']);
+$node_publisher->append_conf('postgresql.conf',
+ 'logical_decoding_work_mem = 64kB');
+$node_publisher->start;
+
+# Create subscriber node
+my $node_subscriber = PostgreSQL::Test::Cluster->new('subscriber');
+$node_subscriber->init(allows_streaming => 'logical');
+$node_subscriber->start;
+
+# Create some preexisting content on publisher
+$node_publisher->safe_psql('postgres',
+ "CREATE TABLE test_tab (a int primary key, b varchar)");
+$node_publisher->safe_psql('postgres',
+ "INSERT INTO test_tab VALUES (1, 'foo'), (2, 'bar')");
+
+# Setup structure on subscriber
+$node_subscriber->safe_psql('postgres',
+ "CREATE TABLE test_tab (a int primary key, b text, c timestamptz DEFAULT now(), d bigint DEFAULT 999)"
+);
+
+# Setup logical replication
+my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
+$node_publisher->safe_psql('postgres',
+ "CREATE PUBLICATION tap_pub FOR TABLE test_tab");
+
+my $appname = 'tap_sub';
+$node_subscriber->safe_psql('postgres',
+ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub WITH (streaming = on)"
+);
+
+$node_publisher->wait_for_catchup($appname);
+
+# Also wait for initial table sync to finish
+my $synced_query =
+ "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');";
+$node_subscriber->poll_query_until('postgres', $synced_query)
+ or die "Timed out while waiting for subscriber to synchronize data";
+
+my $result =
+ $node_subscriber->safe_psql('postgres',
+ "SELECT count(*), count(c), count(d = 999) FROM test_tab");
+is($result, qq(2|2|2), 'check initial data was copied to subscriber');
+
+# Insert, update and delete enough rows to exceed 64kB limit.
+$node_publisher->safe_psql(
+ 'postgres', q{
+BEGIN;
+INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series( 3, 500) s(i);
+UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0;
+DELETE FROM test_tab WHERE mod(a,3) = 0;
+SAVEPOINT s1;
+INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(501, 1000) s(i);
+UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0;
+DELETE FROM test_tab WHERE mod(a,3) = 0;
+SAVEPOINT s2;
+INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(1001, 1500) s(i);
+UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0;
+DELETE FROM test_tab WHERE mod(a,3) = 0;
+SAVEPOINT s3;
+INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(1501, 2000) s(i);
+UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0;
+DELETE FROM test_tab WHERE mod(a,3) = 0;
+SAVEPOINT s4;
+INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(2001, 2500) s(i);
+UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0;
+DELETE FROM test_tab WHERE mod(a,3) = 0;
+COMMIT;
+});
+
+$node_publisher->wait_for_catchup($appname);
+
+$result =
+ $node_subscriber->safe_psql('postgres',
+ "SELECT count(*), count(c), count(d = 999) FROM test_tab");
+is($result, qq(1667|1667|1667),
+ 'check data was copied to subscriber in streaming mode and extra columns contain local defaults'
+);
+
+$node_subscriber->stop;
+$node_publisher->stop;
+
+done_testing();
diff --git a/src/test/xid-64/t/006_zeropage.pl b/src/test/xid-64/t/006_zeropage.pl
new file mode 100644
index 0000000000..fd3ac3973f
--- /dev/null
+++ b/src/test/xid-64/t/006_zeropage.pl
@@ -0,0 +1,33 @@
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Check WAL for ZEROPAGE record.
+
+# Run the command given as an array reference and return what it wrote to
+# stdout.  The exit status and stderr are captured but not checked here; the
+# caller only inspects the returned output.
+# NOTE(review): presumably the status is ignored on purpose because
+# pg_waldump may stop with an error at the end of valid WAL - confirm.
+sub command_output
+{
+ my ($cmd) = @_;
+ my ($stdout, $stderr);
+ print("# Running: " . join(" ", @{$cmd}) . "\n");
+ IPC::Run::run($cmd, '>', \$stdout, '2>', \$stderr);
+ return $stdout;
+}
+
+my $node = PostgreSQL::Test::Cluster->new('main');
+$node->init(extra => [ "--xid=3", "--multixact-id=3", "--multixact-offset=0" ]);
+$node->start;
+my $pgdata = $node->data_dir;
+# Name of the WAL segment that is currently being written
+my $xlogfilename0 = $node->safe_psql('postgres',
+ "SELECT pg_walfile_name(pg_current_wal_lsn())");
+# Dump that segment and look for the ZEROPAGE record of page 0
+my $wd_output = command_output(
+ [ 'pg_waldump', "$pgdata/pg_wal/$xlogfilename0" ]);
+like($wd_output, qr/ZEROPAGE page 0/, "ZEROPAGE found");
+
+done_testing();
diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm
index c2acb58df0..74f2216ad1 100644
--- a/src/tools/msvc/Solution.pm
+++ b/src/tools/msvc/Solution.pm
@@ -399,6 +399,7 @@ sub GenerateFiles
PACKAGE_TARNAME => lc qq{"$package_name"},
PACKAGE_URL => qq{"$package_url"},
PACKAGE_VERSION => qq{"$package_version"},
+ XID_IS_64BIT => 1,
PG_INT128_TYPE => undef,
PG_INT64_TYPE => 'long long int',
PG_KRB_SRVNAM => qq{"postgres"},
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 97c9bc1861..4912da144f 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3360,8 +3360,8 @@ intset_internal_node
intset_leaf_node
intset_node
intvKEY
-itemIdCompact
-itemIdCompactData
+ItemIdCompact
+ItemIdCompactData
iterator
jmp_buf
join_search_hook_type
--
2.37.0 (Apple Git-136)