v2-0001-Fix-Windows-file-IO.patch

text/plain

Filename: v2-0001-Fix-Windows-file-IO.patch
Type: text/plain
Part: 0
Message: Re: [Patch] Windows relation extension failure at 2GB and 4GB
From d3f7543a35b3b72a7069188302cbfc7e4de9120b Mon Sep 17 00:00:00 2001
From: Bryan Green <dbryan.green@gmail.com>
Date: Thu, 6 Nov 2025 10:56:02 -0600
Subject: [PATCH] Fix Windows file I/O to support files larger than 2GB

PostgreSQL's Windows port has been unable to handle files larger than 2GB
due to pervasive use of off_t for file offsets, which is only 32-bit on
Windows. This causes signed integer overflow at exactly 2^31 bytes.

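The codebase already defines pgoff_t as __int64 (64-bit) on Windows for
this purpose, and some function declarations in headers use it, but many
implementations still used off_t. In addition, the Windows pg_pread() and
pg_pwrite() implementations only filled in the low 32 bits of the
OVERLAPPED offset; they now set OffsetHigh as well.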

This issue is unlikely to affect most users since the default relation
segment size is 1GB, keeping individual segment files small. However,
anyone building with --with-segsize greater than 2 (i.e. segments larger
than 2GB) would hit this bug. Tested with --with-segsize=8 and verified
that files can now grow beyond 4GB.

This version also addresses three additional code paths in WAL handling
that used casts to off_t when calling pg_pread() or pg_pwrite():
- xlogrecovery.c: pg_pread() called with cast to off_t
- xlogreader.c: pg_pread() with cast to off_t
- walreceiver.c: pg_pwrite() with cast to off_t

While these are not critical (WAL segments have a max size of 1GB), the
casts are now corrected to pgoff_t for consistency and to avoid any
potential future issues.

Note: off_t is still used in other parts of the codebase (e.g. buffile.c)
which may have similar issues on Windows, but those are outside the
critical path for relation file extension and can be addressed separately.

On Unix-like systems, pgoff_t is defined as off_t, so this change only
affects Windows behavior.
---
 src/backend/access/transam/xlogreader.c   |  2 +-
 src/backend/access/transam/xlogrecovery.c |  2 +-
 src/backend/replication/walreceiver.c     |  2 +-
 src/backend/storage/file/fd.c             | 38 ++++++++---------
 src/backend/storage/smgr/md.c             | 50 +++++++++++------------
 src/common/file_utils.c                   |  4 +-
 src/include/common/file_utils.h           |  4 +-
 src/include/port/pg_iovec.h               |  4 +-
 src/include/port/win32_port.h             |  4 +-
 src/include/storage/fd.h                  | 26 ++++++------
 src/port/win32pread.c                     | 10 ++---
 src/port/win32pwrite.c                    | 10 ++---
 12 files changed, 78 insertions(+), 78 deletions(-)

diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index dcc8d4f9c1..8ea837003f 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -1574,7 +1574,7 @@ WALRead(XLogReaderState *state,
 
 		/* Reset errno first; eases reporting non-errno-affecting errors */
 		errno = 0;
-		readbytes = pg_pread(state->seg.ws_file, p, segbytes, (off_t) startoff);
+		readbytes = pg_pread(state->seg.ws_file, p, segbytes, (pgoff_t) startoff);
 
 #ifndef FRONTEND
 		pgstat_report_wait_end();
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 550de6e4a5..c723d03d96 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -3429,7 +3429,7 @@ retry:
 	io_start = pgstat_prepare_io_time(track_wal_io_timing);
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
-	r = pg_pread(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff);
+	r = pg_pread(readFile, readBuf, XLOG_BLCKSZ, (pgoff_t) readOff);
 	if (r != XLOG_BLCKSZ)
 	{
 		char		fname[MAXFNAMELEN];
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 7361ffc9dc..ec243db3a4 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -928,7 +928,7 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr, TimeLineID tli)
 		start = pgstat_prepare_io_time(track_wal_io_timing);
 
 		pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
-		byteswritten = pg_pwrite(recvFile, buf, segbytes, (off_t) startoff);
+		byteswritten = pg_pwrite(recvFile, buf, segbytes, (pgoff_t) startoff);
 		pgstat_report_wait_end();
 
 		pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL,
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index a4ec7959f3..b25e74831e 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -201,7 +201,7 @@ typedef struct vfd
 	File		nextFree;		/* link to next free VFD, if in freelist */
 	File		lruMoreRecently;	/* doubly linked recency-of-use list */
 	File		lruLessRecently;
-	off_t		fileSize;		/* current size of file (0 if not temporary) */
+	pgoff_t		fileSize;		/* current size of file (0 if not temporary) */
 	char	   *fileName;		/* name of file, or NULL for unused VFD */
 	/* NB: fileName is malloc'd, and must be free'd when closing the VFD */
 	int			fileFlags;		/* open(2) flags for (re)opening the file */
@@ -519,7 +519,7 @@ pg_file_exists(const char *name)
  * offset of 0 with nbytes 0 means that the entire file should be flushed
  */
 void
-pg_flush_data(int fd, off_t offset, off_t nbytes)
+pg_flush_data(int fd, pgoff_t offset, pgoff_t nbytes)
 {
 	/*
 	 * Right now file flushing is primarily used to avoid making later
@@ -635,7 +635,7 @@ retry:
 		 * may simply not be enough address space.  If so, silently fall
 		 * through to the next implementation.
 		 */
-		if (nbytes <= (off_t) SSIZE_MAX)
+		if (nbytes <= (pgoff_t) SSIZE_MAX)
 			p = mmap(NULL, nbytes, PROT_READ, MAP_SHARED, fd, offset);
 		else
 			p = MAP_FAILED;
@@ -697,7 +697,7 @@ retry:
  * Truncate an open file to a given length.
  */
 static int
-pg_ftruncate(int fd, off_t length)
+pg_ftruncate(int fd, pgoff_t length)
 {
 	int			ret;
 
@@ -714,7 +714,7 @@ retry:
  * Truncate a file to a given length by name.
  */
 int
-pg_truncate(const char *path, off_t length)
+pg_truncate(const char *path, pgoff_t length)
 {
 	int			ret;
 #ifdef WIN32
@@ -1526,7 +1526,7 @@ FileAccess(File file)
  * Called whenever a temporary file is deleted to report its size.
  */
 static void
-ReportTemporaryFileUsage(const char *path, off_t size)
+ReportTemporaryFileUsage(const char *path, pgoff_t size)
 {
 	pgstat_report_tempfile(size);
 
@@ -2077,7 +2077,7 @@ FileClose(File file)
  * this.
  */
 int
-FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info)
+FilePrefetch(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info)
 {
 	Assert(FileIsValid(file));
 
@@ -2108,7 +2108,7 @@ retry:
 	{
 		struct radvisory
 		{
-			off_t		ra_offset;	/* offset into the file */
+			pgoff_t		ra_offset;	/* offset into the file */
 			int			ra_count;	/* size of the read     */
 		}			ra;
 		int			returnCode;
@@ -2133,7 +2133,7 @@ retry:
 }
 
 void
-FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
+FileWriteback(File file, pgoff_t offset, pgoff_t nbytes, uint32 wait_event_info)
 {
 	int			returnCode;
 
@@ -2159,7 +2159,7 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
 }
 
 ssize_t
-FileReadV(File file, const struct iovec *iov, int iovcnt, off_t offset,
+FileReadV(File file, const struct iovec *iov, int iovcnt, pgoff_t offset,
 		  uint32 wait_event_info)
 {
 	ssize_t		returnCode;
@@ -2216,7 +2216,7 @@ retry:
 
 int
 FileStartReadV(PgAioHandle *ioh, File file,
-			   int iovcnt, off_t offset,
+			   int iovcnt, pgoff_t offset,
 			   uint32 wait_event_info)
 {
 	int			returnCode;
@@ -2241,7 +2241,7 @@ FileStartReadV(PgAioHandle *ioh, File file,
 }
 
 ssize_t
-FileWriteV(File file, const struct iovec *iov, int iovcnt, off_t offset,
+FileWriteV(File file, const struct iovec *iov, int iovcnt, pgoff_t offset,
 		   uint32 wait_event_info)
 {
 	ssize_t		returnCode;
@@ -2270,7 +2270,7 @@ FileWriteV(File file, const struct iovec *iov, int iovcnt, off_t offset,
 	 */
 	if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMP_FILE_LIMIT))
 	{
-		off_t		past_write = offset;
+		pgoff_t		past_write = offset;
 
 		for (int i = 0; i < iovcnt; ++i)
 			past_write += iov[i].iov_len;
@@ -2309,7 +2309,7 @@ retry:
 		 */
 		if (vfdP->fdstate & FD_TEMP_FILE_LIMIT)
 		{
-			off_t		past_write = offset + returnCode;
+			pgoff_t		past_write = offset + returnCode;
 
 			if (past_write > vfdP->fileSize)
 			{
@@ -2373,7 +2373,7 @@ FileSync(File file, uint32 wait_event_info)
  * appropriate error.
  */
 int
-FileZero(File file, off_t offset, off_t amount, uint32 wait_event_info)
+FileZero(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info)
 {
 	int			returnCode;
 	ssize_t		written;
@@ -2418,7 +2418,7 @@ FileZero(File file, off_t offset, off_t amount, uint32 wait_event_info)
  * appropriate error.
  */
 int
-FileFallocate(File file, off_t offset, off_t amount, uint32 wait_event_info)
+FileFallocate(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info)
 {
 #ifdef HAVE_POSIX_FALLOCATE
 	int			returnCode;
@@ -2457,7 +2457,7 @@ retry:
 	return FileZero(file, offset, amount, wait_event_info);
 }
 
-off_t
+pgoff_t
 FileSize(File file)
 {
 	Assert(FileIsValid(file));
@@ -2468,14 +2468,14 @@ FileSize(File file)
 	if (FileIsNotOpen(file))
 	{
 		if (FileAccess(file) < 0)
-			return (off_t) -1;
+			return (pgoff_t) -1;
 	}
 
 	return lseek(VfdCache[file].fd, 0, SEEK_END);
 }
 
 int
-FileTruncate(File file, off_t offset, uint32 wait_event_info)
+FileTruncate(File file, pgoff_t offset, uint32 wait_event_info)
 {
 	int			returnCode;
 
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 235ba7e191..e3f335a834 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -487,7 +487,7 @@ void
 mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 		 const void *buffer, bool skipFsync)
 {
-	off_t		seekpos;
+	pgoff_t		seekpos;
 	int			nbytes;
 	MdfdVec    *v;
 
@@ -515,9 +515,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
 	v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
 
-	seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+	seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
 
-	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+	Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
 
 	if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
 	{
@@ -578,7 +578,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
 	while (remblocks > 0)
 	{
 		BlockNumber segstartblock = curblocknum % ((BlockNumber) RELSEG_SIZE);
-		off_t		seekpos = (off_t) BLCKSZ * segstartblock;
+		pgoff_t		seekpos = (pgoff_t) BLCKSZ * segstartblock;
 		int			numblocks;
 
 		if (segstartblock + remblocks > RELSEG_SIZE)
@@ -607,7 +607,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
 			int			ret;
 
 			ret = FileFallocate(v->mdfd_vfd,
-								seekpos, (off_t) BLCKSZ * numblocks,
+								seekpos, (pgoff_t) BLCKSZ * numblocks,
 								WAIT_EVENT_DATA_FILE_EXTEND);
 			if (ret != 0)
 			{
@@ -630,7 +630,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
 			 * whole length of the extension.
 			 */
 			ret = FileZero(v->mdfd_vfd,
-						   seekpos, (off_t) BLCKSZ * numblocks,
+						   seekpos, (pgoff_t) BLCKSZ * numblocks,
 						   WAIT_EVENT_DATA_FILE_EXTEND);
 			if (ret < 0)
 				ereport(ERROR,
@@ -745,7 +745,7 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
 	while (nblocks > 0)
 	{
-		off_t		seekpos;
+		pgoff_t		seekpos;
 		MdfdVec    *v;
 		int			nblocks_this_segment;
 
@@ -754,9 +754,9 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 		if (v == NULL)
 			return false;
 
-		seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+		seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
 
-		Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+		Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
 
 		nblocks_this_segment =
 			Min(nblocks,
@@ -851,7 +851,7 @@ mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	{
 		struct iovec iov[PG_IOV_MAX];
 		int			iovcnt;
-		off_t		seekpos;
+		pgoff_t		seekpos;
 		int			nbytes;
 		MdfdVec    *v;
 		BlockNumber nblocks_this_segment;
@@ -861,9 +861,9 @@ mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 		v = _mdfd_getseg(reln, forknum, blocknum, false,
 						 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
 
-		seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+		seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
 
-		Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+		Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
 
 		nblocks_this_segment =
 			Min(nblocks,
@@ -986,7 +986,7 @@ mdstartreadv(PgAioHandle *ioh,
 			 SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 			 void **buffers, BlockNumber nblocks)
 {
-	off_t		seekpos;
+	pgoff_t		seekpos;
 	MdfdVec    *v;
 	BlockNumber nblocks_this_segment;
 	struct iovec *iov;
@@ -996,9 +996,9 @@ mdstartreadv(PgAioHandle *ioh,
 	v = _mdfd_getseg(reln, forknum, blocknum, false,
 					 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
 
-	seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+	seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
 
-	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+	Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
 
 	nblocks_this_segment =
 		Min(nblocks,
@@ -1068,7 +1068,7 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	{
 		struct iovec iov[PG_IOV_MAX];
 		int			iovcnt;
-		off_t		seekpos;
+		pgoff_t		seekpos;
 		int			nbytes;
 		MdfdVec    *v;
 		BlockNumber nblocks_this_segment;
@@ -1078,9 +1078,9 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 		v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
 						 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
 
-		seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+		seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
 
-		Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+		Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
 
 		nblocks_this_segment =
 			Min(nblocks,
@@ -1173,7 +1173,7 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum,
 	while (nblocks > 0)
 	{
 		BlockNumber nflush = nblocks;
-		off_t		seekpos;
+		pgoff_t		seekpos;
 		MdfdVec    *v;
 		int			segnum_start,
 					segnum_end;
@@ -1202,9 +1202,9 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum,
 		Assert(nflush >= 1);
 		Assert(nflush <= nblocks);
 
-		seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+		seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
 
-		FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
+		FileWriteback(v->mdfd_vfd, seekpos, (pgoff_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
 
 		nblocks -= nflush;
 		blocknum += nflush;
@@ -1348,7 +1348,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum,
 			 */
 			BlockNumber lastsegblocks = nblocks - priorblocks;
 
-			if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
+			if (FileTruncate(v->mdfd_vfd, (pgoff_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
 				ereport(ERROR,
 						(errcode_for_file_access(),
 						 errmsg("could not truncate file \"%s\" to %u blocks: %m",
@@ -1484,9 +1484,9 @@ mdfd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off)
 	v = _mdfd_getseg(reln, forknum, blocknum, false,
 					 EXTENSION_FAIL);
 
-	*off = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+	*off = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
 
-	Assert(*off < (off_t) BLCKSZ * RELSEG_SIZE);
+	Assert(*off < (pgoff_t) BLCKSZ * RELSEG_SIZE);
 
 	return FileGetRawDesc(v->mdfd_vfd);
 }
@@ -1868,7 +1868,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 static BlockNumber
 _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 {
-	off_t		len;
+	pgoff_t		len;
 
 	len = FileSize(seg->mdfd_vfd);
 	if (len < 0)
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 7b62687a2a..cdf08ab5cb 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -656,7 +656,7 @@ compute_remaining_iovec(struct iovec *destination,
  * error is returned, it is unspecified how much has been written.
  */
 ssize_t
-pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
+pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, pgoff_t offset)
 {
 	struct iovec iov_copy[PG_IOV_MAX];
 	ssize_t		sum = 0;
@@ -706,7 +706,7 @@ pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
  * is returned with errno set.
  */
 ssize_t
-pg_pwrite_zeros(int fd, size_t size, off_t offset)
+pg_pwrite_zeros(int fd, size_t size, pgoff_t offset)
 {
 	static const PGIOAlignedBlock zbuffer = {0};	/* worth BLCKSZ */
 	void	   *zerobuf_addr = unconstify(PGIOAlignedBlock *, &zbuffer)->data;
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index 9fd88953e4..4239713803 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -55,9 +55,9 @@ extern int	compute_remaining_iovec(struct iovec *destination,
 extern ssize_t pg_pwritev_with_retry(int fd,
 									 const struct iovec *iov,
 									 int iovcnt,
-									 off_t offset);
+									 pgoff_t offset);
 
-extern ssize_t pg_pwrite_zeros(int fd, size_t size, off_t offset);
+extern ssize_t pg_pwrite_zeros(int fd, size_t size, pgoff_t offset);
 
 /* Filename components */
 #define PG_TEMP_FILES_DIR "pgsql_tmp"
diff --git a/src/include/port/pg_iovec.h b/src/include/port/pg_iovec.h
index 90be3af449..845ded8c71 100644
--- a/src/include/port/pg_iovec.h
+++ b/src/include/port/pg_iovec.h
@@ -51,7 +51,7 @@ struct iovec
  * this changes the current file position.
  */
 static inline ssize_t
-pg_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset)
+pg_preadv(int fd, const struct iovec *iov, int iovcnt, pgoff_t offset)
 {
 #if HAVE_DECL_PREADV
 	/*
@@ -90,7 +90,7 @@ pg_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset)
  * this changes the current file position.
  */
 static inline ssize_t
-pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset)
+pg_pwritev(int fd, const struct iovec *iov, int iovcnt, pgoff_t offset)
 {
 #if HAVE_DECL_PWRITEV
 	/*
diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h
index ff7028bdc8..f54ccef7db 100644
--- a/src/include/port/win32_port.h
+++ b/src/include/port/win32_port.h
@@ -584,9 +584,9 @@ typedef unsigned short mode_t;
 #endif
 
 /* in port/win32pread.c */
-extern ssize_t pg_pread(int fd, void *buf, size_t nbyte, off_t offset);
+extern ssize_t pg_pread(int fd, void *buf, size_t nbyte, pgoff_t offset);
 
 /* in port/win32pwrite.c */
-extern ssize_t pg_pwrite(int fd, const void *buf, size_t nbyte, off_t offset);
+extern ssize_t pg_pwrite(int fd, const void *buf, size_t nbyte, pgoff_t offset);
 
 #endif							/* PG_WIN32_PORT_H */
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index b77d8e5e30..3e821ce8fb 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -108,17 +108,17 @@ extern File PathNameOpenFile(const char *fileName, int fileFlags);
 extern File PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode);
 extern File OpenTemporaryFile(bool interXact);
 extern void FileClose(File file);
-extern int	FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info);
-extern ssize_t FileReadV(File file, const struct iovec *iov, int iovcnt, off_t offset, uint32 wait_event_info);
-extern ssize_t FileWriteV(File file, const struct iovec *iov, int iovcnt, off_t offset, uint32 wait_event_info);
-extern int	FileStartReadV(struct PgAioHandle *ioh, File file, int iovcnt, off_t offset, uint32 wait_event_info);
+extern int	FilePrefetch(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info);
+extern ssize_t FileReadV(File file, const struct iovec *iov, int iovcnt, pgoff_t offset, uint32 wait_event_info);
+extern ssize_t FileWriteV(File file, const struct iovec *iov, int iovcnt, pgoff_t offset, uint32 wait_event_info);
+extern int	FileStartReadV(struct PgAioHandle *ioh, File file, int iovcnt, pgoff_t offset, uint32 wait_event_info);
 extern int	FileSync(File file, uint32 wait_event_info);
-extern int	FileZero(File file, off_t offset, off_t amount, uint32 wait_event_info);
-extern int	FileFallocate(File file, off_t offset, off_t amount, uint32 wait_event_info);
+extern int	FileZero(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info);
+extern int	FileFallocate(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info);
 
-extern off_t FileSize(File file);
-extern int	FileTruncate(File file, off_t offset, uint32 wait_event_info);
-extern void FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info);
+extern pgoff_t FileSize(File file);
+extern int	FileTruncate(File file, pgoff_t offset, uint32 wait_event_info);
+extern void FileWriteback(File file, pgoff_t offset, pgoff_t nbytes, uint32 wait_event_info);
 extern char *FilePathName(File file);
 extern int	FileGetRawDesc(File file);
 extern int	FileGetRawFlags(File file);
@@ -186,8 +186,8 @@ extern int	pg_fsync_no_writethrough(int fd);
 extern int	pg_fsync_writethrough(int fd);
 extern int	pg_fdatasync(int fd);
 extern bool pg_file_exists(const char *name);
-extern void pg_flush_data(int fd, off_t offset, off_t nbytes);
-extern int	pg_truncate(const char *path, off_t length);
+extern void pg_flush_data(int fd, pgoff_t offset, pgoff_t nbytes);
+extern int	pg_truncate(const char *path, pgoff_t length);
 extern void fsync_fname(const char *fname, bool isdir);
 extern int	fsync_fname_ext(const char *fname, bool isdir, bool ignore_perm, int elevel);
 extern int	durable_rename(const char *oldfile, const char *newfile, int elevel);
@@ -196,7 +196,7 @@ extern void SyncDataDirectory(void);
 extern int	data_sync_elevel(int elevel);
 
 static inline ssize_t
-FileRead(File file, void *buffer, size_t amount, off_t offset,
+FileRead(File file, void *buffer, size_t amount, pgoff_t offset,
 		 uint32 wait_event_info)
 {
 	struct iovec iov = {
@@ -208,7 +208,7 @@ FileRead(File file, void *buffer, size_t amount, off_t offset,
 }
 
 static inline ssize_t
-FileWrite(File file, const void *buffer, size_t amount, off_t offset,
+FileWrite(File file, const void *buffer, size_t amount, pgoff_t offset,
 		  uint32 wait_event_info)
 {
 	struct iovec iov = {
diff --git a/src/port/win32pread.c b/src/port/win32pread.c
index 32d56c462e..1f00dfd8e6 100644
--- a/src/port/win32pread.c
+++ b/src/port/win32pread.c
@@ -17,7 +17,7 @@
 #include <windows.h>
 
 ssize_t
-pg_pread(int fd, void *buf, size_t size, off_t offset)
+pg_pread(int fd, void *buf, size_t size, pgoff_t offset)
 {
 	OVERLAPPED	overlapped = {0};
 	HANDLE		handle;
@@ -30,16 +30,16 @@ pg_pread(int fd, void *buf, size_t size, off_t offset)
 		return -1;
 	}
 
-	/* Avoid overflowing DWORD. */
+	/* Avoid overflowing DWORD */
 	size = Min(size, 1024 * 1024 * 1024);
 
-	/* Note that this changes the file position, despite not using it. */
-	overlapped.Offset = offset;
+	overlapped.Offset = (DWORD) offset;
+	overlapped.OffsetHigh = (DWORD) (offset >> 32);
+
 	if (!ReadFile(handle, buf, size, &result, &overlapped))
 	{
 		if (GetLastError() == ERROR_HANDLE_EOF)
 			return 0;
-
 		_dosmaperr(GetLastError());
 		return -1;
 	}
diff --git a/src/port/win32pwrite.c b/src/port/win32pwrite.c
index 249aa6c468..d9a0d23c2b 100644
--- a/src/port/win32pwrite.c
+++ b/src/port/win32pwrite.c
@@ -15,9 +15,8 @@
 #include "c.h"
 
 #include <windows.h>
-
 ssize_t
-pg_pwrite(int fd, const void *buf, size_t size, off_t offset)
+pg_pwrite(int fd, const void *buf, size_t size, pgoff_t offset)
 {
 	OVERLAPPED	overlapped = {0};
 	HANDLE		handle;
@@ -30,11 +29,12 @@ pg_pwrite(int fd, const void *buf, size_t size, off_t offset)
 		return -1;
 	}
 
-	/* Avoid overflowing DWORD. */
+	/* Avoid overflowing DWORD */
 	size = Min(size, 1024 * 1024 * 1024);
 
-	/* Note that this changes the file position, despite not using it. */
-	overlapped.Offset = offset;
+	overlapped.Offset = (DWORD) offset;
+	overlapped.OffsetHigh = (DWORD) (offset >> 32);
+
 	if (!WriteFile(handle, buf, size, &result, &overlapped))
 	{
 		_dosmaperr(GetLastError());
-- 
2.46.0.windows.1