diff -rcN postgresql_with_9fujii_patch/src/backend/access/transam/xlog.c postgresql_with_patch/src/backend/access/transam/xlog.c *** postgresql_with_9fujii_patch/src/backend/access/transam/xlog.c 2011-10-06 06:06:19.000000000 +0900 --- postgresql_with_patch/src/backend/access/transam/xlog.c 2011-10-09 02:11:12.000000000 +0900 *************** *** 364,369 **** --- 364,372 ---- bool exclusiveBackup; int nonExclusiveBackups; XLogRecPtr lastBackupStart; + + /* the startup or the walwriter is logged to its own FPW */ + bool fullPageWrites; } XLogCtlInsert; /* *************** *** 453,458 **** --- 456,464 ---- bool recoveryPause; slock_t info_lck; /* locks shared variables shown above */ + + /* latest LSN that has recovered a WAL which fpw is changed 'off' */ + XLogRecPtr lastFpwDisabledLSN; } XLogCtlData; static XLogCtlData *XLogCtl = NULL; *************** *** 564,569 **** --- 570,578 ---- /* Have we launched bgwriter during recovery? */ static bool bgwriterLaunched = false; + /* */ + static bool master_fpw; + /* * Information logged when we detect a change in one of the parameters * important for Hot Standby. *************** *** 763,769 **** * don't yet have the insert lock, forcePageWrites could change under us, * but we'll recheck it once we have the lock. */ ! doPageWrites = fullPageWrites || Insert->forcePageWrites; INIT_CRC32(rdata_crc); len = 0; --- 772,778 ---- * don't yet have the insert lock, forcePageWrites could change under us, * but we'll recheck it once we have the lock. */ ! doPageWrites = Insert->fullPageWrites || Insert->forcePageWrites; INIT_CRC32(rdata_crc); len = 0; *************** *** 909,915 **** * just turned off, we could recompute the record without full pages, but * we choose not to bother.) */ ! if (Insert->forcePageWrites && !doPageWrites) { /* Oops, must redo it with full-page data */ LWLockRelease(WALInsertLock); --- 918,924 ---- * just turned off, we could recompute the record without full pages, but * we choose not to bother.) */ ! 
if ((Insert->fullPageWrites || Insert->forcePageWrites) && !doPageWrites) { /* Oops, must redo it with full-page data */ LWLockRelease(WALInsertLock); *************** *** 6370,6377 **** /* No need to hold ControlFileLock yet, we aren't up far enough */ UpdateControlFile(); ! /* initialize our local copy of minRecoveryPoint */ minRecoveryPoint = ControlFile->minRecoveryPoint; /* * Reset pgstat data, because it may be invalid after recovery. --- 6379,6387 ---- /* No need to hold ControlFileLock yet, we aren't up far enough */ UpdateControlFile(); ! /* initialize our local copy of minRecoveryPoint and fullPageWrites */ minRecoveryPoint = ControlFile->minRecoveryPoint; + master_fpw = ControlFile->checkPointCopy.fullPageWrites; /* * Reset pgstat data, because it may be invalid after recovery. *************** *** 6865,6870 **** --- 6875,6889 ---- /* Pre-scan prepared transactions to find out the range of XIDs present */ oldestActiveXID = PrescanPreparedTransactions(NULL, NULL); + /* + * the startup updates FPW after REDO. However, it must perform before writing + * the WAL of the CHECKPOINT. This is because of the need to update own fpw to + * shared memory before writing the WAL of its CHECKPOTNT. + */ + LocalSetXLogInsertAllowed(); + ReportFpwParameters(true); + LocalXLogInsertAllowed = -1; + if (InRecovery) { int rmid; *************** *** 7723,7728 **** --- 7742,7750 ---- checkPoint.ThisTimeLineID = ThisTimeLineID; + /* record current FPW to the WAL of the CHECKPOINT. */ + checkPoint.fullPageWrites = Insert->fullPageWrites; + /* * Compute new REDO record ptr = location of next XLOG record. 
* *************** *** 8636,8641 **** --- 8658,8676 ---- /* Check to see if any changes to max_connections give problems */ CheckRequiredParameterValues(); } + else if (info == XLOG_FPW_CHANGE) + { + /* use volatile pointer to prevent code rearrangement */ + volatile XLogCtlData *xlogctl = XLogCtl; + + memcpy(&master_fpw, XLogRecGetData(record), sizeof(master_fpw)); + + /* record the LSN when FPW is changed false on master */ + SpinLockAcquire(&xlogctl->info_lck); + if (!master_fpw) + xlogctl->lastFpwDisabledLSN = lsn; + SpinLockRelease(&xlogctl->info_lck); + } } void *************** *** 8650,8656 **** appendStringInfo(buf, "checkpoint: redo %X/%X; " "tli %u; xid %u/%u; oid %u; multi %u; offset %u; " ! "oldest xid %u in DB %u; oldest running xid %u; %s", checkpoint->redo.xlogid, checkpoint->redo.xrecoff, checkpoint->ThisTimeLineID, checkpoint->nextXidEpoch, checkpoint->nextXid, --- 8685,8691 ---- appendStringInfo(buf, "checkpoint: redo %X/%X; " "tli %u; xid %u/%u; oid %u; multi %u; offset %u; " ! "oldest xid %u in DB %u; oldest running xid %u; full_page_writes %s; %s", checkpoint->redo.xlogid, checkpoint->redo.xrecoff, checkpoint->ThisTimeLineID, checkpoint->nextXidEpoch, checkpoint->nextXid, *************** *** 8660,8665 **** --- 8695,8701 ---- checkpoint->oldestXid, checkpoint->oldestXidDB, checkpoint->oldestActiveXid, + checkpoint->fullPageWrites ? "true" : "false", (info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online"); } else if (info == XLOG_NOOP) *************** *** 8717,8722 **** --- 8753,8766 ---- xlrec.max_locks_per_xact, wal_level_str); } + else if (info == XLOG_FPW_CHANGE) + { + bool fpw; + + memcpy(&fpw, rec, sizeof(fpw)); + appendStringInfo(buf, "fpw change: %s", + fpw ? "true" : "false"); + } else appendStringInfo(buf, "UNKNOWN"); } *************** *** 9089,9094 **** --- 9133,9149 ---- gotUniqueStartpoint = true; } while (!gotUniqueStartpoint); + /* + * check whether the master's FPW is 'off' when latest CHECKPOINT or + * since then. 
+ */ + if (recovery_in_progress && + (!ControlFile->checkPointCopy.fullPageWrites || + XLByteLE(startpoint, XLogCtl->lastFpwDisabledLSN))) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("full_page_writes on master is set invalid more than once since latest checkpoint"))); + XLByteToSeg(startpoint, _logId, _logSeg); XLogFileName(xlogfilename, ThisTimeLineID, _logId, _logSeg); *************** *** 9372,9377 **** --- 9427,9438 ---- "though pg_start_backup() was executed during recovery"), errhint("The database backup will not be usable."))); + /* check whether the master's FPW is 'off' since pg_start_backup. */ + if (recovery_in_progress && XLByteLE(startpoint, XLogCtl->lastFpwDisabledLSN)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("full_page_writes on master is set invalid more than once during online backup"))); + /* * During recovery, we don't write an end-of-backup record. We can * assume that pg_control was backed up just before pg_stop_backup() *************** *** 10743,10745 **** --- 10804,10856 ---- { SetLatch(&XLogCtl->recoveryWakeupLatch); } + + /* + * Publish this process's full_page_writes setting in XLogCtl->Insert and, when the setting has changed, log it as an XLOG_FPW_CHANGE record. + * startup_finish = true (startup process, after REDO): always update shared memory; false (walwriter): act only if shared memory differs from the GUC. + */ + void + ReportFpwParameters(bool startup_finish) + { + bool fpwReport = false; + bool fpwXLogInsert = true; + + if (startup_finish) + { + fpwReport = true; + if (master_fpw == fullPageWrites) + fpwXLogInsert = false; /* same as the value last seen in checkpoint/WAL: nothing new to log */ + } + else + { + LWLockAcquire(WALInsertLock, LW_EXCLUSIVE); + if (XLogCtl->Insert.fullPageWrites != fullPageWrites) + fpwReport = true; + LWLockRelease(WALInsertLock); + } + + if (fpwReport) + { + /* + * Log our own setting as an XLOG_FPW_CHANGE record. This is skipped when + * wal_level is not 'hot_standby' or, at end of startup, when the setting is unchanged. 
+ */ + if (XLogStandbyInfoActive() && fpwXLogInsert) + { + XLogRecData rdata; + bool record = fullPageWrites; + + rdata.buffer = InvalidBuffer; + rdata.data = (char *) &record; + rdata.len = sizeof(record); + rdata.next = NULL; + + XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE, &rdata); + } + + /* publish our setting in shared memory, under WALInsertLock */ + LWLockAcquire(WALInsertLock, LW_EXCLUSIVE); + XLogCtl->Insert.fullPageWrites = fullPageWrites; + LWLockRelease(WALInsertLock); + } + } diff -rcN postgresql_with_9fujii_patch/src/backend/postmaster/walwriter.c postgresql_with_patch/src/backend/postmaster/walwriter.c *** postgresql_with_9fujii_patch/src/backend/postmaster/walwriter.c 2011-10-06 06:05:45.000000000 +0900 --- postgresql_with_patch/src/backend/postmaster/walwriter.c 2011-10-09 01:40:52.000000000 +0900 *************** *** 216,221 **** --- 216,227 ---- PG_SETMASK(&UnBlockSig); /* + * After the startup process, the walwriter manages the FPW. Because + * the walwriter may have not received a SIGHUP then, it updates the FPW. + */ + ReportFpwParameters(false); + + /* * Loop forever */ for (;;) *************** *** 236,241 **** --- 242,252 ---- { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); + /* + * the walwriter manages the FPW. When the walwriter has received + * a SIGHUP, it updates the FPW. 
+ */ + ReportFpwParameters(false); } if (shutdown_requested) { diff -rcN postgresql_with_9fujii_patch/src/bin/pg_controldata/pg_controldata.c postgresql_with_patch/src/bin/pg_controldata/pg_controldata.c *** postgresql_with_9fujii_patch/src/bin/pg_controldata/pg_controldata.c 2011-10-06 06:06:19.000000000 +0900 --- postgresql_with_patch/src/bin/pg_controldata/pg_controldata.c 2011-10-09 01:40:52.000000000 +0900 *************** *** 224,229 **** --- 224,231 ---- ControlFile.checkPointCopy.oldestXidDB); printf(_("Latest checkpoint's oldestActiveXID: %u\n"), ControlFile.checkPointCopy.oldestActiveXid); + printf(_("Latest checkpoint's full_page_writes: %s\n"), + ControlFile.checkPointCopy.fullPageWrites ? "true" : "false"); printf(_("Time of latest checkpoint: %s\n"), ckpttime_str); printf(_("Minimum recovery ending location: %X/%X\n"), diff -rcN postgresql_with_9fujii_patch/src/bin/pg_resetxlog/pg_resetxlog.c postgresql_with_patch/src/bin/pg_resetxlog/pg_resetxlog.c *** postgresql_with_9fujii_patch/src/bin/pg_resetxlog/pg_resetxlog.c 2011-10-06 06:06:19.000000000 +0900 --- postgresql_with_patch/src/bin/pg_resetxlog/pg_resetxlog.c 2011-10-09 01:40:52.000000000 +0900 *************** *** 498,503 **** --- 498,504 ---- ControlFile.checkPointCopy.oldestXidDB = InvalidOid; ControlFile.checkPointCopy.time = (pg_time_t) time(NULL); ControlFile.checkPointCopy.oldestActiveXid = InvalidTransactionId; + ControlFile.checkPointCopy.fullPageWrites = true; ControlFile.state = DB_SHUTDOWNED; ControlFile.time = (pg_time_t) time(NULL); *************** *** 584,589 **** --- 585,592 ---- ControlFile.checkPointCopy.oldestXidDB); printf(_("Latest checkpoint's oldestActiveXID: %u\n"), ControlFile.checkPointCopy.oldestActiveXid); + printf(_("Latest checkpoint's full_page_writes: %s\n"), + ControlFile.checkPointCopy.fullPageWrites ? 
"true" : "false"); printf(_("Maximum data alignment: %u\n"), ControlFile.maxAlign); /* we don't print floatFormat since can't say much useful about it */ diff -rcN postgresql_with_9fujii_patch/src/include/access/xlog.h postgresql_with_patch/src/include/access/xlog.h *** postgresql_with_9fujii_patch/src/include/access/xlog.h 2011-10-06 06:05:45.000000000 +0900 --- postgresql_with_patch/src/include/access/xlog.h 2011-10-09 01:40:52.000000000 +0900 *************** *** 316,321 **** --- 316,322 ---- extern void StartupProcessMain(void); extern bool CheckPromoteSignal(void); extern void WakeupRecovery(void); + extern void ReportFpwParameters(bool startup_finish); /* * Starting/stopping a base backup diff -rcN postgresql_with_9fujii_patch/src/include/catalog/pg_control.h postgresql_with_patch/src/include/catalog/pg_control.h *** postgresql_with_9fujii_patch/src/include/catalog/pg_control.h 2011-10-06 06:06:19.000000000 +0900 --- postgresql_with_patch/src/include/catalog/pg_control.h 2011-10-09 01:40:51.000000000 +0900 *************** *** 49,54 **** --- 49,59 ---- * it's set to InvalidTransactionId. */ TransactionId oldestActiveXid; + + /* + * current FPW. It is used when executing pg_start_backup on hot standby. + */ + bool fullPageWrites; } CheckPoint; /* XLOG info values for XLOG rmgr */ *************** *** 60,65 **** --- 65,71 ---- #define XLOG_BACKUP_END 0x50 #define XLOG_PARAMETER_CHANGE 0x60 #define XLOG_RESTORE_POINT 0x70 + #define XLOG_FPW_CHANGE 0x80 /*