From edbe7493547c7e2a83847acf4e34c72ea1fd7575 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Thu, 27 Feb 2020 17:46:43 +0530 Subject: [PATCH v8 4/5] Modify server code to generate backup manifest in JSON format. This will eventually get merged into the previous patch to add backup manifest functionality to the server, but I'm keeping it separate for now because I don't have the validator working with this format yet. --- src/backend/replication/basebackup.c | 156 +++++++++++++-------------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 99e102b2a7..56a10ae259 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -40,6 +40,7 @@ #include "storage/ipc.h" #include "storage/reinit.h" #include "utils/builtins.h" +#include "utils/json.h" #include "utils/ps_status.h" #include "utils/relcache.h" #include "utils/resowner.h" @@ -63,6 +64,7 @@ struct manifest_info pg_checksum_type checksum_type; pg_sha256_ctx manifest_ctx; uint64 manifest_size; + bool first_file; bool still_checksumming; }; @@ -87,7 +89,6 @@ static void AddFileToManifest(manifest_info *manifest, const char *spcoid, const char *pathname, size_t size, time_t mtime, pg_checksum_context *checksum_ctx); static void SendBackupManifest(manifest_info *manifest); -static char *escape_field_for_manifest(const char *s); static void perform_base_backup(basebackup_options *opt); static void parse_basebackup_options(List *options, basebackup_options *opt); static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli); @@ -947,9 +948,12 @@ InitializeManifest(manifest_info *manifest, pg_checksum_type checksum_type) manifest->checksum_type = checksum_type; pg_sha256_init(&manifest->manifest_ctx); manifest->manifest_size = UINT64CONST(0); + manifest->first_file = true; manifest->still_checksumming = true; - AppendToManifest(manifest, "PostgreSQL-Backup-Manifest-Version\t1\n"); + 
AppendToManifest(manifest, + "{ \"PostgreSQL-Backup-Manifest-Version\": 1,\n" + "\"Files\": ["); } /* @@ -980,8 +984,8 @@ AddFileToManifest(manifest_info *manifest, const char *spcoid, pg_checksum_context *checksum_ctx) { char pathbuf[MAXPGPATH]; - char *escaped_filename; - static char timebuf[128]; + int pathlen; + StringInfoData buf; /* * If this file is part of a tablespace, the filename passed to this @@ -991,44 +995,86 @@ AddFileToManifest(manifest_info *manifest, const char *spcoid, */ if (spcoid != NULL) { - snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", spcoid, filename); - filename = pathbuf; + snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", spcoid, + pathname); + pathname = pathbuf; } - /* Escape filename, if necessary. */ - escaped_filename = escape_field_for_manifest(filename); + /* + * Each file's entry needs to be separated from any entry that follows + * by a comma, but there's no comma before the first one or after the + * last one. To make that work, adding a file to the manifest starts + * by terminating the most recently added line, with a comma if + * appropriate, but does not terminate the line inserted for this file. + */ + initStringInfo(&buf); + if (manifest->first_file) + { + appendStringInfoString(&buf, "\n"); + manifest->first_file = false; + } + else + appendStringInfoString(&buf, ",\n"); + + /* + * Write the relative pathname to this file out to the manifest. The + * manifest is always stored in UTF-8, so we have to encode paths that + * are not valid in that encoding. 
+ */ + pathlen = strlen(pathname); + if (pg_verify_mbstr(PG_UTF8, pathname, pathlen, true)) + { + appendStringInfoString(&buf, "{ \"Path\": "); + escape_json(&buf, pathname); + appendStringInfoString(&buf, ", "); + } + else + { + appendStringInfoString(&buf, "{ \"Encoded-Path\": \""); + enlargeStringInfo(&buf, 2 * pathlen); + buf.len += hex_encode((char *) pathname, pathlen, + &buf.data[buf.len]); + appendStringInfoString(&buf, "\", "); + } + + appendStringInfo(&buf, "\"Size\": %zu, ", size); /* - * Convert time to a string. Since it's not clear what time zone to use - * and since time zone definitions can change, possibly causing confusion, - * use GMT always. + * Convert last modification time to a string and append it to the + * manifest. Since it's not clear what time zone to use and since time + * zone definitions can change, possibly causing confusion, use GMT always. */ - pg_strftime(timebuf, sizeof(timebuf), "%Y-%m-%d %H:%M:%S %Z", - pg_gmtime(&mtime)); + appendStringInfoString(&buf, "\"Last-Modified\": \""); + enlargeStringInfo(&buf, 128); + buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z", + pg_gmtime(&mtime)); + appendStringInfoString(&buf, "\""); - /* Add to manifest. */ - AppendToManifest(manifest, "File\t%s\t%zu\t%s\t%s", - escaped_filename == NULL ? filename : escaped_filename, - size, timebuf, pg_checksum_type_name(checksum_ctx->type)); + /* Add checksum information. 
*/ if (checksum_ctx->type != CHECKSUM_TYPE_NONE) { uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH]; - char checksumstringbuf[PG_CHECKSUM_MAX_LENGTH * 2 + 1]; int checksumlen; - int checksumstringlen; - AppendStringToManifest(manifest, ":"); checksumlen = pg_checksum_final(checksum_ctx, checksumbuf); - checksumstringlen = hex_encode((char *) checksumbuf, checksumlen, - checksumstringbuf); - checksumstringbuf[checksumstringlen] = '\0'; - AppendStringToManifest(manifest, checksumstringbuf); + + appendStringInfo(&buf, + ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"", + pg_checksum_type_name(checksum_ctx->type)); + enlargeStringInfo(&buf, 2 * checksumlen); + buf.len += hex_encode((char *) checksumbuf, checksumlen, + &buf.data[buf.len]); + appendStringInfoString(&buf, "\""); } - AppendStringToManifest(manifest, "\n"); + + /* Close out the object. */ + appendStringInfoString(&buf, " }"); + + /* OK, add it to the manifest. */ + AppendStringToManifest(manifest, buf.data); /* Avoid leaking memory. */ - if (escaped_filename != NULL) - pfree(escaped_filename); + pfree(buf.data); } /* @@ -1042,6 +1088,9 @@ SendBackupManifest(manifest_info *manifest) char checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH]; size_t manifest_bytes_done = 0; + /* Terminate the list of files. */ + AppendStringToManifest(manifest, "],\n"); + /* * Append manifest checksum, so that the problems with the manifest itself * can be detected. 
@@ -1055,11 +1104,11 @@ SendBackupManifest(manifest_info *manifest) */ manifest->still_checksumming = false; pg_sha256_final(&manifest->manifest_ctx, checksumbuf); - AppendStringToManifest(manifest, "Manifest-Checksum\t"); + AppendStringToManifest(manifest, "\"Manifest-Checksum\": \""); hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf); checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0'; AppendStringToManifest(manifest, checksumstringbuf); - AppendStringToManifest(manifest, "\n"); + AppendStringToManifest(manifest, "\"}\n"); /* * We've written all the data to the manifest file. Rewind the file so @@ -1107,55 +1156,6 @@ SendBackupManifest(manifest_info *manifest) BufFileClose(manifest->buffile); } -/* - * Escape a field for inclusion in a manifest. - * - * We use the following escaping rule: If a field contains \t, \r, or \n, - * the field must be surrounded by double-quotes, and any internal double - * quotes must be doubled. Otherwise, no escaping is required. - * - * The return value is a new palloc'd string with escaping added, or NULL - * if no escaping is required. - */ -static char * -escape_field_for_manifest(const char *s) -{ - bool escaping_required = false; - int escaped_length = 2; - const char *t; - char *result; - char *r; - - for (t = s; *t != '\0'; ++t) - { - if (*t == '\t' || *t == '\r' || *t == '\n') - escaping_required = true; - if (*t == '"') - ++escaped_length; - ++escaped_length; - } - - if (!escaping_required) - return NULL; - - result = palloc(escaped_length + 1); - result[0] = '"'; - result[escaped_length - 1] = '"'; - result[escaped_length] = '\0'; - r = result + 1; - - for (t = s; *t != '\0'; ++t) - { - *(r++) = *t; - if (*t == '"') - *(r++) = *t; - } - - Assert(r == &result[escaped_length - 1]); - - return result; -} - /* * Send a single resultset containing just a single * XLogRecPtr record (in text format) -- 2.17.2 (Apple Git-113)