0001-refactor-detoast-to-use-decoded-external-toast-abstr.patch
application/octet-stream
Filename: 0001-refactor-detoast-to-use-decoded-external-toast-abstr.patch
Type: application/octet-stream
Part: 1
From 49939fe3416803d446ba43c404eae9711a3ae21b Mon Sep 17 00:00:00 2001
From: Dharin Shah <8616130+Dharin-shah@users.noreply.github.com>
Date: Sun, 28 Dec 2025 23:21:21 +0100
Subject: [PATCH v1] refactor detoast to use decoded external toast abstraction
---
src/backend/access/common/detoast.c | 326 ++++++++++++++++++++-------
src/include/access/toast_internals.h | 39 ++++
2 files changed, 278 insertions(+), 87 deletions(-)
diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c
index 62651787742..fdade574913 100644
--- a/src/backend/access/common/detoast.c
+++ b/src/backend/access/common/detoast.c
@@ -29,6 +29,70 @@ static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
static struct varlena *toast_decompress_datum(struct varlena *attr);
static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
+static struct varlena *toast_fetch_datum_decoded(const DecodedExternalToast *decoded);
+static struct varlena *toast_decompress_decoded(const struct varlena *compressed,
+ const DecodedExternalToast *decoded);
+
+/* ----------
+ * decode_external_toast_pointer -
+ *
+ * Decode external varlena into DecodedExternalToast struct.
+ * Only handles VARTAG_ONDISK with known compression methods.
+ * Returns false for INDIRECT, EXPANDED, or unrecognized compression.
+ * ----------
+ */
+static bool
+decode_external_toast_pointer(const struct varlena *attr,
+ DecodedExternalToast *decoded)
+{
+ struct varatt_external toast_pointer;
+ vartag_external tag;
+
+ Assert(VARATT_IS_EXTERNAL(attr));
+
+ tag = VARTAG_EXTERNAL(attr);
+
+ if (tag == VARTAG_ONDISK)
+ {
+ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+
+ decoded->toastrelid = toast_pointer.va_toastrelid;
+ decoded->valueid = toast_pointer.va_valueid;
+ decoded->rawsize = toast_pointer.va_rawsize;
+ decoded->extsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
+ decoded->flags = TOAST_EXT_IS_ONDISK;
+
+ if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
+ {
+ uint32 cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer);
+
+ switch (cmid)
+ {
+ case TOAST_PGLZ_COMPRESSION_ID:
+ decoded->method = TOAST_DECOMP_PGLZ;
+ decoded->flags |= TOAST_EXT_HAS_TCINFO;
+ break;
+ case TOAST_LZ4_COMPRESSION_ID:
+ decoded->method = TOAST_DECOMP_LZ4;
+ decoded->flags |= TOAST_EXT_HAS_TCINFO;
+ break;
+ default:
+ /* Unknown compression - let caller fall back */
+ return false;
+ }
+ }
+ else
+ {
+ decoded->method = TOAST_DECOMP_NONE;
+ }
+
+ return true;
+ }
+
+ /* Not an on-disk datum (INDIRECT, EXPANDED, etc.) */
+ return false;
+}
+
/* ----------
* detoast_external_attr -
*
@@ -115,57 +179,78 @@ detoast_external_attr(struct varlena *attr)
struct varlena *
detoast_attr(struct varlena *attr)
{
- if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (VARATT_IS_EXTERNAL(attr))
{
- /*
- * This is an externally stored datum --- fetch it back from there
- */
- attr = toast_fetch_datum(attr);
- /* If it's compressed, decompress it */
- if (VARATT_IS_COMPRESSED(attr))
+ DecodedExternalToast decoded;
+
+ if (decode_external_toast_pointer(attr, &decoded))
{
- struct varlena *tmp = attr;
+ struct varlena *fetched = toast_fetch_datum_decoded(&decoded);
+
+ if (decoded.method != TOAST_DECOMP_NONE)
+ {
+ struct varlena *result = toast_decompress_decoded(fetched, &decoded);
+ pfree(fetched);
+ return result;
+ }
+ return fetched;
+ }
- attr = toast_decompress_datum(tmp);
- pfree(tmp);
+ /* Decode failed: INDIRECT, EXPANDED, or unrecognized compression */
+ if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ {
+ /* Unrecognized compression - legacy path preserves error behavior */
+ attr = toast_fetch_datum(attr);
+ if (VARATT_IS_COMPRESSED(attr))
+ {
+ struct varlena *tmp = attr;
+
+ attr = toast_decompress_datum(tmp);
+ pfree(tmp);
+ }
+ return attr;
}
- }
- else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
- {
- /*
- * This is an indirect pointer --- dereference it
- */
- struct varatt_indirect redirect;
+ else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+ {
+ /*
+ * This is an indirect pointer --- dereference it
+ */
+ struct varatt_indirect redirect;
- VARATT_EXTERNAL_GET_POINTER(redirect, attr);
- attr = (struct varlena *) redirect.pointer;
+ VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+ attr = (struct varlena *) redirect.pointer;
- /* nested indirect Datums aren't allowed */
- Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
+ /* nested indirect Datums aren't allowed */
+ Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
- /* recurse in case value is still extended in some other way */
- attr = detoast_attr(attr);
+ /* recurse in case value is still extended in some other way */
+ attr = detoast_attr(attr);
- /* if it isn't, we'd better copy it */
- if (attr == (struct varlena *) redirect.pointer)
- {
- struct varlena *result;
+ /* if it isn't, we'd better copy it */
+ if (attr == (struct varlena *) redirect.pointer)
+ {
+ struct varlena *result;
- result = (struct varlena *) palloc(VARSIZE_ANY(attr));
- memcpy(result, attr, VARSIZE_ANY(attr));
- attr = result;
+ result = (struct varlena *) palloc(VARSIZE_ANY(attr));
+ memcpy(result, attr, VARSIZE_ANY(attr));
+ attr = result;
+ }
+ return attr;
+ }
+ else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+ {
+ /*
+ * This is an expanded-object pointer --- get flat format
+ */
+ attr = detoast_external_attr(attr);
+ /* flatteners are not allowed to produce compressed/short output */
+ Assert(!VARATT_IS_EXTENDED(attr));
+ return attr;
}
}
- else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
- {
- /*
- * This is an expanded-object pointer --- get flat format
- */
- attr = detoast_external_attr(attr);
- /* flatteners are not allowed to produce compressed/short output */
- Assert(!VARATT_IS_EXTENDED(attr));
- }
- else if (VARATT_IS_COMPRESSED(attr))
+
+ /* Handle inline cases (not external) */
+ if (VARATT_IS_COMPRESSED(attr))
{
/*
* This is a compressed value inside of the main tuple
@@ -223,63 +308,75 @@ detoast_attr_slice(struct varlena *attr,
else if (pg_add_s32_overflow(sliceoffset, slicelength, &slicelimit))
slicelength = slicelimit = -1;
- if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ if (VARATT_IS_EXTERNAL(attr))
{
- struct varatt_external toast_pointer;
+ DecodedExternalToast decoded;
- VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
+ if (decode_external_toast_pointer(attr, &decoded))
+ {
+ if (decoded.method == TOAST_DECOMP_NONE)
+ return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
+
+ /* Compressed: fetch enough to decompress the requested prefix */
+ if (slicelimit >= 0)
+ {
+ int32 max_size = decoded.extsize;
+
+ /* PGLZ supports partial decompression; LZ4 needs all data */
+ if (decoded.method == TOAST_DECOMP_PGLZ)
+ max_size = pglz_maximum_compressed_size(slicelimit, max_size);
+
+ preslice = toast_fetch_datum_slice(attr, 0, max_size);
+ }
+ else
+ preslice = toast_fetch_datum_decoded(&decoded);
+ }
+ else if (VARATT_IS_EXTERNAL_ONDISK(attr))
+ {
+ /* Unrecognized compression - legacy path */
+ struct varatt_external toast_pointer;
- /* fast path for non-compressed external datums */
- if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
- return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
+ VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
- /*
- * For compressed values, we need to fetch enough slices to decompress
- * at least the requested part (when a prefix is requested).
- * Otherwise, just fetch all slices.
- */
- if (slicelimit >= 0)
- {
- int32 max_size = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
+ if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
+ return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
- /*
- * Determine maximum amount of compressed data needed for a prefix
- * of a given length (after decompression).
- *
- * At least for now, if it's LZ4 data, we'll have to fetch the
- * whole thing, because there doesn't seem to be an API call to
- * determine how much compressed data we need to be sure of being
- * able to decompress the required slice.
- */
- if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) ==
- TOAST_PGLZ_COMPRESSION_ID)
- max_size = pglz_maximum_compressed_size(slicelimit, max_size);
+ if (slicelimit >= 0)
+ {
+ int32 max_size = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
- /*
- * Fetch enough compressed slices (compressed marker will get set
- * automatically).
- */
- preslice = toast_fetch_datum_slice(attr, 0, max_size);
+ if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) ==
+ TOAST_PGLZ_COMPRESSION_ID)
+ max_size = pglz_maximum_compressed_size(slicelimit, max_size);
+
+ preslice = toast_fetch_datum_slice(attr, 0, max_size);
+ }
+ else
+ preslice = toast_fetch_datum(attr);
}
- else
- preslice = toast_fetch_datum(attr);
- }
- else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
- {
- struct varatt_indirect redirect;
+ else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
+ {
+ struct varatt_indirect redirect;
- VARATT_EXTERNAL_GET_POINTER(redirect, attr);
+ VARATT_EXTERNAL_GET_POINTER(redirect, attr);
- /* nested indirect Datums aren't allowed */
- Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer));
+ /* nested indirect Datums aren't allowed */
+ Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer));
- return detoast_attr_slice(redirect.pointer,
- sliceoffset, slicelength);
- }
- else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
- {
- /* pass it off to detoast_external_attr to flatten */
- preslice = detoast_external_attr(attr);
+ return detoast_attr_slice(redirect.pointer,
+ sliceoffset, slicelength);
+ }
+ else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
+ {
+ /* pass it off to detoast_external_attr to flatten */
+ preslice = detoast_external_attr(attr);
+ }
+ else
+ {
+ /* Should not reach here - unknown external type */
+ elog(ERROR, "unexpected external varlena type");
+ preslice = attr; /* keep compiler quiet */
+ }
}
else
preslice = attr;
@@ -462,6 +559,61 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
return result;
}
+/* ----------
+ * toast_fetch_datum_decoded -
+ *
+ * Fetch TOAST data using pre-decoded pointer info.
+ * ----------
+ */
+static struct varlena *
+toast_fetch_datum_decoded(const DecodedExternalToast *decoded)
+{
+ Relation toastrel;
+ struct varlena *result;
+ int32 attrsize = decoded->extsize;
+
+ result = (struct varlena *) palloc(attrsize + VARHDRSZ);
+
+ /* HAS_TCINFO determines header format, not "is compressed" */
+ if (decoded->flags & TOAST_EXT_HAS_TCINFO)
+ SET_VARSIZE_COMPRESSED(result, attrsize + VARHDRSZ);
+ else
+ SET_VARSIZE(result, attrsize + VARHDRSZ);
+
+ if (attrsize == 0)
+ return result;
+
+ toastrel = table_open(decoded->toastrelid, AccessShareLock);
+ table_relation_fetch_toast_slice(toastrel, decoded->valueid,
+ attrsize, 0, attrsize, result);
+ table_close(toastrel, AccessShareLock);
+
+ return result;
+}
+
+/* ----------
+ * toast_decompress_decoded -
+ *
+ * Decompress TOAST data using decoded method.
+ * ----------
+ */
+static struct varlena *
+toast_decompress_decoded(const struct varlena *compressed,
+ const DecodedExternalToast *decoded)
+{
+ switch (decoded->method)
+ {
+ case TOAST_DECOMP_PGLZ:
+ return pglz_decompress_datum(compressed);
+ case TOAST_DECOMP_LZ4:
+ return lz4_decompress_datum(compressed);
+ case TOAST_DECOMP_NONE:
+ default:
+ elog(ERROR, "unexpected decompression method %d", decoded->method);
+ return NULL;
+ }
+}
+
/* ----------
* toast_decompress_datum -
*
diff --git a/src/include/access/toast_internals.h b/src/include/access/toast_internals.h
index 06ae8583c1e..0245f67b246 100644
--- a/src/include/access/toast_internals.h
+++ b/src/include/access/toast_internals.h
@@ -16,6 +16,45 @@
#include "storage/lockdefs.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
+#include "varatt.h"
+
+/*
+ * Decompression method for decoded toast pointers. Separate from
+ * ToastCompressionId (2-bit on-disk encoding) to allow future methods.
+ */
+typedef enum ToastDecompressMethod
+{
+ TOAST_DECOMP_NONE = 0,
+ TOAST_DECOMP_PGLZ = 1,
+ TOAST_DECOMP_LZ4 = 2
+} ToastDecompressMethod;
+
+/*
+ * Flags for DecodedExternalToast.
+ *
+ * HAS_TCINFO: Payload starts with tcinfo header. True for PGLZ/LZ4 external;
+ * false for uncompressed or future methods storing raw compressed data.
+ * IS_ONDISK: Set for VARTAG_ONDISK (for future vartag extension).
+ */
+#define TOAST_EXT_HAS_TCINFO 0x01
+#define TOAST_EXT_IS_ONDISK 0x02
+
+/*
+ * Decoded representation of an external on-disk TOAST pointer.
+ * Normalizes vartag/va_extinfo variations; decode once, use throughout.
+ *
+ * HAS_TCINFO indicates payload format (has tcinfo header), distinct from
+ * "is compressed" (extsize < rawsize) - future methods may omit tcinfo.
+ */
+typedef struct DecodedExternalToast
+{
+ Oid toastrelid;
+ Oid valueid;
+ uint32 rawsize; /* Decompressed size; for future methods without tcinfo */
+ uint32 extsize; /* On-disk payload size */
+ ToastDecompressMethod method;
+ uint8 flags;
+} DecodedExternalToast;
/*
* The information at the start of the compressed toast data.
--
2.39.3 (Apple Git-146)