log-crashed-query-v2.patch
text/x-patch
Filename: log-crashed-query-v2.patch
Type: text/x-patch
Part: 0
Patch
Same data as JSON:
GET /api/v1/attachments/:id/patch
the parsed metadata as JSON — format, series position, per-file stats; never the diff bytes.
API reference →
Format: format-patch
Series: patch v2
Subject: Log crashed backend's query (activity string)
| File | + | − |
|---|---|---|
| src/backend/postmaster/pgstat.c | 59 | 0 |
| src/backend/postmaster/postmaster.c | 12 | 5 |
| src/backend/utils/adt/ascii.c | 31 | 0 |
| src/include/pgstat.h | 1 | 0 |
| src/include/utils/ascii.h | 1 | 0 |
From 0f46bb1357bafbe940e7df8fce38c01e2237f57e Mon Sep 17 00:00:00 2001
From: Marti Raudsepp <marti@juffo.org>
Date: Wed, 28 Sep 2011 00:46:48 +0300
Subject: [PATCH] Log crashed backend's query (activity string)
The crashing query is often a good starting point in debugging the
cause, and much more easily accessible than core dumps.
We're extra-paranoid in reading the activity buffer since it might be
corrupt. All non-ASCII characters are replaced with '?'
Example output:
LOG: server process (PID 31451) was terminated by signal 9: Killed
DETAIL: Running query: DO LANGUAGE plpythonu 'import os;os.kill(os.getpid(),9)'
---
src/backend/postmaster/pgstat.c | 59 +++++++++++++++++++++++++++++++++++
src/backend/postmaster/postmaster.c | 17 +++++++---
src/backend/utils/adt/ascii.c | 31 ++++++++++++++++++
src/include/pgstat.h | 1 +
src/include/utils/ascii.h | 1 +
5 files changed, 104 insertions(+), 5 deletions(-)
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index eb9adc8..812bf04 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -58,6 +58,7 @@
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
#include "storage/procsignal.h"
+#include "utils/ascii.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
@@ -2747,6 +2748,64 @@ pgstat_get_backend_current_activity(int pid, bool checkUser)
return "<backend information not available>";
}
+/* ----------
+ * pgstat_get_backend_current_activity() -
+ *
+ * Return a string representing the current activity of the backend with
+ * the specified PID. Like the function above, but reads shared memory with
+ * the expectation that it may be corrupt.
+ *
+ * This function is only intended to be used by postmaster to report the
+ * query that crashed the backend. In particular, no attempt is made to
+ * follow the correct concurrency protocol when accessing the
+ * BackendStatusArray. But that's OK, in the worst case we get a corrupted
+ * message. We also must take care not to trip on ereport(ERROR).
+ *
+ * Note: return strings for special cases match pg_stat_get_backend_activity.
+ * ----------
+ */
+const char *
+pgstat_get_crashed_backend_activity(int pid)
+{
+ volatile PgBackendStatus *beentry;
+ int i;
+
+ beentry = BackendStatusArray;
+ for (i = 1; i <= MaxBackends; i++)
+ {
+ if (beentry->st_procpid == pid)
+ {
+ /* Read pointer just once, so it can't change after validation */
+ const char *activity = beentry->st_activity;
+ char *buffer;
+
+ /*
+ * We can't access activity pointer before we verify that it
+ * falls into BackendActivityBuffer. To make sure that the
+ * string's ending is contained within the buffer, the string
+ * can start at offset (MaxBackends - 1) at most.
+ */
+ if (activity < BackendActivityBuffer ||
+ activity > (BackendActivityBuffer +
+ (MaxBackends - 1) * pgstat_track_activity_query_size))
+ return "<command string corrupt>";
+
+ if (*(activity) == '\0')
+ return "<command string not enabled>";
+
+ buffer = (char *) palloc(pgstat_track_activity_query_size);
+ ascii_safe_strncpy(buffer, activity,
+ pgstat_track_activity_query_size);
+
+ return buffer;
+ }
+
+ beentry++;
+ }
+
+ /* PID not found */
+ return "<backend information not available>";
+}
/* ------------------------------------------------------------
* Local support functions follow
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 94b57fa..9ba622c 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -2763,6 +2763,8 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
+ const char *activity = pgstat_get_crashed_backend_activity(pid);
+
if (WIFEXITED(exitstatus))
ereport(lev,
@@ -2770,7 +2772,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
translator: %s is a noun phrase describing a child process, such as
"server process" */
(errmsg("%s (PID %d) exited with exit code %d",
- procname, pid, WEXITSTATUS(exitstatus))));
+ procname, pid, WEXITSTATUS(exitstatus)),
+ errdetail("Running query: %s", activity)));
else if (WIFSIGNALED(exitstatus))
#if defined(WIN32)
ereport(lev,
@@ -2780,7 +2783,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
"server process" */
(errmsg("%s (PID %d) was terminated by exception 0x%X",
procname, pid, WTERMSIG(exitstatus)),
- errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value.")));
+ errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
+ errdetail("Running query: %s", activity)));
#elif defined(HAVE_DECL_SYS_SIGLIST) && HAVE_DECL_SYS_SIGLIST
ereport(lev,
@@ -2790,7 +2794,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
(errmsg("%s (PID %d) was terminated by signal %d: %s",
procname, pid, WTERMSIG(exitstatus),
WTERMSIG(exitstatus) < NSIG ?
- sys_siglist[WTERMSIG(exitstatus)] : "(unknown)")));
+ sys_siglist[WTERMSIG(exitstatus)] : "(unknown)"),
+ errdetail("Running query: %s", activity)));
#else
ereport(lev,
@@ -2798,7 +2803,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
translator: %s is a noun phrase describing a child process, such as
"server process" */
(errmsg("%s (PID %d) was terminated by signal %d",
- procname, pid, WTERMSIG(exitstatus))));
+ procname, pid, WTERMSIG(exitstatus)),
+ errdetail("Running query: %s", activity)));
#endif
else
ereport(lev,
@@ -2807,7 +2813,8 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
translator: %s is a noun phrase describing a child process, such as
"server process" */
(errmsg("%s (PID %d) exited with unrecognized status %d",
- procname, pid, exitstatus)));
+ procname, pid, exitstatus),
+ errdetail("Running query: %s", activity)));
}
/*
diff --git a/src/backend/utils/adt/ascii.c b/src/backend/utils/adt/ascii.c
index 7ecf494..d59f6ec 100644
--- a/src/backend/utils/adt/ascii.c
+++ b/src/backend/utils/adt/ascii.c
@@ -158,3 +158,34 @@ to_ascii_default(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
}
+
+/* ----------
+ * convert a string in unknown encoding to ASCII. Replace non-ASCII bytes
+ * with '?'
+ * This must not trigger ereport(ERROR), as this is called from postmaster.
+ * ----------
+ */
+void
+ascii_safe_strncpy(char *dest, const char *src, int len)
+{
+ int i;
+
+ for (i = 0; i < (len - 1); i++)
+ {
+ char ch = src[i];
+
+ if (ch == '\0')
+ break;
+ /* Keep printable ASCII characters */
+ if (32 <= ch && ch <= 127)
+ dest[i] = src[i];
+ /* White-space is also OK */
+ else if (ch == '\n' || ch == '\r' || ch == '\t')
+ dest[i] = src[i];
+ /* Everything else is replaced with '?' */
+ else
+ dest[i] = '?';
+ }
+
+ dest[i] = '\0';
+}
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 20c4d43..9694896 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -720,6 +720,7 @@ extern void pgstat_report_appname(const char *appname);
extern void pgstat_report_xact_timestamp(TimestampTz tstamp);
extern void pgstat_report_waiting(bool waiting);
extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser);
+extern const char *pgstat_get_crashed_backend_activity(int pid);
extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id);
extern PgStat_BackendFunctionEntry *find_funcstat_entry(Oid func_id);
diff --git a/src/include/utils/ascii.h b/src/include/utils/ascii.h
index ad70e03..adeddb5 100644
--- a/src/include/utils/ascii.h
+++ b/src/include/utils/ascii.h
@@ -16,5 +16,6 @@
extern Datum to_ascii_encname(PG_FUNCTION_ARGS);
extern Datum to_ascii_enc(PG_FUNCTION_ARGS);
extern Datum to_ascii_default(PG_FUNCTION_ARGS);
+extern void ascii_safe_strncpy(char *dest, const char *src, int len);
#endif /* _ASCII_H_ */
--
1.7.6.3