Thread

  1. [PATCH] Restructure repack worker teardown

    Álvaro Herrera <alvherre@kurilemu.de> — 2026-05-18T17:13:24Z

    The original code would leave a shared memory segment unreleased if we
    fail partway through initialization.  Change the shutdown order so that
    we already free it.
    
    Author: Álvaro Herrera <alvherre@kurilemu.de>
    Discussion: https://postgr.es/m/agtNn6ZCmdI2KJFn@alvherre.pgsql
    ---
     src/backend/commands/repack.c | 67 ++++++++++++++++-------------------
     1 file changed, 31 insertions(+), 36 deletions(-)
    
    diff --git a/src/backend/commands/repack.c b/src/backend/commands/repack.c
    index bfc62c8f752..c9064d8fd13 100644
    --- a/src/backend/commands/repack.c
    +++ b/src/backend/commands/repack.c
    @@ -3411,10 +3411,14 @@ start_repack_decoding_worker(Oid relid)
     	shm_mq_handle *mqh;
     	BackgroundWorker bgw;
     
    +	decoding_worker = palloc0_object(DecodingWorker);
    +
     	/* Setup shared memory. */
     	size = BUFFERALIGN(offsetof(DecodingWorkerShared, error_queue)) +
     		BUFFERALIGN(REPACK_ERROR_QUEUE_SIZE);
     	seg = dsm_create(size, 0);
    +	decoding_worker->seg = seg;
    +
     	shared = (DecodingWorkerShared *) dsm_segment_address(seg);
     	shared->initialized = false;
     	shared->lsn_upto = InvalidXLogRecPtr;
    @@ -3454,14 +3458,12 @@ start_repack_decoding_worker(Oid relid)
     	bgw.bgw_main_arg = UInt32GetDatum(dsm_segment_handle(seg));
     	bgw.bgw_notify_pid = MyProcPid;
     
    -	decoding_worker = palloc0_object(DecodingWorker);
     	if (!RegisterDynamicBackgroundWorker(&bgw, &decoding_worker->handle))
     		ereport(ERROR,
     				errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
     				errmsg("out of background worker slots"),
     				errhint("You might need to increase \"%s\".", "max_worker_processes"));
     
    -	decoding_worker->seg = seg;
     	decoding_worker->error_mqh = mqh;
     
     	/*
    @@ -3487,17 +3489,6 @@ start_repack_decoding_worker(Oid relid)
     	ConditionVariableCancelSleep();
     }
     
    -/*
    - * PG_ENSURE_ERROR_CLEANUP callback to stop the decoding worker.
    - * This ensures the worker is terminated on both ERROR and FATAL exits,
    - * unlike PG_FINALLY which only handles ERROR.
    - */
    -static void
    -repack_decoding_worker_cleanup_cb(int code, Datum arg)
    -{
    -	stop_repack_decoding_worker();
    -}
    -
     /*
      * Stop the decoding worker and cleanup the related resources.
      *
    @@ -3508,39 +3499,43 @@ static void
     stop_repack_decoding_worker(void)
     {
     	BgwHandleStatus status;
    +	dsm_segment	   *dsmseg;
     
    -	/* Haven't reached the worker startup? */
    +	/* Nothing to do if no worker was set up. */
     	if (decoding_worker == NULL)
     		return;
     
    -	/* Could not register the worker? */
    -	if (decoding_worker->handle == NULL)
    -		return;
    +	/* Terminate the worker process, if one is running. */
    +	if (decoding_worker->handle != NULL)
    +	{
    +		TerminateBackgroundWorker(decoding_worker->handle);
    +		/* The worker should really exit before the REPACK command does. */
    +		HOLD_INTERRUPTS();
    +		status = WaitForBackgroundWorkerShutdown(decoding_worker->handle);
    +		RESUME_INTERRUPTS();
     
    -	TerminateBackgroundWorker(decoding_worker->handle);
    -	/* The worker should really exit before the REPACK command does. */
    -	HOLD_INTERRUPTS();
    -	status = WaitForBackgroundWorkerShutdown(decoding_worker->handle);
    -	RESUME_INTERRUPTS();
    -
    -	if (status == BGWH_POSTMASTER_DIED)
    -		ereport(FATAL,
    -				errcode(ERRCODE_ADMIN_SHUTDOWN),
    -				errmsg("postmaster exited during REPACK command"));
    -
    -	shm_mq_detach(decoding_worker->error_mqh);
    +		if (status == BGWH_POSTMASTER_DIED)
    +			ereport(FATAL,
    +					errcode(ERRCODE_ADMIN_SHUTDOWN),
    +					errmsg("postmaster exited during REPACK command"));
    +	}
     
     	/*
    -	 * If we could not cancel the current sleep due to ERROR, do that before
    -	 * we detach from the shared memory the condition variable is located in.
    -	 * If we did not, the bgworker ERROR handling code would try and fail
    -	 * badly.
    +	 * Now detach from our shared memory segment.  In error cases there might
    +	 * still be messages from the worker in the queue, which ProcessInterrupts
    +	 * would try to read; this is pointless (and causes an assertion failure),
    +	 * so set the global pointer to NULL to have ProcessRepackMessages ignore
    +	 * them.
     	 */
    -	ConditionVariableCancelSleep();
    -
    -	dsm_detach(decoding_worker->seg);
    +	dsmseg = decoding_worker->seg;
     	pfree(decoding_worker);
     	decoding_worker = NULL;
    +
    +	/* We must also cancel the current sleep, if one is still set up */
    +	ConditionVariableCancelSleep();
    +
    +	if (dsmseg != NULL)
    +		dsm_detach(dsmseg);
     }
     
     /* stop_repack_decoding_worker, wrapped as a before_shmem_exit callback */
    -- 
    2.47.3
    
    
    --b42ct6adeyjj4cbm--