From dd6fc4b69cc675374ca5ff0cd830dd4f4b0a3122 Mon Sep 17 00:00:00 2001 From: Jonathan Miller Date: Tue, 26 May 2026 15:16:30 -0500 Subject: [PATCH] fix(actions): retry workflow insertion on database deadlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When multiple workflows are triggered by a single event (e.g. a pull_request with several matching workflow files), each InsertRun transaction acquires an X-lock on the repository row via UpdateRepoRunsNumbers and an index lock on action_run. Two concurrent transactions can deadlock when each holds one lock and waits for the other. InnoDB kills the lighter transaction, but handleWorkflows only logged the error and silently dropped the workflow run — making it appear as though pull_request events were never fired. This was the root cause of API-created PRs appearing to not trigger Actions workflows: the notification pipeline was correct, but the DB insert was lost to an unretried deadlock. The fix wraps PrepareRunAndInsert in a retry loop (up to 3 attempts with exponential backoff) that detects deadlock errors across MySQL, PostgreSQL, and SQLite. On deadlock, the rolled-back run fields are reset before the next attempt. Also adds db.IsErrDeadlock() for cross-engine deadlock detection and unit tests for the same. Closes #220 Co-Authored-By: Claude Opus 4.6 (1M context) --- models/db/error.go | 25 ++++++++++++++ models/db/error_test.go | 31 ++++++++++++++++++ services/actions/notifier_helper.go | 51 ++++++++++++++++++++++++++++- 3 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 models/db/error_test.go diff --git a/models/db/error.go b/models/db/error.go index a2f6dfc1c2..afab17f0af 100644 --- a/models/db/error.go +++ b/models/db/error.go @@ -5,6 +5,7 @@ package db import ( "fmt" + "strings" "git.mokoconsulting.tech/MokoConsulting/MokoGitea/modules/util" ) @@ -72,3 +73,27 @@ func (err ErrNotExist) Error() string { func (err ErrNotExist) Unwrap() error { return util.ErrNotExist } + +// IsErrDeadlock checks whether err is a database deadlock. +// MySQL returns error 1213 (ER_LOCK_DEADLOCK / SQLSTATE 40001). +// PostgreSQL returns SQLSTATE 40P01 with "deadlock detected". +// SQLite returns SQLITE_BUSY (error 5) with "database is locked". +func IsErrDeadlock(err error) bool { + if err == nil { + return false + } + msg := err.Error() + // MySQL / MariaDB: "Error 1213 (40001): Deadlock found when trying to get lock" + if strings.Contains(msg, "Error 1213") || strings.Contains(msg, "40001") { + return true + } + // PostgreSQL: "deadlock detected" + if strings.Contains(msg, "deadlock detected") { + return true + } + // SQLite: "database is locked" + if strings.Contains(msg, "database is locked") { + return true + } + return false +} diff --git a/models/db/error_test.go b/models/db/error_test.go new file mode 100644 index 0000000000..f328ba34dd --- /dev/null +++ b/models/db/error_test.go @@ -0,0 +1,31 @@ +// Copyright 2026 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package db + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestIsErrDeadlock(t *testing.T) { + tests := []struct { + name string + err error + want bool + }{ + {name: "nil", err: nil, want: false}, + {name: "unrelated", err: errors.New("connection refused"), want: false}, + {name: "mysql 1213", err: errors.New("Error 1213 (40001): Deadlock found when trying to get lock; try restarting transaction"), want: true}, + {name: "mysql sqlstate", err: errors.New("SQLSTATE 40001: serialization failure"), want: true}, + {name: "postgres", err: errors.New("pq: deadlock detected"), want: true}, + {name: "sqlite", err: errors.New("database is locked"), want: true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, IsErrDeadlock(tt.err)) + }) + } +} diff --git a/services/actions/notifier_helper.go b/services/actions/notifier_helper.go index 4b0817aac1..f528ca36d0 100644 --- a/services/actions/notifier_helper.go +++ b/services/actions/notifier_helper.go @@ -9,6 +9,7 @@ import ( "fmt" "slices" "strings" + "time" actions_model "git.mokoconsulting.tech/MokoConsulting/MokoGitea/models/actions" "git.mokoconsulting.tech/MokoConsulting/MokoGitea/models/db" @@ -344,7 +345,7 @@ func handleWorkflows( run.NeedApproval = need - if err := PrepareRunAndInsert(ctx, dwf.Content, run, nil); err != nil { + if err := prepareRunAndInsertWithRetry(ctx, dwf.Content, run); err != nil { log.Error("PrepareRunAndInsert: %v", err) continue } @@ -352,6 +353,54 @@ func handleWorkflows( return nil } +// prepareRunAndInsertWithRetry wraps PrepareRunAndInsert with retries on +// database deadlocks. When multiple workflow runs are inserted for the same +// event (e.g. several workflows triggered by a single pull_request), each +// InsertRun transaction acquires an X-lock on the repository row (via +// UpdateRepoRunsNumbers) and an index lock on action_run. Two concurrent +// transactions can deadlock when each holds one lock and waits for the other. +// InnoDB resolves this by killing the lighter transaction, but handleWorkflows +// only logged the error and moved on — silently dropping the workflow run. +// Retrying the insert is safe because the rolled-back transaction left no +// partial state. +func prepareRunAndInsertWithRetry(ctx context.Context, content []byte, run *actions_model.ActionRun) error { + const maxRetries = 3 + backoff := 50 * time.Millisecond + + // Save original values that InsertRun mutates inside its transaction. + // On deadlock rollback these become stale and must be reset before retry. + origTitle := run.Title + + var err error + for attempt := range maxRetries { + if err = PrepareRunAndInsert(ctx, content, run, nil); err == nil { + return nil + } + if !db.IsErrDeadlock(err) { + return err + } + log.Warn("PrepareRunAndInsert deadlock (attempt %d/%d) for workflow %s in repo %d, retrying: %v", + attempt+1, maxRetries, run.WorkflowID, run.RepoID, err) + + // Reset fields that InsertRun sets inside the (now rolled-back) transaction + // so the next attempt starts clean. + run.ID = 0 + run.Index = 0 + run.Status = actions_model.StatusWaiting + run.Title = origTitle + run.ConcurrencyGroup = "" + run.ConcurrencyCancel = false + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(backoff): + } + backoff *= 2 + } + return fmt.Errorf("deadlock persisted after %d retries: %w", maxRetries, err) +} + func newNotifyInputFromIssue(issue *issues_model.Issue, event webhook_module.HookEventType) *notifyInput { return newNotifyInput(issue.Repo, issue.Poster, event) } -- 2.52.0