From 8f3202f431373baab81545d8970236929676f71b Mon Sep 17 00:00:00 2001 From: MHSanaei Date: Mon, 11 May 2026 16:01:04 +0200 Subject: [PATCH] fix(traffic-writer): replace sync.Once with Start/Stop cycle so SIGHUP restart works MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a SIGHUP-driven panel restart (which is exactly what the frontend triggers after a successful DB import via /panel/setting/restartPanel), the previous implementation deadlocked: 1. server.Stop() called StopTrafficWriter — cancels the context and waits for the consumer goroutine to exit. The goroutine dies. 2. server.Start() called StartTrafficWriter, but sync.Once had already fired, so it was a no-op. twQueue still pointed to the old channel with no consumer. 3. startTask() → RestartXray(true) → GetXrayConfig() → InboundService.AddTraffic(nil, nil) → submitTrafficWrite. The send to twQueue succeeded (buffer space) but <-req.done blocked forever because no goroutine was draining the channel. 4. RestartXray held the global xray lock for the entire hang, so every subsequent restart attempt from the panel UI also blocked on lock.Lock(). User-visible symptom: xray stopped silently after DB import and no panel action could revive it. Replace sync.Once with a mutex-guarded Start that spawns a fresh goroutine on each cycle, and a Stop that resets the package state so the next Start works. runTrafficWriter now takes its channels as parameters instead of reading package vars, so the old goroutine can't interfere with a new one if their lifetimes briefly overlap. --- web/service/traffic_writer.go | 65 +++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 18 deletions(-) diff --git a/web/service/traffic_writer.go b/web/service/traffic_writer.go index b3fa97b6..b15c459a 100644 --- a/web/service/traffic_writer.go +++ b/web/service/traffic_writer.go @@ -21,39 +21,64 @@ type trafficWriteRequest struct { } var ( + twMu sync.Mutex twQueue chan *trafficWriteRequest - twCtx context.Context twCancel context.CancelFunc twDone chan struct{} - twOnce sync.Once ) +// StartTrafficWriter spins up the serial writer goroutine. Safe to call again +// after StopTrafficWriter — each Start/Stop cycle gets fresh channels. The +// previous sync.Once-based implementation deadlocked after a SIGHUP-driven +// panel restart: Stop killed the consumer goroutine but Once prevented Start +// from spawning a new one, so every later submitTrafficWrite blocked forever +// on <-req.done with no consumer (including the AddTraffic call inside +// XrayService.GetXrayConfig that runs from startTask). func StartTrafficWriter() { - twOnce.Do(func() { - twQueue = make(chan *trafficWriteRequest, trafficWriterQueueSize) - twCtx, twCancel = context.WithCancel(context.Background()) - twDone = make(chan struct{}) - go runTrafficWriter() - }) + twMu.Lock() + defer twMu.Unlock() + if twQueue != nil { + return + } + queue := make(chan *trafficWriteRequest, trafficWriterQueueSize) + ctx, cancel := context.WithCancel(context.Background()) + done := make(chan struct{}) + twQueue = queue + twCancel = cancel + twDone = done + go runTrafficWriter(queue, ctx, done) } +// StopTrafficWriter cancels the writer context and waits for the goroutine to +// drain any pending requests before returning. Resets the package state so a +// subsequent StartTrafficWriter can spawn a fresh consumer. func StopTrafficWriter() { - if twCancel != nil { - twCancel() - <-twDone + twMu.Lock() + cancel := twCancel + done := twDone + twQueue = nil + twCancel = nil + twDone = nil + twMu.Unlock() + + if cancel != nil { + cancel() + } + if done != nil { + <-done } } -func runTrafficWriter() { - defer close(twDone) +func runTrafficWriter(queue chan *trafficWriteRequest, ctx context.Context, done chan struct{}) { + defer close(done) for { select { - case req := <-twQueue: + case req := <-queue: req.done <- safeApply(req.apply) - case <-twCtx.Done(): + case <-ctx.Done(): for { select { - case req := <-twQueue: + case req := <-queue: req.done <- safeApply(req.apply) default: return @@ -74,12 +99,16 @@ func safeApply(fn func() error) (err error) { } func submitTrafficWrite(fn func() error) error { - if twQueue == nil { + twMu.Lock() + queue := twQueue + twMu.Unlock() + + if queue == nil { return safeApply(fn) } req := &trafficWriteRequest{apply: fn, done: make(chan error, 1)} select { - case twQueue <- req: + case queue <- req: case <-time.After(trafficWriterSubmitTimeout): return errors.New("traffic writer queue full") }