[client] Fix exit node menu not refreshing on Windows (#5553)

* [client] Fix exit node menu not refreshing on Windows

TrayOpenedCh is not implemented in the systray library on Windows,
so exit nodes were never refreshed after the initial connect. Combined
with the management sync not having populated routes yet when the
Connected status fires, this caused the exit node menu to remain empty
permanently after disconnect/reconnect cycles.
Add a background poller on Windows that refreshes exit nodes while
connected, with fast initial polling to catch routes from management
sync followed by a steady 10s interval. On macOS/Linux, TrayOpenedCh
continues to handle refreshes on each tray open.
Also fix a data race on connectClient assignment in the server's connect()
method and add nil checks in CleanState/DeleteState to prevent panics
when connectClient is nil.

* Remove unused exitNodeIDs

* Remove unused exitNodeState struct
This commit is contained in:
Maycon Santos
2026-03-09 18:39:11 +01:00
committed by GitHub
parent 11eb725ac8
commit 15aa6bae1b
6 changed files with 267 additions and 38 deletions

View File

@@ -1625,9 +1625,14 @@ func (s *Server) GetFeatures(ctx context.Context, msg *proto.GetFeaturesRequest)
func (s *Server) connect(ctx context.Context, config *profilemanager.Config, statusRecorder *peer.Status, doInitialAutoUpdate bool, runningChan chan struct{}) error {
log.Tracef("running client connection")
s.connectClient = internal.NewConnectClient(ctx, config, statusRecorder, doInitialAutoUpdate)
s.connectClient.SetSyncResponsePersistence(s.persistSyncResponse)
if err := s.connectClient.Run(runningChan, s.logFile); err != nil {
client := internal.NewConnectClient(ctx, config, statusRecorder, doInitialAutoUpdate)
client.SetSyncResponsePersistence(s.persistSyncResponse)
s.mutex.Lock()
s.connectClient = client
s.mutex.Unlock()
if err := client.Run(runningChan, s.logFile); err != nil {
return err
}
return nil

View File

@@ -0,0 +1,187 @@
package server
import (
"context"
"sync"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/netbirdio/netbird/client/internal"
"github.com/netbirdio/netbird/client/internal/peer"
"github.com/netbirdio/netbird/client/proto"
)
func newTestServer() *Server {
return &Server{
rootCtx: context.Background(),
statusRecorder: peer.NewRecorder(""),
}
}
func newDummyConnectClient(ctx context.Context) *internal.ConnectClient {
return internal.NewConnectClient(ctx, nil, nil, false)
}
// TestConnectSetsClientWithMutex validates that connect() sets s.connectClient
// under mutex protection so concurrent readers see a consistent value.
func TestConnectSetsClientWithMutex(t *testing.T) {
s := newTestServer()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// Manually simulate what connect() does (without calling Run which panics without full setup)
client := newDummyConnectClient(ctx)
s.mutex.Lock()
s.connectClient = client
s.mutex.Unlock()
// Verify the assignment is visible under mutex
s.mutex.Lock()
assert.Equal(t, client, s.connectClient, "connectClient should be set")
s.mutex.Unlock()
}
// TestConcurrentConnectClientAccess validates that concurrent reads of
// s.connectClient under mutex don't race with a write.
func TestConcurrentConnectClientAccess(t *testing.T) {
s := newTestServer()
ctx := context.Background()
client := newDummyConnectClient(ctx)
var wg sync.WaitGroup
nilCount := 0
setCount := 0
var mu sync.Mutex
// Start readers
for i := 0; i < 50; i++ {
wg.Add(1)
go func() {
defer wg.Done()
s.mutex.Lock()
c := s.connectClient
s.mutex.Unlock()
mu.Lock()
defer mu.Unlock()
if c == nil {
nilCount++
} else {
setCount++
}
}()
}
// Simulate connect() writing under mutex
time.Sleep(5 * time.Millisecond)
s.mutex.Lock()
s.connectClient = client
s.mutex.Unlock()
wg.Wait()
assert.Equal(t, 50, nilCount+setCount, "all goroutines should complete without panic")
}
// TestCleanupConnection_ClearsConnectClient validates that cleanupConnection
// properly nils out connectClient.
func TestCleanupConnection_ClearsConnectClient(t *testing.T) {
s := newTestServer()
_, cancel := context.WithCancel(context.Background())
s.actCancel = cancel
s.connectClient = newDummyConnectClient(context.Background())
s.clientRunning = true
err := s.cleanupConnection()
require.NoError(t, err)
assert.Nil(t, s.connectClient, "connectClient should be nil after cleanup")
}
// TestCleanState_NilConnectClient validates that CleanState doesn't panic
// when connectClient is nil.
func TestCleanState_NilConnectClient(t *testing.T) {
s := newTestServer()
s.connectClient = nil
s.profileManager = nil // will cause error if it tries to proceed past the nil check
// Should not panic — the nil check should prevent calling Status() on nil
assert.NotPanics(t, func() {
_, _ = s.CleanState(context.Background(), &proto.CleanStateRequest{All: true})
})
}
// TestDeleteState_NilConnectClient validates that DeleteState doesn't panic
// when connectClient is nil.
func TestDeleteState_NilConnectClient(t *testing.T) {
s := newTestServer()
s.connectClient = nil
s.profileManager = nil
assert.NotPanics(t, func() {
_, _ = s.DeleteState(context.Background(), &proto.DeleteStateRequest{All: true})
})
}
// TestDownThenUp_StaleRunningChan documents the known state issue where
// clientRunningChan from a previous connection is already closed, causing
// waitForUp() to return immediately on reconnect.
func TestDownThenUp_StaleRunningChan(t *testing.T) {
s := newTestServer()
// Simulate state after a successful connection
s.clientRunning = true
s.clientRunningChan = make(chan struct{})
close(s.clientRunningChan) // closed when engine started
s.clientGiveUpChan = make(chan struct{})
s.connectClient = newDummyConnectClient(context.Background())
_, cancel := context.WithCancel(context.Background())
s.actCancel = cancel
// Simulate Down(): cleanupConnection sets connectClient = nil
s.mutex.Lock()
err := s.cleanupConnection()
s.mutex.Unlock()
require.NoError(t, err)
// After cleanup: connectClient is nil, clientRunning still true
// (goroutine hasn't exited yet)
s.mutex.Lock()
assert.Nil(t, s.connectClient, "connectClient should be nil after cleanup")
assert.True(t, s.clientRunning, "clientRunning still true until goroutine exits")
s.mutex.Unlock()
// waitForUp() returns immediately due to stale closed clientRunningChan
ctx, ctxCancel := context.WithTimeout(context.Background(), 2*time.Second)
defer ctxCancel()
waitDone := make(chan error, 1)
go func() {
_, err := s.waitForUp(ctx)
waitDone <- err
}()
select {
case err := <-waitDone:
assert.NoError(t, err, "waitForUp returns success on stale channel")
// But connectClient is still nil — this is the stale state issue
s.mutex.Lock()
assert.Nil(t, s.connectClient, "connectClient is nil despite waitForUp success")
s.mutex.Unlock()
case <-time.After(1 * time.Second):
t.Fatal("waitForUp should have returned immediately due to stale closed channel")
}
}
// TestConnectClient_EngineNilOnFreshClient validates that a newly created
// ConnectClient has nil Engine (before Run is called).
func TestConnectClient_EngineNilOnFreshClient(t *testing.T) {
client := newDummyConnectClient(context.Background())
assert.Nil(t, client.Engine(), "engine should be nil on fresh ConnectClient")
}

View File

@@ -39,7 +39,7 @@ func (s *Server) ListStates(_ context.Context, _ *proto.ListStatesRequest) (*pro
// CleanState handles cleaning of states (performing cleanup operations)
func (s *Server) CleanState(ctx context.Context, req *proto.CleanStateRequest) (*proto.CleanStateResponse, error) {
if s.connectClient.Status() == internal.StatusConnected || s.connectClient.Status() == internal.StatusConnecting {
if s.connectClient != nil && (s.connectClient.Status() == internal.StatusConnected || s.connectClient.Status() == internal.StatusConnecting) {
return nil, status.Errorf(codes.FailedPrecondition, "cannot clean state while connecting or connected, run 'netbird down' first.")
}
@@ -82,7 +82,7 @@ func (s *Server) CleanState(ctx context.Context, req *proto.CleanStateRequest) (
// DeleteState handles deletion of states without cleanup
func (s *Server) DeleteState(ctx context.Context, req *proto.DeleteStateRequest) (*proto.DeleteStateResponse, error) {
if s.connectClient.Status() == internal.StatusConnected || s.connectClient.Status() == internal.StatusConnecting {
if s.connectClient != nil && (s.connectClient.Status() == internal.StatusConnected || s.connectClient.Status() == internal.StatusConnecting) {
return nil, status.Errorf(codes.FailedPrecondition, "cannot clean state while connecting or connected, run 'netbird down' first.")
}

View File

@@ -323,7 +323,7 @@ type serviceClient struct {
exitNodeMu sync.Mutex
mExitNodeItems []menuHandler
exitNodeStates []exitNodeState
exitNodeRetryCancel context.CancelFunc
mExitNodeDeselectAll *systray.MenuItem
logFile string
wLoginURL fyne.Window
@@ -924,7 +924,7 @@ func (s *serviceClient) updateStatus() error {
s.mDown.Enable()
s.mNetworks.Enable()
s.mExitNode.Enable()
go s.updateExitNodes()
s.startExitNodeRefresh()
systrayIconState = true
case status.Status == string(internal.StatusConnecting):
s.setConnectingStatus()
@@ -985,6 +985,7 @@ func (s *serviceClient) setDisconnectedStatus() {
s.mUp.Enable()
s.mNetworks.Disable()
s.mExitNode.Disable()
s.cancelExitNodeRetry()
go s.updateExitNodes()
}

View File

@@ -100,8 +100,7 @@ func (h *eventHandler) handleConnectClick() {
func (h *eventHandler) handleDisconnectClick() {
h.client.mDown.Disable()
h.client.exitNodeStates = []exitNodeState{}
h.client.cancelExitNodeRetry()
if h.client.connectCancel != nil {
log.Debugf("cancelling ongoing connect operation")

View File

@@ -6,7 +6,6 @@ import (
"context"
"fmt"
"runtime"
"slices"
"sort"
"strings"
"time"
@@ -34,11 +33,6 @@ const (
type filter string
type exitNodeState struct {
id string
selected bool
}
func (s *serviceClient) showNetworksUI() {
s.wNetworks = s.app.NewWindow("Networks")
s.wNetworks.SetOnClosed(s.cancel)
@@ -335,16 +329,75 @@ func (s *serviceClient) updateNetworksBasedOnDisplayTab(tabs *container.AppTabs,
s.updateNetworks(grid, f)
}
func (s *serviceClient) updateExitNodes() {
// startExitNodeRefresh initiates exit node menu refresh after connecting.
// On Windows, TrayOpenedCh is not supported by the systray library, so we use
// a background poller to keep exit nodes in sync while connected.
// On macOS/Linux, TrayOpenedCh handles refreshes on each tray open.
func (s *serviceClient) startExitNodeRefresh() {
s.cancelExitNodeRetry()
if runtime.GOOS == "windows" {
ctx, cancel := context.WithCancel(s.ctx)
s.exitNodeMu.Lock()
s.exitNodeRetryCancel = cancel
s.exitNodeMu.Unlock()
go s.pollExitNodes(ctx)
} else {
go s.updateExitNodes()
}
}
func (s *serviceClient) cancelExitNodeRetry() {
s.exitNodeMu.Lock()
if s.exitNodeRetryCancel != nil {
s.exitNodeRetryCancel()
s.exitNodeRetryCancel = nil
}
s.exitNodeMu.Unlock()
}
// pollExitNodes periodically refreshes exit nodes while connected.
// Uses a short initial interval to catch routes from the management sync,
// then switches to a longer interval for ongoing updates.
func (s *serviceClient) pollExitNodes(ctx context.Context) {
// Initial fast polling to catch routes as they appear after connect.
for i := 0; i < 5; i++ {
if s.updateExitNodes() {
break
}
select {
case <-ctx.Done():
return
case <-time.After(2 * time.Second):
}
}
ticker := time.NewTicker(10 * time.Second)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
s.updateExitNodes()
}
}
}
// updateExitNodes fetches exit nodes from the daemon and recreates the menu.
// Returns true if exit nodes were found.
func (s *serviceClient) updateExitNodes() bool {
conn, err := s.getSrvClient(defaultFailTimeout)
if err != nil {
log.Errorf("get client: %v", err)
return
return false
}
exitNodes, err := s.getExitNodes(conn)
if err != nil {
log.Errorf("get exit nodes: %v", err)
return
return false
}
s.exitNodeMu.Lock()
@@ -354,28 +407,14 @@ func (s *serviceClient) updateExitNodes() {
if len(s.mExitNodeItems) > 0 {
s.mExitNode.Enable()
} else {
s.mExitNode.Disable()
return true
}
s.mExitNode.Disable()
return false
}
func (s *serviceClient) recreateExitNodeMenu(exitNodes []*proto.Network) {
var exitNodeIDs []exitNodeState
for _, node := range exitNodes {
exitNodeIDs = append(exitNodeIDs, exitNodeState{
id: node.ID,
selected: node.Selected,
})
}
sort.Slice(exitNodeIDs, func(i, j int) bool {
return exitNodeIDs[i].id < exitNodeIDs[j].id
})
if slices.Equal(s.exitNodeStates, exitNodeIDs) {
log.Debug("Exit node menu already up to date")
return
}
for _, node := range s.mExitNodeItems {
node.cancel()
node.Hide()
@@ -413,8 +452,6 @@ func (s *serviceClient) recreateExitNodeMenu(exitNodes []*proto.Network) {
go s.handleChecked(ctx, node.ID, menuItem)
}
s.exitNodeStates = exitNodeIDs
if showDeselectAll {
s.mExitNode.AddSeparator()
deselectAllItem := s.mExitNode.AddSubMenuItem("Deselect All", "Deselect All")