[management] Move service reload outside transaction in account settings update (#5325)

Bug Fixes

Network and DNS updates now defer service and reverse-proxy reloads until after account updates complete, preventing inconsistent proxy state and race conditions.
Chores

Removed automatic peer/broadcast updates immediately following bulk service reloads.
Tests

Added a test ensuring network-range changes complete without deadlock.
This commit is contained in:
Bethuel Mmbaga
2026-02-14 20:27:15 +01:00
committed by GitHub
parent 01a9cd4651
commit 68c481fa44
3 changed files with 40 additions and 5 deletions

View File

@@ -473,8 +473,6 @@ func (m *managerImpl) ReloadAllServicesForAccount(ctx context.Context, accountID
m.proxyGRPCServer.SendServiceUpdateToCluster(service.ToProtoMapping(reverseproxy.Update, "", m.proxyGRPCServer.GetOIDCValidationConfig()), service.ProxyCluster)
}
m.accountManager.UpdateAccountPeers(ctx, accountID)
return nil
}

View File

@@ -297,6 +297,7 @@ func (am *DefaultAccountManager) UpdateAccountSettings(ctx context.Context, acco
var oldSettings *types.Settings
var updateAccountPeers bool
var groupChangesAffectPeers bool
var reloadReverseProxy bool
err = am.Store.ExecuteInTransaction(ctx, func(transaction store.Store) error {
var groupsUpdated bool
@@ -327,9 +328,7 @@ func (am *DefaultAccountManager) UpdateAccountSettings(ctx context.Context, acco
if err = am.reallocateAccountPeerIPs(ctx, transaction, accountID, newSettings.NetworkRange); err != nil {
return err
}
if err = am.reverseProxyManager.ReloadAllServicesForAccount(ctx, accountID); err != nil {
log.WithContext(ctx).Warnf("failed to reload all services for account %s: %v", accountID, err)
}
reloadReverseProxy = true
updateAccountPeers = true
}
@@ -394,6 +393,11 @@ func (am *DefaultAccountManager) UpdateAccountSettings(ctx context.Context, acco
}
am.StoreEvent(ctx, userID, accountID, accountID, activity.AccountNetworkRangeUpdated, eventMeta)
}
if reloadReverseProxy {
if err = am.reverseProxyManager.ReloadAllServicesForAccount(ctx, accountID); err != nil {
log.WithContext(ctx).Warnf("failed to reload all services for account %s: %v", accountID, err)
}
}
if updateAccountPeers || extraSettingsChanged || groupChangesAffectPeers {
go am.UpdateAccountPeers(ctx, accountID)

View File

@@ -3918,3 +3918,36 @@ func TestAddNewUserToDomainAccountWithoutApproval(t *testing.T) {
assert.False(t, user.PendingApproval, "User should not be pending approval")
assert.Equal(t, existingAccountID, user.AccountID)
}
// TestDefaultAccountManager_UpdateAccountSettings_NetworkRangeChange verifies that
// changing NetworkRange via UpdateAccountSettings does not deadlock.
// The deadlock occurs because ReloadAllServicesForAccount is called inside a DB
// transaction but uses the main store connection, which blocks on the transaction lock.
func TestDefaultAccountManager_UpdateAccountSettings_NetworkRangeChange(t *testing.T) {
manager, _, err := createManager(t)
require.NoError(t, err)
accountID, err := manager.GetAccountIDByUserID(context.Background(), auth.UserAuth{UserId: userID})
require.NoError(t, err)
ctx := context.Background()
// Use a channel to detect if the call completes or hangs
done := make(chan error, 1)
go func() {
_, err := manager.UpdateAccountSettings(ctx, accountID, userID, &types.Settings{
PeerLoginExpiration: time.Hour,
PeerLoginExpirationEnabled: true,
NetworkRange: netip.MustParsePrefix("10.100.0.0/16"),
Extra: &types.ExtraSettings{},
})
done <- err
}()
select {
case err := <-done:
require.NoError(t, err, "UpdateAccountSettings should complete without error")
case <-time.After(10 * time.Second):
t.Fatal("UpdateAccountSettings deadlocked when changing NetworkRange")
}
}