[client] add DIAG logging to trace v6 exit-pair mirror

The write-time mirror did not eliminate the leak in field testing. Re-add the
DIAG diagnostics around the exit-node selection flow to capture a fresh trace:

- UpdateRoutes: incoming client networks, selector state before/after the
  management update, and the networks remaining after FilterSelectedExitNodes.
- mirrorV6ExitPairSelections: the NetIDs present in this update and the v6 pairs
  V6ExitMergeSet derives from them (reveals whether the v4 base and its ::/0 pair
  are present in the same update so the pair can be matched).
- SyncPairedSelection: the base/paired state before and after the sync.
- FilterSelectedExitNodes / applyExitNodeFilter: per-route SKIP/KEEP/DROP and the
  selection lookups behind each decision.
- updateExitNodeSelections / logExitNodeUpdate: categorization and deselect set.

Temporary; to be removed once the root cause is confirmed.
This commit is contained in:
Zoltan Papp
2026-06-03 01:01:53 +02:00
parent d25c8d881d
commit ed76f8f065
2 changed files with 83 additions and 1 deletions

View File

@@ -429,10 +429,16 @@ func (m *DefaultManager) UpdateRoutes(
var merr *multierror.Error
if !m.disableClientRoutes {
log.Debugf("DIAG UpdateRoutes: incoming %d client networks: %v", len(clientRoutes), haKeysList(clientRoutes))
log.Debugf("DIAG UpdateRoutes: selector BEFORE management update: %s", m.routeSelector.MarshalSummary())
// Update route selector based on management server's isSelected status
m.updateRouteSelectorFromManagement(clientRoutes)
log.Debugf("DIAG UpdateRoutes: selector AFTER management update: %s", m.routeSelector.MarshalSummary())
filteredClientRoutes := m.routeSelector.FilterSelectedExitNodes(clientRoutes)
log.Debugf("DIAG UpdateRoutes: AFTER filter, %d networks remain: %v", len(filteredClientRoutes), haKeysList(filteredClientRoutes))
if err := m.updateSystemRoutes(filteredClientRoutes); err != nil {
merr = multierror.Append(merr, fmt.Errorf("update system routes: %w", err))
@@ -724,12 +730,38 @@ func (m *DefaultManager) mirrorV6ExitPairSelections(clientRoutes route.HAMap) {
routesByNetID[haID.NetID()] = routes
}
for v6ID := range route.V6ExitMergeSet(routesByNetID) {
v6Pairs := route.V6ExitMergeSet(routesByNetID)
log.Debugf("DIAG mirrorV6ExitPairSelections: netIDs=%v v6Pairs=%v", netIDKeysList(routesByNetID), netIDSetList(v6Pairs))
for v6ID := range v6Pairs {
baseID := route.NetID(strings.TrimSuffix(string(v6ID), route.V6ExitSuffix))
m.routeSelector.SyncPairedSelection(baseID, v6ID)
}
}
// netIDKeysList returns the keys of a NetID-to-routes map as a slice so they
// can be printed in diagnostic log lines. The order follows Go's map iteration
// and is therefore unspecified.
func netIDKeysList(m map[route.NetID][]*route.Route) []route.NetID {
	ids := make([]route.NetID, len(m))
	next := 0
	for id := range m {
		ids[next] = id
		next++
	}
	return ids
}
// netIDSetList flattens a NetID set into a slice so it can be printed in
// diagnostic log lines. The order follows Go's map iteration and is therefore
// unspecified.
func netIDSetList(m map[route.NetID]struct{}) []route.NetID {
	members := make([]route.NetID, len(m))
	pos := 0
	for member := range m {
		members[pos] = member
		pos++
	}
	return members
}
// haKeysList returns the HAUniqueID keys of an HAMap as a slice so they can be
// printed in diagnostic log lines. The order follows Go's map iteration and is
// therefore unspecified.
func haKeysList(m route.HAMap) []route.HAUniqueID {
	haIDs := make([]route.HAUniqueID, len(m))
	idx := 0
	for haID := range m {
		haIDs[idx] = haID
		idx++
	}
	return haIDs
}
type exitNodeInfo struct {
allIDs []route.NetID
selectedByManagement []route.NetID
@@ -784,6 +816,8 @@ func (m *DefaultManager) checkManagementSelection(routes []*route.Route, netID r
// updateExitNodeSelections computes the deselect set from info.allIDs, logs the
// categorization for diagnostics, then deselects that set and applies the
// management-selected exit nodes, in that order.
func (m *DefaultManager) updateExitNodeSelections(info exitNodeInfo) {
	toDeselect := m.getRoutesToDeselect(info.allIDs)

	// Temporary DIAG trace: the full categorization next to the derived deselect set.
	log.Debugf(
		"DIAG updateExitNodeSelections: allIDs=%v userSelected=%v userDeselected=%v selectedByManagement=%v -> routesToDeselect(no user selection)=%v",
		info.allIDs,
		info.userSelected,
		info.userDeselected,
		info.selectedByManagement,
		toDeselect,
	)

	m.deselectExitNodes(toDeselect)
	m.selectExitNodesByManagement(info.selectedByManagement, info.allIDs)
}
@@ -823,4 +857,6 @@ func (m *DefaultManager) selectExitNodesByManagement(selectedByManagement []rout
// logExitNodeUpdate writes two debug lines for an exit-node update: a summary
// with the size of each category in info, and a DIAG line listing the
// categories' contents.
func (m *DefaultManager) logExitNodeUpdate(info exitNodeInfo) {
	available, byManagement := info.allIDs, info.selectedByManagement
	userOn, userOff := info.userSelected, info.userDeselected

	log.Debugf(
		"Updated route selector: %d exit nodes available, %d selected by management, %d user-selected, %d user-deselected",
		len(available), len(byManagement), len(userOn), len(userOff),
	)
	log.Debugf(
		"DIAG logExitNodeUpdate: allIDs=%v selectedByManagement=%v userSelected=%v userDeselected=%v",
		available, byManagement, userOn, userOff,
	)
}

View File

@@ -7,6 +7,7 @@ import (
"sync"
"github.com/hashicorp/go-multierror"
log "github.com/sirupsen/logrus"
"github.com/netbirdio/netbird/client/errors"
"github.com/netbirdio/netbird/route"
@@ -133,11 +134,14 @@ func (rs *RouteSelector) SyncPairedSelection(baseID, pairedID route.NetID) {
defer rs.mu.Unlock()
if rs.deselectAll {
log.Debugf("DIAG SyncPairedSelection: deselectAll set, skip base=%q paired=%q", baseID, pairedID)
return
}
_, baseSelected := rs.selectedRoutes[baseID]
_, baseDeselected := rs.deselectedRoutes[baseID]
_, pairedSelectedBefore := rs.selectedRoutes[pairedID]
_, pairedDeselectedBefore := rs.deselectedRoutes[pairedID]
delete(rs.selectedRoutes, pairedID)
delete(rs.deselectedRoutes, pairedID)
@@ -148,6 +152,18 @@ func (rs *RouteSelector) SyncPairedSelection(baseID, pairedID route.NetID) {
case baseDeselected:
rs.deselectedRoutes[pairedID] = struct{}{}
}
log.Debugf("DIAG SyncPairedSelection: base=%q (selected=%v deselected=%v) paired=%q before(selected=%v deselected=%v) -> after(selected=%v deselected=%v)",
baseID, baseSelected, baseDeselected, pairedID,
pairedSelectedBefore, pairedDeselectedBefore,
baseSelected, baseDeselected)
}
// MarshalSummary renders the selector state as a single line for diagnostics:
// the deselectAll flag followed by the selected and deselected NetIDs. The read
// lock is held while the state is read.
func (rs *RouteSelector) MarshalSummary() string {
	rs.mu.RLock()
	defer rs.mu.RUnlock()

	selected := keysOf(rs.selectedRoutes)
	deselected := keysOf(rs.deselectedRoutes)
	return fmt.Sprintf("deselectAll=%v selected=%v deselected=%v", rs.deselectAll, selected, deselected)
}
// FilterSelected removes unselected routes from the provided map.
@@ -186,24 +202,47 @@ func (rs *RouteSelector) FilterSelectedExitNodes(routes route.HAMap) route.HAMap
return route.HAMap{}
}
log.Debugf("DIAG FilterSelectedExitNodes: incoming %d networks, deselected=%v selected=%v deselectAll=%v",
len(routes), keysOf(rs.deselectedRoutes), keysOf(rs.selectedRoutes), rs.deselectAll)
filtered := make(route.HAMap, len(routes))
for id, rt := range routes {
netID := id.NetID()
if rs.isDeselectedLocked(netID) {
log.Debugf("DIAG FilterSelectedExitNodes: SKIP id=%q netID=%q (literally deselected)", id, netID)
continue
}
if !isExitNode(rt) {
log.Debugf("DIAG FilterSelectedExitNodes: KEEP id=%q netID=%q (not an exit node)", id, netID)
filtered[id] = rt
continue
}
log.Debugf("DIAG FilterSelectedExitNodes: EXITNODE id=%q netID=%q -> applyExitNodeFilter", id, netID)
rs.applyExitNodeFilter(id, netID, rt, filtered)
}
log.Debugf("DIAG FilterSelectedExitNodes: result keeps %d networks: %v", len(filtered), haKeysOf(filtered))
return filtered
}
// keysOf flattens a NetID set into a slice so it can be printed in diagnostic
// output. The order follows Go's map iteration and is therefore unspecified.
func keysOf(m map[route.NetID]struct{}) []route.NetID {
	ids := make([]route.NetID, len(m))
	n := 0
	for id := range m {
		ids[n] = id
		n++
	}
	return ids
}
// haKeysOf returns the HAUniqueID keys of an HAMap as a slice so they can be
// printed in diagnostic output. The order follows Go's map iteration and is
// therefore unspecified.
func haKeysOf(m route.HAMap) []route.HAUniqueID {
	haIDs := make([]route.HAUniqueID, len(m))
	n := 0
	for haID := range m {
		haIDs[n] = haID
		n++
	}
	return haIDs
}
// MarshalJSON implements the json.Marshaler interface
func (rs *RouteSelector) MarshalJSON() ([]byte, error) {
rs.mu.RLock()
@@ -286,15 +325,22 @@ func (rs *RouteSelector) applyExitNodeFilter(
rt []*route.Route,
out route.HAMap,
) {
log.Debugf("DIAG applyExitNodeFilter: id=%q netID=%q hasUserSel=%v isSelected=%v",
id, netID, rs.hasUserSelectionForRouteLocked(netID), rs.isSelectedLocked(netID))
if rs.hasUserSelectionForRouteLocked(netID) {
if rs.isSelectedLocked(netID) {
log.Debugf("DIAG applyExitNodeFilter: KEEP id=%q (netID %q is selected)", id, netID)
out[id] = rt
} else {
log.Debugf("DIAG applyExitNodeFilter: DROP id=%q (netID %q is deselected)", id, netID)
}
return
}
// no explicit selection for this route: defer to management's SkipAutoApply flag
sel := collectSelected(rt)
log.Debugf("DIAG applyExitNodeFilter: no user selection for netID %q; SkipAutoApply filter kept %d/%d routes for id=%q",
netID, len(sel), len(rt), id)
if len(sel) > 0 {
out[id] = sel
}