mirror of
https://github.com/netbirdio/netbird.git
synced 2026-07-04 21:21:53 -04:00
[client] add DIAG logging to trace v6 exit-pair mirror
The write-time mirror did not eliminate the leak in field testing. Re-add the DIAG diagnostics around the exit-node selection flow to capture a fresh trace:
- UpdateRoutes: incoming client networks, selector state before/after the management update, and the networks remaining after FilterSelectedExitNodes.
- mirrorV6ExitPairSelections: the NetIDs present in this update and the v6 pairs V6ExitMergeSet derives from them (reveals whether the v4 base and its ::/0 pair are present in the same update so the pair can be matched).
- SyncPairedSelection: the base/paired state before and after the sync.
- FilterSelectedExitNodes / applyExitNodeFilter: per-route SKIP/KEEP/DROP and the selection lookups behind each decision.
- updateExitNodeSelections / logExitNodeUpdate: categorization and deselect set.
Temporary; to be removed once the root cause is confirmed.
This commit is contained in:
@@ -429,10 +429,16 @@ func (m *DefaultManager) UpdateRoutes(
|
||||
var merr *multierror.Error
|
||||
if !m.disableClientRoutes {
|
||||
|
||||
log.Debugf("DIAG UpdateRoutes: incoming %d client networks: %v", len(clientRoutes), haKeysList(clientRoutes))
|
||||
log.Debugf("DIAG UpdateRoutes: selector BEFORE management update: %s", m.routeSelector.MarshalSummary())
|
||||
|
||||
// Update route selector based on management server's isSelected status
|
||||
m.updateRouteSelectorFromManagement(clientRoutes)
|
||||
|
||||
log.Debugf("DIAG UpdateRoutes: selector AFTER management update: %s", m.routeSelector.MarshalSummary())
|
||||
|
||||
filteredClientRoutes := m.routeSelector.FilterSelectedExitNodes(clientRoutes)
|
||||
log.Debugf("DIAG UpdateRoutes: AFTER filter, %d networks remain: %v", len(filteredClientRoutes), haKeysList(filteredClientRoutes))
|
||||
|
||||
if err := m.updateSystemRoutes(filteredClientRoutes); err != nil {
|
||||
merr = multierror.Append(merr, fmt.Errorf("update system routes: %w", err))
|
||||
@@ -724,12 +730,38 @@ func (m *DefaultManager) mirrorV6ExitPairSelections(clientRoutes route.HAMap) {
|
||||
routesByNetID[haID.NetID()] = routes
|
||||
}
|
||||
|
||||
for v6ID := range route.V6ExitMergeSet(routesByNetID) {
|
||||
v6Pairs := route.V6ExitMergeSet(routesByNetID)
|
||||
log.Debugf("DIAG mirrorV6ExitPairSelections: netIDs=%v v6Pairs=%v", netIDKeysList(routesByNetID), netIDSetList(v6Pairs))
|
||||
for v6ID := range v6Pairs {
|
||||
baseID := route.NetID(strings.TrimSuffix(string(v6ID), route.V6ExitSuffix))
|
||||
m.routeSelector.SyncPairedSelection(baseID, v6ID)
|
||||
}
|
||||
}
|
||||
|
||||
func netIDKeysList(m map[route.NetID][]*route.Route) []route.NetID {
|
||||
out := make([]route.NetID, 0, len(m))
|
||||
for k := range m {
|
||||
out = append(out, k)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func netIDSetList(m map[route.NetID]struct{}) []route.NetID {
|
||||
out := make([]route.NetID, 0, len(m))
|
||||
for k := range m {
|
||||
out = append(out, k)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func haKeysList(m route.HAMap) []route.HAUniqueID {
|
||||
out := make([]route.HAUniqueID, 0, len(m))
|
||||
for k := range m {
|
||||
out = append(out, k)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
type exitNodeInfo struct {
|
||||
allIDs []route.NetID
|
||||
selectedByManagement []route.NetID
|
||||
@@ -784,6 +816,8 @@ func (m *DefaultManager) checkManagementSelection(routes []*route.Route, netID r
|
||||
|
||||
func (m *DefaultManager) updateExitNodeSelections(info exitNodeInfo) {
|
||||
routesToDeselect := m.getRoutesToDeselect(info.allIDs)
|
||||
log.Debugf("DIAG updateExitNodeSelections: allIDs=%v userSelected=%v userDeselected=%v selectedByManagement=%v -> routesToDeselect(no user selection)=%v",
|
||||
info.allIDs, info.userSelected, info.userDeselected, info.selectedByManagement, routesToDeselect)
|
||||
m.deselectExitNodes(routesToDeselect)
|
||||
m.selectExitNodesByManagement(info.selectedByManagement, info.allIDs)
|
||||
}
|
||||
@@ -823,4 +857,6 @@ func (m *DefaultManager) selectExitNodesByManagement(selectedByManagement []rout
|
||||
func (m *DefaultManager) logExitNodeUpdate(info exitNodeInfo) {
|
||||
log.Debugf("Updated route selector: %d exit nodes available, %d selected by management, %d user-selected, %d user-deselected",
|
||||
len(info.allIDs), len(info.selectedByManagement), len(info.userSelected), len(info.userDeselected))
|
||||
log.Debugf("DIAG logExitNodeUpdate: allIDs=%v selectedByManagement=%v userSelected=%v userDeselected=%v",
|
||||
info.allIDs, info.selectedByManagement, info.userSelected, info.userDeselected)
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"sync"
|
||||
|
||||
"github.com/hashicorp/go-multierror"
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/netbirdio/netbird/client/errors"
|
||||
"github.com/netbirdio/netbird/route"
|
||||
@@ -133,11 +134,14 @@ func (rs *RouteSelector) SyncPairedSelection(baseID, pairedID route.NetID) {
|
||||
defer rs.mu.Unlock()
|
||||
|
||||
if rs.deselectAll {
|
||||
log.Debugf("DIAG SyncPairedSelection: deselectAll set, skip base=%q paired=%q", baseID, pairedID)
|
||||
return
|
||||
}
|
||||
|
||||
_, baseSelected := rs.selectedRoutes[baseID]
|
||||
_, baseDeselected := rs.deselectedRoutes[baseID]
|
||||
_, pairedSelectedBefore := rs.selectedRoutes[pairedID]
|
||||
_, pairedDeselectedBefore := rs.deselectedRoutes[pairedID]
|
||||
|
||||
delete(rs.selectedRoutes, pairedID)
|
||||
delete(rs.deselectedRoutes, pairedID)
|
||||
@@ -148,6 +152,18 @@ func (rs *RouteSelector) SyncPairedSelection(baseID, pairedID route.NetID) {
|
||||
case baseDeselected:
|
||||
rs.deselectedRoutes[pairedID] = struct{}{}
|
||||
}
|
||||
|
||||
log.Debugf("DIAG SyncPairedSelection: base=%q (selected=%v deselected=%v) paired=%q before(selected=%v deselected=%v) -> after(selected=%v deselected=%v)",
|
||||
baseID, baseSelected, baseDeselected, pairedID,
|
||||
pairedSelectedBefore, pairedDeselectedBefore,
|
||||
baseSelected, baseDeselected)
|
||||
}
|
||||
|
||||
// MarshalSummary returns a short human-readable description of the selector state for diagnostics.
|
||||
func (rs *RouteSelector) MarshalSummary() string {
|
||||
rs.mu.RLock()
|
||||
defer rs.mu.RUnlock()
|
||||
return fmt.Sprintf("deselectAll=%v selected=%v deselected=%v", rs.deselectAll, keysOf(rs.selectedRoutes), keysOf(rs.deselectedRoutes))
|
||||
}
|
||||
|
||||
// FilterSelected removes unselected routes from the provided map.
|
||||
@@ -186,24 +202,47 @@ func (rs *RouteSelector) FilterSelectedExitNodes(routes route.HAMap) route.HAMap
|
||||
return route.HAMap{}
|
||||
}
|
||||
|
||||
log.Debugf("DIAG FilterSelectedExitNodes: incoming %d networks, deselected=%v selected=%v deselectAll=%v",
|
||||
len(routes), keysOf(rs.deselectedRoutes), keysOf(rs.selectedRoutes), rs.deselectAll)
|
||||
|
||||
filtered := make(route.HAMap, len(routes))
|
||||
for id, rt := range routes {
|
||||
netID := id.NetID()
|
||||
if rs.isDeselectedLocked(netID) {
|
||||
log.Debugf("DIAG FilterSelectedExitNodes: SKIP id=%q netID=%q (literally deselected)", id, netID)
|
||||
continue
|
||||
}
|
||||
|
||||
if !isExitNode(rt) {
|
||||
log.Debugf("DIAG FilterSelectedExitNodes: KEEP id=%q netID=%q (not an exit node)", id, netID)
|
||||
filtered[id] = rt
|
||||
continue
|
||||
}
|
||||
|
||||
log.Debugf("DIAG FilterSelectedExitNodes: EXITNODE id=%q netID=%q -> applyExitNodeFilter", id, netID)
|
||||
rs.applyExitNodeFilter(id, netID, rt, filtered)
|
||||
}
|
||||
|
||||
log.Debugf("DIAG FilterSelectedExitNodes: result keeps %d networks: %v", len(filtered), haKeysOf(filtered))
|
||||
return filtered
|
||||
}
|
||||
|
||||
func keysOf(m map[route.NetID]struct{}) []route.NetID {
|
||||
out := make([]route.NetID, 0, len(m))
|
||||
for k := range m {
|
||||
out = append(out, k)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func haKeysOf(m route.HAMap) []route.HAUniqueID {
|
||||
out := make([]route.HAUniqueID, 0, len(m))
|
||||
for k := range m {
|
||||
out = append(out, k)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// MarshalJSON implements the json.Marshaler interface
|
||||
func (rs *RouteSelector) MarshalJSON() ([]byte, error) {
|
||||
rs.mu.RLock()
|
||||
@@ -286,15 +325,22 @@ func (rs *RouteSelector) applyExitNodeFilter(
|
||||
rt []*route.Route,
|
||||
out route.HAMap,
|
||||
) {
|
||||
log.Debugf("DIAG applyExitNodeFilter: id=%q netID=%q hasUserSel=%v isSelected=%v",
|
||||
id, netID, rs.hasUserSelectionForRouteLocked(netID), rs.isSelectedLocked(netID))
|
||||
if rs.hasUserSelectionForRouteLocked(netID) {
|
||||
if rs.isSelectedLocked(netID) {
|
||||
log.Debugf("DIAG applyExitNodeFilter: KEEP id=%q (netID %q is selected)", id, netID)
|
||||
out[id] = rt
|
||||
} else {
|
||||
log.Debugf("DIAG applyExitNodeFilter: DROP id=%q (netID %q is deselected)", id, netID)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// no explicit selection for this route: defer to management's SkipAutoApply flag
|
||||
sel := collectSelected(rt)
|
||||
log.Debugf("DIAG applyExitNodeFilter: no user selection for netID %q; SkipAutoApply filter kept %d/%d routes for id=%q",
|
||||
netID, len(sel), len(rt), id)
|
||||
if len(sel) > 0 {
|
||||
out[id] = sel
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user