mirror of
https://github.com/netbirdio/netbird.git
synced 2026-04-21 00:22:15 -04:00
Compare commits
13 Commits
coderabbit
...
refactor/f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5b09078da2 | ||
|
|
3098f48b25 | ||
|
|
3a52a3aed8 | ||
|
|
14279fce89 | ||
|
|
7f023ce801 | ||
|
|
e361126515 | ||
|
|
95213f7157 | ||
|
|
2e0e3a3601 | ||
|
|
d2c18fdb95 | ||
|
|
1bd68a523b | ||
|
|
2734a3356e | ||
|
|
c6d660df4e | ||
|
|
721251460c |
2
Makefile
2
Makefile
@@ -5,7 +5,7 @@ GOLANGCI_LINT := $(shell pwd)/bin/golangci-lint
|
||||
$(GOLANGCI_LINT):
|
||||
@echo "Installing golangci-lint..."
|
||||
@mkdir -p ./bin
|
||||
@GOBIN=$(shell pwd)/bin go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
|
||||
@GOBIN=$(shell pwd)/bin go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@latest
|
||||
|
||||
# Lint only changed files (fast, for pre-push)
|
||||
lint: $(GOLANGCI_LINT)
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"os"
|
||||
"slices"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"golang.org/x/exp/maps"
|
||||
|
||||
@@ -15,6 +16,7 @@ import (
|
||||
|
||||
"github.com/netbirdio/netbird/client/iface/device"
|
||||
"github.com/netbirdio/netbird/client/internal"
|
||||
"github.com/netbirdio/netbird/client/internal/debug"
|
||||
"github.com/netbirdio/netbird/client/internal/dns"
|
||||
"github.com/netbirdio/netbird/client/internal/listener"
|
||||
"github.com/netbirdio/netbird/client/internal/peer"
|
||||
@@ -26,6 +28,7 @@ import (
|
||||
"github.com/netbirdio/netbird/formatter"
|
||||
"github.com/netbirdio/netbird/route"
|
||||
"github.com/netbirdio/netbird/shared/management/domain"
|
||||
types "github.com/netbirdio/netbird/upload-server/types"
|
||||
)
|
||||
|
||||
// ConnectionListener export internal Listener for mobile
|
||||
@@ -68,7 +71,30 @@ type Client struct {
|
||||
uiVersion string
|
||||
networkChangeListener listener.NetworkChangeListener
|
||||
|
||||
stateMu sync.RWMutex
|
||||
connectClient *internal.ConnectClient
|
||||
config *profilemanager.Config
|
||||
cacheDir string
|
||||
}
|
||||
|
||||
func (c *Client) setState(cfg *profilemanager.Config, cacheDir string, cc *internal.ConnectClient) {
|
||||
c.stateMu.Lock()
|
||||
defer c.stateMu.Unlock()
|
||||
c.config = cfg
|
||||
c.cacheDir = cacheDir
|
||||
c.connectClient = cc
|
||||
}
|
||||
|
||||
func (c *Client) stateSnapshot() (*profilemanager.Config, string, *internal.ConnectClient) {
|
||||
c.stateMu.RLock()
|
||||
defer c.stateMu.RUnlock()
|
||||
return c.config, c.cacheDir, c.connectClient
|
||||
}
|
||||
|
||||
func (c *Client) getConnectClient() *internal.ConnectClient {
|
||||
c.stateMu.RLock()
|
||||
defer c.stateMu.RUnlock()
|
||||
return c.connectClient
|
||||
}
|
||||
|
||||
// NewClient instantiate a new Client
|
||||
@@ -93,6 +119,7 @@ func (c *Client) Run(platformFiles PlatformFiles, urlOpener URLOpener, isAndroid
|
||||
|
||||
cfgFile := platformFiles.ConfigurationFilePath()
|
||||
stateFile := platformFiles.StateFilePath()
|
||||
cacheDir := platformFiles.CacheDir()
|
||||
|
||||
log.Infof("Starting client with config: %s, state: %s", cfgFile, stateFile)
|
||||
|
||||
@@ -124,8 +151,9 @@ func (c *Client) Run(platformFiles PlatformFiles, urlOpener URLOpener, isAndroid
|
||||
|
||||
// todo do not throw error in case of cancelled context
|
||||
ctx = internal.CtxInitState(ctx)
|
||||
c.connectClient = internal.NewConnectClient(ctx, cfg, c.recorder)
|
||||
return c.connectClient.RunOnAndroid(c.tunAdapter, c.iFaceDiscover, c.networkChangeListener, slices.Clone(dns.items), dnsReadyListener, stateFile)
|
||||
connectClient := internal.NewConnectClient(ctx, cfg, c.recorder)
|
||||
c.setState(cfg, cacheDir, connectClient)
|
||||
return connectClient.RunOnAndroid(c.tunAdapter, c.iFaceDiscover, c.networkChangeListener, slices.Clone(dns.items), dnsReadyListener, stateFile, cacheDir)
|
||||
}
|
||||
|
||||
// RunWithoutLogin we apply this type of run function when the backed has been started without UI (i.e. after reboot).
|
||||
@@ -135,6 +163,7 @@ func (c *Client) RunWithoutLogin(platformFiles PlatformFiles, dns *DNSList, dnsR
|
||||
|
||||
cfgFile := platformFiles.ConfigurationFilePath()
|
||||
stateFile := platformFiles.StateFilePath()
|
||||
cacheDir := platformFiles.CacheDir()
|
||||
|
||||
log.Infof("Starting client without login with config: %s, state: %s", cfgFile, stateFile)
|
||||
|
||||
@@ -157,8 +186,9 @@ func (c *Client) RunWithoutLogin(platformFiles PlatformFiles, dns *DNSList, dnsR
|
||||
|
||||
// todo do not throw error in case of cancelled context
|
||||
ctx = internal.CtxInitState(ctx)
|
||||
c.connectClient = internal.NewConnectClient(ctx, cfg, c.recorder)
|
||||
return c.connectClient.RunOnAndroid(c.tunAdapter, c.iFaceDiscover, c.networkChangeListener, slices.Clone(dns.items), dnsReadyListener, stateFile)
|
||||
connectClient := internal.NewConnectClient(ctx, cfg, c.recorder)
|
||||
c.setState(cfg, cacheDir, connectClient)
|
||||
return connectClient.RunOnAndroid(c.tunAdapter, c.iFaceDiscover, c.networkChangeListener, slices.Clone(dns.items), dnsReadyListener, stateFile, cacheDir)
|
||||
}
|
||||
|
||||
// Stop the internal client and free the resources
|
||||
@@ -173,11 +203,12 @@ func (c *Client) Stop() {
|
||||
}
|
||||
|
||||
func (c *Client) RenewTun(fd int) error {
|
||||
if c.connectClient == nil {
|
||||
cc := c.getConnectClient()
|
||||
if cc == nil {
|
||||
return fmt.Errorf("engine not running")
|
||||
}
|
||||
|
||||
e := c.connectClient.Engine()
|
||||
e := cc.Engine()
|
||||
if e == nil {
|
||||
return fmt.Errorf("engine not initialized")
|
||||
}
|
||||
@@ -185,6 +216,73 @@ func (c *Client) RenewTun(fd int) error {
|
||||
return e.RenewTun(fd)
|
||||
}
|
||||
|
||||
// DebugBundle generates a debug bundle, uploads it, and returns the upload key.
|
||||
// It works both with and without a running engine.
|
||||
func (c *Client) DebugBundle(platformFiles PlatformFiles, anonymize bool) (string, error) {
|
||||
cfg, cacheDir, cc := c.stateSnapshot()
|
||||
|
||||
// If the engine hasn't been started, load config from disk
|
||||
if cfg == nil {
|
||||
var err error
|
||||
cfg, err = profilemanager.UpdateOrCreateConfig(profilemanager.ConfigInput{
|
||||
ConfigPath: platformFiles.ConfigurationFilePath(),
|
||||
})
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("load config: %w", err)
|
||||
}
|
||||
cacheDir = platformFiles.CacheDir()
|
||||
}
|
||||
|
||||
deps := debug.GeneratorDependencies{
|
||||
InternalConfig: cfg,
|
||||
StatusRecorder: c.recorder,
|
||||
TempDir: cacheDir,
|
||||
}
|
||||
|
||||
if cc != nil {
|
||||
resp, err := cc.GetLatestSyncResponse()
|
||||
if err != nil {
|
||||
log.Warnf("get latest sync response: %v", err)
|
||||
}
|
||||
deps.SyncResponse = resp
|
||||
|
||||
if e := cc.Engine(); e != nil {
|
||||
if cm := e.GetClientMetrics(); cm != nil {
|
||||
deps.ClientMetrics = cm
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bundleGenerator := debug.NewBundleGenerator(
|
||||
deps,
|
||||
debug.BundleConfig{
|
||||
Anonymize: anonymize,
|
||||
IncludeSystemInfo: true,
|
||||
},
|
||||
)
|
||||
|
||||
path, err := bundleGenerator.Generate()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("generate debug bundle: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := os.Remove(path); err != nil {
|
||||
log.Errorf("failed to remove debug bundle file: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
uploadCtx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
|
||||
defer cancel()
|
||||
|
||||
key, err := debug.UploadDebugBundle(uploadCtx, types.DefaultBundleURL, cfg.ManagementURL.String(), path)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("upload debug bundle: %w", err)
|
||||
}
|
||||
|
||||
log.Infof("debug bundle uploaded with key %s", key)
|
||||
return key, nil
|
||||
}
|
||||
|
||||
// SetTraceLogLevel configure the logger to trace level
|
||||
func (c *Client) SetTraceLogLevel() {
|
||||
log.SetLevel(log.TraceLevel)
|
||||
@@ -214,12 +312,13 @@ func (c *Client) PeersList() *PeerInfoArray {
|
||||
}
|
||||
|
||||
func (c *Client) Networks() *NetworkArray {
|
||||
if c.connectClient == nil {
|
||||
cc := c.getConnectClient()
|
||||
if cc == nil {
|
||||
log.Error("not connected")
|
||||
return nil
|
||||
}
|
||||
|
||||
engine := c.connectClient.Engine()
|
||||
engine := cc.Engine()
|
||||
if engine == nil {
|
||||
log.Error("could not get engine")
|
||||
return nil
|
||||
@@ -300,7 +399,7 @@ func (c *Client) toggleRoute(command routeCommand) error {
|
||||
}
|
||||
|
||||
func (c *Client) getRouteManager() (routemanager.Manager, error) {
|
||||
client := c.connectClient
|
||||
client := c.getConnectClient()
|
||||
if client == nil {
|
||||
return nil, fmt.Errorf("not connected")
|
||||
}
|
||||
|
||||
@@ -7,4 +7,5 @@ package android
|
||||
type PlatformFiles interface {
|
||||
ConfigurationFilePath() string
|
||||
StateFilePath() string
|
||||
CacheDir() string
|
||||
}
|
||||
|
||||
@@ -217,7 +217,6 @@ func (w *WGIface) RemoveAllowedIP(peerKey string, allowedIP netip.Prefix) error
|
||||
// Close closes the tunnel interface
|
||||
func (w *WGIface) Close() error {
|
||||
w.mu.Lock()
|
||||
defer w.mu.Unlock()
|
||||
|
||||
var result *multierror.Error
|
||||
|
||||
@@ -225,7 +224,15 @@ func (w *WGIface) Close() error {
|
||||
result = multierror.Append(result, fmt.Errorf("failed to free WireGuard proxy: %w", err))
|
||||
}
|
||||
|
||||
if err := w.tun.Close(); err != nil {
|
||||
// Release w.mu before calling w.tun.Close(): the underlying
|
||||
// wireguard-go device.Close() waits for its send/receive goroutines
|
||||
// to drain. Some of those goroutines re-enter WGIface methods that
|
||||
// take w.mu (e.g. the packet filter DNS hook calls GetDevice()), so
|
||||
// holding the mutex here would deadlock the shutdown path.
|
||||
tun := w.tun
|
||||
w.mu.Unlock()
|
||||
|
||||
if err := tun.Close(); err != nil {
|
||||
result = multierror.Append(result, fmt.Errorf("failed to close wireguard interface %s: %w", w.Name(), err))
|
||||
}
|
||||
|
||||
|
||||
113
client/iface/iface_close_test.go
Normal file
113
client/iface/iface_close_test.go
Normal file
@@ -0,0 +1,113 @@
|
||||
//go:build !android
|
||||
|
||||
package iface
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
wgdevice "golang.zx2c4.com/wireguard/device"
|
||||
"golang.zx2c4.com/wireguard/tun/netstack"
|
||||
|
||||
"github.com/netbirdio/netbird/client/iface/device"
|
||||
"github.com/netbirdio/netbird/client/iface/udpmux"
|
||||
"github.com/netbirdio/netbird/client/iface/wgaddr"
|
||||
"github.com/netbirdio/netbird/client/iface/wgproxy"
|
||||
)
|
||||
|
||||
// fakeTunDevice implements WGTunDevice and lets the test control when
|
||||
// Close() returns. It mimics the wireguard-go shutdown path, which blocks
|
||||
// until its goroutines drain. Some of those goroutines (e.g. the packet
|
||||
// filter DNS hook in client/internal/dns) call back into WGIface, so if
|
||||
// WGIface.Close() held w.mu across tun.Close() the shutdown would
|
||||
// deadlock.
|
||||
type fakeTunDevice struct {
|
||||
closeStarted chan struct{}
|
||||
unblockClose chan struct{}
|
||||
}
|
||||
|
||||
func (f *fakeTunDevice) Create() (device.WGConfigurer, error) {
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
func (f *fakeTunDevice) Up() (*udpmux.UniversalUDPMuxDefault, error) {
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
func (f *fakeTunDevice) UpdateAddr(wgaddr.Address) error { return nil }
|
||||
func (f *fakeTunDevice) WgAddress() wgaddr.Address { return wgaddr.Address{} }
|
||||
func (f *fakeTunDevice) MTU() uint16 { return DefaultMTU }
|
||||
func (f *fakeTunDevice) DeviceName() string { return "nb-close-test" }
|
||||
func (f *fakeTunDevice) FilteredDevice() *device.FilteredDevice { return nil }
|
||||
func (f *fakeTunDevice) Device() *wgdevice.Device { return nil }
|
||||
func (f *fakeTunDevice) GetNet() *netstack.Net { return nil }
|
||||
func (f *fakeTunDevice) GetICEBind() device.EndpointManager { return nil }
|
||||
|
||||
func (f *fakeTunDevice) Close() error {
|
||||
close(f.closeStarted)
|
||||
<-f.unblockClose
|
||||
return nil
|
||||
}
|
||||
|
||||
type fakeProxyFactory struct{}
|
||||
|
||||
func (fakeProxyFactory) GetProxy() wgproxy.Proxy { return nil }
|
||||
func (fakeProxyFactory) GetProxyPort() uint16 { return 0 }
|
||||
func (fakeProxyFactory) Free() error { return nil }
|
||||
|
||||
// TestWGIface_CloseReleasesMutexBeforeTunClose guards against a deadlock
|
||||
// that surfaces as a macOS test-timeout in
|
||||
// TestDNSPermanent_updateUpstream: WGIface.Close() used to hold w.mu
|
||||
// while waiting for the wireguard-go device goroutines to finish, and
|
||||
// one of those goroutines (the DNS filter hook) calls back into
|
||||
// WGIface.GetDevice() which needs the same mutex. The fix is to drop
|
||||
// the lock before tun.Close() returns control.
|
||||
func TestWGIface_CloseReleasesMutexBeforeTunClose(t *testing.T) {
|
||||
tun := &fakeTunDevice{
|
||||
closeStarted: make(chan struct{}),
|
||||
unblockClose: make(chan struct{}),
|
||||
}
|
||||
w := &WGIface{
|
||||
tun: tun,
|
||||
wgProxyFactory: fakeProxyFactory{},
|
||||
}
|
||||
|
||||
closeDone := make(chan error, 1)
|
||||
go func() {
|
||||
closeDone <- w.Close()
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-tun.closeStarted:
|
||||
case <-time.After(2 * time.Second):
|
||||
close(tun.unblockClose)
|
||||
t.Fatal("tun.Close() was never invoked")
|
||||
}
|
||||
|
||||
// Simulate the WireGuard read goroutine calling back into WGIface
|
||||
// via the packet filter's DNS hook. If Close() still held w.mu
|
||||
// during tun.Close(), this would block until the test timeout.
|
||||
getDeviceDone := make(chan struct{})
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
_ = w.GetDevice()
|
||||
close(getDeviceDone)
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-getDeviceDone:
|
||||
case <-time.After(2 * time.Second):
|
||||
close(tun.unblockClose)
|
||||
wg.Wait()
|
||||
t.Fatal("GetDevice() deadlocked while WGIface.Close was closing the tun")
|
||||
}
|
||||
|
||||
close(tun.unblockClose)
|
||||
select {
|
||||
case <-closeDone:
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("WGIface.Close() never returned after the tun was unblocked")
|
||||
}
|
||||
}
|
||||
@@ -171,7 +171,7 @@ func (u *UDPConn) performFilterCheck(addr net.Addr) error {
|
||||
}
|
||||
|
||||
if u.address.Network.Contains(a) {
|
||||
log.Warnf("Address %s is part of the NetBird network %s, refusing to write", addr, u.address)
|
||||
log.Warnf("address %s is part of the NetBird network %s, refusing to write", addr, u.address)
|
||||
return fmt.Errorf("address %s is part of the NetBird network %s, refusing to write", addr, u.address)
|
||||
}
|
||||
|
||||
@@ -181,7 +181,7 @@ func (u *UDPConn) performFilterCheck(addr net.Addr) error {
|
||||
u.addrCache.Store(addr.String(), isRouted)
|
||||
if isRouted {
|
||||
// Extra log, as the error only shows up with ICE logging enabled
|
||||
log.Infof("Address %s is part of routed network %s, refusing to write", addr, prefix)
|
||||
log.Infof("address %s is part of routed network %s, refusing to write", addr, prefix)
|
||||
return fmt.Errorf("address %s is part of routed network %s, refusing to write", addr, prefix)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -94,6 +94,7 @@ func (c *ConnectClient) RunOnAndroid(
|
||||
dnsAddresses []netip.AddrPort,
|
||||
dnsReadyListener dns.ReadyListener,
|
||||
stateFilePath string,
|
||||
cacheDir string,
|
||||
) error {
|
||||
// in case of non Android os these variables will be nil
|
||||
mobileDependency := MobileDependency{
|
||||
@@ -103,6 +104,7 @@ func (c *ConnectClient) RunOnAndroid(
|
||||
HostDNSAddresses: dnsAddresses,
|
||||
DnsReadyListener: dnsReadyListener,
|
||||
StateFilePath: stateFilePath,
|
||||
TempDir: cacheDir,
|
||||
}
|
||||
return c.run(mobileDependency, nil, "")
|
||||
}
|
||||
@@ -338,6 +340,7 @@ func (c *ConnectClient) run(mobileDependency MobileDependency, runningChan chan
|
||||
log.Error(err)
|
||||
return wrapErr(err)
|
||||
}
|
||||
engineConfig.TempDir = mobileDependency.TempDir
|
||||
|
||||
relayManager := relayClient.NewManager(engineCtx, relayURLs, myPrivateKey.PublicKey().String(), engineConfig.MTU)
|
||||
c.statusRecorder.SetRelayMgr(relayManager)
|
||||
|
||||
@@ -16,7 +16,6 @@ import (
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"runtime/pprof"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -31,7 +30,6 @@ import (
|
||||
"github.com/netbirdio/netbird/client/internal/updater/installer"
|
||||
nbstatus "github.com/netbirdio/netbird/client/status"
|
||||
mgmProto "github.com/netbirdio/netbird/shared/management/proto"
|
||||
"github.com/netbirdio/netbird/util"
|
||||
)
|
||||
|
||||
const readmeContent = `Netbird debug bundle
|
||||
@@ -234,6 +232,7 @@ type BundleGenerator struct {
|
||||
statusRecorder *peer.Status
|
||||
syncResponse *mgmProto.SyncResponse
|
||||
logPath string
|
||||
tempDir string
|
||||
cpuProfile []byte
|
||||
refreshStatus func() // Optional callback to refresh status before bundle generation
|
||||
clientMetrics MetricsExporter
|
||||
@@ -256,6 +255,7 @@ type GeneratorDependencies struct {
|
||||
StatusRecorder *peer.Status
|
||||
SyncResponse *mgmProto.SyncResponse
|
||||
LogPath string
|
||||
TempDir string // Directory for temporary bundle zip files. If empty, os.TempDir() is used.
|
||||
CPUProfile []byte
|
||||
RefreshStatus func() // Optional callback to refresh status before bundle generation
|
||||
ClientMetrics MetricsExporter
|
||||
@@ -275,6 +275,7 @@ func NewBundleGenerator(deps GeneratorDependencies, cfg BundleConfig) *BundleGen
|
||||
statusRecorder: deps.StatusRecorder,
|
||||
syncResponse: deps.SyncResponse,
|
||||
logPath: deps.LogPath,
|
||||
tempDir: deps.TempDir,
|
||||
cpuProfile: deps.CPUProfile,
|
||||
refreshStatus: deps.RefreshStatus,
|
||||
clientMetrics: deps.ClientMetrics,
|
||||
@@ -287,7 +288,7 @@ func NewBundleGenerator(deps GeneratorDependencies, cfg BundleConfig) *BundleGen
|
||||
|
||||
// Generate creates a debug bundle and returns the location.
|
||||
func (g *BundleGenerator) Generate() (resp string, err error) {
|
||||
bundlePath, err := os.CreateTemp("", "netbird.debug.*.zip")
|
||||
bundlePath, err := os.CreateTemp(g.tempDir, "netbird.debug.*.zip")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("create zip file: %w", err)
|
||||
}
|
||||
@@ -373,15 +374,8 @@ func (g *BundleGenerator) createArchive() error {
|
||||
log.Errorf("failed to add wg show output: %v", err)
|
||||
}
|
||||
|
||||
if g.logPath != "" && !slices.Contains(util.SpecialLogs, g.logPath) {
|
||||
if err := g.addLogfile(); err != nil {
|
||||
log.Errorf("failed to add log file to debug bundle: %v", err)
|
||||
if err := g.trySystemdLogFallback(); err != nil {
|
||||
log.Errorf("failed to add systemd logs as fallback: %v", err)
|
||||
}
|
||||
}
|
||||
} else if err := g.trySystemdLogFallback(); err != nil {
|
||||
log.Errorf("failed to add systemd logs: %v", err)
|
||||
if err := g.addPlatformLog(); err != nil {
|
||||
log.Errorf("failed to add logs to debug bundle: %v", err)
|
||||
}
|
||||
|
||||
if err := g.addUpdateLogs(); err != nil {
|
||||
|
||||
41
client/internal/debug/debug_android.go
Normal file
41
client/internal/debug/debug_android.go
Normal file
@@ -0,0 +1,41 @@
|
||||
//go:build android
|
||||
|
||||
package debug
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os/exec"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
func (g *BundleGenerator) addPlatformLog() error {
|
||||
cmd := exec.Command("/system/bin/logcat", "-d")
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return fmt.Errorf("logcat stdout pipe: %w", err)
|
||||
}
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
return fmt.Errorf("start logcat: %w", err)
|
||||
}
|
||||
|
||||
var logReader io.Reader = stdout
|
||||
if g.anonymize {
|
||||
var pw *io.PipeWriter
|
||||
logReader, pw = io.Pipe()
|
||||
go anonymizeLog(stdout, pw, g.anonymizer)
|
||||
}
|
||||
|
||||
if err := g.addFileToZip(logReader, "logcat.txt"); err != nil {
|
||||
return fmt.Errorf("add logcat to zip: %w", err)
|
||||
}
|
||||
|
||||
if err := cmd.Wait(); err != nil {
|
||||
return fmt.Errorf("wait logcat: %w", err)
|
||||
}
|
||||
|
||||
log.Debug("added logcat output to debug bundle")
|
||||
return nil
|
||||
}
|
||||
25
client/internal/debug/debug_nonandroid.go
Normal file
25
client/internal/debug/debug_nonandroid.go
Normal file
@@ -0,0 +1,25 @@
|
||||
//go:build !android
|
||||
|
||||
package debug
|
||||
|
||||
import (
|
||||
"slices"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/netbirdio/netbird/util"
|
||||
)
|
||||
|
||||
func (g *BundleGenerator) addPlatformLog() error {
|
||||
if g.logPath != "" && !slices.Contains(util.SpecialLogs, g.logPath) {
|
||||
if err := g.addLogfile(); err != nil {
|
||||
log.Errorf("failed to add log file to debug bundle: %v", err)
|
||||
if err := g.trySystemdLogFallback(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else if err := g.trySystemdLogFallback(); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -140,6 +140,7 @@ type EngineConfig struct {
|
||||
ProfileConfig *profilemanager.Config
|
||||
|
||||
LogPath string
|
||||
TempDir string
|
||||
}
|
||||
|
||||
// EngineServices holds the external service dependencies required by the Engine.
|
||||
@@ -569,7 +570,7 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL)
|
||||
e.connMgr.Start(e.ctx)
|
||||
|
||||
e.srWatcher = guard.NewSRWatcher(e.signal, e.relayManager, e.mobileDep.IFaceDiscover, iceCfg)
|
||||
e.srWatcher.Start()
|
||||
e.srWatcher.Start(peer.IsForceRelayed())
|
||||
|
||||
e.receiveSignalEvents()
|
||||
e.receiveManagementEvents()
|
||||
@@ -1095,6 +1096,7 @@ func (e *Engine) handleBundle(params *mgmProto.BundleParameters) (*mgmProto.JobR
|
||||
StatusRecorder: e.statusRecorder,
|
||||
SyncResponse: syncResponse,
|
||||
LogPath: e.config.LogPath,
|
||||
TempDir: e.config.TempDir,
|
||||
ClientMetrics: e.clientMetrics,
|
||||
RefreshStatus: func() {
|
||||
e.RunHealthProbes(true)
|
||||
|
||||
@@ -22,4 +22,8 @@ type MobileDependency struct {
|
||||
DnsManager dns.IosDnsManager
|
||||
FileDescriptor int32
|
||||
StateFilePath string
|
||||
|
||||
// TempDir is a writable directory for temporary files (e.g., debug bundle zip).
|
||||
// On Android, this should be set to the app's cache directory.
|
||||
TempDir string
|
||||
}
|
||||
|
||||
@@ -185,17 +185,20 @@ func (conn *Conn) Open(engineCtx context.Context) error {
|
||||
|
||||
conn.workerRelay = NewWorkerRelay(conn.ctx, conn.Log, isController(conn.config), conn.config, conn, conn.relayManager)
|
||||
|
||||
relayIsSupportedLocally := conn.workerRelay.RelayIsSupportedLocally()
|
||||
workerICE, err := NewWorkerICE(conn.ctx, conn.Log, conn.config, conn, conn.signaler, conn.iFaceDiscover, conn.statusRecorder, relayIsSupportedLocally)
|
||||
if err != nil {
|
||||
return err
|
||||
forceRelay := IsForceRelayed()
|
||||
if !forceRelay {
|
||||
relayIsSupportedLocally := conn.workerRelay.RelayIsSupportedLocally()
|
||||
workerICE, err := NewWorkerICE(conn.ctx, conn.Log, conn.config, conn, conn.signaler, conn.iFaceDiscover, conn.statusRecorder, relayIsSupportedLocally)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
conn.workerICE = workerICE
|
||||
}
|
||||
conn.workerICE = workerICE
|
||||
|
||||
conn.handshaker = NewHandshaker(conn.Log, conn.config, conn.signaler, conn.workerICE, conn.workerRelay, conn.metricsStages)
|
||||
|
||||
conn.handshaker.AddRelayListener(conn.workerRelay.OnNewOffer)
|
||||
if !isForceRelayed() {
|
||||
if !forceRelay {
|
||||
conn.handshaker.AddICEListener(conn.workerICE.OnNewOffer)
|
||||
}
|
||||
|
||||
@@ -251,7 +254,9 @@ func (conn *Conn) Close(signalToRemote bool) {
|
||||
conn.wgWatcherCancel()
|
||||
}
|
||||
conn.workerRelay.CloseConn()
|
||||
conn.workerICE.Close()
|
||||
if conn.workerICE != nil {
|
||||
conn.workerICE.Close()
|
||||
}
|
||||
|
||||
if conn.wgProxyRelay != nil {
|
||||
err := conn.wgProxyRelay.CloseConn()
|
||||
@@ -294,7 +299,9 @@ func (conn *Conn) OnRemoteAnswer(answer OfferAnswer) {
|
||||
// OnRemoteCandidate Handles ICE connection Candidate provided by the remote peer.
|
||||
func (conn *Conn) OnRemoteCandidate(candidate ice.Candidate, haRoutes route.HAMap) {
|
||||
conn.dumpState.RemoteCandidate()
|
||||
conn.workerICE.OnRemoteCandidate(candidate, haRoutes)
|
||||
if conn.workerICE != nil {
|
||||
conn.workerICE.OnRemoteCandidate(candidate, haRoutes)
|
||||
}
|
||||
}
|
||||
|
||||
// SetOnConnected sets a handler function to be triggered by Conn when a new connection to a remote peer established
|
||||
@@ -712,33 +719,35 @@ func (conn *Conn) evalStatus() ConnStatus {
|
||||
return StatusConnecting
|
||||
}
|
||||
|
||||
func (conn *Conn) isConnectedOnAllWay() (connected bool) {
|
||||
// would be better to protect this with a mutex, but it could cause deadlock with Close function
|
||||
|
||||
// isConnectedOnAllWay evaluates the overall connection status based on ICE and Relay transports.
|
||||
//
|
||||
// The result is a tri-state:
|
||||
// - ConnStatusConnected: all available transports are up
|
||||
// - ConnStatusPartiallyConnected: relay is up but ICE is still pending/reconnecting
|
||||
// - ConnStatusDisconnected: no working transport
|
||||
func (conn *Conn) isConnectedOnAllWay() (status guard.ConnStatus) {
|
||||
defer func() {
|
||||
if !connected {
|
||||
if status == guard.ConnStatusDisconnected {
|
||||
conn.logTraceConnState()
|
||||
}
|
||||
}()
|
||||
|
||||
// For JS platform: only relay connection is supported
|
||||
if runtime.GOOS == "js" {
|
||||
return conn.statusRelay.Get() == worker.StatusConnected
|
||||
iceWorkerCreated := conn.workerICE != nil
|
||||
|
||||
var iceInProgress bool
|
||||
if iceWorkerCreated {
|
||||
iceInProgress = conn.workerICE.InProgress()
|
||||
}
|
||||
|
||||
// For non-JS platforms: check ICE connection status
|
||||
if conn.statusICE.Get() == worker.StatusDisconnected && !conn.workerICE.InProgress() {
|
||||
return false
|
||||
}
|
||||
|
||||
// If relay is supported with peer, it must also be connected
|
||||
if conn.workerRelay.IsRelayConnectionSupportedWithPeer() {
|
||||
if conn.statusRelay.Get() == worker.StatusDisconnected {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
return evalConnStatus(connStatusInputs{
|
||||
forceRelay: IsForceRelayed(),
|
||||
peerUsesRelay: conn.workerRelay.IsRelayConnectionSupportedWithPeer(),
|
||||
relayConnected: conn.statusRelay.Get() == worker.StatusConnected,
|
||||
remoteSupportsICE: conn.handshaker.RemoteICESupported(),
|
||||
iceWorkerCreated: iceWorkerCreated,
|
||||
iceStatusConnecting: conn.statusICE.Get() != worker.StatusDisconnected,
|
||||
iceInProgress: iceInProgress,
|
||||
})
|
||||
}
|
||||
|
||||
func (conn *Conn) enableWgWatcherIfNeeded(enabledTime time.Time) {
|
||||
@@ -926,3 +935,43 @@ func isController(config ConnConfig) bool {
|
||||
func isRosenpassEnabled(remoteRosenpassPubKey []byte) bool {
|
||||
return remoteRosenpassPubKey != nil
|
||||
}
|
||||
|
||||
func evalConnStatus(in connStatusInputs) guard.ConnStatus {
|
||||
// "Relay up and needed" — the peer uses relay and the transport is connected.
|
||||
relayUsedAndUp := in.peerUsesRelay && in.relayConnected
|
||||
|
||||
// Force-relay mode: ICE never runs. Relay is the only transport and must be up.
|
||||
if in.forceRelay {
|
||||
return boolToConnStatus(relayUsedAndUp)
|
||||
}
|
||||
|
||||
// Remote peer doesn't support ICE, or we haven't created the worker yet:
|
||||
// relay is the only possible transport.
|
||||
if !in.remoteSupportsICE || !in.iceWorkerCreated {
|
||||
return boolToConnStatus(relayUsedAndUp)
|
||||
}
|
||||
|
||||
// ICE counts as "up" when the status is anything other than Disconnected, OR
|
||||
// when a negotiation is currently in progress (so we don't spam offers while one is in flight).
|
||||
iceUp := in.iceStatusConnecting || in.iceInProgress
|
||||
|
||||
// Relay side is acceptable if the peer doesn't rely on relay, or relay is connected.
|
||||
relayOK := !in.peerUsesRelay || in.relayConnected
|
||||
|
||||
switch {
|
||||
case iceUp && relayOK:
|
||||
return guard.ConnStatusConnected
|
||||
case relayUsedAndUp:
|
||||
// Relay is up but ICE is down — partially connected.
|
||||
return guard.ConnStatusPartiallyConnected
|
||||
default:
|
||||
return guard.ConnStatusDisconnected
|
||||
}
|
||||
}
|
||||
|
||||
func boolToConnStatus(connected bool) guard.ConnStatus {
|
||||
if connected {
|
||||
return guard.ConnStatusConnected
|
||||
}
|
||||
return guard.ConnStatusDisconnected
|
||||
}
|
||||
|
||||
@@ -13,6 +13,20 @@ const (
|
||||
StatusConnected
|
||||
)
|
||||
|
||||
// connStatusInputs is the primitive-valued snapshot of the state that drives the
|
||||
// tri-state connection classification. Extracted so the decision logic can be unit-tested
|
||||
// without constructing full Worker/Handshaker objects.
|
||||
type connStatusInputs struct {
|
||||
forceRelay bool // NB_FORCE_RELAY or JS/WASM
|
||||
peerUsesRelay bool // remote peer advertises relay support AND local has relay
|
||||
relayConnected bool // statusRelay reports Connected (independent of whether peer uses relay)
|
||||
remoteSupportsICE bool // remote peer sent ICE credentials
|
||||
iceWorkerCreated bool // local WorkerICE exists (false in force-relay mode)
|
||||
iceStatusConnecting bool // statusICE is anything other than Disconnected
|
||||
iceInProgress bool // a negotiation is currently in flight
|
||||
}
|
||||
|
||||
|
||||
// ConnStatus describe the status of a peer's connection
|
||||
type ConnStatus int32
|
||||
|
||||
|
||||
201
client/internal/peer/conn_status_eval_test.go
Normal file
201
client/internal/peer/conn_status_eval_test.go
Normal file
@@ -0,0 +1,201 @@
|
||||
package peer
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/netbirdio/netbird/client/internal/peer/guard"
|
||||
)
|
||||
|
||||
func TestEvalConnStatus_ForceRelay(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
in connStatusInputs
|
||||
want guard.ConnStatus
|
||||
}{
|
||||
{
|
||||
name: "force relay, peer uses relay, relay up",
|
||||
in: connStatusInputs{
|
||||
forceRelay: true,
|
||||
peerUsesRelay: true,
|
||||
relayConnected: true,
|
||||
},
|
||||
want: guard.ConnStatusConnected,
|
||||
},
|
||||
{
|
||||
name: "force relay, peer uses relay, relay down",
|
||||
in: connStatusInputs{
|
||||
forceRelay: true,
|
||||
peerUsesRelay: true,
|
||||
relayConnected: false,
|
||||
},
|
||||
want: guard.ConnStatusDisconnected,
|
||||
},
|
||||
{
|
||||
name: "force relay, peer does NOT use relay - disconnected forever",
|
||||
in: connStatusInputs{
|
||||
forceRelay: true,
|
||||
peerUsesRelay: false,
|
||||
relayConnected: true,
|
||||
},
|
||||
want: guard.ConnStatusDisconnected,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if got := evalConnStatus(tc.in); got != tc.want {
|
||||
t.Fatalf("evalConnStatus = %v, want %v", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestEvalConnStatus_ICEUnavailable(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
in connStatusInputs
|
||||
want guard.ConnStatus
|
||||
}{
|
||||
{
|
||||
name: "remote does not support ICE, peer uses relay, relay up",
|
||||
in: connStatusInputs{
|
||||
peerUsesRelay: true,
|
||||
relayConnected: true,
|
||||
remoteSupportsICE: false,
|
||||
iceWorkerCreated: true,
|
||||
},
|
||||
want: guard.ConnStatusConnected,
|
||||
},
|
||||
{
|
||||
name: "remote does not support ICE, peer uses relay, relay down",
|
||||
in: connStatusInputs{
|
||||
peerUsesRelay: true,
|
||||
relayConnected: false,
|
||||
remoteSupportsICE: false,
|
||||
iceWorkerCreated: true,
|
||||
},
|
||||
want: guard.ConnStatusDisconnected,
|
||||
},
|
||||
{
|
||||
name: "ICE worker not yet created, relay up",
|
||||
in: connStatusInputs{
|
||||
peerUsesRelay: true,
|
||||
relayConnected: true,
|
||||
remoteSupportsICE: true,
|
||||
iceWorkerCreated: false,
|
||||
},
|
||||
want: guard.ConnStatusConnected,
|
||||
},
|
||||
{
|
||||
name: "remote does not support ICE, peer does not use relay",
|
||||
in: connStatusInputs{
|
||||
peerUsesRelay: false,
|
||||
relayConnected: false,
|
||||
remoteSupportsICE: false,
|
||||
iceWorkerCreated: true,
|
||||
},
|
||||
want: guard.ConnStatusDisconnected,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if got := evalConnStatus(tc.in); got != tc.want {
|
||||
t.Fatalf("evalConnStatus = %v, want %v", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestEvalConnStatus_FullyAvailable(t *testing.T) {
|
||||
base := connStatusInputs{
|
||||
remoteSupportsICE: true,
|
||||
iceWorkerCreated: true,
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
mutator func(*connStatusInputs)
|
||||
want guard.ConnStatus
|
||||
}{
|
||||
{
|
||||
name: "ICE connected, relay connected, peer uses relay",
|
||||
mutator: func(in *connStatusInputs) {
|
||||
in.peerUsesRelay = true
|
||||
in.relayConnected = true
|
||||
in.iceStatusConnecting = true
|
||||
},
|
||||
want: guard.ConnStatusConnected,
|
||||
},
|
||||
{
|
||||
name: "ICE connected, peer does NOT use relay",
|
||||
mutator: func(in *connStatusInputs) {
|
||||
in.peerUsesRelay = false
|
||||
in.relayConnected = false
|
||||
in.iceStatusConnecting = true
|
||||
},
|
||||
want: guard.ConnStatusConnected,
|
||||
},
|
||||
{
|
||||
name: "ICE InProgress only, peer does NOT use relay",
|
||||
mutator: func(in *connStatusInputs) {
|
||||
in.peerUsesRelay = false
|
||||
in.iceStatusConnecting = false
|
||||
in.iceInProgress = true
|
||||
},
|
||||
want: guard.ConnStatusConnected,
|
||||
},
|
||||
{
|
||||
name: "ICE down, relay up, peer uses relay -> partial",
|
||||
mutator: func(in *connStatusInputs) {
|
||||
in.peerUsesRelay = true
|
||||
in.relayConnected = true
|
||||
in.iceStatusConnecting = false
|
||||
in.iceInProgress = false
|
||||
},
|
||||
want: guard.ConnStatusPartiallyConnected,
|
||||
},
|
||||
{
|
||||
name: "ICE down, peer does NOT use relay -> disconnected",
|
||||
mutator: func(in *connStatusInputs) {
|
||||
in.peerUsesRelay = false
|
||||
in.relayConnected = false
|
||||
in.iceStatusConnecting = false
|
||||
in.iceInProgress = false
|
||||
},
|
||||
want: guard.ConnStatusDisconnected,
|
||||
},
|
||||
{
|
||||
name: "ICE up, peer uses relay but relay down -> partial (relay required, ICE ignored)",
|
||||
mutator: func(in *connStatusInputs) {
|
||||
in.peerUsesRelay = true
|
||||
in.relayConnected = false
|
||||
in.iceStatusConnecting = true
|
||||
},
|
||||
// relayOK = false (peer uses relay but it's down), iceUp = true
|
||||
// first switch arm fails (relayOK false), relayUsedAndUp = false (relay down),
|
||||
// falls into default: Disconnected.
|
||||
want: guard.ConnStatusDisconnected,
|
||||
},
|
||||
{
|
||||
name: "ICE down, relay up but peer does not use relay -> disconnected",
|
||||
mutator: func(in *connStatusInputs) {
|
||||
in.peerUsesRelay = false
|
||||
in.relayConnected = true // not actually used since peer doesn't rely on it
|
||||
in.iceStatusConnecting = false
|
||||
in.iceInProgress = false
|
||||
},
|
||||
want: guard.ConnStatusDisconnected,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
in := base
|
||||
tc.mutator(&in)
|
||||
if got := evalConnStatus(in); got != tc.want {
|
||||
t.Fatalf("evalConnStatus = %v, want %v (inputs: %+v)", got, tc.want, in)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -10,7 +10,7 @@ const (
|
||||
EnvKeyNBForceRelay = "NB_FORCE_RELAY"
|
||||
)
|
||||
|
||||
func isForceRelayed() bool {
|
||||
func IsForceRelayed() bool {
|
||||
if runtime.GOOS == "js" {
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -8,7 +8,19 @@ import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type isConnectedFunc func() bool
|
||||
// ConnStatus represents the connection state as seen by the guard.
|
||||
type ConnStatus int
|
||||
|
||||
const (
|
||||
// ConnStatusDisconnected means neither ICE nor Relay is connected.
|
||||
ConnStatusDisconnected ConnStatus = iota
|
||||
// ConnStatusPartiallyConnected means Relay is connected but ICE is not.
|
||||
ConnStatusPartiallyConnected
|
||||
// ConnStatusConnected means all required connections are established.
|
||||
ConnStatusConnected
|
||||
)
|
||||
|
||||
type connStatusFunc func() ConnStatus
|
||||
|
||||
// Guard is responsible for the reconnection logic.
|
||||
// It will trigger to send an offer to the peer then has connection issues.
|
||||
@@ -20,14 +32,14 @@ type isConnectedFunc func() bool
|
||||
// - ICE candidate changes
|
||||
type Guard struct {
|
||||
log *log.Entry
|
||||
isConnectedOnAllWay isConnectedFunc
|
||||
isConnectedOnAllWay connStatusFunc
|
||||
timeout time.Duration
|
||||
srWatcher *SRWatcher
|
||||
relayedConnDisconnected chan struct{}
|
||||
iCEConnDisconnected chan struct{}
|
||||
}
|
||||
|
||||
func NewGuard(log *log.Entry, isConnectedFn isConnectedFunc, timeout time.Duration, srWatcher *SRWatcher) *Guard {
|
||||
func NewGuard(log *log.Entry, isConnectedFn connStatusFunc, timeout time.Duration, srWatcher *SRWatcher) *Guard {
|
||||
return &Guard{
|
||||
log: log,
|
||||
isConnectedOnAllWay: isConnectedFn,
|
||||
@@ -57,8 +69,17 @@ func (g *Guard) SetICEConnDisconnected() {
|
||||
}
|
||||
}
|
||||
|
||||
// reconnectLoopWithRetry periodically check the connection status.
|
||||
// Try to send offer while the P2P is not established or while the Relay is not connected if is it supported
|
||||
// reconnectLoopWithRetry periodically checks the connection status and sends offers to re-establish connectivity.
|
||||
//
|
||||
// Behavior depends on the connection state reported by isConnectedOnAllWay:
|
||||
// - Connected: no action, the peer is fully reachable.
|
||||
// - Disconnected (neither ICE nor Relay): retries aggressively with exponential backoff (800ms doubling
|
||||
// up to timeout), never gives up. This ensures rapid recovery when the peer has no connectivity at all.
|
||||
// - PartiallyConnected (Relay up, ICE not): retries up to 3 times with exponential backoff, then switches
|
||||
// to one attempt per hour. This limits signaling traffic when relay already provides connectivity.
|
||||
//
|
||||
// External events (relay/ICE disconnect, signal/relay reconnect, candidate changes) reset the retry
|
||||
// counter and backoff ticker, giving ICE a fresh chance after network conditions change.
|
||||
func (g *Guard) reconnectLoopWithRetry(ctx context.Context, callback func()) {
|
||||
srReconnectedChan := g.srWatcher.NewListener()
|
||||
defer g.srWatcher.RemoveListener(srReconnectedChan)
|
||||
@@ -68,36 +89,47 @@ func (g *Guard) reconnectLoopWithRetry(ctx context.Context, callback func()) {
|
||||
|
||||
tickerChannel := ticker.C
|
||||
|
||||
iceState := &iceRetryState{log: g.log}
|
||||
defer iceState.reset()
|
||||
|
||||
for {
|
||||
select {
|
||||
case t := <-tickerChannel:
|
||||
if t.IsZero() {
|
||||
g.log.Infof("retry timed out, stop periodic offer sending")
|
||||
// after backoff timeout the ticker.C will be closed. We need to a dummy channel to avoid loop
|
||||
tickerChannel = make(<-chan time.Time)
|
||||
continue
|
||||
case <-tickerChannel:
|
||||
switch g.isConnectedOnAllWay() {
|
||||
case ConnStatusConnected:
|
||||
// all good, nothing to do
|
||||
case ConnStatusDisconnected:
|
||||
callback()
|
||||
case ConnStatusPartiallyConnected:
|
||||
if iceState.shouldRetry() {
|
||||
callback()
|
||||
} else {
|
||||
iceState.enterHourlyMode()
|
||||
ticker.Stop()
|
||||
tickerChannel = iceState.hourlyC()
|
||||
}
|
||||
}
|
||||
|
||||
if !g.isConnectedOnAllWay() {
|
||||
callback()
|
||||
}
|
||||
case <-g.relayedConnDisconnected:
|
||||
g.log.Debugf("Relay connection changed, reset reconnection ticker")
|
||||
ticker.Stop()
|
||||
ticker = g.prepareExponentTicker(ctx)
|
||||
ticker = g.newReconnectTicker(ctx)
|
||||
tickerChannel = ticker.C
|
||||
iceState.reset()
|
||||
|
||||
case <-g.iCEConnDisconnected:
|
||||
g.log.Debugf("ICE connection changed, reset reconnection ticker")
|
||||
ticker.Stop()
|
||||
ticker = g.prepareExponentTicker(ctx)
|
||||
ticker = g.newReconnectTicker(ctx)
|
||||
tickerChannel = ticker.C
|
||||
iceState.reset()
|
||||
|
||||
case <-srReconnectedChan:
|
||||
g.log.Debugf("has network changes, reset reconnection ticker")
|
||||
ticker.Stop()
|
||||
ticker = g.prepareExponentTicker(ctx)
|
||||
ticker = g.newReconnectTicker(ctx)
|
||||
tickerChannel = ticker.C
|
||||
iceState.reset()
|
||||
|
||||
case <-ctx.Done():
|
||||
g.log.Debugf("context is done, stop reconnect loop")
|
||||
@@ -120,7 +152,7 @@ func (g *Guard) initialTicker(ctx context.Context) *backoff.Ticker {
|
||||
return backoff.NewTicker(bo)
|
||||
}
|
||||
|
||||
func (g *Guard) prepareExponentTicker(ctx context.Context) *backoff.Ticker {
|
||||
func (g *Guard) newReconnectTicker(ctx context.Context) *backoff.Ticker {
|
||||
bo := backoff.WithContext(&backoff.ExponentialBackOff{
|
||||
InitialInterval: 800 * time.Millisecond,
|
||||
RandomizationFactor: 0.1,
|
||||
|
||||
61
client/internal/peer/guard/ice_retry_state.go
Normal file
61
client/internal/peer/guard/ice_retry_state.go
Normal file
@@ -0,0 +1,61 @@
|
||||
package guard
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
// maxICERetries is the maximum number of ICE offer attempts when relay is connected
|
||||
maxICERetries = 3
|
||||
// iceRetryInterval is the periodic retry interval after ICE retries are exhausted
|
||||
iceRetryInterval = 1 * time.Hour
|
||||
)
|
||||
|
||||
// iceRetryState tracks the limited ICE retry attempts when relay is already connected.
|
||||
// After maxICERetries attempts it switches to a periodic hourly retry.
|
||||
type iceRetryState struct {
|
||||
log *log.Entry
|
||||
retries int
|
||||
hourly *time.Ticker
|
||||
}
|
||||
|
||||
func (s *iceRetryState) reset() {
|
||||
s.retries = 0
|
||||
if s.hourly != nil {
|
||||
s.hourly.Stop()
|
||||
s.hourly = nil
|
||||
}
|
||||
}
|
||||
|
||||
// shouldRetry reports whether the caller should send another ICE offer on this tick.
|
||||
// Returns false when the per-cycle retry budget is exhausted and the caller must switch
|
||||
// to the hourly ticker via enterHourlyMode + hourlyC.
|
||||
func (s *iceRetryState) shouldRetry() bool {
|
||||
if s.hourly != nil {
|
||||
s.log.Debugf("hourly ICE retry attempt")
|
||||
return true
|
||||
}
|
||||
|
||||
s.retries++
|
||||
if s.retries <= maxICERetries {
|
||||
s.log.Debugf("ICE retry attempt %d/%d", s.retries, maxICERetries)
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// enterHourlyMode starts the hourly retry ticker. Must be called after shouldRetry returns false.
|
||||
func (s *iceRetryState) enterHourlyMode() {
|
||||
s.log.Infof("ICE retries exhausted (%d/%d), switching to hourly retry", maxICERetries, maxICERetries)
|
||||
s.hourly = time.NewTicker(iceRetryInterval)
|
||||
}
|
||||
|
||||
func (s *iceRetryState) hourlyC() <-chan time.Time {
|
||||
if s.hourly == nil {
|
||||
return nil
|
||||
}
|
||||
return s.hourly.C
|
||||
}
|
||||
103
client/internal/peer/guard/ice_retry_state_test.go
Normal file
103
client/internal/peer/guard/ice_retry_state_test.go
Normal file
@@ -0,0 +1,103 @@
|
||||
package guard
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
func newTestRetryState() *iceRetryState {
|
||||
return &iceRetryState{log: log.NewEntry(log.StandardLogger())}
|
||||
}
|
||||
|
||||
func TestICERetryState_AllowsInitialBudget(t *testing.T) {
|
||||
s := newTestRetryState()
|
||||
|
||||
for i := 1; i <= maxICERetries; i++ {
|
||||
if !s.shouldRetry() {
|
||||
t.Fatalf("shouldRetry returned false on attempt %d, want true (budget = %d)", i, maxICERetries)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestICERetryState_ExhaustsAfterBudget(t *testing.T) {
|
||||
s := newTestRetryState()
|
||||
|
||||
for i := 0; i < maxICERetries; i++ {
|
||||
_ = s.shouldRetry()
|
||||
}
|
||||
|
||||
if s.shouldRetry() {
|
||||
t.Fatalf("shouldRetry returned true after budget exhausted, want false")
|
||||
}
|
||||
}
|
||||
|
||||
func TestICERetryState_HourlyCNilBeforeEnterHourlyMode(t *testing.T) {
|
||||
s := newTestRetryState()
|
||||
|
||||
if s.hourlyC() != nil {
|
||||
t.Fatalf("hourlyC returned non-nil channel before enterHourlyMode")
|
||||
}
|
||||
}
|
||||
|
||||
func TestICERetryState_EnterHourlyModeArmsTicker(t *testing.T) {
|
||||
s := newTestRetryState()
|
||||
for i := 0; i < maxICERetries+1; i++ {
|
||||
_ = s.shouldRetry()
|
||||
}
|
||||
|
||||
s.enterHourlyMode()
|
||||
defer s.reset()
|
||||
|
||||
if s.hourlyC() == nil {
|
||||
t.Fatalf("hourlyC returned nil after enterHourlyMode")
|
||||
}
|
||||
}
|
||||
|
||||
func TestICERetryState_ShouldRetryTrueInHourlyMode(t *testing.T) {
|
||||
s := newTestRetryState()
|
||||
s.enterHourlyMode()
|
||||
defer s.reset()
|
||||
|
||||
if !s.shouldRetry() {
|
||||
t.Fatalf("shouldRetry returned false in hourly mode, want true")
|
||||
}
|
||||
|
||||
// Subsequent calls also return true — we keep retrying on each hourly tick.
|
||||
if !s.shouldRetry() {
|
||||
t.Fatalf("second shouldRetry returned false in hourly mode, want true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestICERetryState_ResetRestoresBudget(t *testing.T) {
|
||||
s := newTestRetryState()
|
||||
for i := 0; i < maxICERetries+1; i++ {
|
||||
_ = s.shouldRetry()
|
||||
}
|
||||
s.enterHourlyMode()
|
||||
|
||||
s.reset()
|
||||
|
||||
if s.hourlyC() != nil {
|
||||
t.Fatalf("hourlyC returned non-nil channel after reset")
|
||||
}
|
||||
if s.retries != 0 {
|
||||
t.Fatalf("retries = %d after reset, want 0", s.retries)
|
||||
}
|
||||
|
||||
for i := 1; i <= maxICERetries; i++ {
|
||||
if !s.shouldRetry() {
|
||||
t.Fatalf("shouldRetry returned false on attempt %d after reset, want true", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestICERetryState_ResetIsIdempotent(t *testing.T) {
|
||||
s := newTestRetryState()
|
||||
s.reset()
|
||||
s.reset() // second call must not panic or re-stop a nil ticker
|
||||
|
||||
if s.hourlyC() != nil {
|
||||
t.Fatalf("hourlyC non-nil after double reset")
|
||||
}
|
||||
}
|
||||
@@ -39,7 +39,7 @@ func NewSRWatcher(signalClient chNotifier, relayManager chNotifier, iFaceDiscove
|
||||
return srw
|
||||
}
|
||||
|
||||
func (w *SRWatcher) Start() {
|
||||
func (w *SRWatcher) Start(disableICEMonitor bool) {
|
||||
w.mu.Lock()
|
||||
defer w.mu.Unlock()
|
||||
|
||||
@@ -50,8 +50,10 @@ func (w *SRWatcher) Start() {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
w.cancelIceMonitor = cancel
|
||||
|
||||
iceMonitor := NewICEMonitor(w.iFaceDiscover, w.iceConfig, GetICEMonitorPeriod())
|
||||
go iceMonitor.Start(ctx, w.onICEChanged)
|
||||
if !disableICEMonitor {
|
||||
iceMonitor := NewICEMonitor(w.iFaceDiscover, w.iceConfig, GetICEMonitorPeriod())
|
||||
go iceMonitor.Start(ctx, w.onICEChanged)
|
||||
}
|
||||
w.signalClient.SetOnReconnectedListener(w.onReconnected)
|
||||
w.relayManager.SetOnReconnectedListener(w.onReconnected)
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
@@ -59,6 +60,10 @@ type Handshaker struct {
|
||||
relayListener *AsyncOfferListener
|
||||
iceListener func(remoteOfferAnswer *OfferAnswer)
|
||||
|
||||
// remoteICESupported tracks whether the remote peer includes ICE credentials in its offers/answers.
|
||||
// When false, the local side skips ICE listener dispatch and suppresses ICE credentials in responses.
|
||||
remoteICESupported atomic.Bool
|
||||
|
||||
// remoteOffersCh is a channel used to wait for remote credentials to proceed with the connection
|
||||
remoteOffersCh chan OfferAnswer
|
||||
// remoteAnswerCh is a channel used to wait for remote credentials answer (confirmation of our offer) to proceed with the connection
|
||||
@@ -66,7 +71,7 @@ type Handshaker struct {
|
||||
}
|
||||
|
||||
func NewHandshaker(log *log.Entry, config ConnConfig, signaler *Signaler, ice *WorkerICE, relay *WorkerRelay, metricsStages *MetricsStages) *Handshaker {
|
||||
return &Handshaker{
|
||||
h := &Handshaker{
|
||||
log: log,
|
||||
config: config,
|
||||
signaler: signaler,
|
||||
@@ -76,6 +81,13 @@ func NewHandshaker(log *log.Entry, config ConnConfig, signaler *Signaler, ice *W
|
||||
remoteOffersCh: make(chan OfferAnswer),
|
||||
remoteAnswerCh: make(chan OfferAnswer),
|
||||
}
|
||||
// assume remote supports ICE until we learn otherwise from received offers
|
||||
h.remoteICESupported.Store(ice != nil)
|
||||
return h
|
||||
}
|
||||
|
||||
func (h *Handshaker) RemoteICESupported() bool {
|
||||
return h.remoteICESupported.Load()
|
||||
}
|
||||
|
||||
func (h *Handshaker) AddRelayListener(offer func(remoteOfferAnswer *OfferAnswer)) {
|
||||
@@ -97,11 +109,13 @@ func (h *Handshaker) Listen(ctx context.Context) {
|
||||
h.metricsStages.RecordSignalingReceived()
|
||||
}
|
||||
|
||||
h.updateRemoteICEState(&remoteOfferAnswer)
|
||||
|
||||
if h.relayListener != nil {
|
||||
h.relayListener.Notify(&remoteOfferAnswer)
|
||||
}
|
||||
|
||||
if h.iceListener != nil {
|
||||
if h.iceListener != nil && h.RemoteICESupported() {
|
||||
h.iceListener(&remoteOfferAnswer)
|
||||
}
|
||||
|
||||
@@ -117,11 +131,13 @@ func (h *Handshaker) Listen(ctx context.Context) {
|
||||
h.metricsStages.RecordSignalingReceived()
|
||||
}
|
||||
|
||||
h.updateRemoteICEState(&remoteOfferAnswer)
|
||||
|
||||
if h.relayListener != nil {
|
||||
h.relayListener.Notify(&remoteOfferAnswer)
|
||||
}
|
||||
|
||||
if h.iceListener != nil {
|
||||
if h.iceListener != nil && h.RemoteICESupported() {
|
||||
h.iceListener(&remoteOfferAnswer)
|
||||
}
|
||||
case <-ctx.Done():
|
||||
@@ -183,15 +199,18 @@ func (h *Handshaker) sendAnswer() error {
|
||||
}
|
||||
|
||||
func (h *Handshaker) buildOfferAnswer() OfferAnswer {
|
||||
uFrag, pwd := h.ice.GetLocalUserCredentials()
|
||||
sid := h.ice.SessionID()
|
||||
answer := OfferAnswer{
|
||||
IceCredentials: IceCredentials{uFrag, pwd},
|
||||
WgListenPort: h.config.LocalWgPort,
|
||||
Version: version.NetbirdVersion(),
|
||||
RosenpassPubKey: h.config.RosenpassConfig.PubKey,
|
||||
RosenpassAddr: h.config.RosenpassConfig.Addr,
|
||||
SessionID: &sid,
|
||||
}
|
||||
|
||||
if h.ice != nil && h.RemoteICESupported() {
|
||||
uFrag, pwd := h.ice.GetLocalUserCredentials()
|
||||
sid := h.ice.SessionID()
|
||||
answer.IceCredentials = IceCredentials{uFrag, pwd}
|
||||
answer.SessionID = &sid
|
||||
}
|
||||
|
||||
if addr, err := h.relay.RelayInstanceAddress(); err == nil {
|
||||
@@ -200,3 +219,18 @@ func (h *Handshaker) buildOfferAnswer() OfferAnswer {
|
||||
|
||||
return answer
|
||||
}
|
||||
|
||||
func (h *Handshaker) updateRemoteICEState(offer *OfferAnswer) {
|
||||
hasICE := offer.IceCredentials.UFrag != "" && offer.IceCredentials.Pwd != ""
|
||||
prev := h.remoteICESupported.Swap(hasICE)
|
||||
if prev != hasICE {
|
||||
if hasICE {
|
||||
h.log.Infof("remote peer started sending ICE credentials")
|
||||
} else {
|
||||
h.log.Infof("remote peer stopped sending ICE credentials")
|
||||
if h.ice != nil {
|
||||
h.ice.Close()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -46,9 +46,13 @@ func (s *Signaler) Ready() bool {
|
||||
|
||||
// SignalOfferAnswer signals either an offer or an answer to remote peer
|
||||
func (s *Signaler) signalOfferAnswer(offerAnswer OfferAnswer, remoteKey string, bodyType sProto.Body_Type) error {
|
||||
sessionIDBytes, err := offerAnswer.SessionID.Bytes()
|
||||
if err != nil {
|
||||
log.Warnf("failed to get session ID bytes: %v", err)
|
||||
var sessionIDBytes []byte
|
||||
if offerAnswer.SessionID != nil {
|
||||
var err error
|
||||
sessionIDBytes, err = offerAnswer.SessionID.Bytes()
|
||||
if err != nil {
|
||||
log.Warnf("failed to get session ID bytes: %v", err)
|
||||
}
|
||||
}
|
||||
msg, err := signal.MarshalCredential(
|
||||
s.wgPrivateKey,
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
//go:build (dragonfly || freebsd || netbsd || openbsd) && !darwin
|
||||
|
||||
package systemops
|
||||
|
||||
// Non-darwin BSDs don't support the IP_BOUND_IF + scoped default model. They
|
||||
// always fall through to the ref-counter exclusion-route path; these stubs
|
||||
// exist only so systemops_unix.go compiles.
|
||||
func (r *SysOps) setupAdvancedRouting() error { return nil }
|
||||
func (r *SysOps) cleanupAdvancedRouting() error { return nil }
|
||||
func (r *SysOps) flushPlatformExtras() error { return nil }
|
||||
241
client/internal/routemanager/systemops/systemops_darwin.go
Normal file
241
client/internal/routemanager/systemops/systemops_darwin.go
Normal file
@@ -0,0 +1,241 @@
|
||||
//go:build darwin && !ios
|
||||
|
||||
package systemops
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/netip"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-multierror"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"golang.org/x/net/route"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
nberrors "github.com/netbirdio/netbird/client/errors"
|
||||
"github.com/netbirdio/netbird/client/internal/routemanager/vars"
|
||||
nbnet "github.com/netbirdio/netbird/client/net"
|
||||
)
|
||||
|
||||
// scopedRouteBudget bounds retries for the scoped default route. Installing or
|
||||
// deleting it matters enough that we're willing to spend longer waiting for the
|
||||
// kernel reply than for per-prefix exclusion routes.
|
||||
const scopedRouteBudget = 5 * time.Second
|
||||
|
||||
// setupAdvancedRouting installs an RTF_IFSCOPE default route per address family
|
||||
// pinned to the current physical egress, so IP_BOUND_IF scoped lookups can
|
||||
// resolve gateway'd destinations while the VPN's split default owns the
|
||||
// unscoped table.
|
||||
//
|
||||
// Timing note: this runs during routeManager.Init, which happens before the
|
||||
// VPN interface is created and before any peer routes propagate. The initial
|
||||
// mgmt / signal / relay TCP dials always fire before this runs, so those
|
||||
// sockets miss the IP_BOUND_IF binding and rely on the kernel's normal route
|
||||
// lookup, which at that point correctly picks the physical default. Those
|
||||
// already-established TCP flows keep their originally-selected interface for
|
||||
// their lifetime on Darwin because the kernel caches the egress route
|
||||
// per-socket at connect time; adding the VPN's 0/1 + 128/1 split default
|
||||
// afterwards does not migrate them since the original en0 default stays in
|
||||
// the table. Any subsequent reconnect via nbnet.NewDialer picks up the
|
||||
// populated bound-iface cache and gets IP_BOUND_IF set cleanly.
|
||||
func (r *SysOps) setupAdvancedRouting() error {
|
||||
// Drop any previously-cached egress interface before reinstalling. On a
|
||||
// refresh, a family that no longer resolves would otherwise keep the stale
|
||||
// binding, causing new sockets to scope to an interface without a matching
|
||||
// scoped default.
|
||||
nbnet.ClearBoundInterfaces()
|
||||
|
||||
if err := r.flushScopedDefaults(); err != nil {
|
||||
log.Warnf("flush residual scoped defaults: %v", err)
|
||||
}
|
||||
|
||||
var merr *multierror.Error
|
||||
installed := 0
|
||||
|
||||
for _, unspec := range []netip.Addr{netip.IPv4Unspecified(), netip.IPv6Unspecified()} {
|
||||
ok, err := r.installScopedDefaultFor(unspec)
|
||||
if err != nil {
|
||||
merr = multierror.Append(merr, err)
|
||||
continue
|
||||
}
|
||||
if ok {
|
||||
installed++
|
||||
}
|
||||
}
|
||||
|
||||
if installed == 0 && merr != nil {
|
||||
return nberrors.FormatErrorOrNil(merr)
|
||||
}
|
||||
if merr != nil {
|
||||
log.Warnf("advanced routing setup partially succeeded: %v", nberrors.FormatErrorOrNil(merr))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// installScopedDefaultFor resolves the physical default nexthop for the given
|
||||
// address family, installs a scoped default via it, and caches the iface for
|
||||
// subsequent IP_BOUND_IF / IPV6_BOUND_IF socket binds.
|
||||
func (r *SysOps) installScopedDefaultFor(unspec netip.Addr) (bool, error) {
|
||||
nexthop, err := GetNextHop(unspec)
|
||||
if err != nil {
|
||||
if errors.Is(err, vars.ErrRouteNotFound) {
|
||||
return false, nil
|
||||
}
|
||||
return false, fmt.Errorf("get default nexthop for %s: %w", unspec, err)
|
||||
}
|
||||
if nexthop.Intf == nil {
|
||||
return false, fmt.Errorf("unusable default nexthop for %s (no interface)", unspec)
|
||||
}
|
||||
|
||||
if err := r.addScopedDefault(unspec, nexthop); err != nil {
|
||||
return false, fmt.Errorf("add scoped default on %s: %w", nexthop.Intf.Name, err)
|
||||
}
|
||||
|
||||
af := unix.AF_INET
|
||||
if unspec.Is6() {
|
||||
af = unix.AF_INET6
|
||||
}
|
||||
nbnet.SetBoundInterface(af, nexthop.Intf)
|
||||
via := "point-to-point"
|
||||
if nexthop.IP.IsValid() {
|
||||
via = nexthop.IP.String()
|
||||
}
|
||||
log.Infof("installed scoped default route via %s on %s for %s", via, nexthop.Intf.Name, afOf(unspec))
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (r *SysOps) cleanupAdvancedRouting() error {
|
||||
nbnet.ClearBoundInterfaces()
|
||||
return r.flushScopedDefaults()
|
||||
}
|
||||
|
||||
// flushPlatformExtras runs darwin-specific residual cleanup hooked into the
|
||||
// generic FlushMarkedRoutes path, so a crashed daemon's scoped defaults get
|
||||
// removed on the next boot regardless of whether a profile is brought up.
|
||||
func (r *SysOps) flushPlatformExtras() error {
|
||||
return r.flushScopedDefaults()
|
||||
}
|
||||
|
||||
// flushScopedDefaults removes any scoped default routes tagged with routeProtoFlag.
|
||||
// Safe to call at startup to clear residual entries from a prior session.
|
||||
func (r *SysOps) flushScopedDefaults() error {
|
||||
rib, err := retryFetchRIB()
|
||||
if err != nil {
|
||||
return fmt.Errorf("fetch routing table: %w", err)
|
||||
}
|
||||
|
||||
msgs, err := route.ParseRIB(route.RIBTypeRoute, rib)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parse routing table: %w", err)
|
||||
}
|
||||
|
||||
var merr *multierror.Error
|
||||
removed := 0
|
||||
|
||||
for _, msg := range msgs {
|
||||
rtMsg, ok := msg.(*route.RouteMessage)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if rtMsg.Flags&routeProtoFlag == 0 {
|
||||
continue
|
||||
}
|
||||
if rtMsg.Flags&unix.RTF_IFSCOPE == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
info, err := MsgToRoute(rtMsg)
|
||||
if err != nil {
|
||||
log.Debugf("skip scoped flush: %v", err)
|
||||
continue
|
||||
}
|
||||
if !info.Dst.IsValid() || info.Dst.Bits() != 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := r.deleteScopedRoute(rtMsg); err != nil {
|
||||
merr = multierror.Append(merr, fmt.Errorf("delete scoped default %s on index %d: %w",
|
||||
info.Dst, rtMsg.Index, err))
|
||||
continue
|
||||
}
|
||||
removed++
|
||||
log.Debugf("flushed residual scoped default %s on index %d", info.Dst, rtMsg.Index)
|
||||
}
|
||||
|
||||
if removed > 0 {
|
||||
log.Infof("flushed %d residual scoped default route(s)", removed)
|
||||
}
|
||||
return nberrors.FormatErrorOrNil(merr)
|
||||
}
|
||||
|
||||
func (r *SysOps) addScopedDefault(unspec netip.Addr, nexthop Nexthop) error {
|
||||
return r.scopedRouteSocket(unix.RTM_ADD, unspec, nexthop)
|
||||
}
|
||||
|
||||
func (r *SysOps) deleteScopedRoute(rtMsg *route.RouteMessage) error {
|
||||
// Preserve identifying flags from the stored route (including RTF_GATEWAY
|
||||
// only if present); kernel-set bits like RTF_DONE don't belong on RTM_DELETE.
|
||||
keep := unix.RTF_UP | unix.RTF_STATIC | unix.RTF_GATEWAY | unix.RTF_IFSCOPE | routeProtoFlag
|
||||
del := &route.RouteMessage{
|
||||
Type: unix.RTM_DELETE,
|
||||
Flags: rtMsg.Flags & keep,
|
||||
Version: unix.RTM_VERSION,
|
||||
Seq: r.getSeq(),
|
||||
Index: rtMsg.Index,
|
||||
Addrs: rtMsg.Addrs,
|
||||
}
|
||||
return r.writeRouteMessage(del, scopedRouteBudget)
|
||||
}
|
||||
|
||||
func (r *SysOps) scopedRouteSocket(action int, unspec netip.Addr, nexthop Nexthop) error {
|
||||
flags := unix.RTF_UP | unix.RTF_STATIC | unix.RTF_IFSCOPE | routeProtoFlag
|
||||
|
||||
msg := &route.RouteMessage{
|
||||
Type: action,
|
||||
Flags: flags,
|
||||
Version: unix.RTM_VERSION,
|
||||
ID: uintptr(os.Getpid()),
|
||||
Seq: r.getSeq(),
|
||||
Index: nexthop.Intf.Index,
|
||||
}
|
||||
|
||||
const numAddrs = unix.RTAX_NETMASK + 1
|
||||
addrs := make([]route.Addr, numAddrs)
|
||||
|
||||
dst, err := addrToRouteAddr(unspec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("build destination: %w", err)
|
||||
}
|
||||
mask, err := prefixToRouteNetmask(netip.PrefixFrom(unspec, 0))
|
||||
if err != nil {
|
||||
return fmt.Errorf("build netmask: %w", err)
|
||||
}
|
||||
addrs[unix.RTAX_DST] = dst
|
||||
addrs[unix.RTAX_NETMASK] = mask
|
||||
|
||||
if nexthop.IP.IsValid() {
|
||||
msg.Flags |= unix.RTF_GATEWAY
|
||||
gw, err := addrToRouteAddr(nexthop.IP.Unmap())
|
||||
if err != nil {
|
||||
return fmt.Errorf("build gateway: %w", err)
|
||||
}
|
||||
addrs[unix.RTAX_GATEWAY] = gw
|
||||
} else {
|
||||
addrs[unix.RTAX_GATEWAY] = &route.LinkAddr{
|
||||
Index: nexthop.Intf.Index,
|
||||
Name: nexthop.Intf.Name,
|
||||
}
|
||||
}
|
||||
msg.Addrs = addrs
|
||||
|
||||
return r.writeRouteMessage(msg, scopedRouteBudget)
|
||||
}
|
||||
|
||||
func afOf(a netip.Addr) string {
|
||||
if a.Is4() {
|
||||
return "IPv4"
|
||||
}
|
||||
return "IPv6"
|
||||
}
|
||||
@@ -21,6 +21,7 @@ import (
|
||||
"github.com/netbirdio/netbird/client/internal/routemanager/util"
|
||||
"github.com/netbirdio/netbird/client/internal/routemanager/vars"
|
||||
"github.com/netbirdio/netbird/client/internal/statemanager"
|
||||
nbnet "github.com/netbirdio/netbird/client/net"
|
||||
"github.com/netbirdio/netbird/client/net/hooks"
|
||||
)
|
||||
|
||||
@@ -31,8 +32,6 @@ var splitDefaultv4_2 = netip.PrefixFrom(netip.AddrFrom4([4]byte{128}), 1)
|
||||
var splitDefaultv6_1 = netip.PrefixFrom(netip.IPv6Unspecified(), 1)
|
||||
var splitDefaultv6_2 = netip.PrefixFrom(netip.AddrFrom16([16]byte{0x80}), 1)
|
||||
|
||||
var ErrRoutingIsSeparate = errors.New("routing is separate")
|
||||
|
||||
func (r *SysOps) setupRefCounter(initAddresses []net.IP, stateManager *statemanager.Manager) error {
|
||||
stateManager.RegisterState(&ShutdownState{})
|
||||
|
||||
@@ -397,12 +396,16 @@ func ipToAddr(ip net.IP, intf *net.Interface) (netip.Addr, error) {
|
||||
}
|
||||
|
||||
// IsAddrRouted checks if the candidate address would route to the vpn, in which case it returns true and the matched prefix.
|
||||
// When advanced routing is active the WG socket is bound to the physical interface (fwmark on linux,
|
||||
// IP_UNICAST_IF on windows, IP_BOUND_IF on darwin) and bypasses the main routing table, so the check is skipped.
|
||||
func IsAddrRouted(addr netip.Addr, vpnRoutes []netip.Prefix) (bool, netip.Prefix) {
|
||||
localRoutes, err := hasSeparateRouting()
|
||||
if nbnet.AdvancedRouting() {
|
||||
return false, netip.Prefix{}
|
||||
}
|
||||
|
||||
localRoutes, err := GetRoutesFromTable()
|
||||
if err != nil {
|
||||
if !errors.Is(err, ErrRoutingIsSeparate) {
|
||||
log.Errorf("Failed to get routes: %v", err)
|
||||
}
|
||||
log.Errorf("Failed to get routes: %v", err)
|
||||
return false, netip.Prefix{}
|
||||
}
|
||||
|
||||
|
||||
@@ -22,10 +22,6 @@ func GetRoutesFromTable() ([]netip.Prefix, error) {
|
||||
return []netip.Prefix{}, nil
|
||||
}
|
||||
|
||||
func hasSeparateRouting() ([]netip.Prefix, error) {
|
||||
return []netip.Prefix{}, nil
|
||||
}
|
||||
|
||||
// GetDetailedRoutesFromTable returns empty routes for WASM.
|
||||
func GetDetailedRoutesFromTable() ([]DetailedRoute, error) {
|
||||
return []DetailedRoute{}, nil
|
||||
|
||||
@@ -894,13 +894,6 @@ func getAddressFamily(prefix netip.Prefix) int {
|
||||
return netlink.FAMILY_V6
|
||||
}
|
||||
|
||||
func hasSeparateRouting() ([]netip.Prefix, error) {
|
||||
if !nbnet.AdvancedRouting() {
|
||||
return GetRoutesFromTable()
|
||||
}
|
||||
return nil, ErrRoutingIsSeparate
|
||||
}
|
||||
|
||||
func isOpErr(err error) bool {
|
||||
// EAFTNOSUPPORT when ipv6 is disabled via sysctl, EOPNOTSUPP when disabled in boot options or otherwise not supported
|
||||
if errors.Is(err, syscall.EAFNOSUPPORT) || errors.Is(err, syscall.EOPNOTSUPP) {
|
||||
|
||||
@@ -48,10 +48,6 @@ func EnableIPForwarding() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func hasSeparateRouting() ([]netip.Prefix, error) {
|
||||
return GetRoutesFromTable()
|
||||
}
|
||||
|
||||
// GetIPRules returns IP rules for debugging (not supported on non-Linux platforms)
|
||||
func GetIPRules() ([]IPRule, error) {
|
||||
log.Infof("IP rules collection is not supported on %s", runtime.GOOS)
|
||||
|
||||
@@ -25,6 +25,9 @@ import (
|
||||
|
||||
const (
|
||||
envRouteProtoFlag = "NB_ROUTE_PROTO_FLAG"
|
||||
|
||||
// routeBudget bounds retries for per-prefix exclusion route programming.
|
||||
routeBudget = 1 * time.Second
|
||||
)
|
||||
|
||||
var routeProtoFlag int
|
||||
@@ -41,26 +44,42 @@ func init() {
|
||||
}
|
||||
|
||||
func (r *SysOps) SetupRouting(initAddresses []net.IP, stateManager *statemanager.Manager, advancedRouting bool) error {
|
||||
if advancedRouting {
|
||||
return r.setupAdvancedRouting()
|
||||
}
|
||||
|
||||
log.Infof("Using legacy routing setup with ref counters")
|
||||
return r.setupRefCounter(initAddresses, stateManager)
|
||||
}
|
||||
|
||||
func (r *SysOps) CleanupRouting(stateManager *statemanager.Manager, advancedRouting bool) error {
|
||||
if advancedRouting {
|
||||
return r.cleanupAdvancedRouting()
|
||||
}
|
||||
|
||||
return r.cleanupRefCounter(stateManager)
|
||||
}
|
||||
|
||||
// FlushMarkedRoutes removes single IP exclusion routes marked with the configured RTF_PROTO flag.
|
||||
// On darwin it also flushes residual RTF_IFSCOPE scoped default routes so a
|
||||
// crashed prior session can't leave crud in the table.
|
||||
func (r *SysOps) FlushMarkedRoutes() error {
|
||||
var merr *multierror.Error
|
||||
|
||||
if err := r.flushPlatformExtras(); err != nil {
|
||||
merr = multierror.Append(merr, fmt.Errorf("flush platform extras: %w", err))
|
||||
}
|
||||
|
||||
rib, err := retryFetchRIB()
|
||||
if err != nil {
|
||||
return fmt.Errorf("fetch routing table: %w", err)
|
||||
return nberrors.FormatErrorOrNil(multierror.Append(merr, fmt.Errorf("fetch routing table: %w", err)))
|
||||
}
|
||||
|
||||
msgs, err := route.ParseRIB(route.RIBTypeRoute, rib)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parse routing table: %w", err)
|
||||
return nberrors.FormatErrorOrNil(multierror.Append(merr, fmt.Errorf("parse routing table: %w", err)))
|
||||
}
|
||||
|
||||
var merr *multierror.Error
|
||||
flushedCount := 0
|
||||
|
||||
for _, msg := range msgs {
|
||||
@@ -117,12 +136,12 @@ func (r *SysOps) routeSocket(action int, prefix netip.Prefix, nexthop Nexthop) e
|
||||
return fmt.Errorf("invalid prefix: %s", prefix)
|
||||
}
|
||||
|
||||
expBackOff := backoff.NewExponentialBackOff()
|
||||
expBackOff.InitialInterval = 50 * time.Millisecond
|
||||
expBackOff.MaxInterval = 500 * time.Millisecond
|
||||
expBackOff.MaxElapsedTime = 1 * time.Second
|
||||
msg, err := r.buildRouteMessage(action, prefix, nexthop)
|
||||
if err != nil {
|
||||
return fmt.Errorf("build route message: %w", err)
|
||||
}
|
||||
|
||||
if err := backoff.Retry(r.routeOp(action, prefix, nexthop), expBackOff); err != nil {
|
||||
if err := r.writeRouteMessage(msg, routeBudget); err != nil {
|
||||
a := "add"
|
||||
if action == unix.RTM_DELETE {
|
||||
a = "remove"
|
||||
@@ -132,50 +151,91 @@ func (r *SysOps) routeSocket(action int, prefix netip.Prefix, nexthop Nexthop) e
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *SysOps) routeOp(action int, prefix netip.Prefix, nexthop Nexthop) func() error {
|
||||
operation := func() error {
|
||||
fd, err := unix.Socket(syscall.AF_ROUTE, syscall.SOCK_RAW, syscall.AF_UNSPEC)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open routing socket: %w", err)
|
||||
// writeRouteMessage sends a route message over AF_ROUTE and waits for the
|
||||
// kernel's matching reply, retrying transient failures until budget elapses.
|
||||
// Callers do not need to manage sockets or seq numbers themselves.
|
||||
func (r *SysOps) writeRouteMessage(msg *route.RouteMessage, budget time.Duration) error {
|
||||
expBackOff := backoff.NewExponentialBackOff()
|
||||
expBackOff.InitialInterval = 50 * time.Millisecond
|
||||
expBackOff.MaxInterval = 500 * time.Millisecond
|
||||
expBackOff.MaxElapsedTime = budget
|
||||
|
||||
return backoff.Retry(func() error { return routeMessageRoundtrip(msg) }, expBackOff)
|
||||
}
|
||||
|
||||
func routeMessageRoundtrip(msg *route.RouteMessage) error {
|
||||
fd, err := unix.Socket(syscall.AF_ROUTE, syscall.SOCK_RAW, syscall.AF_UNSPEC)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open routing socket: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := unix.Close(fd); err != nil && !errors.Is(err, unix.EBADF) {
|
||||
log.Warnf("close routing socket: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := unix.Close(fd); err != nil && !errors.Is(err, unix.EBADF) {
|
||||
log.Warnf("failed to close routing socket: %v", err)
|
||||
}()
|
||||
|
||||
tv := unix.Timeval{Sec: 1}
|
||||
if err := unix.SetsockoptTimeval(fd, unix.SOL_SOCKET, unix.SO_RCVTIMEO, &tv); err != nil {
|
||||
return backoff.Permanent(fmt.Errorf("set recv timeout: %w", err))
|
||||
}
|
||||
|
||||
// AF_ROUTE is a broadcast channel: every route socket on the host sees
|
||||
// every RTM_* event. With concurrent route programming the default
|
||||
// per-socket queue overflows and our own reply gets dropped.
|
||||
if err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF, 1<<20); err != nil {
|
||||
log.Debugf("set SO_RCVBUF on route socket: %v", err)
|
||||
}
|
||||
|
||||
bytes, err := msg.Marshal()
|
||||
if err != nil {
|
||||
return backoff.Permanent(fmt.Errorf("marshal: %w", err))
|
||||
}
|
||||
|
||||
if _, err = unix.Write(fd, bytes); err != nil {
|
||||
if errors.Is(err, unix.ENOBUFS) || errors.Is(err, unix.EAGAIN) {
|
||||
return fmt.Errorf("write: %w", err)
|
||||
}
|
||||
return backoff.Permanent(fmt.Errorf("write: %w", err))
|
||||
}
|
||||
return readRouteResponse(fd, msg.Type, msg.Seq)
|
||||
}
|
||||
|
||||
// readRouteResponse reads from the AF_ROUTE socket until it sees a reply
|
||||
// matching our write (same type, seq, and pid). AF_ROUTE SOCK_RAW is a
|
||||
// broadcast channel: interface up/down, third-party route changes and neighbor
|
||||
// discovery events can all land between our write and read, so we must filter.
|
||||
func readRouteResponse(fd, wantType, wantSeq int) error {
|
||||
pid := int32(os.Getpid())
|
||||
resp := make([]byte, 2048)
|
||||
deadline := time.Now().Add(time.Second)
|
||||
for {
|
||||
if time.Now().After(deadline) {
|
||||
// Transient: under concurrent pressure the kernel can drop our reply
|
||||
// from the socket buffer. Let backoff.Retry re-send with a fresh seq.
|
||||
return fmt.Errorf("read: timeout waiting for route reply type=%d seq=%d", wantType, wantSeq)
|
||||
}
|
||||
n, err := unix.Read(fd, resp)
|
||||
if err != nil {
|
||||
if errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EWOULDBLOCK) {
|
||||
// SO_RCVTIMEO fired while waiting; loop to re-check the absolute deadline.
|
||||
continue
|
||||
}
|
||||
}()
|
||||
|
||||
msg, err := r.buildRouteMessage(action, prefix, nexthop)
|
||||
if err != nil {
|
||||
return backoff.Permanent(fmt.Errorf("build route message: %w", err))
|
||||
return backoff.Permanent(fmt.Errorf("read: %w", err))
|
||||
}
|
||||
|
||||
msgBytes, err := msg.Marshal()
|
||||
if err != nil {
|
||||
return backoff.Permanent(fmt.Errorf("marshal route message: %w", err))
|
||||
if n < int(unsafe.Sizeof(unix.RtMsghdr{})) {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, err = unix.Write(fd, msgBytes); err != nil {
|
||||
if errors.Is(err, unix.ENOBUFS) || errors.Is(err, unix.EAGAIN) {
|
||||
return fmt.Errorf("write: %w", err)
|
||||
}
|
||||
return backoff.Permanent(fmt.Errorf("write: %w", err))
|
||||
hdr := (*unix.RtMsghdr)(unsafe.Pointer(&resp[0]))
|
||||
// Darwin reflects the sender's pid on replies; matching (Type, Seq, Pid)
|
||||
// uniquely identifies our own reply among broadcast traffic.
|
||||
if int(hdr.Type) != wantType || int(hdr.Seq) != wantSeq || hdr.Pid != pid {
|
||||
continue
|
||||
}
|
||||
|
||||
respBuf := make([]byte, 2048)
|
||||
n, err := unix.Read(fd, respBuf)
|
||||
if err != nil {
|
||||
return backoff.Permanent(fmt.Errorf("read route response: %w", err))
|
||||
if hdr.Errno != 0 {
|
||||
return backoff.Permanent(fmt.Errorf("kernel: %w", syscall.Errno(hdr.Errno)))
|
||||
}
|
||||
|
||||
if n > 0 {
|
||||
if err := r.parseRouteResponse(respBuf[:n]); err != nil {
|
||||
return backoff.Permanent(err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
return operation
|
||||
}
|
||||
|
||||
func (r *SysOps) buildRouteMessage(action int, prefix netip.Prefix, nexthop Nexthop) (msg *route.RouteMessage, err error) {
|
||||
@@ -183,6 +243,7 @@ func (r *SysOps) buildRouteMessage(action int, prefix netip.Prefix, nexthop Next
|
||||
Type: action,
|
||||
Flags: unix.RTF_UP | routeProtoFlag,
|
||||
Version: unix.RTM_VERSION,
|
||||
ID: uintptr(os.Getpid()),
|
||||
Seq: r.getSeq(),
|
||||
}
|
||||
|
||||
@@ -221,19 +282,6 @@ func (r *SysOps) buildRouteMessage(action int, prefix netip.Prefix, nexthop Next
|
||||
return msg, nil
|
||||
}
|
||||
|
||||
func (r *SysOps) parseRouteResponse(buf []byte) error {
|
||||
if len(buf) < int(unsafe.Sizeof(unix.RtMsghdr{})) {
|
||||
return nil
|
||||
}
|
||||
|
||||
rtMsg := (*unix.RtMsghdr)(unsafe.Pointer(&buf[0]))
|
||||
if rtMsg.Errno != 0 {
|
||||
return fmt.Errorf("parse: %d", rtMsg.Errno)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// addrToRouteAddr converts a netip.Addr to the appropriate route.Addr (*route.Inet4Addr or *route.Inet6Addr).
|
||||
func addrToRouteAddr(addr netip.Addr) (route.Addr, error) {
|
||||
if addr.Is4() {
|
||||
|
||||
5
client/net/dialer_init_darwin.go
Normal file
5
client/net/dialer_init_darwin.go
Normal file
@@ -0,0 +1,5 @@
|
||||
package net
|
||||
|
||||
func (d *Dialer) init() {
|
||||
d.Dialer.Control = applyBoundIfToSocket
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
//go:build !linux && !windows
|
||||
//go:build !linux && !windows && !darwin
|
||||
|
||||
package net
|
||||
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
//go:build android
|
||||
|
||||
package net
|
||||
|
||||
// Init initializes the network environment for Android
|
||||
func Init() {
|
||||
// No initialization needed on Android
|
||||
}
|
||||
|
||||
// AdvancedRouting reports whether routing loops can be avoided without using exclusion routes.
|
||||
// Always returns true on Android since we cannot handle routes dynamically.
|
||||
func AdvancedRouting() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// SetVPNInterfaceName is a no-op on Android
|
||||
func SetVPNInterfaceName(name string) {
|
||||
// No-op on Android - not needed for Android VPN service
|
||||
}
|
||||
|
||||
// GetVPNInterfaceName returns empty string on Android
|
||||
func GetVPNInterfaceName() string {
|
||||
return ""
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
//go:build windows
|
||||
//go:build (darwin && !ios) || windows
|
||||
|
||||
package net
|
||||
|
||||
@@ -24,17 +24,22 @@ func Init() {
|
||||
}
|
||||
|
||||
func checkAdvancedRoutingSupport() bool {
|
||||
var err error
|
||||
var legacyRouting bool
|
||||
legacyRouting := false
|
||||
if val := os.Getenv(envUseLegacyRouting); val != "" {
|
||||
legacyRouting, err = strconv.ParseBool(val)
|
||||
parsed, err := strconv.ParseBool(val)
|
||||
if err != nil {
|
||||
log.Warnf("failed to parse %s: %v", envUseLegacyRouting, err)
|
||||
log.Warnf("ignoring unparsable %s=%q: %v", envUseLegacyRouting, val, err)
|
||||
} else {
|
||||
legacyRouting = parsed
|
||||
}
|
||||
}
|
||||
|
||||
if legacyRouting || netstack.IsEnabled() {
|
||||
log.Info("advanced routing has been requested to be disabled")
|
||||
if legacyRouting {
|
||||
log.Infof("advanced routing disabled: legacy routing requested via %s", envUseLegacyRouting)
|
||||
return false
|
||||
}
|
||||
if netstack.IsEnabled() {
|
||||
log.Info("advanced routing disabled: netstack mode is enabled")
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
//go:build !linux && !windows && !android
|
||||
//go:build !linux && !windows && !darwin
|
||||
|
||||
package net
|
||||
|
||||
|
||||
25
client/net/env_mobile.go
Normal file
25
client/net/env_mobile.go
Normal file
@@ -0,0 +1,25 @@
|
||||
//go:build ios || android
|
||||
|
||||
package net
|
||||
|
||||
// Init initializes the network environment for mobile platforms.
|
||||
func Init() {
|
||||
// no-op on mobile: routing scope is owned by the VPN extension.
|
||||
}
|
||||
|
||||
// AdvancedRouting reports whether routing loops can be avoided without using exclusion routes.
|
||||
// Always returns true on mobile since routes cannot be handled dynamically and the VPN extension
|
||||
// owns the routing scope.
|
||||
func AdvancedRouting() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// SetVPNInterfaceName is a no-op on mobile.
|
||||
func SetVPNInterfaceName(string) {
|
||||
// no-op on mobile: the VPN extension manages the interface.
|
||||
}
|
||||
|
||||
// GetVPNInterfaceName returns an empty string on mobile.
|
||||
func GetVPNInterfaceName() string {
|
||||
return ""
|
||||
}
|
||||
5
client/net/listener_init_darwin.go
Normal file
5
client/net/listener_init_darwin.go
Normal file
@@ -0,0 +1,5 @@
|
||||
package net
|
||||
|
||||
func (l *ListenerConfig) init() {
|
||||
l.ListenConfig.Control = applyBoundIfToSocket
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
//go:build !linux && !windows
|
||||
//go:build !linux && !windows && !darwin
|
||||
|
||||
package net
|
||||
|
||||
|
||||
160
client/net/net_darwin.go
Normal file
160
client/net/net_darwin.go
Normal file
@@ -0,0 +1,160 @@
|
||||
package net
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"net/netip"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// On darwin IPV6_BOUND_IF also scopes v4-mapped egress from dual-stack
|
||||
// (IPV6_V6ONLY=0) AF_INET6 sockets, so a single setsockopt on "udp6"/"tcp6"
|
||||
// covers both families. Setting IP_BOUND_IF on an AF_INET6 socket returns
|
||||
// EINVAL regardless of V6ONLY because the IPPROTO_IP ctloutput path is
|
||||
// dispatched by socket domain (AF_INET only) not by inp_vflag.
|
||||
|
||||
// boundIface holds the physical interface chosen at routing setup time. Sockets
|
||||
// created via nbnet.NewDialer / nbnet.NewListener bind to it via IP_BOUND_IF
|
||||
// (IPv4) or IPV6_BOUND_IF (IPv6 / dual-stack) so their scoped route lookup
|
||||
// hits the RTF_IFSCOPE default installed by the routemanager, rather than
|
||||
// following the VPN's split default.
|
||||
var (
|
||||
boundIfaceMu sync.RWMutex
|
||||
boundIface4 *net.Interface
|
||||
boundIface6 *net.Interface
|
||||
)
|
||||
|
||||
// SetBoundInterface records the egress interface for an address family. Called
|
||||
// by the routemanager after a scoped default route has been installed.
|
||||
// af must be unix.AF_INET or unix.AF_INET6; other values are ignored.
|
||||
// nil iface is rejected — use ClearBoundInterfaces to clear all slots.
|
||||
func SetBoundInterface(af int, iface *net.Interface) {
|
||||
if iface == nil {
|
||||
log.Warnf("SetBoundInterface: nil iface for AF %d, ignored", af)
|
||||
return
|
||||
}
|
||||
boundIfaceMu.Lock()
|
||||
defer boundIfaceMu.Unlock()
|
||||
switch af {
|
||||
case unix.AF_INET:
|
||||
boundIface4 = iface
|
||||
case unix.AF_INET6:
|
||||
boundIface6 = iface
|
||||
default:
|
||||
log.Warnf("SetBoundInterface: unsupported address family %d", af)
|
||||
}
|
||||
}
|
||||
|
||||
// ClearBoundInterfaces resets the cached egress interfaces. Called by the
|
||||
// routemanager during cleanup.
|
||||
func ClearBoundInterfaces() {
|
||||
boundIfaceMu.Lock()
|
||||
defer boundIfaceMu.Unlock()
|
||||
boundIface4 = nil
|
||||
boundIface6 = nil
|
||||
}
|
||||
|
||||
// boundInterfaceFor returns the cached egress interface for a socket's address
|
||||
// family, falling back to the other family if the preferred slot is empty.
|
||||
// The kernel stores both IP_BOUND_IF and IPV6_BOUND_IF in inp_boundifp, so
|
||||
// either setsockopt scopes the socket; preferring same-family still matters
|
||||
// when v4 and v6 defaults egress different NICs.
|
||||
func boundInterfaceFor(network, address string) *net.Interface {
|
||||
if iface := zoneInterface(address); iface != nil {
|
||||
return iface
|
||||
}
|
||||
|
||||
boundIfaceMu.RLock()
|
||||
defer boundIfaceMu.RUnlock()
|
||||
|
||||
primary, secondary := boundIface4, boundIface6
|
||||
if isV6Network(network) {
|
||||
primary, secondary = boundIface6, boundIface4
|
||||
}
|
||||
if primary != nil {
|
||||
return primary
|
||||
}
|
||||
return secondary
|
||||
}
|
||||
|
||||
func isV6Network(network string) bool {
|
||||
return strings.HasSuffix(network, "6")
|
||||
}
|
||||
|
||||
// zoneInterface extracts an explicit interface from an IPv6 link-local zone (e.g. fe80::1%en0).
|
||||
func zoneInterface(address string) *net.Interface {
|
||||
if address == "" {
|
||||
return nil
|
||||
}
|
||||
addr, err := netip.ParseAddrPort(address)
|
||||
if err != nil {
|
||||
a, err := netip.ParseAddr(address)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
addr = netip.AddrPortFrom(a, 0)
|
||||
}
|
||||
zone := addr.Addr().Zone()
|
||||
if zone == "" {
|
||||
return nil
|
||||
}
|
||||
if iface, err := net.InterfaceByName(zone); err == nil {
|
||||
return iface
|
||||
}
|
||||
if idx, err := strconv.Atoi(zone); err == nil {
|
||||
if iface, err := net.InterfaceByIndex(idx); err == nil {
|
||||
return iface
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setIPv4BoundIf(fd uintptr, iface *net.Interface) error {
|
||||
if err := unix.SetsockoptInt(int(fd), unix.IPPROTO_IP, unix.IP_BOUND_IF, iface.Index); err != nil {
|
||||
return fmt.Errorf("set IP_BOUND_IF: %w (interface: %s, index: %d)", err, iface.Name, iface.Index)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setIPv6BoundIf(fd uintptr, iface *net.Interface) error {
|
||||
if err := unix.SetsockoptInt(int(fd), unix.IPPROTO_IPV6, unix.IPV6_BOUND_IF, iface.Index); err != nil {
|
||||
return fmt.Errorf("set IPV6_BOUND_IF: %w (interface: %s, index: %d)", err, iface.Name, iface.Index)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// applyBoundIfToSocket binds the socket to the cached physical egress interface
|
||||
// so scoped route lookup avoids the VPN utun and egresses the underlay directly.
|
||||
func applyBoundIfToSocket(network, address string, c syscall.RawConn) error {
|
||||
if !AdvancedRouting() {
|
||||
return nil
|
||||
}
|
||||
|
||||
iface := boundInterfaceFor(network, address)
|
||||
if iface == nil {
|
||||
log.Debugf("no bound iface cached for %s to %s, skipping BOUND_IF", network, address)
|
||||
return nil
|
||||
}
|
||||
|
||||
isV6 := isV6Network(network)
|
||||
var controlErr error
|
||||
if err := c.Control(func(fd uintptr) {
|
||||
if isV6 {
|
||||
controlErr = setIPv6BoundIf(fd, iface)
|
||||
} else {
|
||||
controlErr = setIPv4BoundIf(fd, iface)
|
||||
}
|
||||
if controlErr == nil {
|
||||
log.Debugf("set BOUND_IF=%d on %s for %s to %s", iface.Index, iface.Name, network, address)
|
||||
}
|
||||
}); err != nil {
|
||||
return fmt.Errorf("control: %w", err)
|
||||
}
|
||||
return controlErr
|
||||
}
|
||||
@@ -12,7 +12,6 @@ import (
|
||||
"github.com/netbirdio/netbird/client/internal"
|
||||
"github.com/netbirdio/netbird/client/internal/routemanager/systemops"
|
||||
"github.com/netbirdio/netbird/client/internal/statemanager"
|
||||
nbnet "github.com/netbirdio/netbird/client/net"
|
||||
"github.com/netbirdio/netbird/client/proto"
|
||||
)
|
||||
|
||||
@@ -138,10 +137,8 @@ func restoreResidualState(ctx context.Context, statePath string) error {
|
||||
}
|
||||
|
||||
// clean up any remaining routes independently of the state file
|
||||
if !nbnet.AdvancedRouting() {
|
||||
if err := systemops.New(nil, nil).FlushMarkedRoutes(); err != nil {
|
||||
merr = multierror.Append(merr, fmt.Errorf("flush marked routes: %w", err))
|
||||
}
|
||||
if err := systemops.New(nil, nil).FlushMarkedRoutes(); err != nil {
|
||||
merr = multierror.Append(merr, fmt.Errorf("flush marked routes: %w", err))
|
||||
}
|
||||
|
||||
return nberrors.FormatErrorOrNil(merr)
|
||||
|
||||
@@ -187,24 +187,23 @@ func (m *Manager) buildPeerConfig(allHostPatterns []string) (string, error) {
|
||||
return "", fmt.Errorf("get NetBird executable path: %w", err)
|
||||
}
|
||||
|
||||
hostLine := strings.Join(deduplicatedPatterns, " ")
|
||||
config := fmt.Sprintf("Host %s\n", hostLine)
|
||||
config += fmt.Sprintf(" Match exec \"%s ssh detect %%h %%p\"\n", execPath)
|
||||
config += " PreferredAuthentications password,publickey,keyboard-interactive\n"
|
||||
config += " PasswordAuthentication yes\n"
|
||||
config += " PubkeyAuthentication yes\n"
|
||||
config += " BatchMode no\n"
|
||||
config += fmt.Sprintf(" ProxyCommand %s ssh proxy %%h %%p\n", execPath)
|
||||
config += " StrictHostKeyChecking no\n"
|
||||
hostList := strings.Join(deduplicatedPatterns, ",")
|
||||
config := fmt.Sprintf("Match host \"%s\" exec \"%s ssh detect %%h %%p\"\n", hostList, execPath)
|
||||
config += " PreferredAuthentications password,publickey,keyboard-interactive\n"
|
||||
config += " PasswordAuthentication yes\n"
|
||||
config += " PubkeyAuthentication yes\n"
|
||||
config += " BatchMode no\n"
|
||||
config += fmt.Sprintf(" ProxyCommand %s ssh proxy %%h %%p\n", execPath)
|
||||
config += " StrictHostKeyChecking no\n"
|
||||
|
||||
if runtime.GOOS == "windows" {
|
||||
config += " UserKnownHostsFile NUL\n"
|
||||
config += " UserKnownHostsFile NUL\n"
|
||||
} else {
|
||||
config += " UserKnownHostsFile /dev/null\n"
|
||||
config += " UserKnownHostsFile /dev/null\n"
|
||||
}
|
||||
|
||||
config += " CheckHostIP no\n"
|
||||
config += " LogLevel ERROR\n\n"
|
||||
config += " CheckHostIP no\n"
|
||||
config += " LogLevel ERROR\n\n"
|
||||
|
||||
return config, nil
|
||||
}
|
||||
|
||||
@@ -116,6 +116,37 @@ func TestManager_PeerLimit(t *testing.T) {
|
||||
assert.True(t, os.IsNotExist(err), "SSH config should not be created with too many peers")
|
||||
}
|
||||
|
||||
func TestManager_MatchHostFormat(t *testing.T) {
|
||||
tempDir, err := os.MkdirTemp("", "netbird-ssh-config-test")
|
||||
require.NoError(t, err)
|
||||
defer func() { assert.NoError(t, os.RemoveAll(tempDir)) }()
|
||||
|
||||
manager := &Manager{
|
||||
sshConfigDir: filepath.Join(tempDir, "ssh_config.d"),
|
||||
sshConfigFile: "99-netbird.conf",
|
||||
}
|
||||
|
||||
peers := []PeerSSHInfo{
|
||||
{Hostname: "peer1", IP: "100.125.1.1", FQDN: "peer1.nb.internal"},
|
||||
{Hostname: "peer2", IP: "100.125.1.2", FQDN: "peer2.nb.internal"},
|
||||
}
|
||||
|
||||
err = manager.SetupSSHClientConfig(peers)
|
||||
require.NoError(t, err)
|
||||
|
||||
configPath := filepath.Join(manager.sshConfigDir, manager.sshConfigFile)
|
||||
content, err := os.ReadFile(configPath)
|
||||
require.NoError(t, err)
|
||||
configStr := string(content)
|
||||
|
||||
// Must use "Match host" with comma-separated patterns, not a bare "Host" directive.
|
||||
// A bare "Host" followed by "Match exec" is incorrect per ssh_config(5): the Host block
|
||||
// ends at the next Match keyword, making it a no-op and leaving the Match exec unscoped.
|
||||
assert.NotContains(t, configStr, "\nHost ", "should not use bare Host directive")
|
||||
assert.Contains(t, configStr, "Match host \"100.125.1.1,peer1.nb.internal,peer1,100.125.1.2,peer2.nb.internal,peer2\"",
|
||||
"should use Match host with comma-separated patterns")
|
||||
}
|
||||
|
||||
func TestManager_ForcedSSHConfig(t *testing.T) {
|
||||
// Set force environment variable
|
||||
t.Setenv(EnvForceSSHConfig, "true")
|
||||
|
||||
@@ -2,7 +2,6 @@ package system
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net"
|
||||
"net/netip"
|
||||
"strings"
|
||||
|
||||
@@ -145,59 +144,6 @@ func extractDeviceName(ctx context.Context, defaultName string) string {
|
||||
return v
|
||||
}
|
||||
|
||||
func networkAddresses() ([]NetworkAddress, error) {
|
||||
interfaces, err := net.Interfaces()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var netAddresses []NetworkAddress
|
||||
for _, iface := range interfaces {
|
||||
if iface.Flags&net.FlagUp == 0 {
|
||||
continue
|
||||
}
|
||||
if iface.HardwareAddr.String() == "" {
|
||||
continue
|
||||
}
|
||||
addrs, err := iface.Addrs()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, address := range addrs {
|
||||
ipNet, ok := address.(*net.IPNet)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if ipNet.IP.IsLoopback() {
|
||||
continue
|
||||
}
|
||||
|
||||
netAddr := NetworkAddress{
|
||||
NetIP: netip.MustParsePrefix(ipNet.String()),
|
||||
Mac: iface.HardwareAddr.String(),
|
||||
}
|
||||
|
||||
if isDuplicated(netAddresses, netAddr) {
|
||||
continue
|
||||
}
|
||||
|
||||
netAddresses = append(netAddresses, netAddr)
|
||||
}
|
||||
}
|
||||
return netAddresses, nil
|
||||
}
|
||||
|
||||
func isDuplicated(addresses []NetworkAddress, addr NetworkAddress) bool {
|
||||
for _, duplicated := range addresses {
|
||||
if duplicated.NetIP == addr.NetIP {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// GetInfoWithChecks retrieves and parses the system information with applied checks.
|
||||
func GetInfoWithChecks(ctx context.Context, checks []*proto.Checks) (*Info, error) {
|
||||
log.Debugf("gathering system information with checks: %d", len(checks))
|
||||
|
||||
@@ -2,6 +2,8 @@ package system
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net"
|
||||
"net/netip"
|
||||
"runtime"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
@@ -42,6 +44,66 @@ func GetInfo(ctx context.Context) *Info {
|
||||
return gio
|
||||
}
|
||||
|
||||
// networkAddresses returns the list of network addresses on iOS.
|
||||
// On iOS, hardware (MAC) addresses are not available due to Apple's privacy
|
||||
// restrictions (iOS returns a fixed 02:00:00:00:00:00 placeholder), so we
|
||||
// leave Mac empty to match Android's behavior. We also skip the HardwareAddr
|
||||
// check that other platforms use and filter out link-local addresses as they
|
||||
// are not useful for posture checks.
|
||||
func networkAddresses() ([]NetworkAddress, error) {
|
||||
interfaces, err := net.Interfaces()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var netAddresses []NetworkAddress
|
||||
for _, iface := range interfaces {
|
||||
if iface.Flags&net.FlagUp == 0 {
|
||||
continue
|
||||
}
|
||||
addrs, err := iface.Addrs()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, address := range addrs {
|
||||
netAddr, ok := toNetworkAddress(address)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if isDuplicated(netAddresses, netAddr) {
|
||||
continue
|
||||
}
|
||||
netAddresses = append(netAddresses, netAddr)
|
||||
}
|
||||
}
|
||||
return netAddresses, nil
|
||||
}
|
||||
|
||||
func toNetworkAddress(address net.Addr) (NetworkAddress, bool) {
|
||||
ipNet, ok := address.(*net.IPNet)
|
||||
if !ok {
|
||||
return NetworkAddress{}, false
|
||||
}
|
||||
if ipNet.IP.IsLoopback() || ipNet.IP.IsLinkLocalUnicast() || ipNet.IP.IsMulticast() {
|
||||
return NetworkAddress{}, false
|
||||
}
|
||||
prefix, err := netip.ParsePrefix(ipNet.String())
|
||||
if err != nil {
|
||||
return NetworkAddress{}, false
|
||||
}
|
||||
return NetworkAddress{NetIP: prefix, Mac: ""}, true
|
||||
}
|
||||
|
||||
func isDuplicated(addresses []NetworkAddress, addr NetworkAddress) bool {
|
||||
for _, duplicated := range addresses {
|
||||
if duplicated.NetIP == addr.NetIP {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// checkFileAndProcess checks if the file path exists and if a process is running at that path.
|
||||
func checkFileAndProcess(paths []string) ([]File, error) {
|
||||
return []File{}, nil
|
||||
|
||||
66
client/system/network_addr.go
Normal file
66
client/system/network_addr.go
Normal file
@@ -0,0 +1,66 @@
|
||||
//go:build !ios
|
||||
|
||||
package system
|
||||
|
||||
import (
|
||||
"net"
|
||||
"net/netip"
|
||||
)
|
||||
|
||||
func networkAddresses() ([]NetworkAddress, error) {
|
||||
interfaces, err := net.Interfaces()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var netAddresses []NetworkAddress
|
||||
for _, iface := range interfaces {
|
||||
if iface.Flags&net.FlagUp == 0 {
|
||||
continue
|
||||
}
|
||||
if iface.HardwareAddr.String() == "" {
|
||||
continue
|
||||
}
|
||||
addrs, err := iface.Addrs()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
mac := iface.HardwareAddr.String()
|
||||
for _, address := range addrs {
|
||||
netAddr, ok := toNetworkAddress(address, mac)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if isDuplicated(netAddresses, netAddr) {
|
||||
continue
|
||||
}
|
||||
netAddresses = append(netAddresses, netAddr)
|
||||
}
|
||||
}
|
||||
return netAddresses, nil
|
||||
}
|
||||
|
||||
func toNetworkAddress(address net.Addr, mac string) (NetworkAddress, bool) {
|
||||
ipNet, ok := address.(*net.IPNet)
|
||||
if !ok {
|
||||
return NetworkAddress{}, false
|
||||
}
|
||||
if ipNet.IP.IsLoopback() {
|
||||
return NetworkAddress{}, false
|
||||
}
|
||||
prefix, err := netip.ParsePrefix(ipNet.String())
|
||||
if err != nil {
|
||||
return NetworkAddress{}, false
|
||||
}
|
||||
return NetworkAddress{NetIP: prefix, Mac: mac}, true
|
||||
}
|
||||
|
||||
func isDuplicated(addresses []NetworkAddress, addr NetworkAddress) bool {
|
||||
for _, duplicated := range addresses {
|
||||
if duplicated.NetIP == addr.NetIP {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
Reference in New Issue
Block a user