Compare commits

...

5 Commits

Author SHA1 Message Date
Zoltán Papp
5d403a79ba Remove unused function 2025-01-29 14:54:24 +01:00
Zoltán Papp
14a99a8693 Remove the on reconnect callback from client layer. This event will be managed by guard. 2025-01-29 13:25:56 +01:00
Viktor Liu
e20be2397c [client] Add missing peer ACL flush (#3247) 2025-01-28 23:25:22 +01:00
Maycon Santos
46766e7e24 [misc] Update sign pipeline version (#3246) 2025-01-28 22:48:19 +01:00
Viktor Liu
a7ddb8f1f8 [client] Replace engine probes with direct calls (#3195) 2025-01-28 12:25:45 +01:00
10 changed files with 132 additions and 230 deletions

View File

@@ -9,7 +9,7 @@ on:
pull_request:
env:
SIGN_PIPE_VER: "v0.0.17"
SIGN_PIPE_VER: "v0.0.18"
GORELEASER_VER: "v2.3.2"
PRODUCT_NAME: "NetBird"
COPYRIGHT: "Wiretrustee UG (haftungsbeschreankt)"

View File

@@ -190,7 +190,7 @@ func runInForegroundMode(ctx context.Context, cmd *cobra.Command) error {
r.GetFullStatus()
connectClient := internal.NewConnectClient(ctx, config, r)
return connectClient.Run()
return connectClient.Run(nil)
}
func runInDaemonMode(ctx context.Context, cmd *cobra.Command) error {

View File

@@ -348,6 +348,10 @@ func (m *AclManager) addIOFiltering(
UserData: userData,
})
if err := m.rConn.Flush(); err != nil {
return nil, fmt.Errorf(flushError, err)
}
rule := &Rule{
nftRule: nftRule,
mangleRule: m.createPreroutingRule(expressions, userData),
@@ -359,6 +363,7 @@ func (m *AclManager) addIOFiltering(
if ipset != nil {
m.ipsetStore.AddReferenceToIpset(ipset.Name)
}
return rule, nil
}

View File

@@ -59,13 +59,8 @@ func NewConnectClient(
}
// Run with main logic.
func (c *ConnectClient) Run() error {
return c.run(MobileDependency{}, nil, nil)
}
// RunWithProbes runs the client's main logic with probes attached
func (c *ConnectClient) RunWithProbes(probes *ProbeHolder, runningChan chan error) error {
return c.run(MobileDependency{}, probes, runningChan)
func (c *ConnectClient) Run(runningChan chan error) error {
return c.run(MobileDependency{}, runningChan)
}
// RunOnAndroid with main logic on mobile system
@@ -84,7 +79,7 @@ func (c *ConnectClient) RunOnAndroid(
HostDNSAddresses: dnsAddresses,
DnsReadyListener: dnsReadyListener,
}
return c.run(mobileDependency, nil, nil)
return c.run(mobileDependency, nil)
}
func (c *ConnectClient) RunOniOS(
@@ -102,10 +97,10 @@ func (c *ConnectClient) RunOniOS(
DnsManager: dnsManager,
StateFilePath: stateFilePath,
}
return c.run(mobileDependency, nil, nil)
return c.run(mobileDependency, nil)
}
func (c *ConnectClient) run(mobileDependency MobileDependency, probes *ProbeHolder, runningChan chan error) error {
func (c *ConnectClient) run(mobileDependency MobileDependency, runningChan chan error) error {
defer func() {
if r := recover(); r != nil {
log.Panicf("Panic occurred: %v, stack trace: %s", r, string(debug.Stack()))
@@ -261,7 +256,7 @@ func (c *ConnectClient) run(mobileDependency MobileDependency, probes *ProbeHold
checks := loginResp.GetChecks()
c.engineMutex.Lock()
c.engine = NewEngineWithProbes(engineCtx, cancel, signalClient, mgmClient, relayManager, engineConfig, mobileDependency, c.statusRecorder, probes, checks)
c.engine = NewEngine(engineCtx, cancel, signalClient, mgmClient, relayManager, engineConfig, mobileDependency, c.statusRecorder, checks)
c.engine.SetNetworkMapPersistence(c.persistNetworkMap)
c.engineMutex.Unlock()

View File

@@ -175,8 +175,6 @@ type Engine struct {
dnsServer dns.Server
probes *ProbeHolder
// checks are the client-applied posture checks that need to be evaluated on the client
checks []*mgmProto.Checks
@@ -196,7 +194,7 @@ type Peer struct {
WgAllowedIps string
}
// NewEngine creates a new Connection Engine
// NewEngine creates a new Connection Engine with probes attached
func NewEngine(
clientCtx context.Context,
clientCancel context.CancelFunc,
@@ -207,33 +205,6 @@ func NewEngine(
mobileDep MobileDependency,
statusRecorder *peer.Status,
checks []*mgmProto.Checks,
) *Engine {
return NewEngineWithProbes(
clientCtx,
clientCancel,
signalClient,
mgmClient,
relayManager,
config,
mobileDep,
statusRecorder,
nil,
checks,
)
}
// NewEngineWithProbes creates a new Connection Engine with probes attached
func NewEngineWithProbes(
clientCtx context.Context,
clientCancel context.CancelFunc,
signalClient signal.Client,
mgmClient mgm.Client,
relayManager *relayClient.Manager,
config *EngineConfig,
mobileDep MobileDependency,
statusRecorder *peer.Status,
probes *ProbeHolder,
checks []*mgmProto.Checks,
) *Engine {
engine := &Engine{
clientCtx: clientCtx,
@@ -251,7 +222,6 @@ func NewEngineWithProbes(
networkSerial: 0,
sshServerFunc: nbssh.DefaultSSHServer,
statusRecorder: statusRecorder,
probes: probes,
checks: checks,
connSemaphore: semaphoregroup.NewSemaphoreGroup(connInitLimit),
}
@@ -450,7 +420,6 @@ func (e *Engine) Start() error {
e.receiveSignalEvents()
e.receiveManagementEvents()
e.receiveProbeEvents()
// starting network monitor at the very last to avoid disruptions
e.startNetworkMonitor()
@@ -1513,72 +1482,58 @@ func (e *Engine) getRosenpassAddr() string {
return ""
}
func (e *Engine) receiveProbeEvents() {
if e.probes == nil {
return
// RunHealthProbes executes health checks for Signal, Management, Relay and WireGuard services
// and updates the status recorder with the latest states.
func (e *Engine) RunHealthProbes() bool {
signalHealthy := e.signal.IsHealthy()
log.Debugf("signal health check: healthy=%t", signalHealthy)
managementHealthy := e.mgmClient.IsHealthy()
log.Debugf("management health check: healthy=%t", managementHealthy)
results := append(e.probeSTUNs(), e.probeTURNs()...)
e.statusRecorder.UpdateRelayStates(results)
relayHealthy := true
for _, res := range results {
if res.Err != nil {
relayHealthy = false
break
}
}
if e.probes.SignalProbe != nil {
go e.probes.SignalProbe.Receive(e.ctx, func() bool {
healthy := e.signal.IsHealthy()
log.Debugf("received signal probe request, healthy: %t", healthy)
return healthy
})
log.Debugf("relay health check: healthy=%t", relayHealthy)
for _, key := range e.peerStore.PeersPubKey() {
wgStats, err := e.wgInterface.GetStats(key)
if err != nil {
log.Debugf("failed to get wg stats for peer %s: %s", key, err)
continue
}
// wgStats could be zero value, in which case we just reset the stats
if err := e.statusRecorder.UpdateWireGuardPeerState(key, wgStats); err != nil {
log.Debugf("failed to update wg stats for peer %s: %s", key, err)
}
}
if e.probes.MgmProbe != nil {
go e.probes.MgmProbe.Receive(e.ctx, func() bool {
healthy := e.mgmClient.IsHealthy()
log.Debugf("received management probe request, healthy: %t", healthy)
return healthy
})
}
if e.probes.RelayProbe != nil {
go e.probes.RelayProbe.Receive(e.ctx, func() bool {
healthy := true
results := append(e.probeSTUNs(), e.probeTURNs()...)
e.statusRecorder.UpdateRelayStates(results)
// A single failed server will result in a "failed" probe
for _, res := range results {
if res.Err != nil {
healthy = false
break
}
}
log.Debugf("received relay probe request, healthy: %t", healthy)
return healthy
})
}
if e.probes.WgProbe != nil {
go e.probes.WgProbe.Receive(e.ctx, func() bool {
log.Debug("received wg probe request")
for _, key := range e.peerStore.PeersPubKey() {
wgStats, err := e.wgInterface.GetStats(key)
if err != nil {
log.Debugf("failed to get wg stats for peer %s: %s", key, err)
}
// wgStats could be zero value, in which case we just reset the stats
if err := e.statusRecorder.UpdateWireGuardPeerState(key, wgStats); err != nil {
log.Debugf("failed to update wg stats for peer %s: %s", key, err)
}
}
return true
})
}
allHealthy := signalHealthy && managementHealthy && relayHealthy
log.Debugf("all health checks completed: healthy=%t", allHealthy)
return allHealthy
}
func (e *Engine) probeSTUNs() []relay.ProbeResult {
return relay.ProbeAll(e.ctx, relay.ProbeSTUN, e.STUNs)
e.syncMsgMux.Lock()
stuns := slices.Clone(e.STUNs)
e.syncMsgMux.Unlock()
return relay.ProbeAll(e.ctx, relay.ProbeSTUN, stuns)
}
func (e *Engine) probeTURNs() []relay.ProbeResult {
return relay.ProbeAll(e.ctx, relay.ProbeTURN, e.TURNs)
e.syncMsgMux.Lock()
turns := slices.Clone(e.TURNs)
e.syncMsgMux.Unlock()
return relay.ProbeAll(e.ctx, relay.ProbeTURN, turns)
}
func (e *Engine) restartEngine() {

View File

@@ -1,58 +0,0 @@
package internal
import "context"
type ProbeHolder struct {
MgmProbe *Probe
SignalProbe *Probe
RelayProbe *Probe
WgProbe *Probe
}
// Probe allows to run on-demand callbacks from different code locations.
// Pass the probe to a receiving and a sending end. The receiving end starts listening
// to requests with Receive and executes a callback when the sending end requests it
// by calling Probe.
type Probe struct {
request chan struct{}
result chan bool
ready bool
}
// NewProbe returns a new initialized probe.
func NewProbe() *Probe {
return &Probe{
request: make(chan struct{}),
result: make(chan bool),
}
}
// Probe requests the callback to be run and returns a bool indicating success.
// It always returns true as long as the receiver is not ready.
func (p *Probe) Probe() bool {
if !p.ready {
return true
}
p.request <- struct{}{}
return <-p.result
}
// Receive starts listening for probe requests. On such a request it runs the supplied
// callback func which must return a bool indicating success.
// Blocks until the passed context is cancelled.
func (p *Probe) Receive(ctx context.Context, callback func() bool) {
p.ready = true
defer func() {
p.ready = false
}()
for {
select {
case <-ctx.Done():
return
case <-p.request:
p.result <- callback()
}
}
}

View File

@@ -63,12 +63,7 @@ type Server struct {
statusRecorder *peer.Status
sessionWatcher *internal.SessionWatcher
mgmProbe *internal.Probe
signalProbe *internal.Probe
relayProbe *internal.Probe
wgProbe *internal.Probe
lastProbe time.Time
lastProbe time.Time
persistNetworkMap bool
}
@@ -86,12 +81,7 @@ func New(ctx context.Context, configPath, logFile string) *Server {
latestConfigInput: internal.ConfigInput{
ConfigPath: configPath,
},
logFile: logFile,
mgmProbe: internal.NewProbe(),
signalProbe: internal.NewProbe(),
relayProbe: internal.NewProbe(),
wgProbe: internal.NewProbe(),
logFile: logFile,
persistNetworkMap: true,
}
}
@@ -202,14 +192,7 @@ func (s *Server) connectWithRetryRuns(ctx context.Context, config *internal.Conf
s.connectClient = internal.NewConnectClient(ctx, config, statusRecorder)
s.connectClient.SetNetworkMapPersistence(s.persistNetworkMap)
probes := internal.ProbeHolder{
MgmProbe: s.mgmProbe,
SignalProbe: s.signalProbe,
RelayProbe: s.relayProbe,
WgProbe: s.wgProbe,
}
err := s.connectClient.RunWithProbes(&probes, runningChan)
err := s.connectClient.Run(runningChan)
if err != nil {
log.Debugf("run client connection exited with error: %v. Will retry in the background", err)
}
@@ -676,9 +659,13 @@ func (s *Server) Down(ctx context.Context, _ *proto.DownRequest) (*proto.DownRes
// Status returns the daemon status
func (s *Server) Status(
_ context.Context,
ctx context.Context,
msg *proto.StatusRequest,
) (*proto.StatusResponse, error) {
if ctx.Err() != nil {
return nil, ctx.Err()
}
s.mutex.Lock()
defer s.mutex.Unlock()
@@ -707,14 +694,17 @@ func (s *Server) Status(
}
func (s *Server) runProbes() {
if time.Since(s.lastProbe) > probeThreshold {
managementHealthy := s.mgmProbe.Probe()
signalHealthy := s.signalProbe.Probe()
relayHealthy := s.relayProbe.Probe()
wgProbe := s.wgProbe.Probe()
if s.connectClient == nil {
return
}
// Update last time only if all probes were successful
if managementHealthy && signalHealthy && relayHealthy && wgProbe {
engine := s.connectClient.Engine()
if engine == nil {
return
}
if time.Since(s.lastProbe) > probeThreshold {
if engine.RunHealthProbes() {
s.lastProbe = time.Now()
}
}

View File

@@ -141,7 +141,6 @@ type Client struct {
muInstanceURL sync.Mutex
onDisconnectListener func(string)
onConnectedListener func()
listenerMutex sync.Mutex
}
@@ -190,7 +189,6 @@ func (c *Client) Connect() error {
c.wgReadLoop.Add(1)
go c.readLoop(c.relayConn)
go c.notifyConnected()
return nil
}
@@ -238,12 +236,6 @@ func (c *Client) SetOnDisconnectListener(fn func(string)) {
c.onDisconnectListener = fn
}
func (c *Client) SetOnConnectedListener(fn func()) {
c.listenerMutex.Lock()
defer c.listenerMutex.Unlock()
c.onConnectedListener = fn
}
// HasConns returns true if there are connections.
func (c *Client) HasConns() bool {
c.mu.Lock()
@@ -559,16 +551,6 @@ func (c *Client) notifyDisconnected() {
go c.onDisconnectListener(c.connectionURL)
}
func (c *Client) notifyConnected() {
c.listenerMutex.Lock()
defer c.listenerMutex.Unlock()
if c.onConnectedListener == nil {
return
}
go c.onConnectedListener()
}
func (c *Client) writeCloseMsg() {
msg := messages.MarshalCloseMsg()
_, err := c.relayConn.Write(msg)

View File

@@ -14,8 +14,9 @@ var (
// Guard manage the reconnection tries to the Relay server in case of disconnection event.
type Guard struct {
// OnNewRelayClient is a channel that is used to notify the relay client about a new relay client instance.
// OnNewRelayClient is a channel that is used to notify the relay manager about a new relay client instance.
OnNewRelayClient chan *Client
OnReconnected chan struct{}
serverPicker *ServerPicker
}
@@ -23,6 +24,7 @@ type Guard struct {
func NewGuard(sp *ServerPicker) *Guard {
g := &Guard{
OnNewRelayClient: make(chan *Client, 1),
OnReconnected: make(chan struct{}, 1),
serverPicker: sp,
}
return g
@@ -39,14 +41,13 @@ func NewGuard(sp *ServerPicker) *Guard {
// - relayClient: The relay client instance that was disconnected.
// todo prevent multiple reconnection instances. In the current usage it should not happen, but it is better to prevent
func (g *Guard) StartReconnectTrys(ctx context.Context, relayClient *Client) {
if relayClient == nil {
goto RETRY
}
if g.isServerURLStillValid(relayClient) && g.quickReconnect(ctx, relayClient) {
// try to reconnect to the same server
if ok := g.tryToQuickReconnect(ctx, relayClient); ok {
g.notifyReconnected()
return
}
RETRY:
// start a ticker to pick a new server
ticker := exponentTicker(ctx)
defer ticker.Stop()
@@ -64,6 +65,28 @@ RETRY:
}
}
func (g *Guard) tryToQuickReconnect(parentCtx context.Context, rc *Client) bool {
if rc == nil {
return false
}
if !g.isServerURLStillValid(rc) {
return false
}
if cancelled := waiteBeforeRetry(parentCtx); !cancelled {
return false
}
log.Infof("try to reconnect to Relay server: %s", rc.connectionURL)
if err := rc.Connect(); err != nil {
log.Errorf("failed to reconnect to relay server: %s", err)
return false
}
return true
}
func (g *Guard) retry(ctx context.Context) error {
log.Infof("try to pick up a new Relay server")
relayClient, err := g.serverPicker.PickServer(ctx)
@@ -78,23 +101,6 @@ func (g *Guard) retry(ctx context.Context) error {
return nil
}
func (g *Guard) quickReconnect(parentCtx context.Context, rc *Client) bool {
ctx, cancel := context.WithTimeout(parentCtx, 1500*time.Millisecond)
defer cancel()
<-ctx.Done()
if parentCtx.Err() != nil {
return false
}
log.Infof("try to reconnect to Relay server: %s", rc.connectionURL)
if err := rc.Connect(); err != nil {
log.Errorf("failed to reconnect to relay server: %s", err)
return false
}
return true
}
func (g *Guard) drainRelayClientChan() {
select {
case <-g.OnNewRelayClient:
@@ -111,6 +117,13 @@ func (g *Guard) isServerURLStillValid(rc *Client) bool {
return false
}
func (g *Guard) notifyReconnected() {
select {
case g.OnReconnected <- struct{}{}:
default:
}
}
func exponentTicker(ctx context.Context) *backoff.Ticker {
bo := backoff.WithContext(&backoff.ExponentialBackOff{
InitialInterval: 2 * time.Second,
@@ -121,3 +134,15 @@ func exponentTicker(ctx context.Context) *backoff.Ticker {
return backoff.NewTicker(bo)
}
func waiteBeforeRetry(ctx context.Context) bool {
timer := time.NewTimer(1500 * time.Millisecond)
defer timer.Stop()
select {
case <-timer.C:
return true
case <-ctx.Done():
return false
}
}

View File

@@ -165,6 +165,9 @@ func (m *Manager) Ready() bool {
}
func (m *Manager) SetOnReconnectedListener(f func()) {
m.listenerLock.Lock()
defer m.listenerLock.Unlock()
m.onReconnectedListenerFn = f
}
@@ -284,6 +287,9 @@ func (m *Manager) openConnVia(serverAddress, peerKey string) (net.Conn, error) {
}
func (m *Manager) onServerConnected() {
m.listenerLock.Lock()
defer m.listenerLock.Unlock()
if m.onReconnectedListenerFn == nil {
return
}
@@ -304,8 +310,11 @@ func (m *Manager) onServerDisconnected(serverAddress string) {
func (m *Manager) listenGuardEvent(ctx context.Context) {
for {
select {
case <-m.reconnectGuard.OnReconnected:
m.onServerConnected()
case rc := <-m.reconnectGuard.OnNewRelayClient:
m.storeClient(rc)
m.onServerConnected()
case <-ctx.Done():
return
}
@@ -317,7 +326,6 @@ func (m *Manager) storeClient(client *Client) {
defer m.relayClientMu.Unlock()
m.relayClient = client
m.relayClient.SetOnConnectedListener(m.onServerConnected)
m.relayClient.SetOnDisconnectListener(m.onServerDisconnected)
}