Compare commits

...

7 Commits

Author SHA1 Message Date
Diego Romar
691a90516d Change log.Debug in goroutine for log.Info 2025-12-19 12:47:52 -03:00
Diego Romar
76c63d0dd2 Track PopulateNetbirdConfig goroutine with e.shutdownWg 2025-12-19 12:46:49 -03:00
Diego Romar
ee9166d771 Remove outdated success log message 2025-12-18 16:37:43 -03:00
Diego Romar
5a91664116 Improve PopulateNetbirdConfig goroutine with retry logic 2025-12-18 16:27:37 -03:00
Diego Romar
0e9cddf2e8 Execute PopulateNetbirdConfig in a go routine 2025-12-18 13:58:31 -03:00
Diego Romar
72e1fe7b48 Add extra logs to engine and dns manager 2025-12-18 13:58:11 -03:00
Pascal Fischer
c29bb1a289 [management] use xid as request id for logging (#4955) 2025-12-16 14:02:37 +01:00
5 changed files with 84 additions and 41 deletions

View File

@@ -96,10 +96,15 @@ func (m *Resolver) continueToNext(w dns.ResponseWriter, r *dns.Msg) {
func (m *Resolver) AddDomain(ctx context.Context, d domain.Domain) error {
dnsName := strings.ToLower(dns.Fqdn(d.PunycodeString()))
log.Infof("AddDomain: starting DNS lookup for %s", d.SafeString())
ctx, cancel := context.WithTimeout(ctx, dnsTimeout)
defer cancel()
ips, err := net.DefaultResolver.LookupNetIP(ctx, "ip", d.PunycodeString())
log.Infof("AddDomain: DNS lookup completed for %s, err=%v, ips=%d", d.SafeString(), err, len(ips))
if err != nil {
return fmt.Errorf("resolve domain %s: %w", d.SafeString(), err)
}

View File

@@ -420,10 +420,14 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL)
e.wgInterface = wgIface
e.statusRecorder.SetWgIface(wgIface)
log.Info("set wg interface to statusRecorder")
// start flow manager right after interface creation
publicKey := e.config.WgPrivateKey.PublicKey()
e.flowManager = netflow.NewManager(e.wgInterface, publicKey[:], e.statusRecorder)
log.Info("created flow manager")
if e.config.RosenpassEnabled {
log.Infof("rosenpass is enabled")
if e.config.RosenpassPermissive {
@@ -441,6 +445,8 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL)
}
e.stateManager.Start()
log.Info("started state manager")
initialRoutes, dnsConfig, dnsFeatureFlag, err := e.readInitialSettings()
if err != nil {
e.close()
@@ -454,10 +460,40 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL)
}
e.dnsServer = dnsServer
log.Info("created dns server")
// Populate DNS cache with NetbirdConfig and management URL for early resolution
if err := e.PopulateNetbirdConfig(netbirdConfig, mgmtURL); err != nil {
log.Warnf("failed to populate DNS cache: %v", err)
}
e.shutdownWg.Add(1)
go func() {
defer e.shutdownWg.Done()
backoff := time.Second
var lastErr error
const populateAttempts = 5
for attempts := 0; attempts < populateAttempts; attempts++ {
if pErr := e.PopulateNetbirdConfig(netbirdConfig, mgmtURL); pErr == nil {
log.Info("populated DNS cache successfully")
return
} else {
lastErr = pErr
log.Infof("populate DNS cache attempt %d failed: %v", attempts+1, pErr)
}
d := backoff + time.Duration(rand.Intn(500))*time.Millisecond
log.WithFields(log.Fields{"attempt": attempts + 1, "sleep": d}).Info("populate DNS cache retrying")
select {
case <-time.After(d):
case <-e.ctx.Done():
return
}
if backoff < 10*time.Second {
backoff *= 2
}
}
log.Errorf("failed to populate DNS cache after %d attempts: %v", populateAttempts, lastErr)
}()
e.routeManager = routemanager.NewManager(routemanager.ManagerConfig{
Context: e.ctx,
@@ -478,19 +514,27 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL)
log.Errorf("Failed to initialize route manager: %s", err)
}
log.Info("set route manager")
e.routeManager.SetRouteChangeListener(e.mobileDep.NetworkChangeListener)
log.Info("set route change listener to route manager")
if err = e.wgInterfaceCreate(); err != nil {
log.Errorf("failed creating tunnel interface %s: [%s]", e.config.WgIfaceName, err.Error())
e.close()
return fmt.Errorf("create wg interface: %w", err)
}
log.Info("created tunnel interface")
if err := e.createFirewall(); err != nil {
e.close()
return err
}
log.Info("created firewall")
e.udpMux, err = e.wgInterface.Up()
if err != nil {
log.Errorf("failed to pull up wgInterface [%s]: %s", e.wgInterface.Name(), err.Error())
@@ -498,6 +542,8 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL)
return fmt.Errorf("up wg interface: %w", err)
}
log.Info("pulled up tunnel interface")
// if inbound conns are blocked there is no need to create the ACL manager
if e.firewall != nil && !e.config.BlockInbound {
e.acl = acl.NewDefaultManager(e.firewall)
@@ -509,24 +555,38 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL)
return fmt.Errorf("initialize dns server: %w", err)
}
log.Info("initialized dns server")
iceCfg := e.createICEConfig()
log.Infof("created ICE config: %v", iceCfg)
e.connMgr = NewConnMgr(e.config, e.statusRecorder, e.peerStore, wgIface)
e.connMgr.Start(e.ctx)
log.Info("started connection manager")
e.srWatcher = guard.NewSRWatcher(e.signal, e.relayManager, e.mobileDep.IFaceDiscover, iceCfg)
e.srWatcher.Start()
log.Info("started SR watcher")
e.receiveSignalEvents()
e.receiveManagementEvents()
log.Info("started receiving events from Signal and Management services")
// starting network monitor at the very last to avoid disruptions
e.startNetworkMonitor()
log.Info("started network monitor")
// monitor WireGuard interface lifecycle and restart engine on changes
e.wgIfaceMonitor = NewWGIfaceMonitor()
e.shutdownWg.Add(1)
log.Infof("starting WireGuard interface monitor")
go func() {
defer e.shutdownWg.Done()
@@ -538,6 +598,8 @@ func (e *Engine) Start(netbirdConfig *mgmProto.NetbirdConfig, mgmtURL *url.URL)
}
}()
log.Info("engine started successfully")
return nil
}
@@ -731,21 +793,28 @@ func (e *Engine) PopulateNetbirdConfig(netbirdConfig *mgmProto.NetbirdConfig, mg
return nil
}
log.Info("PopulateNetbirdConfig: starting")
// Populate management URL if provided
if mgmtURL != nil {
log.Infof("PopulateNetbirdConfig: calling PopulateManagementDomain for %s", mgmtURL.Host)
if err := e.dnsServer.PopulateManagementDomain(mgmtURL); err != nil {
log.Warnf("failed to populate DNS cache with management URL: %v", err)
}
log.Info("PopulateNetbirdConfig: PopulateManagementDomain completed")
}
// Populate NetbirdConfig domains if provided
if netbirdConfig != nil {
log.Info("PopulateNetbirdConfig: calling UpdateServerConfig")
serverDomains := dnsconfig.ExtractFromNetbirdConfig(netbirdConfig)
if err := e.dnsServer.UpdateServerConfig(serverDomains); err != nil {
return fmt.Errorf("update DNS server config from NetbirdConfig: %w", err)
}
log.Info("PopulateNetbirdConfig: UpdateServerConfig completed")
}
log.Info("PopulateNetbirdConfig: done")
return nil
}

View File

@@ -60,14 +60,7 @@ func (hook ContextHook) Fire(entry *logrus.Entry) error {
entry.Data["context"] = source
switch source {
case HTTPSource:
addHTTPFields(entry)
case GRPCSource:
addGRPCFields(entry)
case SystemSource:
addSystemFields(entry)
}
addFields(entry)
return nil
}
@@ -99,7 +92,7 @@ func (hook ContextHook) parseSrc(filePath string) string {
return fmt.Sprintf("%s/%s", pkg, file)
}
func addHTTPFields(entry *logrus.Entry) {
func addFields(entry *logrus.Entry) {
if ctxReqID, ok := entry.Context.Value(context.RequestIDKey).(string); ok {
entry.Data[context.RequestIDKey] = ctxReqID
}
@@ -109,30 +102,6 @@ func addHTTPFields(entry *logrus.Entry) {
if ctxInitiatorID, ok := entry.Context.Value(context.UserIDKey).(string); ok {
entry.Data[context.UserIDKey] = ctxInitiatorID
}
}
func addGRPCFields(entry *logrus.Entry) {
if ctxReqID, ok := entry.Context.Value(context.RequestIDKey).(string); ok {
entry.Data[context.RequestIDKey] = ctxReqID
}
if ctxAccountID, ok := entry.Context.Value(context.AccountIDKey).(string); ok {
entry.Data[context.AccountIDKey] = ctxAccountID
}
if ctxDeviceID, ok := entry.Context.Value(context.PeerIDKey).(string); ok {
entry.Data[context.PeerIDKey] = ctxDeviceID
}
}
func addSystemFields(entry *logrus.Entry) {
if ctxReqID, ok := entry.Context.Value(context.RequestIDKey).(string); ok {
entry.Data[context.RequestIDKey] = ctxReqID
}
if ctxInitiatorID, ok := entry.Context.Value(context.UserIDKey).(string); ok {
entry.Data[context.UserIDKey] = ctxInitiatorID
}
if ctxAccountID, ok := entry.Context.Value(context.AccountIDKey).(string); ok {
entry.Data[context.AccountIDKey] = ctxAccountID
}
if ctxDeviceID, ok := entry.Context.Value(context.PeerIDKey).(string); ok {
entry.Data[context.PeerIDKey] = ctxDeviceID
}

View File

@@ -10,9 +10,9 @@ import (
"slices"
"time"
"github.com/google/uuid"
grpcMiddleware "github.com/grpc-ecosystem/go-grpc-middleware/v2"
"github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/realip"
"github.com/rs/xid"
log "github.com/sirupsen/logrus"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
@@ -180,7 +180,7 @@ func unaryInterceptor(
info *grpc.UnaryServerInfo,
handler grpc.UnaryHandler,
) (interface{}, error) {
reqID := uuid.New().String()
reqID := xid.New().String()
//nolint
ctx = context.WithValue(ctx, hook.ExecutionContextKey, hook.GRPCSource)
//nolint
@@ -194,7 +194,7 @@ func streamInterceptor(
info *grpc.StreamServerInfo,
handler grpc.StreamHandler,
) error {
reqID := uuid.New().String()
reqID := xid.New().String()
wrapped := grpcMiddleware.WrapServerStream(ss)
//nolint
ctx := context.WithValue(ss.Context(), hook.ExecutionContextKey, hook.GRPCSource)

View File

@@ -7,8 +7,8 @@ import (
"strings"
"time"
"github.com/google/uuid"
"github.com/gorilla/mux"
"github.com/rs/xid"
log "github.com/sirupsen/logrus"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
@@ -169,7 +169,7 @@ func (m *HTTPMiddleware) Handler(h http.Handler) http.Handler {
//nolint
ctx := context.WithValue(r.Context(), hook.ExecutionContextKey, hook.HTTPSource)
reqID := uuid.New().String()
reqID := xid.New().String()
//nolint
ctx = context.WithValue(ctx, nbContext.RequestIDKey, reqID)