Compare commits

...

9 Commits

Author SHA1 Message Date
pascal
4b5e39c574 use alternative translator 2026-04-09 13:25:27 +02:00
pascal
fd28178cc6 remove proxy auth changes 2026-04-09 13:00:05 +02:00
pascal
7f98cf30cf export metrics without units 2026-04-09 12:46:45 +02:00
Maycon Santos
099c493b18 [management] network map tests (#5795)
* Add network map benchmark and correctness test files

* Add tests for network map components correctness and edge cases

* Skip benchmarks in CI and enhance network map test coverage with new helper functions

* Remove legacy network map benchmarks and tests; refactor components-based test coverage for clarity and scalability.
2026-04-08 21:28:29 +02:00
Pascal Fischer
c1d1229ae0 [management] use NullBool for terminated flag (#5829) 2026-04-08 21:08:43 +02:00
Viktor Liu
94a36cb53e [client] Handle UPnP routers that only support permanent leases (#5826) 2026-04-08 17:59:59 +02:00
Viktor Liu
c7ba931466 [client] Populate network addresses in FreeBSD system info (#5827) 2026-04-08 17:14:16 +02:00
Viktor Liu
413d95b740 [client] Include service.json in debug bundle (#5825)
* Include service.json in debug bundle

* Add tests for service params sanitization logic
2026-04-08 21:10:31 +08:00
Viktor Liu
332c624c55 [client] Don't abort UI debug bundle when up/down fails (#5780) 2026-04-08 10:33:46 +02:00
19 changed files with 1924 additions and 139 deletions

View File

@@ -199,9 +199,11 @@ func runForDuration(cmd *cobra.Command, args []string) error {
cmd.Println("Log level set to trace.")
}
needsRestoreUp := false
if _, err := client.Down(cmd.Context(), &proto.DownRequest{}); err != nil {
cmd.PrintErrf("Failed to bring service down: %v\n", status.Convert(err).Message())
} else {
needsRestoreUp = !stateWasDown
cmd.Println("netbird down")
}
@@ -217,6 +219,7 @@ func runForDuration(cmd *cobra.Command, args []string) error {
if _, err := client.Up(cmd.Context(), &proto.UpRequest{}); err != nil {
cmd.PrintErrf("Failed to bring service up: %v\n", status.Convert(err).Message())
} else {
needsRestoreUp = false
cmd.Println("netbird up")
}
@@ -264,6 +267,14 @@ func runForDuration(cmd *cobra.Command, args []string) error {
return fmt.Errorf("failed to bundle debug: %v", status.Convert(err).Message())
}
if needsRestoreUp {
if _, err := client.Up(cmd.Context(), &proto.UpRequest{}); err != nil {
cmd.PrintErrf("Failed to restore service up state: %v\n", status.Convert(err).Message())
} else {
cmd.Println("netbird up (restored)")
}
}
if stateWasDown {
if _, err := client.Down(cmd.Context(), &proto.DownRequest{}); err != nil {
cmd.PrintErrf("Failed to restore service down state: %v\n", status.Convert(err).Message())

View File

@@ -25,6 +25,7 @@ import (
"google.golang.org/protobuf/encoding/protojson"
"github.com/netbirdio/netbird/client/anonymize"
"github.com/netbirdio/netbird/client/configs"
"github.com/netbirdio/netbird/client/internal/peer"
"github.com/netbirdio/netbird/client/internal/profilemanager"
"github.com/netbirdio/netbird/client/internal/updater/installer"
@@ -52,6 +53,7 @@ resolved_domains.txt: Anonymized resolved domain IP addresses from the status re
config.txt: Anonymized configuration information of the NetBird client.
network_map.json: Anonymized sync response containing peer configurations, routes, DNS settings, and firewall rules.
state.json: Anonymized client state dump containing netbird states for the active profile.
service_params.json: Sanitized service install parameters (service.json). Sensitive environment variable values are masked. Only present when service.json exists.
metrics.txt: Buffered client metrics in InfluxDB line protocol format. Only present when metrics collection is enabled. Peer identifiers are anonymized.
mutex.prof: Mutex profiling information.
goroutine.prof: Goroutine profiling information.
@@ -359,6 +361,10 @@ func (g *BundleGenerator) createArchive() error {
log.Errorf("failed to add corrupted state files to debug bundle: %v", err)
}
if err := g.addServiceParams(); err != nil {
log.Errorf("failed to add service params to debug bundle: %v", err)
}
if err := g.addMetrics(); err != nil {
log.Errorf("failed to add metrics to debug bundle: %v", err)
}
@@ -488,6 +494,90 @@ func (g *BundleGenerator) addConfig() error {
return nil
}
const (
serviceParamsFile = "service.json"
serviceParamsBundle = "service_params.json"
maskedValue = "***"
envVarPrefix = "NB_"
jsonKeyManagementURL = "management_url"
jsonKeyServiceEnv = "service_env_vars"
)
var sensitiveEnvSubstrings = []string{"key", "token", "secret", "password", "credential"}
// addServiceParams reads the service.json file and adds a sanitized version to the bundle.
// Non-NB_ env vars and vars with sensitive names are masked. Other NB_ values are anonymized.
func (g *BundleGenerator) addServiceParams() error {
path := filepath.Join(configs.StateDir, serviceParamsFile)
data, err := os.ReadFile(path)
if err != nil {
if os.IsNotExist(err) {
return nil
}
return fmt.Errorf("read service params: %w", err)
}
var params map[string]any
if err := json.Unmarshal(data, &params); err != nil {
return fmt.Errorf("parse service params: %w", err)
}
if g.anonymize {
if mgmtURL, ok := params[jsonKeyManagementURL].(string); ok && mgmtURL != "" {
params[jsonKeyManagementURL] = g.anonymizer.AnonymizeURI(mgmtURL)
}
}
g.sanitizeServiceEnvVars(params)
sanitizedData, err := json.MarshalIndent(params, "", " ")
if err != nil {
return fmt.Errorf("marshal sanitized service params: %w", err)
}
if err := g.addFileToZip(bytes.NewReader(sanitizedData), serviceParamsBundle); err != nil {
return fmt.Errorf("add service params to zip: %w", err)
}
return nil
}
// sanitizeServiceEnvVars masks or anonymizes env var values in service params.
// Non-NB_ vars and vars with sensitive names (key, token, etc.) are fully masked.
// Other NB_ var values are passed through the anonymizer when anonymization is enabled.
func (g *BundleGenerator) sanitizeServiceEnvVars(params map[string]any) {
envVars, ok := params[jsonKeyServiceEnv].(map[string]any)
if !ok {
return
}
sanitized := make(map[string]any, len(envVars))
for k, v := range envVars {
val, _ := v.(string)
switch {
case !strings.HasPrefix(k, envVarPrefix) || isSensitiveEnvVar(k):
sanitized[k] = maskedValue
case g.anonymize:
sanitized[k] = g.anonymizer.AnonymizeString(val)
default:
sanitized[k] = val
}
}
params[jsonKeyServiceEnv] = sanitized
}
// isSensitiveEnvVar returns true for env var names that may contain secrets.
func isSensitiveEnvVar(key string) bool {
lower := strings.ToLower(key)
for _, s := range sensitiveEnvSubstrings {
if strings.Contains(lower, s) {
return true
}
}
return false
}
func (g *BundleGenerator) addCommonConfigFields(configContent *strings.Builder) {
configContent.WriteString("NetBird Client Configuration:\n\n")

View File

@@ -1,8 +1,12 @@
package debug
import (
"archive/zip"
"bytes"
"encoding/json"
"net"
"os"
"path/filepath"
"strings"
"testing"
@@ -10,6 +14,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/netbirdio/netbird/client/anonymize"
"github.com/netbirdio/netbird/client/configs"
mgmProto "github.com/netbirdio/netbird/shared/management/proto"
)
@@ -420,6 +425,226 @@ func TestAnonymizeNetworkMap(t *testing.T) {
}
}
func TestIsSensitiveEnvVar(t *testing.T) {
tests := []struct {
key string
sensitive bool
}{
{"NB_SETUP_KEY", true},
{"NB_API_TOKEN", true},
{"NB_CLIENT_SECRET", true},
{"NB_PASSWORD", true},
{"NB_CREDENTIAL", true},
{"NB_LOG_LEVEL", false},
{"NB_MANAGEMENT_URL", false},
{"NB_HOSTNAME", false},
{"HOME", false},
{"PATH", false},
}
for _, tt := range tests {
t.Run(tt.key, func(t *testing.T) {
assert.Equal(t, tt.sensitive, isSensitiveEnvVar(tt.key))
})
}
}
func TestSanitizeServiceEnvVars(t *testing.T) {
tests := []struct {
name string
anonymize bool
input map[string]any
check func(t *testing.T, params map[string]any)
}{
{
name: "no env vars key",
anonymize: false,
input: map[string]any{"management_url": "https://mgmt.example.com"},
check: func(t *testing.T, params map[string]any) {
t.Helper()
assert.Equal(t, "https://mgmt.example.com", params["management_url"], "non-env fields should be untouched")
_, ok := params[jsonKeyServiceEnv]
assert.False(t, ok, "service_env_vars should not be added")
},
},
{
name: "non-NB vars are masked",
anonymize: false,
input: map[string]any{
jsonKeyServiceEnv: map[string]any{
"HOME": "/root",
"PATH": "/usr/bin",
"NB_LOG_LEVEL": "debug",
},
},
check: func(t *testing.T, params map[string]any) {
t.Helper()
env := params[jsonKeyServiceEnv].(map[string]any)
assert.Equal(t, maskedValue, env["HOME"], "non-NB_ var should be masked")
assert.Equal(t, maskedValue, env["PATH"], "non-NB_ var should be masked")
assert.Equal(t, "debug", env["NB_LOG_LEVEL"], "safe NB_ var should pass through")
},
},
{
name: "sensitive NB vars are masked",
anonymize: false,
input: map[string]any{
jsonKeyServiceEnv: map[string]any{
"NB_SETUP_KEY": "abc123",
"NB_API_TOKEN": "tok_xyz",
"NB_LOG_LEVEL": "info",
},
},
check: func(t *testing.T, params map[string]any) {
t.Helper()
env := params[jsonKeyServiceEnv].(map[string]any)
assert.Equal(t, maskedValue, env["NB_SETUP_KEY"], "sensitive NB_ var should be masked")
assert.Equal(t, maskedValue, env["NB_API_TOKEN"], "sensitive NB_ var should be masked")
assert.Equal(t, "info", env["NB_LOG_LEVEL"], "safe NB_ var should pass through")
},
},
{
name: "safe NB vars anonymized when anonymize is true",
anonymize: true,
input: map[string]any{
jsonKeyServiceEnv: map[string]any{
"NB_MANAGEMENT_URL": "https://mgmt.example.com:443",
"NB_LOG_LEVEL": "debug",
"NB_SETUP_KEY": "secret",
"SOME_OTHER": "val",
},
},
check: func(t *testing.T, params map[string]any) {
t.Helper()
env := params[jsonKeyServiceEnv].(map[string]any)
// Safe NB_ values should be anonymized (not the original, not masked)
mgmtVal := env["NB_MANAGEMENT_URL"].(string)
assert.NotEqual(t, "https://mgmt.example.com:443", mgmtVal, "should be anonymized")
assert.NotEqual(t, maskedValue, mgmtVal, "should not be masked")
logVal := env["NB_LOG_LEVEL"].(string)
assert.NotEqual(t, maskedValue, logVal, "safe NB_ var should not be masked")
// Sensitive and non-NB_ still masked
assert.Equal(t, maskedValue, env["NB_SETUP_KEY"])
assert.Equal(t, maskedValue, env["SOME_OTHER"])
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
anonymizer := anonymize.NewAnonymizer(anonymize.DefaultAddresses())
g := &BundleGenerator{
anonymize: tt.anonymize,
anonymizer: anonymizer,
}
g.sanitizeServiceEnvVars(tt.input)
tt.check(t, tt.input)
})
}
}
func TestAddServiceParams(t *testing.T) {
t.Run("missing service.json returns nil", func(t *testing.T) {
g := &BundleGenerator{
anonymizer: anonymize.NewAnonymizer(anonymize.DefaultAddresses()),
}
origStateDir := configs.StateDir
configs.StateDir = t.TempDir()
t.Cleanup(func() { configs.StateDir = origStateDir })
err := g.addServiceParams()
assert.NoError(t, err)
})
t.Run("management_url anonymized when anonymize is true", func(t *testing.T) {
dir := t.TempDir()
origStateDir := configs.StateDir
configs.StateDir = dir
t.Cleanup(func() { configs.StateDir = origStateDir })
input := map[string]any{
jsonKeyManagementURL: "https://api.example.com:443",
jsonKeyServiceEnv: map[string]any{
"NB_LOG_LEVEL": "trace",
},
}
data, err := json.Marshal(input)
require.NoError(t, err)
require.NoError(t, os.WriteFile(filepath.Join(dir, serviceParamsFile), data, 0600))
var buf bytes.Buffer
zw := zip.NewWriter(&buf)
g := &BundleGenerator{
anonymize: true,
anonymizer: anonymize.NewAnonymizer(anonymize.DefaultAddresses()),
archive: zw,
}
require.NoError(t, g.addServiceParams())
require.NoError(t, zw.Close())
zr, err := zip.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
require.NoError(t, err)
require.Len(t, zr.File, 1)
assert.Equal(t, serviceParamsBundle, zr.File[0].Name)
rc, err := zr.File[0].Open()
require.NoError(t, err)
defer rc.Close()
var result map[string]any
require.NoError(t, json.NewDecoder(rc).Decode(&result))
mgmt := result[jsonKeyManagementURL].(string)
assert.NotEqual(t, "https://api.example.com:443", mgmt, "management_url should be anonymized")
assert.NotEmpty(t, mgmt)
env := result[jsonKeyServiceEnv].(map[string]any)
assert.NotEqual(t, maskedValue, env["NB_LOG_LEVEL"], "safe NB_ var should not be masked")
})
t.Run("management_url preserved when anonymize is false", func(t *testing.T) {
dir := t.TempDir()
origStateDir := configs.StateDir
configs.StateDir = dir
t.Cleanup(func() { configs.StateDir = origStateDir })
input := map[string]any{
jsonKeyManagementURL: "https://api.example.com:443",
}
data, err := json.Marshal(input)
require.NoError(t, err)
require.NoError(t, os.WriteFile(filepath.Join(dir, serviceParamsFile), data, 0600))
var buf bytes.Buffer
zw := zip.NewWriter(&buf)
g := &BundleGenerator{
anonymize: false,
anonymizer: anonymize.NewAnonymizer(anonymize.DefaultAddresses()),
archive: zw,
}
require.NoError(t, g.addServiceParams())
require.NoError(t, zw.Close())
zr, err := zip.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
require.NoError(t, err)
rc, err := zr.File[0].Open()
require.NoError(t, err)
defer rc.Close()
var result map[string]any
require.NoError(t, json.NewDecoder(rc).Decode(&result))
assert.Equal(t, "https://api.example.com:443", result[jsonKeyManagementURL], "management_url should be preserved")
})
}
// Helper function to check if IP is in CGNAT range
func isInCGNATRange(ip net.IP) bool {
cgnat := net.IPNet{

View File

@@ -6,6 +6,7 @@ import (
"context"
"fmt"
"net"
"regexp"
"sync"
"time"
@@ -15,19 +16,29 @@ import (
const (
defaultMappingTTL = 2 * time.Hour
renewalInterval = defaultMappingTTL / 2
discoveryTimeout = 10 * time.Second
mappingDescription = "NetBird"
)
// upnpErrPermanentLeaseOnly matches UPnP error 725 in SOAP fault XML,
// allowing for whitespace/newlines between tags from different router firmware.
var upnpErrPermanentLeaseOnly = regexp.MustCompile(`<errorCode>\s*725\s*</errorCode>`)
// Mapping represents an active NAT port mapping.
type Mapping struct {
Protocol string
InternalPort uint16
ExternalPort uint16
ExternalIP net.IP
NATType string
// TTL is the lease duration. Zero means a permanent lease that never expires.
TTL time.Duration
}
// TODO: persist mapping state for crash recovery cleanup of permanent leases.
// Currently not done because State.Cleanup requires NAT gateway re-discovery,
// which blocks startup for ~10s when no gateway is present (affects all clients).
type Manager struct {
cancel context.CancelFunc
@@ -43,6 +54,7 @@ type Manager struct {
mu sync.Mutex
}
// NewManager creates a new port forwarding manager.
func NewManager() *Manager {
return &Manager{
stopCtx: make(chan context.Context, 1),
@@ -77,8 +89,7 @@ func (m *Manager) Start(ctx context.Context, wgPort uint16) {
gateway, mapping, err := m.setup(ctx)
if err != nil {
log.Errorf("failed to setup NAT port mapping: %v", err)
log.Infof("port forwarding setup: %v", err)
return
}
@@ -86,7 +97,7 @@ func (m *Manager) Start(ctx context.Context, wgPort uint16) {
m.mapping = mapping
m.mappingLock.Unlock()
m.renewLoop(ctx, gateway)
m.renewLoop(ctx, gateway, mapping.TTL)
select {
case cleanupCtx := <-m.stopCtx:
@@ -145,16 +156,14 @@ func (m *Manager) setup(ctx context.Context) (nat.NAT, *Mapping, error) {
gateway, err := nat.DiscoverGateway(discoverCtx)
if err != nil {
log.Infof("NAT gateway discovery failed: %v (port forwarding disabled)", err)
return nil, nil, err
return nil, nil, fmt.Errorf("discover gateway: %w", err)
}
log.Infof("discovered NAT gateway: %s", gateway.Type())
mapping, err := m.createMapping(ctx, gateway)
if err != nil {
log.Warnf("failed to create port mapping: %v", err)
return nil, nil, err
return nil, nil, fmt.Errorf("create port mapping: %w", err)
}
return gateway, mapping, nil
}
@@ -163,9 +172,18 @@ func (m *Manager) createMapping(ctx context.Context, gateway nat.NAT) (*Mapping,
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
externalPort, err := gateway.AddPortMapping(ctx, "udp", int(m.wgPort), mappingDescription, defaultMappingTTL)
ttl := defaultMappingTTL
externalPort, err := gateway.AddPortMapping(ctx, "udp", int(m.wgPort), mappingDescription, ttl)
if err != nil {
return nil, err
if !isPermanentLeaseRequired(err) {
return nil, err
}
log.Infof("gateway only supports permanent leases, retrying with indefinite duration")
ttl = 0
externalPort, err = gateway.AddPortMapping(ctx, "udp", int(m.wgPort), mappingDescription, ttl)
if err != nil {
return nil, err
}
}
externalIP, err := gateway.GetExternalAddress()
@@ -180,6 +198,7 @@ func (m *Manager) createMapping(ctx context.Context, gateway nat.NAT) (*Mapping,
ExternalPort: uint16(externalPort),
ExternalIP: externalIP,
NATType: gateway.Type(),
TTL: ttl,
}
log.Infof("created port mapping: %d -> %d via %s (external IP: %s)",
@@ -187,8 +206,14 @@ func (m *Manager) createMapping(ctx context.Context, gateway nat.NAT) (*Mapping,
return mapping, nil
}
func (m *Manager) renewLoop(ctx context.Context, gateway nat.NAT) {
ticker := time.NewTicker(renewalInterval)
func (m *Manager) renewLoop(ctx context.Context, gateway nat.NAT, ttl time.Duration) {
if ttl == 0 {
// Permanent mappings don't expire, just wait for cancellation.
<-ctx.Done()
return
}
ticker := time.NewTicker(ttl / 2)
defer ticker.Stop()
for {
@@ -208,7 +233,7 @@ func (m *Manager) renewMapping(ctx context.Context, gateway nat.NAT) error {
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
externalPort, err := gateway.AddPortMapping(ctx, m.mapping.Protocol, int(m.mapping.InternalPort), mappingDescription, defaultMappingTTL)
externalPort, err := gateway.AddPortMapping(ctx, m.mapping.Protocol, int(m.mapping.InternalPort), mappingDescription, m.mapping.TTL)
if err != nil {
return fmt.Errorf("add port mapping: %w", err)
}
@@ -248,3 +273,8 @@ func (m *Manager) startTearDown(ctx context.Context) {
default:
}
}
// isPermanentLeaseRequired checks if a UPnP error indicates the gateway only supports permanent leases (error 725).
func isPermanentLeaseRequired(err error) bool {
return err != nil && upnpErrPermanentLeaseOnly.MatchString(err.Error())
}

View File

@@ -3,15 +3,18 @@ package portforward
import (
"context"
"net"
"time"
)
// Mapping represents port mapping information.
// Mapping represents an active NAT port mapping.
type Mapping struct {
Protocol string
InternalPort uint16
ExternalPort uint16
ExternalIP net.IP
NATType string
// TTL is the lease duration. Zero means a permanent lease that never expires.
TTL time.Duration
}
// Manager is a stub for js/wasm builds where NAT-PMP/UPnP is not supported.

View File

@@ -4,23 +4,25 @@ package portforward
import (
"context"
"fmt"
"net"
"testing"
"time"
"github.com/libp2p/go-nat"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
type mockNAT struct {
natType string
deviceAddr net.IP
externalAddr net.IP
internalAddr net.IP
mappings map[int]int
addMappingErr error
deleteMappingErr error
natType string
deviceAddr net.IP
externalAddr net.IP
internalAddr net.IP
mappings map[int]int
addMappingErr error
deleteMappingErr error
onlyPermanentLeases bool
lastTimeout time.Duration
}
func newMockNAT() *mockNAT {
@@ -53,8 +55,12 @@ func (m *mockNAT) AddPortMapping(ctx context.Context, protocol string, internalP
if m.addMappingErr != nil {
return 0, m.addMappingErr
}
if m.onlyPermanentLeases && timeout != 0 {
return 0, fmt.Errorf("SOAP fault. Code: | Explanation: | Detail: <UPnPError xmlns=\"urn:schemas-upnp-org:control-1-0\"><errorCode>725</errorCode><errorDescription>OnlyPermanentLeasesSupported</errorDescription></UPnPError>")
}
externalPort := internalPort
m.mappings[internalPort] = externalPort
m.lastTimeout = timeout
return externalPort, nil
}
@@ -80,6 +86,7 @@ func TestManager_CreateMapping(t *testing.T) {
assert.Equal(t, uint16(51820), mapping.ExternalPort)
assert.Equal(t, "Mock-NAT", mapping.NATType)
assert.Equal(t, net.ParseIP("203.0.113.50").To4(), mapping.ExternalIP.To4())
assert.Equal(t, defaultMappingTTL, mapping.TTL)
}
func TestManager_GetMapping_ReturnsNilWhenNotReady(t *testing.T) {
@@ -131,29 +138,64 @@ func TestManager_Cleanup_NilMapping(t *testing.T) {
m.cleanup(context.Background(), gateway)
}
func TestState_Cleanup(t *testing.T) {
origDiscover := discoverGateway
defer func() { discoverGateway = origDiscover }()
mockGateway := newMockNAT()
mockGateway.mappings[51820] = 51820
discoverGateway = func(ctx context.Context) (nat.NAT, error) {
return mockGateway, nil
}
func TestManager_CreateMapping_PermanentLeaseFallback(t *testing.T) {
m := NewManager()
m.wgPort = 51820
state := &State{
Protocol: "udp",
InternalPort: 51820,
}
gateway := newMockNAT()
gateway.onlyPermanentLeases = true
err := state.Cleanup()
assert.NoError(t, err)
mapping, err := m.createMapping(context.Background(), gateway)
require.NoError(t, err)
require.NotNil(t, mapping)
_, exists := mockGateway.mappings[51820]
assert.False(t, exists, "mapping should be deleted after cleanup")
assert.Equal(t, uint16(51820), mapping.InternalPort)
assert.Equal(t, time.Duration(0), mapping.TTL, "should return zero TTL for permanent lease")
assert.Equal(t, time.Duration(0), gateway.lastTimeout, "should have retried with zero duration")
}
func TestState_Name(t *testing.T) {
state := &State{}
assert.Equal(t, "port_forward_state", state.Name())
func TestIsPermanentLeaseRequired(t *testing.T) {
tests := []struct {
name string
err error
expected bool
}{
{
name: "nil error",
err: nil,
expected: false,
},
{
name: "UPnP error 725",
err: fmt.Errorf("SOAP fault. Code: | Detail: <UPnPError><errorCode>725</errorCode><errorDescription>OnlyPermanentLeasesSupported</errorDescription></UPnPError>"),
expected: true,
},
{
name: "wrapped error with 725",
err: fmt.Errorf("add port mapping: %w", fmt.Errorf("Detail: <errorCode>725</errorCode>")),
expected: true,
},
{
name: "error 725 with newlines in XML",
err: fmt.Errorf("<errorCode>\n 725\n</errorCode>"),
expected: true,
},
{
name: "bare 725 without XML tag",
err: fmt.Errorf("error code 725"),
expected: false,
},
{
name: "unrelated error",
err: fmt.Errorf("connection refused"),
expected: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, tt.expected, isPermanentLeaseRequired(tt.err))
})
}
}

View File

@@ -1,50 +0,0 @@
//go:build !js
package portforward
import (
"context"
"fmt"
"github.com/libp2p/go-nat"
log "github.com/sirupsen/logrus"
)
// discoverGateway is the function used for NAT gateway discovery.
// It can be replaced in tests to avoid real network operations.
var discoverGateway = nat.DiscoverGateway
// State is persisted only for crash recovery cleanup
type State struct {
InternalPort uint16 `json:"internal_port,omitempty"`
Protocol string `json:"protocol,omitempty"`
}
func (s *State) Name() string {
return "port_forward_state"
}
// Cleanup implements statemanager.CleanableState for crash recovery
func (s *State) Cleanup() error {
if s.InternalPort == 0 {
return nil
}
log.Infof("cleaning up stale port mapping for port %d", s.InternalPort)
ctx, cancel := context.WithTimeout(context.Background(), discoveryTimeout)
defer cancel()
gateway, err := discoverGateway(ctx)
if err != nil {
// Discovery failure is not an error - gateway may not exist
log.Debugf("cleanup: no gateway found: %v", err)
return nil
}
if err := gateway.DeletePortMapping(ctx, s.Protocol, int(s.InternalPort)); err != nil {
return fmt.Errorf("delete port mapping: %w", err)
}
return nil
}

View File

@@ -10,10 +10,6 @@ import (
)
// registerStates registers all states that need crash recovery cleanup.
// Note: portforward.State is intentionally NOT registered here to avoid blocking startup
// for up to 10 seconds during NAT gateway discovery when no gateway is present.
// The gateway reference cannot be persisted across restarts, so cleanup requires re-discovery.
// Port forward cleanup is handled by the Manager during normal operation instead.
func registerStates(mgr *statemanager.Manager) {
mgr.RegisterState(&dns.ShutdownState{})
mgr.RegisterState(&systemops.ShutdownState{})

View File

@@ -12,10 +12,6 @@ import (
)
// registerStates registers all states that need crash recovery cleanup.
// Note: portforward.State is intentionally NOT registered here to avoid blocking startup
// for up to 10 seconds during NAT gateway discovery when no gateway is present.
// The gateway reference cannot be persisted across restarts, so cleanup requires re-discovery.
// Port forward cleanup is handled by the Manager during normal operation instead.
func registerStates(mgr *statemanager.Manager) {
mgr.RegisterState(&dns.ShutdownState{})
mgr.RegisterState(&systemops.ShutdownState{})

View File

@@ -43,18 +43,24 @@ func GetInfo(ctx context.Context) *Info {
systemHostname, _ := os.Hostname()
addrs, err := networkAddresses()
if err != nil {
log.Warnf("failed to discover network addresses: %s", err)
}
return &Info{
GoOS: runtime.GOOS,
Kernel: osInfo[0],
Platform: runtime.GOARCH,
OS: osName,
OSVersion: osVersion,
Hostname: extractDeviceName(ctx, systemHostname),
CPUs: runtime.NumCPU(),
NetbirdVersion: version.NetbirdVersion(),
UIVersion: extractUserAgent(ctx),
KernelVersion: osInfo[1],
Environment: env,
GoOS: runtime.GOOS,
Kernel: osInfo[0],
Platform: runtime.GOARCH,
OS: osName,
OSVersion: osVersion,
Hostname: extractDeviceName(ctx, systemHostname),
CPUs: runtime.NumCPU(),
NetbirdVersion: version.NetbirdVersion(),
UIVersion: extractUserAgent(ctx),
KernelVersion: osInfo[1],
NetworkAddresses: addrs,
Environment: env,
}
}

View File

@@ -24,9 +24,10 @@ import (
// Initial state for the debug collection
type debugInitialState struct {
wasDown bool
logLevel proto.LogLevel
isLevelTrace bool
wasDown bool
needsRestoreUp bool
logLevel proto.LogLevel
isLevelTrace bool
}
// Debug collection parameters
@@ -371,46 +372,51 @@ func (s *serviceClient) configureServiceForDebug(
conn proto.DaemonServiceClient,
state *debugInitialState,
enablePersistence bool,
) error {
) {
if state.wasDown {
if _, err := conn.Up(s.ctx, &proto.UpRequest{}); err != nil {
return fmt.Errorf("bring service up: %v", err)
log.Warnf("failed to bring service up: %v", err)
} else {
log.Info("Service brought up for debug")
time.Sleep(time.Second * 10)
}
log.Info("Service brought up for debug")
time.Sleep(time.Second * 10)
}
if !state.isLevelTrace {
if _, err := conn.SetLogLevel(s.ctx, &proto.SetLogLevelRequest{Level: proto.LogLevel_TRACE}); err != nil {
return fmt.Errorf("set log level to TRACE: %v", err)
log.Warnf("failed to set log level to TRACE: %v", err)
} else {
log.Info("Log level set to TRACE for debug")
}
log.Info("Log level set to TRACE for debug")
}
if _, err := conn.Down(s.ctx, &proto.DownRequest{}); err != nil {
return fmt.Errorf("bring service down: %v", err)
log.Warnf("failed to bring service down: %v", err)
} else {
state.needsRestoreUp = !state.wasDown
time.Sleep(time.Second)
}
time.Sleep(time.Second)
if enablePersistence {
if _, err := conn.SetSyncResponsePersistence(s.ctx, &proto.SetSyncResponsePersistenceRequest{
Enabled: true,
}); err != nil {
return fmt.Errorf("enable sync response persistence: %v", err)
log.Warnf("failed to enable sync response persistence: %v", err)
} else {
log.Info("Sync response persistence enabled for debug")
}
log.Info("Sync response persistence enabled for debug")
}
if _, err := conn.Up(s.ctx, &proto.UpRequest{}); err != nil {
return fmt.Errorf("bring service back up: %v", err)
log.Warnf("failed to bring service back up: %v", err)
} else {
state.needsRestoreUp = false
time.Sleep(time.Second * 3)
}
time.Sleep(time.Second * 3)
if _, err := conn.StartCPUProfile(s.ctx, &proto.StartCPUProfileRequest{}); err != nil {
log.Warnf("failed to start CPU profiling: %v", err)
}
return nil
}
func (s *serviceClient) collectDebugData(
@@ -424,9 +430,7 @@ func (s *serviceClient) collectDebugData(
var wg sync.WaitGroup
startProgressTracker(ctx, &wg, params.duration, progress)
if err := s.configureServiceForDebug(conn, state, params.enablePersistence); err != nil {
return err
}
s.configureServiceForDebug(conn, state, params.enablePersistence)
wg.Wait()
progress.progressBar.Hide()
@@ -482,9 +486,17 @@ func (s *serviceClient) createDebugBundleFromCollection(
// Restore service to original state
func (s *serviceClient) restoreServiceState(conn proto.DaemonServiceClient, state *debugInitialState) {
if state.needsRestoreUp {
if _, err := conn.Up(s.ctx, &proto.UpRequest{}); err != nil {
log.Warnf("failed to restore up state: %v", err)
} else {
log.Info("Service state restored to up")
}
}
if state.wasDown {
if _, err := conn.Down(s.ctx, &proto.DownRequest{}); err != nil {
log.Errorf("Failed to restore down state: %v", err)
log.Warnf("failed to restore down state: %v", err)
} else {
log.Info("Service state restored to down")
}
@@ -492,7 +504,7 @@ func (s *serviceClient) restoreServiceState(conn proto.DaemonServiceClient, stat
if !state.isLevelTrace {
if _, err := conn.SetLogLevel(s.ctx, &proto.SetLogLevelRequest{Level: state.logLevel}); err != nil {
log.Errorf("Failed to restore log level: %v", err)
log.Warnf("failed to restore log level: %v", err)
} else {
log.Info("Log level restored to original setting")
}

2
go.mod
View File

@@ -86,6 +86,7 @@ require (
github.com/pires/go-proxyproto v0.11.0
github.com/pkg/sftp v1.13.9
github.com/prometheus/client_golang v1.23.2
github.com/prometheus/otlptranslator v1.0.0
github.com/quic-go/quic-go v0.55.0
github.com/redis/go-redis/v9 v9.7.3
github.com/rs/xid v1.3.0
@@ -254,7 +255,6 @@ require (
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.67.5 // indirect
github.com/prometheus/otlptranslator v1.0.0 // indirect
github.com/prometheus/procfs v0.19.2 // indirect
github.com/russellhaering/goxmldsig v1.5.0 // indirect
github.com/rymdport/portal v0.4.2 // indirect

View File

@@ -2099,6 +2099,7 @@ func (s *SqlStore) getServices(ctx context.Context, accountID string) ([]*rpserv
var createdAt, certIssuedAt sql.NullTime
var status, proxyCluster, sessionPrivateKey, sessionPublicKey sql.NullString
var mode, source, sourcePeer sql.NullString
var terminated sql.NullBool
err := row.Scan(
&s.ID,
&s.AccountID,
@@ -2119,7 +2120,7 @@ func (s *SqlStore) getServices(ctx context.Context, accountID string) ([]*rpserv
&s.PortAutoAssigned,
&source,
&sourcePeer,
&s.Terminated,
&terminated,
)
if err != nil {
return nil, err
@@ -2160,7 +2161,9 @@ func (s *SqlStore) getServices(ctx context.Context, accountID string) ([]*rpserv
if sourcePeer.Valid {
s.SourcePeer = sourcePeer.String
}
if terminated.Valid {
s.Terminated = terminated.Bool
}
s.Targets = []*rpservice.Target{}
return &s, nil
})

View File

@@ -10,6 +10,7 @@ import (
"github.com/gorilla/mux"
prometheus2 "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/otlptranslator"
log "github.com/sirupsen/logrus"
"go.opentelemetry.io/otel/exporters/prometheus"
metric2 "go.opentelemetry.io/otel/metric"
@@ -206,7 +207,9 @@ func (appMetrics *defaultAppMetrics) GetMeter() metric2.Meter {
// NewDefaultAppMetrics and expose them via defaultEndpoint on a given HTTP port
func NewDefaultAppMetrics(ctx context.Context) (AppMetrics, error) {
exporter, err := prometheus.New()
exporter, err := prometheus.New(
prometheus.WithTranslationStrategy(otlptranslator.UnderscoreEscapingWithoutSuffixes),
)
if err != nil {
return nil, fmt.Errorf("failed to create prometheus exporter: %w", err)
}

View File

@@ -0,0 +1,217 @@
package types_test
import (
"context"
"fmt"
"os"
"testing"
nbdns "github.com/netbirdio/netbird/dns"
"github.com/netbirdio/netbird/management/server/types"
)
type benchmarkScale struct {
name string
peers int
groups int
}
var defaultScales = []benchmarkScale{
{"100peers_5groups", 100, 5},
{"500peers_20groups", 500, 20},
{"1000peers_50groups", 1000, 50},
{"5000peers_100groups", 5000, 100},
{"10000peers_200groups", 10000, 200},
{"20000peers_200groups", 20000, 200},
{"30000peers_300groups", 30000, 300},
}
func skipCIBenchmark(b *testing.B) {
if os.Getenv("CI") == "true" {
b.Skip("Skipping benchmark in CI")
}
}
// ──────────────────────────────────────────────────────────────────────────────
// Single Peer Network Map Generation
// ──────────────────────────────────────────────────────────────────────────────
// BenchmarkNetworkMapGeneration_Components benchmarks the components-based approach for a single peer.
func BenchmarkNetworkMapGeneration_Components(b *testing.B) {
skipCIBenchmark(b)
for _, scale := range defaultScales {
b.Run(scale.name, func(b *testing.B) {
account, validatedPeers := scalableTestAccount(scale.peers, scale.groups)
ctx := context.Background()
resourcePolicies := account.GetResourcePoliciesMap()
routers := account.GetResourceRoutersMap()
groupIDToUserIDs := account.GetActiveGroupUsers()
b.ReportAllocs()
b.ResetTimer()
for range b.N {
_ = account.GetPeerNetworkMapFromComponents(ctx, "peer-0", nbdns.CustomZone{}, nil, validatedPeers, resourcePolicies, routers, nil, groupIDToUserIDs)
}
})
}
}
// ──────────────────────────────────────────────────────────────────────────────
// All Peers (UpdateAccountPeers hot path)
// ──────────────────────────────────────────────────────────────────────────────
// BenchmarkNetworkMapGeneration_AllPeers benchmarks generating network maps for ALL peers.
func BenchmarkNetworkMapGeneration_AllPeers(b *testing.B) {
skipCIBenchmark(b)
scales := []benchmarkScale{
{"100peers_5groups", 100, 5},
{"500peers_20groups", 500, 20},
{"1000peers_50groups", 1000, 50},
{"5000peers_100groups", 5000, 100},
}
for _, scale := range scales {
account, validatedPeers := scalableTestAccount(scale.peers, scale.groups)
ctx := context.Background()
peerIDs := make([]string, 0, len(account.Peers))
for peerID := range account.Peers {
peerIDs = append(peerIDs, peerID)
}
b.Run("components/"+scale.name, func(b *testing.B) {
resourcePolicies := account.GetResourcePoliciesMap()
routers := account.GetResourceRoutersMap()
groupIDToUserIDs := account.GetActiveGroupUsers()
b.ReportAllocs()
b.ResetTimer()
for range b.N {
for _, peerID := range peerIDs {
_ = account.GetPeerNetworkMapFromComponents(ctx, peerID, nbdns.CustomZone{}, nil, validatedPeers, resourcePolicies, routers, nil, groupIDToUserIDs)
}
}
})
}
}
// ──────────────────────────────────────────────────────────────────────────────
// Sub-operations
// ──────────────────────────────────────────────────────────────────────────────
// BenchmarkNetworkMapGeneration_ComponentsCreation benchmarks components extraction.
func BenchmarkNetworkMapGeneration_ComponentsCreation(b *testing.B) {
skipCIBenchmark(b)
for _, scale := range defaultScales {
b.Run(scale.name, func(b *testing.B) {
account, validatedPeers := scalableTestAccount(scale.peers, scale.groups)
ctx := context.Background()
resourcePolicies := account.GetResourcePoliciesMap()
routers := account.GetResourceRoutersMap()
groupIDToUserIDs := account.GetActiveGroupUsers()
b.ReportAllocs()
b.ResetTimer()
for range b.N {
_ = account.GetPeerNetworkMapComponents(ctx, "peer-0", nbdns.CustomZone{}, nil, validatedPeers, resourcePolicies, routers, groupIDToUserIDs)
}
})
}
}
// BenchmarkNetworkMapGeneration_ComponentsCalculation benchmarks calculation from pre-built components.
func BenchmarkNetworkMapGeneration_ComponentsCalculation(b *testing.B) {
skipCIBenchmark(b)
for _, scale := range defaultScales {
b.Run(scale.name, func(b *testing.B) {
account, validatedPeers := scalableTestAccount(scale.peers, scale.groups)
ctx := context.Background()
resourcePolicies := account.GetResourcePoliciesMap()
routers := account.GetResourceRoutersMap()
groupIDToUserIDs := account.GetActiveGroupUsers()
components := account.GetPeerNetworkMapComponents(ctx, "peer-0", nbdns.CustomZone{}, nil, validatedPeers, resourcePolicies, routers, groupIDToUserIDs)
b.ReportAllocs()
b.ResetTimer()
for range b.N {
_ = types.CalculateNetworkMapFromComponents(ctx, components)
}
})
}
}
// BenchmarkNetworkMapGeneration_PrecomputeMaps benchmarks precomputed map costs.
func BenchmarkNetworkMapGeneration_PrecomputeMaps(b *testing.B) {
skipCIBenchmark(b)
for _, scale := range defaultScales {
b.Run("ResourcePoliciesMap/"+scale.name, func(b *testing.B) {
account, _ := scalableTestAccount(scale.peers, scale.groups)
b.ReportAllocs()
b.ResetTimer()
for range b.N {
_ = account.GetResourcePoliciesMap()
}
})
b.Run("ResourceRoutersMap/"+scale.name, func(b *testing.B) {
account, _ := scalableTestAccount(scale.peers, scale.groups)
b.ReportAllocs()
b.ResetTimer()
for range b.N {
_ = account.GetResourceRoutersMap()
}
})
b.Run("ActiveGroupUsers/"+scale.name, func(b *testing.B) {
account, _ := scalableTestAccount(scale.peers, scale.groups)
b.ReportAllocs()
b.ResetTimer()
for range b.N {
_ = account.GetActiveGroupUsers()
}
})
}
}
// ──────────────────────────────────────────────────────────────────────────────
// Scaling Analysis
// ──────────────────────────────────────────────────────────────────────────────
// BenchmarkNetworkMapGeneration_GroupScaling tests group count impact on performance.
func BenchmarkNetworkMapGeneration_GroupScaling(b *testing.B) {
skipCIBenchmark(b)
groupCounts := []int{1, 5, 20, 50, 100, 200, 500}
for _, numGroups := range groupCounts {
b.Run(fmt.Sprintf("components_%dgroups", numGroups), func(b *testing.B) {
account, validatedPeers := scalableTestAccount(1000, numGroups)
ctx := context.Background()
resourcePolicies := account.GetResourcePoliciesMap()
routers := account.GetResourceRoutersMap()
groupIDToUserIDs := account.GetActiveGroupUsers()
b.ReportAllocs()
b.ResetTimer()
for range b.N {
_ = account.GetPeerNetworkMapFromComponents(ctx, "peer-0", nbdns.CustomZone{}, nil, validatedPeers, resourcePolicies, routers, nil, groupIDToUserIDs)
}
})
}
}
// BenchmarkNetworkMapGeneration_PeerScaling tests peer count impact on performance.
func BenchmarkNetworkMapGeneration_PeerScaling(b *testing.B) {
skipCIBenchmark(b)
peerCounts := []int{50, 100, 500, 1000, 2000, 5000, 10000, 20000, 30000}
for _, numPeers := range peerCounts {
numGroups := numPeers / 20
if numGroups < 1 {
numGroups = 1
}
b.Run(fmt.Sprintf("components_%dpeers", numPeers), func(b *testing.B) {
account, validatedPeers := scalableTestAccount(numPeers, numGroups)
ctx := context.Background()
resourcePolicies := account.GetResourcePoliciesMap()
routers := account.GetResourceRoutersMap()
groupIDToUserIDs := account.GetActiveGroupUsers()
b.ReportAllocs()
b.ResetTimer()
for range b.N {
_ = account.GetPeerNetworkMapFromComponents(ctx, "peer-0", nbdns.CustomZone{}, nil, validatedPeers, resourcePolicies, routers, nil, groupIDToUserIDs)
}
})
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -8,6 +8,7 @@ import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/prometheus/otlptranslator"
"go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/sdk/metric"
@@ -51,7 +52,9 @@ func TestMetrics_RoundTripper(t *testing.T) {
},
}
exporter, err := prometheus.New()
exporter, err := prometheus.New(
prometheus.WithTranslationStrategy(otlptranslator.UnderscoreEscapingWithoutSuffixes),
)
if err != nil {
t.Fatalf("create prometheus exporter: %v", err)
}

View File

@@ -27,6 +27,7 @@ import (
"github.com/pires/go-proxyproto"
prometheus2 "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/otlptranslator"
log "github.com/sirupsen/logrus"
"go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/sdk/metric"
@@ -236,7 +237,9 @@ func (s *Server) ListenAndServe(ctx context.Context, addr string) (err error) {
s.svcPorts = make(map[types.ServiceID][]uint16)
s.lastMappings = make(map[types.ServiceID]*proto.ProxyMapping)
exporter, err := prometheus.New()
exporter, err := prometheus.New(
prometheus.WithTranslationStrategy(otlptranslator.UnderscoreEscapingWithoutSuffixes),
)
if err != nil {
return fmt.Errorf("create prometheus exporter: %w", err)
}

View File

@@ -8,6 +8,7 @@ import (
prometheus2 "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/otlptranslator"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/prometheus"
api "go.opentelemetry.io/otel/metric"
@@ -27,7 +28,9 @@ type Metrics struct {
// NewServer initializes and returns a new Metrics instance
func NewServer(port int, endpoint string) (*Metrics, error) {
exporter, err := prometheus.New()
exporter, err := prometheus.New(
prometheus.WithTranslationStrategy(otlptranslator.UnderscoreEscapingWithoutSuffixes),
)
if err != nil {
return nil, err
}