From b4fa16b2fdfc26d52eb4700c7045dd3df5e3df01 Mon Sep 17 00:00:00 2001 From: Brian Gates Date: Tue, 3 Feb 2026 21:12:26 -0500 Subject: [PATCH] fix(influxunifi): use CelsiusSafe() for temp fields to fix InfluxDB type conflict (#944) (#945) * fix(influxunifi): use CelsiusSafe() for temp fields to fix InfluxDB type conflict Write temp_* fields as float64 instead of int64 so InfluxDB does not report 'field type conflict' when the measurement already has float. Requires github.com/unpoller/unifi/v5 with CelsiusSafe() (unpoller/unifi#195). Fixes #944. Co-authored-by: Cursor * deps: unifi v5.17.0; nil guards and 429 retry (unpoller#943) - Bump github.com/unpoller/unifi/v5 to v5.17.0 (CelsiusSafe, ErrNilUnifi, RateLimitError) - inputunifi: guard pollController for nil c.Unifi; controllerID(c) in formatSites/Clients/Devices - inputunifi: getUnifi retry with backoff on 429 (up to 5 attempts, Retry-After or exponential backoff) Co-authored-by: Cursor * test(influxunifi): expect temp_* as float after CelsiusSafe() (fix #944) Co-authored-by: Cursor --------- Co-authored-by: Cursor --- go.mod | 2 +- go.sum | 4 +- .../integration_test_expectations.yaml | 50 +++++++++---------- pkg/influxunifi/udm.go | 2 +- pkg/inputunifi/collector.go | 4 ++ pkg/inputunifi/input.go | 49 ++++++++++++++---- pkg/inputunifi/updateweb.go | 26 +++++++--- 7 files changed, 91 insertions(+), 46 deletions(-) diff --git a/go.mod b/go.mod index c5ac80d5..8209e523 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( github.com/prometheus/common v0.67.5 github.com/spf13/pflag v1.0.10 github.com/stretchr/testify v1.11.1 - github.com/unpoller/unifi/v5 v5.16.0 + github.com/unpoller/unifi/v5 v5.17.0 golang.org/x/crypto v0.47.0 golang.org/x/term v0.39.0 golift.io/cnfg v0.2.3 diff --git a/go.sum b/go.sum index 49f7194e..410042ea 100644 --- a/go.sum +++ b/go.sum @@ -77,8 +77,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= -github.com/unpoller/unifi/v5 v5.16.0 h1:FowfkJ7wbMoySFcqOJG2IJH9pOGTUnPpKNNG9vHl2/I= -github.com/unpoller/unifi/v5 v5.16.0/go.mod h1:vSIXIclPG9dpKxUp+pavfgENHWaTZXvDg7F036R1YCo= +github.com/unpoller/unifi/v5 v5.17.0 h1:e2yES/35+/Ddd6BsXOjXRhsO663uqI99PKleS9plF/w= +github.com/unpoller/unifi/v5 v5.17.0/go.mod h1:vSIXIclPG9dpKxUp+pavfgENHWaTZXvDg7F036R1YCo= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= diff --git a/pkg/influxunifi/integration_test_expectations.yaml b/pkg/influxunifi/integration_test_expectations.yaml index 4e840773..901791fd 100644 --- a/pkg/influxunifi/integration_test_expectations.yaml +++ b/pkg/influxunifi/integration_test_expectations.yaml @@ -115,11 +115,11 @@ points: stat_tx_retries: float state: float system_uptime: float - temp_cpu: int - temp_memory: int - temp_network: int - temp_probe: int - temp_sys: int + temp_cpu: float + temp_memory: float + temp_network: float + temp_probe: float + temp_sys: float total_max_power: float tx_bytes: float upgradeable: bool @@ -275,11 +275,11 @@ points: stat_user-tx_retries: float state: string system_uptime: float - temp_cpu: int - temp_memory: int - temp_network: int - temp_probe: int - temp_sys: int + temp_cpu: float + temp_memory: float + temp_network: float + temp_probe: float + temp_sys: float tx_bytes: float upgradeable: bool uptime: float @@ -520,11 +520,11 @@ points: stat_wifi_tx_dropped: float state: float system_uptime: float - temp_cpu: int - temp_memory: int - temp_network: int - temp_probe: int - temp_sys: int + temp_cpu: float + temp_memory: float + temp_network: float + temp_probe: float + temp_sys: float tx_bytes: float uplink_latency: float uplink_max_speed: float @@ -573,11 +573,11 @@ points: stat_tx_retries: float state: float system_uptime: float - temp_cpu: int - temp_memory: int - temp_network: int - temp_probe: int - temp_sys: int + temp_cpu: float + temp_memory: float + temp_network: float + temp_probe: float + temp_sys: float tx_bytes: float uptime: float version: string @@ -769,11 +769,11 @@ points: storage_foo_size: float storage_foo_used: float system_uptime: float - temp_cpu: int - temp_memory: int - temp_network: int - temp_probe: int - temp_sys: int + temp_cpu: float + temp_memory: float + temp_network: float + temp_probe: float + temp_sys: float tx_bytes: float upgradeable: bool uplink_latency: float diff --git a/pkg/influxunifi/udm.go b/pkg/influxunifi/udm.go index 3feaee3e..37433456 100644 --- a/pkg/influxunifi/udm.go +++ b/pkg/influxunifi/udm.go @@ -41,7 +41,7 @@ func (u *InfluxUnifi) batchSysStats(s unifi.SysStats, ss unifi.SystemStats) map[ } for k, v := range ss.Temps { - temp := v.CelsiusInt64() + temp := v.CelsiusSafe() if temp != 0 && k != "" { m["temp_"+sanitizeName(k)] = temp diff --git a/pkg/inputunifi/collector.go b/pkg/inputunifi/collector.go index 68e48489..139df4f3 100644 --- a/pkg/inputunifi/collector.go +++ b/pkg/inputunifi/collector.go @@ -100,6 +100,10 @@ func (u *InputUnifi) pollController(c *Controller) (*poller.Metrics, error) { u.RLock() defer u.RUnlock() + if c.Unifi == nil { + return nil, fmt.Errorf("controller client is nil (e.g. after 429 or auth failure): %s", c.URL) + } + u.LogDebugf("Polling controller: %s (%s)", c.URL, c.ID) // Get the sites we care about. diff --git a/pkg/inputunifi/input.go b/pkg/inputunifi/input.go index cd04c126..a6a364f3 100644 --- a/pkg/inputunifi/input.go +++ b/pkg/inputunifi/input.go @@ -3,6 +3,7 @@ package inputunifi import ( + "errors" "fmt" "os" "strings" @@ -123,8 +124,11 @@ func (c *Controller) getCerts() ([][]byte, error) { return b, nil } +const maxAuthRetries = 5 + // getUnifi (re-)authenticates to a unifi controller. // If certificate files are provided, they are re-read. +// On 429 Too Many Requests, retries with exponential backoff (and Retry-After when present) up to maxAuthRetries. func (u *InputUnifi) getUnifi(c *Controller) error { u.Lock() defer u.Unlock() @@ -138,8 +142,7 @@ func (u *InputUnifi) getUnifi(c *Controller) error { return err } - // Create an authenticated session to the Unifi Controller. - c.Unifi, err = unifi.NewUnifi(&unifi.Config{ + cfg := &unifi.Config{ User: c.User, Pass: c.Pass, APIKey: c.APIKey, @@ -147,18 +150,42 @@ func (u *InputUnifi) getUnifi(c *Controller) error { SSLCert: certs, VerifySSL: *c.VerifySSL, Timeout: c.Timeout.Duration, - ErrorLog: u.LogErrorf, // Log all errors. - DebugLog: u.LogDebugf, // Log debug messages. - }) - if err != nil { - c.Unifi = nil - - return fmt.Errorf("unifi controller: %w", err) + ErrorLog: u.LogErrorf, + DebugLog: u.LogDebugf, } - u.LogDebugf("Authenticated with controller successfully, %s", c.URL) + var lastErr error + backoff := 30 * time.Second - return nil + for attempt := 0; attempt < maxAuthRetries; attempt++ { + c.Unifi, lastErr = unifi.NewUnifi(cfg) + if lastErr == nil { + u.LogDebugf("Authenticated with controller successfully, %s", c.URL) + return nil + } + + if !errors.Is(lastErr, unifi.ErrTooManyRequests) { + c.Unifi = nil + return fmt.Errorf("unifi controller: %w", lastErr) + } + + var rl *unifi.RateLimitError + if errors.As(lastErr, &rl) && rl.RetryAfter > 0 { + backoff = rl.RetryAfter + } + + if attempt < maxAuthRetries-1 { + u.Logf("Controller %s returned 429 Too Many Requests; waiting %v before retry (%d/%d)", + c.URL, backoff, attempt+1, maxAuthRetries) + time.Sleep(backoff) + if backoff < 5*time.Minute { + backoff = backoff * 2 + } + } + } + + c.Unifi = nil + return fmt.Errorf("unifi controller: %w (gave up after %d retries)", lastErr, maxAuthRetries) } // checkSites makes sure the list of provided sites exists on the controller. diff --git a/pkg/inputunifi/updateweb.go b/pkg/inputunifi/updateweb.go index c124161c..60145037 100644 --- a/pkg/inputunifi/updateweb.go +++ b/pkg/inputunifi/updateweb.go @@ -11,6 +11,16 @@ import ( /* This code reformats our data to be displayed on the built-in web interface. */ +// controllerID returns the controller UUID for display, or "" if the client is nil (e.g. after 429 re-auth failure). +// Avoids SIGSEGV when updateWeb runs while c.Unifi is nil (see unpoller/unpoller#943). +func controllerID(c *Controller) string { + if c == nil || c.Unifi == nil { + return "" + } + + return c.Unifi.UUID +} + func updateWeb(c *Controller, metrics *Metrics) { webserver.UpdateInput(&webserver.Input{ Name: PluginName, // Forgetting this leads to 3 hours of head scratching. @@ -65,13 +75,15 @@ func formatControllers(controllers []*Controller) []*Controller { } func formatSites(c *Controller, sites []*unifi.Site) (s webserver.Sites) { + id := controllerID(c) + for _, site := range sites { s = append(s, &webserver.Site{ ID: site.ID, Name: site.Name, Desc: site.Desc, Source: site.SourceName, - Controller: c.Unifi.UUID, + Controller: id, }) } @@ -97,7 +109,7 @@ func formatClients(c *Controller, clients []*unifi.Client) (d webserver.Clients) Name: client.Name, SiteID: client.SiteID, Source: client.SourceName, - Controller: c.Unifi.UUID, + Controller: controllerID(c), MAC: client.Mac, IP: client.IP, Type: clientType, @@ -117,12 +129,14 @@ func formatDevices(c *Controller, devices *unifi.Devices) (d webserver.Devices) return d } + id := controllerID(c) + for _, device := range devices.UAPs { d = append(d, &webserver.Device{ Name: device.Name, SiteID: device.SiteID, Source: device.SourceName, - Controller: c.Unifi.UUID, + Controller: id, MAC: device.Mac, IP: device.IP, Type: device.Type, @@ -139,7 +153,7 @@ func formatDevices(c *Controller, devices *unifi.Devices) (d webserver.Devices) Name: device.Name, SiteID: device.SiteID, Source: device.SourceName, - Controller: c.Unifi.UUID, + Controller: id, MAC: device.Mac, IP: device.IP, Type: device.Type, @@ -156,7 +170,7 @@ func formatDevices(c *Controller, devices *unifi.Devices) (d webserver.Devices) Name: device.Name, SiteID: device.SiteID, Source: device.SourceName, - Controller: c.Unifi.UUID, + Controller: id, MAC: device.Mac, IP: device.IP, Type: device.Type, @@ -173,7 +187,7 @@ func formatDevices(c *Controller, devices *unifi.Devices) (d webserver.Devices) Name: device.Name, SiteID: device.SiteID, Source: device.SourceName, - Controller: c.Unifi.UUID, + Controller: id, MAC: device.Mac, IP: device.IP, Type: device.Type,