Allow polling to continue when individual controllers fail

Fixes #425

When polling multiple controllers, if one controller was down or
unreachable, unpoller would stop collecting data from ALL controllers.
This caused complete data loss across all sites when just one was down.

Root Cause:
Both Metrics() and Events() methods would immediately return an error
when any controller failed, skipping all remaining controllers in the
loop.

Changes:
- Log errors from failed controllers but continue to the next controller
- Track collection errors separately from successfully collected data
- Only return an error (the first one recorded) if no data was collected
  and at least one controller failed
- Return success if at least one controller provided data

This allows unpoller to continue monitoring healthy controllers even
when some are temporarily unreachable due to network issues, timeouts,
or maintenance.

Example behavior:
- Controller 1: Down (timeout) - logs error, continues
- Controller 2: Up - collects data successfully
- Controller 3: Up - collects data successfully
- Result: Returns data from controllers 2 and 3

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Cody Lee
2026-01-25 12:01:37 -06:00
parent 4e3c823989
commit f51a0c7202

View File

@@ -200,6 +200,8 @@ func (u *InputUnifi) Events(filter *poller.Filter) (*poller.Events, error) {
filter = &poller.Filter{}
}
var collectionErrors []error
for _, c := range u.Controllers {
if filter.Path != "" && !strings.EqualFold(c.URL, filter.Path) {
continue
@@ -207,12 +209,21 @@ func (u *InputUnifi) Events(filter *poller.Filter) (*poller.Events, error) {
events, err := u.collectControllerEvents(c)
if err != nil {
return nil, err
// Log error but continue to next controller
u.LogErrorf("Failed to collect events from controller %s: %v", c.URL, err)
collectionErrors = append(collectionErrors, fmt.Errorf("%s: %w", c.URL, err))
continue
}
logs = append(logs, events...)
}
// Return collected events even if some controllers failed
// Only return error if ALL controllers failed and we have no events
if len(logs) == 0 && len(collectionErrors) > 0 {
return nil, collectionErrors[0]
}
return &poller.Events{Logs: logs}, nil
}
@@ -229,6 +240,8 @@ func (u *InputUnifi) Metrics(filter *poller.Filter) (*poller.Metrics, error) {
filter = &poller.Filter{}
}
var collectionErrors []error
// Check if the request is for an existing, configured controller (or all controllers)
for _, c := range u.Controllers {
if filter.Path != "" && !strings.EqualFold(c.URL, filter.Path) {
@@ -238,12 +251,25 @@ func (u *InputUnifi) Metrics(filter *poller.Filter) (*poller.Metrics, error) {
m, err := u.collectController(c)
if err != nil {
return metrics, err
// Log error but continue to next controller
u.LogErrorf("Failed to collect metrics from controller %s: %v", c.URL, err)
collectionErrors = append(collectionErrors, fmt.Errorf("%s: %w", c.URL, err))
continue
}
metrics = poller.AppendMetrics(metrics, m)
}
// If we collected data from at least one controller, return success
if len(metrics.Devices) > 0 || len(metrics.Clients) > 0 {
return metrics, nil
}
// If all controllers failed and we had errors, return the first error
if len(collectionErrors) > 0 {
return metrics, collectionErrors[0]
}
if filter.Path == "" || len(metrics.Clients) != 0 {
return metrics, nil
}