[client] add reset for management backoff (#4935)

Reset client management grpc client backoff after successful connected to management API.

Current Situation:
If the connection duration exceeds MaxElapsedTime, when the connection is interrupted, the backoff fails immediately due to timeout and does not actually perform a retry.
This commit is contained in:
Louis Li
2025-12-30 15:37:49 +08:00
committed by GitHub
parent 38f9d5ed58
commit e11970e32e

View File

@@ -111,6 +111,8 @@ func (c *GrpcClient) ready() bool {
// Sync wraps the real client's Sync endpoint call and takes care of retries and encryption/decryption of messages
// Blocking request. The result will be sent via msgHandler callback function
func (c *GrpcClient) Sync(ctx context.Context, sysInfo *system.Info, msgHandler func(msg *proto.SyncResponse) error) error {
backOff := defaultBackoff(ctx)
operation := func() error {
log.Debugf("management connection state %v", c.conn.GetState())
connState := c.conn.GetState()
@@ -128,10 +130,10 @@ func (c *GrpcClient) Sync(ctx context.Context, sysInfo *system.Info, msgHandler
return err
}
return c.handleStream(ctx, *serverPubKey, sysInfo, msgHandler)
return c.handleStream(ctx, *serverPubKey, sysInfo, msgHandler, backOff)
}
err := backoff.Retry(operation, defaultBackoff(ctx))
err := backoff.Retry(operation, backOff)
if err != nil {
log.Warnf("exiting the Management service connection retry loop due to the unrecoverable error: %s", err)
}
@@ -140,7 +142,7 @@ func (c *GrpcClient) Sync(ctx context.Context, sysInfo *system.Info, msgHandler
}
func (c *GrpcClient) handleStream(ctx context.Context, serverPubKey wgtypes.Key, sysInfo *system.Info,
msgHandler func(msg *proto.SyncResponse) error) error {
msgHandler func(msg *proto.SyncResponse) error, backOff backoff.BackOff) error {
ctx, cancelStream := context.WithCancel(ctx)
defer cancelStream()
@@ -158,6 +160,9 @@ func (c *GrpcClient) handleStream(ctx context.Context, serverPubKey wgtypes.Key,
// blocking until error
err = c.receiveEvents(stream, serverPubKey, msgHandler)
// we need this reset because after a successful connection and a consequent error, backoff lib doesn't
// reset times and next try will start with a long delay
backOff.Reset()
if err != nil {
c.notifyDisconnected(err)
s, _ := gstatus.FromError(err)