mirror of
https://github.com/netbirdio/netbird.git
synced 2026-03-31 06:24:18 -04:00
Add health-check agent recognition to avoid error logs (#4917)
Health-check connections now send a properly formatted auth message with a well-known peer ID instead of closing immediately. The server recognizes this peer ID and logs the connection at debug level instead of reporting a handshake error.
This commit is contained in:
31
relay/healthcheck/peerid/peerid.go
Normal file
31
relay/healthcheck/peerid/peerid.go
Normal file
@@ -0,0 +1,31 @@
|
||||
package peerid
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
|
||||
v2 "github.com/netbirdio/netbird/shared/relay/auth/hmac/v2"
|
||||
"github.com/netbirdio/netbird/shared/relay/messages"
|
||||
)
|
||||
|
||||
var (
|
||||
// HealthCheckPeerID is the hashed peer ID for health check connections
|
||||
HealthCheckPeerID = messages.HashID("healthcheck-agent")
|
||||
|
||||
// DummyAuthToken is a structurally valid auth token for health check.
|
||||
// The signature is not valid but the format is correct (1 byte algo + 32 bytes signature + payload).
|
||||
DummyAuthToken = createDummyToken()
|
||||
)
|
||||
|
||||
func createDummyToken() []byte {
|
||||
token := v2.Token{
|
||||
AuthAlgo: v2.AuthAlgoHMACSHA256,
|
||||
Signature: make([]byte, sha256.Size),
|
||||
Payload: []byte("healthcheck"),
|
||||
}
|
||||
return token.Marshal()
|
||||
}
|
||||
|
||||
// IsHealthCheck checks if the given peer ID is the health check agent
|
||||
func IsHealthCheck(peerID *messages.PeerID) bool {
|
||||
return peerID != nil && *peerID == HealthCheckPeerID
|
||||
}
|
||||
@@ -7,8 +7,10 @@ import (
|
||||
|
||||
"github.com/coder/websocket"
|
||||
|
||||
"github.com/netbirdio/netbird/relay/healthcheck/peerid"
|
||||
"github.com/netbirdio/netbird/relay/server"
|
||||
"github.com/netbirdio/netbird/shared/relay"
|
||||
"github.com/netbirdio/netbird/shared/relay/messages"
|
||||
)
|
||||
|
||||
func dialWS(ctx context.Context, address url.URL) error {
|
||||
@@ -30,7 +32,18 @@ func dialWS(ctx context.Context, address url.URL) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to connect to websocket: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
_ = conn.CloseNow()
|
||||
}()
|
||||
|
||||
authMsg, err := messages.MarshalAuthMsg(peerid.HealthCheckPeerID, peerid.DummyAuthToken)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal auth message: %w", err)
|
||||
}
|
||||
|
||||
if err := conn.Write(ctx, websocket.MessageBinary, authMsg); err != nil {
|
||||
return fmt.Errorf("failed to write auth message: %w", err)
|
||||
}
|
||||
|
||||
_ = conn.Close(websocket.StatusNormalClosure, "availability check complete")
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -97,7 +97,7 @@ func (h *handshake) handshakeReceive() (*messages.PeerID, error) {
|
||||
return nil, fmt.Errorf("invalid message type %d from %s", msgType, h.conn.RemoteAddr())
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return peerID, err
|
||||
}
|
||||
h.peerID = peerID
|
||||
return peerID, nil
|
||||
@@ -147,7 +147,7 @@ func (h *handshake) handleAuthMsg(buf []byte) (*messages.PeerID, error) {
|
||||
}
|
||||
|
||||
if err := h.validator.Validate(authPayload); err != nil {
|
||||
return nil, fmt.Errorf("validate %s (%s): %w", rawPeerID.String(), h.conn.RemoteAddr(), err)
|
||||
return rawPeerID, fmt.Errorf("validate %s (%s): %w", rawPeerID.String(), h.conn.RemoteAddr(), err)
|
||||
}
|
||||
|
||||
return rawPeerID, nil
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
|
||||
"github.com/netbirdio/netbird/relay/healthcheck/peerid"
|
||||
//nolint:staticcheck
|
||||
"github.com/netbirdio/netbird/relay/metrics"
|
||||
"github.com/netbirdio/netbird/relay/server/store"
|
||||
@@ -123,7 +124,11 @@ func (r *Relay) Accept(conn net.Conn) {
|
||||
}
|
||||
peerID, err := h.handshakeReceive()
|
||||
if err != nil {
|
||||
log.Errorf("failed to handshake: %s", err)
|
||||
if peerid.IsHealthCheck(peerID) {
|
||||
log.Debugf("health check connection from %s", conn.RemoteAddr())
|
||||
} else {
|
||||
log.Errorf("failed to handshake: %s", err)
|
||||
}
|
||||
if cErr := conn.Close(); cErr != nil {
|
||||
log.Errorf("failed to close connection, %s: %s", conn.RemoteAddr(), cErr)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user