Log something better when UP doesn't find service grpc socket

This commit is contained in:
riccardom
2026-06-18 14:39:55 +02:00
parent 9b179be324
commit aff5da2c8e
2 changed files with 39 additions and 4 deletions

View File

@@ -20,6 +20,7 @@ import (
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"google.golang.org/grpc"
"google.golang.org/grpc/connectivity"
"google.golang.org/grpc/credentials/insecure"
daddr "github.com/netbirdio/netbird/client/internal/daemonaddr"
@@ -260,17 +261,46 @@ func FlagNameToEnvVar(cmdFlag string, prefix string) string {
return prefix + upper
}
// DialClientGRPCServer returns client connection to the daemon server.
// DialClientGRPCServer returns client connection to the daemon server. It waits
// (up to the timeout) for the daemon to become reachable so an `up` issued right
// after `service start` tolerates the startup race. Instead of grpc's blocking
// dial — whose raw "transport failed" retry warnings are silenced by the logger
// config — we drive the wait ourselves and emit one clean line per failed attempt.
func DialClientGRPCServer(ctx context.Context, addr string) (*grpc.ClientConn, error) {
ctx, cancel := context.WithTimeout(ctx, time.Second*10)
defer cancel()
return grpc.DialContext(
conn, err := grpc.DialContext(
ctx,
strings.TrimPrefix(addr, "tcp://"),
grpc.WithTransportCredentials(insecure.NewCredentials()),
grpc.WithBlock(),
)
if err != nil {
return nil, err
}
conn.Connect()
for {
state := conn.GetState()
if state == connectivity.Ready {
return conn, nil
}
// Log only once the connection has actually failed — not during the
// brief Idle/Connecting phase on a healthy daemon (avoids a spurious
// line + wait when the daemon is already up).
if state == connectivity.TransientFailure {
log.Infof("waiting for the netbird daemon to become available at %s...", addr)
}
// Wake on the next state change, but at least every second so a stuck
// TransientFailure re-logs at a steady cadence until the timeout.
waitCtx, waitCancel := context.WithTimeout(ctx, time.Second)
conn.WaitForStateChange(waitCtx, state)
waitCancel()
if ctx.Err() != nil {
_ = conn.Close()
return nil, fmt.Errorf("daemon not reachable at %s: %w", addr, ctx.Err())
}
}
}
// WithBackOff execute function in backoff cycle.

View File

@@ -140,7 +140,12 @@ func newRotatedOutput(logPath string) io.Writer {
func setGRPCLibLogger(logger *log.Logger) {
logOut := logger.Writer()
if os.Getenv("GRPC_GO_LOG_SEVERITY_LEVEL") != "info" {
grpclog.SetLoggerV2(grpclog.NewLoggerV2(io.Discard, logOut, logOut))
// Discard grpc info AND warning logs by default — the warning stream is
// dominated by benign connection-retry noise ("addrConn.createTransport
// failed", "transport is closing") that surfaces e.g. when the CLI dials
// a daemon that is still starting or already gone. Errors are kept. Set
// GRPC_GO_LOG_SEVERITY_LEVEL=info to get the full verbose grpc logging.
grpclog.SetLoggerV2(grpclog.NewLoggerV2(io.Discard, io.Discard, logOut))
return
}