[client] Add health check flag to status command and expose daemon status in output (#5650)

This commit is contained in:
Viktor Liu
2026-03-22 19:39:40 +08:00
committed by GitHub
parent b550a2face
commit 82762280ee
9 changed files with 198 additions and 77 deletions

View File

@@ -17,8 +17,8 @@ ENV \
NETBIRD_BIN="/usr/local/bin/netbird" \
NB_LOG_FILE="console,/var/log/netbird/client.log" \
NB_DAEMON_ADDR="unix:///var/run/netbird.sock" \
NB_ENTRYPOINT_SERVICE_TIMEOUT="5" \
NB_ENTRYPOINT_LOGIN_TIMEOUT="5"
NB_ENTRYPOINT_SERVICE_TIMEOUT="30" \
NB_ENTRYPOINT_LOGIN_TIMEOUT="30"
ENTRYPOINT [ "/usr/local/bin/netbird-entrypoint.sh" ]

View File

@@ -23,8 +23,8 @@ ENV \
NB_DAEMON_ADDR="unix:///var/lib/netbird/netbird.sock" \
NB_LOG_FILE="console,/var/lib/netbird/client.log" \
NB_DISABLE_DNS="true" \
NB_ENTRYPOINT_SERVICE_TIMEOUT="5" \
NB_ENTRYPOINT_LOGIN_TIMEOUT="1"
NB_ENTRYPOINT_SERVICE_TIMEOUT="30" \
NB_ENTRYPOINT_LOGIN_TIMEOUT="30"
ENTRYPOINT [ "/usr/local/bin/netbird-entrypoint.sh" ]

View File

@@ -28,6 +28,7 @@ var (
ipsFilterMap map[string]struct{}
prefixNamesFilterMap map[string]struct{}
connectionTypeFilter string
checkFlag string
)
var statusCmd = &cobra.Command{
@@ -49,6 +50,7 @@ func init() {
statusCmd.PersistentFlags().StringSliceVar(&prefixNamesFilter, "filter-by-names", []string{}, "filters the detailed output by a list of one or more peer FQDN or hostnames, e.g., --filter-by-names peer-a,peer-b.netbird.cloud")
statusCmd.PersistentFlags().StringVar(&statusFilter, "filter-by-status", "", "filters the detailed output by connection status(idle|connecting|connected), e.g., --filter-by-status connected")
statusCmd.PersistentFlags().StringVar(&connectionTypeFilter, "filter-by-connection-type", "", "filters the detailed output by connection type (P2P|Relayed), e.g., --filter-by-connection-type P2P")
statusCmd.PersistentFlags().StringVar(&checkFlag, "check", "", "run a health check and exit with code 0 on success, 1 on failure (live|ready|startup)")
}
func statusFunc(cmd *cobra.Command, args []string) error {
@@ -56,6 +58,10 @@ func statusFunc(cmd *cobra.Command, args []string) error {
cmd.SetOut(cmd.OutOrStdout())
if checkFlag != "" {
return runHealthCheck(cmd)
}
err := parseFilters()
if err != nil {
return err
@@ -68,15 +74,17 @@ func statusFunc(cmd *cobra.Command, args []string) error {
ctx := internal.CtxInitState(cmd.Context())
resp, err := getStatus(ctx, false)
resp, err := getStatus(ctx, true, false)
if err != nil {
return err
}
status := resp.GetStatus()
if status == string(internal.StatusNeedsLogin) || status == string(internal.StatusLoginFailed) ||
status == string(internal.StatusSessionExpired) {
needsAuth := status == string(internal.StatusNeedsLogin) || status == string(internal.StatusLoginFailed) ||
status == string(internal.StatusSessionExpired)
if needsAuth && !jsonFlag && !yamlFlag {
cmd.Printf("Daemon status: %s\n\n"+
"Run UP command to log in with SSO (interactive login):\n\n"+
" netbird up \n\n"+
@@ -99,7 +107,17 @@ func statusFunc(cmd *cobra.Command, args []string) error {
profName = activeProf.Name
}
var outputInformationHolder = nbstatus.ConvertToStatusOutputOverview(resp.GetFullStatus(), anonymizeFlag, resp.GetDaemonVersion(), statusFilter, prefixNamesFilter, prefixNamesFilterMap, ipsFilterMap, connectionTypeFilter, profName)
var outputInformationHolder = nbstatus.ConvertToStatusOutputOverview(resp.GetFullStatus(), nbstatus.ConvertOptions{
Anonymize: anonymizeFlag,
DaemonVersion: resp.GetDaemonVersion(),
DaemonStatus: nbstatus.ParseDaemonStatus(status),
StatusFilter: statusFilter,
PrefixNamesFilter: prefixNamesFilter,
PrefixNamesFilterMap: prefixNamesFilterMap,
IPsFilter: ipsFilterMap,
ConnectionTypeFilter: connectionTypeFilter,
ProfileName: profName,
})
var statusOutputString string
switch {
case detailFlag:
@@ -121,7 +139,7 @@ func statusFunc(cmd *cobra.Command, args []string) error {
return nil
}
func getStatus(ctx context.Context, shouldRunProbes bool) (*proto.StatusResponse, error) {
func getStatus(ctx context.Context, fullPeerStatus bool, shouldRunProbes bool) (*proto.StatusResponse, error) {
conn, err := DialClientGRPCServer(ctx, daemonAddr)
if err != nil {
//nolint
@@ -131,7 +149,7 @@ func getStatus(ctx context.Context, shouldRunProbes bool) (*proto.StatusResponse
}
defer conn.Close()
resp, err := proto.NewDaemonServiceClient(conn).Status(ctx, &proto.StatusRequest{GetFullPeerStatus: true, ShouldRunProbes: shouldRunProbes})
resp, err := proto.NewDaemonServiceClient(conn).Status(ctx, &proto.StatusRequest{GetFullPeerStatus: fullPeerStatus, ShouldRunProbes: shouldRunProbes})
if err != nil {
return nil, fmt.Errorf("status failed: %v", status.Convert(err).Message())
}
@@ -185,6 +203,83 @@ func enableDetailFlagWhenFilterFlag() {
}
}
// runHealthCheck executes the probe selected through the --check flag.
//
// The flag value is matched case-insensitively against "live", "ready" and
// "startup"; anything else is rejected before the daemon is contacted. A
// returned error signals a failed check, nil signals success.
//
//   - live:    succeeds as soon as the daemon answers the status request.
//   - ready:   additionally evaluates the reported daemon status via checkReadiness.
//   - startup: requests full peer status with probes and evaluates it via checkStartup.
func runHealthCheck(cmd *cobra.Command) error {
	const (
		probeLive    = "live"
		probeReady   = "ready"
		probeStartup = "startup"
	)

	probe := strings.ToLower(checkFlag)
	if probe != probeLive && probe != probeReady && probe != probeStartup {
		return fmt.Errorf("unknown check %q, must be one of: live, ready, startup", checkFlag)
	}

	if logErr := util.InitLog(logLevel, util.LogConsole); logErr != nil {
		return fmt.Errorf("init log: %w", logErr)
	}

	// Only the startup probe asks for the full peer status and for probes to
	// be run; the other two issue the status request with both disabled.
	wantFull := probe == probeStartup
	resp, err := getStatus(internal.CtxInitState(cmd.Context()), wantFull, wantFull)
	if err != nil {
		return err
	}

	if probe == probeReady {
		return checkReadiness(resp)
	}
	if probe == probeStartup {
		return checkStartup(resp)
	}

	// Liveness: a successful status response is all that is required.
	return nil
}
// checkReadiness evaluates the daemon status carried by resp.
//
// It returns nil for Idle, Connecting and Connected. NeedsLogin, LoginFailed
// and SessionExpired produce an error naming the status, and any other value
// produces an "unexpected daemon status" error.
func checkReadiness(resp *proto.StatusResponse) error {
	state := internal.StatusType(resp.GetStatus())

	if state == internal.StatusIdle || state == internal.StatusConnecting || state == internal.StatusConnected {
		return nil
	}

	if state == internal.StatusNeedsLogin || state == internal.StatusLoginFailed || state == internal.StatusSessionExpired {
		return fmt.Errorf("readiness check: daemon status is %s", state)
	}

	return fmt.Errorf("readiness check: unexpected daemon status %q", state)
}
// checkStartup evaluates the full status carried by resp.
//
// It fails when the full status is missing, when management or signal is not
// reported as connected, or when at least one relay with a rel:// or rels://
// URI is listed and none of them is reported as available. Relay entries with
// any other URI prefix are not counted.
func checkStartup(resp *proto.StatusResponse) error {
	full := resp.GetFullStatus()

	switch {
	case full == nil:
		return fmt.Errorf("startup check: no full status available")
	case !full.GetManagementState().GetConnected():
		return fmt.Errorf("startup check: management not connected")
	case !full.GetSignalState().GetConnected():
		return fmt.Errorf("startup check: signal not connected")
	}

	total, available := 0, 0
	for _, relay := range full.GetRelays() {
		uri := relay.GetURI()
		isRelayURI := strings.HasPrefix(uri, "rel://") || strings.HasPrefix(uri, "rels://")
		if !isRelayURI {
			continue
		}

		total++
		if relay.GetAvailable() {
			available++
		}
	}

	// No relay entries at all, or at least one usable relay, passes.
	if total == 0 || available > 0 {
		return nil
	}

	return fmt.Errorf("startup check: no relay servers available (0/%d connected)", total)
}
func parseInterfaceIP(interfaceIP string) string {
ip, _, err := net.ParseCIDR(interfaceIP)
if err != nil {

View File

@@ -31,7 +31,6 @@ import (
nbstatus "github.com/netbirdio/netbird/client/status"
mgmProto "github.com/netbirdio/netbird/shared/management/proto"
"github.com/netbirdio/netbird/util"
"github.com/netbirdio/netbird/version"
)
const readmeContent = `Netbird debug bundle
@@ -418,7 +417,10 @@ func (g *BundleGenerator) addStatus() error {
fullStatus := g.statusRecorder.GetFullStatus()
protoFullStatus := nbstatus.ToProtoFullStatus(fullStatus)
protoFullStatus.Events = g.statusRecorder.GetEventHistory()
overview := nbstatus.ConvertToStatusOutputOverview(protoFullStatus, g.anonymize, version.NetbirdVersion(), "", nil, nil, nil, "", profName)
overview := nbstatus.ConvertToStatusOutputOverview(protoFullStatus, nbstatus.ConvertOptions{
Anonymize: g.anonymize,
ProfileName: profName,
})
statusOutput := overview.FullDetailSummary()
statusReader := strings.NewReader(statusOutput)

View File

@@ -1,12 +1,11 @@
#!/usr/bin/env bash
set -eEuo pipefail
: ${NB_ENTRYPOINT_SERVICE_TIMEOUT:="5"}
: ${NB_ENTRYPOINT_LOGIN_TIMEOUT:="5"}
: ${NB_ENTRYPOINT_SERVICE_TIMEOUT:="30"}
: ${NB_ENTRYPOINT_LOGIN_TIMEOUT:="30"}
NETBIRD_BIN="${NETBIRD_BIN:-"netbird"}"
export NB_LOG_FILE="${NB_LOG_FILE:-"console,/var/log/netbird/client.log"}"
service_pids=()
log_file_path=""
_log() {
# mimic Go logger's output for easier parsing
@@ -33,60 +32,50 @@ on_exit() {
fi
}
# wait_for_message TIMEOUT MESSAGE
#
# Waits for a log line matching MESSAGE (an extended regular expression, as it
# is handed to `grep -E`) to show up in the file named by the global
# ${log_file_path}.
#
#   - TIMEOUT equal to 0: nothing is awaited, only an info line is emitted.
#   - ${log_file_path} non-empty: the file is followed with `tail -F` for at
#     most TIMEOUT seconds; the exit status of this branch is grep's, i.e.
#     non-zero when no matching line was seen.
#   - otherwise: sleeps for TIMEOUT seconds.
wait_for_message() {
local timeout="${1}" message="${2}"
if test "${timeout}" -eq 0; then
info "not waiting for log line ${message@Q} due to zero timeout."
elif test -n "${log_file_path}"; then
info "waiting for log line ${message@Q} for ${timeout} seconds..."
# `timeout` ends the otherwise endless `tail -F`; `grep -q` stops at the first match.
grep -E -q "${message}" <(timeout "${timeout}" tail -F "${log_file_path}" 2>/dev/null)
else
info "log file unsupported, sleeping for ${timeout} seconds..."
sleep "${timeout}"
fi
}
# locate_log_file LOG_FILES
#
# Picks the first entry of the comma-separated LOG_FILES string that is neither
# `console` nor `syslog` and stores it in the global ${log_file_path}.
# When no such entry exists, ${log_file_path} is left unchanged and two
# warnings are emitted.
locate_log_file() {
local log_files_string="${1}"
while read -r log_file; do
case "${log_file}" in
# These two targets are skipped; they are not treated as file paths here.
console | syslog) ;;
*)
log_file_path="${log_file}"
return
;;
esac
# Commas are turned into newlines so `read` sees one entry per line.
done < <(sed 's#,#\n#g' <<<"${log_files_string}")
warn "log files parsing for ${log_files_string@Q} is not supported by debug bundles"
warn "please consider removing the \$NB_LOG_FILE or setting it to real file, before gathering debug bundles."
}
wait_for_daemon_startup() {
local timeout="${1}"
if test -n "${log_file_path}"; then
if ! wait_for_message "${timeout}" "started daemon server"; then
warn "log line containing 'started daemon server' not found after ${timeout} seconds"
warn "daemon failed to start, exiting..."
exit 1
fi
else
warn "daemon service startup not discovered, sleeping ${timeout} instead"
sleep "${timeout}"
if [[ "${timeout}" -eq 0 ]]; then
info "not waiting for daemon startup due to zero timeout."
return
fi
local deadline=$((SECONDS + timeout))
while [[ "${SECONDS}" -lt "${deadline}" ]]; do
if "${NETBIRD_BIN}" status --check live 2>/dev/null; then
return
fi
sleep 1
done
warn "daemon did not become responsive after ${timeout} seconds, exiting..."
exit 1
}
login_if_needed() {
local timeout="${1}"
if test -n "${log_file_path}" && wait_for_message "${timeout}" 'peer has been successfully registered|management connection state READY'; then
if "${NETBIRD_BIN}" status --check ready 2>/dev/null; then
info "already logged in, skipping 'netbird up'..."
else
return
fi
if [[ "${timeout}" -eq 0 ]]; then
info "logging in..."
"${NETBIRD_BIN}" up
return
fi
local deadline=$((SECONDS + timeout))
while [[ "${SECONDS}" -lt "${deadline}" ]]; do
if "${NETBIRD_BIN}" status --check ready 2>/dev/null; then
info "already logged in, skipping 'netbird up'..."
return
fi
sleep 1
done
info "logging in..."
"${NETBIRD_BIN}" up
}
main() {
@@ -95,7 +84,6 @@ main() {
service_pids+=("$!")
info "registered new service process 'netbird service run', currently running: ${service_pids[@]@Q}"
locate_log_file "${NB_LOG_FILE}"
wait_for_daemon_startup "${NB_ENTRYPOINT_SERVICE_TIMEOUT}"
login_if_needed "${NB_ENTRYPOINT_LOGIN_TIMEOUT}"

View File

@@ -25,6 +25,38 @@ import (
"github.com/netbirdio/netbird/version"
)
// DaemonStatus is the state reported by the NetBird daemon.
// The values mirror internal.StatusType; they are redeclared in this package
// to avoid an import cycle.
type DaemonStatus string

// Daemon states known to this package.
const (
	DaemonStatusIdle           = DaemonStatus("Idle")
	DaemonStatusConnecting     = DaemonStatus("Connecting")
	DaemonStatusConnected      = DaemonStatus("Connected")
	DaemonStatusNeedsLogin     = DaemonStatus("NeedsLogin")
	DaemonStatusLoginFailed    = DaemonStatus("LoginFailed")
	DaemonStatusSessionExpired = DaemonStatus("SessionExpired")
)

// ParseDaemonStatus turns a raw status string into a DaemonStatus.
// No validation is applied: values that match none of the constants above are
// kept verbatim so they stay visible during version skew.
func ParseDaemonStatus(s string) DaemonStatus {
	parsed := DaemonStatus(s)
	return parsed
}
// ConvertOptions holds parameters for ConvertToStatusOutputOverview.
// The zero value is usable: no anonymization, no peer filters, and empty
// daemon version, daemon status and profile name in the resulting overview.
type ConvertOptions struct {
// Anonymize, when true, makes the converter run the overview through anonymizeOverview.
Anonymize bool
// DaemonVersion is copied into OutputOverview.DaemonVersion.
DaemonVersion string
// DaemonStatus is copied into OutputOverview.DaemonStatus.
DaemonStatus DaemonStatus
// StatusFilter is forwarded to mapPeers as the peer status filter.
StatusFilter string
// PrefixNamesFilter is forwarded to mapPeers together with PrefixNamesFilterMap.
PrefixNamesFilter []string
// PrefixNamesFilterMap is forwarded to mapPeers; presumably the set form of PrefixNamesFilter — confirm against the caller's parseFilters.
PrefixNamesFilterMap map[string]struct{}
// IPsFilter is forwarded to mapPeers as the IP filter set.
IPsFilter map[string]struct{}
// ConnectionTypeFilter is forwarded to mapPeers as the connection type filter.
ConnectionTypeFilter string
// ProfileName is copied into OutputOverview.ProfileName.
ProfileName string
}
type PeerStateDetailOutput struct {
FQDN string `json:"fqdn" yaml:"fqdn"`
IP string `json:"netbirdIp" yaml:"netbirdIp"`
@@ -102,6 +134,7 @@ type OutputOverview struct {
Peers PeersStateOutput `json:"peers" yaml:"peers"`
CliVersion string `json:"cliVersion" yaml:"cliVersion"`
DaemonVersion string `json:"daemonVersion" yaml:"daemonVersion"`
DaemonStatus DaemonStatus `json:"daemonStatus" yaml:"daemonStatus"`
ManagementState ManagementStateOutput `json:"management" yaml:"management"`
SignalState SignalStateOutput `json:"signal" yaml:"signal"`
Relays RelayStateOutput `json:"relays" yaml:"relays"`
@@ -120,7 +153,8 @@ type OutputOverview struct {
SSHServerState SSHServerStateOutput `json:"sshServer" yaml:"sshServer"`
}
func ConvertToStatusOutputOverview(pbFullStatus *proto.FullStatus, anon bool, daemonVersion string, statusFilter string, prefixNamesFilter []string, prefixNamesFilterMap map[string]struct{}, ipsFilter map[string]struct{}, connectionTypeFilter string, profName string) OutputOverview {
// ConvertToStatusOutputOverview converts protobuf status to the output overview.
func ConvertToStatusOutputOverview(pbFullStatus *proto.FullStatus, opts ConvertOptions) OutputOverview {
managementState := pbFullStatus.GetManagementState()
managementOverview := ManagementStateOutput{
URL: managementState.GetURL(),
@@ -137,12 +171,13 @@ func ConvertToStatusOutputOverview(pbFullStatus *proto.FullStatus, anon bool, da
relayOverview := mapRelays(pbFullStatus.GetRelays())
sshServerOverview := mapSSHServer(pbFullStatus.GetSshServerState())
peersOverview := mapPeers(pbFullStatus.GetPeers(), statusFilter, prefixNamesFilter, prefixNamesFilterMap, ipsFilter, connectionTypeFilter)
peersOverview := mapPeers(pbFullStatus.GetPeers(), opts.StatusFilter, opts.PrefixNamesFilter, opts.PrefixNamesFilterMap, opts.IPsFilter, opts.ConnectionTypeFilter)
overview := OutputOverview{
Peers: peersOverview,
CliVersion: version.NetbirdVersion(),
DaemonVersion: daemonVersion,
DaemonVersion: opts.DaemonVersion,
DaemonStatus: opts.DaemonStatus,
ManagementState: managementOverview,
SignalState: signalOverview,
Relays: relayOverview,
@@ -157,11 +192,11 @@ func ConvertToStatusOutputOverview(pbFullStatus *proto.FullStatus, anon bool, da
NSServerGroups: mapNSGroups(pbFullStatus.GetDnsServers()),
Events: mapEvents(pbFullStatus.GetEvents()),
LazyConnectionEnabled: pbFullStatus.GetLazyConnectionEnabled(),
ProfileName: profName,
ProfileName: opts.ProfileName,
SSHServerState: sshServerOverview,
}
if anon {
if opts.Anonymize {
anonymizer := anonymize.NewAnonymizer(anonymize.DefaultAddresses())
anonymizeOverview(anonymizer, &overview)
}

View File

@@ -176,6 +176,7 @@ var overview = OutputOverview{
Events: []SystemEventOutput{},
CliVersion: version.NetbirdVersion(),
DaemonVersion: "0.14.1",
DaemonStatus: DaemonStatusConnected,
ManagementState: ManagementStateOutput{
URL: "my-awesome-management.com:443",
Connected: true,
@@ -238,7 +239,10 @@ var overview = OutputOverview{
}
func TestConversionFromFullStatusToOutputOverview(t *testing.T) {
convertedResult := ConvertToStatusOutputOverview(resp.GetFullStatus(), false, resp.GetDaemonVersion(), "", nil, nil, nil, "", "")
convertedResult := ConvertToStatusOutputOverview(resp.GetFullStatus(), ConvertOptions{
DaemonVersion: resp.GetDaemonVersion(),
DaemonStatus: ParseDaemonStatus(resp.GetStatus()),
})
assert.Equal(t, overview, convertedResult)
}
@@ -329,6 +333,7 @@ func TestParsingToJSON(t *testing.T) {
},
"cliVersion": "development",
"daemonVersion": "0.14.1",
"daemonStatus": "Connected",
"management": {
"url": "my-awesome-management.com:443",
"connected": true,
@@ -452,6 +457,7 @@ func TestParsingToYAML(t *testing.T) {
networks: []
cliVersion: development
daemonVersion: 0.14.1
daemonStatus: Connected
management:
url: my-awesome-management.com:443
connected: true

View File

@@ -18,7 +18,6 @@ import (
"github.com/netbirdio/netbird/client/wasm/internal/rdp"
"github.com/netbirdio/netbird/client/wasm/internal/ssh"
"github.com/netbirdio/netbird/util"
"github.com/netbirdio/netbird/version"
)
const (
@@ -350,7 +349,7 @@ func getStatusOverview(client *netbird.Client) (nbstatus.OutputOverview, error)
pbFullStatus := fullStatus.ToProto()
return nbstatus.ConvertToStatusOutputOverview(pbFullStatus, false, version.NetbirdVersion(), "", nil, nil, nil, "", ""), nil
return nbstatus.ConvertToStatusOutputOverview(pbFullStatus, nbstatus.ConvertOptions{}), nil
}
// createStatusMethod creates the status method that returns JSON