Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions cmd/healthchecker/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ type HealthCheckerOptions struct {
// AddFlags adds health checker command line options to pflag.
func (hco *HealthCheckerOptions) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&hco.Component, "component", types.KubeletComponent,
"The component to check health for. Supports kubelet, docker and cri")
"The component to check health for. Supports kubelet, docker, kube-proxy, and cri")
// Deprecated: For backward compatibility on linux environment. Going forward "service" will be used instead of systemd-service
if runtime.GOOS == "linux" {
fs.MarkDeprecated("systemd-service", "please use --service flag instead")
Expand All @@ -73,8 +73,9 @@ func (hco *HealthCheckerOptions) AddFlags(fs *pflag.FlagSet) {
// Returns error if invalid, nil otherwise.
func (hco *HealthCheckerOptions) IsValid() error {
// Make sure the component specified is valid.
if hco.Component != types.KubeletComponent && hco.Component != types.DockerComponent && hco.Component != types.CRIComponent {
return fmt.Errorf("the component specified is not supported. Supported components are : <kubelet/docker/cri>")
if hco.Component != types.KubeletComponent && hco.Component != types.DockerComponent &&
hco.Component != types.CRIComponent && hco.Component != types.KubeProxyComponent {
return fmt.Errorf("the component specified is not supported. Supported components are : <kubelet/docker/cri/kube-proxy>")
}
// Make sure the service is specified if repair is enabled.
if hco.EnableRepair && hco.Service == "" {
Expand Down
34 changes: 34 additions & 0 deletions config/windows-health-checker-kubeproxy.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"plugin": "custom",
"pluginConfig": {
"invoke_interval": "10s",
"timeout": "3m",
"max_output_length": 80,
"concurrency": 1
},
"source": "health-checker",
"metricsReporting": true,
"conditions": [
{
"type": "KubeProxyUnhealthy",
"reason": "KubeProxyIsHealthy",
"message": "kube-proxy on the node is functioning properly"
}
],
"rules": [
{
"type": "permanent",
"condition": "KubeProxyUnhealthy",
"reason": "KubeProxyUnhealthy",
"path": "C:\\etc\\kubernetes\\node\\bin\\health-checker.exe",
"args": [
"--component=kube-proxy",
"--enable-repair=true",
"--cooldown-time=1m",
"--health-check-timeout=10s"
],
"timeout": "3m"
}
]
}

23 changes: 15 additions & 8 deletions pkg/healthchecker/health_checker_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,9 @@ func getRepairFunc(hco *options.HealthCheckerOptions) func() {
func getHealthCheckFunc(hco *options.HealthCheckerOptions) func() (bool, error) {
switch hco.Component {
case types.KubeletComponent:
return func() (bool, error) {
httpClient := http.Client{Timeout: hco.HealthCheckTimeout}
response, err := httpClient.Get(types.KubeletHealthCheckEndpoint)
if err != nil || response.StatusCode != http.StatusOK {
return false, nil
}
return true, nil
}
return healthCheckEndpointOKFunc(types.KubeletHealthCheckEndpoint, hco.HealthCheckTimeout)
case types.KubeProxyComponent:
return healthCheckEndpointOKFunc(types.KubeProxyHealthCheckEndpoint, hco.HealthCheckTimeout)
case types.DockerComponent:
return func() (bool, error) {
if _, err := execCommand("docker.exe", "ps"); err != nil {
Expand All @@ -94,6 +89,18 @@ func getHealthCheckFunc(hco *options.HealthCheckerOptions) func() (bool, error)
return nil
}

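// healthCheckEndpointOKFunc returns a function that issues an HTTP GET to endpoint, bounded by timeout,
// and reports true only when the response status is 200 OK. The returned error is always nil.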
func healthCheckEndpointOKFunc(endpoint string, timeout time.Duration) func() (bool, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like a missed opportunity to make this a generic health check driven by config, i.e., remove the hardcoded addresses and pass them in as a JSON argument.

/cc @Random-Liu what do you think?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Random-Liu what do you think?

Yeah, we thought about introducing a Kubernetes-like "healthiness probe": exec probe + HTTP probe.

The only problem is that the health checker is already a plugin of NPD, and now we need another layer of plugin. :P

Some design is needed here.

Please feel free to suggest ideas or propose a design for this, but that doesn't necessarily need to block this change.

return func() (bool, error) {
	// The client carries the caller-supplied timeout so a hung endpoint cannot block the check.
	httpClient := http.Client{Timeout: timeout}
	response, err := httpClient.Get(endpoint)
	if err != nil {
		// A transport failure is reported as "unhealthy" (false) with a nil error.
		return false, nil
	}
	// net/http requires the caller to close the response body; without this,
	// every probe that receives a response leaves its connection open.
	defer response.Body.Close()
	// Only an exact 200 OK counts as healthy.
	return response.StatusCode == http.StatusOK, nil
}
}

// execCommand creates a new process, executes the command, and returns the (output, error) from command.
func execCommand(command string, args ...string) (string, error) {
cmd := util.Exec(command, args...)
Expand Down
12 changes: 7 additions & 5 deletions pkg/healthchecker/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ const (
CmdTimeout = 10 * time.Second
LogParsingTimeLayout = "2006-01-02 15:04:05"

KubeletComponent = "kubelet"
CRIComponent = "cri"
DockerComponent = "docker"
ContainerdService = "containerd"
KubeletComponent = "kubelet"
CRIComponent = "cri"
DockerComponent = "docker"
ContainerdService = "containerd"
KubeProxyComponent = "kube-proxy"

KubeletHealthCheckEndpoint = "http://127.0.0.1:10248/healthz"
KubeletHealthCheckEndpoint = "http://127.0.0.1:10248/healthz"
KubeProxyHealthCheckEndpoint = "http://127.0.0.1:10256/healthz"

LogPatternFlagSeparator = ":"
)
Expand Down