Skip to content

Commit

Permalink
Pre-installation testing framework (#6278)
Browse files Browse the repository at this point in the history
This PR enables running sanity checks on a K8s cluster before
Antrea is installed.
The command to run the checks is `antctl check cluster`.
At the moment we run 4 different sanity checks.

Fixes #6153

Signed-off-by: Kanha gupta <[email protected]>
  • Loading branch information
kanha-gupta authored May 14, 2024
1 parent 6a7c606 commit 3eb4d44
Show file tree
Hide file tree
Showing 9 changed files with 599 additions and 127 deletions.
15 changes: 9 additions & 6 deletions .github/workflows/kind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -744,8 +744,8 @@ jobs:
path: log.tar.gz
retention-days: 30

run-post-installation-checks:
name: Test connectivity using 'antctl check' command
run-installation-checks:
name: Test installation using 'antctl check' command
needs: [ build-antrea-coverage-image ]
runs-on: [ ubuntu-latest ]
steps:
Expand Down Expand Up @@ -775,13 +775,16 @@ jobs:
- name: Create Kind Cluster
run: |
./ci/kind/kind-setup.sh create kind --ip-family dual
- name: Deploy Antrea
run: |
kubectl apply -f build/yamls/antrea.yml
- name: Build antctl binary
run: |
make antctl-linux
- name: Run antctl command
- name: Run Pre checks
run: |
./bin/antctl-linux check cluster
- name: Deploy Antrea
run: |
kubectl apply -f build/yamls/antrea.yml
- name: Run Post checks
run: |
./bin/antctl-linux check installation
Expand Down
7 changes: 7 additions & 0 deletions pkg/antctl/antctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (

agentapis "antrea.io/antrea/pkg/agent/apis"
fallbackversion "antrea.io/antrea/pkg/antctl/fallback/version"
checkcluster "antrea.io/antrea/pkg/antctl/raw/check/cluster"
checkinstallation "antrea.io/antrea/pkg/antctl/raw/check/installation"
"antrea.io/antrea/pkg/antctl/raw/featuregates"
"antrea.io/antrea/pkg/antctl/raw/multicluster"
Expand Down Expand Up @@ -640,6 +641,12 @@ $ antctl get podmulticaststats pod -n namespace`,
supportController: false,
commandGroup: check,
},
{
cobraCommand: checkcluster.Command(),
supportAgent: false,
supportController: false,
commandGroup: check,
},
{
cobraCommand: supportbundle.Command,
supportAgent: true,
Expand Down
236 changes: 236 additions & 0 deletions pkg/antctl/raw/check/cluster/command.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
// Copyright 2024 Antrea Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cluster

import (
"context"
"errors"
"fmt"
"os"
"time"

"github.com/fatih/color"
"github.com/spf13/cobra"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/utils/ptr"

"antrea.io/antrea/pkg/antctl/raw/check"
"antrea.io/antrea/pkg/version"
)

func Command() *cobra.Command {
command := &cobra.Command{
Use: "cluster",
Short: "Runs pre installation checks",
RunE: func(cmd *cobra.Command, args []string) error {
return Run()
},
}
return command
}

const (
testNamespacePrefix = "antrea-test"
deploymentName = "cluster-checker"
podReadyTimeout = 1 * time.Minute
)

type uncertainError struct {
reason string
}

func (e uncertainError) Error() string {
return e.reason
}

func newUncertainError(reason string, a ...interface{}) uncertainError {
return uncertainError{reason: fmt.Sprintf(reason, a...)}
}

type Test interface {
Run(ctx context.Context, testContext *testContext) error
}

var testsRegistry = make(map[string]Test)

func RegisterTest(name string, test Test) {
testsRegistry[name] = test
}

type testContext struct {
client kubernetes.Interface
config *rest.Config
clusterName string
namespace string
testPod *corev1.Pod
}

func Run() error {
client, config, clusterName, err := check.NewClient()
if err != nil {
return fmt.Errorf("unable to create Kubernetes client: %s", err)
}
ctx := context.Background()
testContext := NewTestContext(client, config, clusterName)
if err := testContext.setup(ctx); err != nil {
return err
}
var numSuccess, numFailure, numUncertain int
for name, test := range testsRegistry {
testContext.Header("Running test: %s", name)
if err := test.Run(ctx, testContext); err != nil {
if errors.As(err, new(uncertainError)) {
testContext.Warning("Test %s was uncertain: %v", name, err)
numUncertain++
} else {
testContext.Fail("Test %s failed: %v", name, err)
numFailure++
}
} else {
testContext.Success("Test %s passed", name)
numSuccess++
}
}
testContext.Log("Test finished: %v tests succeeded, %v tests failed, %v tests were uncertain", numSuccess, numFailure, numUncertain)
check.Teardown(ctx, testContext.client, testContext.clusterName, testContext.namespace)
if numFailure > 0 {
return fmt.Errorf("%v/%v tests failed", numFailure, len(testsRegistry))
}
return nil
}

func (t *testContext) setup(ctx context.Context) error {
t.Log("Creating Namespace %s for pre installation tests...", t.namespace)
_, err := t.client.CoreV1().Namespaces().Create(ctx, &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: t.namespace}}, metav1.CreateOptions{})
if err != nil {
return fmt.Errorf("unable to create Namespace %s: %s", t.namespace, err)
}
deployment := check.NewDeployment(check.DeploymentParameters{
Name: deploymentName,
Image: getAntreaAgentImage(),
Replicas: 1,
Command: []string{"bash", "-c"},
Args: []string{"trap 'exit 0' SIGTERM; sleep infinity & pid=$!; wait $pid"},
Labels: map[string]string{"app": "antrea", "component": "cluster-checker"},
HostNetwork: true,
VolumeMounts: []corev1.VolumeMount{
{Name: "cni-conf", MountPath: "/etc/cni/net.d"},
{Name: "lib-modules", MountPath: "/lib/modules"},
},
Tolerations: []corev1.Toleration{
{
Key: "node-role.kubernetes.io/control-plane",
Operator: "Exists",
Effect: "NoSchedule",
},
{
Key: "node-role.kubernetes.io/master",
Operator: "Exists",
Effect: "NoSchedule",
},
{
Key: "node.kubernetes.io/not-ready",
Operator: "Exists",
Effect: "NoSchedule",
},
},
Volumes: []corev1.Volume{
{
Name: "cni-conf",
VolumeSource: corev1.VolumeSource{
HostPath: &corev1.HostPathVolumeSource{
Path: "/etc/cni/net.d",
},
},
},
{
Name: "lib-modules",
VolumeSource: corev1.VolumeSource{
HostPath: &corev1.HostPathVolumeSource{
Path: "/lib/modules",
Type: ptr.To(corev1.HostPathType("Directory")),
},
},
},
},
NodeSelector: map[string]string{
"kubernetes.io/os": "linux",
},
SecurityContext: &corev1.SecurityContext{
Capabilities: &corev1.Capabilities{
Add: []corev1.Capability{"SYS_MODULE"},
},
},
})

t.Log("Creating Deployment")
_, err = t.client.AppsV1().Deployments(t.namespace).Create(ctx, deployment, metav1.CreateOptions{})
if err != nil {
return fmt.Errorf("unable to create Deployment: %w", err)
}

t.Log("Waiting for Deployment to become ready")
err = check.WaitForDeploymentsReady(ctx, time.Second, podReadyTimeout, t.client, t.clusterName, t.namespace, deploymentName)
if err != nil {
return fmt.Errorf("error while waiting for Deployment to become ready: %w", err)
}
testPods, err := t.client.CoreV1().Pods(t.namespace).List(ctx, metav1.ListOptions{LabelSelector: "component=cluster-checker"})
if err != nil {
return fmt.Errorf("no pod found for test Deployment")
}
t.testPod = &testPods.Items[0]
return nil
}

func getAntreaAgentImage() string {
if version.ReleaseStatus == "released" {
return fmt.Sprintf("antrea/antrea-agent-ubuntu:%s", version.GetVersion())
}
return "antrea/antrea-agent-ubuntu:latest"
}

func NewTestContext(client kubernetes.Interface, config *rest.Config, clusterName string) *testContext {
return &testContext{
client: client,
config: config,
clusterName: clusterName,
namespace: check.GenerateRandomNamespace(testNamespacePrefix),
}
}

func (t *testContext) Log(format string, a ...interface{}) {
fmt.Fprintf(os.Stdout, fmt.Sprintf("[%s] ", t.clusterName)+format+"\n", a...)
}

func (t *testContext) Success(format string, a ...interface{}) {
fmt.Fprintf(os.Stdout, fmt.Sprintf("[%s] ", t.clusterName)+color.GreenString(format, a...)+"\n")
}

func (t *testContext) Fail(format string, a ...interface{}) {
fmt.Fprintf(os.Stdout, fmt.Sprintf("[%s] ", t.clusterName)+color.RedString(format, a...)+"\n")
}

func (t *testContext) Warning(format string, a ...interface{}) {
fmt.Fprintf(os.Stdout, fmt.Sprintf("[%s] ", t.clusterName)+color.YellowString(format, a...)+"\n")
}

func (t *testContext) Header(format string, a ...interface{}) {
t.Log("-------------------------------------------------------------------------------------------")
t.Log(format, a...)
t.Log("-------------------------------------------------------------------------------------------")
}
52 changes: 52 additions & 0 deletions pkg/antctl/raw/check/cluster/test_checkcniexistence.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Copyright 2024 Antrea Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cluster

import (
"context"
"fmt"
"sort"
"strings"

"antrea.io/antrea/pkg/antctl/raw/check"
)

type checkCNIExistence struct{}

func init() {
RegisterTest("check-cni-existence", &checkCNIExistence{})
}

func (t *checkCNIExistence) Run(ctx context.Context, testContext *testContext) error {
command := []string{"ls", "-1", "/etc/cni/net.d"}
output, _, err := check.ExecInPod(ctx, testContext.client, testContext.config, testContext.namespace, testContext.testPod.Name, "", command)
if err != nil {
return fmt.Errorf("failed to execute command in Pod %s, error: %w", testContext.testPod.Name, err)
}
files := strings.Fields(output)
if len(files) == 0 {
testContext.Log("No files present in /etc/cni/net.d in Node %s", testContext.testPod.Spec.NodeName)
return nil
}
sort.Strings(files)
if files[0] < "10-antrea.conflist" {
return newUncertainError("another CNI configuration file with higher priority than Antrea's CNI configuration file found: %s; this may be expected if networkPolicyOnly mode is enabled", files[0])
} else if files[0] != "10-antrea.conflist" {
testContext.Log("Another CNI configuration file found: %s with Antrea having higher precedence", files[0])
} else {
testContext.Log("Antrea's CNI configuration file already present: %s", files[0])
}
return nil
}
49 changes: 49 additions & 0 deletions pkg/antctl/raw/check/cluster/test_checkcontrolplaneavailability.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright 2024 Antrea Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cluster

import (
"context"
"fmt"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
)

type checkControlPlaneAvailability struct{}

func init() {
RegisterTest("check-control-plane-nodes-availability", &checkControlPlaneAvailability{})
}

func (t *checkControlPlaneAvailability) Run(ctx context.Context, testContext *testContext) error {
controlPlaneNodes := sets.New[string]()
controlPlaneLabels := []string{"node-role.kubernetes.io/control-plane", "node-role.kubernetes.io/master"}
for _, label := range controlPlaneLabels {
nodes, err := testContext.client.CoreV1().Nodes().List(ctx, metav1.ListOptions{LabelSelector: label})
if err != nil {
return fmt.Errorf("failed to list Nodes with label %s: %w", label, err)
}
for idx := range nodes.Items {
controlPlaneNodes.Insert(nodes.Items[idx].Name)
}
}
if controlPlaneNodes.Len() == 0 {
return newUncertainError("no control-plane Nodes were found; if installing Antrea in encap mode, some K8s functionalities (API aggregation, apiserver proxy, admission controllers) may be impacted")
} else {
testContext.Log("control-plane Nodes were found in the cluster.")
}
return nil
}
Loading

0 comments on commit 3eb4d44

Please sign in to comment.