From e14ca7787e5590119118179114345bdeeea82e43 Mon Sep 17 00:00:00 2001 From: r2k1 Date: Fri, 11 Oct 2024 14:54:01 +1300 Subject: [PATCH] node-bootstrapper (#4891) Co-authored-by: Tim Wright --- .golangci.yaml | 2 +- .pipelines/e2e.yaml | 6 +- .../templates/.builder-release-template.yaml | 11 +- e2e/cluster.go | 4 +- e2e/config/azure.go | 87 +++- e2e/config/config.go | 3 + e2e/config/vhd.go | 6 +- e2e/exec.go | 16 +- e2e/go.mod | 1 + e2e/go.sum | 4 + e2e/log.go | 7 + e2e/node_bootstrapper_test.go | 92 ++++ e2e/nodebootstrapping.go | 18 +- e2e/scenario_helpers_test.go | 1 + e2e/scenario_test.go | 14 + e2e/template.go | 1 + e2e/types.go | 12 + e2e/validation.go | 2 +- e2e/validators.go | 3 +- e2e/vmss.go | 34 +- go.mod | 1 + go.sum | 3 + node-bootstrapper/.gitignore | 3 + node-bootstrapper/build.sh | 8 + .../custom_data_kubeconfig_test.go | 114 +++++ node-bootstrapper/custom_data_test.go | 71 ++++ node-bootstrapper/main.go | 396 ++++++++++++++++++ node-bootstrapper/test_helpers.go | 48 +++ pkg/agent/baker.go | 70 ++-- pkg/agent/baker_test.go | 131 ++++++ pkg/agent/bakerapi.go | 28 ++ pkg/agent/datamodel/types.go | 3 + pkg/agent/datamodel/types_test.go | 14 +- pkg/agent/datamodel/versions_test.go | 12 +- pkg/agent/utils.go | 13 +- pkg/agent/utils_test.go | 104 ++--- vhdbuilder/packer/packer_source.sh | 3 + .../packer/vhd-image-builder-arm64-gen2.json | 5 + vhdbuilder/packer/vhd-image-builder-base.json | 5 + .../vhd-image-builder-mariner-arm64.json | 5 + .../packer/vhd-image-builder-mariner.json | 5 + 41 files changed, 1212 insertions(+), 154 deletions(-) create mode 100644 e2e/node_bootstrapper_test.go create mode 100644 node-bootstrapper/.gitignore create mode 100755 node-bootstrapper/build.sh create mode 100644 node-bootstrapper/custom_data_kubeconfig_test.go create mode 100644 node-bootstrapper/custom_data_test.go create mode 100644 node-bootstrapper/main.go create mode 100644 node-bootstrapper/test_helpers.go diff --git a/.golangci.yaml b/.golangci.yaml index 0e52fbbada2..fad075732f5 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -148,7 +148,7 @@ linters-settings: shadow: # Whether to be strict about shadowing; can be noisy. # Default: false - strict: true + strict: false nakedret: # Make an issue if func has more lines of code than this setting, and it has naked returns. 
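Note on the provision config contract added by this patch: the config is versioned. baseTemplate in e2e/template.go stamps Version "v0", loadConfig in node-bootstrapper/main.go accepts only that value (and rejects a missing version), and the e2e scenario delivers the config to the VM as base64-encoded JSON before running `node-bootstrapper provision --provision-config=config.json`. A minimal caller-side sketch of producing that payload, assuming only the datamodel package and the Version field introduced elsewhere in this patch (all other fields elided, as the e2e cluster setup would normally populate them):

    package main

    import (
        "encoding/base64"
        "encoding/json"
        "fmt"

        "github.com/Azure/agentbaker/pkg/agent/datamodel"
    )

    func main() {
        // Version must match node-bootstrapper's ConfigVersion ("v0");
        // loadConfig returns an error for a missing or unknown version.
        nbc := &datamodel.NodeBootstrappingConfiguration{
            Version: "v0",
            // ... remaining fields as produced by baseTemplate / prepareCluster.
        }

        payload, err := json.Marshal(nbc)
        if err != nil {
            panic(err)
        }

        // The e2e CSE override ships this string as base64 and decodes it
        // into config.json on the node before invoking the binary.
        fmt.Println(base64.StdEncoding.EncodeToString(payload))
    }
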
diff --git a/.pipelines/e2e.yaml b/.pipelines/e2e.yaml index ed8a7139c9c..bbe7269e1c7 100644 --- a/.pipelines/e2e.yaml +++ b/.pipelines/e2e.yaml @@ -1,8 +1,8 @@ name: $(Date:yyyyMMdd)$(Rev:.r) -trigger: +trigger: branches: - include: - - master + include: + - master pr: branches: include: diff --git a/.pipelines/templates/.builder-release-template.yaml b/.pipelines/templates/.builder-release-template.yaml index 5e8a69dd4f8..1dd8a4bea1a 100644 --- a/.pipelines/templates/.builder-release-template.yaml +++ b/.pipelines/templates/.builder-release-template.yaml @@ -40,12 +40,12 @@ steps: exit 1 fi echo "Found source components.json to use for overrides: ${COMPONENT_JSON_SRC}" - + echo "Overwriting component.json from ${COMPONENT_JSON_SRC} -> ${COMPONENT_JSON_DEST}" cp -af "${COMPONENT_JSON_SRC}" "${COMPONENT_JSON_DEST}" condition: eq('${{ parameters.useOverrides }}', true) displayName: Apply Overrides - + - bash: | m="linuxVhdMode" && \ echo "Set build mode to $m" && \ @@ -112,7 +112,10 @@ steps: echo "##vso[task.setvariable variable=SKU_NAME]$SKU_NAME" echo "Set SKU_NAME to $SKU_NAME" displayName: Set SKU Name - + - bash: | + ./build.sh + displayName: Build node-bootstrapper + workingDirectory: node-bootstrapper - bash: make -f packer.mk run-packer displayName: Build VHD retryCountOnTaskFailure: 3 @@ -123,7 +126,7 @@ steps: BUILD_NUMBER: $(Build.BuildNumber) BUILD_ID: $(Build.BuildId) BUILD_DEFINITION_NAME: $(Build.DefinitionName) - + - bash: | CAPTURED_SIG_VERSION="$(cat vhdbuilder/packer/settings.json | grep "captured_sig_version" | awk -F':' '{print $2}' | awk -F'"' '{print $2}')" && \ SIG_IMAGE_NAME="$(cat vhdbuilder/packer/settings.json | grep "sig_image_name" | awk -F':' '{print $2}' | awk -F'"' '{print $2}')" && \ diff --git a/e2e/cluster.go b/e2e/cluster.go index a9b371e332b..60363860bdb 100644 --- a/e2e/cluster.go +++ b/e2e/cluster.go @@ -85,7 +85,7 @@ func ClusterAzureNetwork(ctx context.Context, t *testing.T) (*Cluster, error) { return clusterAzureNetwork, clusterAzureNetworkError } -func nodeBootsrappingConfig(ctx context.Context, t *testing.T, kube *Kubeclient) (*datamodel.NodeBootstrappingConfiguration, error) { +func nodeBootstrappingConfig(ctx context.Context, t *testing.T, kube *Kubeclient) (*datamodel.NodeBootstrappingConfiguration, error) { clusterParams, err := extractClusterParameters(ctx, t, kube) if err != nil { return nil, fmt.Errorf("extract cluster parameters: %w", err) @@ -133,7 +133,7 @@ func prepareCluster(ctx context.Context, t *testing.T, cluster *armcontainerserv return nil, fmt.Errorf("get cluster subnet: %w", err) } - nbc, err := nodeBootsrappingConfig(ctx, t, kube) + nbc, err := nodeBootstrappingConfig(ctx, t, kube) if err != nil { return nil, fmt.Errorf("get node bootstrapping configuration: %w", err) } diff --git a/e2e/config/azure.go b/e2e/config/azure.go index 46fdbf2522a..587cb4111aa 100644 --- a/e2e/config/azure.go +++ b/e2e/config/azure.go @@ -1,42 +1,50 @@ package config import ( + "context" "crypto/tls" "fmt" "net" "net/http" + "os" "time" "github.com/Azure/azure-sdk-for-go/sdk/azcore" "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" "github.com/Azure/azure-sdk-for-go/sdk/azidentity" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v6" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v6" 
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/privatedns/armprivatedns" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/sas" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/service" "github.com/Azure/go-armbalancer" ) type AzureClient struct { + AKS *armcontainerservice.ManagedClustersClient + Blob *azblob.Client Core *azcore.Client - VMSS *armcompute.VirtualMachineScaleSetsClient - VMSSVM *armcompute.VirtualMachineScaleSetVMsClient - VNet *armnetwork.VirtualNetworksClient + Credential *azidentity.DefaultAzureCredential + GalleryImageVersion *armcompute.GalleryImageVersionsClient + Maintenance *armcontainerservice.MaintenanceConfigurationsClient + PrivateDNSZoneGroup *armnetwork.PrivateDNSZoneGroupsClient + PrivateEndpointClient *armnetwork.PrivateEndpointsClient + PrivateZonesClient *armprivatedns.PrivateZonesClient + RecordSetClient *armprivatedns.RecordSetsClient Resource *armresources.Client ResourceGroup *armresources.ResourceGroupsClient - AKS *armcontainerservice.ManagedClustersClient - Maintenance *armcontainerservice.MaintenanceConfigurationsClient SecurityGroup *armnetwork.SecurityGroupsClient Subnet *armnetwork.SubnetsClient - GalleryImageVersionClient *armcompute.GalleryImageVersionsClient - PrivateEndpointClient *armnetwork.PrivateEndpointsClient - PrivateZonesClient *armprivatedns.PrivateZonesClient + VMSS *armcompute.VirtualMachineScaleSetsClient + VMSSVM *armcompute.VirtualMachineScaleSetVMsClient + VNet *armnetwork.VirtualNetworksClient VirutalNetworkLinksClient *armprivatedns.VirtualNetworkLinksClient - RecordSetClient *armprivatedns.RecordSetsClient - PrivateDNSZoneGroup *armnetwork.PrivateDNSZoneGroupsClient } func mustNewAzureClient(subscription string) *AzureClient { @@ -107,7 +115,7 @@ func NewAzureClient(subscription string) (*AzureClient, error) { // purely for telemetry, entirely unused today cloud.Core, err = azcore.NewClient("agentbakere2e.e2e_test", "v0.0.0", plOpts, clOpts) if err != nil { - return nil, fmt.Errorf("failed to create core client: %w", err) + return nil, fmt.Errorf("create core client: %w", err) } cloud.PrivateEndpointClient, err = armnetwork.NewPrivateEndpointsClient(subscription, credential, opts) @@ -137,17 +145,17 @@ func NewAzureClient(subscription string) (*AzureClient, error) { cloud.SecurityGroup, err = armnetwork.NewSecurityGroupsClient(subscription, credential, opts) if err != nil { - return nil, fmt.Errorf("failed to create security group client: %w", err) + return nil, fmt.Errorf("create security group client: %w", err) } cloud.Subnet, err = armnetwork.NewSubnetsClient(subscription, credential, opts) if err != nil { - return nil, fmt.Errorf("failed to create subnet client: %w", err) + return nil, fmt.Errorf("create subnet client: %w", err) } cloud.AKS, err = armcontainerservice.NewManagedClustersClient(subscription, credential, opts) if err != nil { - return nil, fmt.Errorf("failed to create aks client: %w", err) + return nil, fmt.Errorf("create aks client: %w", err) } cloud.Maintenance, err = armcontainerservice.NewMaintenanceConfigurationsClient(subscription, credential, opts) @@ -157,37 +165,74 @@ func NewAzureClient(subscription string) (*AzureClient, error) { cloud.VMSS, err = armcompute.NewVirtualMachineScaleSetsClient(subscription, credential, opts) if err != nil { - return nil, 
fmt.Errorf("failed to create vmss client: %w", err) + return nil, fmt.Errorf("create vmss client: %w", err) } cloud.VMSSVM, err = armcompute.NewVirtualMachineScaleSetVMsClient(subscription, credential, opts) if err != nil { - return nil, fmt.Errorf("failed to create vmss vm client: %w", err) + return nil, fmt.Errorf("create vmss vm client: %w", err) } cloud.Resource, err = armresources.NewClient(subscription, credential, opts) if err != nil { - return nil, fmt.Errorf("failed to create resource client: %w", err) + return nil, fmt.Errorf("create resource client: %w", err) } cloud.ResourceGroup, err = armresources.NewResourceGroupsClient(subscription, credential, opts) if err != nil { - return nil, fmt.Errorf("failed to create resource group client: %w", err) + return nil, fmt.Errorf("create resource group client: %w", err) } cloud.VNet, err = armnetwork.NewVirtualNetworksClient(subscription, credential, opts) if err != nil { - return nil, fmt.Errorf("failed to create vnet client: %w", err) + return nil, fmt.Errorf("create vnet client: %w", err) + } + + cloud.GalleryImageVersion, err = armcompute.NewGalleryImageVersionsClient(subscription, credential, opts) + if err != nil { + return nil, fmt.Errorf("create a new images client: %v", err) } - cloud.GalleryImageVersionClient, err = armcompute.NewGalleryImageVersionsClient(subscription, credential, opts) + cloud.Blob, err = azblob.NewClient(Config.BlobStorageAccount, credential, nil) if err != nil { - return nil, fmt.Errorf("failed to create a new images client: %v", err) + return nil, fmt.Errorf("create blob container client: %w", err) } + cloud.Credential = credential + return cloud, nil } +// UploadAndGetLink uploads the data to the blob storage and returns the signed link to download the blob +// If the blob already exists, it will be overwritten +func (a *AzureClient) UploadAndGetLink(ctx context.Context, blobName string, file *os.File) (string, error) { + _, err := a.Blob.UploadFile(ctx, Config.BlobContainer, blobName, file, nil) + if err != nil { + return "", fmt.Errorf("upload blob: %w", err) + } + + udc, err := a.Blob.ServiceClient().GetUserDelegationCredential(ctx, service.KeyInfo{ + Expiry: to.Ptr(time.Now().Add(time.Hour).UTC().Format(sas.TimeFormat)), + Start: to.Ptr(time.Now().UTC().Format(sas.TimeFormat)), + }, nil) + if err != nil { + return "", fmt.Errorf("get user delegation credential: %w", err) + } + + sig, err := sas.BlobSignatureValues{ + Protocol: sas.ProtocolHTTPS, + ExpiryTime: time.Now().Add(time.Hour), + Permissions: to.Ptr(sas.BlobPermissions{Read: true}).String(), + ContainerName: Config.BlobContainer, + BlobName: blobName, + }.SignWithUserDelegation(udc) + if err != nil { + return "", fmt.Errorf("sign blob: %w", err) + } + + return fmt.Sprintf("%s/%s/%s?%s", Config.BlobStorageAccount, Config.BlobContainer, blobName, sig.Encode()), nil +} + func DefaultRetryOpts() policy.RetryOptions { return policy.RetryOptions{ MaxRetries: 3, diff --git a/e2e/config/config.go b/e2e/config/config.go index de65a05301a..04a2f9f9779 100644 --- a/e2e/config/config.go +++ b/e2e/config/config.go @@ -32,6 +32,9 @@ type Configuration struct { IgnoreScenariosWithMissingVHD bool `env:"IGNORE_SCENARIOS_WITH_MISSING_VHD"` SkipTestsWithSKUCapacityIssue bool `env:"SKIP_TESTS_WITH_SKU_CAPACITY_ISSUE"` KeepVMSS bool `env:"KEEP_VMSS"` + BlobStorageAccount string `env:"BLOB_STORAGE_ACCOUNT" envDefault:"https://abe2e.blob.core.windows.net"` + BlobContainer string `env:"BLOB_CONTAINER" envDefault:"abe2e"` + EnableNodeBootstrapperTest bool 
`env:"ENABLE_NODE_BOOTSTRAPPER_TEST"` } func mustLoadConfig() Configuration { diff --git a/e2e/config/vhd.go b/e2e/config/vhd.go index 4fe7755c8af..4a9b7554acb 100644 --- a/e2e/config/vhd.go +++ b/e2e/config/vhd.go @@ -153,7 +153,7 @@ func ensureStaticSIGImageVersion(ctx context.Context, t *testing.T, imageVersion } version := newSIGImageVersionFromResourceID(rid) - resp, err := Azure.GalleryImageVersionClient.Get(ctx, version.resourceGroup, version.gallery, version.definition, version.version, nil) + resp, err := Azure.GalleryImageVersion.Get(ctx, version.resourceGroup, version.gallery, version.definition, version.version, nil) if err != nil { return "", fmt.Errorf("getting live image version info: %w", err) } @@ -177,7 +177,7 @@ func findLatestSIGImageVersionWithTag(ctx context.Context, t *testing.T, imageDe } definition := newSIGImageDefinitionFromResourceID(rid) - pager := Azure.GalleryImageVersionClient.NewListByGalleryImagePager(definition.resourceGroup, definition.gallery, definition.definition, nil) + pager := Azure.GalleryImageVersion.NewListByGalleryImagePager(definition.resourceGroup, definition.gallery, definition.definition, nil) var latestVersion *armcompute.GalleryImageVersion for pager.More() { page, err := pager.NextPage(ctx) @@ -241,7 +241,7 @@ func replicateToCurrentRegion(ctx context.Context, t *testing.T, definition sigI StorageAccountType: to.Ptr(armcompute.StorageAccountTypeStandardLRS), }) - resp, err := Azure.GalleryImageVersionClient.BeginCreateOrUpdate(ctx, definition.resourceGroup, definition.gallery, definition.definition, *version.Name, *version, nil) + resp, err := Azure.GalleryImageVersion.BeginCreateOrUpdate(ctx, definition.resourceGroup, definition.gallery, definition.definition, *version.Name, *version, nil) if err != nil { return fmt.Errorf("begin updating image version target regions: %w", err) } diff --git a/e2e/exec.go b/e2e/exec.go index b6ebd3f150b..8413b2c8fb7 100644 --- a/e2e/exec.go +++ b/e2e/exec.go @@ -53,15 +53,16 @@ func (r podExecResult) dumpStderr(t *testing.T) { } } -func extractLogsFromVM(ctx context.Context, t *testing.T, vmssName, privateIP, sshPrivateKey string, opts *scenarioRunOpts) (map[string]string, error) { +func extractLogsFromVM(ctx context.Context, t *testing.T, vmssName, privateIP, sshPrivateKey string, cluster *Cluster) (map[string]string, error) { commandList := map[string]string{ - "/var/log/azure/cluster-provision": "cat /var/log/azure/cluster-provision.log", - "kubelet": "journalctl -u kubelet", - "/var/log/azure/cluster-provision-cse-output": "cat /var/log/azure/cluster-provision-cse-output.log", - "sysctl-out": "sysctl -a", + "cluster-provision": "cat /var/log/azure/cluster-provision.log", + "kubelet": "journalctl -u kubelet", + "cluster-provision-cse-output": "cat /var/log/azure/cluster-provision-cse-output.log", + "sysctl-out": "sysctl -a", + "node-bootstrapper": "cat /var/log/azure/node-bootstrapper.log", } - podName, err := getHostNetworkDebugPodName(ctx, opts.clusterConfig.Kube) + podName, err := getHostNetworkDebugPodName(ctx, cluster.Kube) if err != nil { return nil, fmt.Errorf("unable to get debug pod name: %w", err) } @@ -70,12 +71,11 @@ func extractLogsFromVM(ctx context.Context, t *testing.T, vmssName, privateIP, s for file, sourceCmd := range commandList { t.Logf("executing command on remote VM at %s of VMSS %s: %q", privateIP, vmssName, sourceCmd) - execResult, err := execOnVM(ctx, opts.clusterConfig.Kube, privateIP, podName, sshPrivateKey, sourceCmd, false) + execResult, err := execOnVM(ctx, cluster.Kube, 
privateIP, podName, sshPrivateKey, sourceCmd, false) if err != nil { t.Logf("error executing command on remote VM at %s of VMSS %s: %s", privateIP, vmssName, err) return nil, err } - if execResult.stdout != nil { out := execResult.stdout.String() if out != "" { diff --git a/e2e/go.mod b/e2e/go.mod index 158dc824e5c..5addab31702 100644 --- a/e2e/go.mod +++ b/e2e/go.mod @@ -10,6 +10,7 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v6 v6.0.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v6 v6.0.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0 + github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.4.0 github.com/Azure/go-armbalancer v0.0.2 github.com/barkimedes/go-deepcopy v0.0.0-20220514131651-17c30cfc62df github.com/caarlos0/env/v11 v11.2.2 diff --git a/e2e/go.sum b/e2e/go.sum index b7fb0fd7509..d529497d5b6 100644 --- a/e2e/go.sum +++ b/e2e/go.sum @@ -26,6 +26,10 @@ github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/privatedns/armprivatedns v github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/privatedns/armprivatedns v1.3.0/go.mod h1:GE4m0rnnfwLGX0Y9A9A25Zx5N/90jneT5ABevqzhuFQ= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0 h1:Dd+RhdJn0OTtVGaeDLZpcumkIVCtA/3/Fo42+eoYvVM= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0/go.mod h1:5kakwfW5CjC9KK+Q4wjXAg+ShuIm2mBMua0ZFj2C8PE= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.6.0 h1:PiSrjRPpkQNjrM8H0WwKMnZUdu1RGMtd/LdGKUrOo+c= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/storage/armstorage v1.6.0/go.mod h1:oDrbWx4ewMylP7xHivfgixbfGBT6APAwsSoHRKotnIc= +github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.4.0 h1:Be6KInmFEKV81c0pOAEbRYehLMwmmGI1exuFj248AMk= +github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.4.0/go.mod h1:WCPBHsOXfBVnivScjs2ypRfimjEW0qPVLGgJkZlrIOA= github.com/Azure/go-armbalancer v0.0.2 h1:NVnxsTWHI5/fEzL6k6TjxPUfcB/3Si3+HFOZXOu0QtA= github.com/Azure/go-armbalancer v0.0.2/go.mod h1:yTg7MA/8YnfKQc9o97tzAJ7fbdVkod1xGsIvKmhYPRE= github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs= diff --git a/e2e/log.go b/e2e/log.go index 74cf3b3ce80..1025f5c8728 100644 --- a/e2e/log.go +++ b/e2e/log.go @@ -6,12 +6,19 @@ import ( "testing" "github.com/Azure/agentbakere2e/config" + "github.com/stretchr/testify/require" ) func testDir(t *testing.T) string { return filepath.Join(config.Config.E2ELoggingDir, t.Name()) } +func cleanTestDir(t *testing.T) { + dirPath := testDir(t) + err := os.RemoveAll(dirPath) + require.NoError(t, err) +} + func writeToFile(t *testing.T, fileName, content string) error { dirPath := testDir(t) // Create the directory if it doesn't exist diff --git a/e2e/node_bootstrapper_test.go b/e2e/node_bootstrapper_test.go new file mode 100644 index 00000000000..601534c9adf --- /dev/null +++ b/e2e/node_bootstrapper_test.go @@ -0,0 +1,92 @@ +package e2e + +import ( + "context" + "crypto/sha256" + "encoding/base32" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "os" + "os/exec" + "testing" + + "github.com/Azure/agentbakere2e/config" + "github.com/stretchr/testify/require" +) + +// test node-bootstrapper binary without rebuilding VHD images. +// it compiles the node-bootstrapper binary and uploads it to Azure Storage. +// the runs the node-bootstrapper on the VM. 
+func Test_ubuntu2204NodeBootstrapper(t *testing.T) { + ctx := newTestCtx(t) + if !config.Config.EnableNodeBootstrapperTest { + t.Skip("ENABLE_NODE_BOOTSTRAPPER_TEST is not set") + } + // TODO: figure out how to properly parallelize test, maybe move t.Parallel to the top of each test? + cluster, err := ClusterKubenet(ctx, t) + require.NoError(t, err) + RunScenario(t, &Scenario{ + Description: "Tests that a node using the Ubuntu 2204 VHD can be properly bootstrapped", + Config: Config{ + //NodeBootstrappingType: Scriptless, + Cluster: ClusterKubenet, + VHD: config.VHDUbuntu2204Gen2Containerd, + LiveVMValidators: []*LiveVMValidator{ + mobyComponentVersionValidator("containerd", getExpectedPackageVersions("containerd", "ubuntu", "r2204")[0], "apt"), + mobyComponentVersionValidator("runc", getExpectedPackageVersions("runc", "ubuntu", "r2204")[0], "apt"), + }, + CSEOverride: CSENodeBootstrapper(ctx, t, cluster), + DisableCustomData: true, + }, + }) +} + +func CSENodeBootstrapper(ctx context.Context, t *testing.T, cluster *Cluster) string { + configJSON, err := json.Marshal(cluster.NodeBootstrappingConfiguration) + require.NoError(t, err) + + binary := compileNodeBootstrapper(t) + url, err := config.Azure.UploadAndGetLink(ctx, "node-bootstrapper-"+hashFile(t, binary.Name()), binary) + require.NoError(t, err) + return fmt.Sprintf(`bash -c "(echo '%s' | base64 -d > config.json && curl -L -o ./node-bootstrapper '%s' && chmod +x ./node-bootstrapper && mkdir -p /var/log/azure && ./node-bootstrapper provision --provision-config=config.json) > /var/log/azure/node-bootstrapper.log 2>&1"`, base64.StdEncoding.EncodeToString(configJSON), url) +} + +func compileNodeBootstrapper(t *testing.T) *os.File { + cmd := exec.Command("go", "build", "-o", "node-bootstrapper", "-v") + cmd.Dir = "../node-bootstrapper" + cmd.Env = append(os.Environ(), + "GOOS=linux", + "GOARCH=amd64", + ) + log, err := cmd.CombinedOutput() + require.NoError(t, err, string(log)) + t.Logf("Compiled %s", "../node-bootstrapper") + f, err := os.Open("../node-bootstrapper/node-bootstrapper") + require.NoError(t, err) + return f +} + +func hashFile(t *testing.T, filePath string) string { + // Open the file + file, err := os.Open(filePath) + require.NoError(t, err) + defer file.Close() + + // Create a SHA-256 hasher + hasher := sha256.New() + + // Copy the file content to the hasher + _, err = io.Copy(hasher, file) + require.NoError(t, err) + + // Compute the hash + hashSum := hasher.Sum(nil) + + // Encode the hash using base32 + encodedHash := base32.StdEncoding.EncodeToString(hashSum) + + // Return the first 5 characters of the encoded hash + return encodedHash[:5] +} diff --git a/e2e/nodebootstrapping.go b/e2e/nodebootstrapping.go index 3b7cc1cf8bb..36e1ff160e2 100644 --- a/e2e/nodebootstrapping.go +++ b/e2e/nodebootstrapping.go @@ -15,12 +15,12 @@ import ( "github.com/Azure/agentbakere2e/config" ) -func getNodeBootstrapping(ctx context.Context, nbc *datamodel.NodeBootstrappingConfiguration) (*datamodel.NodeBootstrapping, error) { +func getNodeBootstrapping(ctx context.Context, nbc *datamodel.NodeBootstrappingConfiguration, bootstrappingType NodeBootstrappingType) (*datamodel.NodeBootstrapping, error) { switch e2eMode { case "coverage": return getNodeBootstrappingForCoverage(nbc) default: - return getNodeBootstrappingForValidation(ctx, nbc) + return getNodeBootstrappingForValidation(ctx, nbc, bootstrappingType) } } @@ -47,16 +47,20 @@ func getNodeBootstrappingForCoverage(nbc *datamodel.NodeBootstrappingConfigurati return nodeBootstrapping, 
nil } -func getNodeBootstrappingForValidation(ctx context.Context, nbc *datamodel.NodeBootstrappingConfiguration) (*datamodel.NodeBootstrapping, error) { +func getNodeBootstrappingForValidation(ctx context.Context, nbc *datamodel.NodeBootstrappingConfiguration, bootstrappingType NodeBootstrappingType) (*datamodel.NodeBootstrapping, error) { ab, err := agent.NewAgentBaker() if err != nil { return nil, err } - nodeBootstrapping, err := ab.GetNodeBootstrapping(ctx, nbc) - if err != nil { - return nil, err + switch { + case bootstrappingType == Scriptless: + return ab.GetNodeBootstrappingForScriptless(ctx, nbc) + case bootstrappingType == CustomScripts: + return ab.GetNodeBootstrapping(ctx, nbc) + default: + // fallback to custom scripts + return ab.GetNodeBootstrapping(ctx, nbc) } - return nodeBootstrapping, nil } func getBaseNodeBootstrappingConfiguration(clusterParams map[string]string) (*datamodel.NodeBootstrappingConfiguration, error) { diff --git a/e2e/scenario_helpers_test.go b/e2e/scenario_helpers_test.go index c76ba8793cc..c3b97132fe8 100644 --- a/e2e/scenario_helpers_test.go +++ b/e2e/scenario_helpers_test.go @@ -55,6 +55,7 @@ var scenarioOnce sync.Once func RunScenario(t *testing.T, s *Scenario) { t.Parallel() ctx := newTestCtx(t) + cleanTestDir(t) scenarioOnce.Do(func() { err := ensureResourceGroup(ctx) if err != nil { diff --git a/e2e/scenario_test.go b/e2e/scenario_test.go index 60a6e2f440f..cd565a2445a 100644 --- a/e2e/scenario_test.go +++ b/e2e/scenario_test.go @@ -560,6 +560,20 @@ func Test_ubuntu1804ChronyRestarts(t *testing.T) { }) } +func Test_ubuntu2204ScriptlessInstaller(t *testing.T) { + RunScenario(t, &Scenario{ + Description: "tests that a new ubuntu 2204 node using self contained installer can be properly bootstrapped", + Config: Config{ + NodeBootstrappingType: Scriptless, + Cluster: ClusterKubenet, + VHD: config.VHDUbuntu2204Gen2Containerd, + LiveVMValidators: []*LiveVMValidator{ + FileHasContentsValidator("/var/log/azure/node-bootstrapper.log", "node-bootstrapper started"), + }, + }, + }) +} + // Returns config for the 'gpu' E2E scenario func Test_ubuntu1804gpu(t *testing.T) { RunScenario(t, &Scenario{ diff --git a/e2e/template.go b/e2e/template.go index 98887b70e6d..7a94f83ae29 100644 --- a/e2e/template.go +++ b/e2e/template.go @@ -16,6 +16,7 @@ func baseTemplate(location string) *datamodel.NodeBootstrappingConfiguration { falseConst = false ) return &datamodel.NodeBootstrappingConfiguration{ + Version: "v0", ContainerService: &datamodel.ContainerService{ ID: "", Location: location, diff --git a/e2e/types.go b/e2e/types.go index 640abd66c29..b51826a0278 100644 --- a/e2e/types.go +++ b/e2e/types.go @@ -110,11 +110,20 @@ type Scenario struct { Config } +type NodeBootstrappingType string + +const ( + CustomScripts NodeBootstrappingType = "CustomScripts" + Scriptless NodeBootstrappingType = "Scriptless" +) + // Config represents the configuration of an AgentBaker E2E scenario type Config struct { // Cluster creates, updates or re-uses an AKS cluster for the scenario Cluster func(ctx context.Context, t *testing.T) (*Cluster, error) + NodeBootstrappingType NodeBootstrappingType + // VHD is the function called by the e2e suite on the given scenario to get its VHD selection VHD *config.Image @@ -127,6 +136,9 @@ type Config struct { // LiveVMValidators is a slice of LiveVMValidator objects for performing any live VM validation // specific to the scenario that isn't covered in the set of common validators run with all scenarios LiveVMValidators []*LiveVMValidator + + 
CSEOverride string + DisableCustomData bool } // VMCommandOutputAsserterFn is a function which takes in stdout and stderr stream content diff --git a/e2e/validation.go b/e2e/validation.go index fc03931a5a8..4ffbe296796 100644 --- a/e2e/validation.go +++ b/e2e/validation.go @@ -110,7 +110,7 @@ func commonLiveVMValidators(opts *scenarioRunOpts) []*LiveVMValidator { "cluster-provision-cse-output.log", "cloud-init-files.paved", "vhd-install.complete", - "cloud-config.txt", + //"cloud-config.txt", // file with UserData }, ), // this check will run from host's network - we expect it to succeed diff --git a/e2e/validators.go b/e2e/validators.go index 464e8f75aa3..30d28c64ba2 100644 --- a/e2e/validators.go +++ b/e2e/validators.go @@ -108,6 +108,7 @@ func NonEmptyDirectoryValidator(dirName string) *LiveVMValidator { func FileHasContentsValidator(fileName string, contents string) *LiveVMValidator { steps := []string{ fmt.Sprintf("ls -la %[1]s", fileName), + fmt.Sprintf("sudo cat %[1]s", fileName), fmt.Sprintf("(sudo cat %[1]s | grep -q %[2]q)", fileName, contents), } @@ -252,7 +253,7 @@ func UlimitValidator(ulimits map[string]string) *LiveVMValidator { func mobyComponentVersionValidator(component, version, packageManager string) *LiveVMValidator { installedCommand := "list --installed" if packageManager == "dnf" { - installedCommand = "list installed" + installedCommand = "list installed" } if packageManager == "apt" { installedCommand = "list --installed" diff --git a/e2e/vmss.go b/e2e/vmss.go index eb6d11c8610..e21c8d74773 100644 --- a/e2e/vmss.go +++ b/e2e/vmss.go @@ -31,10 +31,20 @@ const ( func createVMSS(ctx context.Context, t *testing.T, vmssName string, opts *scenarioRunOpts, privateKeyBytes []byte, publicKeyBytes []byte) *armcompute.VirtualMachineScaleSet { t.Logf("creating VMSS %q in resource group %q", vmssName, *opts.clusterConfig.Model.Properties.NodeResourceGroup) - nodeBootstrapping, err := getNodeBootstrapping(ctx, opts.nbc) + nodeBootstrapping, err := getNodeBootstrapping(ctx, opts.nbc, opts.scenario.Config.NodeBootstrappingType) require.NoError(t, err) - model := getBaseVMSSModel(vmssName, string(publicKeyBytes), nodeBootstrapping.CustomData, nodeBootstrapping.CSE, opts) + cse := nodeBootstrapping.CSE + if opts.scenario.CSEOverride != "" { + cse = opts.scenario.CSEOverride + } + + customData := nodeBootstrapping.CustomData + if opts.scenario.DisableCustomData { + customData = "" + } + + model := getBaseVMSSModel(vmssName, string(publicKeyBytes), customData, cse, opts.clusterConfig) isAzureCNI, err := opts.clusterConfig.IsAzureCNI() require.NoError(t, err, vmssName, opts) @@ -56,7 +66,7 @@ func createVMSS(ctx context.Context, t *testing.T, vmssName string, opts *scenar skipTestIfSKUNotAvailableErr(t, err) require.NoError(t, err) t.Cleanup(func() { - cleanupVMSS(ctx, t, vmssName, opts, privateKeyBytes) + cleanupVMSS(ctx, t, vmssName, opts.clusterConfig, privateKeyBytes) }) vmssResp, err := operation.PollUntilDone(ctx, config.DefaultPollUntilDoneOptions) @@ -76,17 +86,17 @@ func skipTestIfSKUNotAvailableErr(t *testing.T, err error) { } } -func cleanupVMSS(ctx context.Context, t *testing.T, vmssName string, opts *scenarioRunOpts, privateKeyBytes []byte) { +func cleanupVMSS(ctx context.Context, t *testing.T, vmssName string, cluster *Cluster, privateKeyBytes []byte) { // original context can be cancelled, but we still want to collect the logs ctx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 3*time.Minute) defer cancel() - defer deleteVMSS(t, ctx, vmssName, opts, 
privateKeyBytes) + defer deleteVMSS(t, ctx, vmssName, cluster, privateKeyBytes) - vmPrivateIP, err := getVMPrivateIPAddress(ctx, *opts.clusterConfig.Model.Properties.NodeResourceGroup, vmssName) + vmPrivateIP, err := getVMPrivateIPAddress(ctx, *cluster.Model.Properties.NodeResourceGroup, vmssName) require.NoError(t, err) require.NoError(t, err, "get vm private IP %v", vmssName) - logFiles, err := extractLogsFromVM(ctx, t, vmssName, vmPrivateIP, string(privateKeyBytes), opts) + logFiles, err := extractLogsFromVM(ctx, t, vmssName, vmPrivateIP, string(privateKeyBytes), cluster) require.NoError(t, err, "extract logs from vm %v", vmssName) err = dumpFileMapToDir(t, logFiles) @@ -94,7 +104,7 @@ func cleanupVMSS(ctx context.Context, t *testing.T, vmssName string, opts *scena } -func deleteVMSS(t *testing.T, ctx context.Context, vmssName string, opts *scenarioRunOpts, privateKeyBytes []byte) { +func deleteVMSS(t *testing.T, ctx context.Context, vmssName string, cluster *Cluster, privateKeyBytes []byte) { if config.Config.KeepVMSS { t.Logf("vmss %q will be retained for debugging purposes, please make sure to manually delete it later", vmssName) if err := writeToFile(t, "sshkey", string(privateKeyBytes)); err != nil { @@ -102,7 +112,7 @@ func deleteVMSS(t *testing.T, ctx context.Context, vmssName string, opts *scenar } return } - _, err := config.Azure.VMSS.BeginDelete(ctx, *opts.clusterConfig.Model.Properties.NodeResourceGroup, vmssName, &armcompute.VirtualMachineScaleSetsClientBeginDeleteOptions{ + _, err := config.Azure.VMSS.BeginDelete(ctx, *cluster.Model.Properties.NodeResourceGroup, vmssName, &armcompute.VirtualMachineScaleSetsClientBeginDeleteOptions{ ForceDeletion: to.Ptr(true), }) if err != nil { @@ -232,7 +242,7 @@ func getVmssName(t *testing.T) string { return name } -func getBaseVMSSModel(name, sshPublicKey, customData, cseCmd string, opts *scenarioRunOpts) armcompute.VirtualMachineScaleSet { +func getBaseVMSSModel(name, sshPublicKey, customData, cseCmd string, cluster *Cluster) armcompute.VirtualMachineScaleSet { return armcompute.VirtualMachineScaleSet{ Location: to.Ptr(config.Config.Location), SKU: &armcompute.SKU{ @@ -302,13 +312,13 @@ func getBaseVMSSModel(name, sshPublicKey, customData, cseCmd string, opts *scena fmt.Sprintf( loadBalancerBackendAddressPoolIDTemplate, config.Config.SubscriptionID, - *opts.clusterConfig.Model.Properties.NodeResourceGroup, + *cluster.Model.Properties.NodeResourceGroup, ), ), }, }, Subnet: &armcompute.APIEntityReference{ - ID: to.Ptr(opts.clusterConfig.SubnetID), + ID: to.Ptr(cluster.SubnetID), }, }, }, diff --git a/go.mod b/go.mod index 56ce38f10f0..428b75ece59 100644 --- a/go.mod +++ b/go.mod @@ -18,6 +18,7 @@ require ( google.golang.org/protobuf v1.33.0 gopkg.in/yaml.v3 v3.0.1 k8s.io/apimachinery v0.28.5 + sigs.k8s.io/yaml v1.4.0 ) require ( diff --git a/go.sum b/go.sum index 9b55b9f739e..4d6059278c6 100644 --- a/go.sum +++ b/go.sum @@ -39,6 +39,7 @@ github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 
github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 h1:K6RDEckDVWvDI9JAJYCmNdQXq6neHJOYx3V6jnqNEec= @@ -125,3 +126,5 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= k8s.io/apimachinery v0.28.5 h1:EEj2q1qdTcv2p5wl88KavAn3VlFRjREgRu8Sm/EuMPY= k8s.io/apimachinery v0.28.5/go.mod h1:wI37ncBvfAoswfq626yPTe6Bz1c22L7uaJ8dho83mgg= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/node-bootstrapper/.gitignore b/node-bootstrapper/.gitignore new file mode 100644 index 00000000000..404b78a18e8 --- /dev/null +++ b/node-bootstrapper/.gitignore @@ -0,0 +1,3 @@ +node-bootstrapper.log +node-bootstrapper +dist diff --git a/node-bootstrapper/build.sh b/node-bootstrapper/build.sh new file mode 100755 index 00000000000..2fa7987ed3e --- /dev/null +++ b/node-bootstrapper/build.sh @@ -0,0 +1,8 @@ +#!/bin/sh +set -x +set -e +go test ./... +GOOS=linux GOARCH=amd64 go build -o ./dist/node-bootstrapper-linux-amd64 +GOOS=linux GOARCH=arm64 go build -o ./dist/node-bootstrapper-linux-arm64 +GOOS=windows GOARCH=amd64 go build -o ./dist/node-bootstrapper-windows-amd64.exe +GOOS=windows GOARCH=arm64 go build -o ./dist/node-bootstrapper-windows-arm64.exe diff --git a/node-bootstrapper/custom_data_kubeconfig_test.go b/node-bootstrapper/custom_data_kubeconfig_test.go new file mode 100644 index 00000000000..e00d56c146a --- /dev/null +++ b/node-bootstrapper/custom_data_kubeconfig_test.go @@ -0,0 +1,114 @@ +package main + +import ( + "testing" + + "github.com/Azure/agentbaker/pkg/agent/datamodel" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const ( + bootstrapConfigFile = "/var/lib/kubelet/bootstrap-kubeconfig" + kubeConfigFile = "/var/lib/kubelet/kubeconfig" + arcTokenSh = "/opt/azure/bootstrap/arc-token.sh" + azureTokenSh = "/opt/azure/bootstrap/azure-token.sh" +) + +func assertKubeconfig(t *testing.T, nbc *datamodel.NodeBootstrappingConfiguration, expected string) { + t.Helper() + files, err := customData(nbc) + require.NoError(t, err) + require.NotContains(t, files, bootstrapConfigFile) + actual := getFile(t, nbc, kubeConfigFile, 0644) + assert.YAMLEq(t, expected, actual) +} + +func assertBootstrapKubeconfig(t *testing.T, nbc *datamodel.NodeBootstrappingConfiguration, expected string) { + t.Helper() + files, err := customData(nbc) + require.NoError(t, err) + require.NotContains(t, files, kubeConfigFile) + actual := getFile(t, nbc, bootstrapConfigFile, 0644) + assert.YAMLEq(t, expected, actual) +} + +func TestKubeConfigGeneratedCorrectly(t *testing.T) { + t.Run("kubeconfig", func(t *testing.T) { + nbc := validNBC() + assertKubeconfig(t, nbc, ` +apiVersion: v1 +kind: Config +clusters: +- name: localcluster + cluster: + certificate-authority: /etc/kubernetes/certs/ca.crt + server: https://:443 +users: +- name: client + user: + client-certificate: /etc/kubernetes/certs/client.crt + client-key: /etc/kubernetes/certs/client.key +contexts: +- context: + cluster: localcluster + user: client + name: localclustercontext +current-context: localclustercontext +`) + }) + + t.Run("bootstrap-kubeconfig", func(t *testing.T) { + nbc := validNBC() + nbc.KubeletClientTLSBootstrapToken = Ptr("test-token") + assertBootstrapKubeconfig(t, nbc, `apiVersion: v1 +clusters: + - cluster: + certificate-authority: /etc/kubernetes/certs/ca.crt + server: https://:443 + name: localcluster 
+contexts: + - context: + cluster: localcluster + user: kubelet-bootstrap + name: bootstrap-context +current-context: bootstrap-context +kind: Config +users: + - name: kubelet-bootstrap + user: + token: test-token +`) + }) + + t.Run("secureTlsBootstrapKubeConfig sets bootstrap-kubeconfig correctly", func(t *testing.T) { + nbc := validNBC() + nbc.EnableSecureTLSBootstrapping = true + assertBootstrapKubeconfig(t, nbc, `apiVersion: v1 +clusters: + - cluster: + certificate-authority: /etc/kubernetes/certs/ca.crt + server: https://:443 + name: localcluster +contexts: + - context: + cluster: localcluster + user: kubelet-bootstrap + name: bootstrap-context +current-context: bootstrap-context +kind: Config +users: +- name: kubelet-bootstrap + user: + exec: + apiVersion: client.authentication.k8s.io/v1 + args: + - bootstrap + - --next-proto=aks-tls-bootstrap + - --aad-resource=test-app-id + command: /opt/azure/tlsbootstrap/tls-bootstrap-client + interactiveMode: Never + provideClusterInfo: true +`) + }) +} diff --git a/node-bootstrapper/custom_data_test.go b/node-bootstrapper/custom_data_test.go new file mode 100644 index 00000000000..9b6a1af769c --- /dev/null +++ b/node-bootstrapper/custom_data_test.go @@ -0,0 +1,71 @@ +package main + +import ( + "testing" + + "github.com/Azure/agentbaker/pkg/agent/datamodel" + "github.com/stretchr/testify/assert" +) + +func TestCustomData(t *testing.T) { + t.Run("ca.crt", func(t *testing.T) { + nbc := validNBC() + actual := getFile(t, nbc, "/etc/kubernetes/certs/ca.crt", 0600) + expected := "test-ca-cert" + assert.Equal(t, expected, actual) + }) + + t.Run("exec_start.conf", func(t *testing.T) { + nbc := validNBC() + actual := getFile(t, nbc, "/etc/systemd/system/docker.service.d/exec_start.conf", 0644) + nbc.ContainerService.Properties.OrchestratorProfile.KubernetesConfig.DockerBridgeSubnet = "1.1.1.1" + expected := `[Service] +ExecStart= +ExecStart=/usr/bin/dockerd -H fd:// --storage-driver=overlay2 --bip=1.1.1.1 +ExecStartPost=/sbin/iptables -P FORWARD ACCEPT +#EOF` + assert.Equal(t, expected, actual) + }) + + t.Run("docker-daemon.json", func(t *testing.T) { + nbc := validNBC() + actual := getFile(t, nbc, "/etc/docker/daemon.json", 0644) + expected := ` +{ + "data-root":"/mnt/aks/containers", + "live-restore":true, + "log-driver":"json-file", + "log-opts": { + "max-file":"5", + "max-size":"50m" + } +} +` + assert.JSONEq(t, expected, actual) + }) + t.Run("kubelet", func(t *testing.T) { + nbc := validNBC() + actual := getFile(t, nbc, "/etc/default/kubelet", 0644) + expected := `KUBELET_FLAGS= +KUBELET_REGISTER_SCHEDULABLE=true +NETWORK_POLICY= +KUBELET_NODE_LABELS=agentpool=,kubernetes.azure.com/agentpool= +` + assert.Equal(t, expected, actual) + }) + + t.Run("containerDMCRHosts", func(t *testing.T) { + nbc := validNBC() + nbc.ContainerService.Properties.SecurityProfile = &datamodel.SecurityProfile{ + PrivateEgress: &datamodel.PrivateEgress{ + Enabled: true, + ContainerRegistryServer: "test-registry", + }, + } + actual := getFile(t, nbc, "/etc/containerd/certs.d/mcr.microsoft.com/hosts.toml", 0644) + expected := `[host."https://test-registry"] +capabilities = ["pull", "resolve"] +` + assert.Equal(t, expected, actual) + }) +} diff --git a/node-bootstrapper/main.go b/node-bootstrapper/main.go new file mode 100644 index 00000000000..61249987cfa --- /dev/null +++ b/node-bootstrapper/main.go @@ -0,0 +1,396 @@ +package main + +import ( + "context" + "encoding/json" + "errors" + "flag" + "fmt" + "log/slog" + "os" + "os/exec" + "path/filepath" + "strings" + + yaml 
"sigs.k8s.io/yaml/goyaml.v3" // TODO: should we use JSON instead of YAML to avoid 3rd party dependencies? + + "github.com/Azure/agentbaker/pkg/agent" + "github.com/Azure/agentbaker/pkg/agent/datamodel" +) + +const ( + DefaultAksAadAppID = "6dae42f8-4368-4678-94ff-3960e28e3630" + ReadOnlyWorld os.FileMode = 0644 + ReadOnlyUser os.FileMode = 0600 + ConfigVersion = "v0" +) + +type Config struct { + Version string `json:"version"` +} + +func main() { + slog.Info("node-bootstrapper started") + ctx := context.Background() + if err := Run(ctx); err != nil { + slog.Error("node-bootstrapper finished with error", "error", err.Error()) + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + os.Exit(exitErr.ExitCode()) + } + os.Exit(1) + } + slog.Info("node-bootstrapper finished") +} + +func Run(ctx context.Context) error { + const minNumberArgs = 2 + if len(os.Args) < minNumberArgs { + return errors.New("missing command argument") + } + switch os.Args[1] { + case "provision": + return Provision(ctx) + default: + return fmt.Errorf("unknown command: %s", os.Args[1]) + } +} + +// usage example: +// node-bootstrapper provision --provision-config=config.json . +func Provision(ctx context.Context) error { + fs := flag.NewFlagSet("provision", flag.ContinueOnError) + provisionConfig := fs.String("provision-config", "", "path to the provision config file") + err := fs.Parse(os.Args[2:]) + if err != nil { + return fmt.Errorf("parse args: %w", err) + } + if provisionConfig == nil || *provisionConfig == "" { + return errors.New("--provision-config is required") + } + + config, err := loadConfig(*provisionConfig) + if err != nil { + return err + } + + if err := writeCustomData(config); err != nil { + return fmt.Errorf("write custom data: %w", err) + } + + if err := provisionStart(ctx, config); err != nil { + return fmt.Errorf("provision start: %w", err) + } + return nil +} + +func loadConfig(path string) (*datamodel.NodeBootstrappingConfiguration, error) { + content, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("failed to open config file: %w", err) + } + + var config Config + if err := json.Unmarshal(content, &config); err != nil { + return nil, fmt.Errorf("failed to decode config file: %w", err) + } + switch config.Version { + case ConfigVersion: + nbc := &datamodel.NodeBootstrappingConfiguration{} + if err := json.Unmarshal(content, nbc); err != nil { + return nil, fmt.Errorf("failed to decode config file: %w", err) + } + return nbc, nil + case "": + return nil, fmt.Errorf("missing config version") + default: + return nil, fmt.Errorf("unsupported config version: %s", config.Version) + } +} + +func provisionStart(ctx context.Context, config *datamodel.NodeBootstrappingConfiguration) error { + // CSEScript can't be logged because it contains sensitive information. 
+ slog.Info("Running CSE script") + defer slog.Info("CSE script finished") + cse, err := CSEScript(ctx, config) + if err != nil { + return fmt.Errorf("cse script: %w", err) + } + + slog.Info(fmt.Sprintf("CSE script: %s", cse)) + + // TODO: add Windows support + cmd := exec.CommandContext(ctx, "/bin/bash", "-c", cse) + cmd.Dir = "/" + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() +} + +func CSEScript(ctx context.Context, config *datamodel.NodeBootstrappingConfiguration) (string, error) { + ab, err := agent.NewAgentBaker() + if err != nil { + return "", err + } + + nodeBootstrapping, err := ab.GetNodeBootstrapping(ctx, config) + if err != nil { + return "", err + } + return nodeBootstrapping.CSE, nil +} + +// re-implement CustomData + cloud-init logic from AgentBaker +// only for files not copied during build process. +func writeCustomData(config *datamodel.NodeBootstrappingConfiguration) error { + files, err := customData(config) + if err != nil { + return err + } + for path, file := range files { + slog.Info(fmt.Sprintf("Saving file %s ", path)) + + dir := filepath.Dir(path) + if err := os.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("create directory %s: %w", dir, err) + } + if err := os.WriteFile(path, []byte(file.Content), file.Mode); err != nil { + return fmt.Errorf("write file %s: %w", path, err) + } + } + return nil +} + +type File struct { + Content string + Mode os.FileMode +} + +func customData(config *datamodel.NodeBootstrappingConfiguration) (map[string]File, error) { + contentDockerDaemon, err := generateContentDockerDaemonJSON(config) + if err != nil { + return nil, fmt.Errorf("content docker daemon json: %w", err) + } + + files := map[string]File{ + "/etc/kubernetes/certs/ca.crt": { + Content: config.ContainerService.Properties.CertificateProfile.CaCertificate, + Mode: ReadOnlyUser, + }, + "/etc/systemd/system/docker.service.d/exec_start.conf": { + Content: generateContentDockerExecStart(config), + Mode: ReadOnlyWorld, + }, + "/etc/docker/daemon.json": { + Content: contentDockerDaemon, + Mode: ReadOnlyWorld, + }, + "/etc/default/kubelet": { + Content: generateContentKubelet(config), + Mode: ReadOnlyWorld, + }, + } + + if config.ContainerService.Properties.SecurityProfile.GetPrivateEgressContainerRegistryServer() != "" { + files["/etc/containerd/certs.d/mcr.microsoft.com/hosts.toml"] = File{ + Content: containerDMCRHosts(config), + Mode: ReadOnlyWorld, + } + } + + if config.EnableSecureTLSBootstrapping || agent.IsTLSBootstrappingEnabledWithHardCodedToken(config.KubeletClientTLSBootstrapToken) { + if err := useBootstrappingKubeConfig(config, files); err != nil { + return nil, err + } + } else { + useHardCodedKubeconfig(config, files) + } + + for path, file := range files { + file.Content = strings.TrimLeft(file.Content, "\n") + files[path] = file + } + + return files, nil +} + +func useHardCodedKubeconfig(config *datamodel.NodeBootstrappingConfiguration, files map[string]File) { + files["/var/lib/kubelet/kubeconfig"] = File{ + Content: generateContentKubeconfig(config), + Mode: ReadOnlyWorld, + } +} + +func useBootstrappingKubeConfig(config *datamodel.NodeBootstrappingConfiguration, files map[string]File) error { + bootstrapKubeconfig, err := generateContentBootstrapKubeconfig(config) + if err != nil { + return fmt.Errorf("content bootstrap kubeconfig: %w", err) + } + files["/var/lib/kubelet/bootstrap-kubeconfig"] = File{ + Content: bootstrapKubeconfig, + Mode: ReadOnlyWorld, + } + return nil +} + +func generateContentKubeconfig(config 
*datamodel.NodeBootstrappingConfiguration) string { + users := `- name: client + user: + client-certificate: /etc/kubernetes/certs/client.crt + client-key: /etc/kubernetes/certs/client.key` + + return fmt.Sprintf(` +apiVersion: v1 +kind: Config +clusters: +- name: localcluster + cluster: + certificate-authority: /etc/kubernetes/certs/ca.crt + server: https://%s:443 +users: +%s +contexts: +- context: + cluster: localcluster + user: client + name: localclustercontext +current-context: localclustercontext +`, agent.GetKubernetesEndpoint(config.ContainerService), users) +} + +func generateContentBootstrapKubeconfig(config *datamodel.NodeBootstrappingConfiguration) (string, error) { + data := map[string]any{ + "apiVersion": "v1", + "kind": "Config", + "clusters": []map[string]any{ + { + "name": "localcluster", + "cluster": map[string]any{ + "certificate-authority": "/etc/kubernetes/certs/ca.crt", + "server": "https://" + agent.GetKubernetesEndpoint(config.ContainerService) + ":443", + }, + }, + }, + "users": []map[string]any{ + { + "name": "kubelet-bootstrap", + "user": func() map[string]any { + if config.EnableSecureTLSBootstrapping { + appID := config.CustomSecureTLSBootstrapAADServerAppID + if appID == "" { + appID = DefaultAksAadAppID + } + return map[string]any{ + "exec": map[string]any{ + "apiVersion": "client.authentication.k8s.io/v1", + "command": "/opt/azure/tlsbootstrap/tls-bootstrap-client", + "args": []string{ + "bootstrap", + "--next-proto=aks-tls-bootstrap", + "--aad-resource=" + appID}, + "interactiveMode": "Never", + "provideClusterInfo": true, + }, + } + } + return map[string]any{ + "token": agent.GetTLSBootstrapTokenForKubeConfig(config.KubeletClientTLSBootstrapToken), + } + }(), + }, + }, + "contexts": []map[string]any{ + { + "context": map[string]any{ + "cluster": "localcluster", + "user": "kubelet-bootstrap", + }, + "name": "bootstrap-context", + }, + }, + "current-context": "bootstrap-context", + } + dataYAML, err := yaml.Marshal(data) + if err != nil { + return "", err + } + return string(dataYAML), nil +} + +func generateContentDockerExecStart(config *datamodel.NodeBootstrappingConfiguration) string { + return fmt.Sprintf(` +[Service] +ExecStart= +ExecStart=/usr/bin/dockerd -H fd:// --storage-driver=overlay2 --bip=%s +ExecStartPost=/sbin/iptables -P FORWARD ACCEPT +#EOF`, config.ContainerService.Properties.OrchestratorProfile.KubernetesConfig.DockerBridgeSubnet) +} + +func generateContentDockerDaemonJSON(config *datamodel.NodeBootstrappingConfiguration) (string, error) { + data := map[string]any{ + "live-restore": true, + "log-driver": "json-file", + "log-opts": map[string]string{ + "max-size": "50m", + "max-file": "5", + }, + } + if config.EnableNvidia { + data["default-runtime"] = "nvidia" + data["runtimes"] = map[string]any{ + "nvidia": map[string]any{ + "path": "/usr/bin/nvidia-container-runtime", + "runtimeArgs": []string{}, + }, + } + } + if agent.HasDataDir(config) { + data["data-root"] = agent.GetDataDir(config) + } + dataJSON, err := json.Marshal(data) + if err != nil { + return "", err + } + return string(dataJSON), nil +} + +func generateContentKubelet(config *datamodel.NodeBootstrappingConfiguration) string { + data := make([][2]string, 0) + data = append(data, [2]string{"KUBELET_FLAGS", agent.GetOrderedKubeletConfigFlagString(config)}) + data = append(data, [2]string{"KUBELET_REGISTER_SCHEDULABLE", "true"}) + data = append(data, [2]string{"NETWORK_POLICY", config.ContainerService.Properties.OrchestratorProfile.KubernetesConfig.NetworkPolicy}) + 
isKubernetesVersionGe := func(version string) bool { + isKubernetes := config.ContainerService.Properties.OrchestratorProfile.IsKubernetes() + isKubernetesVersionGe := agent.IsKubernetesVersionGe(config.ContainerService.Properties.OrchestratorProfile.OrchestratorVersion, version) + return isKubernetes && isKubernetesVersionGe + } + + if !isKubernetesVersionGe("1.17.0") { + data = append(data, [2]string{"KUBELET_IMAGE", config.K8sComponents.HyperkubeImageURL}) + } + + labels := func() string { + if isKubernetesVersionGe("1.16.0") { + return agent.GetAgentKubernetesLabels(config.AgentPoolProfile, config) + } + return config.AgentPoolProfile.GetKubernetesLabels() + } + + data = append(data, [2]string{"KUBELET_NODE_LABELS", labels()}) + if config.ContainerService.IsAKSCustomCloud() { + data = append(data, [2]string{"AZURE_ENVIRONMENT_FILEPATH", "/etc/kubernetes/" + config.ContainerService.Properties.CustomCloudEnv.Name + ".json"}) + } + + result := "" + for _, d := range data { + result += fmt.Sprintf("%s=%s\n", d[0], d[1]) + } + return result +} + +func containerDMCRHosts(config *datamodel.NodeBootstrappingConfiguration) string { + return fmt.Sprintf(` +[host."https://%s"] +capabilities = ["pull", "resolve"] +`, config.ContainerService.Properties.SecurityProfile.GetPrivateEgressContainerRegistryServer()) +} diff --git a/node-bootstrapper/test_helpers.go b/node-bootstrapper/test_helpers.go new file mode 100644 index 00000000000..d0541690cb7 --- /dev/null +++ b/node-bootstrapper/test_helpers.go @@ -0,0 +1,48 @@ +package main + +import ( + "io/fs" + "testing" + + "github.com/Azure/agentbaker/pkg/agent/datamodel" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func getFile(t *testing.T, nbc *datamodel.NodeBootstrappingConfiguration, path string, expectedMode fs.FileMode) string { + t.Helper() + files, err := customData(nbc) + require.NoError(t, err) + require.Contains(t, files, path) + actual := files[path] + assert.Equal(t, expectedMode, actual.Mode) + + return actual.Content +} + +func Ptr[T any](input T) *T { + return &input +} + +func validNBC() *datamodel.NodeBootstrappingConfiguration { + return &datamodel.NodeBootstrappingConfiguration{ + ContainerService: &datamodel.ContainerService{ + Properties: &datamodel.Properties{ + CertificateProfile: &datamodel.CertificateProfile{ + CaCertificate: "test-ca-cert", + }, + OrchestratorProfile: &datamodel.OrchestratorProfile{ + OrchestratorType: datamodel.Kubernetes, + OrchestratorVersion: "1.31.0", + KubernetesConfig: &datamodel.KubernetesConfig{ + DockerBridgeSubnet: "1.1.1.1", + }, + }, + }, + }, + CustomSecureTLSBootstrapAADServerAppID: "test-app-id", + AgentPoolProfile: &datamodel.AgentPoolProfile{ + KubeletDiskType: datamodel.TempDisk, + }, + } +} diff --git a/pkg/agent/baker.go b/pkg/agent/baker.go index 4341c8bcb05..04a06b225ac 100644 --- a/pkg/agent/baker.go +++ b/pkg/agent/baker.go @@ -373,7 +373,7 @@ func getContainerServiceFuncMap(config *datamodel.NodeBootstrappingConfiguration return cs.Properties.OrchestratorProfile.IsKubernetes() && IsKubernetesVersionGe(cs.Properties.OrchestratorProfile.OrchestratorVersion, version) }, "GetAgentKubernetesLabels": func(profile *datamodel.AgentPoolProfile) string { - return getAgentKubernetesLabels(profile, config) + return GetAgentKubernetesLabels(profile, config) }, "GetAgentKubernetesLabelsDeprecated": func(profile *datamodel.AgentPoolProfile) string { return profile.GetKubernetesLabels() @@ -411,7 +411,7 @@ func getContainerServiceFuncMap(config 
*datamodel.NodeBootstrappingConfiguration return IsKubeletServingCertificateRotationEnabled(config) }, "GetKubeletConfigKeyVals": func() string { - return GetOrderedKubeletConfigFlagString(config.KubeletConfig, cs, profile, config.EnableKubeletConfigFile) + return GetOrderedKubeletConfigFlagString(config) }, "GetKubeletConfigKeyValsPsh": func() string { return config.GetOrderedKubeletConfigStringForPowershell(profile.CustomKubeletConfig) @@ -484,13 +484,7 @@ func getContainerServiceFuncMap(config *datamodel.NodeBootstrappingConfiguration return cs.Properties.OrchestratorProfile.IsKubernetes() }, "GetKubernetesEndpoint": func() string { - if cs.Properties.HostedMasterProfile == nil { - return "" - } - if cs.Properties.HostedMasterProfile.IPAddress != "" { - return cs.Properties.HostedMasterProfile.IPAddress - } - return cs.Properties.HostedMasterProfile.FQDN + return GetKubernetesEndpoint(cs) }, "IsAzureCNI": func() bool { return cs.Properties.OrchestratorProfile.IsAzureCNI() @@ -662,26 +656,10 @@ func getContainerServiceFuncMap(config *datamodel.NodeBootstrappingConfiguration return cs.Properties.OrchestratorProfile.KubernetesConfig.RequiresDocker() }, "HasDataDir": func() bool { - if profile != nil && profile.KubernetesConfig != nil && profile.KubernetesConfig.ContainerRuntimeConfig != nil && - profile.KubernetesConfig.ContainerRuntimeConfig[datamodel.ContainerDataDirKey] != "" { - return true - } - if profile.KubeletDiskType == datamodel.TempDisk { - return true - } - return cs.Properties.OrchestratorProfile.KubernetesConfig.ContainerRuntimeConfig != nil && - cs.Properties.OrchestratorProfile.KubernetesConfig.ContainerRuntimeConfig[datamodel.ContainerDataDirKey] != "" + return HasDataDir(config) }, "GetDataDir": func() string { - if profile != nil && profile.KubernetesConfig != nil && - profile.KubernetesConfig.ContainerRuntimeConfig != nil && - profile.KubernetesConfig.ContainerRuntimeConfig[datamodel.ContainerDataDirKey] != "" { - return profile.KubernetesConfig.ContainerRuntimeConfig[datamodel.ContainerDataDirKey] - } - if profile.KubeletDiskType == datamodel.TempDisk { - return datamodel.TempDiskContainerDataDir - } - return cs.Properties.OrchestratorProfile.KubernetesConfig.ContainerRuntimeConfig[datamodel.ContainerDataDirKey] + return GetDataDir(config) }, "HasKubeletDiskType": func() bool { return profile != nil && profile.KubeletDiskType != "" && profile.KubeletDiskType != datamodel.OSDisk @@ -1001,6 +979,44 @@ func getContainerServiceFuncMap(config *datamodel.NodeBootstrappingConfiguration } } +func GetDataDir(config *datamodel.NodeBootstrappingConfiguration) string { + cs := config.ContainerService + profile := config.AgentPoolProfile + if profile != nil && profile.KubernetesConfig != nil && + profile.KubernetesConfig.ContainerRuntimeConfig != nil && + profile.KubernetesConfig.ContainerRuntimeConfig[datamodel.ContainerDataDirKey] != "" { + return profile.KubernetesConfig.ContainerRuntimeConfig[datamodel.ContainerDataDirKey] + } + if profile.KubeletDiskType == datamodel.TempDisk { + return datamodel.TempDiskContainerDataDir + } + return cs.Properties.OrchestratorProfile.KubernetesConfig.ContainerRuntimeConfig[datamodel.ContainerDataDirKey] +} + +func HasDataDir(config *datamodel.NodeBootstrappingConfiguration) bool { + cs := config.ContainerService + profile := config.AgentPoolProfile + if profile != nil && profile.KubernetesConfig != nil && profile.KubernetesConfig.ContainerRuntimeConfig != nil && + profile.KubernetesConfig.ContainerRuntimeConfig[datamodel.ContainerDataDirKey] 
!= "" { + return true + } + if profile.KubeletDiskType == datamodel.TempDisk { + return true + } + return cs.Properties.OrchestratorProfile.KubernetesConfig.ContainerRuntimeConfig != nil && + cs.Properties.OrchestratorProfile.KubernetesConfig.ContainerRuntimeConfig[datamodel.ContainerDataDirKey] != "" +} + +func GetKubernetesEndpoint(cs *datamodel.ContainerService) string { + if cs.Properties.HostedMasterProfile == nil { + return "" + } + if cs.Properties.HostedMasterProfile.IPAddress != "" { + return cs.Properties.HostedMasterProfile.IPAddress + } + return cs.Properties.HostedMasterProfile.FQDN +} + func getPortRangeEndValue(portRange string) int { arr := strings.Split(portRange, " ") num, err := strconv.Atoi(arr[1]) diff --git a/pkg/agent/baker_test.go b/pkg/agent/baker_test.go index 1daf836d51c..ee140161294 100644 --- a/pkg/agent/baker_test.go +++ b/pkg/agent/baker_test.go @@ -61,6 +61,137 @@ const ( type outputValidator func(*nodeBootstrappingOutput) var _ = Describe("Assert generated customData and cseCmd", func() { + Describe("Tests of template methods", func() { + var config *datamodel.NodeBootstrappingConfiguration + BeforeEach(func() { + config = &datamodel.NodeBootstrappingConfiguration{ + ContainerService: &datamodel.ContainerService{ + Properties: &datamodel.Properties{ + HostedMasterProfile: &datamodel.HostedMasterProfile{}, + OrchestratorProfile: &datamodel.OrchestratorProfile{ + KubernetesConfig: &datamodel.KubernetesConfig{ + ContainerRuntimeConfig: map[string]string{}, + }, + }, + }, + }, + AgentPoolProfile: &datamodel.AgentPoolProfile{}, + } + }) + + Describe(".HasDataDir()", func() { + It("given there is no profile, it returns false", func() { + Expect(HasDataDir(config)).To(BeFalse()) + }) + It("given there is a data dir, it returns true", func() { + config.ContainerService.Properties.OrchestratorProfile.KubernetesConfig.ContainerRuntimeConfig["dataDir"] = "data dir" + Expect(HasDataDir(config)).To(BeTrue()) + }) + It("given there is a temp disk, it returns true", func() { + // test the actual string because this data is posted to agentbaker and we want to check a particular posted string + // - rather than the value of our internal const is mariner. + config.AgentPoolProfile.KubeletDiskType = "Temporary" + Expect(HasDataDir(config)).To(BeTrue()) + }) + }) + + Describe(".GetDataDir()", func() { + It("given there is no profile, it returns an empty string", func() { + Expect(GetDataDir(config)).To(BeEmpty()) + }) + It("given there is a data dir, it returns true", func() { + config.ContainerService.Properties.OrchestratorProfile.KubernetesConfig.ContainerRuntimeConfig["dataDir"] = "data dir" + Expect(GetDataDir(config)).To(Equal("data dir")) + }) + It("given there is a temp disk, it returns true", func() { + // test the actual string because this data is posted to agentbaker and we want to check a particular posted string + // - rather than the value of our internal const is mariner. 
+
+		Describe(".GetKubernetesEndpoint()", func() {
+			It("given there is no profile, it returns an empty string", func() {
+				Expect(GetKubernetesEndpoint(config.ContainerService)).To(BeEmpty())
+			})
+			It("given there is an ip address, it returns the ip address", func() {
+				config.ContainerService.Properties.HostedMasterProfile.IPAddress = "127.0.0.1"
+				Expect(GetKubernetesEndpoint(config.ContainerService)).To(Equal("127.0.0.1"))
+			})
+			It("given there is an fqdn, it returns the fqdn", func() {
+				config.ContainerService.Properties.HostedMasterProfile.FQDN = "fqdn"
+				Expect(GetKubernetesEndpoint(config.ContainerService)).To(Equal("fqdn"))
+			})
+			It("given there is an ip address and a fqdn, it returns the ip address", func() {
+				config.ContainerService.Properties.HostedMasterProfile.IPAddress = "127.0.0.1"
+				config.ContainerService.Properties.HostedMasterProfile.FQDN = "fqdn"
+				Expect(GetKubernetesEndpoint(config.ContainerService)).To(Equal("127.0.0.1"))
+			})
+		})
+
+		Describe(".getPortRangeEndValue()", func() {
+			It("given a port range with 2 numbers, it returns the second number", func() {
+				Expect(getPortRangeEndValue("1 2")).To(Equal(2))
+			})
+			It("given a port range with 3 numbers, it returns the second number", func() {
+				Expect(getPortRangeEndValue("1 2 3")).To(Equal(2))
+			})
+		})
+
+		Describe(".areCustomCATrustCertsPopulated()", func() {
+			It("given an empty profile, it returns false", func() {
+				Expect(areCustomCATrustCertsPopulated(*config)).To(BeFalse())
+			})
+			It("given no list of certs, it returns false", func() {
+				config.CustomCATrustConfig = &datamodel.CustomCATrustConfig{}
+				Expect(areCustomCATrustCertsPopulated(*config)).To(BeFalse())
+			})
+			It("given an empty list of certs, it returns false", func() {
+				config.CustomCATrustConfig = &datamodel.CustomCATrustConfig{
+					CustomCATrustCerts: []string{},
+				}
+				Expect(areCustomCATrustCertsPopulated(*config)).To(BeFalse())
+			})
+			It("given a single custom ca cert, it returns true", func() {
+				config.CustomCATrustConfig = &datamodel.CustomCATrustConfig{
+					CustomCATrustCerts: []string{"mock cert value"},
+				}
+				Expect(areCustomCATrustCertsPopulated(*config)).To(BeTrue())
+			})
+			It("given 4 custom ca certs, it returns true", func() {
+				config.CustomCATrustConfig = &datamodel.CustomCATrustConfig{
+					CustomCATrustCerts: []string{"cert1", "cert2", "cert3", "cert4"},
+				}
+				Expect(areCustomCATrustCertsPopulated(*config)).To(BeTrue())
+			})
+		})
+
+		Describe(".isMariner()", func() {
+			It("given an empty string, that is not mariner", func() {
+				Expect(isMariner("")).To(BeFalse())
+			})
+			It("given datamodel.OSSKUCBLMariner, that is mariner", func() {
+				// test the actual string because this data is posted to agentbaker and we want to check a particular posted string
+				// is mariner - rather than the value of our internal const is mariner.
+				Expect(isMariner("CBLMariner")).To(BeTrue())
+			})
+ Expect(isMariner("Mariner")).To(BeTrue()) + }) + It("given datamodel.OSSKUAzureLinux, that is mariner", func() { + // test the actual string because this data is posted to agentbaker and we want to check a particular posted string + // is mariner - rather than the value of our internal const is mariner. + Expect(isMariner("AzureLinux")).To(BeTrue()) + }) + It("given ubuntu, that is not mariner", func() { + Expect(isMariner("Ubuntu")).To(BeFalse()) + }) + }) + }) + DescribeTable("Generated customData and CSE", func(folder, k8sVersion string, configUpdator func(*datamodel.NodeBootstrappingConfiguration), validator outputValidator) { cs := &datamodel.ContainerService{ diff --git a/pkg/agent/bakerapi.go b/pkg/agent/bakerapi.go index af04f64c561..beca65ad53d 100644 --- a/pkg/agent/bakerapi.go +++ b/pkg/agent/bakerapi.go @@ -5,6 +5,8 @@ package agent import ( "context" + "encoding/base64" + "encoding/json" "fmt" "github.com/Azure/agentbaker/pkg/agent/datamodel" @@ -85,6 +87,32 @@ func (agentBaker *agentBakerImpl) GetNodeBootstrapping(ctx context.Context, conf return nodeBootstrapping, nil } +func (agentBaker *agentBakerImpl) GetNodeBootstrappingForScriptless( + ctx context.Context, + config *datamodel.NodeBootstrappingConfiguration, +) (*datamodel.NodeBootstrapping, error) { + // TODO: add windows support + if config.AgentPoolProfile.IsWindows() { + return agentBaker.GetNodeBootstrapping(ctx, config) + } + config.Version = "v0" + nodeBootstrapping, err := agentBaker.GetNodeBootstrapping(ctx, config) + if err != nil { + return nil, err + } + configJSON, err := json.Marshal(config) + if err != nil { + return nil, fmt.Errorf("failed to marshal nbc, error: %w", err) + } + //nolint: lll + nodeBootstrapping.CSE = fmt.Sprintf( + `bash -c "(echo '%s' | base64 -d > config.json && mkdir -p /var/log/azure && /opt/azure/node-bootstrapper provision --provision-config=config.json) > /var/log/azure/node-bootstrapper.log 2>&1"`, + base64.StdEncoding.EncodeToString(configJSON), + ) // TODO: simplify this) + nodeBootstrapping.CustomData = "" + return nodeBootstrapping, nil +} + func (agentBaker *agentBakerImpl) GetLatestSigImageConfig(sigConfig datamodel.SIGConfig, distro datamodel.Distro, envInfo *datamodel.EnvironmentInfo) (*datamodel.SigImageConfig, error) { sigAzureEnvironmentSpecConfig, err := datamodel.GetSIGAzureCloudSpecConfig(sigConfig, envInfo.Region) diff --git a/pkg/agent/datamodel/types.go b/pkg/agent/datamodel/types.go index 576e337997d..eb0a30f4656 100644 --- a/pkg/agent/datamodel/types.go +++ b/pkg/agent/datamodel/types.go @@ -1743,6 +1743,9 @@ type NodeBootstrappingConfiguration struct { // CNI, which will overwrite the `filter` table so that we can only insert to `mangle` table to avoid // our added rule is overwritten by Cilium. InsertIMDSRestrictionRuleToMangleTable bool + + // Version is required for node-bootstrapper application to determine the version of the config file. 
diff --git a/pkg/agent/datamodel/types.go b/pkg/agent/datamodel/types.go
index 576e337997d..eb0a30f4656 100644
--- a/pkg/agent/datamodel/types.go
+++ b/pkg/agent/datamodel/types.go
@@ -1743,6 +1743,9 @@ type NodeBootstrappingConfiguration struct {
 	// CNI, which will overwrite the `filter` table so that we can only insert to `mangle` table to avoid
 	// our added rule is overwritten by Cilium.
 	InsertIMDSRestrictionRuleToMangleTable bool
+
+	// Version is required for the node-bootstrapper application to determine the version of the config file.
+	Version string
 }
 
 type SSHStatus int
diff --git a/pkg/agent/datamodel/types_test.go b/pkg/agent/datamodel/types_test.go
index 9b2a5913c43..aa37d8382e3 100644
--- a/pkg/agent/datamodel/types_test.go
+++ b/pkg/agent/datamodel/types_test.go
@@ -9,6 +9,7 @@ import (
 	"testing"
 
 	"github.com/Azure/go-autorest/autorest/to"
+	"github.com/stretchr/testify/assert"
 )
 
 const (
@@ -40,15 +41,14 @@ func TestHasAadProfile(t *testing.T) {
 }
 
 func TestGetCustomEnvironmentJSON(t *testing.T) {
-	properities := getMockProperitesWithCustomClouEnv()
+	properties := getMockProperitesWithCustomClouEnv()
 	expectedRet := `{"name":"AzureStackCloud","Name":"AzureStackCloud","mcrURL":"mcr.microsoft.fakecustomcloud","repoDepotEndpoint":"https://repodepot.azure.microsoft.fakecustomcloud/ubuntu","managementPortalURL":"https://portal.azure.microsoft.fakecustomcloud/","serviceManagementEndpoint":"https://management.core.microsoft.fakecustomcloud/","resourceManagerEndpoint":"https://management.azure.microsoft.fakecustomcloud/","activeDirectoryEndpoint":"https://login.microsoftonline.microsoft.fakecustomcloud/","keyVaultEndpoint":"https://vault.cloudapi.microsoft.fakecustomcloud/","graphEndpoint":"https://graph.cloudapi.microsoft.fakecustomcloud/","storageEndpointSuffix":"core.microsoft.fakecustomcloud","sqlDatabaseDNSSuffix":"database.cloudapi.microsoft.fakecustomcloud","keyVaultDNSSuffix":"vault.cloudapi.microsoft.fakecustomcloud","resourceManagerVMDNSSuffix":"cloudapp.azure.microsoft.fakecustomcloud/","containerRegistryDNSSuffix":".azurecr.microsoft.fakecustomcloud","cosmosDBDNSSuffix":"documents.core.microsoft.fakecustomcloud/","tokenAudience":"https://management.core.microsoft.fakecustomcloud/","resourceIdentifiers":{}}` //nolint: lll
-	actual, err := properities.GetCustomEnvironmentJSON(false)
+	actual, err := properties.GetCustomEnvironmentJSON(false)
 	if err != nil {
 		t.Error(err)
 	}
-	if expectedRet != actual {
-		t.Errorf("Expected GetCustomEnvironmentJSON() to return %s, but got %s . ", expectedRet, actual)
-	}
+
+	assert.JSONEq(t, expectedRet, actual)
 }
 
 func TestPropertiesIsIPMasqAgentDisabled(t *testing.T) {
@@ -2128,7 +2128,7 @@ func TestGetAddonByName(t *testing.T) {
 	}
 
 	addon := c.GetAddonByName(containerMonitoringAddonName)
-	if addon.Config == nil || len(addon.Config) == 0 {
+	if len(addon.Config) == 0 {
 		t.Fatalf("KubernetesConfig.IsContainerMonitoringAddonEnabled() should have addon config instead returned null or empty")
 	}
 
@@ -2164,7 +2164,7 @@ func TestGetAddonByName(t *testing.T) {
 	}
 
 	addon = c.GetAddonByName(containerMonitoringAddonName)
-	if addon.Config == nil || len(addon.Config) == 0 {
+	if len(addon.Config) == 0 {
 		t.Fatalf("KubernetesConfig.IsContainerMonitoringAddonEnabled() should have addon config instead returned null or empty")
 	}
 
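Replacing the strict string comparison with testify's assert.JSONEq makes TestGetCustomEnvironmentJSON insensitive to key order and whitespace in the marshaled document. A tiny, self-contained illustration (the payloads below are made up, not from the patch):

package datamodel

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// Illustrative only: JSONEq compares parsed JSON documents, so semantically
// equal payloads pass even when key order and whitespace differ.
func TestJSONEqIgnoresKeyOrder(t *testing.T) {
	assert.JSONEq(t,
		`{"name":"AzureStackCloud","mcrURL":"mcr.microsoft.fakecustomcloud"}`,
		`{"mcrURL": "mcr.microsoft.fakecustomcloud", "name": "AzureStackCloud"}`)
}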
", expectedRet, actual) - } + + assert.JSONEq(t, expectedRet, actual) } func TestPropertiesIsIPMasqAgentDisabled(t *testing.T) { @@ -2128,7 +2128,7 @@ func TestGetAddonByName(t *testing.T) { } addon := c.GetAddonByName(containerMonitoringAddonName) - if addon.Config == nil || len(addon.Config) == 0 { + if len(addon.Config) == 0 { t.Fatalf("KubernetesConfig.IsContainerMonitoringAddonEnabled() should have addon config instead returned null or empty") } @@ -2164,7 +2164,7 @@ func TestGetAddonByName(t *testing.T) { } addon = c.GetAddonByName(containerMonitoringAddonName) - if addon.Config == nil || len(addon.Config) == 0 { + if len(addon.Config) == 0 { t.Fatalf("KubernetesConfig.IsContainerMonitoringAddonEnabled() should have addon config instead returned null or empty") } diff --git a/pkg/agent/datamodel/versions_test.go b/pkg/agent/datamodel/versions_test.go index 0d04b826870..f4945ed301b 100644 --- a/pkg/agent/datamodel/versions_test.go +++ b/pkg/agent/datamodel/versions_test.go @@ -474,13 +474,13 @@ func TestGetMinMaxVersion(t *testing.T) { c := c t.Run(c.name, func(t *testing.T) { t.Parallel() - min := GetMinVersion(c.versions, c.preRelease) - if min != c.expectedMin { - t.Errorf("GetMinVersion returned the wrong min version, expected %s, got %s", c.expectedMin, min) + minVersion := GetMinVersion(c.versions, c.preRelease) + if minVersion != c.expectedMin { + t.Errorf("GetMinVersion returned the wrong min version, expected %s, got %s", c.expectedMin, minVersion) } - max := GetMaxVersion(c.versions, c.preRelease) - if max != c.expectedMax { - t.Errorf("GetMaxVersion returned the wrong max version, expected %s, got %s", c.expectedMax, max) + maxVersion := GetMaxVersion(c.versions, c.preRelease) + if maxVersion != c.expectedMax { + t.Errorf("GetMaxVersion returned the wrong max version, expected %s, got %s", c.expectedMax, maxVersion) } }) } diff --git a/pkg/agent/utils.go b/pkg/agent/utils.go index ba545ccabf0..35f6bc58123 100644 --- a/pkg/agent/utils.go +++ b/pkg/agent/utils.go @@ -9,6 +9,7 @@ import ( "encoding/base64" "encoding/json" "fmt" + "math" "regexp" "sort" "strconv" @@ -323,8 +324,11 @@ func getCustomDataFromJSON(jsonStr string) string { // GetOrderedKubeletConfigFlagString returns an ordered string of key/val pairs. // copied from AKS-Engine and filter out flags that already translated to config file. -func GetOrderedKubeletConfigFlagString(k map[string]string, cs *datamodel.ContainerService, profile *datamodel.AgentPoolProfile, - kubeletConfigFileToggleEnabled bool) string { +func GetOrderedKubeletConfigFlagString(config *datamodel.NodeBootstrappingConfiguration) string { + k := config.KubeletConfig + cs := config.ContainerService + profile := config.AgentPoolProfile + kubeletConfigFileToggleEnabled := config.EnableKubeletConfigFile /* NOTE(mainred): kubeConfigFile now relies on CustomKubeletConfig, while custom configuration is not compatible with CustomKubeletConfig. When custom configuration is set we want to override every configuration with the customized one. 
diff --git a/pkg/agent/utils_test.go b/pkg/agent/utils_test.go
index 37d920665a9..48dc1210e61 100644
--- a/pkg/agent/utils_test.go
+++ b/pkg/agent/utils_test.go
@@ -544,7 +544,7 @@ func TestGetAgentKubernetesLabels(t *testing.T) {
 
 	for _, c := range cases {
 		t.Run(c.name, func(t *testing.T) {
-			actual := getAgentKubernetesLabels(c.profile, c.config)
+			actual := GetAgentKubernetesLabels(c.profile, c.config)
 			assert.Equal(t, c.expected, actual)
 		})
 	}
@@ -665,74 +665,82 @@ func TestGetTLSBootstrapTokenForKubeConfig(t *testing.T) {
 
 var _ = Describe("Test GetOrderedKubeletConfigFlagString", func() {
 	It("should return expected kubelet config when custom configuration is not set", func() {
-		cs := &datamodel.ContainerService{
-			Location:   "southcentralus",
-			Type:       "Microsoft.ContainerService/ManagedClusters",
-			Properties: &datamodel.Properties{},
-		}
-		k := map[string]string{
-			"--node-status-update-frequency": "10s",
-			"--node-status-report-frequency": "5m0s",
-			"--image-gc-high-threshold":      "85",
-			"--event-qps":                    "0",
+		config := &datamodel.NodeBootstrappingConfiguration{
+			KubeletConfig: map[string]string{
+				"--node-status-update-frequency": "10s",
+				"--node-status-report-frequency": "5m0s",
+				"--image-gc-high-threshold":      "85",
+				"--event-qps":                    "0",
+			},
+			ContainerService: &datamodel.ContainerService{
+				Location:   "southcentralus",
+				Type:       "Microsoft.ContainerService/ManagedClusters",
+				Properties: &datamodel.Properties{},
+			},
+			EnableKubeletConfigFile: false,
+			AgentPoolProfile:        &datamodel.AgentPoolProfile{},
 		}
-		ap := &datamodel.AgentPoolProfile{}
-
+		actucalStr := GetOrderedKubeletConfigFlagString(config)
 		expectStr := "--event-qps=0 --image-gc-high-threshold=85 --node-status-update-frequency=10s "
-		actucalStr := GetOrderedKubeletConfigFlagString(k, cs, ap, false)
 		Expect(expectStr).To(Equal(actucalStr))
 	})
 	It("should return expected kubelet config when custom configuration is set", func() {
-		cs := &datamodel.ContainerService{
-			Location: "southcentralus",
-			Type:     "Microsoft.ContainerService/ManagedClusters",
-			Properties: &datamodel.Properties{
-				CustomConfiguration: &datamodel.CustomConfiguration{
-					KubernetesConfigurations: map[string]*datamodel.ComponentConfiguration{
-						"kubelet": {
-							Config: map[string]string{
-								"--node-status-update-frequency":      "20s",
-								"--streaming-connection-idle-timeout": "4h0m0s",
+		config := &datamodel.NodeBootstrappingConfiguration{
+			KubeletConfig: map[string]string{
+				"--node-status-update-frequency": "10s",
+				"--node-status-report-frequency": "10s",
+				"--image-gc-high-threshold":      "85",
+				"--event-qps":                    "0",
+			},
+			ContainerService: &datamodel.ContainerService{
+				Location: "southcentralus",
+				Type:     "Microsoft.ContainerService/ManagedClusters",
+				Properties: &datamodel.Properties{
+					CustomConfiguration: &datamodel.CustomConfiguration{
+						KubernetesConfigurations: map[string]*datamodel.ComponentConfiguration{
+							"kubelet": {
+								Config: map[string]string{
+									"--node-status-update-frequency":      "20s",
+									"--streaming-connection-idle-timeout": "4h0m0s",
+								},
 							},
 						},
 					},
 				},
 			},
+			EnableKubeletConfigFile: false,
+			AgentPoolProfile:        &datamodel.AgentPoolProfile{},
 		}
-		k := map[string]string{
-			"--node-status-update-frequency": "10s",
-			"--node-status-report-frequency": "10s",
-			"--image-gc-high-threshold":      "85",
-			"--event-qps":                    "0",
-		}
-		ap := &datamodel.AgentPoolProfile{}
 		expectStr := "--event-qps=0 --image-gc-high-threshold=85 --node-status-update-frequency=20s --streaming-connection-idle-timeout=4h0m0s "
-		actucalStr := GetOrderedKubeletConfigFlagString(k, cs, ap, false)
+		actucalStr := GetOrderedKubeletConfigFlagString(config)
 		Expect(expectStr).To(Equal(actucalStr))
 	})
 	It("should return expected kubelet command line flags when a config file is being used", func() {
-		cs := &datamodel.ContainerService{
-			Location: "southcentralus",
-			Type:     "Microsoft.ContainerService/ManagedClusters",
-			Properties: &datamodel.Properties{
-				OrchestratorProfile: &datamodel.OrchestratorProfile{
-					OrchestratorType:    "Kubernetes",
-					OrchestratorVersion: "1.22.11",
+		config := &datamodel.NodeBootstrappingConfiguration{
+			KubeletConfig: map[string]string{
+				"--node-labels":                  "topology.kubernetes.io/region=southcentralus",
+				"--node-status-update-frequency": "10s",
+				"--node-status-report-frequency": "5m0s",
+				"--image-gc-high-threshold":      "85",
+				"--event-qps":                    "0",
+			},
+			ContainerService: &datamodel.ContainerService{
+				Location: "southcentralus",
+				Type:     "Microsoft.ContainerService/ManagedClusters",
+				Properties: &datamodel.Properties{
+					OrchestratorProfile: &datamodel.OrchestratorProfile{
+						OrchestratorType:    "Kubernetes",
+						OrchestratorVersion: "1.22.11",
+					},
 				},
 			},
-		}
-		k := map[string]string{
-			"--node-labels":                  "topology.kubernetes.io/region=southcentralus",
-			"--node-status-update-frequency": "10s",
-			"--node-status-report-frequency": "5m0s",
-			"--image-gc-high-threshold":      "85",
-			"--event-qps":                    "0",
+			EnableKubeletConfigFile: true,
+			AgentPoolProfile:        &datamodel.AgentPoolProfile{},
 		}
-		ap := &datamodel.AgentPoolProfile{}
 		expectedStr := "--node-labels=topology.kubernetes.io/region=southcentralus "
-		actualStr := GetOrderedKubeletConfigFlagString(k, cs, ap, true)
+		actualStr := GetOrderedKubeletConfigFlagString(config)
 		Expect(expectedStr).To(Equal(actualStr))
 	})
 })
diff --git a/vhdbuilder/packer/packer_source.sh b/vhdbuilder/packer/packer_source.sh
index 834cff0024c..a57c7b0887b 100644
--- a/vhdbuilder/packer/packer_source.sh
+++ b/vhdbuilder/packer/packer_source.sh
@@ -125,6 +125,8 @@ copyPackerFiles() {
   VHD_CLEANUP_SCRIPT_DEST=/opt/azure/containers/cleanup-vhd.sh
   CONTAINER_IMAGE_PREFETCH_SCRIPT_SRC=/home/packer/prefetch.sh
   CONTAINER_IMAGE_PREFETCH_SCRIPT_DEST=/opt/azure/containers/prefetch.sh
+  NODE_BOOTSTRAPPER_SRC=/home/packer/node-bootstrapper
+  NODE_BOOTSTRAPPER_DEST=/opt/azure/node-bootstrapper
   CSE_REDACT_SRC=/home/packer/cse_redact_cloud_config.py
   CSE_REDACT_DEST=/opt/azure/containers/provision_redact_cloud_config.py
@@ -357,6 +359,7 @@ copyPackerFiles() {
 
   # Copy the generated CNI prefetch script to the appropriate location so AIB can invoke it later
   cpAndMode $CONTAINER_IMAGE_PREFETCH_SCRIPT_SRC $CONTAINER_IMAGE_PREFETCH_SCRIPT_DEST 644
+  cpAndMode $NODE_BOOTSTRAPPER_SRC $NODE_BOOTSTRAPPER_DEST 755
 }
 
 cpAndMode() {
diff --git a/vhdbuilder/packer/vhd-image-builder-arm64-gen2.json b/vhdbuilder/packer/vhd-image-builder-arm64-gen2.json
index bdedacfa970..ca2e2f116d6 100644
--- a/vhdbuilder/packer/vhd-image-builder-arm64-gen2.json
+++ b/vhdbuilder/packer/vhd-image-builder-arm64-gen2.json
@@ -89,6 +89,11 @@
         "sudo chown -R $USER /opt/certs"
       ]
     },
+    {
+      "type": "file",
+      "source": "node-bootstrapper/dist/node-bootstrapper-linux-arm64",
+      "destination": "/home/packer/node-bootstrapper"
+    },
     {
       "type": "file",
       "source": "vhdbuilder/lister/bin/lister",
diff --git a/vhdbuilder/packer/vhd-image-builder-base.json b/vhdbuilder/packer/vhd-image-builder-base.json
index dca148a4c0e..25defe1330e 100644
--- a/vhdbuilder/packer/vhd-image-builder-base.json
+++ b/vhdbuilder/packer/vhd-image-builder-base.json
@@ -91,6 +91,11 @@
         "sudo chown -R $USER /opt/certs"
       ]
     },
+    {
+      "type": "file",
+      "source": "node-bootstrapper/dist/node-bootstrapper-linux-amd64",
+      "destination": "/home/packer/node-bootstrapper"
+    },
     {
       "type": "file",
       "source": "vhdbuilder/lister/bin/lister",
diff --git a/vhdbuilder/packer/vhd-image-builder-mariner-arm64.json b/vhdbuilder/packer/vhd-image-builder-mariner-arm64.json
index 67f67950326..15ed931ea9e 100644
--- a/vhdbuilder/packer/vhd-image-builder-mariner-arm64.json
+++ b/vhdbuilder/packer/vhd-image-builder-mariner-arm64.json
@@ -88,6 +88,11 @@
         "sudo chown -R $USER /opt/certs"
       ]
     },
+    {
+      "type": "file",
+      "source": "node-bootstrapper/dist/node-bootstrapper-linux-arm64",
+      "destination": "/home/packer/node-bootstrapper"
+    },
     {
       "type": "file",
       "source": "vhdbuilder/lister/bin/lister",
diff --git a/vhdbuilder/packer/vhd-image-builder-mariner.json b/vhdbuilder/packer/vhd-image-builder-mariner.json
index aec2ba079c5..27013775f14 100644
--- a/vhdbuilder/packer/vhd-image-builder-mariner.json
+++ b/vhdbuilder/packer/vhd-image-builder-mariner.json
@@ -90,6 +90,11 @@
         "sudo chown -R $USER /opt/certs"
       ]
     },
+    {
+      "type": "file",
+      "source": "node-bootstrapper/dist/node-bootstrapper-linux-amd64",
+      "destination": "/home/packer/node-bootstrapper"
+    },
     {
       "type": "file",
       "source": "vhdbuilder/lister/bin/lister",