From fa5b416264c8ea7cc389603833c9f131a2331f97 Mon Sep 17 00:00:00 2001
From: Ilya Alekseyev <ilya.alekseyev@nutanix.com>
Date: Tue, 13 Aug 2024 14:43:57 +0000
Subject: [PATCH] Initial GPU support implementation

---
 pkg/api/v1alpha1/nutanixmachineconfig.go      |  28 +
 .../v1alpha1/nutanixmachineconfig_types.go    |   4 +
 pkg/constants/constants.go                    |   2 +
 pkg/providers/nutanix/client.go               |   1 +
 pkg/providers/nutanix/config/md-template.yaml |  12 +
 pkg/providers/nutanix/mocks/client.go         |  15 +
 pkg/providers/nutanix/provider_test.go        |   1 +
 pkg/providers/nutanix/template.go             |   4 +
 pkg/providers/nutanix/template_test.go        |  46 ++
 .../nutanix/testdata/eksa-cluster-gpus.yaml   |  75 ++
 .../testdata/expected_results_gpus.yaml       | 611 +++++++++++++++++
 .../testdata/expected_results_gpus_md.yaml    |  86 +++
 pkg/providers/nutanix/validator.go            | 278 +++++++-
 pkg/providers/nutanix/validator_test.go       | 644 ++++++++++++++++++
 14 files changed, 1800 insertions(+), 7 deletions(-)
 create mode 100644 pkg/providers/nutanix/testdata/eksa-cluster-gpus.yaml
 create mode 100644 pkg/providers/nutanix/testdata/expected_results_gpus.yaml
 create mode 100644 pkg/providers/nutanix/testdata/expected_results_gpus_md.yaml

diff --git a/pkg/api/v1alpha1/nutanixmachineconfig.go b/pkg/api/v1alpha1/nutanixmachineconfig.go
index 66b59aaca465..00e09101a05f 100644
--- a/pkg/api/v1alpha1/nutanixmachineconfig.go
+++ b/pkg/api/v1alpha1/nutanixmachineconfig.go
@@ -10,10 +10,17 @@ import (
 // NutanixIdentifierType is an enumeration of different resource identifier types.
 type NutanixIdentifierType string
 
+// NutanixGPUIdentifierType is an enumeration of different GPU identifier types.
+type NutanixGPUIdentifierType string
+
 func (c NutanixIdentifierType) String() string {
 	return string(c)
 }
 
+func (c NutanixGPUIdentifierType) String() string {
+	return string(c)
+}
+
 const (
 	// NutanixMachineConfigKind is the kind for a NutanixMachineConfig.
 	NutanixMachineConfigKind = "NutanixMachineConfig"
@@ -23,6 +30,11 @@ const (
 	// NutanixIdentifierName is a resource identifier identifying the object by Name.
 	NutanixIdentifierName NutanixIdentifierType = "name"
 
+	// NutanixGPUIdentifierDeviceID is a GPU identifier identifying the object by DeviceID.
+	NutanixGPUIdentifierDeviceID NutanixGPUIdentifierType = "deviceID"
+	// NutanixGPUIdentifierName is a GPU identifier identifying the object by Name.
+	NutanixGPUIdentifierName NutanixGPUIdentifierType = "name"
+
 	defaultNutanixOSFamily         = Ubuntu
 	defaultNutanixSystemDiskSizeGi = "40Gi"
 	defaultNutanixMemorySizeGi     = "4Gi"
@@ -62,6 +74,22 @@ type NutanixCategoryIdentifier struct {
 	Value string `json:"value,omitempty"`
 }
 
+// NutanixGPUIdentifier holds VM GPU device configuration.
+type NutanixGPUIdentifier struct {
+	// deviceID is the device ID of the GPU device.
+	// +optional
+	DeviceID *int64 `json:"deviceID,omitempty"`
+
+	// name is the name of the GPU device.
+	// +optional
+	Name string `json:"name,omitempty"`
+
+	// type is the identifier type of the GPU device: deviceID or name.
+	// +kubebuilder:validation:Required
+	// +kubebuilder:validation:Enum:=deviceID;name
+	Type NutanixGPUIdentifierType `json:"type"`
+}
+
 // NutanixMachineConfigGenerateOpt is a functional option that can be passed to NewNutanixMachineConfigGenerate to
 // customize the generated machine config
 //
diff --git a/pkg/api/v1alpha1/nutanixmachineconfig_types.go b/pkg/api/v1alpha1/nutanixmachineconfig_types.go
index 3a696f3b3fc2..d63dcb8939df 100644
--- a/pkg/api/v1alpha1/nutanixmachineconfig_types.go
+++ b/pkg/api/v1alpha1/nutanixmachineconfig_types.go
@@ -59,6 +59,10 @@ type NutanixMachineConfigSpec struct {
 	// Categories must be created in Prism Central before they can be used.
 	// +kubebuilder:validation:Optional
 	AdditionalCategories []NutanixCategoryIdentifier `json:"additionalCategories,omitempty"`
+
+	// List of GPU devices that should be added to the VMs.
+	// +kubebuilder:validation:Optional
+	GPUs []NutanixGPUIdentifier `json:"gpus,omitempty"`
 }
 
 // SetDefaults sets defaults to NutanixMachineConfig if user has not provided.
diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go
index 3eb41b91e27f..d71726d4ea7e 100644
--- a/pkg/constants/constants.go
+++ b/pkg/constants/constants.go
@@ -90,6 +90,8 @@ const (
 	ConfigMapKind          = "ConfigMap"
 	ClusterResourceSetKind = "ClusterResourceSet"
 
+	NutanixMachineConfigKind = "NutanixMachineConfig"
+
 	BottlerocketDefaultUser = "ec2-user"
 	UbuntuDefaultUser       = "capv"
 
diff --git a/pkg/providers/nutanix/client.go b/pkg/providers/nutanix/client.go
index 4de7b5e52ffb..286fd66b7cc8 100644
--- a/pkg/providers/nutanix/client.go
+++ b/pkg/providers/nutanix/client.go
@@ -8,6 +8,7 @@ import (
 
 type Client interface {
 	GetSubnet(ctx context.Context, uuid string) (*v3.SubnetIntentResponse, error)
+	ListAllHost(ctx context.Context) (*v3.HostListResponse, error)
 	ListSubnet(ctx context.Context, getEntitiesRequest *v3.DSMetadata) (*v3.SubnetListIntentResponse, error)
 	GetImage(ctx context.Context, uuid string) (*v3.ImageIntentResponse, error)
 	ListImage(ctx context.Context, getEntitiesRequest *v3.DSMetadata) (*v3.ImageListIntentResponse, error)
diff --git a/pkg/providers/nutanix/config/md-template.yaml b/pkg/providers/nutanix/config/md-template.yaml
index 4d7717cc2ebc..8cb017650596 100644
--- a/pkg/providers/nutanix/config/md-template.yaml
+++ b/pkg/providers/nutanix/config/md-template.yaml
@@ -94,6 +94,18 @@ spec:
           value: "{{ .Value }}"
 {{- end }}
 {{- end }}
+{{- if .GPUs }}
+      gpus:
+{{- range .GPUs }}
+{{- if (eq .Type "deviceID") }}
+        - type: deviceID
+          deviceID: {{ .DeviceID }}
+{{- else if (eq .Type "name") }}
+        - type: name
+          name: "{{ .Name }}"
+{{- end }}
+{{- end }}
+{{- end }}
 ---
 apiVersion: bootstrap.cluster.x-k8s.io/v1beta1
 kind: KubeadmConfigTemplate
diff --git a/pkg/providers/nutanix/mocks/client.go b/pkg/providers/nutanix/mocks/client.go
index 0e3cea843469..796e0ea723ec 100644
--- a/pkg/providers/nutanix/mocks/client.go
+++ b/pkg/providers/nutanix/mocks/client.go
@@ -155,6 +155,21 @@ func (mr *MockClientMockRecorder) GetSubnet(ctx, uuid interface{}) *gomock.Call
 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetSubnet", reflect.TypeOf((*MockClient)(nil).GetSubnet), ctx, uuid)
 }
 
+// ListAllHost mocks base method.
+func (m *MockClient) ListAllHost(ctx context.Context) (*v3.HostListResponse, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "ListAllHost", ctx)
+	ret0, _ := ret[0].(*v3.HostListResponse)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// ListAllHost indicates an expected call of ListAllHost.
+func (mr *MockClientMockRecorder) ListAllHost(ctx interface{}) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListAllHost", reflect.TypeOf((*MockClient)(nil).ListAllHost), ctx)
+}
+
 // ListCategories mocks base method.
 func (m *MockClient) ListCategories(ctx context.Context, getEntitiesRequest *v3.CategoryListMetadata) (*v3.CategoryKeyListResponse, error) {
 	m.ctrl.T.Helper()
diff --git a/pkg/providers/nutanix/provider_test.go b/pkg/providers/nutanix/provider_test.go
index 5dd4bab83bbe..edd67448282c 100644
--- a/pkg/providers/nutanix/provider_test.go
+++ b/pkg/providers/nutanix/provider_test.go
@@ -418,6 +418,7 @@ func TestNutanixProviderSetupAndValidateCreate(t *testing.T) {
 		},
 	}
 	mockClient.EXPECT().ListImage(gomock.Any(), gomock.Any()).Return(images, nil).AnyTimes()
+	mockClient.EXPECT().ListAllHost(gomock.Any()).Return(fakeHostList(), nil).AnyTimes()
 	mockCertValidator := mockCrypto.NewMockTlsValidator(ctrl)
 	mockCertValidator.EXPECT().ValidateCert(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil)
 	mockCertValidator.EXPECT().ValidateCert(gomock.Any(), gomock.Any(), gomock.Any()).Return(errors.New("invalid cert"))
diff --git a/pkg/providers/nutanix/template.go b/pkg/providers/nutanix/template.go
index 8a52b0ecd093..507d2ec32c47 100644
--- a/pkg/providers/nutanix/template.go
+++ b/pkg/providers/nutanix/template.go
@@ -415,6 +415,10 @@ func buildTemplateMapMD(clusterSpec *cluster.Spec, workerNodeGroupMachineSpec v1
 		values["additionalCategories"] = workerNodeGroupMachineSpec.AdditionalCategories
 	}
 
+	if len(workerNodeGroupMachineSpec.GPUs) > 0 {
+		values["GPUs"] = workerNodeGroupMachineSpec.GPUs
+	}
+
 	if workerNodeGroupConfiguration.KubeletConfiguration != nil {
 		wnKubeletConfig := workerNodeGroupConfiguration.KubeletConfiguration.Object
 		if _, ok := wnKubeletConfig["tlsCipherSuites"]; !ok {
diff --git a/pkg/providers/nutanix/template_test.go b/pkg/providers/nutanix/template_test.go
index 40a59ae613d7..71045105a515 100644
--- a/pkg/providers/nutanix/template_test.go
+++ b/pkg/providers/nutanix/template_test.go
@@ -726,6 +726,52 @@ func TestTemplateBuilderFailureDomains(t *testing.T) {
 	}
 }
 
+func TestTemplateBuilderGPUs(t *testing.T) {
+	for _, tc := range []struct {
+		Input    string
+		Output   string
+		OutputMD string
+	}{
+		{
+			Input:    "testdata/eksa-cluster-gpus.yaml",
+			Output:   "testdata/expected_results_gpus.yaml",
+			OutputMD: "testdata/expected_results_gpus_md.yaml",
+		},
+	} {
+		clusterSpec := test.NewFullClusterSpec(t, tc.Input)
+
+		machineCfg := clusterSpec.NutanixMachineConfig(clusterSpec.Cluster.Spec.ControlPlaneConfiguration.MachineGroupRef.Name)
+		workerConfs := map[string]anywherev1.NutanixMachineConfigSpec{
+			"eksa-unit-test": machineCfg.Spec,
+		}
+
+		t.Setenv(constants.EksaNutanixUsernameKey, "admin")
+		t.Setenv(constants.EksaNutanixPasswordKey, "password")
+		creds := GetCredsFromEnv()
+
+		bldr := NewNutanixTemplateBuilder(&clusterSpec.NutanixDatacenter.Spec, &machineCfg.Spec, &machineCfg.Spec,
+			workerConfs, creds, time.Now)
+
+		cpSpec, err := bldr.GenerateCAPISpecControlPlane(clusterSpec)
+		assert.NoError(t, err)
+		assert.NotNil(t, cpSpec)
+		test.AssertContentToFile(t, string(cpSpec), tc.Output)
+
+		workloadTemplateNames := map[string]string{
+			"eksa-unit-test": "eksa-unit-test",
+		}
+		kubeadmconfigTemplateNames := map[string]string{
+			"eksa-unit-test": "eksa-unit-test",
+		}
+
+		data, err := bldr.GenerateCAPISpecWorkers(clusterSpec, workloadTemplateNames, kubeadmconfigTemplateNames)
+
+		assert.NoError(t, err)
+
+		test.AssertContentToFile(t, string(data), tc.OutputMD)
+	}
+}
+
 func minimalNutanixConfigSpec(t *testing.T) (*anywherev1.NutanixDatacenterConfig, *anywherev1.NutanixMachineConfig, map[string]anywherev1.NutanixMachineConfigSpec) {
 	dcConf := &anywherev1.NutanixDatacenterConfig{}
 	err := yaml.Unmarshal([]byte(nutanixDatacenterConfigSpec), dcConf)
diff --git a/pkg/providers/nutanix/testdata/eksa-cluster-gpus.yaml b/pkg/providers/nutanix/testdata/eksa-cluster-gpus.yaml
new file mode 100644
index 000000000000..bcd875ebfbbd
--- /dev/null
+++ b/pkg/providers/nutanix/testdata/eksa-cluster-gpus.yaml
@@ -0,0 +1,75 @@
+apiVersion: anywhere.eks.amazonaws.com/v1alpha1
+kind: Cluster
+metadata:
+  name: eksa-unit-test
+  namespace: default
+spec:
+  kubernetesVersion: "1.19"
+  controlPlaneConfiguration:
+    name: eksa-unit-test
+    count: 3
+    endpoint:
+      host: test-ip
+    machineGroupRef:
+      name: eksa-unit-test
+      kind: NutanixMachineConfig
+  workerNodeGroupConfigurations:
+    - count: 4
+      name: eksa-unit-test
+      machineGroupRef:
+        name: eksa-unit-test
+        kind: NutanixMachineConfig
+  datacenterRef:
+    kind: NutanixDatacenterConfig
+    name: eksa-unit-test
+  clusterNetwork:
+    cni: "cilium"
+    pods:
+      cidrBlocks:
+        - 192.168.0.0/16
+    services:
+      cidrBlocks:
+        - 10.96.0.0/12
+---
+apiVersion: anywhere.eks.amazonaws.com/v1alpha1
+kind: NutanixDatacenterConfig
+metadata:
+  name: eksa-unit-test
+  namespace: default
+spec:
+  endpoint: "prism.nutanix.com"
+  port: 9440
+  credentialRef:
+    kind: Secret
+    name: "nutanix-credentials"
+---
+apiVersion: anywhere.eks.amazonaws.com/v1alpha1
+kind: NutanixMachineConfig
+metadata:
+  name: eksa-unit-test
+  namespace: default
+spec:
+  vcpusPerSocket: 1
+  vcpuSockets: 4
+  memorySize: 8Gi
+  image:
+    type: "name"
+    name: "prism-image"
+  cluster:
+    type: "name"
+    name: "prism-cluster"
+  subnet:
+    type: "name"
+    name: "prism-subnet"
+  gpus:
+  - type:     deviceID
+    deviceID: 8757
+  - type:     name
+    name:     "Ampere 40"
+  systemDiskSize: 40Gi
+  osFamily: "ubuntu"
+  users:
+    - name: "mySshUsername"
+      sshAuthorizedKeys:
+        - "mySshAuthorizedKey"
+---
diff --git a/pkg/providers/nutanix/testdata/expected_results_gpus.yaml b/pkg/providers/nutanix/testdata/expected_results_gpus.yaml
new file mode 100644
index 000000000000..e9196e6783b2
--- /dev/null
+++ b/pkg/providers/nutanix/testdata/expected_results_gpus.yaml
@@ -0,0 +1,611 @@
+apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+kind: NutanixCluster
+metadata:
+  name: "eksa-unit-test"
+  namespace: "eksa-system"
+spec:
+  failureDomains: []
+  prismCentral:
+    address: "prism.nutanix.com"
+    port: 9440
+    insecure: false
+    credentialRef:
+      name: "capx-eksa-unit-test"
+      kind: Secret
+  controlPlaneEndpoint:
+    host: "test-ip"
+    port: 6443
+---
+apiVersion: cluster.x-k8s.io/v1beta1
+kind: Cluster
+metadata:
+  labels:
+    cluster.x-k8s.io/cluster-name: "eksa-unit-test"
+  name: "eksa-unit-test"
+  namespace: "eksa-system"
+spec:
+  clusterNetwork:
+    services:
+      cidrBlocks: [10.96.0.0/12]
+    pods:
+      cidrBlocks: [192.168.0.0/16]
+    serviceDomain: "cluster.local"
+  controlPlaneRef:
+    apiVersion: controlplane.cluster.x-k8s.io/v1beta1
+    kind: KubeadmControlPlane
+    name: "eksa-unit-test"
+  infrastructureRef:
+    apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+    kind: NutanixCluster
+    name: "eksa-unit-test"
+---
+apiVersion: controlplane.cluster.x-k8s.io/v1beta1
+kind: KubeadmControlPlane
+metadata:
+  name: "eksa-unit-test"
+  namespace: "eksa-system"
+spec:
+  replicas: 3
+  version: "v1.19.8-eks-1-19-4"
+  machineTemplate:
+    infrastructureRef:
+      apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+      kind: NutanixMachineTemplate
+      name: "<no value>"
+  kubeadmConfigSpec:
+    clusterConfiguration:
+      imageRepository: "public.ecr.aws/eks-distro/kubernetes"
+      apiServer:
+        certSANs:
+          - localhost
+          - 127.0.0.1
+          - 0.0.0.0
+        extraArgs:
+          cloud-provider: external
+          audit-policy-file: /etc/kubernetes/audit-policy.yaml
+          audit-log-path: /var/log/kubernetes/api-audit.log
+          audit-log-maxage: "30"
+          audit-log-maxbackup: "10"
+          audit-log-maxsize: "512"
+        extraVolumes:
+        - hostPath: /etc/kubernetes/audit-policy.yaml
+          mountPath: /etc/kubernetes/audit-policy.yaml
+          name: audit-policy
+          pathType: File
+          readOnly: true
+        - hostPath: /var/log/kubernetes
+          mountPath: /var/log/kubernetes
+          name: audit-log-dir
+          pathType: DirectoryOrCreate
+          readOnly: false
+      controllerManager:
+        extraArgs:
+          cloud-provider: external
+          enable-hostpath-provisioner: "true"
+      dns:
+        imageRepository: public.ecr.aws/eks-distro/coredns
+        imageTag: v1.8.0-eks-1-19-4
+      etcd:
+        local:
+          imageRepository: public.ecr.aws/eks-distro/etcd-io
+          imageTag: v3.4.14-eks-1-19-4
+    files:
+    - content: |
+        apiVersion: v1
+        kind: Pod
+        metadata:
+          creationTimestamp: null
+          name: kube-vip
+          namespace: kube-system
+        spec:
+          containers:
+            - name: kube-vip
+              image: 
+              imagePullPolicy: IfNotPresent
+              args:
+                - manager
+              env:
+                - name: vip_arp
+                  value: "true"
+                - name: address
+                  value: "test-ip"
+                - name: port
+                  value: "6443"
+                - name: vip_cidr
+                  value: "32"
+                - name: cp_enable
+                  value: "true"
+                - name: cp_namespace
+                  value: kube-system
+                - name: vip_ddns
+                  value: "false"
+                - name: vip_leaderelection
+                  value: "true"
+                - name: vip_leaseduration
+                  value: "15"
+                - name: vip_renewdeadline
+                  value: "10"
+                - name: vip_retryperiod
+                  value: "2"
+                - name: svc_enable
+                  value: "false"
+                - name: lb_enable
+                  value: "false"
+              securityContext:
+                capabilities:
+                  add:
+                    - NET_ADMIN
+                    - SYS_TIME
+                    - NET_RAW
+              volumeMounts:
+                - mountPath: /etc/kubernetes/admin.conf
+                  name: kubeconfig
+              resources: {}
+          hostNetwork: true
+          volumes:
+            - name: kubeconfig
+              hostPath:
+                type: FileOrCreate
+                path: /etc/kubernetes/admin.conf
+        status: {}
+      owner: root:root
+      path: /etc/kubernetes/manifests/kube-vip.yaml
+    - content: |
+        apiVersion: audit.k8s.io/v1beta1
+        kind: Policy
+        rules:
+        # Log aws-auth configmap changes
+        - level: RequestResponse
+          namespaces: ["kube-system"]
+          verbs: ["update", "patch", "delete"]
+          resources:
+          - group: "" # core
+            resources: ["configmaps"]
+            resourceNames: ["aws-auth"]
+          omitStages:
+          - "RequestReceived"
+        # The following requests were manually identified as high-volume and low-risk,
+        # so drop them.
+        - level: None
+          users: ["system:kube-proxy"]
+          verbs: ["watch"]
+          resources:
+          - group: "" # core
+            resources: ["endpoints", "services", "services/status"]
+        - level: None
+          users: ["kubelet"] # legacy kubelet identity
+          verbs: ["get"]
+          resources:
+          - group: "" # core
+            resources: ["nodes", "nodes/status"]
+        - level: None
+          userGroups: ["system:nodes"]
+          verbs: ["get"]
+          resources:
+          - group: "" # core
+            resources: ["nodes", "nodes/status"]
+        - level: None
+          users:
+          - system:kube-controller-manager
+          - system:kube-scheduler
+          - system:serviceaccount:kube-system:endpoint-controller
+          verbs: ["get", "update"]
+          namespaces: ["kube-system"]
+          resources:
+          - group: "" # core
+            resources: ["endpoints"]
+        - level: None
+          users: ["system:apiserver"]
+          verbs: ["get"]
+          resources:
+          - group: "" # core
+            resources: ["namespaces", "namespaces/status", "namespaces/finalize"]
+        # Don't log HPA fetching metrics.
+        - level: None
+          users:
+          - system:kube-controller-manager
+          verbs: ["get", "list"]
+          resources:
+          - group: "metrics.k8s.io"
+        # Don't log these read-only URLs.
+        - level: None
+          nonResourceURLs:
+          - /healthz*
+          - /version
+          - /swagger*
+        # Don't log events requests.
+        - level: None
+          resources:
+          - group: "" # core
+            resources: ["events"]
+        # node and pod status calls from nodes are high-volume and can be large, don't log responses for expected updates from nodes
+        - level: Request
+          users: ["kubelet", "system:node-problem-detector", "system:serviceaccount:kube-system:node-problem-detector"]
+          verbs: ["update","patch"]
+          resources:
+          - group: "" # core
+            resources: ["nodes/status", "pods/status"]
+          omitStages:
+          - "RequestReceived"
+        - level: Request
+          userGroups: ["system:nodes"]
+          verbs: ["update","patch"]
+          resources:
+          - group: "" # core
+            resources: ["nodes/status", "pods/status"]
+          omitStages:
+          - "RequestReceived"
+        # deletecollection calls can be large, don't log responses for expected namespace deletions
+        - level: Request
+          users: ["system:serviceaccount:kube-system:namespace-controller"]
+          verbs: ["deletecollection"]
+          omitStages:
+          - "RequestReceived"
+        # Secrets, ConfigMaps, and TokenReviews can contain sensitive & binary data,
+        # so only log at the Metadata level.
+        - level: Metadata
+          resources:
+          - group: "" # core
+            resources: ["secrets", "configmaps"]
+          - group: authentication.k8s.io
+            resources: ["tokenreviews"]
+          omitStages:
+            - "RequestReceived"
+        - level: Request
+          resources:
+          - group: ""
+            resources: ["serviceaccounts/token"]
+        # Get repsonses can be large; skip them.
+        - level: Request
+          verbs: ["get", "list", "watch"]
+          resources:
+          - group: "" # core
+          - group: "admissionregistration.k8s.io"
+          - group: "apiextensions.k8s.io"
+          - group: "apiregistration.k8s.io"
+          - group: "apps"
+          - group: "authentication.k8s.io"
+          - group: "authorization.k8s.io"
+          - group: "autoscaling"
+          - group: "batch"
+          - group: "certificates.k8s.io"
+          - group: "extensions"
+          - group: "metrics.k8s.io"
+          - group: "networking.k8s.io"
+          - group: "policy"
+          - group: "rbac.authorization.k8s.io"
+          - group: "scheduling.k8s.io"
+          - group: "settings.k8s.io"
+          - group: "storage.k8s.io"
+          omitStages:
+          - "RequestReceived"
+        # Default level for known APIs
+        - level: RequestResponse
+          resources:
+          - group: "" # core
+          - group: "admissionregistration.k8s.io"
+          - group: "apiextensions.k8s.io"
+          - group: "apiregistration.k8s.io"
+          - group: "apps"
+          - group: "authentication.k8s.io"
+          - group: "authorization.k8s.io"
+          - group: "autoscaling"
+          - group: "batch"
+          - group: "certificates.k8s.io"
+          - group: "extensions"
+          - group: "metrics.k8s.io"
+          - group: "networking.k8s.io"
+          - group: "policy"
+          - group: "rbac.authorization.k8s.io"
+          - group: "scheduling.k8s.io"
+          - group: "settings.k8s.io"
+          - group: "storage.k8s.io"
+          omitStages:
+          - "RequestReceived"
+        # Default level for all other requests.
+        - level: Metadata
+          omitStages:
+          - "RequestReceived"
+      owner: root:root
+      path: /etc/kubernetes/audit-policy.yaml
+    initConfiguration:
+      nodeRegistration:
+        kubeletExtraArgs:
+          cloud-provider: external
+          # We have to pin the cgroupDriver to cgroupfs as kubeadm >=1.21 defaults to systemd
+          # kind will implement systemd support in: https://github.com/kubernetes-sigs/kind/issues/1726
+          #cgroup-driver: cgroupfs
+          eviction-hard: nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0%
+          tls-cipher-suites: TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256
+    joinConfiguration:
+      nodeRegistration:
+        criSocket: /var/run/containerd/containerd.sock
+        kubeletExtraArgs:
+          cloud-provider: external
+          read-only-port: "0"
+          anonymous-auth: "false"
+          tls-cipher-suites: TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256
+        name: "{{ ds.meta_data.hostname }}"
+    users:
+      - name: "mySshUsername"
+        lockPassword: false
+        sudo: ALL=(ALL) NOPASSWD:ALL
+        sshAuthorizedKeys:
+          - "mySshAuthorizedKey"
+    preKubeadmCommands:
+      - hostnamectl set-hostname "{{ ds.meta_data.hostname }}"
+      - echo "::1         ipv6-localhost ipv6-loopback" >/etc/hosts
+      - echo "127.0.0.1   localhost" >>/etc/hosts
+      - echo "127.0.0.1   {{ ds.meta_data.hostname }}" >> /etc/hosts
+    postKubeadmCommands:
+      - echo export KUBECONFIG=/etc/kubernetes/admin.conf >> /root/.bashrc
+    useExperimentalRetryJoin: true
+---
+apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+kind: NutanixMachineTemplate
+metadata:
+  name: "<no value>"
+  namespace: "eksa-system"
+spec:
+  template:
+    spec:
+      providerID: "nutanix://eksa-unit-test-m1"
+      vcpusPerSocket: 1
+      vcpuSockets: 4
+      memorySize: 8Gi
+      systemDiskSize: 40Gi
+      image:
+        type: name
+        name: "prism-image"
+
+      cluster:
+        type: name
+        name: "prism-cluster"
+      subnet:
+        - type: name
+          name: "prism-subnet"
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: eksa-unit-test-nutanix-ccm
+  namespace: "eksa-system"
+data:
+  nutanix-ccm.yaml: |
+    ---
+    apiVersion: v1
+    kind: ServiceAccount
+    metadata:
+      name: cloud-controller-manager
+      namespace: kube-system
+    ---
+    kind: ConfigMap
+    apiVersion: v1
+    metadata:
+      name: nutanix-config
+      namespace: kube-system
+    data:
+      nutanix_config.json: |-
+        {
+          "prismCentral": {
+            "address": "prism.nutanix.com",
+            "port": 9440,
+            "insecure": false,
+            "credentialRef": {
+              "kind": "secret",
+              "name": "nutanix-creds",
+              "namespace": "kube-system"
+            }
+          },
+          "enableCustomLabeling": false,
+          "topologyDiscovery": {
+            "type": "Prism"
+          }
+        }
+    ---
+    apiVersion: rbac.authorization.k8s.io/v1
+    kind: ClusterRole
+    metadata:
+      annotations:
+        rbac.authorization.kubernetes.io/autoupdate: "true"
+      name: system:cloud-controller-manager
+    rules:
+      - apiGroups:
+          - ""
+        resources:
+          - secrets
+        verbs:
+          - get
+          - list
+          - watch
+      - apiGroups:
+          - ""
+        resources:
+          - configmaps
+        verbs:
+          - get
+          - list
+          - watch
+      - apiGroups:
+          - ""
+        resources:
+          - events
+        verbs:
+          - create
+          - patch
+          - update
+      - apiGroups:
+          - ""
+        resources:
+          - nodes
+        verbs:
+          - "*"
+      - apiGroups:
+          - ""
+        resources:
+          - nodes/status
+        verbs:
+          - patch
+      - apiGroups:
+          - ""
+        resources:
+          - serviceaccounts
+        verbs:
+          - create
+      - apiGroups:
+          - ""
+        resources:
+          - endpoints
+        verbs:
+          - create
+          - get
+          - list
+          - watch
+          - update
+      - apiGroups:
+          - coordination.k8s.io
+        resources:
+          - leases
+        verbs:
+          - get
+          - list
+          - watch
+          - create
+          - update
+          - patch
+          - delete
+    ---
+    kind: ClusterRoleBinding
+    apiVersion: rbac.authorization.k8s.io/v1
+    metadata:
+      name: system:cloud-controller-manager
+    roleRef:
+      apiGroup: rbac.authorization.k8s.io
+      kind: ClusterRole
+      name: system:cloud-controller-manager
+    subjects:
+      - kind: ServiceAccount
+        name: cloud-controller-manager
+        namespace: kube-system
+    ---
+    apiVersion: apps/v1
+    kind: Deployment
+    metadata:
+      labels:
+        k8s-app: nutanix-cloud-controller-manager
+      name: nutanix-cloud-controller-manager
+      namespace: kube-system
+    spec:
+      replicas: 1
+      selector:
+        matchLabels:
+          k8s-app: nutanix-cloud-controller-manager
+      strategy:
+        type: Recreate
+      template:
+        metadata:
+          labels:
+            k8s-app: nutanix-cloud-controller-manager
+        spec:
+          hostNetwork: true
+          priorityClassName: system-cluster-critical
+          nodeSelector:
+            node-role.kubernetes.io/control-plane: ""
+          serviceAccountName: cloud-controller-manager
+          affinity:
+            podAntiAffinity:
+              requiredDuringSchedulingIgnoredDuringExecution:
+              - labelSelector:
+                  matchLabels:
+                    k8s-app: nutanix-cloud-controller-manager
+                topologyKey: kubernetes.io/hostname
+          dnsPolicy: Default
+          tolerations:
+            - effect: NoSchedule
+              key: node-role.kubernetes.io/master
+              operator: Exists
+            - effect: NoSchedule
+              key: node-role.kubernetes.io/control-plane
+              operator: Exists
+            - effect: NoExecute
+              key: node.kubernetes.io/unreachable
+              operator: Exists
+              tolerationSeconds: 120
+            - effect: NoExecute
+              key: node.kubernetes.io/not-ready
+              operator: Exists
+              tolerationSeconds: 120
+            - effect: NoSchedule
+              key: node.cloudprovider.kubernetes.io/uninitialized
+              operator: Exists
+            - effect: NoSchedule
+              key: node.kubernetes.io/not-ready
+              operator: Exists
+          containers:
+            - image: ""
+              imagePullPolicy: IfNotPresent
+              name: nutanix-cloud-controller-manager
+              env:
+                - name: POD_NAMESPACE
+                  valueFrom:
+                    fieldRef:
+                      fieldPath: metadata.namespace
+              args:
+                - "--leader-elect=true"
+                - "--cloud-config=/etc/cloud/nutanix_config.json"
+              resources:
+                requests:
+                  cpu: 100m
+                  memory: 50Mi
+              volumeMounts:
+                - mountPath: /etc/cloud
+                  name: nutanix-config-volume
+                  readOnly: true
+          volumes:
+            - name: nutanix-config-volume
+              configMap:
+                name: nutanix-config
+---
+apiVersion: addons.cluster.x-k8s.io/v1beta1
+kind: ClusterResourceSet
+metadata:
+  name: eksa-unit-test-nutanix-ccm-crs
+  namespace: "eksa-system"
+spec:
+  clusterSelector:
+    matchLabels:
+      cluster.x-k8s.io/cluster-name: "eksa-unit-test"
+  resources:
+  - kind: ConfigMap
+    name: eksa-unit-test-nutanix-ccm
+  - kind: Secret
+    name: eksa-unit-test-nutanix-ccm-secret
+  strategy: Reconcile
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: "eksa-unit-test-nutanix-ccm-secret"
+  namespace: "eksa-system"
+stringData:
+  nutanix-ccm-secret.yaml: |
+    apiVersion: v1
+    kind: Secret
+    metadata:
+      name: nutanix-creds
+      namespace: kube-system
+    stringData:
+      credentials: |-
+        [
+          {        
+            "type": "basic_auth",
+            "data": {
+              "prismCentral": {
+                "username": "admin",
+                "password": "password"
+              },
+              "prismElements": null
+            }
+          }
+        ]
+type: addons.cluster.x-k8s.io/resource-set
diff --git a/pkg/providers/nutanix/testdata/expected_results_gpus_md.yaml b/pkg/providers/nutanix/testdata/expected_results_gpus_md.yaml
new file mode 100644
index 000000000000..07e96a0f7901
--- /dev/null
+++ b/pkg/providers/nutanix/testdata/expected_results_gpus_md.yaml
@@ -0,0 +1,86 @@
+apiVersion: cluster.x-k8s.io/v1beta1
+kind: MachineDeployment
+metadata:
+  labels:
+    cluster.x-k8s.io/cluster-name: "eksa-unit-test"
+  name: "eksa-unit-test-eksa-unit-test"
+  namespace: "eksa-system"
+spec:
+  clusterName: "eksa-unit-test"
+  replicas: 4
+  selector:
+    matchLabels: {}
+  template:
+    metadata:
+      labels:
+        cluster.x-k8s.io/cluster-name: "eksa-unit-test"
+    spec:
+      bootstrap:
+        configRef:
+          apiVersion: bootstrap.cluster.x-k8s.io/v1beta1
+          kind: KubeadmConfigTemplate
+          name: "eksa-unit-test"
+      clusterName: "eksa-unit-test"
+      infrastructureRef:
+        apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+        kind: NutanixMachineTemplate
+        name: "eksa-unit-test"
+      version: "v1.19.8-eks-1-19-4"
+---
+apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+kind: NutanixMachineTemplate
+metadata:
+  name: "eksa-unit-test"
+  namespace: "eksa-system"
+spec:
+  template:
+    spec:
+      providerID: "nutanix://eksa-unit-test-m1"
+      vcpusPerSocket: 1
+      vcpuSockets: 4
+      memorySize: 8Gi
+      systemDiskSize: 40Gi
+      image:
+        type: name
+        name: "prism-image"
+
+      cluster:
+        type: name
+        name: "prism-cluster"
+      subnet:
+        - type: name
+          name: "prism-subnet"
+      gpus:
+        - type: deviceID
+          deviceID: 8757
+        - type: name
+          name: "Ampere 40"
+---
+apiVersion: bootstrap.cluster.x-k8s.io/v1beta1
+kind: KubeadmConfigTemplate
+metadata:
+  name: "eksa-unit-test"
+  namespace: "eksa-system"
+spec:
+  template:
+    spec:
+      preKubeadmCommands:
+        - hostnamectl set-hostname "{{ ds.meta_data.hostname }}"
+      joinConfiguration:
+        nodeRegistration:
+          kubeletExtraArgs:
+            cloud-provider: external
+            # We have to pin the cgroupDriver to cgroupfs as kubeadm >=1.21 defaults to systemd
+            # kind will implement systemd support in: https://github.com/kubernetes-sigs/kind/issues/1726
+            #cgroup-driver: cgroupfs
+            eviction-hard: nodefs.available<0%,nodefs.inodesFree<0%,imagefs.available<0%
+            tls-cipher-suites: TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256
+          name: '{{ ds.meta_data.hostname }}'
+      users:
+        - name: "mySshUsername"
+          lockPassword: false
+          sudo: ALL=(ALL) NOPASSWD:ALL
+          sshAuthorizedKeys:
+            - "mySshAuthorizedKey"
+
+---
diff --git a/pkg/providers/nutanix/validator.go b/pkg/providers/nutanix/validator.go
index 827d91b20652..1e23c924292f 100644
--- a/pkg/providers/nutanix/validator.go
+++ b/pkg/providers/nutanix/validator.go
@@ -67,6 +67,10 @@ func (v *Validator) ValidateClusterSpec(ctx context.Context, spec *cluster.Spec,
 		}
 	}
 
+	if err := v.validateFreeGPU(ctx, client, spec); err != nil {
+		return err
+	}
+
 	return v.checkImageNameMatchesKubernetesVersion(ctx, spec, client)
 }
 
@@ -274,6 +278,14 @@ func (v *Validator) ValidateMachineConfig(ctx context.Context, client Client, co
 		}
 	}
 
+	if config.Spec.GPUs != nil {
+		for _, gpu := range config.Spec.GPUs {
+			if err := v.validateGPUConfig(gpu); err != nil {
+				return err
+			}
+		}
+	}
+
 	return nil
 }
 
@@ -441,6 +453,243 @@ func (v *Validator) validateAdditionalCategories(ctx context.Context, client Cli
 	return nil
 }
 
+// validateGPUConfig checks a single requested GPU identifier: the type must be
+// set and be either deviceID or name, and the field matching that type
+// (DeviceID or Name) must be populated.
+func (v *Validator) validateGPUConfig(gpu anywherev1.NutanixGPUIdentifier) error {
+	switch gpu.Type {
+	case "":
+		return fmt.Errorf("missing GPU type")
+	case anywherev1.NutanixGPUIdentifierDeviceID:
+		if gpu.DeviceID == nil {
+			return fmt.Errorf("missing GPU device ID")
+		}
+	case anywherev1.NutanixGPUIdentifierName:
+		if gpu.Name == "" {
+			return fmt.Errorf("missing GPU name")
+		}
+	default:
+		return fmt.Errorf("invalid GPU identifier type: %s; valid types are: %q and %q", gpu.Type, anywherev1.NutanixGPUIdentifierDeviceID, anywherev1.NutanixGPUIdentifierName)
+	}
+
+	return nil
+}
+
+func getRequestedGPUsForAllMachines(machineCount int, requestedGpus []anywherev1.NutanixGPUIdentifier) []anywherev1.NutanixGPUIdentifier {
+	expanded := make([]anywherev1.NutanixGPUIdentifier, 0) // requestedGpus repeated once per machine
+	for remaining := machineCount; remaining > 0; remaining-- {
+		expanded = append(expanded, requestedGpus...)
+	}
+	return expanded
+}
+
+// tryAssignGPUsToMachineConfig reserves one assignable GPU from clusterGpuList for
+// every GPU requested by each of the machineCount machines. It returns the GPUs
+// left over, or an error naming the first request that cannot be satisfied.
+func (v *Validator) tryAssignGPUsToMachineConfig(machineCount int, requestedGpus []anywherev1.NutanixGPUIdentifier, clusterGpuList []v3.GPU, cluster anywherev1.NutanixResourceIdentifier) ([]v3.GPU, error) {
+	for _, wanted := range getRequestedGPUsForAllMachines(machineCount, requestedGpus) {
+		match := -1
+		for i := range clusterGpuList {
+			if isRequestedGPUAssignable(clusterGpuList[i], wanted) {
+				match = i
+				break
+			}
+		}
+		if match == -1 {
+			return nil, errorGPUNotFound(wanted, cluster)
+		}
+		// Drop the reserved GPU so it cannot satisfy a later request.
+		clusterGpuList = append(clusterGpuList[:match], clusterGpuList[match+1:]...)
+	}
+
+	return clusterGpuList, nil
+}
+
+// isGPURequested reports whether any machine config requests at least one GPU; an empty list (`gpus: []`) does not count.
+func (v *Validator) isGPURequested(configs map[string]*anywherev1.NutanixMachineConfig) bool {
+	for _, machineConfig := range configs {
+		if len(machineConfig.Spec.GPUs) > 0 {
+			return true
+		}
+	}
+	return false
+}
+
+// initAvailableGPUsMap returns a map with an empty GPU list for every cluster
+// UUID referenced by a host that has status and resources set, so clusters
+// whose hosts report no GPUs still get an entry.
+func (v *Validator) initAvailableGPUsMap(hosts []*v3.HostResponse) map[string][]v3.GPU {
+	gpusByCluster := make(map[string][]v3.GPU)
+	for _, host := range hosts {
+		status := host.Status
+		if status == nil || status.Resources == nil || status.ClusterReference == nil || status.ClusterReference.UUID == "" {
+			continue
+		}
+		gpusByCluster[status.ClusterReference.UUID] = make([]v3.GPU, 0)
+	}
+	return gpusByCluster
+}
+
+// getAvailableGPUs groups the GPUs reported by hosts under the UUID of the
+// cluster each host belongs to. Hosts lacking status, resources, a GPU list or
+// a cluster UUID contribute nothing. The returned error is always nil.
+func (v *Validator) getAvailableGPUs(hosts []*v3.HostResponse) (map[string][]v3.GPU, error) {
+	gpusByCluster := v.initAvailableGPUsMap(hosts)
+	for _, host := range hosts {
+		status := host.Status
+		if status == nil || status.Resources == nil || status.Resources.GPUList == nil ||
+			status.ClusterReference == nil || status.ClusterReference.UUID == "" {
+			continue
+		}
+		uuid := status.ClusterReference.UUID
+		for _, gpu := range status.Resources.GPUList {
+			gpusByCluster[uuid] = append(gpusByCluster[uuid], *gpu)
+		}
+	}
+
+	return gpusByCluster, nil
+}
+
+// tryAssignGPUsToAllMachineConfigs resolves the cluster UUID of every machine
+// config and, for configs that request GPUs and have a known machine count,
+// reserves those GPUs from availableGpu, which is updated in place.
+func (v *Validator) tryAssignGPUsToAllMachineConfigs(ctx context.Context, v3Client Client, cluster *cluster.Spec, availableGpu map[string][]v3.GPU) error {
+	machineCount := v.getMachineCountForAllMachineConfigs(cluster)
+	for _, machineConfig := range cluster.NutanixMachineConfigs {
+		clusterUUID, err := getClusterUUID(ctx, v3Client, machineConfig.Spec.Cluster)
+		if err != nil {
+			return err
+		}
+
+		count, counted := machineCount[machineConfig.Name]
+		if machineConfig.Spec.GPUs == nil || !counted {
+			continue
+		}
+		availableGpu[clusterUUID], err = v.tryAssignGPUsToMachineConfig(count, machineConfig.Spec.GPUs, availableGpu[clusterUUID], machineConfig.Spec.Cluster)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// getMachineCountForAllMachineConfigs returns the number of machines that use each
+// NutanixMachineConfig, summing the counts of all groups that share one config.
+func (v *Validator) getMachineCountForAllMachineConfigs(clusterSpec *cluster.Spec) map[string]int {
+	machineCountMap := make(map[string]int)
+	cluster := clusterSpec.Cluster.Spec
+	if cluster.ControlPlaneConfiguration.MachineGroupRef.Kind == constants.NutanixMachineConfigKind {
+		machineCountMap[cluster.ControlPlaneConfiguration.MachineGroupRef.Name] += cluster.ControlPlaneConfiguration.Count
+	}
+	if cluster.ExternalEtcdConfiguration != nil &&
+		cluster.ExternalEtcdConfiguration.MachineGroupRef.Kind == constants.NutanixMachineConfigKind {
+		machineCountMap[cluster.ExternalEtcdConfiguration.MachineGroupRef.Name] += cluster.ExternalEtcdConfiguration.Count
+	}
+	for _, workerNodeGroupConfiguration := range cluster.WorkerNodeGroupConfigurations {
+		if workerNodeGroupConfiguration.MachineGroupRef.Kind == constants.NutanixMachineConfigKind &&
+			workerNodeGroupConfiguration.Count != nil {
+			machineCountMap[workerNodeGroupConfiguration.MachineGroupRef.Name] += *workerNodeGroupConfiguration.Count
+		}
+	}
+	return machineCountMap
+}
+
+// getGPUModeMapping indexes the mode of every GPU reported by hosts, once by
+// device ID and once by name; a later GPU overwrites an earlier one that has
+// the same key. The returned error is always nil.
+func (v *Validator) getGPUModeMapping(hosts []*v3.HostResponse) (map[int64]string, map[string]string, error) {
+	modeByDeviceID := make(map[int64]string)
+	modeByName := make(map[string]string)
+	for _, host := range hosts {
+		if host.Status == nil || host.Status.Resources == nil || host.Status.Resources.GPUList == nil {
+			continue
+		}
+		for _, gpu := range host.Status.Resources.GPUList {
+			if gpu.DeviceID != nil {
+				modeByDeviceID[*gpu.DeviceID] = gpu.Mode
+			}
+			if gpu.Name != "" {
+				modeByName[gpu.Name] = gpu.Mode
+			}
+		}
+	}
+	return modeByDeviceID, modeByName, nil
+}
+
+// validateGPUModeNotMixed rejects a machine config whose requested GPUs resolve
+// to more than one mode (vGPU vs passthrough), using the modes reported by
+// hosts. The mode is tracked per machine config, as the error message states,
+// so different machine configs may use different modes.
+func (v *Validator) validateGPUModeNotMixed(hosts []*v3.HostResponse, cluster *cluster.Spec) error {
+	gpuDeviceIDToMode, gpuNameToMode, err := v.getGPUModeMapping(hosts)
+	if err != nil {
+		return err
+	}
+
+	getGpuModeFunc := createGetGpuModeFunc(gpuDeviceIDToMode, gpuNameToMode)
+	for _, machineConfig := range cluster.NutanixMachineConfigs {
+		// Start over for each machine config; mixing is only checked within one config.
+		gpuMode := ""
+		for _, gpu := range machineConfig.Spec.GPUs {
+			mode := getGpuModeFunc(gpu)
+			if gpuMode == "" {
+				gpuMode = mode
+			} else if gpuMode != mode {
+				return fmt.Errorf("all GPUs in a machine config must be of the same mode, vGPU or passthrough")
+			}
+		}
+	}
+
+	return nil
+}
+
+// createGetGpuModeFunc returns a lookup from a requested GPU to its mode, keyed by device ID for deviceID identifiers and by name otherwise.
+func createGetGpuModeFunc(gpuDeviceIDToMode map[int64]string, gpuNameToMode map[string]string) func(gpu anywherev1.NutanixGPUIdentifier) string {
+	return func(requested anywherev1.NutanixGPUIdentifier) string {
+		if requested.Type != anywherev1.NutanixGPUIdentifierDeviceID {
+			return gpuNameToMode[requested.Name]
+		}
+		return gpuDeviceIDToMode[*requested.DeviceID]
+	}
+}
+
+// validateFreeGPU checks that every GPU requested by the machine configs can be
+// matched to an assignable GPU on the config's cluster. Hosts are only listed
+// when GPUs are requested, so GPU-less clusters never fail on the host list.
+func (v *Validator) validateFreeGPU(ctx context.Context, v3Client Client, cluster *cluster.Spec) error {
+	if !v.isGPURequested(cluster.NutanixMachineConfigs) {
+		return nil
+	}
+
+	res, err := v3Client.ListAllHost(ctx)
+	if err != nil {
+		return fmt.Errorf("No GPUs found: %w", err)
+	}
+	if res == nil || len(res.Entities) == 0 {
+		return fmt.Errorf("No GPUs found: host list is empty")
+	}
+	if err := v.validateGPUModeNotMixed(res.Entities, cluster); err != nil {
+		return err
+	}
+	availableGpu, err := v.getAvailableGPUs(res.Entities)
+	if err != nil {
+		return err
+	}
+	return v.tryAssignGPUsToAllMachineConfigs(ctx, v3Client, cluster, availableGpu)
+}
+
+// validateUpgradeRolloutStrategy rejects a custom upgrade rollout strategy on the control plane or any worker node group.
+func (v *Validator) validateUpgradeRolloutStrategy(clusterSpec *cluster.Spec) error {
+	customized := clusterSpec.Cluster.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy != nil
+	for _, workerGroup := range clusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations {
+		customized = customized || workerGroup.UpgradeRolloutStrategy != nil
+	}
+	if customized {
+		return fmt.Errorf("Upgrade rollout strategy customization is not supported for nutanix provider")
+	}
+	return nil
+}
+
 // findSubnetUUIDByName retrieves the subnet uuid by the given subnet name.
 func findSubnetUUIDByName(ctx context.Context, v3Client Client, clusterUUID, subnetName string) (*string, error) {
 	res, err := v3Client.ListSubnet(ctx, &v3.DSMetadata{
@@ -546,14 +795,29 @@ func findProjectUUIDByName(ctx context.Context, v3Client Client, projectName str
 	return res.Entities[0].Metadata.UUID, nil
 }
 
-func (v *Validator) validateUpgradeRolloutStrategy(clusterSpec *cluster.Spec) error {
-	if clusterSpec.Cluster.Spec.ControlPlaneConfiguration.UpgradeRolloutStrategy != nil {
-		return fmt.Errorf("Upgrade rollout strategy customization is not supported for nutanix provider")
+// isRequestedGPUAssignable reports whether gpu is assignable and matches requestedGpu by device ID or by name; a nil device ID never matches.
+func isRequestedGPUAssignable(gpu v3.GPU, requestedGpu anywherev1.NutanixGPUIdentifier) bool {
+	if requestedGpu.Type == anywherev1.NutanixGPUIdentifierDeviceID {
+		return gpu.Assignable && gpu.DeviceID != nil && requestedGpu.DeviceID != nil && *gpu.DeviceID == *requestedGpu.DeviceID
 	}
-	for _, workerNodeGroupConfiguration := range clusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations {
-		if workerNodeGroupConfiguration.UpgradeRolloutStrategy != nil {
-			return fmt.Errorf("Upgrade rollout strategy customization is not supported for nutanix provider")
+	return (gpu.Name == requestedGpu.Name) && gpu.Assignable
+}
+
+// errorGPUNotFound builds the "not found" error for a requested GPU, naming it by device ID or name and, when set, the cluster by UUID or name.
+func errorGPUNotFound(gpu anywherev1.NutanixGPUIdentifier, cluster anywherev1.NutanixResourceIdentifier) error {
+	clusterAddonString := ""
+	if cluster.Type == anywherev1.NutanixIdentifierUUID {
+		if cluster.UUID != nil {
+			clusterAddonString = fmt.Sprintf("on cluster with UUID %s", *cluster.UUID)
+		}
+	} else {
+		if cluster.Name != nil {
+			clusterAddonString = fmt.Sprintf("on cluster with name %s", *cluster.Name)
 		}
 	}
-	return nil
+	if gpu.Type == anywherev1.NutanixGPUIdentifierDeviceID {
+		return fmt.Errorf("GPU with device ID %d not found %s", *gpu.DeviceID, clusterAddonString)
+	}
+
+	return fmt.Errorf("GPU with name %s not found %s", gpu.Name, clusterAddonString)
 }
diff --git a/pkg/providers/nutanix/validator_test.go b/pkg/providers/nutanix/validator_test.go
index e0609c415923..634249187484 100644
--- a/pkg/providers/nutanix/validator_test.go
+++ b/pkg/providers/nutanix/validator_test.go
@@ -21,6 +21,8 @@ import (
 	"github.com/aws/eks-anywhere/internal/test"
 	"github.com/aws/eks-anywhere/pkg/api/v1alpha1"
 	anywherev1 "github.com/aws/eks-anywhere/pkg/api/v1alpha1"
+	"github.com/aws/eks-anywhere/pkg/cluster"
+	"github.com/aws/eks-anywhere/pkg/constants"
 	mockCrypto "github.com/aws/eks-anywhere/pkg/crypto/mocks"
 	mocknutanix "github.com/aws/eks-anywhere/pkg/providers/nutanix/mocks"
 	"github.com/aws/eks-anywhere/pkg/utils/ptr"
@@ -638,6 +640,70 @@ func TestNutanixValidatorValidateMachineConfig(t *testing.T) {
 			},
 			expectedError: "failed to find category value",
 		},
+		{
+			name: "invalid gpu identifier type",
+			setup: func(machineConf *anywherev1.NutanixMachineConfig, mockClient *mocknutanix.MockClient, validator *mockCrypto.MockTlsValidator, transport *mocknutanix.MockRoundTripper) *Validator {
+				mockClient.EXPECT().ListCluster(gomock.Any(), gomock.Any()).Return(fakeClusterList(), nil).Times(2)
+				mockClient.EXPECT().ListSubnet(gomock.Any(), gomock.Any()).Return(fakeSubnetList(), nil)
+				mockClient.EXPECT().ListImage(gomock.Any(), gomock.Any()).Return(fakeImageList(), nil)
+				machineConf.Spec.GPUs = []anywherev1.NutanixGPUIdentifier{
+					{
+						Type: "invalid",
+					},
+				}
+				clientCache := &ClientCache{clients: map[string]Client{"test": mockClient}}
+				return NewValidator(clientCache, validator, &http.Client{Transport: transport})
+			},
+			expectedError: "invalid GPU identifier type",
+		},
+		{
+			name: "missing GPU type",
+			setup: func(machineConf *anywherev1.NutanixMachineConfig, mockClient *mocknutanix.MockClient, validator *mockCrypto.MockTlsValidator, transport *mocknutanix.MockRoundTripper) *Validator {
+				mockClient.EXPECT().ListCluster(gomock.Any(), gomock.Any()).Return(fakeClusterList(), nil).Times(2)
+				mockClient.EXPECT().ListSubnet(gomock.Any(), gomock.Any()).Return(fakeSubnetList(), nil)
+				mockClient.EXPECT().ListImage(gomock.Any(), gomock.Any()).Return(fakeImageList(), nil)
+				machineConf.Spec.GPUs = []anywherev1.NutanixGPUIdentifier{
+					{
+						Type: "",
+					},
+				}
+				clientCache := &ClientCache{clients: map[string]Client{"test": mockClient}}
+				return NewValidator(clientCache, validator, &http.Client{Transport: transport})
+			},
+			expectedError: "missing GPU type",
+		},
+		{
+			name: "missing GPU device ID",
+			setup: func(machineConf *anywherev1.NutanixMachineConfig, mockClient *mocknutanix.MockClient, validator *mockCrypto.MockTlsValidator, transport *mocknutanix.MockRoundTripper) *Validator {
+				mockClient.EXPECT().ListCluster(gomock.Any(), gomock.Any()).Return(fakeClusterList(), nil).Times(2)
+				mockClient.EXPECT().ListSubnet(gomock.Any(), gomock.Any()).Return(fakeSubnetList(), nil)
+				mockClient.EXPECT().ListImage(gomock.Any(), gomock.Any()).Return(fakeImageList(), nil)
+				machineConf.Spec.GPUs = []anywherev1.NutanixGPUIdentifier{
+					{
+						Type: "deviceID",
+					},
+				}
+				clientCache := &ClientCache{clients: map[string]Client{"test": mockClient}}
+				return NewValidator(clientCache, validator, &http.Client{Transport: transport})
+			},
+			expectedError: "missing GPU device ID",
+		},
+		{
+			name: "missing GPU name",
+			setup: func(machineConf *anywherev1.NutanixMachineConfig, mockClient *mocknutanix.MockClient, validator *mockCrypto.MockTlsValidator, transport *mocknutanix.MockRoundTripper) *Validator {
+				mockClient.EXPECT().ListCluster(gomock.Any(), gomock.Any()).Return(fakeClusterList(), nil).Times(2)
+				mockClient.EXPECT().ListSubnet(gomock.Any(), gomock.Any()).Return(fakeSubnetList(), nil)
+				mockClient.EXPECT().ListImage(gomock.Any(), gomock.Any()).Return(fakeImageList(), nil)
+				machineConf.Spec.GPUs = []anywherev1.NutanixGPUIdentifier{
+					{
+						Type: "name",
+					},
+				}
+				clientCache := &ClientCache{clients: map[string]Client{"test": mockClient}}
+				return NewValidator(clientCache, validator, &http.Client{Transport: transport})
+			},
+			expectedError: "missing GPU name",
+		},
 	}
 
 	for _, tc := range tests {
@@ -658,6 +724,584 @@ func TestNutanixValidatorValidateMachineConfig(t *testing.T) {
 	}
 }
 
+func fakeHostList() *v3.HostListResponse { // fixture: three hosts, each referencing a different cluster UUID
+	return &v3.HostListResponse{
+		Entities: []*v3.HostResponse{
+			{
+				Status: &v3.HostStatus{ // first host: 4 "Ampere 40" passthrough GPUs (2 assignable) and 16 assignable "NVIDIA A40-1Q" vGPUs
+					ClusterReference: &v3.ReferenceValues{
+						UUID: "a15f6966-bfc7-4d1e-8575-224096fc1cdb",
+					},
+					Resources: &v3.HostResources{
+						GPUList: []*v3.GPU{
+							{
+								Assignable: false,
+								DeviceID:   utils.Int64Ptr(8757),
+								Name:       "Ampere 40",
+								Mode:       "PASSTHROUGH_COMPUTE",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(8757),
+								Name:       "Ampere 40",
+								Mode:       "PASSTHROUGH_COMPUTE",
+							},
+							{
+								Assignable: false,
+								DeviceID:   utils.Int64Ptr(8757),
+								Name:       "Ampere 40",
+								Mode:       "PASSTHROUGH_COMPUTE",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(8757),
+								Name:       "Ampere 40",
+								Mode:       "PASSTHROUGH_COMPUTE",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(557),
+								Name:       "NVIDIA A40-1Q",
+								Mode:       "VIRTUAL",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(557),
+								Name:       "NVIDIA A40-1Q",
+								Mode:       "VIRTUAL",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(557),
+								Name:       "NVIDIA A40-1Q",
+								Mode:       "VIRTUAL",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(557),
+								Name:       "NVIDIA A40-1Q",
+								Mode:       "VIRTUAL",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(557),
+								Name:       "NVIDIA A40-1Q",
+								Mode:       "VIRTUAL",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(557),
+								Name:       "NVIDIA A40-1Q",
+								Mode:       "VIRTUAL",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(557),
+								Name:       "NVIDIA A40-1Q",
+								Mode:       "VIRTUAL",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(557),
+								Name:       "NVIDIA A40-1Q",
+								Mode:       "VIRTUAL",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(557),
+								Name:       "NVIDIA A40-1Q",
+								Mode:       "VIRTUAL",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(557),
+								Name:       "NVIDIA A40-1Q",
+								Mode:       "VIRTUAL",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(557),
+								Name:       "NVIDIA A40-1Q",
+								Mode:       "VIRTUAL",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(557),
+								Name:       "NVIDIA A40-1Q",
+								Mode:       "VIRTUAL",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(557),
+								Name:       "NVIDIA A40-1Q",
+								Mode:       "VIRTUAL",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(557),
+								Name:       "NVIDIA A40-1Q",
+								Mode:       "VIRTUAL",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(557),
+								Name:       "NVIDIA A40-1Q",
+								Mode:       "VIRTUAL",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(557),
+								Name:       "NVIDIA A40-1Q",
+								Mode:       "VIRTUAL",
+							},
+						},
+					},
+				},
+			},
+			{
+				Status: &v3.HostStatus{ // second host: 6 "Ampere 40" passthrough GPUs, 4 of them assignable
+					ClusterReference: &v3.ReferenceValues{
+						UUID: "4d69ca7d-022f-49d1-a454-74535993bda4",
+					},
+					Resources: &v3.HostResources{
+						GPUList: []*v3.GPU{
+							{
+								Assignable: false,
+								DeviceID:   utils.Int64Ptr(8757),
+								Name:       "Ampere 40",
+								Mode:       "PASSTHROUGH_COMPUTE",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(8757),
+								Name:       "Ampere 40",
+								Mode:       "PASSTHROUGH_COMPUTE",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(8757),
+								Name:       "Ampere 40",
+								Mode:       "PASSTHROUGH_COMPUTE",
+							},
+							{
+								Assignable: false,
+								DeviceID:   utils.Int64Ptr(8757),
+								Name:       "Ampere 40",
+								Mode:       "PASSTHROUGH_COMPUTE",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(8757),
+								Name:       "Ampere 40",
+								Mode:       "PASSTHROUGH_COMPUTE",
+							},
+							{
+								Assignable: true,
+								DeviceID:   utils.Int64Ptr(8757),
+								Name:       "Ampere 40",
+								Mode:       "PASSTHROUGH_COMPUTE",
+							},
+						},
+					},
+				},
+			},
+			{
+				Status: &v3.HostStatus{ // third host: resources present but no GPU list
+					ClusterReference: &v3.ReferenceValues{
+						UUID: "e0b1dfc7-5447-410f-b708-f9603e9be79a",
+					},
+					Resources: &v3.HostResources{},
+				},
+			},
+		},
+	}
+}
+
+// fakeEmptyHostList returns a host list response whose Entities slice is
+// empty (non-nil), i.e. a response that contains no hosts.
+func fakeEmptyHostList() *v3.HostListResponse {
+	return &v3.HostListResponse{Entities: []*v3.HostResponse{}}
+}
+
+func fakeClusterListForFreeGPUTest() *v3.ClusterListIntentResponse { // fixture: three AOS clusters whose UUIDs match the cluster references in fakeHostList
+	return &v3.ClusterListIntentResponse{
+		Entities: []*v3.ClusterIntentResponse{
+			{
+				Metadata: &v3.Metadata{
+					UUID: utils.StringPtr("a15f6966-bfc7-4d1e-8575-224096fc1cdb"), // cluster of the first host in fakeHostList
+				},
+				Spec: &v3.Cluster{
+					Name: utils.StringPtr("prism-cluster"),
+				},
+				Status: &v3.ClusterDefStatus{
+					Resources: &v3.ClusterObj{
+						Config: &v3.ClusterConfig{
+							ServiceList: []*string{utils.StringPtr("AOS")},
+						},
+					},
+				},
+			},
+			{
+				Metadata: &v3.Metadata{
+					UUID: utils.StringPtr("4d69ca7d-022f-49d1-a454-74535993bda4"), // cluster of the second host in fakeHostList
+				},
+				Spec: &v3.Cluster{
+					Name: utils.StringPtr("prism-cluster-1"),
+				},
+				Status: &v3.ClusterDefStatus{
+					Resources: &v3.ClusterObj{
+						Config: &v3.ClusterConfig{
+							ServiceList: []*string{utils.StringPtr("AOS")},
+						},
+					},
+				},
+			},
+			{
+				Metadata: &v3.Metadata{
+					UUID: utils.StringPtr("e0b1dfc7-5447-410f-b708-f9603e9be79a"), // cluster of the third (GPU-less) host in fakeHostList
+				},
+				Spec: &v3.Cluster{
+					Name: utils.StringPtr("prism-cluster-2"),
+				},
+				Status: &v3.ClusterDefStatus{
+					Resources: &v3.ClusterObj{
+						Config: &v3.ClusterConfig{
+							ServiceList: []*string{utils.StringPtr("AOS")},
+						},
+					},
+				},
+			},
+		},
+	}
+}
+
+func TestNutanixValidatorValidateFreeGPU(t *testing.T) {
+	ctrl := gomock.NewController(t)
+
+	tests := []struct {
+		name          string
+		setup         func(map[string]*anywherev1.NutanixMachineConfig, *mocknutanix.MockClient, *mockCrypto.MockTlsValidator, *mocknutanix.MockRoundTripper) *Validator
+		expectedError string
+	}{
+		{
+			name: "not enough GPU resources available by name",
+			setup: func(machineConfigs map[string]*anywherev1.NutanixMachineConfig, mockClient *mocknutanix.MockClient, validator *mockCrypto.MockTlsValidator, transport *mocknutanix.MockRoundTripper) *Validator {
+				mockClient.EXPECT().ListCluster(gomock.Any(), gomock.Any()).Return(fakeClusterListForFreeGPUTest(), nil).AnyTimes()
+				mockClient.EXPECT().ListAllHost(gomock.Any()).Return(fakeHostList(), nil)
+				machineConfigs["cp"].Spec.Cluster = anywherev1.NutanixResourceIdentifier{
+					Type: anywherev1.NutanixIdentifierUUID,
+					UUID: utils.StringPtr("a15f6966-bfc7-4d1e-8575-224096fc1cdb"),
+				}
+				machineConfigs["cp"].Spec.GPUs = []anywherev1.NutanixGPUIdentifier{
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+					{
+						Type:     "deviceID",
+						DeviceID: utils.Int64Ptr(8757),
+					},
+				}
+				machineConfigs["worker"].Spec.Cluster = anywherev1.NutanixResourceIdentifier{
+					Type: anywherev1.NutanixIdentifierUUID,
+					UUID: utils.StringPtr("a15f6966-bfc7-4d1e-8575-224096fc1cdb"),
+				}
+				machineConfigs["worker"].Spec.GPUs = []anywherev1.NutanixGPUIdentifier{
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+				}
+				clientCache := &ClientCache{clients: map[string]Client{"test": mockClient}}
+				return NewValidator(clientCache, validator, &http.Client{Transport: transport})
+			},
+			expectedError: "GPU with name Ampere 40 not found",
+		},
+		{
+			name: "not enough GPU resources available by name in different PE (UUID)",
+			setup: func(machineConfigs map[string]*anywherev1.NutanixMachineConfig, mockClient *mocknutanix.MockClient, validator *mockCrypto.MockTlsValidator, transport *mocknutanix.MockRoundTripper) *Validator {
+				mockClient.EXPECT().ListCluster(gomock.Any(), gomock.Any()).Return(fakeClusterListForFreeGPUTest(), nil).AnyTimes()
+				mockClient.EXPECT().ListAllHost(gomock.Any()).Return(fakeHostList(), nil)
+				machineConfigs["cp"].Spec.Cluster = anywherev1.NutanixResourceIdentifier{
+					Type: anywherev1.NutanixIdentifierUUID,
+					UUID: utils.StringPtr("e0b1dfc7-5447-410f-b708-f9603e9be79a"),
+				}
+				machineConfigs["cp"].Spec.GPUs = []anywherev1.NutanixGPUIdentifier{}
+				machineConfigs["worker"].Spec.Cluster = anywherev1.NutanixResourceIdentifier{
+					Type: anywherev1.NutanixIdentifierUUID,
+					UUID: utils.StringPtr("a15f6966-bfc7-4d1e-8575-224096fc1cdb"),
+				}
+				machineConfigs["worker"].Spec.GPUs = []anywherev1.NutanixGPUIdentifier{
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+				}
+				clientCache := &ClientCache{clients: map[string]Client{"test": mockClient}}
+				return NewValidator(clientCache, validator, &http.Client{Transport: transport})
+			},
+			expectedError: "GPU with name Ampere 40 not found on cluster with UUID a15f6966-bfc7-4d1e-8575-224096fc1cdb",
+		},
+		{
+			name: "not enough GPU resources available by deviceID in different PE (UUID)",
+			setup: func(machineConfigs map[string]*anywherev1.NutanixMachineConfig, mockClient *mocknutanix.MockClient, validator *mockCrypto.MockTlsValidator, transport *mocknutanix.MockRoundTripper) *Validator {
+				mockClient.EXPECT().ListCluster(gomock.Any(), gomock.Any()).Return(fakeClusterListForFreeGPUTest(), nil).AnyTimes()
+				mockClient.EXPECT().ListAllHost(gomock.Any()).Return(fakeHostList(), nil)
+				machineConfigs["cp"].Spec.Cluster = anywherev1.NutanixResourceIdentifier{
+					Type: anywherev1.NutanixIdentifierUUID,
+					UUID: utils.StringPtr("e0b1dfc7-5447-410f-b708-f9603e9be79a"),
+				}
+				machineConfigs["cp"].Spec.GPUs = []anywherev1.NutanixGPUIdentifier{}
+				machineConfigs["worker"].Spec.Cluster = anywherev1.NutanixResourceIdentifier{
+					Type: anywherev1.NutanixIdentifierUUID,
+					UUID: utils.StringPtr("a15f6966-bfc7-4d1e-8575-224096fc1cdb"),
+				}
+				machineConfigs["worker"].Spec.GPUs = []anywherev1.NutanixGPUIdentifier{
+					{
+						Type:     "deviceID",
+						DeviceID: utils.Int64Ptr(8757),
+					},
+					{
+						Type:     "deviceID",
+						DeviceID: utils.Int64Ptr(8757),
+					},
+					{
+						Type:     "deviceID",
+						DeviceID: utils.Int64Ptr(8757),
+					},
+				}
+				clientCache := &ClientCache{clients: map[string]Client{"test": mockClient}}
+				return NewValidator(clientCache, validator, &http.Client{Transport: transport})
+			},
+			expectedError: "GPU with device ID 8757 not found on cluster with UUID a15f6966-bfc7-4d1e-8575-224096fc1cdb",
+		},
+		{
+			name: "not enough GPU resources available by deviceID",
+			setup: func(machineConfigs map[string]*anywherev1.NutanixMachineConfig, mockClient *mocknutanix.MockClient, validator *mockCrypto.MockTlsValidator, transport *mocknutanix.MockRoundTripper) *Validator {
+				mockClient.EXPECT().ListCluster(gomock.Any(), gomock.Any()).Return(fakeClusterListForFreeGPUTest(), nil).AnyTimes()
+				mockClient.EXPECT().ListAllHost(gomock.Any()).Return(fakeHostList(), nil)
+				machineConfigs["cp"].Spec.Cluster = anywherev1.NutanixResourceIdentifier{
+					Type: anywherev1.NutanixIdentifierUUID,
+					UUID: utils.StringPtr("a15f6966-bfc7-4d1e-8575-224096fc1cdb"),
+				}
+				machineConfigs["cp"].Spec.GPUs = []anywherev1.NutanixGPUIdentifier{
+					{
+						Type:     "deviceID",
+						DeviceID: utils.Int64Ptr(8757),
+					},
+					{
+						Type:     "deviceID",
+						DeviceID: utils.Int64Ptr(8757),
+					},
+				}
+				machineConfigs["worker"].Spec.Cluster = anywherev1.NutanixResourceIdentifier{
+					Type: anywherev1.NutanixIdentifierUUID,
+					UUID: utils.StringPtr("a15f6966-bfc7-4d1e-8575-224096fc1cdb"),
+				}
+				machineConfigs["worker"].Spec.GPUs = []anywherev1.NutanixGPUIdentifier{
+					{
+						Type:     "deviceID",
+						DeviceID: utils.Int64Ptr(8757),
+					},
+					{
+						Type:     "deviceID",
+						DeviceID: utils.Int64Ptr(8757),
+					},
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+				}
+				clientCache := &ClientCache{clients: map[string]Client{"test": mockClient}}
+				return NewValidator(clientCache, validator, &http.Client{Transport: transport})
+			},
+			expectedError: "GPU with device ID 8757 not found",
+		},
+		{
+			name: "no GPU resources found",
+			setup: func(machineConfigs map[string]*anywherev1.NutanixMachineConfig, mockClient *mocknutanix.MockClient, validator *mockCrypto.MockTlsValidator, transport *mocknutanix.MockRoundTripper) *Validator {
+				mockClient.EXPECT().ListCluster(gomock.Any(), gomock.Any()).Return(fakeClusterListForFreeGPUTest(), nil).AnyTimes()
+				mockClient.EXPECT().ListAllHost(gomock.Any()).Return(fakeEmptyHostList(), nil)
+				machineConfigs["worker"].Spec.GPUs = []anywherev1.NutanixGPUIdentifier{
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+					{
+						Type:     "deviceID",
+						DeviceID: utils.Int64Ptr(8757),
+					},
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+				}
+				clientCache := &ClientCache{clients: map[string]Client{"test": mockClient}}
+				return NewValidator(clientCache, validator, &http.Client{Transport: transport})
+			},
+			expectedError: "No GPUs found",
+		},
+		{
+			name: "no GPU resources found: ListAllHost failed",
+			setup: func(machineConfigs map[string]*anywherev1.NutanixMachineConfig, mockClient *mocknutanix.MockClient, validator *mockCrypto.MockTlsValidator, transport *mocknutanix.MockRoundTripper) *Validator {
+				mockClient.EXPECT().ListCluster(gomock.Any(), gomock.Any()).Return(fakeClusterListForFreeGPUTest(), nil).AnyTimes()
+				mockClient.EXPECT().ListAllHost(gomock.Any()).Return(nil, fmt.Errorf("failed to list hosts"))
+				machineConfigs["worker"].Spec.GPUs = []anywherev1.NutanixGPUIdentifier{
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+					{
+						Type:     "deviceID",
+						DeviceID: utils.Int64Ptr(8757),
+					},
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+				}
+				clientCache := &ClientCache{clients: map[string]Client{"test": mockClient}}
+				return NewValidator(clientCache, validator, &http.Client{Transport: transport})
+			},
+			expectedError: "No GPUs found",
+		},
+		{
+			name: "mixed passthrough and vGPU mode GPUs in a machine config",
+			setup: func(machineConfigs map[string]*anywherev1.NutanixMachineConfig, mockClient *mocknutanix.MockClient, validator *mockCrypto.MockTlsValidator, transport *mocknutanix.MockRoundTripper) *Validator {
+				mockClient.EXPECT().ListCluster(gomock.Any(), gomock.Any()).Return(fakeClusterListForFreeGPUTest(), nil).AnyTimes()
+				mockClient.EXPECT().ListAllHost(gomock.Any()).Return(fakeHostList(), nil)
+				machineConfigs["worker"].Spec.GPUs = []anywherev1.NutanixGPUIdentifier{
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+					{
+						Type: "name",
+						Name: "NVIDIA A40-1Q",
+					},
+				}
+				clientCache := &ClientCache{clients: map[string]Client{"test": mockClient}}
+				return NewValidator(clientCache, validator, &http.Client{Transport: transport})
+			},
+			expectedError: "all GPUs in a machine config must be of the same mode, vGPU or passthrough",
+		},
+		{
+			name: "GPUs validation successful",
+			setup: func(machineConfigs map[string]*anywherev1.NutanixMachineConfig, mockClient *mocknutanix.MockClient, validator *mockCrypto.MockTlsValidator, transport *mocknutanix.MockRoundTripper) *Validator {
+				mockClient.EXPECT().ListCluster(gomock.Any(), gomock.Any()).Return(fakeClusterListForFreeGPUTest(), nil).AnyTimes()
+				mockClient.EXPECT().ListAllHost(gomock.Any()).Return(fakeHostList(), nil)
+				machineConfigs["cp"].Spec.Cluster = anywherev1.NutanixResourceIdentifier{
+					Type: anywherev1.NutanixIdentifierUUID,
+					UUID: utils.StringPtr("4d69ca7d-022f-49d1-a454-74535993bda4"),
+				}
+				machineConfigs["cp"].Spec.GPUs = []anywherev1.NutanixGPUIdentifier{
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+					{
+						Type:     "deviceID",
+						DeviceID: utils.Int64Ptr(8757),
+					},
+				}
+
+				machineConfigs["worker"].Spec.GPUs = []anywherev1.NutanixGPUIdentifier{
+					{
+						Type: "name",
+						Name: "Ampere 40",
+					},
+				}
+				clientCache := &ClientCache{clients: map[string]Client{"test": mockClient}}
+				return NewValidator(clientCache, validator, &http.Client{Transport: transport})
+			},
+			expectedError: "",
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			machineConfigsNames := []string{"cp", "etcd", "worker"}
+			machineConfigs := make(map[string]*anywherev1.NutanixMachineConfig)
+			// Every role gets its own copy of the base spec so setup can mutate one config without touching the others.
+			for _, name := range machineConfigsNames {
+				machineConfigs[name] = &anywherev1.NutanixMachineConfig{}
+				err := yaml.Unmarshal([]byte(nutanixMachineConfigSpec), machineConfigs[name])
+				require.NoError(t, err) // check the decode before using its result
+				machineConfigs[name].Name += "-" + name
+			}
+			// setup registers the mock expectations and applies the case-specific machine config overrides.
+			mockClient := mocknutanix.NewMockClient(ctrl)
+			validator := tc.setup(machineConfigs, mockClient, mockCrypto.NewMockTlsValidator(ctrl), mocknutanix.NewMockRoundTripper(ctrl))
+			spec := &cluster.Spec{
+				Config: &cluster.Config{
+					Cluster: &anywherev1.Cluster{
+						Spec: anywherev1.ClusterSpec{
+							ControlPlaneConfiguration: anywherev1.ControlPlaneConfiguration{
+								Count: 1,
+								MachineGroupRef: &anywherev1.Ref{
+									Name: "eksa-unit-test-cp",
+									Kind: constants.NutanixMachineConfigKind,
+								},
+							},
+							WorkerNodeGroupConfigurations: []anywherev1.WorkerNodeGroupConfiguration{
+								{
+									Count: utils.IntPtr(2),
+									MachineGroupRef: &anywherev1.Ref{
+										Name: "eksa-unit-test-worker",
+										Kind: constants.NutanixMachineConfigKind,
+									},
+								},
+							},
+							ExternalEtcdConfiguration: &anywherev1.ExternalEtcdConfiguration{
+								Count: 1,
+								MachineGroupRef: &anywherev1.Ref{
+									Name: "eksa-unit-test-etcd",
+									Kind: constants.NutanixMachineConfigKind,
+								},
+							},
+						},
+					},
+					NutanixMachineConfigs: machineConfigs,
+				},
+			}
+			err := validator.validateFreeGPU(context.Background(), mockClient, spec)
+			if tc.expectedError != "" {
+				assert.ErrorContains(t, err, tc.expectedError) // a nil err is reported as a failure instead of panicking on err.Error()
+			} else {
+				assert.NoError(t, err)
+			}
+		})
+	}
+}
+
 func TestNutanixValidatorValidateDatacenterConfig(t *testing.T) {
 	tests := []struct {
 		name       string