diff --git a/controllers/constants/constants.go b/controllers/constants/constants.go index 11b63ace..5f69ea98 100644 --- a/controllers/constants/constants.go +++ b/controllers/constants/constants.go @@ -32,7 +32,10 @@ const ( LabelEnableAuth = "enable-auth" LabelEnableRoute = "enable-route" - CapabilityServiceMeshAuthorization = "CapabilityServiceMeshAuthorization" + CapabilityServiceMeshAuthorization = "CapabilityServiceMeshAuthorization" + InferenceServiceDeploymentModeAnnotation = "serving.kserve.io/deploymentMode" + KserveConfigMapName = "inferenceservice-config" + KServeWithServiceMeshComponent = "kserve-service-mesh" ) // model registry diff --git a/controllers/inferenceservice_controller.go b/controllers/inferenceservice_controller.go index 600c940c..69a6d078 100644 --- a/controllers/inferenceservice_controller.go +++ b/controllers/inferenceservice_controller.go @@ -22,6 +22,7 @@ import ( kservev1alpha1 "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" authorinov1beta2 "github.com/kuadrant/authorino/api/v1beta2" + "github.com/opendatahub-io/odh-model-controller/controllers/constants" "github.com/opendatahub-io/odh-model-controller/controllers/reconcilers" "github.com/opendatahub-io/odh-model-controller/controllers/utils" routev1 "github.com/openshift/api/route/v1" @@ -172,7 +173,7 @@ func (r *OpenshiftInferenceServiceReconciler) SetupWithManager(mgr ctrl.Manager) } })) - kserveWithMeshEnabled, kserveWithMeshEnabledErr := utils.VerifyIfComponentIsEnabled(context.Background(), mgr.GetClient(), utils.KServeWithServiceMeshComponent) + kserveWithMeshEnabled, kserveWithMeshEnabledErr := utils.VerifyIfComponentIsEnabled(context.Background(), mgr.GetClient(), constants.KServeWithServiceMeshComponent) if kserveWithMeshEnabledErr != nil { r.log.V(1).Error(kserveWithMeshEnabledErr, "could not determine if kserve have service mesh enabled") } diff --git 
a/controllers/kserve_inferenceservice_controller_metrics_test.go b/controllers/kserve_inferenceservice_controller_metrics_test.go index e52e8a0c..c589631b 100644 --- a/controllers/kserve_inferenceservice_controller_metrics_test.go +++ b/controllers/kserve_inferenceservice_controller_metrics_test.go @@ -2,6 +2,7 @@ package controllers import ( "github.com/opendatahub-io/odh-model-controller/controllers/constants" + utils "github.com/opendatahub-io/odh-model-controller/controllers/utils" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "strings" @@ -39,7 +40,7 @@ var _ = Describe("The KServe Dashboard reconciler", func() { return servingRuntime } - createInferenceService := func(namespace, name string, path string) *kservev1beta1.InferenceService { + createInferenceService := func(namespace, name string, path string, deploymentMode utils.IsvcDeploymentMode) *kservev1beta1.InferenceService { inferenceService := &kservev1beta1.InferenceService{} err := convertToStructuredResource(path, inferenceService) Expect(err).NotTo(HaveOccurred()) @@ -47,6 +48,10 @@ var _ = Describe("The KServe Dashboard reconciler", func() { if len(name) != 0 { inferenceService.Name = name } + if deploymentMode == utils.RawDeployment { + inferenceService.ObjectMeta.Annotations = map[string]string{} + inferenceService.Annotations[constants.InferenceServiceDeploymentModeAnnotation] = string(utils.RawDeployment) + } if err := cli.Create(ctx, inferenceService); err != nil && !errors.IsAlreadyExists(err) { Fail(err.Error()) } @@ -64,10 +69,10 @@ var _ = Describe("The KServe Dashboard reconciler", func() { }) - When("deploying a Kserve model", func() { + When("deploying a Kserve Serverless model", func() { It("if the runtime is supported for metrics, it should create a configmap with prometheus queries", func() { _ = createServingRuntime(testNs, KserveServingRuntimePath1) - _ = createInferenceService(testNs, KserveOvmsInferenceServiceName, KserveInferenceServicePath1) + _ 
= createInferenceService(testNs, KserveOvmsInferenceServiceName, KserveInferenceServicePath1, utils.Serverless) metricsConfigMap, err := waitForConfigMap(cli, testNs, KserveOvmsInferenceServiceName+constants.KserveMetricsConfigMapNameSuffix, 30, 1*time.Second) Expect(err).NotTo(HaveOccurred()) @@ -89,7 +94,78 @@ var _ = Describe("The KServe Dashboard reconciler", func() { It("if the runtime is not supported for metrics, it should create a configmap with the unsupported config", func() { _ = createServingRuntime(testNs, UnsupprtedMetricsServingRuntimePath) - _ = createInferenceService(testNs, UnsupportedMetricsInferenceServiceName, UnsupportedMetricsInferenceServicePath) + _ = createInferenceService(testNs, UnsupportedMetricsInferenceServiceName, UnsupportedMetricsInferenceServicePath, utils.Serverless) + + metricsConfigMap, err := waitForConfigMap(cli, testNs, UnsupportedMetricsInferenceServiceName+constants.KserveMetricsConfigMapNameSuffix, 30, 1*time.Second) + Expect(err).NotTo(HaveOccurred()) + Expect(metricsConfigMap).NotTo(BeNil()) + + expectedmetricsConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: UnsupportedMetricsInferenceServiceName + constants.KserveMetricsConfigMapNameSuffix, + Namespace: testNs, + }, + Data: map[string]string{ + "supported": "false", + }, + } + Expect(compareConfigMap(metricsConfigMap, expectedmetricsConfigMap)).Should(BeTrue()) + }) + + It("if the isvc does not have a runtime specified, an unsupported metrics configmap should be created", func() { + _ = createInferenceService(testNs, NilRuntimeInferenceServiceName, NilRuntimeInferenceServicePath, utils.Serverless) + + metricsConfigMap, err := waitForConfigMap(cli, testNs, NilRuntimeInferenceServiceName+constants.KserveMetricsConfigMapNameSuffix, 30, 1*time.Second) + Expect(err).NotTo(HaveOccurred()) + Expect(metricsConfigMap).NotTo(BeNil()) + + expectedmetricsConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: NilRuntimeInferenceServiceName 
+ constants.KserveMetricsConfigMapNameSuffix, + Namespace: testNs, + }, + Data: map[string]string{ + "supported": "false", + }, + } + Expect(compareConfigMap(metricsConfigMap, expectedmetricsConfigMap)).Should(BeTrue()) + }) + }) + + When("deploying a Kserve RawDeployment model", func() { + It("if the runtime is supported for metrics, it should create a configmap with prometheus queries and create a metrics service and servicemonitor", func() { + _ = createServingRuntime(testNs, KserveServingRuntimePath1) + _ = createInferenceService(testNs, KserveOvmsInferenceServiceName, KserveInferenceServicePath1, utils.RawDeployment) + + metricsConfigMap, err := waitForConfigMap(cli, testNs, KserveOvmsInferenceServiceName+constants.KserveMetricsConfigMapNameSuffix, 30, 1*time.Second) + Expect(err).NotTo(HaveOccurred()) + Expect(metricsConfigMap).NotTo(BeNil()) + + finaldata := substituteVariablesInQueries(constants.OvmsMetricsData, testNs, KserveOvmsInferenceServiceName, constants.IntervalValue) + expectedmetricsConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: KserveOvmsInferenceServiceName + constants.KserveMetricsConfigMapNameSuffix, + Namespace: testNs, + }, + Data: map[string]string{ + "supported": "true", + "metrics": finaldata, + }, + } + Expect(compareConfigMap(metricsConfigMap, expectedmetricsConfigMap)).Should(BeTrue()) + + metricsService, err := waitForService(cli, testNs, KserveOvmsInferenceServiceName+"-metrics", 30, 1*time.Second) + Expect(err).NotTo(HaveOccurred()) + Expect(metricsService).NotTo(BeNil()) + + metricsServiceMonitor, err := waitForServiceMonitor(cli, testNs, KserveOvmsInferenceServiceName+"-metrics", 30, 1*time.Second) + Expect(err).NotTo(HaveOccurred()) + Expect(metricsServiceMonitor).NotTo(BeNil()) + }) + + It("if the runtime is not supported for metrics, it should create a configmap with the unsupported config and create a metrics service and servicemonitor", func() { + _ = createServingRuntime(testNs, 
UnsupprtedMetricsServingRuntimePath) + _ = createInferenceService(testNs, UnsupportedMetricsInferenceServiceName, UnsupportedMetricsInferenceServicePath, utils.RawDeployment) metricsConfigMap, err := waitForConfigMap(cli, testNs, UnsupportedMetricsInferenceServiceName+constants.KserveMetricsConfigMapNameSuffix, 30, 1*time.Second) Expect(err).NotTo(HaveOccurred()) @@ -105,10 +181,18 @@ var _ = Describe("The KServe Dashboard reconciler", func() { }, } Expect(compareConfigMap(metricsConfigMap, expectedmetricsConfigMap)).Should(BeTrue()) + + metricsService, err := waitForService(cli, testNs, UnsupportedMetricsInferenceServiceName+"-metrics", 30, 1*time.Second) + Expect(err).NotTo(HaveOccurred()) + Expect(metricsService).NotTo(BeNil()) + + metricsServiceMonitor, err := waitForServiceMonitor(cli, testNs, UnsupportedMetricsInferenceServiceName+"-metrics", 30, 1*time.Second) + Expect(err).NotTo(HaveOccurred()) + Expect(metricsServiceMonitor).NotTo(BeNil()) }) It("if the isvc does not have a runtime specified, an unsupported metrics configmap should be created", func() { - _ = createInferenceService(testNs, NilRuntimeInferenceServiceName, NilRuntimeInferenceServicePath) + _ = createInferenceService(testNs, NilRuntimeInferenceServiceName, NilRuntimeInferenceServicePath, utils.RawDeployment) metricsConfigMap, err := waitForConfigMap(cli, testNs, NilRuntimeInferenceServiceName+constants.KserveMetricsConfigMapNameSuffix, 30, 1*time.Second) Expect(err).NotTo(HaveOccurred()) @@ -130,7 +214,7 @@ var _ = Describe("The KServe Dashboard reconciler", func() { When("deleting the deployed models", func() { It("it should delete the associated configmap", func() { _ = createServingRuntime(testNs, KserveServingRuntimePath1) - OvmsInferenceService := createInferenceService(testNs, KserveOvmsInferenceServiceName, KserveInferenceServicePath1) + OvmsInferenceService := createInferenceService(testNs, KserveOvmsInferenceServiceName, KserveInferenceServicePath1, utils.Serverless) 
Expect(cli.Delete(ctx, OvmsInferenceService)).Should(Succeed()) Eventually(func() error { @@ -141,7 +225,7 @@ var _ = Describe("The KServe Dashboard reconciler", func() { }, timeout, interval).ShouldNot(Succeed()) _ = createServingRuntime(testNs, UnsupprtedMetricsServingRuntimePath) - SklearnInferenceService := createInferenceService(testNs, UnsupportedMetricsInferenceServiceName, UnsupportedMetricsInferenceServicePath) + SklearnInferenceService := createInferenceService(testNs, UnsupportedMetricsInferenceServiceName, UnsupportedMetricsInferenceServicePath, utils.RawDeployment) Expect(cli.Delete(ctx, SklearnInferenceService)).Should(Succeed()) Eventually(func() error { diff --git a/controllers/reconcilers/kserve_metrics_dashboard_reconciler.go b/controllers/reconcilers/kserve_metrics_dashboard_reconciler.go index 0b3eb8e8..d8122b0e 100644 --- a/controllers/reconcilers/kserve_metrics_dashboard_reconciler.go +++ b/controllers/reconcilers/kserve_metrics_dashboard_reconciler.go @@ -25,7 +25,6 @@ import ( "strings" "github.com/go-logr/logr" - kservev1alpha1 "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" "github.com/opendatahub-io/odh-model-controller/controllers/comparators" "github.com/opendatahub-io/odh-model-controller/controllers/constants" @@ -93,37 +92,26 @@ func (r *KserveMetricsDashboardReconciler) Reconcile(ctx context.Context, log lo func (r *KserveMetricsDashboardReconciler) createDesiredResource(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) (*corev1.ConfigMap, error) { - var err error var servingRuntime string - runtime := &kservev1alpha1.ServingRuntime{} supported := false - // resolve SR - isvcRuntime := isvc.Spec.Predictor.Model.Runtime - if isvcRuntime == nil { - runtime, err = utils.FindSupportingRuntimeForISvc(ctx, r.client, log, isvc) - if err != nil { - if errwrap.Contains(err, constants.NoSuitableRuntimeError) { - configmap, err := 
r.createConfigMap(isvc, false, log)
-				if err != nil {
-					return nil, err
-				}
-				return configmap, nil
+	isvcRuntime, err := utils.FindSupportingRuntimeForISvc(ctx, r.client, log, isvc)
+	if err != nil {
+		if errwrap.Contains(err, constants.NoSuitableRuntimeError) {
+			configmap, err := r.createConfigMap(isvc, false, log)
+			if err != nil {
+				return nil, err
 			}
-			return nil, err
-		}
-	} else {
-		if err := r.client.Get(ctx, types.NamespacedName{Name: *isvcRuntime, Namespace: isvc.Namespace}, runtime); err != nil {
-			log.Error(err, "Could not determine servingruntime for isvc")
-			return nil, err
+			return configmap, nil
 		}
+		return nil, err
 	}
 
-	if (runtime.Spec.Containers == nil) || (len(runtime.Spec.Containers) < 1) {
+	if (isvcRuntime.Spec.Containers == nil) || (len(isvcRuntime.Spec.Containers) < 1) {
 		log.V(1).Info("Could not determine runtime image")
-		supported = false
+		return r.createConfigMap(isvc, false, log) // no container image to inspect: report metrics as unsupported instead of indexing an empty slice below
 	}
 
-	servingRuntimeImage := runtime.Spec.Containers[0].Image
+	servingRuntimeImage := isvcRuntime.Spec.Containers[0].Image
 	re := regexp.MustCompile(`/([^/@]+)[@:]`)
 	findImageName := re.FindStringSubmatch(servingRuntimeImage)
 	// sanity check for regex match, will fall back to a known string that will lead to a configmap for unsupported metrics
diff --git a/controllers/reconcilers/kserve_raw_inferenceservice_reconciler.go b/controllers/reconcilers/kserve_raw_inferenceservice_reconciler.go
index 33857c81..d5b2664c 100644
--- a/controllers/reconcilers/kserve_raw_inferenceservice_reconciler.go
+++ b/controllers/reconcilers/kserve_raw_inferenceservice_reconciler.go
@@ -17,6 +17,7 @@ package reconcilers
 
 import (
 	"context"
+	"github.com/hashicorp/go-multierror"
 
 	"github.com/go-logr/logr"
 	kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1"
@@ -26,16 +27,29 @@ import (
 var _ Reconciler = (*KserveRawInferenceServiceReconciler)(nil)
 
 type KserveRawInferenceServiceReconciler struct {
-	client client.Client
+	client                 client.Client
+	subResourceReconcilers []SubResourceReconciler
 }
 
 func 
NewKServeRawInferenceServiceReconciler(client client.Client) *KserveRawInferenceServiceReconciler { + + subResourceReconciler := []SubResourceReconciler{ + NewKServeRawMetricsServiceReconciler(client), + NewRawKServeMetricsServiceMonitorReconciler(client), + NewKserveMetricsDashboardReconciler(client), + } + return &KserveRawInferenceServiceReconciler{ - client: client, + client: client, + subResourceReconcilers: subResourceReconciler, } } -func (r *KserveRawInferenceServiceReconciler) Reconcile(_ context.Context, log logr.Logger, _ *kservev1beta1.InferenceService) error { - log.V(1).Info("No Reconciliation to be done for inferenceservice as it is using RawDeployment mode") - return nil +func (r *KserveRawInferenceServiceReconciler) Reconcile(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) error { + var reconcileErrors *multierror.Error + for _, reconciler := range r.subResourceReconcilers { + reconcileErrors = multierror.Append(reconcileErrors, reconciler.Reconcile(ctx, log, isvc)) + } + + return reconcileErrors.ErrorOrNil() } diff --git a/controllers/reconcilers/kserve_metrics_service_reconciler.go b/controllers/reconcilers/kserve_raw_metrics_service_reconciler.go similarity index 51% rename from controllers/reconcilers/kserve_metrics_service_reconciler.go rename to controllers/reconcilers/kserve_raw_metrics_service_reconciler.go index a006263c..dcde4cac 100644 --- a/controllers/reconcilers/kserve_metrics_service_reconciler.go +++ b/controllers/reconcilers/kserve_raw_metrics_service_reconciler.go @@ -19,41 +19,44 @@ import ( "context" "github.com/go-logr/logr" kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" "github.com/opendatahub-io/odh-model-controller/controllers/comparators" "github.com/opendatahub-io/odh-model-controller/controllers/processors" "github.com/opendatahub-io/odh-model-controller/controllers/resources" + 
"github.com/opendatahub-io/odh-model-controller/controllers/utils" v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "strconv" ) const ( inferenceServiceLabelName = "serving.kserve.io/inferenceservice" ) -var _ SubResourceReconciler = (*KserveMetricsServiceReconciler)(nil) +var _ SubResourceReconciler = (*KserveRawMetricsServiceReconciler)(nil) -type KserveMetricsServiceReconciler struct { +type KserveRawMetricsServiceReconciler struct { NoResourceRemoval client client.Client serviceHandler resources.ServiceHandler deltaProcessor processors.DeltaProcessor } -func NewKServeMetricsServiceReconciler(client client.Client) *KserveMetricsServiceReconciler { - return &KserveMetricsServiceReconciler{ +func NewKServeRawMetricsServiceReconciler(client client.Client) *KserveRawMetricsServiceReconciler { + return &KserveRawMetricsServiceReconciler{ client: client, serviceHandler: resources.NewServiceHandler(client), deltaProcessor: processors.NewDeltaProcessor(), } } -// TODO remove this reconcile loop in future versions -func (r *KserveMetricsServiceReconciler) Reconcile(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) error { - log.V(1).Info("Reconciling Metrics Service for InferenceService, checking if there are resource for deletion") - +func (r *KserveRawMetricsServiceReconciler) Reconcile(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) error { // Create Desired resource - desiredResource, err := r.createDesiredResource(log, isvc) + desiredResource, err := r.createDesiredResource(ctx, log, isvc) if err != nil { return err } @@ -71,15 +74,58 @@ func (r *KserveMetricsServiceReconciler) Reconcile(ctx context.Context, log logr return nil } -func (r *KserveMetricsServiceReconciler) createDesiredResource(log logr.Logger, isvc 
*kservev1beta1.InferenceService) (*v1.Service, error) {
-	return nil, nil
+func (r *KserveRawMetricsServiceReconciler) createDesiredResource(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) (*v1.Service, error) {
+
+	isvcRuntime, err := utils.FindSupportingRuntimeForISvc(ctx, r.client, log, isvc)
+	if err != nil {
+		return nil, err
+	}
+
+	if isvcRuntime.Spec.Annotations == nil || isvcRuntime.Spec.Annotations[constants.PrometheusPortAnnotationKey] == "" {
+		log.V(1).Info("No Prometheus annotations on ServingRuntime, skipping creation of metrics resources")
+		return nil, nil
+	}
+
+	prometheusPortAnnotationValue := isvcRuntime.Spec.Annotations[constants.PrometheusPortAnnotationKey]
+	prometheusPort, err := strconv.ParseInt(prometheusPortAnnotationValue, 10, 32) // bitSize 32: an out-of-range value is a parse error instead of silently wrapping in the int32 conversions below
+	if err != nil {
+		return nil, err
+	}
+	metricsService := &v1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      getMetricsServiceName(isvc),
+			Namespace: isvc.Namespace,
+			Labels: map[string]string{
+				"name": getMetricsServiceName(isvc),
+			},
+		},
+		Spec: v1.ServiceSpec{
+			Ports: []v1.ServicePort{
+				{
+					Name:       isvcRuntime.Name + "-metrics",
+					Protocol:   v1.ProtocolTCP,
+					Port:       int32(prometheusPort),
+					TargetPort: intstr.FromInt32(int32(prometheusPort)),
+				},
+			},
+			Type: v1.ServiceTypeClusterIP,
+			Selector: map[string]string{
+				inferenceServiceLabelName: isvc.Name,
+			},
+		},
+	}
+	if err := ctrl.SetControllerReference(isvc, metricsService, r.client.Scheme()); err != nil {
+		log.Error(err, "Unable to add OwnerReference to the Metrics Service")
+		return nil, err
+	}
+	return metricsService, nil
 }
 
-func (r *KserveMetricsServiceReconciler) getExistingResource(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) (*v1.Service, error) {
+func (r *KserveRawMetricsServiceReconciler) getExistingResource(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) (*v1.Service, error) {
 	return r.serviceHandler.FetchService(ctx, log, 
types.NamespacedName{Name: getMetricsServiceName(isvc), Namespace: isvc.Namespace}) } -func (r *KserveMetricsServiceReconciler) processDelta(ctx context.Context, log logr.Logger, desiredService *v1.Service, existingService *v1.Service) (err error) { +func (r *KserveRawMetricsServiceReconciler) processDelta(ctx context.Context, log logr.Logger, desiredService *v1.Service, existingService *v1.Service) (err error) { comparator := comparators.GetServiceComparator() delta := r.deltaProcessor.ComputeDelta(comparator, desiredService, existingService) diff --git a/controllers/reconcilers/kserve_metrics_servicemonitor_reconciler.go b/controllers/reconcilers/kserve_raw_metrics_servicemonitor_reconciler.go similarity index 59% rename from controllers/reconcilers/kserve_metrics_servicemonitor_reconciler.go rename to controllers/reconcilers/kserve_raw_metrics_servicemonitor_reconciler.go index 9350eaa2..d8d68bb8 100644 --- a/controllers/reconcilers/kserve_metrics_servicemonitor_reconciler.go +++ b/controllers/reconcilers/kserve_raw_metrics_servicemonitor_reconciler.go @@ -22,34 +22,36 @@ import ( "github.com/opendatahub-io/odh-model-controller/controllers/comparators" "github.com/opendatahub-io/odh-model-controller/controllers/processors" "github.com/opendatahub-io/odh-model-controller/controllers/resources" + "github.com/opendatahub-io/odh-model-controller/controllers/utils" v1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" ) -var _ SubResourceReconciler = (*KserveMetricsServiceMonitorReconciler)(nil) +var _ SubResourceReconciler = (*KserveRawMetricsServiceMonitorReconciler)(nil) -type KserveMetricsServiceMonitorReconciler struct { +type KserveRawMetricsServiceMonitorReconciler struct { NoResourceRemoval client client.Client serviceMonitorHandler resources.ServiceMonitorHandler 
deltaProcessor processors.DeltaProcessor } -func NewKServeMetricsServiceMonitorReconciler(client client.Client) *KserveMetricsServiceMonitorReconciler { - return &KserveMetricsServiceMonitorReconciler{ +func NewRawKServeMetricsServiceMonitorReconciler(client client.Client) *KserveRawMetricsServiceMonitorReconciler { + return &KserveRawMetricsServiceMonitorReconciler{ client: client, serviceMonitorHandler: resources.NewServiceMonitorHandler(client), deltaProcessor: processors.NewDeltaProcessor(), } } -// TODO remove this reconcile loop in future versions -func (r *KserveMetricsServiceMonitorReconciler) Reconcile(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) error { +func (r *KserveRawMetricsServiceMonitorReconciler) Reconcile(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) error { log.V(1).Info("Reconciling Metrics ServiceMonitor for InferenceService") // Create Desired resource - desiredResource, err := r.createDesiredResource(isvc) + desiredResource, err := r.createDesiredResource(ctx, log, isvc) if err != nil { return err } @@ -67,16 +69,44 @@ func (r *KserveMetricsServiceMonitorReconciler) Reconcile(ctx context.Context, l return nil } -// TODO remove this reconcile loop in future versions -func (r *KserveMetricsServiceMonitorReconciler) createDesiredResource(isvc *kservev1beta1.InferenceService) (*v1.ServiceMonitor, error) { - return nil, nil +func (r *KserveRawMetricsServiceMonitorReconciler) createDesiredResource(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) (*v1.ServiceMonitor, error) { + + isvcRuntime, err := utils.FindSupportingRuntimeForISvc(ctx, r.client, log, isvc) + if err != nil { + return nil, err + } + + desiredServiceMonitor := &v1.ServiceMonitor{ + ObjectMeta: metav1.ObjectMeta{ + Name: getMetricsServiceMonitorName(isvc), + Namespace: isvc.Namespace, + }, + Spec: v1.ServiceMonitorSpec{ + Endpoints: []v1.Endpoint{ + { + Port: isvcRuntime.Name + "-metrics", + 
Scheme: "http", + }, + }, + NamespaceSelector: v1.NamespaceSelector{}, + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": getMetricsServiceMonitorName(isvc), + }, + }, + }, + } + if err := ctrl.SetControllerReference(isvc, desiredServiceMonitor, r.client.Scheme()); err != nil { + return nil, err + } + return desiredServiceMonitor, nil } -func (r *KserveMetricsServiceMonitorReconciler) getExistingResource(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) (*v1.ServiceMonitor, error) { +func (r *KserveRawMetricsServiceMonitorReconciler) getExistingResource(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) (*v1.ServiceMonitor, error) { return r.serviceMonitorHandler.FetchServiceMonitor(ctx, log, types.NamespacedName{Name: getMetricsServiceMonitorName(isvc), Namespace: isvc.Namespace}) } -func (r *KserveMetricsServiceMonitorReconciler) processDelta(ctx context.Context, log logr.Logger, desiredServiceMonitor *v1.ServiceMonitor, existingServiceMonitor *v1.ServiceMonitor) (err error) { +func (r *KserveRawMetricsServiceMonitorReconciler) processDelta(ctx context.Context, log logr.Logger, desiredServiceMonitor *v1.ServiceMonitor, existingServiceMonitor *v1.ServiceMonitor) (err error) { comparator := comparators.GetServiceMonitorComparator() delta := r.deltaProcessor.ComputeDelta(comparator, desiredServiceMonitor, existingServiceMonitor) diff --git a/controllers/reconcilers/kserve_serverless_inferenceservice_reconciler.go b/controllers/reconcilers/kserve_serverless_inferenceservice_reconciler.go index 00365f0d..3f7b9aac 100644 --- a/controllers/reconcilers/kserve_serverless_inferenceservice_reconciler.go +++ b/controllers/reconcilers/kserve_serverless_inferenceservice_reconciler.go @@ -38,8 +38,6 @@ func NewKServeServerlessInferenceServiceReconciler(client client.Client, clientR subResourceReconciler := []SubResourceReconciler{ NewKserveServiceMeshMemberReconciler(client), 
NewKserveRouteReconciler(client),
-		NewKServeMetricsServiceReconciler(client),
-		NewKServeMetricsServiceMonitorReconciler(client),
 		NewKServePrometheusRoleBindingReconciler(client),
 		NewKServeIstioTelemetryReconciler(client),
 		NewKServeIstioServiceMonitorReconciler(client),
diff --git a/controllers/suite_test.go b/controllers/suite_test.go
index b073adf2..1a504f5e 100644
--- a/controllers/suite_test.go
+++ b/controllers/suite_test.go
@@ -315,3 +315,57 @@ func waitForConfigMap(cli client.Client, namespace, configMapName string, maxTri
 	}
 	return configMap, nil
 }
+
+// waitForService polls the cluster until the Service namespace/serviceName exists.
+//
+// It issues at most maxTries Get calls through cli, sleeping for delay before
+// each one, and returns the Service as soon as a Get succeeds. Any error other
+// than NotFound aborts the wait immediately. If the Service is still missing
+// after the last try, it returns nil and an error, so a caller that asserts on
+// the error can never pass against an object that was not fetched.
+func waitForService(cli client.Client, namespace, serviceName string, maxTries int, delay time.Duration) (*corev1.Service, error) {
+	ctx := context.Background()
+	key := client.ObjectKey{Namespace: namespace, Name: serviceName}
+	service := &corev1.Service{}
+
+	for try := 1; try <= maxTries; try++ {
+		// Wait before every attempt so the reconciler can act between polls.
+		time.Sleep(delay)
+
+		err := cli.Get(ctx, key, service)
+		if err == nil {
+			return service, nil
+		}
+		if !apierrs.IsNotFound(err) {
+			return nil, fmt.Errorf("failed to get service %s/%s: %w", namespace, serviceName, err)
+		}
+	}
+	return nil, fmt.Errorf("service %s/%s not found after %d tries", namespace, serviceName, maxTries)
+}
+
+// waitForServiceMonitor polls the cluster until the ServiceMonitor
+// namespace/serviceMonitorName exists.
+//
+// It issues at most maxTries Get calls through cli, sleeping for delay before
+// each one, and returns the ServiceMonitor as soon as a Get succeeds. Any error
+// other than NotFound aborts the wait immediately. If the ServiceMonitor is
+// still missing after the last try, it returns nil and an error.
+func waitForServiceMonitor(cli client.Client, namespace, serviceMonitorName string, maxTries int, delay time.Duration) (*monitoringv1.ServiceMonitor, error) {
+	ctx := context.Background()
+	key := client.ObjectKey{Namespace: namespace, Name: serviceMonitorName}
+	serviceMonitor := &monitoringv1.ServiceMonitor{}
+
+	for try := 1; try <= maxTries; try++ {
+		// Wait before every attempt so the reconciler can act between polls.
+		time.Sleep(delay)
+
+		err := cli.Get(ctx, key, serviceMonitor)
+		if err == nil {
+			return serviceMonitor, nil
+		}
+		if !apierrs.IsNotFound(err) {
+			return nil, fmt.Errorf("failed to get servicemonitor %s/%s: %w", namespace, serviceMonitorName, err)
+		}
+	}
+	return nil, fmt.Errorf("servicemonitor %s/%s not found after %d tries", namespace, serviceMonitorName, maxTries)
+}
diff --git a/controllers/utils/utils.go b/controllers/utils/utils.go
index 60385633..39ec4e08 100644
--- 
a/controllers/utils/utils.go +++ b/controllers/utils/utils.go @@ -39,16 +39,10 @@ var ( gvResourcesCache map[string]*metav1.APIResourceList ) -const ( - inferenceServiceDeploymentModeAnnotation = "serving.kserve.io/deploymentMode" - KserveConfigMapName = "inferenceservice-config" - KServeWithServiceMeshComponent = "kserve-service-mesh" -) - func GetDeploymentModeForIsvc(ctx context.Context, cli client.Client, isvc *kservev1beta1.InferenceService) (IsvcDeploymentMode, error) { // If ISVC specifically sets deployment mode using an annotation, return bool depending on value - value, exists := isvc.Annotations[inferenceServiceDeploymentModeAnnotation] + value, exists := isvc.Annotations[constants.InferenceServiceDeploymentModeAnnotation] if exists { switch value { case string(ModelMesh): @@ -66,14 +60,14 @@ func GetDeploymentModeForIsvc(ctx context.Context, cli client.Client, isvc *kser inferenceServiceConfigMap := &corev1.ConfigMap{} err := cli.Get(ctx, client.ObjectKey{ Namespace: controllerNs, - Name: KserveConfigMapName, + Name: constants.KserveConfigMapName, }, inferenceServiceConfigMap) if err != nil { return "", fmt.Errorf("error getting configmap 'inferenceservice-config'. %w", err) } var deployData map[string]interface{} if err = json.Unmarshal([]byte(inferenceServiceConfigMap.Data["deploy"]), &deployData); err != nil { - return "", fmt.Errorf("error retrieving value for key 'deploy' from configmap %s. %w", KserveConfigMapName, err) + return "", fmt.Errorf("error retrieving value for key 'deploy' from configmap %s. 
%w", constants.KserveConfigMapName, err) } defaultDeploymentMode := deployData["defaultDeploymentMode"] switch defaultDeploymentMode { @@ -103,7 +97,7 @@ func VerifyIfComponentIsEnabled(ctx context.Context, cli client.Client, componen // there must be only one dsc if len(objectList.Items) == 1 { fields := []string{"spec", "components", componentName, "managementState"} - if componentName == KServeWithServiceMeshComponent { + if componentName == constants.KServeWithServiceMeshComponent { // For KServe, Authorino is required when serving is enabled // By Disabling ServiceMesh for RawDeployment, it should reflect on disabling // the Authorino integration as well. diff --git a/main.go b/main.go index 36dcda70..a013aff2 100644 --- a/main.go +++ b/main.go @@ -19,6 +19,7 @@ package main import ( "context" "flag" + "github.com/opendatahub-io/odh-model-controller/controllers/constants" "os" "strconv" @@ -199,7 +200,7 @@ func main() { "reconciliation for InferenceService, please provide --model-registry-inference-reconcile flag.") } - kserveWithMeshEnabled, kserveWithMeshEnabledErr := utils.VerifyIfComponentIsEnabled(context.Background(), mgr.GetClient(), utils.KServeWithServiceMeshComponent) + kserveWithMeshEnabled, kserveWithMeshEnabledErr := utils.VerifyIfComponentIsEnabled(context.Background(), mgr.GetClient(), constants.KServeWithServiceMeshComponent) if kserveWithMeshEnabledErr != nil { setupLog.Error(kserveWithMeshEnabledErr, "could not determine if kserve have service mesh enabled") }