From 11d5c78a481d1d4a48aa12511a731d8fc18831b0 Mon Sep 17 00:00:00 2001 From: LeaveMyYard Date: Wed, 4 Oct 2023 21:41:00 +0300 Subject: [PATCH 1/5] Load pods from KubeAPI in case some required metrics are missing --- robusta_krr/core/integrations/kubernetes.py | 37 +++++++++++++++++-- .../core/integrations/prometheus/loader.py | 7 ++-- .../prometheus_metrics_service.py | 7 ++-- robusta_krr/core/models/objects.py | 4 +- robusta_krr/core/runner.py | 19 ++++++++-- 5 files changed, 59 insertions(+), 15 deletions(-) diff --git a/robusta_krr/core/integrations/kubernetes.py b/robusta_krr/core/integrations/kubernetes.py index db85fb9a..79c6eea3 100644 --- a/robusta_krr/core/integrations/kubernetes.py +++ b/robusta_krr/core/integrations/kubernetes.py @@ -13,12 +13,13 @@ V1Job, V1LabelSelector, V1Pod, + V1PodList, V1StatefulSet, V2HorizontalPodAutoscaler, V2HorizontalPodAutoscalerList, ) -from robusta_krr.core.models.objects import HPAData, K8sObjectData, KindLiteral +from robusta_krr.core.models.objects import HPAData, K8sObjectData, KindLiteral, PodData from robusta_krr.core.models.result import ResourceAllocations from robusta_krr.utils.configurable import Configurable @@ -79,6 +80,20 @@ async def list_scannable_objects(self) -> AsyncGenerator[K8sObjectData, None]: continue yield object + async def list_pods(self, object: K8sObjectData) -> list[PodData]: + selector = self._build_selector_query(object.api_resource.spec.selector) + if selector is None: + return [] + + loop = asyncio.get_running_loop() + ret: V1PodList = await loop.run_in_executor( + self.executor, + lambda: self.core.list_namespaced_pod( + namespace=object.api_resource.metadata.namespace, label_selector=selector + ), + ) + return [PodData(name=pod.metadata.name, deleted=False) for pod in ret.items] + @staticmethod def _get_match_expression_filter(expression) -> str: if expression.operator.lower() == "exists": @@ -115,6 +130,7 @@ def __build_obj( container=container.name, allocations=ResourceAllocations.from_container(container), hpa=self.__hpa_list.get((namespace, kind, name)), + api_resource=item, ) def _should_list_resource(self, resource: str): @@ -305,6 +321,11 @@ async def _try_list_hpa(self) -> dict[HPAKey, HPAData]: class KubernetesLoader(Configurable): + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + + self._cluster_loaders: dict[Optional[str], ClusterLoader] = {} + async def list_clusters(self) -> Optional[list[str]]: """List all clusters. 
@@ -363,17 +384,25 @@ async def list_scannable_objects(self, clusters: Optional[list[str]]) -> AsyncIt else: _cluster_loaders = [self._try_create_cluster_loader(cluster) for cluster in clusters] - cluster_loaders = [cl for cl in _cluster_loaders if cl is not None] - if cluster_loaders == []: + self.cluster_loaders = {cl.cluster: cl for cl in _cluster_loaders if cl is not None} + if self.cluster_loaders == {}: self.error("Could not load any cluster.") return # https://stackoverflow.com/questions/55299564/join-multiple-async-generators-in-python # This will merge all the streams from all the cluster loaders into a single stream objects_combined = aiostream.stream.merge( - *[cluster_loader.list_scannable_objects() for cluster_loader in cluster_loaders] + *[cluster_loader.list_scannable_objects() for cluster_loader in self.cluster_loaders.values()] ) async with objects_combined.stream() as streamer: async for object in streamer: yield object + + async def load_pods(self, object: K8sObjectData) -> list[PodData]: + try: + cluster_loader = self.cluster_loaders[object.cluster] + except KeyError: + raise RuntimeError(f"Cluster loader for cluster {object.cluster} not found") from None + + return await cluster_loader.list_pods(object) diff --git a/robusta_krr/core/integrations/prometheus/loader.py b/robusta_krr/core/integrations/prometheus/loader.py index 5bcec23b..ef028152 100644 --- a/robusta_krr/core/integrations/prometheus/loader.py +++ b/robusta_krr/core/integrations/prometheus/loader.py @@ -8,7 +8,7 @@ from kubernetes.client.api_client import ApiClient from prometrix import MetricsNotFound, PrometheusNotFound -from robusta_krr.core.models.objects import K8sObjectData +from robusta_krr.core.models.objects import K8sObjectData, PodData from robusta_krr.utils.configurable import Configurable from .metrics_service.prometheus_metrics_service import PrometheusMetricsService @@ -76,6 +76,9 @@ def get_metrics_service( return None + async def load_pods(self, object: K8sObjectData, period: datetime.timedelta) -> list[PodData]: + return await self.loader.load_pods(object, period) + async def gather_data( self, object: K8sObjectData, @@ -97,8 +100,6 @@ async def gather_data( ResourceHistoryData: The gathered resource history data. """ - await self.loader.load_pods(object, period) - return { MetricLoader.__name__: await self.loader.gather_data(object, MetricLoader, period, step) for MetricLoader in strategy.metrics diff --git a/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py b/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py index 2ea99dc1..8a9202c8 100644 --- a/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py +++ b/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py @@ -146,7 +146,7 @@ async def gather_data( return data - async def load_pods(self, object: K8sObjectData, period: datetime.timedelta) -> None: + async def load_pods(self, object: K8sObjectData, period: datetime.timedelta) -> list[PodData]: """ List pods related to the object and add them to the object's pods list. 
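        With this change the pods are returned to the caller instead of being
        assigned to the object's pods list here. A minimal caller sketch
        (``service`` is an assumed, already-initialized PrometheusMetricsService):

            pods = await service.load_pods(object, datetime.timedelta(days=1))
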
Args: @@ -195,8 +195,7 @@ async def load_pods(self, object: K8sObjectData, period: datetime.timedelta) -> ) if related_pods_result == []: - self.debug(f"No pods found for {object}") - return + return [] related_pods = [pod["metric"]["pod"] for pod in related_pods_result] current_pods_set = set() @@ -217,4 +216,4 @@ async def load_pods(self, object: K8sObjectData, period: datetime.timedelta) -> current_pods_set |= {pod["metric"]["pod"] for pod in pods_status_result} del pods_status_result - object.pods = list({PodData(name=pod, deleted=pod not in current_pods_set) for pod in related_pods}) + return list({PodData(name=pod, deleted=pod not in current_pods_set) for pod in related_pods}) diff --git a/robusta_krr/core/models/objects.py b/robusta_krr/core/models/objects.py index 7da48d72..2d46e374 100644 --- a/robusta_krr/core/models/objects.py +++ b/robusta_krr/core/models/objects.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Literal, Optional +from typing import Any, Literal, Optional import pydantic as pd @@ -38,6 +38,8 @@ class K8sObjectData(pd.BaseModel): kind: KindLiteral allocations: ResourceAllocations + api_resource: Any + def __str__(self) -> str: return f"{self.kind} {self.namespace}/{self.name}/{self.container}" diff --git a/robusta_krr/core/runner.py b/robusta_krr/core/runner.py index 9b435bd3..b3f7de05 100644 --- a/robusta_krr/core/runner.py +++ b/robusta_krr/core/runner.py @@ -30,7 +30,7 @@ def __init__(self, config: Config) -> None: self._metrics_service_loaders: dict[Optional[str], Union[PrometheusMetricsLoader, Exception]] = {} self._metrics_service_loaders_error_logged: set[Exception] = set() self._strategy = self.config.create_strategy() - + # This executor will be running calculations for recommendations self._executor = ThreadPoolExecutor(self.config.max_workers) @@ -128,6 +128,17 @@ async def _calculate_object_recommendations(self, object: K8sObjectData) -> RunR if prometheus_loader is None: return {resource: ResourceRecommendation.undefined() for resource in ResourceType} + object.pods = await prometheus_loader.load_pods(object, self._strategy.settings.history_timedelta) + if object.pods == []: + # TODO: Fallback to Kubernetes API + object.pods = await self._k8s_loader.load_pods(object) + if object.pods != []: # NOTE: Kubernetes API returned pods, but Prometheus did not + self.warning( + f"Was not able to load any pods for {object} from Prometheus.\n\t" + "This could mean that Prometheus is missing some required metrics.\n\t" + "Loaded pods from Kubernetes API instead." + ) + metrics = await prometheus_loader.gather_data( object, self._strategy, @@ -209,8 +220,10 @@ async def run(self) -> None: # eks has a lower step limit than other types of prometheus, it will throw an error step_count = self._strategy.settings.history_duration * 60 / self._strategy.settings.timeframe_duration if self.config.eks_managed_prom and step_count > 11000: - min_step = self._strategy.settings.history_duration * 60 / 10000 - self.warning(f"The timeframe duration provided is insufficient and will be overridden with {min_step}. Kindly adjust --timeframe_duration to a value equal to or greater than {min_step}.") + min_step = self._strategy.settings.history_duration * 60 / 10000 + self.warning( + f"The timeframe duration provided is insufficient and will be overridden with {min_step}. Kindly adjust --timeframe_duration to a value equal to or greater than {min_step}." 
+ ) self._strategy.settings.timeframe_duration = min_step result = await self._collect_result() From 31758d8170f4704a7d66414248f0743adc132760 Mon Sep 17 00:00:00 2001 From: LeaveMyYard Date: Thu, 5 Oct 2023 15:47:46 +0300 Subject: [PATCH 2/5] Refactor how settings are passed and how logging works --- robusta_krr/core/integrations/kubernetes.py | 99 +++++++------ .../core/integrations/prometheus/loader.py | 33 ++--- .../integrations/prometheus/metrics/base.py | 13 +- .../metrics_service/base_metric_service.py | 11 +- .../prometheus_metrics_service.py | 42 +++--- .../prometheus/prometheus_utils.py | 22 +-- robusta_krr/core/models/config.py | 38 +++++ robusta_krr/core/models/objects.py | 4 +- robusta_krr/core/runner.py | 107 +++++++------- robusta_krr/main.py | 136 ++++++++++-------- robusta_krr/utils/configurable.py | 99 ------------- robusta_krr/utils/print.py | 21 +++ robusta_krr/utils/progress_bar.py | 12 +- robusta_krr/utils/service_discovery.py | 21 +-- tests/conftest.py | 12 ++ tests/test_krr.py | 7 +- 16 files changed, 329 insertions(+), 348 deletions(-) delete mode 100644 robusta_krr/utils/configurable.py create mode 100644 robusta_krr/utils/print.py diff --git a/robusta_krr/core/integrations/kubernetes.py b/robusta_krr/core/integrations/kubernetes.py index 79c6eea3..3baffd1f 100644 --- a/robusta_krr/core/integrations/kubernetes.py +++ b/robusta_krr/core/integrations/kubernetes.py @@ -1,4 +1,5 @@ import asyncio +import logging from concurrent.futures import ThreadPoolExecutor from typing import AsyncGenerator, AsyncIterator, Callable, Optional, Union @@ -19,25 +20,25 @@ V2HorizontalPodAutoscalerList, ) +from robusta_krr.core.models.config import settings from robusta_krr.core.models.objects import HPAData, K8sObjectData, KindLiteral, PodData from robusta_krr.core.models.result import ResourceAllocations -from robusta_krr.utils.configurable import Configurable from .rollout import RolloutAppsV1Api +logger = logging.getLogger("krr") + AnyKubernetesAPIObject = Union[V1Deployment, V1DaemonSet, V1StatefulSet, V1Pod, V1Job] HPAKey = tuple[str, str, str] -class ClusterLoader(Configurable): - def __init__(self, cluster: Optional[str], *args, **kwargs): - super().__init__(*args, **kwargs) - +class ClusterLoader: + def __init__(self, cluster: Optional[str]): self.cluster = cluster # This executor will be running requests to Kubernetes API - self.executor = ThreadPoolExecutor(self.config.max_workers) + self.executor = ThreadPoolExecutor(settings.max_workers) self.api_client = ( - config.new_client_from_config(context=cluster, config_file=self.config.kubeconfig) + config.new_client_from_config(context=cluster, config_file=settings.kubeconfig) if cluster is not None else None ) @@ -57,9 +58,9 @@ async def list_scannable_objects(self) -> AsyncGenerator[K8sObjectData, None]: A list of scannable objects. 
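        Example (a minimal sketch; ``loader`` is an assumed, already-constructed
        ClusterLoader):

            async for obj in loader.list_scannable_objects():
                print(obj)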
""" - self.info(f"Listing scannable objects in {self.cluster}") - self.debug(f"Namespaces: {self.config.namespaces}") - self.debug(f"Resources: {self.config.resources}") + logger.info(f"Listing scannable objects in {self.cluster}") + logger.debug(f"Namespaces: {settings.namespaces}") + logger.debug(f"Resources: {settings.resources}") self.__hpa_list = await self._try_list_hpa() @@ -76,12 +77,12 @@ async def list_scannable_objects(self) -> AsyncGenerator[K8sObjectData, None]: async with objects_combined.stream() as streamer: async for object in streamer: # NOTE: By default we will filter out kube-system namespace - if self.config.namespaces == "*" and object.namespace == "kube-system": + if settings.namespaces == "*" and object.namespace == "kube-system": continue yield object async def list_pods(self, object: K8sObjectData) -> list[PodData]: - selector = self._build_selector_query(object.api_resource.spec.selector) + selector = self._build_selector_query(object._api_resource.spec.selector) if selector is None: return [] @@ -89,7 +90,7 @@ async def list_pods(self, object: K8sObjectData) -> list[PodData]: ret: V1PodList = await loop.run_in_executor( self.executor, lambda: self.core.list_namespaced_pod( - namespace=object.api_resource.metadata.namespace, label_selector=selector + namespace=object._api_resource.metadata.namespace, label_selector=selector ), ) return [PodData(name=pod.metadata.name, deleted=False) for pod in ret.items] @@ -134,33 +135,33 @@ def __build_obj( ) def _should_list_resource(self, resource: str): - if self.config.resources == "*": + if settings.resources == "*": return True - return resource.lower() in self.config.resources + return resource.lower() in settings.resources async def _list_workflows( self, kind: KindLiteral, all_namespaces_request: Callable, namespaced_request: Callable ) -> AsyncIterator[K8sObjectData]: if not self._should_list_resource(kind): - self.debug(f"Skipping {kind}s in {self.cluster}") + logger.debug(f"Skipping {kind}s in {self.cluster}") return if kind == "Rollout" and not self.__rollouts_available: return - self.debug(f"Listing {kind}s in {self.cluster}") + logger.debug(f"Listing {kind}s in {self.cluster}") loop = asyncio.get_running_loop() try: - if self.config.namespaces == "*": + if settings.namespaces == "*": ret_multi = await loop.run_in_executor( self.executor, lambda: all_namespaces_request( watch=False, - label_selector=self.config.selector, + label_selector=settings.selector, ), ) - self.debug(f"Found {len(ret_multi.items)} {kind} in {self.cluster}") + logger.debug(f"Found {len(ret_multi.items)} {kind} in {self.cluster}") for item in ret_multi.items: for container in item.spec.template.spec.containers: yield self.__build_obj(item, container, kind) @@ -171,10 +172,10 @@ async def _list_workflows( lambda: namespaced_request( namespace=namespace, watch=False, - label_selector=self.config.selector, + label_selector=settings.selector, ), ) - for namespace in self.config.namespaces + for namespace in settings.namespaces ] total_items = 0 @@ -185,16 +186,15 @@ async def _list_workflows( for container in item.spec.template.spec.containers: yield self.__build_obj(item, container, kind) - self.debug(f"Found {total_items} {kind} in {self.cluster}") + logger.debug(f"Found {total_items} {kind} in {self.cluster}") except ApiException as e: if kind == "Rollout" and e.status in [400, 401, 403, 404]: if self.__rollouts_available: - self.debug(f"Rollout API not available in {self.cluster}") + logger.debug(f"Rollout API not available in 
{self.cluster}") self.__rollouts_available = False else: - self.error(f"Error {e.status} listing {kind} in cluster {self.cluster}: {e.reason}") - self.debug_exception() - self.error("Will skip this object type and continue.") + logger.exception(f"Error {e.status} listing {kind} in cluster {self.cluster}: {e.reason}") + logger.error("Will skip this object type and continue.") def _list_deployments(self) -> AsyncIterator[K8sObjectData]: return self._list_workflows( @@ -311,19 +311,16 @@ async def _try_list_hpa(self) -> dict[HPAKey, HPAData]: try: return await self.__list_hpa() except Exception as e: - self.error(f"Error trying to list hpa in cluster {self.cluster}: {e}") - self.debug_exception() - self.error( + logger.exception(f"Error trying to list hpa in cluster {self.cluster}: {e}") + logger.error( "Will assume that there are no HPA. " "Be careful as this may lead to inaccurate results if object actually has HPA." ) return {} -class KubernetesLoader(Configurable): - def __init__(self, *args, **kwargs) -> None: - super().__init__(*args, **kwargs) - +class KubernetesLoader: + def __init__(self) -> None: self._cluster_loaders: dict[Optional[str], ClusterLoader] = {} async def list_clusters(self) -> Optional[list[str]]: @@ -333,44 +330,44 @@ async def list_clusters(self) -> Optional[list[str]]: A list of clusters. """ - if self.config.inside_cluster: - self.debug("Working inside the cluster") + if settings.inside_cluster: + logger.debug("Working inside the cluster") return None try: - contexts, current_context = config.list_kube_config_contexts(self.config.kubeconfig) + contexts, current_context = config.list_kube_config_contexts(settings.kubeconfig) except config.ConfigException: - if self.config.clusters is not None and self.config.clusters != "*": - self.warning("Could not load context from kubeconfig.") - self.warning(f"Falling back to clusters from CLI: {self.config.clusters}") - return self.config.clusters + if settings.clusters is not None and settings.clusters != "*": + logger.warning("Could not load context from kubeconfig.") + logger.warning(f"Falling back to clusters from CLI: {settings.clusters}") + return settings.clusters else: - self.error( + logger.error( "Could not load context from kubeconfig. " "Please check your kubeconfig file or pass -c flag with the context name." 
) return None - self.debug(f"Found {len(contexts)} clusters: {', '.join([context['name'] for context in contexts])}") - self.debug(f"Current cluster: {current_context['name']}") + logger.debug(f"Found {len(contexts)} clusters: {', '.join([context['name'] for context in contexts])}") + logger.debug(f"Current cluster: {current_context['name']}") - self.debug(f"Configured clusters: {self.config.clusters}") + logger.debug(f"Configured clusters: {settings.clusters}") # None, empty means current cluster - if not self.config.clusters: + if not settings.clusters: return [current_context["name"]] # * means all clusters - if self.config.clusters == "*": + if settings.clusters == "*": return [context["name"] for context in contexts] - return [context["name"] for context in contexts if context["name"] in self.config.clusters] + return [context["name"] for context in contexts if context["name"] in settings.clusters] def _try_create_cluster_loader(self, cluster: Optional[str]) -> Optional[ClusterLoader]: try: - return ClusterLoader(cluster=cluster, config=self.config) + return ClusterLoader(cluster=cluster) except Exception as e: - self.error(f"Could not load cluster {cluster} and will skip it: {e}") + logger.error(f"Could not load cluster {cluster} and will skip it: {e}") return None async def list_scannable_objects(self, clusters: Optional[list[str]]) -> AsyncIterator[K8sObjectData]: @@ -386,7 +383,7 @@ async def list_scannable_objects(self, clusters: Optional[list[str]]) -> AsyncIt self.cluster_loaders = {cl.cluster: cl for cl in _cluster_loaders if cl is not None} if self.cluster_loaders == {}: - self.error("Could not load any cluster.") + logger.error("Could not load any cluster.") return # https://stackoverflow.com/questions/55299564/join-multiple-async-generators-in-python diff --git a/robusta_krr/core/integrations/prometheus/loader.py b/robusta_krr/core/integrations/prometheus/loader.py index ef028152..9129bb6f 100644 --- a/robusta_krr/core/integrations/prometheus/loader.py +++ b/robusta_krr/core/integrations/prometheus/loader.py @@ -1,6 +1,7 @@ from __future__ import annotations import datetime +import logging from concurrent.futures import ThreadPoolExecutor from typing import TYPE_CHECKING, Optional @@ -8,8 +9,8 @@ from kubernetes.client.api_client import ApiClient from prometrix import MetricsNotFound, PrometheusNotFound +from robusta_krr.core.models.config import settings from robusta_krr.core.models.objects import K8sObjectData, PodData -from robusta_krr.utils.configurable import Configurable from .metrics_service.prometheus_metrics_service import PrometheusMetricsService from .metrics_service.thanos_metrics_service import ThanosMetricsService @@ -17,7 +18,8 @@ if TYPE_CHECKING: from robusta_krr.core.abstract.strategies import BaseStrategy, MetricsPodData - from robusta_krr.core.models.config import Config + +logger = logging.getLogger("krr") METRICS_SERVICES = { "Prometheus": PrometheusMetricsService, @@ -26,53 +28,44 @@ } -class PrometheusMetricsLoader(Configurable): - def __init__( - self, - config: Config, - *, - cluster: Optional[str] = None, - ) -> None: +class PrometheusMetricsLoader: + def __init__(self, *, cluster: Optional[str] = None) -> None: """ Initializes the Prometheus Loader. Args: - config (Config): The configuration object. cluster (Optional[str]): The name of the cluster. Defaults to None. 
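        Raises PrometheusNotFound if no Prometheus or metrics service is found.

        Example (a sketch; the cluster name is a placeholder and a discoverable
        Prometheus is assumed):

            loader = PrometheusMetricsLoader(cluster="my-cluster")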
""" - super().__init__(config=config) - - self.executor = ThreadPoolExecutor(self.config.max_workers) + self.executor = ThreadPoolExecutor(settings.max_workers) self.api_client = ( - k8s_config.new_client_from_config(config_file=self.config.kubeconfig, context=cluster) + k8s_config.new_client_from_config(config_file=settings.kubeconfig, context=cluster) if cluster is not None else None ) - loader = self.get_metrics_service(config, api_client=self.api_client, cluster=cluster) + loader = self.get_metrics_service(api_client=self.api_client, cluster=cluster) if loader is None: raise PrometheusNotFound("No Prometheus or metrics service found") self.loader = loader - self.info(f"{self.loader.name} connected successfully for {cluster or 'default'} cluster") + logger.info(f"{self.loader.name} connected successfully for {cluster or 'default'} cluster") def get_metrics_service( self, - config: Config, api_client: Optional[ApiClient] = None, cluster: Optional[str] = None, ) -> Optional[PrometheusMetricsService]: for service_name, metric_service_class in METRICS_SERVICES.items(): try: - loader = metric_service_class(config, api_client=api_client, cluster=cluster, executor=self.executor) + loader = metric_service_class(api_client=api_client, cluster=cluster, executor=self.executor) loader.check_connection() - self.echo(f"{service_name} found") + logger.info(f"{service_name} found") loader.validate_cluster_name() return loader except MetricsNotFound as e: - self.debug(f"{service_name} not found: {e}") + logger.debug(f"{service_name} not found: {e}") return None diff --git a/robusta_krr/core/integrations/prometheus/metrics/base.py b/robusta_krr/core/integrations/prometheus/metrics/base.py index 81751aaf..ab095f89 100644 --- a/robusta_krr/core/integrations/prometheus/metrics/base.py +++ b/robusta_krr/core/integrations/prometheus/metrics/base.py @@ -14,9 +14,8 @@ from robusta_krr.core.abstract.metrics import BaseMetric from robusta_krr.core.abstract.strategies import PodsTimeData -from robusta_krr.core.models.config import Config +from robusta_krr.core.models.config import settings from robusta_krr.core.models.objects import K8sObjectData -from robusta_krr.utils.configurable import Configurable class PrometheusSeries(TypedDict): @@ -37,7 +36,7 @@ class PrometheusMetricData(pd.BaseModel): type: QueryType -class PrometheusMetric(BaseMetric, Configurable): +class PrometheusMetric(BaseMetric): """ Base class for all metric loaders. @@ -63,12 +62,10 @@ class PrometheusMetric(BaseMetric, Configurable): def __init__( self, - config: Config, prometheus: CustomPrometheusConnect, service_name: str, executor: Optional[ThreadPoolExecutor] = None, ) -> None: - super().__init__(config) self.prometheus = prometheus self.service_name = service_name @@ -84,9 +81,9 @@ def get_prometheus_cluster_label(self) -> str: Returns: str: a promql safe label string for querying the cluster. 
""" - if self.config.prometheus_cluster_label is None: + if settings.prometheus_cluster_label is None: return "" - return f', {self.config.prometheus_label}="{self.config.prometheus_cluster_label}"' + return f', {settings.prometheus_label}="{settings.prometheus_cluster_label}"' @abc.abstractmethod def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: @@ -237,7 +234,7 @@ def filter_prom_jobs_results( if len(relevant_kubelet_metric) == 1: return_list.append(relevant_kubelet_metric[0]) continue - sorted_relevant_series = sorted(relevant_series, key=lambda s: s["metric"].get("job"), reverse=False) + sorted_relevant_series = sorted(relevant_series, key=lambda s: s["metric"].get("job", ""), reverse=False) return_list.append(sorted_relevant_series[0]) return return_list diff --git a/robusta_krr/core/integrations/prometheus/metrics_service/base_metric_service.py b/robusta_krr/core/integrations/prometheus/metrics_service/base_metric_service.py index 8b4beef5..9adb5b53 100644 --- a/robusta_krr/core/integrations/prometheus/metrics_service/base_metric_service.py +++ b/robusta_krr/core/integrations/prometheus/metrics_service/base_metric_service.py @@ -6,22 +6,19 @@ from kubernetes.client.api_client import ApiClient from robusta_krr.core.abstract.strategies import PodsTimeData -from robusta_krr.core.models.config import Config +from robusta_krr.core.models.config import settings from robusta_krr.core.models.objects import K8sObjectData -from robusta_krr.utils.configurable import Configurable from ..metrics import PrometheusMetric -class MetricsService(Configurable, abc.ABC): +class MetricsService(abc.ABC): def __init__( self, - config: Config, api_client: Optional[ApiClient] = None, cluster: Optional[str] = None, executor: Optional[ThreadPoolExecutor] = None, ) -> None: - super().__init__(config=config) self.api_client = api_client self.cluster = cluster or "default" self.executor = executor @@ -56,6 +53,6 @@ def get_prometheus_cluster_label(self) -> str: Returns: str: a promql safe label string for querying the cluster. 
""" - if self.config.prometheus_cluster_label is None: + if settings.prometheus_cluster_label is None: return "" - return f', {self.config.prometheus_label}="{self.config.prometheus_cluster_label}"' + return f', {settings.prometheus_label}="{settings.prometheus_cluster_label}"' diff --git a/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py b/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py index 8a9202c8..7894e53a 100644 --- a/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py +++ b/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py @@ -1,5 +1,6 @@ import asyncio import datetime +import logging from concurrent.futures import ThreadPoolExecutor from typing import List, Optional @@ -8,7 +9,7 @@ from prometrix import PrometheusNotFound, get_custom_prometheus_connect from robusta_krr.core.abstract.strategies import PodsTimeData -from robusta_krr.core.models.config import Config +from robusta_krr.core.models.config import settings from robusta_krr.core.models.objects import K8sObjectData, PodData from robusta_krr.utils.batched import batched from robusta_krr.utils.service_discovery import MetricsServiceDiscovery @@ -17,6 +18,8 @@ from ..prometheus_utils import ClusterNotSpecifiedException, generate_prometheus_config from .base_metric_service import MetricsService +logger = logging.getLogger("krr") + class PrometheusDiscovery(MetricsServiceDiscovery): def find_metrics_url(self, *, api_client: Optional[ApiClient] = None) -> Optional[str]: @@ -50,22 +53,21 @@ class PrometheusMetricsService(MetricsService): def __init__( self, - config: Config, *, cluster: Optional[str] = None, api_client: Optional[ApiClient] = None, executor: Optional[ThreadPoolExecutor] = None, ) -> None: - super().__init__(config=config, api_client=api_client, cluster=cluster, executor=executor) + super().__init__(api_client=api_client, cluster=cluster, executor=executor) - self.info(f"Connecting to {self.name} for {self.cluster} cluster") + logger.info(f"Connecting to {self.name} for {self.cluster} cluster") - self.auth_header = self.config.prometheus_auth_header - self.ssl_enabled = self.config.prometheus_ssl_enabled + self.auth_header = settings.prometheus_auth_header + self.ssl_enabled = settings.prometheus_ssl_enabled - self.prometheus_discovery = self.service_discovery(config=self.config, api_client=self.api_client) + self.prometheus_discovery = self.service_discovery(api_client=self.api_client) - self.url = self.config.prometheus_url + self.url = settings.prometheus_url self.url = self.url or self.prometheus_discovery.find_metrics_url() if not self.url: @@ -74,15 +76,15 @@ def __init__( "\tTry using port-forwarding and/or setting the url manually (using the -p flag.)." 
) - self.info(f"Using {self.name} at {self.url} for cluster {cluster or 'default'}") + logger.info(f"Using {self.name} at {self.url} for cluster {cluster or 'default'}") - headers = self.config.prometheus_other_headers + headers = settings.prometheus_other_headers if self.auth_header: headers |= {"Authorization": self.auth_header} - elif not self.config.inside_cluster and self.api_client is not None: + elif not settings.inside_cluster and self.api_client is not None: self.api_client.update_params_for_auth(headers, {}, ["BearerToken"]) - self.prom_config = generate_prometheus_config(config, url=self.url, headers=headers, metrics_service=self) + self.prom_config = generate_prometheus_config(url=self.url, headers=headers, metrics_service=self) self.prometheus = get_custom_prometheus_connect(self.prom_config) def check_connection(self): @@ -98,10 +100,10 @@ async def query(self, query: str) -> dict: return await loop.run_in_executor(self.executor, lambda: self.prometheus.custom_query(query=query)) def validate_cluster_name(self): - if not self.config.prometheus_cluster_label and not self.config.prometheus_label: + if not settings.prometheus_cluster_label and not settings.prometheus_label: return - cluster_label = self.config.prometheus_cluster_label + cluster_label = settings.prometheus_cluster_label cluster_names = self.get_cluster_names() if cluster_names is None or len(cluster_names) <= 1: @@ -119,9 +121,9 @@ def validate_cluster_name(self): def get_cluster_names(self) -> Optional[List[str]]: try: - return self.prometheus.get_label_values(label_name=self.config.prometheus_label) + return self.prometheus.get_label_values(label_name=settings.prometheus_label) except PrometheusApiClientException: - self.error("Labels api not present on prometheus client") + logger.error("Labels api not present on prometheus client") return [] async def gather_data( @@ -134,13 +136,13 @@ async def gather_data( """ ResourceHistoryData: The gathered resource history data. """ - self.debug(f"Gathering {LoaderClass.__name__} metric for {object}") + logger.debug(f"Gathering {LoaderClass.__name__} metric for {object}") - metric_loader = LoaderClass(self.config, self.prometheus, self.name, self.executor) + metric_loader = LoaderClass(self.prometheus, self.name, self.executor) data = await metric_loader.load_data(object, period, step) if len(data) == 0: - self.warning( + logger.warning( f"{metric_loader.service_name} returned no {metric_loader.__class__.__name__} metrics for {object}" ) @@ -154,7 +156,7 @@ async def load_pods(self, object: K8sObjectData, period: datetime.timedelta) -> period (datetime.timedelta): The time period for which to gather data. 
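        A sketch of the fallback this return value enables (mirroring
        Runner._calculate_object_recommendations in this patch series;
        ``service`` and ``k8s_loader`` are illustrative names):

            pods = await service.load_pods(object, period)
            if pods == []:
                pods = await k8s_loader.load_pods(object)  # fall back to the Kubernetes API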
""" - self.debug(f"Adding historic pods for {object}") + logger.debug(f"Adding historic pods for {object}") days_literal = min(int(period.total_seconds()) // 60 // 24, 32) period_literal = f"{days_literal}d" diff --git a/robusta_krr/core/integrations/prometheus/prometheus_utils.py b/robusta_krr/core/integrations/prometheus/prometheus_utils.py index 450c60c9..e40c2805 100644 --- a/robusta_krr/core/integrations/prometheus/prometheus_utils.py +++ b/robusta_krr/core/integrations/prometheus/prometheus_utils.py @@ -5,7 +5,7 @@ import boto3 from prometrix import AWSPrometheusConfig, CoralogixPrometheusConfig, PrometheusConfig, VictoriaMetricsPrometheusConfig -from robusta_krr.core.models.config import Config +from robusta_krr.core.models.config import settings if TYPE_CHECKING: from robusta_krr.core.integrations.prometheus.metrics_service.prometheus_metrics_service import ( @@ -22,25 +22,25 @@ class ClusterNotSpecifiedException(Exception): def generate_prometheus_config( - config: Config, url: str, headers: dict[str, str], metrics_service: PrometheusMetricsService + url: str, headers: dict[str, str], metrics_service: PrometheusMetricsService ) -> PrometheusConfig: from .metrics_service.victoria_metrics_service import VictoriaMetricsService baseconfig = { "url": url, - "disable_ssl": not config.prometheus_ssl_enabled, + "disable_ssl": not settings.prometheus_ssl_enabled, "headers": headers, } # aws config - if config.eks_managed_prom: - session = boto3.Session(profile_name=config.eks_managed_prom_profile_name) + if settings.eks_managed_prom: + session = boto3.Session(profile_name=settings.eks_managed_prom_profile_name) credentials = session.get_credentials() credentials = credentials.get_frozen_credentials() - region = config.eks_managed_prom_region if config.eks_managed_prom_region else session.region_name - access_key = config.eks_access_key if config.eks_access_key else credentials.access_key - secret_key = config.eks_secret_key if config.eks_secret_key else credentials.secret_key - service_name = config.eks_service_name if config.eks_secret_key else "aps" + region = settings.eks_managed_prom_region if settings.eks_managed_prom_region else session.region_name + access_key = settings.eks_access_key if settings.eks_access_key else credentials.access_key + secret_key = settings.eks_secret_key if settings.eks_secret_key else credentials.secret_key + service_name = settings.eks_service_name if settings.eks_secret_key else "aps" if not region: raise Exception("No eks region specified") @@ -52,8 +52,8 @@ def generate_prometheus_config( **baseconfig, ) # coralogix config - if config.coralogix_token: - return CoralogixPrometheusConfig(**baseconfig, prometheus_token=config.coralogix_token) + if settings.coralogix_token: + return CoralogixPrometheusConfig(**baseconfig, prometheus_token=settings.coralogix_token) if isinstance(metrics_service, VictoriaMetricsService): return VictoriaMetricsPrometheusConfig(**baseconfig) return PrometheusConfig(**baseconfig) diff --git a/robusta_krr/core/models/config.py b/robusta_krr/core/models/config.py index d170ef52..6e7ae304 100644 --- a/robusta_krr/core/models/config.py +++ b/robusta_krr/core/models/config.py @@ -1,14 +1,21 @@ +from __future__ import annotations + +import logging +import sys from typing import Any, Literal, Optional, Union import pydantic as pd from kubernetes import config from kubernetes.config.config_exception import ConfigException from rich.console import Console +from rich.logging import RichHandler from robusta_krr.core.abstract import 
formatters from robusta_krr.core.abstract.strategies import AnyStrategy, BaseStrategy from robusta_krr.core.models.objects import KindLiteral +logger = logging.getLogger("krr") + class Config(pd.BaseSettings): quiet: bool = pd.Field(False) @@ -113,3 +120,34 @@ def load_kubeconfig(self) -> None: self.inside_cluster = False else: self.inside_cluster = True + + @staticmethod + def set_config(config: Config) -> None: + global _config + + _config = config + logging.basicConfig( + level="NOTSET", + format="%(message)s", + datefmt="[%X]", + handlers=[RichHandler(console=Console(file=sys.stderr if settings.log_to_stderr else sys.stdout))], + ) + logging.getLogger("").setLevel(logging.CRITICAL) + logger.setLevel(logging.DEBUG if config.verbose else logging.CRITICAL if config.quiet else logging.INFO) + + +# NOTE: This class is just a proxy for _config. +# Import settings from this module and use it like it is just a config object. +class _Settings(Config): + def __init__(self) -> None: + pass + + def __getattr__(self, name: str): + if _config is None: + raise AttributeError("Config is not set") + + return getattr(_config, name) + + +_config: Optional[Config] = None +settings = _Settings() diff --git a/robusta_krr/core/models/objects.py b/robusta_krr/core/models/objects.py index 2d46e374..c5f6a984 100644 --- a/robusta_krr/core/models/objects.py +++ b/robusta_krr/core/models/objects.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Literal, Optional +from typing import Literal, Optional import pydantic as pd @@ -38,7 +38,7 @@ class K8sObjectData(pd.BaseModel): kind: KindLiteral allocations: ResourceAllocations - api_resource: Any + _api_resource = pd.PrivateAttr(None) def __str__(self) -> str: return f"{self.kind} {self.namespace}/{self.name}/{self.container}" diff --git a/robusta_krr/core/runner.py b/robusta_krr/core/runner.py index b3f7de05..7bf5b320 100644 --- a/robusta_krr/core/runner.py +++ b/robusta_krr/core/runner.py @@ -1,4 +1,5 @@ import asyncio +import logging import math import os import sys @@ -12,32 +13,33 @@ from robusta_krr.core.abstract.strategies import ResourceRecommendation, RunResult from robusta_krr.core.integrations.kubernetes import KubernetesLoader from robusta_krr.core.integrations.prometheus import ClusterNotSpecifiedException, PrometheusMetricsLoader -from robusta_krr.core.models.config import Config +from robusta_krr.core.models.config import settings from robusta_krr.core.models.objects import K8sObjectData from robusta_krr.core.models.result import ResourceAllocations, ResourceScan, ResourceType, Result, StrategyData -from robusta_krr.utils.configurable import Configurable from robusta_krr.utils.logo import ASCII_LOGO +from robusta_krr.utils.print import print from robusta_krr.utils.progress_bar import ProgressBar from robusta_krr.utils.version import get_version +logger = logging.getLogger("krr") -class Runner(Configurable): + +class Runner: EXPECTED_EXCEPTIONS = (KeyboardInterrupt, PrometheusNotFound) - def __init__(self, config: Config) -> None: - super().__init__(config) - self._k8s_loader = KubernetesLoader(self.config) + def __init__(self) -> None: + self._k8s_loader = KubernetesLoader() self._metrics_service_loaders: dict[Optional[str], Union[PrometheusMetricsLoader, Exception]] = {} self._metrics_service_loaders_error_logged: set[Exception] = set() - self._strategy = self.config.create_strategy() + self._strategy = settings.create_strategy() # This executor will be running calculations for recommendations - self._executor = 
ThreadPoolExecutor(self.config.max_workers) + self._executor = ThreadPoolExecutor(settings.max_workers) def _get_prometheus_loader(self, cluster: Optional[str]) -> Optional[PrometheusMetricsLoader]: if cluster not in self._metrics_service_loaders: try: - self._metrics_service_loaders[cluster] = PrometheusMetricsLoader(self.config, cluster=cluster) + self._metrics_service_loaders[cluster] = PrometheusMetricsLoader(cluster=cluster) except Exception as e: self._metrics_service_loaders[cluster] = e @@ -45,7 +47,7 @@ def _get_prometheus_loader(self, cluster: Optional[str]) -> Optional[PrometheusM if isinstance(result, self.EXPECTED_EXCEPTIONS): if result not in self._metrics_service_loaders_error_logged: self._metrics_service_loaders_error_logged.add(result) - self.error(str(result)) + logger.error(str(result)) return None elif isinstance(result, Exception): raise result @@ -53,42 +55,46 @@ def _get_prometheus_loader(self, cluster: Optional[str]) -> Optional[PrometheusM return result def _greet(self) -> None: - self.echo(ASCII_LOGO, no_prefix=True) - self.echo(f"Running Robusta's KRR (Kubernetes Resource Recommender) {get_version()}", no_prefix=True) - self.echo(f"Using strategy: {self._strategy}", no_prefix=True) - self.echo(f"Using formatter: {self.config.format}", no_prefix=True) - self.echo(no_prefix=True) + if settings.quiet: + return + + print(ASCII_LOGO) + print(f"Running Robusta's KRR (Kubernetes Resource Recommender) {get_version()}") + print(f"Using strategy: {self._strategy}") + print(f"Using formatter: {settings.format}") + print("") def _process_result(self, result: Result) -> None: - Formatter = self.config.Formatter + Formatter = settings.Formatter formatted = result.format(Formatter) - self.echo("\n", no_prefix=True) - self.print_result(formatted, rich=getattr(Formatter, "__rich_console__", False)) - if (self.config.file_output) or (self.config.slack_output): - if self.config.file_output: - file_name = self.config.file_output - elif self.config.slack_output: - file_name = self.config.slack_output + rich = getattr(Formatter, "__rich_console__", False) + + print(formatted, rich=rich, force=True) + if settings.file_output or settings.slack_output: + if settings.file_output: + file_name = settings.file_output + elif settings.slack_output: + file_name = settings.slack_output with open(file_name, "w") as target_file: sys.stdout = target_file - self.print_result(formatted, rich=getattr(Formatter, "__rich_console__", False)) + print(formatted, rich=rich, force=True) sys.stdout = sys.stdout - if self.config.slack_output: + if settings.slack_output: client = WebClient(os.environ["SLACK_BOT_TOKEN"]) warnings.filterwarnings("ignore", category=UserWarning) client.files_upload( - channels=f"#{self.config.slack_output}", + channels=f"#{settings.slack_output}", title="KRR Report", file=f"./{file_name}", - initial_comment=f'Kubernetes Resource Report for {(" ".join(self.config.namespaces))}', + initial_comment=f'Kubernetes Resource Report for {(" ".join(settings.namespaces))}', ) os.remove(file_name) def __get_resource_minimal(self, resource: ResourceType) -> float: if resource == ResourceType.CPU: - return 1 / 1000 * self.config.cpu_min_value + return 1 / 1000 * settings.cpu_min_value elif resource == ResourceType.Memory: - return 1024**2 * self.config.memory_min_value + return 1024**2 * settings.memory_min_value else: return 0 @@ -130,10 +136,12 @@ async def _calculate_object_recommendations(self, object: K8sObjectData) -> RunR object.pods = await prometheus_loader.load_pods(object, 
self._strategy.settings.history_timedelta)
         if object.pods == []:
-            # TODO: Fallback to Kubernetes API
+            # Fallback to Kubernetes API
             object.pods = await self._k8s_loader.load_pods(object)
-            if object.pods != []:  # NOTE: Kubernetes API returned pods, but Prometheus did not
-                self.warning(
+
+            # NOTE: Kubernetes API returned pods, but Prometheus did not
+            if object.pods != []:
+                logger.warning(
                     f"Was not able to load any pods for {object} from Prometheus.\n\t"
                     "This could mean that Prometheus is missing some required metrics.\n\t"
                     "Loaded pods from Kubernetes API instead."
@@ -146,7 +154,7 @@ async def _calculate_object_recommendations(self, object: K8sObjectData) -> RunR
             step=self._strategy.settings.timeframe_timedelta,
         )
 
-        self.debug(f"Calculating recommendations for {object} with {len(metrics)} metrics")
+        logger.debug(f"Calculating recommendations for {object} with {len(metrics)} metrics")
 
         # NOTE: We run this in a threadpool as the strategy calculation might be CPU intensive
         # But keep in mind that numpy calculations will not block the GIL
@@ -170,16 +178,16 @@ async def _gather_object_allocations(self, k8s_object: K8sObjectData) -> Resourc
     async def _collect_result(self) -> Result:
         clusters = await self._k8s_loader.list_clusters()
 
-        if clusters and len(clusters) > 1 and self.config.prometheus_url:
+        if clusters and len(clusters) > 1 and settings.prometheus_url:
             # this can only happen for multi-cluster querying a single centralized prometheus
             # In this scenario we don't yet support determining which metrics belong to which cluster, so the recommendation can be incorrect
             raise ClusterNotSpecifiedException(
                 f"Cannot scan multiple clusters for this prometheus, Rerun with the flag `-c <cluster>` where <cluster> is one of {clusters}"
             )
 
-        self.info(f'Using clusters: {clusters if clusters is not None else "inner cluster"}')
+        logger.info(f'Using clusters: {clusters if clusters is not None else "inner cluster"}')
 
-        with ProgressBar(self.config, title="Calculating Recommendation") as self.__progressbar:
+        with ProgressBar(title="Calculating Recommendation") as self.__progressbar:
             scans_tasks = [
                 asyncio.create_task(self._gather_object_allocations(k8s_object))
                 async for k8s_object in self._k8s_loader.list_scannable_objects(clusters)
@@ -188,10 +196,12 @@
             scans = await asyncio.gather(*scans_tasks)
 
         if len(scans) == 0:
-            self.warning("Current filters resulted in no objects available to scan.")
-            self.warning("Try to change the filters or check if there is anything available.")
-            if self.config.namespaces == "*":
-                self.warning("Note that you are using the '*' namespace filter, which by default excludes kube-system.")
+            logger.warning("Current filters resulted in no objects available to scan.")
+            logger.warning("Try to change the filters or check if there is anything available.")
+            if settings.namespaces == "*":
+                logger.warning(
+                    "Note that you are using the '*' namespace filter, which by default excludes kube-system."
+ ) return Result( scans=[], strategy=StrategyData(name=str(self._strategy).lower(), settings=self._strategy.settings.dict()), @@ -210,25 +220,26 @@ async def run(self) -> None: self._greet() try: - self.config.load_kubeconfig() + settings.load_kubeconfig() except Exception as e: - self.error(f"Could not load kubernetes configuration: {e}") - self.error("Try to explicitly set --context and/or --kubeconfig flags.") + logger.error(f"Could not load kubernetes configuration: {e}") + logger.error("Try to explicitly set --context and/or --kubeconfig flags.") return try: # eks has a lower step limit than other types of prometheus, it will throw an error step_count = self._strategy.settings.history_duration * 60 / self._strategy.settings.timeframe_duration - if self.config.eks_managed_prom and step_count > 11000: + if settings.eks_managed_prom and step_count > 11000: min_step = self._strategy.settings.history_duration * 60 / 10000 - self.warning( - f"The timeframe duration provided is insufficient and will be overridden with {min_step}. Kindly adjust --timeframe_duration to a value equal to or greater than {min_step}." + logger.warning( + f"The timeframe duration provided is insufficient and will be overridden with {min_step}. " + f"Kindly adjust --timeframe_duration to a value equal to or greater than {min_step}." ) self._strategy.settings.timeframe_duration = min_step result = await self._collect_result() self._process_result(result) except ClusterNotSpecifiedException as e: - self.error(e) + logger.error(e) except Exception: - self.console.print_exception(extra_lines=1, max_frames=10) + logger.exception("An unexpected error occurred") diff --git a/robusta_krr/main.py b/robusta_krr/main.py index 4a114e4a..9710c47a 100644 --- a/robusta_krr/main.py +++ b/robusta_krr/main.py @@ -1,19 +1,19 @@ from __future__ import annotations import asyncio -import textwrap +import inspect +import logging from datetime import datetime -from typing import List, Literal, Optional, Union +from typing import List, Optional from uuid import UUID import typer import urllib3 from pydantic import ValidationError # noqa: F401 -from rich import print # noqa: F401 from robusta_krr import formatters as concrete_formatters # noqa: F401 from robusta_krr.core.abstract import formatters -from robusta_krr.core.abstract.strategies import AnyStrategy, BaseStrategy +from robusta_krr.core.abstract.strategies import BaseStrategy from robusta_krr.core.models.config import Config from robusta_krr.core.runner import Runner from robusta_krr.utils.version import get_version @@ -23,35 +23,36 @@ # NOTE: Disable insecure request warnings, as it might be expected to use self-signed certificates inside the cluster urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) +logger = logging.getLogger("krr") + @app.command(rich_help_panel="Utils") def version() -> None: typer.echo(get_version()) -def __process_type(_T: type) -> str: +def __process_type(_T: type) -> type: """Process type to a python literal""" if _T in (int, float, str, bool, datetime, UUID): - return _T.__name__ + return _T elif _T is Optional: - return f"Optional[{__process_type(_T.__args__[0])}]" # type: ignore + return Optional[{__process_type(_T.__args__[0])}] # type: ignore else: - return "str" # It the type is unknown, just use str and let pydantic handle it + return str # It the type is unknown, just use str and let pydantic handle it def load_commands() -> None: for strategy_name, strategy_type in BaseStrategy.get_all().items(): # type: ignore - FUNC_TEMPLATE = 
textwrap.dedent( - """ - @app.command(rich_help_panel="Strategies") - def {func_name}( + # NOTE: This wrapper here is needed to avoid the strategy_name being overwritten in the loop + def strategy_wrapper(_strategy_name: str = strategy_name): + def run_strategy( ctx: typer.Context, kubeconfig: Optional[str] = typer.Option( None, "--kubeconfig", "-k", help="Path to kubeconfig file. If not provided, will attempt to find it.", - rich_help_panel="Kubernetes Settings" + rich_help_panel="Kubernetes Settings", ), clusters: List[str] = typer.Option( None, @@ -59,34 +60,34 @@ def {func_name}( "--cluster", "-c", help="List of clusters to run on. By default, will run on the current cluster. Use --all-clusters to run on all clusters.", - rich_help_panel="Kubernetes Settings" + rich_help_panel="Kubernetes Settings", ), all_clusters: bool = typer.Option( False, "--all-clusters", help="Run on all clusters. Overrides --context.", - rich_help_panel="Kubernetes Settings" + rich_help_panel="Kubernetes Settings", ), namespaces: List[str] = typer.Option( None, "--namespace", "-n", help="List of namespaces to run on. By default, will run on all namespaces.", - rich_help_panel="Kubernetes Settings" + rich_help_panel="Kubernetes Settings", ), resources: List[str] = typer.Option( None, "--resource", "-r", help="List of resources to run on (Deployment, StatefullSet, DaemonSet, Job, Rollout). By default, will run on all resources. Case insensitive.", - rich_help_panel="Kubernetes Settings" + rich_help_panel="Kubernetes Settings", ), selector: Optional[str] = typer.Option( None, "--selector", "-s", help="Selector (label query) to filter on, supports '=', '==', and '!='.(e.g. -s key1=value1,key2=value2). Matching objects must satisfy all of the specified label constraints.", - rich_help_panel="Kubernetes Settings" + rich_help_panel="Kubernetes Settings", ), prometheus_url: Optional[str] = typer.Option( None, @@ -188,15 +189,34 @@ def {func_name}( help="Max workers to use for async requests.", rich_help_panel="Threading Settings", ), - format: str = typer.Option("table", "--formatter", "-f", help="Output formatter ({formatters})", rich_help_panel="Logging Settings"), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose mode", rich_help_panel="Logging Settings"), - quiet: bool = typer.Option(False, "--quiet", "-q", help="Enable quiet mode", rich_help_panel="Logging Settings"), - log_to_stderr: bool = typer.Option(False, "--logtostderr", help="Pass logs to stderr", rich_help_panel="Logging Settings"), - file_output: Optional[str] = typer.Option(None, "--fileoutput", help="Print the output to a file", rich_help_panel="Output Settings"), - slack_output: Optional[str] = typer.Option(None, "--slackoutput", help="Send to output to a slack channel, must have SLACK_BOT_TOKEN", rich_help_panel="Output Settings"), - {strategy_settings}, + format: str = typer.Option( + "table", + "--formatter", + "-f", + help=f"Output formatter ({', '.join(formatters.list_available())})", + rich_help_panel="Logging Settings", + ), + verbose: bool = typer.Option( + False, "--verbose", "-v", help="Enable verbose mode", rich_help_panel="Logging Settings" + ), + quiet: bool = typer.Option( + False, "--quiet", "-q", help="Enable quiet mode", rich_help_panel="Logging Settings" + ), + log_to_stderr: bool = typer.Option( + False, "--logtostderr", help="Pass logs to stderr", rich_help_panel="Logging Settings" + ), + file_output: Optional[str] = typer.Option( + None, "--fileoutput", help="Print the output to a file", 
rich_help_panel="Output Settings" + ), + slack_output: Optional[str] = typer.Option( + None, + "--slackoutput", + help="Send to output to a slack channel, must have SLACK_BOT_TOKEN", + rich_help_panel="Output Settings", + ), + **strategy_args, ) -> None: - '''Run KRR using the `{func_name}` strategy''' + f"""Run KRR using the `{_strategy_name}` strategy""" try: config = Config( @@ -227,47 +247,39 @@ def {func_name}( log_to_stderr=log_to_stderr, file_output=file_output, slack_output=slack_output, - strategy="{func_name}", - other_args={strategy_args}, + strategy=_strategy_name, + other_args=strategy_args, ) - except ValidationError as e: - print(str(e)) + Config.set_config(config) + except ValidationError: + logger.exception("Error occured while parsing arguments") else: - runner = Runner(config) + runner = Runner() asyncio.run(runner.run()) - """ - ) - exec( - FUNC_TEMPLATE.format( - func_name=strategy_name, - strategy_name=strategy_type.__name__, - strategy_settings=",\n".join( - f'{field_name}: {__process_type(field_meta.type_)} = typer.Option({field_meta.default!r}, "--{field_name}", help="{field_meta.field_info.description}", rich_help_panel="Strategy Settings")' + run_strategy.__name__ = strategy_name + signature = inspect.signature(run_strategy) + run_strategy.__signature__ = signature.replace( # type: ignore + parameters=list(signature.parameters.values())[:-1] + + [ + inspect.Parameter( + name=field_name, + kind=inspect.Parameter.KEYWORD_ONLY, + default=typer.Option( + field_meta.default, + f"--{field_name}", + help=f"{field_meta.field_info.description}", + rich_help_panel="Strategy Settings", + ), + annotation=__process_type(field_meta.type_), + ) for field_name, field_meta in strategy_type.get_settings_type().__fields__.items() - ), - strategy_args="{" - + ",\n".join( - f"'{field_name}': {field_name}" for field_name in strategy_type.get_settings_type().__fields__ - ) - + "}", - formatters=", ".join(formatters.list_available()), - ), - globals() - | {strategy.__name__: strategy for strategy in AnyStrategy.get_all().values()} # Defined strategies - | { - "Runner": Runner, - "Config": Config, - "List": List, - "Optional": Optional, - "Union": Union, - "Literal": Literal, - "asyncio": asyncio, - "typer": typer, - # Required imports, here to make the linter happy (it doesn't know that exec will use them) - }, - locals(), - ) + ] + ) + + app.command(rich_help_panel="Strategies")(run_strategy) + + strategy_wrapper() def run() -> None: diff --git a/robusta_krr/utils/configurable.py b/robusta_krr/utils/configurable.py deleted file mode 100644 index 183fc295..00000000 --- a/robusta_krr/utils/configurable.py +++ /dev/null @@ -1,99 +0,0 @@ -import abc -from typing import Literal, Union - -from rich.console import Console - -from robusta_krr.core.models.config import Config - - -class Configurable(abc.ABC): - """ - A class that can be configured with a Config object. - Opens the possibility to use custom logging methods, that can be configured with the Config object. - - Also makes a `console` attribute available, which is a rich console. 
- """ - - def __init__(self, config: Config) -> None: - self.config = config - self.console: Console = self.config.console # type: ignore - - @property - def debug_active(self) -> bool: - return self.config.verbose and not self.config.quiet - - @property - def echo_active(self) -> bool: - return not self.config.quiet - - @staticmethod - def __add_prefix(text: str, prefix: str, /, no_prefix: bool) -> str: - return f"{prefix} {text}" if not no_prefix else text - - def print_result(self, content: str, rich: bool = True) -> None: - """ - Prints the result in a console. The result is always put in stdout. - """ - if rich: - result_console = Console() - result_console.print(content, overflow="ignore") - else: - print(content) - - def echo( - self, message: str = "", *, no_prefix: bool = False, type: Literal["INFO", "WARNING", "ERROR"] = "INFO" - ) -> None: - """ - Echoes a message to the user. - If quiet mode is enabled, the message will not be echoed - """ - - color = {"INFO": "green", "WARNING": "yellow", "ERROR": "red"}[type] - - if self.echo_active: - self.console.print( - self.__add_prefix(message, f"[bold {color}][{type}][/bold {color}]", no_prefix=no_prefix) - ) - - def debug(self, message: str = "") -> None: - """ - Echoes a message to the user if verbose mode is enabled - """ - - if self.debug_active: - self.console.print( - self.__add_prefix( - message, - "[bold green][DEBUG][/bold green]", - no_prefix=False, - ) - ) - - def debug_exception(self) -> None: - """ - Echoes the exception traceback to the user if verbose mode is enabled - """ - - if self.debug_active: - self.console.print_exception() - - def info(self, message: str = "") -> None: - """ - Echoes an info message to the user - """ - - self.echo(message, type="INFO") - - def warning(self, message: str = "") -> None: - """ - Echoes a warning message to the user - """ - - self.echo(message, type="WARNING") - - def error(self, message: Union[str, Exception] = "") -> None: - """ - Echoes an error message to the user - """ - - self.echo(str(message), type="ERROR") diff --git a/robusta_krr/utils/print.py b/robusta_krr/utils/print.py new file mode 100644 index 00000000..1963c3e0 --- /dev/null +++ b/robusta_krr/utils/print.py @@ -0,0 +1,21 @@ +import sys + +from rich import print as r_print + +from robusta_krr.core.models.config import settings + +py_print = print + + +def print(*objects, rich: bool = True, force: bool = False) -> None: + """ + A wrapper around `rich.print` that prints only if `settings.quiet` is False. + """ + print_func = r_print if rich else py_print + output = sys.stdout if force or not settings.log_to_stderr else sys.stderr + + if not settings.quiet or force: + print_func(*objects, file=output) # type: ignore + + +__all__ = ["print"] diff --git a/robusta_krr/utils/progress_bar.py b/robusta_krr/utils/progress_bar.py index 32bf2116..57dc1135 100644 --- a/robusta_krr/utils/progress_bar.py +++ b/robusta_krr/utils/progress_bar.py @@ -1,10 +1,9 @@ from alive_progress import alive_bar -from robusta_krr.core.models.config import Config -from robusta_krr.utils.configurable import Configurable +from robusta_krr.core.models.config import settings -class ProgressBar(Configurable): +class ProgressBar: """ Progress bar for displaying progress of gathering recommendations. @@ -12,11 +11,10 @@ class ProgressBar(Configurable): Use `progress` method to step the progress bar. 
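    Example (a minimal sketch, mirroring how Runner uses it):

        with ProgressBar(title="Calculating Recommendation") as bar:
            bar.progress()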
""" - def __init__(self, config: Config, **kwargs) -> None: - super().__init__(config) - self.show_bar = self.echo_active + def __init__(self, **kwargs) -> None: + self.show_bar = not settings.quiet and not settings.log_to_stderr if self.show_bar: - self.alive_bar = alive_bar(**kwargs) + self.alive_bar = alive_bar(**kwargs, enrich_print=False) def __enter__(self): if self.show_bar: diff --git a/robusta_krr/utils/service_discovery.py b/robusta_krr/utils/service_discovery.py index c1e27631..988a8c0c 100644 --- a/robusta_krr/utils/service_discovery.py +++ b/robusta_krr/utils/service_discovery.py @@ -1,3 +1,4 @@ +import logging from abc import ABC, abstractmethod from typing import Optional @@ -8,16 +9,16 @@ from kubernetes.client.models.v1_ingress import V1Ingress from kubernetes.client.models.v1_service import V1Service -from robusta_krr.core.models.config import Config -from robusta_krr.utils.configurable import Configurable +from robusta_krr.core.models.config import settings +logger = logging.getLogger("krr") -class ServiceDiscovery(Configurable): + +class ServiceDiscovery: SERVICE_CACHE_TTL_SEC = 900 cache: TTLCache = TTLCache(maxsize=1, ttl=SERVICE_CACHE_TTL_SEC) - def __init__(self, config: Config, api_client: Optional[ApiClient] = None) -> None: - super().__init__(config) + def __init__(self, api_client: Optional[ApiClient] = None) -> None: self.api_client = api_client def find_service_url(self, label_selector: str) -> Optional[str]: @@ -35,7 +36,7 @@ def find_service_url(self, label_selector: str) -> Optional[str]: namespace = svc.metadata.namespace port = svc.spec.ports[0].port - if self.config.inside_cluster: + if settings.inside_cluster: return f"http://{name}.{namespace}.svc.cluster.local:{port}" elif self.api_client is not None: @@ -47,7 +48,7 @@ def find_ingress_host(self, label_selector: str) -> Optional[str]: """ Discover the ingress host of the Prometheus if krr is not running in cluster """ - if self.config.inside_cluster: + if settings.inside_cluster: return None v1 = client.NetworkingV1Api(api_client=self.api_client) @@ -69,14 +70,14 @@ def find_url(self, selectors: list[str]) -> Optional[str]: return cached_value for label_selector in selectors: - self.debug(f"Trying to find service with label selector {label_selector}") + logger.debug(f"Trying to find service with label selector {label_selector}") service_url = self.find_service_url(label_selector) if service_url: - self.debug(f"Found service with label selector {label_selector}") + logger.debug(f"Found service with label selector {label_selector}") self.cache[cache_key] = service_url return service_url - self.debug(f"Trying to find ingress with label selector {label_selector}") + logger.debug(f"Trying to find ingress with label selector {label_selector}") self.find_ingress_host(label_selector) ingress_url = self.find_ingress_host(label_selector) if ingress_url: diff --git a/tests/conftest.py b/tests/conftest.py index 81800c39..8906c423 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -82,6 +82,18 @@ def mock_prometheus_loader(): yield +@pytest.fixture(autouse=True, scope="session") +def mock_prometheus_load_pods(): + with patch( + "robusta_krr.core.integrations.prometheus.loader.PrometheusMetricsLoader.load_pods", + new=AsyncMock( + return_value=TEST_OBJECT.pods, + ), + ) as mock_prometheus_loader: + mock_prometheus_loader + yield + + @pytest.fixture(autouse=True, scope="session") def mock_prometheus_init(): with patch("robusta_krr.core.integrations.prometheus.loader.PrometheusMetricsLoader.__init__", 
return_value=None): diff --git a/tests/test_krr.py b/tests/test_krr.py index f013ee42..2a2fb947 100644 --- a/tests/test_krr.py +++ b/tests/test_krr.py @@ -6,7 +6,7 @@ from robusta_krr.main import app, load_commands -runner = CliRunner() +runner = CliRunner(mix_stderr=False) load_commands() STRATEGY_NAME = "simple" @@ -30,8 +30,9 @@ def test_run(log_flag: str): @pytest.mark.parametrize("format", ["json", "yaml", "table", "pprint"]) -def test_output_formats(format: str): - result = runner.invoke(app, [STRATEGY_NAME, "-q", "-f", format]) +@pytest.mark.parametrize("output", ["--logtostderr", "-q"]) +def test_output_formats(format: str, output: str): + result = runner.invoke(app, [STRATEGY_NAME, output, "-f", format]) try: assert result.exit_code == 0, result.exc_info except AssertionError as e: From 248be94aaa84c7680622b3ecb57a6f934d9165ff Mon Sep 17 00:00:00 2001 From: LeaveMyYard Date: Thu, 5 Oct 2023 16:21:56 +0300 Subject: [PATCH 3/5] Add warnings to json result (for UI to use) --- robusta_krr/core/integrations/prometheus/loader.py | 6 +++++- .../metrics_service/prometheus_metrics_service.py | 5 +++++ robusta_krr/core/models/objects.py | 11 +++++++++++ robusta_krr/core/runner.py | 5 +++-- robusta_krr/utils/progress_bar.py | 5 +++-- 5 files changed, 27 insertions(+), 5 deletions(-) diff --git a/robusta_krr/core/integrations/prometheus/loader.py b/robusta_krr/core/integrations/prometheus/loader.py index 9129bb6f..c0995360 100644 --- a/robusta_krr/core/integrations/prometheus/loader.py +++ b/robusta_krr/core/integrations/prometheus/loader.py @@ -70,7 +70,11 @@ def get_metrics_service( return None async def load_pods(self, object: K8sObjectData, period: datetime.timedelta) -> list[PodData]: - return await self.loader.load_pods(object, period) + try: + return await self.loader.load_pods(object, period) + except Exception as e: + logger.exception(f"Failed to load pods for {object}: {e}") + return [] async def gather_data( self, diff --git a/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py b/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py index 7894e53a..091041e4 100644 --- a/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py +++ b/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py @@ -142,6 +142,11 @@ async def gather_data( data = await metric_loader.load_data(object, period, step) if len(data) == 0: + if "CPU" in LoaderClass.__name__: + object.add_warning("NoPrometheusCPUMetrics") + elif "Memory" in LoaderClass.__name__: + object.add_warning("NoPrometheusMemoryMetrics") + logger.warning( f"{metric_loader.service_name} returned no {metric_loader.__class__.__name__} metrics for {object}" ) diff --git a/robusta_krr/core/models/objects.py b/robusta_krr/core/models/objects.py index c5f6a984..62149c39 100644 --- a/robusta_krr/core/models/objects.py +++ b/robusta_krr/core/models/objects.py @@ -27,6 +27,13 @@ class HPAData(pd.BaseModel): target_memory_utilization_percentage: Optional[float] +PodWarning = Literal[ + "NoPrometheusPods", + "NoPrometheusCPUMetrics", + "NoPrometheusMemoryMetrics", +] + + class K8sObjectData(pd.BaseModel): # NOTE: Here None means that we are running inside the cluster cluster: Optional[str] @@ -37,6 +44,7 @@ class K8sObjectData(pd.BaseModel): namespace: str kind: KindLiteral allocations: ResourceAllocations + warnings: set[PodWarning] = set() _api_resource = pd.PrivateAttr(None) @@ -46,6 +54,9 @@ def __str__(self) -> str: def 
__hash__(self) -> int:
         return hash(str(self))
 
+    def add_warning(self, warning: PodWarning) -> None:
+        self.warnings.add(warning)
+
     @property
     def current_pods_count(self) -> int:
         return len([pod for pod in self.pods if not pod.deleted])
diff --git a/robusta_krr/core/runner.py b/robusta_krr/core/runner.py
index 7bf5b320..f50f55f1 100644
--- a/robusta_krr/core/runner.py
+++ b/robusta_krr/core/runner.py
@@ -141,6 +141,7 @@ async def _calculate_object_recommendations(self, object: K8sObjectData) -> RunR
 
         # NOTE: Kubernetes API returned pods, but Prometheus did not
         if object.pods != []:
+            object.add_warning("NoPrometheusPods")
             logger.warning(
                 f"Was not able to load any pods for {object} from Prometheus.\n\t"
                 "This could mean that Prometheus is missing some required metrics.\n\t"
@@ -154,12 +155,12 @@ async def _calculate_object_recommendations(self, object: K8sObjectData) -> RunR
             step=self._strategy.settings.timeframe_timedelta,
         )
 
-        logger.debug(f"Calculating recommendations for {object} with {len(metrics)} metrics")
-
         # NOTE: We run this in a threadpool as the strategy calculation might be CPU intensive
         # But keep in mind that numpy calculations will not block the GIL
         loop = asyncio.get_running_loop()
         result = await loop.run_in_executor(self._executor, self._strategy.run, metrics, object)
+
+        logger.info(f"Calculated recommendations for {object} (using {len(metrics)} metrics)")
 
         return self._format_result(result)
 
     async def _gather_object_allocations(self, k8s_object: K8sObjectData) -> ResourceScan:
diff --git a/robusta_krr/utils/progress_bar.py b/robusta_krr/utils/progress_bar.py
index 57dc1135..b796b6a1 100644
--- a/robusta_krr/utils/progress_bar.py
+++ b/robusta_krr/utils/progress_bar.py
@@ -1,6 +1,6 @@
 from alive_progress import alive_bar
 
-from robusta_krr.core.models.config import settings
+# from robusta_krr.core.models.config import settings
 
 
 class ProgressBar:
@@ -12,7 +12,8 @@ class ProgressBar:
     """
 
     def __init__(self, **kwargs) -> None:
-        self.show_bar = not settings.quiet and not settings.log_to_stderr
+        # self.show_bar = not settings.quiet and not settings.log_to_stderr
+        self.show_bar = False  # FIXME: Progress bar does not work well alongside other log output
         if self.show_bar:
             self.alive_bar = alive_bar(**kwargs, enrich_print=False)
 
From ac1eadac45e7372d77f905c698f7e64808c4da8b Mon Sep 17 00:00:00 2001
From: LeaveMyYard
Date: Mon, 9 Oct 2023 10:55:22 +0300
Subject: [PATCH 4/5] Add readme requirements for used metrics

---
 README.md                         | 31 +++++++++++++++++++++++--------
 robusta_krr/core/models/config.py |  2 +-
 robusta_krr/core/runner.py        |  3 ++-
 3 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index c06e3546..b7c3ecf2 100644
--- a/README.md
+++ b/README.md
@@ -87,6 +87,20 @@ Read more about [how KRR works](#how-it-works) and [KRR vs Kubernetes VPA](#diff
 
 ## Installation
 
+### Requirements
+
+KRR requires Prometheus, running either in your cluster or as a centralized deployment.
+
+In addition, [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics) needs to be running on your cluster, as KRR depends on the following metrics:
+
+- `container_cpu_usage_seconds_total`
+- `container_memory_working_set_bytes`
+- `kube_replicaset_owner`
+- `kube_pod_owner`
+- `kube_pod_status_phase`
+
+_Note: If one of the last three metrics is absent, KRR will still work, but may produce less accurate results and raise a warning._
+
 ### With brew (MacOS/Linux):
 
 1.
Add our tap:
@@ -193,9 +207,10 @@ krr simple -v
 ```
 
 Other helpful flags:
-* `--cpu-min` Sets the minimum recommended cpu value in millicores
-* `--mem-min` Sets the minimum recommended memory value in MB
-* `--history_duration` The duration of the prometheus history data to use (in hours)
+
+- `--cpu-min` Sets the minimum recommended CPU value, in millicores
+- `--mem-min` Sets the minimum recommended memory value, in MB
+- `--history_duration` The duration of the Prometheus history data to use (in hours); a combined example is shown below
 
 More specific information on Strategy Settings can be found using
 
@@ -209,15 +224,14 @@ krr simple --help
 
 With the [free Robusta SaaS platform](https://home.robusta.dev/) you can:
 
-* See why KRR recommends what it does
-* Sort and filter recommendations by namespace, priority, and more
-* Copy a YAML snippet to fix the problems KRR finds
+- See why KRR recommends what it does
+- Sort and filter recommendations by namespace, priority, and more
+- Copy a YAML snippet to fix the problems KRR finds
 
 ![Robusta UI Screen Shot][ui-screenshot]
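As a combined example of the helpful flags listed above, one might run something like the following. The values are purely illustrative and should be tuned to your own workloads:

```sh
# Hypothetical invocation: recommend at least 100 millicores of CPU and 128MB of memory,
# based on the last two weeks (336 hours) of Prometheus history
krr simple --cpu-min 100 --mem-min 128 --history_duration 336
```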

(back to top)

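Before reading about how metrics gathering works below, it can help to confirm that the metrics listed under Requirements are actually queryable in your Prometheus. A minimal sketch, assuming Prometheus has been port-forwarded to localhost:9090 (adjust the URL to your setup):

```sh
# Illustrative check only: count the series behind each metric KRR relies on.
# An empty result for a metric means Prometheus is not collecting it.
for metric in container_cpu_usage_seconds_total container_memory_working_set_bytes \
    kube_replicaset_owner kube_pod_owner kube_pod_status_phase; do
  echo -n "${metric}: "
  curl -s "http://localhost:9090/api/v1/query" --data-urlencode "query=count(${metric})"
  echo
done
```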
-
 ## How it works
 
 ### Metrics Gathering
@@ -431,9 +445,10 @@ python krr.py simple -p "https://prom-api.coralogix..." --coralogix_token
 
 ## Grafana Cloud managed Prometheus
 
-For Grafana Cloud managed Prometheus you need to specify prometheus link, prometheus user, and an access token of your Grafana Cloud stack. The Prometheus link and user for the stack can be found on the Grafana Cloud Portal. An access token with a `metrics:read` scope can also be created using Access Policies on the same portal.
+For Grafana Cloud managed Prometheus you need to specify the Prometheus URL, the Prometheus user, and an access token for your Grafana Cloud stack. The Prometheus URL and user for the stack can be found on the Grafana Cloud Portal. An access token with a `metrics:read` scope can also be created using Access Policies on the same portal.
 
 Next, run the following command, after setting the PROM_URL, PROM_USER, and PROM_TOKEN variables to your Grafana Cloud stack's Prometheus URL, Prometheus user, and access token.
+
 ```sh
 python krr.py simple -p $PROM_URL --prometheus-auth-header "Bearer ${PROM_USER}:${PROM_TOKEN}" --prometheus-ssl-enabled
 ```
diff --git a/robusta_krr/core/models/config.py b/robusta_krr/core/models/config.py
index 6e7ae304..3d6aabc0 100644
--- a/robusta_krr/core/models/config.py
+++ b/robusta_krr/core/models/config.py
@@ -138,7 +138,7 @@ def set_config(config: Config) -> None:
 
 # NOTE: This class is just a proxy for _config.
 # Import settings from this module and use it like it is just a config object.
-class _Settings(Config):
+class _Settings(Config):  # Config here is used for type checking
     def __init__(self) -> None:
         pass
diff --git a/robusta_krr/core/runner.py b/robusta_krr/core/runner.py
index f50f55f1..3c496346 100644
--- a/robusta_krr/core/runner.py
+++ b/robusta_krr/core/runner.py
@@ -145,7 +145,8 @@ async def _calculate_object_recommendations(self, object: K8sObjectData) -> RunR
             logger.warning(
                 f"Was not able to load any pods for {object} from Prometheus.\n\t"
                 "This could mean that Prometheus is missing some required metrics.\n\t"
-                "Loaded pods from Kubernetes API instead."
+                "Loaded pods from Kubernetes API instead.\n\t"
+                "See more info at https://github.com/robusta-dev/krr#requirements"
             )
 
         metrics = await prometheus_loader.gather_data(
From 9a6a48369148f086e57314b1a1f6248926a070d4 Mon Sep 17 00:00:00 2001
From: LeaveMyYard
Date: Fri, 20 Oct 2023 14:08:15 +0300
Subject: [PATCH 5/5] Update readme

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index b7c3ecf2..c533cbc1 100644
--- a/README.md
+++ b/README.md
@@ -89,9 +89,9 @@ Read more about [how KRR works](#how-it-works) and [KRR vs Kubernetes VPA](#diff
 
 ### Requirements
 
-KRR requires Prometheus, running either in your cluster or as a centralized deployment.
+KRR requires Prometheus.
 
 In addition, [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics) needs to be running on your cluster, as KRR depends on the following metrics:
 
 - `container_cpu_usage_seconds_total`
@@ -99,7 +99,7 @@ In addition, [kube-state-metrics](https://github.com/kubernetes/kube-st
 - `kube_pod_owner`
 - `kube_pod_status_phase`
 
-_Note: If one of the last three metrics is absent, KRR will still work, but may produce less accurate results and raise a warning._
+_Note: If one of the last three metrics is absent, KRR will still work, but it will only consider currently-running pods when calculating recommendations. Historic pods that no longer exist in the cluster will not be taken into consideration._
 
 ### With brew (MacOS/Linux):