Skip to content

Commit

Permalink
Add retries (#394)
Browse files Browse the repository at this point in the history
  • Loading branch information
moshemorad authored Jan 16, 2025
1 parent 7274380 commit 08ee404
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 5 deletions.
19 changes: 17 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ idna = "3.7"
urllib3 = "^1.26.20"
setuptools = "^70.0.0"
zipp = "^3.19.1"
tenacity = "^9.0.0"



Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,4 @@ tzlocal==5.2 ; python_version >= "3.9" and python_full_version < "3.13"
urllib3==1.26.19 ; python_version >= "3.9" and python_full_version < "3.13"
websocket-client==1.7.0 ; python_version >= "3.9" and python_full_version < "3.13"
zipp==3.19.2 ; python_version >= "3.9" and python_version < "3.13"
tenacity==9.0.0 ; python_version >= "3.9" and python_version < "3.13"
2 changes: 2 additions & 0 deletions robusta_krr/core/integrations/prometheus/metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import numpy as np
import pydantic as pd
from prometrix import CustomPrometheusConnect
from tenacity import retry, stop_after_attempt, wait_random

from robusta_krr.core.abstract.metrics import BaseMetric
from robusta_krr.core.abstract.strategies import PodsTimeData
Expand Down Expand Up @@ -116,6 +117,7 @@ def _step_to_string(self, step: datetime.timedelta) -> str:
return f"{int(step.total_seconds()) // (60 * 60 * 24)}d"
return f"{int(step.total_seconds()) // 60}m"

@retry(wait=wait_random(min=2, max=10), stop=stop_after_attempt(5))
def _query_prometheus_sync(self, data: PrometheusMetricData) -> list[PrometheusSeries]:
if data.type == QueryType.QueryRange:
response = self.prometheus.safe_custom_query_range(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from kubernetes.client import ApiClient
from prometheus_api_client import PrometheusApiClientException
from prometrix import PrometheusNotFound, get_custom_prometheus_connect
from tenacity import retry, stop_after_attempt, wait_random

from robusta_krr.core.abstract.strategies import PodsTimeData
from robusta_krr.core.integrations import openshift
Expand Down Expand Up @@ -114,13 +115,15 @@ def check_connection(self):
"""
self.prometheus.check_prometheus_connection()

# Retry on any exception raised by the call: at most 5 attempts in total, with a
# random 2-10 second wait between attempts.
# NOTE(review): reraise=True is not passed, so once the attempts are exhausted
# tenacity presumably raises its own RetryError instead of the last underlying
# exception - confirm that callers expect this.
@retry(wait=wait_random(min=2, max=10), stop=stop_after_attempt(5))
async def query(self, query: str) -> dict:
# Run the given query string through self.prometheus and return the "result"
# entry of the response.
# The call is dispatched to self.executor via run_in_executor and awaited,
# rather than being invoked directly inside the coroutine; presumably
# safe_custom_query is a blocking call - confirm.
loop = asyncio.get_running_loop()
return await loop.run_in_executor(
self.executor,
lambda: self.prometheus.safe_custom_query(query=query)["result"],
)

@retry(wait=wait_random(min=2, max=10), stop=stop_after_attempt(5))
async def query_range(self, query: str, start: datetime, end: datetime, step: timedelta) -> dict:
loop = asyncio.get_running_loop()
return await loop.run_in_executor(
Expand Down Expand Up @@ -190,9 +193,12 @@ async def gather_data(
ResourceHistoryData: The gathered resource history data.
"""
logger.debug(f"Gathering {LoaderClass.__name__} metric for {object}")

metric_loader = LoaderClass(self.prometheus, self.name(), self.executor)
data = await metric_loader.load_data(object, period, step)
try:
metric_loader = LoaderClass(self.prometheus, self.name(), self.executor)
data = await metric_loader.load_data(object, period, step)
except Exception:
logger.exception("Failed to gather resource history data for %s", object)
data = {}

if len(data) == 0:
if "CPU" in LoaderClass.__name__:
Expand Down

0 comments on commit 08ee404

Please sign in to comment.