Merge pull request #647 from gulfofmaine/per-server-timeouts

Request config per server

abkfenris authored Dec 20, 2022
2 parents f914268 + cc9c541 commit 9f2a52f
Showing 6 changed files with 103 additions and 27 deletions.
11 changes: 11 additions & 0 deletions Changelog.md
@@ -8,6 +8,17 @@ Changes:

Fixes:

- __Migration__ Enable per-server timeouts and minimum delays between requests. Replaces `RETRIEVE_DATAFRAME_TIMEOUT_SECONDS`.
- Fix invalid `error` logging kwarg (now `extra`).
- Upgraded data retrieval logging from warning to error so failures are sent to Sentry.
- Raise a parse error when `-` is not used to split server and dataset.

## 0.4.8-2 - 12/19/2022

Fixes:

- Gitops repo & CI working directory name mismatch

## 0.4.8 - 12/19/2022

Changes:
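For deployments that set the old environment variable, its value can be carried over once to every server row before it is removed. A hedged sketch, assuming the fields added by the migration below and the `deployments` app layout in this repo:

```python
import os

from deployments.models import ErddapServer

# The old env var defaulted to 60 seconds, which matches the new field default.
timeout = int(os.environ.get("RETRIEVE_DATAFRAME_TIMEOUT_SECONDS", 60))

# Copy the global timeout onto every server row in one UPDATE.
ErddapServer.objects.all().update(request_timeout_seconds=timeout)
```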
35 changes: 35 additions & 0 deletions app/deployments/migrations (new migration file)
@@ -0,0 +1,35 @@
# Generated by Django 4.1.3 on 2022-12-20 00:58

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("deployments", "0039_erddapserver_proxy_cors"),
]

operations = [
migrations.AddField(
model_name="erddapserver",
name="request_refresh_time_seconds",
field=models.FloatField(
default=0,
help_text=(
"Minimum number of seconds to attempt to delay between requests. "
"If dataset refreshes are triggered independently (e.g. via ERDDAP subscriptions) "
"they might ignore this."
),
verbose_name="Refresh request time in seconds",
),
),
migrations.AddField(
model_name="erddapserver",
name="request_timeout_seconds",
field=models.PositiveIntegerField(
default=60,
help_text="Seconds before requests time out.",
verbose_name="Request timeout in seconds",
),
),
]
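Both columns ship with defaults (0 and 60), so no data backfill is needed and the migration should be safe to apply online. A minimal sketch of applying it programmatically, assuming `DJANGO_SETTINGS_MODULE` is configured; the usual route is `python manage.py migrate deployments`:

```python
import django
from django.core.management import call_command

django.setup()  # requires DJANGO_SETTINGS_MODULE to point at the project settings
call_command("migrate", "deployments")  # applies the new deployments migration
```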
14 changes: 14 additions & 0 deletions app/deployments/models.py
@@ -174,6 +174,20 @@ class ErddapServer(models.Model):
"if the remote server does not support CORS"
),
)
+    request_refresh_time_seconds = models.FloatField(
+        "Refresh request time in seconds",
+        default=0,
+        help_text=(
+            "Minimum number of seconds to attempt to delay between requests. "
+            "If dataset refreshes are triggered independently "
+            "(e.g. via ERDDAP subscriptions) they might ignore this."
+        ),
+    )
+    request_timeout_seconds = models.PositiveIntegerField(
+        "Request timeout in seconds",
+        default=60,
+        help_text=("Seconds before requests time out."),
+    )

def __str__(self):
if self.name:
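Because the defaults match the old behavior (no delay, 60-second timeout), nothing changes until a server is tuned. A hedged ORM sketch of tuning a slow or rate-limited server; the server name is hypothetical:

```python
from deployments.models import ErddapServer

# FloatField permits sub-second pacing; the timeout is whole seconds.
ErddapServer.objects.filter(name="NERACOOS").update(
    request_timeout_seconds=300,       # allow responses up to 5 minutes
    request_refresh_time_seconds=2.5,  # wait at least 2.5 s between requests
)
```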
34 changes: 19 additions & 15 deletions app/deployments/tasks.py
@@ -1,4 +1,5 @@
import logging
+import time
from datetime import timedelta

import requests
@@ -39,7 +40,7 @@ def update_values_for_timeseries(timeseries):
return

except Timeout as error:
-        logger.warning(
+        logger.error(
(
f"Timeout when trying to retrieve dataset {timeseries[0].dataset.name} "
f"with constraint {timeseries[0].constraints}: {error}"
@@ -76,7 +77,7 @@ def update_values_for_timeseries(timeseries):
f"Unable to find position in dataframe for {series.platform.name} - "
f"{series.variable}"
)
-        logger.warning(message)
+        logger.error(message)
return

try:
@@ -112,12 +113,15 @@ def refresh_dataset(dataset_id: int, healthcheck: bool = False):
dataset.refresh_attempted = timezone.now()
dataset.save()

+    request_refresh_time_seconds = dataset.server.request_refresh_time_seconds
+
if healthcheck:
dataset.healthcheck_start()

groups = dataset.group_timeseries_by_constraint()

for constraints, timeseries in groups.items():
+        time.sleep(request_refresh_time_seconds)
update_values_for_timeseries(timeseries)

if healthcheck:
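Note that the sleep runs before every group, including the first, and that `time.sleep(0)` is effectively a no-op, so the default of 0 preserves the old behavior. A standalone sketch of the pacing pattern (not the project's code, which inlines this loop in `refresh_dataset`):

```python
import time
from typing import Callable, Iterable


def paced_requests(items: Iterable, request: Callable, delay_seconds: float = 0.0) -> None:
    # Sleep before each request so the server sees at most one request
    # per delay_seconds; a delay of 0 keeps the previous behavior.
    for item in items:
        time.sleep(delay_seconds)
        request(item)


# Example: three requests spaced at least two seconds apart.
paced_requests(["a", "b", "c"], print, delay_seconds=2.0)
```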
@@ -185,7 +189,7 @@ def handle_500_time_range_error(timeseries_group, compare_text: str) -> bool:
try:
times_str = compare_text.rpartition("actual_range:")[-1].rpartition(")")[0]
except (AttributeError, IndexError) as e:
-        logger.warning(
+        logger.error(
(
f"Unable to access and attribute or index of {timeseries_group[0].dataset.name} "
f"with constraint {timeseries_group[0].constraints}: {e}"
@@ -207,7 +211,7 @@ def handle_500_time_range_error(timeseries_group, compare_text: str) -> bool:
try:
end_time = times[0]
except IndexError:
-        logger.warning(
+        logger.error(
(
"Unable to parse datetimes in error processing dataset "
f"{timeseries_group[0].dataset.name} with constraint "
@@ -225,7 +229,7 @@ def handle_500_time_range_error(timeseries_group, compare_text: str) -> bool:
ts.end_time = end_time
ts.save()

-        logger.warning(
+        logger.error(
f"Set end time for {ts} to {end_time} based on responses",
extra=error_extra(timeseries_group, compare_text),
exc_info=True,
@@ -257,7 +261,7 @@ def handle_500_unrecognized_constraint(timeseries_group, compare_text: str) -> bool:
returns True if handled
"""
if "Unrecognized constraint variable=" in compare_text:
-        logger.warning(
+        logger.error(
(
f"Invalid constraint variable for dataset {timeseries_group[0].dataset.name} "
f"with constraints {timeseries_group[0].constraints}"
@@ -303,7 +307,7 @@ def handle_400_errors(timeseries_group, compare_text: str) -> bool:
def handle_429_too_many_requests(timeseries_group, compare_text: str) -> bool:
"""Too many requests too quickly to the server"""
if "Too Many Requests" in compare_text and "code=429" in compare_text:
-        logger.warning(
+        logger.error(
f"Too many requests to server {timeseries_group[0].dataset.server}",
extra=error_extra(timeseries_group, compare_text),
)
@@ -315,7 +319,7 @@ def handle_400_unrecognized_variable(timeseries_group, compare_text: str) -> bool:
def handle_400_unrecognized_variable(timeseries_group, compare_text: str) -> bool:
"""When there is an unrecognized variable requested"""
if "Unrecognized variable=" in compare_text:
-        logger.warning(
+        logger.error(
f"Unrecognized variable for dataset {timeseries_group[0].dataset.name}",
extra=error_extra(timeseries_group, compare_text),
)
@@ -343,9 +347,9 @@ def handle_404_errors(timeseries_group, compare_text: str) -> bool:
def handle_404_dataset_file_not_found(timeseries_group, compare_text: str) -> bool:

if "java.io.FileNotFoundException" in compare_text and "code=404" in compare_text:
-        logger.warning(
+        logger.error(
            f"{timeseries_group[0].dataset.name} does not exist on the server",
-            error=error_extra(timeseries_group, compare_text),
+            extra=error_extra(timeseries_group, compare_text),
)
return True

@@ -355,9 +359,9 @@ def handle_404_dataset_file_not_found(timeseries_group, compare_text: str) -> bool:
def handle_404_no_matching_time(timeseries_group, compare_text: str) -> bool:
"""Handle when the station does not have time for the current request"""
if "No data matches time" in compare_text and "code=404" in compare_text:
-        logger.warning(
+        logger.error(
            f"{timeseries_group[0].dataset.name} does not currently have a valid time",
-            error=error_extra(timeseries_group, compare_text),
+            extra=error_extra(timeseries_group, compare_text),
)
return True

@@ -370,7 +374,7 @@ def handle_404_no_matching_station(timeseries_group, compare_text: str) -> bool:
"Your query produced no matching results" in compare_text
and "There are no matching stations" in compare_text
):
-        logger.warning(
+        logger.error(
(
f"{timeseries_group[0].dataset.name} does not have a requested station. "
"Please check the constraints"
@@ -388,7 +392,7 @@ def handle_404_no_matching_dataset_id(timeseries_group, compare_text: str) -> bool:
"Resource not found" in compare_text
and "Currently unknown datasetID" in compare_text
):
-        logger.warning(
+        logger.error(
(
f"{timeseries_group[0].dataset.name} is currently unknown by the server. "
"Please investigate if the dataset has moved"
@@ -418,7 +422,7 @@ def handle_http_errors(timeseries_group, error: HTTPError) -> bool:
return True

if error.response.status_code == 404:
-        logger.warning(
+        logger.error(
(
f"No rows found for {timeseries_group[0].dataset.name} "
f"with constraint {timeseries_group[0].constraints}: {error}"
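All of the `handle_*` functions above share one contract: inspect the error text, log at error level (so Sentry now records it), and return `True` if the case was handled so the caller can try the next handler otherwise. A simplified standalone sketch of that dispatch pattern; the handler names and match strings are illustrative:

```python
from typing import Callable, List


def handle_429(text: str) -> bool:
    return "Too Many Requests" in text and "code=429" in text


def handle_404(text: str) -> bool:
    return "java.io.FileNotFoundException" in text and "code=404" in text


def dispatch(text: str, handlers: List[Callable[[str], bool]]) -> bool:
    # Try handlers in order; any() short-circuits on the first True.
    return any(handler(text) for handler in handlers)


assert dispatch("Too Many Requests code=429", [handle_429, handle_404])
```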
14 changes: 9 additions & 5 deletions app/deployments/utils/erddap_datasets.py
@@ -1,10 +1,11 @@
-import os
from datetime import datetime, timedelta
from logging import getLogger

from erddapy import ERDDAP
from pandas import DataFrame

+from ..models import ErddapServer
+
logger = getLogger(__name__)


@@ -42,7 +43,12 @@ def setup_variables(
return server


-def retrieve_dataframe(server, dataset: str, constraints, timeseries) -> DataFrame:
+def retrieve_dataframe(
+    server: ErddapServer,
+    dataset: str,
+    constraints,
+    timeseries,
+) -> DataFrame:
"""Returns a dataframe from ERDDAP for a given dataset
Attempts to sort the dataframe by time"""
@@ -53,9 +59,7 @@ def retrieve_dataframe(server, dataset: str, constraints, timeseries) -> DataFrame:
constraints=constraints,
)

-    e.requests_kwargs["timeout"] = float(
-        os.environ.get("RETRIEVE_DATAFRAME_TIMEOUT_SECONDS", 60),
-    )
+    e.requests_kwargs["timeout"] = server.request_timeout_seconds

df = e.to_pandas(parse_dates=True)

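erddapy passes `requests_kwargs` straight through to `requests`, which is how the per-server value becomes the HTTP timeout. A hedged sketch of the same call path outside Django; the server URL and dataset id are illustrative, and constraints are omitted for brevity:

```python
from erddapy import ERDDAP

e = ERDDAP(server="https://coastwatch.pfeg.noaa.gov/erddap", protocol="tabledap")
e.dataset_id = "cwwcNDBCMet"
e.requests_kwargs["timeout"] = 120  # seconds; previously one global env var
df = e.to_pandas(parse_dates=True)  # raises a requests Timeout past 120 s
```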
22 changes: 15 additions & 7 deletions app/deployments/views.py
@@ -8,7 +8,7 @@
from django.views.decorators.cache import cache_page
from rest_framework import viewsets
from rest_framework.decorators import action
-from rest_framework.exceptions import APIException
+from rest_framework.exceptions import APIException, ParseError
from rest_framework.response import Response

from .models import ErddapDataset, ErddapServer, Platform
@@ -49,20 +49,28 @@ class DatasetViewSet(viewsets.ReadOnlyModelViewSet):
queryset = ErddapDataset.objects.select_related("server")
serializer_class = ErddapDatasetSerializer

-    def retrieve(self, request, *args, **kwargs):  # pylint: disable=unused-argument
+    def dataset(self, **kwargs) -> ErddapDataset:
+        """Retrieve the dataset from the request kwargs"""
        pk = kwargs["pk"]

-        server, dataset = pk.split("-", maxsplit=1)
+        try:
+            server, dataset = pk.split("-", maxsplit=1)
+        except ValueError:
+            raise ParseError(
+                detail="Invalid server-dataset key. Server and dataset must be split by `-`.",
+            )

        dataset = get_object_or_404(self.queryset, name=dataset, server__name=server)
+        return dataset
+
+    def retrieve(self, request, *args, **kwargs):  # pylint: disable=unused-argument
+        dataset = self.dataset(**kwargs)
serializer = self.serializer_class(dataset, context={"request": request})
return Response(serializer.data)

@action(detail=True)
def refresh(self, request, **kwargs):
-        pk = kwargs["pk"]
-
-        server, dataset = pk.split("-", maxsplit=1)
-        dataset = get_object_or_404(self.queryset, name=dataset, server__name=server)
+        dataset = self.dataset(**kwargs)

refresh_dataset.delay(dataset.id, healthcheck=True)

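The dataset key has the form `server-dataset` and is split at the first `-`, so dataset names may themselves contain hyphens; a key without any `-` now returns HTTP 400 through DRF's `ParseError` instead of an unhandled `ValueError` (HTTP 500). A standalone sketch of the parsing rule with illustrative names:

```python
from rest_framework.exceptions import ParseError


def split_key(pk: str) -> tuple:
    try:
        server, dataset = pk.split("-", maxsplit=1)
    except ValueError:
        # DRF renders ParseError as an HTTP 400 response.
        raise ParseError(detail="Server and dataset must be split by `-`.")
    return server, dataset


assert split_key("NERACOOS-A01_met-all") == ("NERACOOS", "A01_met-all")
```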
