fix: add retry for fetch_url (#8958)
yoonhyejin authored Oct 6, 2023
1 parent 26bc039 commit ea87feb
Showing 1 changed file with 24 additions and 10 deletions.

docs-website/download_historical_versions.py
@@ -1,6 +1,7 @@
 import json
 import os
 import tarfile
+import time
 import urllib.request
 
 repo_url = "https://api.github.com/repos/datahub-project/static-assets"
@@ -16,17 +17,30 @@ def download_file(url, destination):
             f.write(chunk)
 
 
-def fetch_urls(repo_url: str, folder_path: str, file_format: str):
+def fetch_urls(
+    repo_url: str, folder_path: str, file_format: str, max_retries=3, retry_delay=5
+):
     api_url = f"{repo_url}/contents/{folder_path}"
-    response = urllib.request.urlopen(api_url)
-    data = response.read().decode("utf-8")
-    urls = [
-        file["download_url"]
-        for file in json.loads(data)
-        if file["name"].endswith(file_format)
-    ]
-    print(urls)
-    return urls
+    for attempt in range(max_retries + 1):
+        try:
+            response = urllib.request.urlopen(api_url)
+            if response.status == 403 or (500 <= response.status < 600):
+                raise Exception(f"HTTP Error {response.status}: {response.reason}")
+            data = response.read().decode("utf-8")
+            urls = [
+                file["download_url"]
+                for file in json.loads(data)
+                if file["name"].endswith(file_format)
+            ]
+            print(urls)
+            return urls
+        except Exception as e:
+            if attempt < max_retries:
+                print(f"Attempt {attempt + 1}/{max_retries}: {e}")
+                time.sleep(retry_delay)
+            else:
+                print(f"Max retries reached. Unable to fetch data.")
+                raise
 
 
 def extract_tar_file(destination_path):
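
For context, here is a minimal usage sketch of the retried helper, assuming the fetch_urls definition from the diff above. The folder path and file format arguments below are illustrative placeholders, not values taken from the rest of the script. Because urllib.request.urlopen raises urllib.error.HTTPError for 4xx/5xx responses, a transient server error lands in the except branch and is retried up to max_retries more times, sleeping retry_delay seconds between attempts; after the final failed attempt the exception propagates to the caller.

    # Usage sketch: assumes fetch_urls as defined in the diff above.
    # "versions" and ".json" are hypothetical placeholder arguments.
    repo_url = "https://api.github.com/repos/datahub-project/static-assets"

    try:
        # With max_retries=3 and retry_delay=5, there are up to 4 attempts
        # total, with a 5-second pause between failed attempts.
        urls = fetch_urls(repo_url, "versions", ".json", max_retries=3, retry_delay=5)
        for url in urls:
            print(f"would download: {url}")
    except Exception as e:
        # Raised only after all max_retries + 1 attempts have failed.
        print(f"Could not list downloads: {e}")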
