From ea87febd2bdf0aebf603532be9448e6435f1fea9 Mon Sep 17 00:00:00 2001
From: Hyejin Yoon <0327jane@gmail.com>
Date: Fri, 6 Oct 2023 14:36:32 +0900
Subject: [PATCH] fix: add retry for fetch_url (#8958)

---
Reviewer notes (free text below the "---" separator; commentary only, not
part of the commit message or of the diff):

* Purpose: wrap the body of fetch_urls in a retry loop. The function gains
  two optional parameters, max_retries=3 and retry_delay=5; existing
  three-argument calls keep working.
* The loop is `for attempt in range(max_retries + 1)`: one initial attempt
  plus up to max_retries retries. After a failed attempt that is not the
  last one, the error is printed and time.sleep(retry_delay) is called.
  After the last failed attempt, "Max retries reached" is printed and the
  exception is re-raised to the caller.
* On success the function still prints and returns the list of
  "download_url" values for entries whose "name" ends with file_format.
* `except Exception` covers the whole try body, so failures in
  response.read(), json.loads() and the file["..."] lookups are retried
  too, not only failures of the request itself.
* The progress message prints `attempt + 1` out of `max_retries`, while the
  loop makes `max_retries + 1` attempts; the final attempt never prints an
  "Attempt N/M" line.
* NOTE(review): the explicit 403/5xx status check presumably never fires
  with the default opener, since urlopen is documented to raise HTTPError
  for such responses instead of returning them -- confirm. Either way the
  error ends up in the same except block.
* NOTE(review): this file was recovered from a copy with all line breaks
  collapsed. Line breaks were re-derived from the hunk headers and the
  diffstat; leading indentation (notably the `f.write(chunk)` context
  line) was re-derived from Python block structure -- verify against the
  target file before applying.

 docs-website/download_historical_versions.py | 34 ++++++++++++++------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/docs-website/download_historical_versions.py b/docs-website/download_historical_versions.py
index 83157edc1972c..53ee9cf1e63ef 100644
--- a/docs-website/download_historical_versions.py
+++ b/docs-website/download_historical_versions.py
@@ -1,6 +1,7 @@
 import json
 import os
 import tarfile
+import time
 import urllib.request
 
 repo_url = "https://api.github.com/repos/datahub-project/static-assets"
@@ -16,17 +17,30 @@ def download_file(url, destination):
                 f.write(chunk)
 
 
-def fetch_urls(repo_url: str, folder_path: str, file_format: str):
+def fetch_urls(
+    repo_url: str, folder_path: str, file_format: str, max_retries=3, retry_delay=5
+):
     api_url = f"{repo_url}/contents/{folder_path}"
-    response = urllib.request.urlopen(api_url)
-    data = response.read().decode("utf-8")
-    urls = [
-        file["download_url"]
-        for file in json.loads(data)
-        if file["name"].endswith(file_format)
-    ]
-    print(urls)
-    return urls
+    for attempt in range(max_retries + 1):
+        try:
+            response = urllib.request.urlopen(api_url)
+            if response.status == 403 or (500 <= response.status < 600):
+                raise Exception(f"HTTP Error {response.status}: {response.reason}")
+            data = response.read().decode("utf-8")
+            urls = [
+                file["download_url"]
+                for file in json.loads(data)
+                if file["name"].endswith(file_format)
+            ]
+            print(urls)
+            return urls
+        except Exception as e:
+            if attempt < max_retries:
+                print(f"Attempt {attempt + 1}/{max_retries}: {e}")
+                time.sleep(retry_delay)
+            else:
+                print(f"Max retries reached. Unable to fetch data.")
+                raise
 
 
 def extract_tar_file(destination_path):