Skip to content

Commit

Permalink
changed library for downloads
Browse files Browse the repository at this point in the history
  • Loading branch information
andremann committed Nov 6, 2024
1 parent f6587fd commit 4afc37d
Showing 1 changed file with 13 additions and 6 deletions.
19 changes: 13 additions & 6 deletions notebooks/beginners_kit.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"outputs": [],
"source": [
"import os\n",
"from urllib.request import urlretrieve\n",
"import requests\n",
"from urllib.parse import urlsplit\n",
"import tarfile\n",
"from pathlib import Path\n",
Expand All @@ -47,15 +47,23 @@
"\n",
"\n",
"\n",
"def download_tar(url, path):\n",
"def download_and_extract(url, path):\n",
" tar_name = urlsplit(url).path.split('/')[-1] # publication.tar\n",
" tar_path = os.path.join(path, tar_name) # data/raw/publication.tar\n",
" untarred_folder = tar_name.split('.')[0] # publication\n",
" untarred_path = os.path.join(path, untarred_folder) # data/raw/publication\n",
" if not os.path.exists(untarred_path):\n",
" if not os.path.exists(tar_path):\n",
" print(f\"downloading ${url}\")\n",
" urlretrieve(url, tar_path)\n",
" # urlretrieve(url, tar_path)\n",
" try:\n",
" with requests.get(url, stream=True) as response:\n",
" response.raise_for_status()\n",
" with open(tar_path, 'wb') as f:\n",
" for chunk in response.iter_content(chunk_size=8192):\n",
" f.write(chunk)\n",
" except requests.exceptions.RequestException as e:\n",
" print(\"Error downloading the file:\", e)\n",
"\n",
" print(f\"untar ${tar_name}\")\n",
" with tarfile.open(tar_path, \"r\") as tar:\n",
Expand All @@ -65,10 +73,9 @@
" os.remove(tar_path)\n",
"\n",
"\n",
"\"\"\" Downloads data into /data/raw\n",
"\"\"\"\n",
"# Download data into /app/openaire/data/raw\n",
"for tar in openaire_files:\n",
" download_tar(tar, \"/app/openaire/data/raw\")"
" download_and_extract(tar, \"/app/openaire/data/raw\")"
]
},
{
Expand Down

0 comments on commit 4afc37d

Please sign in to comment.