Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

232 export files improvements #314

Merged
merged 6 commits into from
Nov 10, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions data_registry/templates/includes/files.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
<ul>
{% if files.full %}
<li>
<a href="{% url 'download-export' %}?spider={{ collection.source_id }}&job_id={{ job.id }}&full=true&suffix={{ suffix }}" rel="nofollow" download>{% translate "All time" %}</a>
<a href="{% url 'download' collection.id %}?name=full.{{ suffix }}" rel="nofollow" download>{% translate "All time" %}</a>
<span class="text-muted small">{{ files.full|humanfilesize }}</span>
</li>
{% endif %}
{% for file in files.by_year|dictsortreversed:"year" %}
<li>
<a href="{% url 'download-export' %}?spider={{ collection.source_id }}&job_id={{ job.id }}&year={{ file.year }}&suffix={{ suffix }}" rel="nofollow" download>{{ file.year }}</a>
<a href="{% url 'download' collection.id %}?name={{ file.year }}.{{ suffix }}" rel="nofollow" download>{{ file.year }}</a>
<span class="text-muted small">{{ file.size|humanfilesize }}</span>
</li>
{% empty %}
Expand Down
2 changes: 1 addition & 1 deletion exporter/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
from exporter import views

urlpatterns = [
path("api/download_export", views.download_export, name="download-export"),
path("publication/<int:id>/download", views.download_export, name="download"),
jpmckinney marked this conversation as resolved.
Show resolved Hide resolved
]
22 changes: 12 additions & 10 deletions exporter/views.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,35 @@
from django.http import FileResponse
from django.http.response import HttpResponseBadRequest, HttpResponseNotFound
from django.shortcuts import get_object_or_404

from data_registry.util import collection_queryset
from exporter.util import Export, TaskStatus


def download_export(request):
def download_export(request, id):
"""
Returns an exported file as a FileResponse object.
"""
job_id = int(request.GET.get("job_id") or 0) # guard against path traversal
full = request.GET.get("full")
year = int(request.GET.get("year") or 0) # guard against path traversal
suffix = request.GET.get("suffix")
spider = request.GET.get("spider")
name = request.GET.get("name")

# Guard against path traversal.
if suffix not in ("jsonl.gz", "csv.tar.gz", "xlsx"):
if not name.endswith(("jsonl.gz", "csv.tar.gz", "xlsx")):
return HttpResponseBadRequest("Suffix not recognized")
jpmckinney marked this conversation as resolved.
Show resolved Hide resolved

stem = "full" if full else year
export = Export(job_id, basename=f"{stem}.{suffix}")
collection = get_object_or_404(collection_queryset(request), id=id)

active_job = collection.job.filter(active=True).first()
if not active_job:
return HttpResponseNotFound("No active job was found for this collection")
yolile marked this conversation as resolved.
Show resolved Hide resolved

export = Export(active_job.id, basename=f"{name}")
yolile marked this conversation as resolved.
Show resolved Hide resolved
if export.status != TaskStatus.COMPLETED:
return HttpResponseNotFound("File not found")

return FileResponse(
export.path.open("rb"),
as_attachment=True,
filename=f"{spider}_{stem}.{suffix}",
filename=f"{collection.source_id}_{name}",
# Set Content-Encoding to skip GZipMiddleware. (ContentEncodingMiddleware removes the empty header.)
# https://docs.djangoproject.com/en/4.2/ref/middleware/#module-django.middleware.gzip
headers={"Content-Encoding": ""},
Expand Down
2 changes: 1 addition & 1 deletion tests/data_registry/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def setUpTestData(cls):
@patch("exporter.util.Export.get_files")
def test_detail(self, get_files):
get_files.return_value = {"jsonl": {"by_year": [{"year": 2022, "size": 1}]}}
url = f"/api/download_export?spider=paraguay_dncp_records&job_id={self.job.id}&year=2022&suffix=jsonl.gz"
url = f"/publication/{self.collection.id}/download?name=2022.jsonl.gz"

with self.assertNumQueries(2):
response = Client().get(f"/en/publication/{self.collection.id}")
Expand Down
44 changes: 27 additions & 17 deletions tests/exporter/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,36 +3,47 @@

from django.test import Client, TestCase, override_settings

from data_registry.models import Collection


@override_settings(EXPORTER_DIR=os.path.join("tests", "fixtures"))
class ViewsTests(TestCase):
@classmethod
def setUp(cls):
cls.collection = Collection.objects.create(
id=2,
title="Dirección Nacional de Contrataciones Públicas (DNCP)",
source_id="abc",
public=True,
)
cls.job = cls.collection.job.create(
active=True,
)

def test_download_export_invalid_suffix(self):
with self.assertNumQueries(0):
response = Client().get("/api/download_export?suffix=invalid")
response = Client().get("/publication/2/download?name=invalid")

self.assertEqual(response.status_code, 400)
self.assertEqual(response.content, b"Suffix not recognized")
yolile marked this conversation as resolved.
Show resolved Hide resolved

def test_download_export_empty_parameter(self):
for parameter in ("job_id", "year"):
with self.subTest(parameter=parameter):
with self.assertNumQueries(0):
response = Client().get(f"/api/download_export?suffix=jsonl.gz&{parameter}=")
with self.assertNumQueries(0):
response = Client().get("/publication/2/download?name=")

self.assertEqual(response.status_code, 404)
self.assertEqual(response.content, b"File not found")
self.assertEqual(response.status_code, 400)
self.assertEqual(response.content, b"Suffix not recognized")
yolile marked this conversation as resolved.
Show resolved Hide resolved

def test_download_export_waiting(self):
with self.assertNumQueries(0):
response = Client().get("/api/download_export?suffix=jsonl.gz&year=2000&job_id=0")
with self.assertNumQueries(1):
response = Client().get("/publication/1/download?name=2000.jsonl.gz")

self.assertEqual(response.status_code, 404)
self.assertEqual(response.content, b"File not found")
jpmckinney marked this conversation as resolved.
Show resolved Hide resolved

@patch("exporter.util.Export.lockfile", new_callable=PropertyMock)
def test_download_export_running(self, exists):
with self.assertNumQueries(0):
response = Client().get("/api/download_export?suffix=jsonl.gz&year=2000&job_id=1")
with self.assertNumQueries(2):
response = Client().get("/publication/2/download?name=2000.jsonl.gz")

self.assertEqual(response.status_code, 404)
self.assertEqual(response.content, b"File not found")
Expand All @@ -44,12 +55,11 @@ def test_download_export_completed(self):
("xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
):
with self.subTest(suffix=suffix):
with self.assertNumQueries(0):
with self.assertNumQueries(2):
response = Client().get(
f"/api/download_export?suffix={suffix}&year=2000&job_id=1&spider=abc",
f"/publication/2/download?name=2000.{suffix}",
HTTP_ACCEPT_ENCODING="gzip",
)

self.assertEqual(response.status_code, 200)
response.headers.pop("Content-Length")
self.assertDictEqual(
Expand All @@ -60,10 +70,10 @@ def test_download_export_completed(self):
"Content-Type": content_type,
"Cross-Origin-Opener-Policy": "same-origin",
"Referrer-Policy": "same-origin",
"Vary": "Accept-Language",
"Vary": "Accept-Language, Cookie",
"X-Content-Type-Options": "nosniff",
"X-Frame-Options": "DENY",
},
)
with open(os.path.join("tests", "fixtures", "1", f"2000.{suffix}"), "rb") as f:
with open(os.path.join("tests", "fixtures", "2", f"2000.{suffix}"), "rb") as f:
self.assertEqual(b"".join(response.streaming_content), f.read())
File renamed without changes.
File renamed without changes.
File renamed without changes.