From 705f8669e74815d016272495e3da69c23a68fd76 Mon Sep 17 00:00:00 2001 From: Addison Schiller Date: Tue, 24 Oct 2017 15:41:36 -0400 Subject: [PATCH 1/3] Look for `mfr` query param from mfr in v1 provider We can export .gdoc as a pdf if we know it's coming from mfr. Added `alt_export` functions and properties to Gdrive utils and metadata. Gdrive download now looks for 'mfr' in its kwargs and will request the file as a pdf if used properly. --- tests/providers/googledrive/test_metadata.py | 3 ++ tests/providers/googledrive/test_provider.py | 44 +++++++++++++++++++ waterbutler/providers/googledrive/metadata.py | 8 ++++ waterbutler/providers/googledrive/provider.py | 11 ++++- waterbutler/providers/googledrive/utils.py | 18 ++++++++ .../server/api/v1/provider/metadata.py | 1 + 6 files changed, 83 insertions(+), 2 deletions(-) diff --git a/tests/providers/googledrive/test_metadata.py b/tests/providers/googledrive/test_metadata.py index f425504ea..aedfb31d3 100644 --- a/tests/providers/googledrive/test_metadata.py +++ b/tests/providers/googledrive/test_metadata.py @@ -44,6 +44,7 @@ def test_file_metadata_drive(self, basepath, root_provider_fixtures): assert parsed.materialized_path == str(path) assert parsed.is_google_doc is False assert parsed.export_name == item['title'] + assert parsed.alt_export_name == 'PART_1420130849837.pdf' def test_file_metadata_drive_slashes(self, basepath, root_provider_fixtures): item = root_provider_fixtures['file_forward_slash'] @@ -66,6 +67,7 @@ def test_file_metadata_drive_slashes(self, basepath, root_provider_fixtures): assert parsed.materialized_path == str(path) assert parsed.is_google_doc is False assert parsed.export_name == item['title'] + assert parsed.alt_export_name == 'PART_1420130849837.pdf' def test_file_metadata_docs(self, basepath, root_provider_fixtures): item = root_provider_fixtures['docs_file_metadata'] @@ -80,6 +82,7 @@ def test_file_metadata_docs(self, basepath, root_provider_fixtures): } assert parsed.is_google_doc is True
assert parsed.export_name == item['title'] + '.docx' + assert parsed.alt_export_name == 'version-test.pdf' def test_folder_metadata(self, root_provider_fixtures): item = root_provider_fixtures['folder_metadata'] diff --git a/tests/providers/googledrive/test_provider.py b/tests/providers/googledrive/test_provider.py index 81624f5a5..0e94a0a9e 100644 --- a/tests/providers/googledrive/test_provider.py +++ b/tests/providers/googledrive/test_provider.py @@ -588,6 +588,7 @@ class TestDownload: ds.DRIVE_IGNORE_VERSION) GDOC_EXPORT_MIME_TYPE = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' + GDOC_ALT_EXPORT_MIME_TYPE = 'application/pdf' @pytest.mark.asyncio @pytest.mark.aiohttpretty @@ -619,6 +620,36 @@ async def test_download_editable_gdoc_no_revision(self, provider, sharing_fixtur assert aiohttpretty.has_call(method='GET', uri=revisions_url) assert aiohttpretty.has_call(method='GET', uri=download_file_url) + @pytest.mark.asyncio + @pytest.mark.aiohttpretty + async def test_download_editable_gdoc_as_mfr(self, provider, sharing_fixtures): + metadata_body = sharing_fixtures['editable_gdoc']['metadata'] + path = GoogleDrivePath( + '/sharing/editable_gdoc', + _ids=['1', '2', metadata_body['id']] + ) + + metadata_query = provider._build_query(path.identifier) + metadata_url = provider.build_url('files', path.identifier) + aiohttpretty.register_json_uri('GET', metadata_url, body=metadata_body) + + revisions_body = sharing_fixtures['editable_gdoc']['revisions'] + revisions_url = provider.build_url('files', metadata_body['id'], 'revisions') + aiohttpretty.register_json_uri('GET', revisions_url, body=revisions_body) + + file_content = b'we love you conrad' + download_file_url = metadata_body['exportLinks'][self.GDOC_ALT_EXPORT_MIME_TYPE] + aiohttpretty.register_uri('GET', download_file_url, body=file_content, auto_length=True) + + result = await provider.download(path, mfr='true') + assert result.name == 'editable_gdoc.pdf' + + content = await 
result.read() + assert content == file_content + assert aiohttpretty.has_call(method='GET', uri=metadata_url) + assert aiohttpretty.has_call(method='GET', uri=revisions_url) + assert aiohttpretty.has_call(method='GET', uri=download_file_url) + @pytest.mark.asyncio @pytest.mark.aiohttpretty async def test_download_editable_gdoc_good_revision(self, provider, sharing_fixtures): @@ -1577,6 +1608,19 @@ async def test_intra_copy_file(self, provider, root_provider_fixtures): class TestOperationsOrMisc: + def test_misc_utils(self): + metadata = { + 'mimeType': 'application/vnd.google-apps.drawing', + 'exportLinks': { + 'image/jpeg': 'badurl.osf.899' + } + } + ext = drive_utils.get_alt_download_extension(metadata) + link = drive_utils.get_alt_export_link(metadata) + + assert ext == '.jpg' + assert link == 'badurl.osf.899' + @pytest.mark.asyncio @pytest.mark.aiohttpretty async def test_can_duplicate_names(self, provider): diff --git a/waterbutler/providers/googledrive/metadata.py b/waterbutler/providers/googledrive/metadata.py index 5e4fbafa8..e4b5931f7 100644 --- a/waterbutler/providers/googledrive/metadata.py +++ b/waterbutler/providers/googledrive/metadata.py @@ -136,6 +136,14 @@ def export_name(self): title += ext return title + @property + def alt_export_name(self): + title = self._file_title + if self.is_google_doc: + ext = utils.get_alt_download_extension(self.raw) + title += ext + return title + @property def _file_title(self): return self.raw['title'] diff --git a/waterbutler/providers/googledrive/provider.py b/waterbutler/providers/googledrive/provider.py index 73712d601..504a97b71 100644 --- a/waterbutler/providers/googledrive/provider.py +++ b/waterbutler/providers/googledrive/provider.py @@ -234,9 +234,16 @@ async def download(self, # type: ignore metadata = await self.metadata(path, revision=revision) + if 'mfr' in kwargs and kwargs['mfr'] and kwargs['mfr'].lower() == 'true': + download_url = metadata.raw.get('downloadUrl') or 
drive_utils.get_alt_export_link(metadata.raw), # type: ignore + export_name = metadata.alt_export_name + else: + download_url = metadata.raw.get('downloadUrl') or drive_utils.get_export_link(metadata.raw), # type: ignore + export_name = metadata.export_name # type: ignore + download_resp = await self.make_request( 'GET', - metadata.raw.get('downloadUrl') or drive_utils.get_export_link(metadata.raw), # type: ignore + *download_url, range=range, expects=(200, 206), throws=exceptions.DownloadError, @@ -251,7 +258,7 @@ async def download(self, # type: ignore if download_resp.headers.get('Content-Type'): # TODO: Add these properties to base class officially, instead of as one-off stream.content_type = download_resp.headers['Content-Type'] # type: ignore - stream.name = metadata.export_name # type: ignore + stream.name = export_name # type: ignore return stream async def upload(self, stream, path: wb_path.WaterButlerPath, *args, **kwargs) \ diff --git a/waterbutler/providers/googledrive/utils.py b/waterbutler/providers/googledrive/utils.py index 6f5b1be98..4bcf382c8 100644 --- a/waterbutler/providers/googledrive/utils.py +++ b/waterbutler/providers/googledrive/utils.py @@ -4,6 +4,8 @@ 'ext': '.gdoc', 'download_ext': '.docx', 'type': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'alt_download_ext': '.pdf', + 'alt_type': 'application/pdf', }, { 'mime_type': 'application/vnd.google-apps.drawing', @@ -59,6 +61,22 @@ def get_download_extension(metadata): return format['download_ext'] +def get_alt_download_extension(metadata): + format = get_format(metadata) + try: + return format['alt_download_ext'] + except: + return format['download_ext'] + + +def get_alt_export_link(metadata): + format = get_format(metadata) + try: + return metadata['exportLinks'][format['alt_type']] + except: + return metadata['exportLinks'][format['type']] + + def get_export_link(metadata): format = get_format(metadata) return metadata['exportLinks'][format['type']] diff 
--git a/waterbutler/server/api/v1/provider/metadata.py b/waterbutler/server/api/v1/provider/metadata.py index c99fb9fa0..ed1cb39e2 100644 --- a/waterbutler/server/api/v1/provider/metadata.py +++ b/waterbutler/server/api/v1/provider/metadata.py @@ -71,6 +71,7 @@ async def download_file(self): range=request_range, accept_url='direct' not in self.request.query_arguments, mode=self.get_query_argument('mode', default=None), + mfr=self.get_query_argument('mfr', default=None) ) if isinstance(stream, str): From 8947b8f94e9cb4d5f6f350cfc88fe08981f2e619 Mon Sep 17 00:00:00 2001 From: Addison Schiller Date: Tue, 31 Oct 2017 16:11:06 -0400 Subject: [PATCH 2/3] Util variable names Change download_url for `mfr` case --- waterbutler/providers/googledrive/provider.py | 10 +++-- waterbutler/providers/googledrive/utils.py | 39 ++++++++----------- 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/waterbutler/providers/googledrive/provider.py b/waterbutler/providers/googledrive/provider.py index 504a97b71..68444647f 100644 --- a/waterbutler/providers/googledrive/provider.py +++ b/waterbutler/providers/googledrive/provider.py @@ -234,16 +234,18 @@ async def download(self, # type: ignore metadata = await self.metadata(path, revision=revision) - if 'mfr' in kwargs and kwargs['mfr'] and kwargs['mfr'].lower() == 'true': - download_url = metadata.raw.get('downloadUrl') or drive_utils.get_alt_export_link(metadata.raw), # type: ignore + if kwargs.get('mfr', None) and kwargs['mfr'].lower() == 'true': + download_url = drive_utils.get_alt_export_link(metadata.raw) # type: ignore export_name = metadata.alt_export_name else: - download_url = metadata.raw.get('downloadUrl') or drive_utils.get_export_link(metadata.raw), # type: ignore + + # TODO figure out metadata.raw.get('downloadUrl') + download_url = metadata.raw.get('downloadUrl') or drive_utils.get_export_link(metadata.raw) # type: ignore export_name = metadata.export_name # type: ignore download_resp = await self.make_request( 
'GET', - *download_url, + download_url, range=range, expects=(200, 206), throws=exceptions.DownloadError, diff --git a/waterbutler/providers/googledrive/utils.py b/waterbutler/providers/googledrive/utils.py index 4bcf382c8..19fba3628 100644 --- a/waterbutler/providers/googledrive/utils.py +++ b/waterbutler/providers/googledrive/utils.py @@ -39,44 +39,39 @@ def is_docs_file(metadata): def get_mimetype_from_ext(ext): - for format in DOCS_FORMATS: - if format['ext'] == ext: - return format['mime_type'] + for format_type in DOCS_FORMATS: + if format_type['ext'] == ext: + return format_type['mime_type'] def get_format(metadata): - for format in DOCS_FORMATS: - if format['mime_type'] == metadata['mimeType']: - return format + for format_type in DOCS_FORMATS: + if format_type['mime_type'] == metadata['mimeType']: + return format_type return DOCS_DEFAULT_FORMAT def get_extension(metadata): - format = get_format(metadata) - return format['ext'] + format_type = get_format(metadata) + return format_type['ext'] def get_download_extension(metadata): - format = get_format(metadata) - return format['download_ext'] + format_type = get_format(metadata) + return format_type['download_ext'] def get_alt_download_extension(metadata): - format = get_format(metadata) - try: - return format['alt_download_ext'] - except: - return format['download_ext'] + format_type = get_format(metadata) + return format_type.get('alt_download_ext', None) or format_type['download_ext'] def get_alt_export_link(metadata): - format = get_format(metadata) - try: - return metadata['exportLinks'][format['alt_type']] - except: - return metadata['exportLinks'][format['type']] + format_type = get_format(metadata) + export_links = metadata['exportLinks'] + return export_links.get(format_type['alt_type'], None) or export_links[format_type['type']] def get_export_link(metadata): - format = get_format(metadata) - return metadata['exportLinks'][format['type']] + format_type = get_format(metadata) + return 
metadata['exportLinks'][format_type['type']] From 99cc7d7fec5b3d69c3f3f0da80cb99810c224f9c Mon Sep 17 00:00:00 2001 From: Addison Schiller Date: Tue, 28 Nov 2017 16:47:47 -0500 Subject: [PATCH 3/3] fix get_alt_export_link --- waterbutler/providers/googledrive/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/waterbutler/providers/googledrive/utils.py b/waterbutler/providers/googledrive/utils.py index 19fba3628..4ba6cc178 100644 --- a/waterbutler/providers/googledrive/utils.py +++ b/waterbutler/providers/googledrive/utils.py @@ -69,7 +69,10 @@ def get_alt_download_extension(metadata): def get_alt_export_link(metadata): format_type = get_format(metadata) export_links = metadata['exportLinks'] - return export_links.get(format_type['alt_type'], None) or export_links[format_type['type']] + if format_type.get('alt_type'): + return export_links.get(format_type['alt_type']) + else: + return export_links[format_type['type']] def get_export_link(metadata):