Skip to content

Commit

Permalink
Merge pull request DataShades#26 from DataShades/SEED-411.4
Browse files Browse the repository at this point in the history
Fix resource_create, resource_patch API calls
  • Loading branch information
smotornyuk authored Feb 1, 2021
2 parents 0ff667c + 4d430e6 commit 330a6b3
Show file tree
Hide file tree
Showing 11 changed files with 92 additions and 65 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ first. Run next command from extension folder:

For CKAN>=2.9 use the following command instead:

ckan -c /etc/ckan/default/production.ini ` db upgrade -p cloudstorage
ckan -c /etc/ckan/default/production.ini db upgrade -p cloudstorage

With that feature you can use `cloudstorage_clean_multipart` action, which is available
only for sysadmins. After executing, all unfinished multipart uploads, older than 7 days,
Expand Down
6 changes: 5 additions & 1 deletion ckanext/cloudstorage/helpers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from ckanext.cloudstorage.storage import ResourceCloudStorage

import ckan.plugins.toolkit as tk

def use_secure_urls():
return all([
Expand All @@ -10,3 +10,7 @@ def use_secure_urls():
'S3' in ResourceCloudStorage.driver_name.fget(None),
'host' in ResourceCloudStorage.driver_options.fget(None),
])


def use_multipart_upload():
return use_secure_urls()
5 changes: 4 additions & 1 deletion ckanext/cloudstorage/plugin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@ def update_config(self, config):
# ITemplateHelpers

def get_helpers(self):
return dict(cloudstorage_use_secure_urls=helpers.use_secure_urls)
return dict(
cloudstorage_use_secure_urls=helpers.use_secure_urls,
cloudstorage_use_multipart_upload=helpers.use_multipart_upload,
)

# IConfigurable

Expand Down
71 changes: 38 additions & 33 deletions ckanext/cloudstorage/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# -*- coding: utf-8 -*-
import cgi
import mimetypes
import logging
import os
import six
from six.moves.urllib.parse import urljoin
Expand Down Expand Up @@ -29,6 +30,8 @@

from werkzeug.datastructures import FileStorage as FlaskFileStorage

log = logging.getLogger(__name__)

ALLOWED_UPLOAD_TYPES = (cgi.FieldStorage, FlaskFileStorage)
AWS_UPLOAD_PART_SIZE = 5 * 1024 * 1024

Expand All @@ -39,20 +42,20 @@ def _get_underlying_file(wrapper):
return wrapper.file


def _md5sum(source_path):
def _md5sum(fobj):
block_count = 0
block = True
md5string = b''
with open(source_path, "rb") as f:
while block:
block = f.read(AWS_UPLOAD_PART_SIZE)
if block:
block_count += 1
hash_obj = hashlib.md5()
hash_obj.update(block)
md5string = md5string + binascii.unhexlify(hash_obj.hexdigest())
else:
break
while block:
block = fobj.read(AWS_UPLOAD_PART_SIZE)
if block:
block_count += 1
hash_obj = hashlib.md5()
hash_obj.update(block)
md5string = md5string + binascii.unhexlify(hash_obj.hexdigest())
else:
break
fobj.seek(0, os.SEEK_SET)
hash_obj = hashlib.md5()
hash_obj.update(md5string)
return hash_obj.hexdigest() + "-" + str(block_count)
Expand Down Expand Up @@ -299,36 +302,38 @@ def upload(self, id, max_size=10):
object_name = self.path_from_filename(id, self.filename)
try:
cloud_object = self.container.get_object(object_name=object_name)
print("\t Object found, checking size {0}: {1}".format(object_name, cloud_object.size))
file_size = os.path.getsize(file_upload.name)
print("\t - File size {0}: {1}".format(file_upload.name, file_size))
log.debug("\t Object found, checking size %s: %s", object_name, cloud_object.size)
if os.path.isfile(self.filename):
file_size = os.path.getsize(self.filename)
else:
self.file_upload.seek(0, os.SEEK_END)
file_size = self.file_upload.tell()
self.file_upload.seek(0, os.SEEK_SET)

log.debug("\t - File size %s: %s", self.filename, file_size)
if file_size == int(cloud_object.size):
print("\t Size fits, checking hash {0}: {1}".format(object_name, cloud_object.hash))
hash_file = hashlib.md5(open(file_upload.name, 'rb').read()).hexdigest()
print("\t - File hash {0}: {1}".format(file_upload.name, hash_file))
log.debug("\t Size fits, checking hash %s: %s", object_name, cloud_object.hash)
hash_file = hashlib.md5(self.file_upload.read()).hexdigest()
self.file_upload.seek(0, os.SEEK_SET)
log.debug("\t - File hash %s: %s", self.filename, hash_file)
# basic hash
if hash_file == cloud_object.hash:
print("\t => File found, matching hash, skipping upload")
log.debug("\t => File found, matching hash, skipping upload")
return
# multipart hash
multi_hash_file = _md5sum(file_upload.name)
print("\t - File multi hash {0}: {1}".format(file_upload.name, multi_hash_file))
multi_hash_file = _md5sum(self.file_upload)
log.debug("\t - File multi hash %s: %s", self.filename, multi_hash_file)
if multi_hash_file == cloud_object.hash:
print("\t => File found, matching hash, skipping upload")
log.debug("\t => File found, matching hash, skipping upload")
return
print("\t Resource found in the cloud but outdated, uploading")
log.debug("\t Resource found in the cloud but outdated, uploading")
except ObjectDoesNotExistError:
print("\t Resource not found in the cloud, uploading")

# FIX: replaced call with a simpler version
with open(file_upload.name, 'rb') as iterator:
self.container.upload_object_via_stream(iterator=iterator, object_name=object_name)
print("\t => UPLOADED {0}: {1}".format(file_upload.name, object_name))
except ValueError as v:
print(traceback.format_exc())
raise v
except types.InvalidCredsError as err:
print(traceback.format_exc())
log.debug("\t Resource not found in the cloud, uploading")

self.container.upload_object_via_stream(iterator=iter(file_upload), object_name=object_name)
log.debug("\t => UPLOADED %s: %s", self.filename, object_name)
except (ValueError, types.InvalidCredsError) as err:
log.error(traceback.format_exc())
raise err

elif self._clear and self.old_filename and not self.leave_files:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
<div
data-module="cloudstorage-multipart-upload"
data-module-cloud='S3'
{# prevent type-guessing inside JS module by prefixing ID with underscore #}
data-module-package-id="_{{ pkg_name }}"
data-module-max-size="{{ max_size }}"
>
{% if h.cloudstorage_use_multipart_upload() %}
data-module="cloudstorage-multipart-upload"
data-module-cloud='S3'
{# prevent type-guessing inside JS module by prefixing ID with underscore #}
data-module-package-id="_{{ pkg_name }}"
data-module-max-size="{{ max_size }}"
{% endif %}
>
{{ parent() }}

</div>
8 changes: 3 additions & 5 deletions ckanext/cloudstorage/tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def with_request_context(test_request_context):
yield

@pytest.fixture
def make_resource(clean_db, ckan_config, monkeypatch, tmpdir):
def create_with_upload(clean_db, ckan_config, monkeypatch, tmpdir):
"""Shortcut for creating uploaded resource.
Requires content and name for newly created resource. By default
is using `resource_create` action, but it can be changed by
Expand All @@ -165,8 +165,8 @@ def make_resource(clean_db, ckan_config, monkeypatch, tmpdir):
additional named arguments, that will be used as resource
properties.
Example::
def test_uploaded_resource(make_resource):
resource = make_resource("hello world", "file.txt")
def test_uploaded_resource(create_with_upload):
resource = create_with_upload("hello world", "file.txt", package_id=factories.Dataset()['id'])
assert resource["url_type"] == "upload"
assert resource["format"] == "TXT"
assert resource["size"] == 11
Expand All @@ -186,7 +186,5 @@ def factory(data, filename, context={}, **kwargs):
u"upload": test_resource,
}
params.update(kwargs)
if u'package_id' not in params:
params[u'package_id'] = factories.Dataset()[u"id"]
return test_helpers.call_action(action, context, **params)
return factory
9 changes: 3 additions & 6 deletions ckanext/cloudstorage/tests/logic/action/test_multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ def test_upload(self):
storage = ResourceCloudStorage(res)
assert storage.path_from_filename(
res['id'], filename) == multipart['name']
with pytest.raises(ObjectDoesNotExistError):
storage.get_url_from_filename(res['id'], filename)
assert storage.get_url_from_filename(res['id'], filename) is None

fp = six.BytesIO(b'b' * 1024 * 1024 * 5)
fp.seek(0)
Expand All @@ -36,8 +35,7 @@ def test_upload(self):
partNumber=1,
upload=FakeFileStorage(fp, filename))

with pytest.raises(ObjectDoesNotExistError):
storage.get_url_from_filename(res['id'], filename)
assert storage.get_url_from_filename(res['id'], filename) is None

fp = six.BytesIO(b'a' * 1024 * 1024 * 5)
fp.seek(0)
Expand All @@ -47,8 +45,7 @@ def test_upload(self):
partNumber=2,
upload=FakeFileStorage(fp, filename))

with pytest.raises(ObjectDoesNotExistError):
storage.get_url_from_filename(res['id'], filename)
assert storage.get_url_from_filename(res['id'], filename) is None

result = helpers.call_action(
'cloudstorage_finish_multipart', uploadId=multipart['id'])
Expand Down
7 changes: 3 additions & 4 deletions ckanext/cloudstorage/tests/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,18 @@ def test_required_config(self, ckan_config, monkeypatch, option):
plugin.configure(ckan_config)

@pytest.mark.usefixtures('clean_db')
def test_before_delete(self, make_resource):
def test_before_delete(self, create_with_upload):
"""When resource deleted, we must remove corresponding file from S3.
"""
name = 'test.txt'
resource = make_resource('hello world', name, name=name)
resource = create_with_upload('hello world', name, name=name, package_id=factories.Dataset()['id'])
plugin = p.get_plugin('cloudstorage')
uploader = plugin.get_resource_uploader(resource)
assert uploader.get_url_from_filename(resource['id'], name)

helpers.call_action('resource_delete', id=resource['id'])
with pytest.raises(ObjectDoesNotExistError):
assert uploader.get_url_from_filename(resource['id'], name)
assert uploader.get_url_from_filename(resource['id'], name) is None

@pytest.mark.usefixtures('clean_db')
def test_before_delete_for_linked_resource(self):
Expand Down
21 changes: 17 additions & 4 deletions ckanext/cloudstorage/tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pytest

from six.moves.urllib.parse import urlparse
from ckan.tests import factories

from ckanext.cloudstorage.storage import CloudStorage, ResourceCloudStorage

Expand All @@ -23,20 +24,32 @@ def test_props(self):
@pytest.mark.ckan_config('ckan.plugins', 'cloudstorage')
@pytest.mark.usefixtures('with_driver_options', 'with_plugins')
class TestResourceCloudStorage(object):
def test_not_secure_url_from_filename(self, make_resource):
def test_not_secure_url_from_filename(self, create_with_upload):
filename = 'file.txt'
resource = make_resource('test', filename)
resource = create_with_upload('test', filename, package_id=factories.Dataset()['id'])
storage = ResourceCloudStorage(resource)
url = storage.get_url_from_filename(resource['id'], filename)
assert storage.container_name in url
assert not urlparse(url).query

@pytest.mark.ckan_config('ckanext.cloudstorage.use_secure_urls', True)
def test_secure_url_from_filename(self, make_resource):
def test_secure_url_from_filename(self, create_with_upload):
filename = 'file.txt'
resource = make_resource('test', filename)
resource = create_with_upload('test', filename, package_id=factories.Dataset()['id'])
storage = ResourceCloudStorage(resource)
if not storage.can_use_advanced_aws or not storage.use_secure_urls:
pytest.skip('SecureURL not supported')
url = storage.get_url_from_filename(resource['id'], filename)
assert urlparse(url).query

@pytest.mark.ckan_config('ckanext.cloudstorage.use_secure_urls', True)
def test_hash_check(self, create_with_upload):
filename = 'file.txt'
resource = create_with_upload('test', filename, package_id=factories.Dataset()['id'])
storage = ResourceCloudStorage(resource)
if not storage.can_use_advanced_aws or not storage.use_secure_urls:
pytest.skip('SecureURL not supported')
url = storage.get_url_from_filename(resource['id'], filename)
resource = create_with_upload('test', filename, action='resource_update', id=resource['id'])

assert urlparse(url).query
4 changes: 2 additions & 2 deletions ckanext/cloudstorage/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ def test_status_codes(self, app):
app.get(url, status=302, extra_environ=env, follow_redirects=False)

@pytest.mark.usefixtures('clean_db')
def test_download(self, make_resource, app):
def test_download(self, create_with_upload, app):
filename = 'file.txt'
resource = make_resource('hello world', filename)
resource = create_with_upload('hello world', filename, package_id=factories.Dataset()['id'])
url = tk.url_for(
'resource.download',
id=resource['package_id'],
Expand Down
10 changes: 8 additions & 2 deletions test.ini
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use = config:../ckan/test-core.ini

# Logging configuration
[loggers]
keys = root, ckan, sqlalchemy
keys = root, ckan, sqlalchemy, ckanext

[handlers]
keys = console
Expand Down Expand Up @@ -45,4 +45,10 @@ level = NOTSET
formatter = generic

[formatter_generic]
format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s
format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s

[logger_ckanext]
level = DEBUG
handlers = console
qualname = ckanext
propagate = 0

0 comments on commit 330a6b3

Please sign in to comment.