Skip to content

Commit

Permalink
Improved migration
Browse files Browse the repository at this point in the history
  • Loading branch information
espona committed Dec 8, 2020
1 parent cb16d52 commit 2bdefeb
Showing 1 changed file with 54 additions and 5 deletions.
59 changes: 54 additions & 5 deletions ckanext/cloudstorage/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,26 @@
from ckan import model
from ckan.lib import munge
import ckan.plugins as p
import hashlib
import binascii

from libcloud.storage.types import Provider, ObjectDoesNotExistError
from libcloud.storage.providers import get_driver
import libcloud.common.types as types

if p.toolkit.check_ckan_version("2.9"):
from werkzeug.datastructures import FileStorage as UploadedFileType

config = p.toolkit.config
else:
from pylons import config
UploadedFileType = cgi.FieldStorage

UploadedFileType = cgi.FieldStorage

from werkzeug.datastructures import FileStorage as FlaskFileStorage

ALLOWED_UPLOAD_TYPES = (cgi.FieldStorage, FlaskFileStorage)
AWS_UPLOAD_PART_SIZE = 5 * 1024 * 1024


def _get_underlying_file(wrapper):
Expand All @@ -34,6 +39,25 @@ def _get_underlying_file(wrapper):
return wrapper.file


def _md5sum(source_path, part_size=None):
    """Return the multipart-style MD5 checksum of the file at ``source_path``.

    The file is read in consecutive parts of ``part_size`` bytes. Each part
    is hashed with MD5, the raw (binary) digests are concatenated in read
    order, and that concatenation is hashed with MD5 once more. The result
    is the final hex digest followed by ``-`` and the number of parts read.
    This module compares that string against ``cloud_object.hash`` as the
    "multipart hash" of an already uploaded object.

    :param source_path: path of the local file to hash.
    :param part_size: size in bytes of each part. When omitted or ``None``
        the module-level ``AWS_UPLOAD_PART_SIZE`` is used, which keeps the
        behaviour of existing one-argument calls unchanged.
    :returns: ``"<md5 hex digest>-<part count>"``. An empty file yields the
        MD5 of no data followed by ``-0``.
    :raises ValueError: if ``part_size`` is smaller than 1.
    """
    if part_size is None:
        part_size = AWS_UPLOAD_PART_SIZE
    # read(0) returns b"" immediately (nothing would be hashed) and a
    # negative size reads the whole file as one part; both would silently
    # produce a checksum that does not describe the requested part layout.
    if part_size < 1:
        raise ValueError("part_size must be a positive number of bytes")

    part_digests = []
    with open(source_path, "rb") as f:
        # iter() with a sentinel stops at the first empty read, i.e. EOF.
        for block in iter(lambda: f.read(part_size), b""):
            part_digests.append(hashlib.md5(block).digest())

    # Hash the concatenated binary digests, joined once instead of growing
    # a bytes object part by part.
    combined = hashlib.md5(b"".join(part_digests))
    return combined.hexdigest() + "-" + str(len(part_digests))


class CloudStorage(object):
def __init__(self):
self.driver = get_driver(
Expand Down Expand Up @@ -185,7 +209,7 @@ def __init__(self, resource):

# Check to see if a file has been provided
if isinstance(upload_field_storage, (ALLOWED_UPLOAD_TYPES)) and \
upload_field_storage.filename:
upload_field_storage.filename:
self.filename = munge.munge_filename(upload_field_storage.filename)
self.file_upload = _get_underlying_file(upload_field_storage)
resource['url'] = self.filename
Expand Down Expand Up @@ -260,7 +284,7 @@ def upload(self, id, max_size=10):
# In Python 3 libcloud iterates over the uploaded file,
# while it is wrapped in a non-iterator. So, pick the real
# file object and give it to cloudstorage.
#if six.PY3:
# if six.PY3:
# file_upload = file_upload._file

# self.container.upload_object_via_stream(
Expand All @@ -271,10 +295,35 @@ def upload(self, id, max_size=10):
# )
# )

# check if already uploaded
object_name = self.path_from_filename(id, self.filename)
try:
cloud_object = self.container.get_object(object_name=object_name)
print("\t Object found, checking size {0}: {1}".format(object_name, cloud_object.size))
file_size = os.path.getsize(file_upload.name)
print("\t - File size {0}: {1}".format(file_upload.name, file_size))
if file_size == int(cloud_object.size):
print("\t Size fits, checking hash {0}: {1}".format(object_name, cloud_object.hash))
hash_file = hashlib.md5(open(file_upload.name, 'rb').read()).hexdigest()
print("\t - File hash {0}: {1}".format(file_upload.name, hash_file))
# basic hash
if hash_file == cloud_object.hash:
print("\t => File found, matching hash, skipping upload")
return
# multipart hash
multi_hash_file = _md5sum(file_upload.name)
print("\t - File multi hash {0}: {1}".format(file_upload.name, multi_hash_file))
if multi_hash_file == cloud_object.hash:
print("\t => File found, matching hash, skipping upload")
return
print("\t Resource found in the cloud but outdated, uploading")
except ObjectDoesNotExistError:
print("\t Resource not found in the cloud, uploading")

# FIX: replaced call with a simpler version
with open(file_upload.name, 'rb') as iterator:
self.container.upload_object_via_stream(iterator=iterator,
object_name=self.path_from_filename(id, self.filename))
self.container.upload_object_via_stream(iterator=iterator, object_name=object_name)
print("\t => UPLOADED {0}: {1}".format(file_upload.name, object_name))
except ValueError as v:
print(traceback.format_exc())
raise v
Expand Down

0 comments on commit 2bdefeb

Please sign in to comment.