Skip to content

Commit

Permalink
Merge pull request #188 from brain-bican/issue_34
Browse files Browse the repository at this point in the history
Compression support
  • Loading branch information
hkir-dev authored Jul 8, 2024
2 parents 900d86b + 10600c7 commit c9b64a0
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ RUN python3 -m pip install numpy==1.26.4
RUN python3 -m pip install marshmallow==3.21.1
RUN python3 -m pip install python-dateutil==2.9.0
RUN python3 -m pip install --no-deps cas-tools==1.0.1
RUN python3 -m pip install --no-deps tdta==0.1.0.dev15
RUN python3 -m pip install --no-deps tdta==0.1.0.dev16

#RUN Rscript $WORKSPACE/dendR/install_packages.R

Expand Down
38 changes: 38 additions & 0 deletions scripts/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import click
import logging
import subprocess
import zipfile
import shutil
import pandas as pd
from dataclasses import asdict
Expand All @@ -19,6 +20,7 @@

# see Dockerfile
WORKSPACE = "/tools"
GITHUB_SIZE_LIMIT = 50 * 1000 * 1000 # 50 MB


@click.group()
Expand Down Expand Up @@ -48,6 +50,7 @@ def import_data(input, schema, curation_tables, force):
user_data_path = None
user_config_path = None
user_cas_path = None
unzip_files_in_folder(input)
for filename in os.listdir(input):
f = os.path.join(input, filename)
if os.path.isfile(f):
Expand Down Expand Up @@ -110,11 +113,46 @@ def add_new_files_to_git(project_folder, new_files):
project_folder: project folder path
new_files: imported/created file paths to add to the version control
"""
for file_path in new_files:
if os.path.getsize(file_path) > GITHUB_SIZE_LIMIT:
zip_path = zip_file(file_path)
new_files.remove(file_path)
runcmd("cd {dir} && git add {zip_path}".format(dir=project_folder, zip_path=zip_path))
runcmd("cd {dir} && git reset {file_path}".format(dir=project_folder, file_path=file_path))

runcmd("cd {dir} && git add {files}".
format(dir=project_folder,
files=" ".join([t.replace(project_folder, ".", 1) for t in new_files])))


def unzip_files_in_folder(folder_path):
    """
    Extracts every zip archive located directly inside the given folder into that same folder.
    Only regular files are treated as archives, so a sub-folder whose name happens to end with '.zip'
    is skipped instead of aborting the whole run. The extension check is case-insensitive
    ('.zip' and '.ZIP' are both accepted). The folder is not scanned recursively.
    Parameters:
        folder_path: path of the folder to scan for zip archives
    """
    # os.listdir returns a snapshot list, so files created by the extraction
    # itself are not picked up by this loop.
    for file_name in os.listdir(folder_path):
        if not file_name.lower().endswith('.zip'):
            continue
        zip_path = os.path.join(folder_path, file_name)
        # A directory named '*.zip' would make ZipFile raise; ignore anything that is not a file.
        if not os.path.isfile(zip_path):
            continue
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(folder_path)
        print(f"Extracted '{zip_path}'.")


def zip_file(file_path):
    """
    Compresses a single file into a zip archive placed next to it.
    The archive is named after the file with its last extension replaced by '.zip'
    and stores the file under its base name (no directory components).
    Parameters:
        file_path: path of the file to compress
    Returns: path of the created zip archive
    """
    parent_dir, original_name = os.path.split(file_path)
    stem, _extension = os.path.splitext(original_name)
    archive_path = os.path.join(parent_dir, stem + ".zip")

    with zipfile.ZipFile(archive_path, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
        # Store only the bare file name so extraction recreates the file in place.
        archive.write(file_path, arcname=original_name)

    return archive_path


def add_user_table_to_nanobot(user_data_path, schema_folder, curation_tables_folder, cas_schema, delete_source=False, force=False):
"""
Adds user data to the nanobot. Adds user table to the curation tables folder and updates the nanobot table schema.
Expand Down

0 comments on commit c9b64a0

Please sign in to comment.