Skip to content

Commit

Permalink
added s3 uploading
Browse files Browse the repository at this point in the history
  • Loading branch information
khoroshevskyi committed Nov 28, 2023
1 parent 3c8db2f commit c2fb40d
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 11 deletions.
25 changes: 24 additions & 1 deletion bedboss/bedboss.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
BED_FOLDER_NAME,
BIGBED_FOLDER_NAME,
BEDBOSS_PEP_SCHEMA_PATH,
OUTPUT_FOLDER_NAME,
)
from bedboss.utils import (
extract_file_name,
Expand Down Expand Up @@ -203,6 +204,7 @@ def insert_pep(
just_db_commit: bool = False,
no_db_commit: bool = False,
force_overwrite: bool = False,
upload_s3: bool = False,
pm: pypiper.PipelineManager = None,
*args,
**kwargs,
Expand All @@ -223,6 +225,7 @@ def insert_pep(
:param just_db_commit: whether just to commit the JSON to the database
:param no_db_commit: whether the JSON commit to the database should be skipped
:param force_overwrite: whether to overwrite the existing record
:param upload_s3: whether to upload to s3
:param pm: pypiper object
:return: None
"""
Expand Down Expand Up @@ -270,6 +273,22 @@ def insert_pep(
)
pep.samples[i].record_identifier = bed_id

if upload_s3:
command = f"aws s3 sync {os.path.join(output_folder, BED_FOLDER_NAME)} s3://bedbase/{BED_FOLDER_NAME} --size-only --exclude 'bed_qc/*'"
_LOGGER.info("Uploading to s3 bed files")
pm.run(cmd=command, lock_name="s3_sync_big")

command = f"aws s3 sync {os.path.join(output_folder, BIGBED_FOLDER_NAME)} s3://bedbase/{BIGBED_FOLDER_NAME} --size-only"
_LOGGER.info("Uploading to s3 bigbed files")
pm.run(cmd=command, lock_name="s3_sync_bigbed")

command = f"aws s3 sync {os.path.join(output_folder, OUTPUT_FOLDER_NAME)} s3://bedbase/{OUTPUT_FOLDER_NAME} --size-only"
_LOGGER.info("Uploading to s3 bed statistics files")
pm.run(cmd=command, lock_name="s3_sync_bedstat")

else:
_LOGGER.info("Skipping uploading to s3. Flag `upload_s3` is set to False")

if create_bedset:
_LOGGER.info(f"Creating bedset from {pep.name}")
run_bedbuncher(
Expand Down Expand Up @@ -299,7 +318,11 @@ def main(test_args: dict = None) -> NoReturn:

args_dict = vars(args)

pm_out_folder = args_dict.get("outfolder") or args_dict.get('output_folder') or "test_outfolder",
pm_out_folder = (
args_dict.get("outfolder")
or args_dict.get("output_folder")
or "test_outfolder",
)
pm_out_folder = os.path.join(os.path.abspath(pm_out_folder[0]), "pipeline_manager")

pm = pypiper.PipelineManager(
Expand Down
12 changes: 8 additions & 4 deletions bedboss/bedbuncher/bedbuncher.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@
from pephubclient.helpers import is_registry_path
import logging

from bedboss.const import DEFAULT_BEDBASE_API_URL, DEFAULT_BEDBASE_CACHE_PATH
from bedboss.const import (
DEFAULT_BEDBASE_API_URL,
DEFAULT_BEDBASE_CACHE_PATH,
OUTPUT_FOLDER_NAME,
)


_LOGGER = logging.getLogger("bedboss")
Expand All @@ -25,9 +29,9 @@ def create_bedset_from_pep(
"""
Create bedset from pep file, where sample_name is bed identifier
:param pep:
:param bedbase_api:
:param cache_folder:
:param pep: peppy object with bed files, where each sample either has an attribute holding the bed file identifier or uses sample_name as the bed file identifier
:param bedbase_api: bedbase api url
:param cache_folder: cache folder path
:return:
"""
_LOGGER.info("Creating bedset from pep.")
Expand Down
11 changes: 9 additions & 2 deletions bedboss/bedmaker/bedmaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
STANDARD_CHROM_LIST,
BED_TO_BIGBED_PROGRAM,
BIGBED_TO_BED_PROGRAM,
QC_FOLDER_NAME,
)

_LOGGER = logging.getLogger("bedboss")
Expand Down Expand Up @@ -149,7 +150,9 @@ def __init__(

if not pm:
self.logs_name = "bedmaker_logs"
self.logs_dir = os.path.join(self.bed_parent, self.logs_name, self.sample_name)
self.logs_dir = os.path.join(
self.bed_parent, self.logs_name, self.sample_name
)
if not os.path.exists(self.logs_dir):
_LOGGER.info("bedmaker logs directory doesn't exist. Creating one...")
os.makedirs(self.logs_dir)
Expand All @@ -173,7 +176,11 @@ def make(self) -> NoReturn:
self.make_bed()

if self.check_qc:
bedqc(self.output_bed, outfolder=os.path.join(self.bed_parent, "bed_qc"), pm=self.pm)
bedqc(
self.output_bed,
outfolder=os.path.join(self.bed_parent, QC_FOLDER_NAME),
pm=self.pm,
)

self.make_bigbed()

Expand Down
5 changes: 3 additions & 2 deletions bedboss/bedstat/bedstat.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from typing import Union
import json
import yaml
import os
import requests
import pypiper
import bbconf
import logging
from geniml.io import RegionSet

from bedboss.const import OUTPUT_FOLDER_NAME


_LOGGER = logging.getLogger("bedboss")

Expand Down Expand Up @@ -81,7 +82,7 @@ def bedstat(
"""
# TODO why are we no longer using bbconf to get the output path?
# outfolder_stats = bbc.get_bedstat_output_path()
outfolder_stats = os.path.join(outfolder, "output", "bedstat_output")
outfolder_stats = os.path.join(outfolder, OUTPUT_FOLDER_NAME, "bedstat_output")
try:
os.makedirs(outfolder_stats)
except FileExistsError:
Expand Down
10 changes: 8 additions & 2 deletions bedboss/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,14 @@ def build_argparser() -> ArgumentParser:
sub_all_pep.add_argument(
"--force_overwrite",
action="store_true",
help="Weather to overwrite existing records. Default: False",
help="Whether to overwrite existing records. [Default: False]",
)
sub_all_pep.add_argument(
"--upload-s3",
action="store_true",
help="Whether to upload bed, bigbed, and statistics to s3. "
"Before uploading you have to set up all necessary env vars: "
"AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and AWS_ENDPOINT_URL. [Default: False]",
)

# bed_qc
Expand Down Expand Up @@ -463,5 +470,4 @@ def build_argparser() -> ArgumentParser:
for sub in [sub_all_pep, sub_all, sub_make, sub_stat, sub_qc]:
sub_all_pep = pypiper.add_pypiper_args(sub)


return logmuse.add_logging_options(parser)
2 changes: 2 additions & 0 deletions bedboss/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

BED_FOLDER_NAME = "bed_files"
BIGBED_FOLDER_NAME = "bigbed_files"
OUTPUT_FOLDER_NAME = "output"
QC_FOLDER_NAME = "bed_qc"

# bedmaker

Expand Down

0 comments on commit c2fb40d

Please sign in to comment.