Commit 4e52ff0: 1.5.0
mbaudis committed Feb 20, 2024
1 parent 46c5fde
Showing 60 changed files with 6,010 additions and 100,266 deletions.
6 changes: 2 additions & 4 deletions bin/ISCNsegmenter.py

@@ -26,14 +26,12 @@
 ################################################################################
 
 def main():
-
     iscn_segmenter()
 
 ################################################################################
 
 def iscn_segmenter():
-
-    initialize_bycon_service(byc)
+    initialize_bycon_service(byc, "iscn_segmenter")
     run_beacon_init_stack(byc)
     generate_genome_bins(byc)
 
@@ -59,7 +57,7 @@ def iscn_segmenter():
     else:
         output_file = path.splitext(output_file)[0]
 
-    if byc["test_mode"] is True:
+    if BYC["TEST_MODE"] is True:
        output_file += "_test"
 
    output_file += ".pgxseg"
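The recurring change of this release is visible in the two hunks above: each script now passes its service name to `initialize_bycon_service`, and the per-request `byc["test_mode"]` flag has moved to a module-level `BYC["TEST_MODE"]` constant. A minimal, self-contained sketch of that calling pattern; the `BYC` dict content and the stub function body are stand-ins, not the actual bycon internals:

BYC = {
    "TEST_MODE": False,   # module-level constant, formerly byc["test_mode"]
}

def initialize_bycon_service(byc, service_name=None):
    # hypothetical stand-in; the real initializer loads service-specific defaults
    byc.update({"service_name": service_name or "generic"})

byc = {}
initialize_bycon_service(byc, "iscn_segmenter")

output_file = "segments"
if BYC["TEST_MODE"] is True:
    output_file += "_test"   # test runs write to a separate file, as in the diff
output_file += ".pgxseg"
print(output_file)           # segments.pgxseg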
36 changes: 17 additions & 19 deletions bin/analysesStatusmapsRefresher.py

@@ -9,9 +9,8 @@
 
 services_lib_path = path.join( path.dirname( path.abspath(__file__) ), pardir, "services", "lib" )
 sys.path.append( services_lib_path )
 from bycon_bundler import ByconBundler
-from bycon_plot import *
 from interval_utils import generate_genome_bins, interval_cnv_arrays
 from collation_utils import set_collation_types
 
 """
@@ -28,39 +27,38 @@
 ################################################################################
 
 def main():
-
     callsets_refresher()
 
 ################################################################################
 
 def callsets_refresher():
-
-    initialize_bycon_service(byc)
+    initialize_bycon_service(byc, "callsets_refresher")
+    run_beacon_init_stack(byc)
+    generate_genome_bins(byc)
 
     if len(byc["dataset_ids"]) > 1:
         print("Please give only one dataset using -d")
         exit()
 
     ds_id = byc["dataset_ids"][0]
-    print(f'=> Using data values from {ds_id}')
 
-    generate_genome_bins(byc)
     set_collation_types(byc)
+    print(f'=> Using data values from {ds_id} for {byc.get("genomic_interval_count", 0)} intervals...')
 
-    data_client = MongoClient(host=byc["mongohost"])
+    form = byc.get("form_data", {})
+    data_client = MongoClient(host=DB_MONGOHOST)
     data_db = data_client[ ds_id ]
     cs_coll = data_db[ "analyses" ]
     v_coll = data_db[ "variants" ]
 
     record_queries = ByconQuery(byc).recordsQuery()
-
-    execute_bycon_queries( ds_id, record_queries, byc )
-
-    ds_results = byc["dataset_results"][ds_id]
+    res = execute_bycon_queries( ds_id, record_queries, byc )
+    ds_results = res.get(ds_id, {})
+    has_analyses = ds_results.get("analyses._id")
 
     no_cnv_type = 0
 
-    if not "analyses._id" in ds_results.keys():
+    if not has_analyses:
         cs_ids = []
         for cs in cs_coll.find( {} ):
             cs_ids.append(cs["_id"])
@@ -87,7 +85,7 @@ def callsets_refresher():
 
         bar.next()
 
-        if not "CNV" in cs.get("variant_class", "CNV"):
+        if "SNV" in cs.get("variant_class", "CNV"):
             no_cnv_type += 1
             continue
 
@@ -102,13 +100,13 @@ def callsets_refresher():
         cs_update_obj.update({"cnv_statusmaps": maps})
         cs_update_obj.update({"cnv_stats": cs_cnv_stats})
         cs_update_obj.update({"cnv_chro_stats": cs_chro_stats})
-        cs_update_obj.update({ "updated": datetime.now().isoformat() })
+        cs_update_obj.update({ "updated": datetime.datetime.now().isoformat() })
 
-        if not byc["test_mode"]:
+        if BYC.get("TEST_MODE", False) is True:
+            prjsonnice(cs_chro_stats)
+        else:
             cs_coll.update_one( { "_id": _id }, { '$set': cs_update_obj } )
             updated += 1
-        else:
-            prjsonnice(cs_chro_stats)
 
         ####################################################################
         ####################################################################
@@ -118,7 +116,7 @@ def callsets_refresher():
 
     print(f"{counter} analyses were processed")
    print(f"{no_cnv_type} analyses were not from CNV calling")
-    print(f'{updated} analyses were updated for\n `cnv_statusmaps`\n `cnv_stats`\n `cnv_chro_stats`\nusing {byc["genomic_interval_count"]} bins ({byc["interval_definitions"].get("genome_binning", "")})')
+    print(f'{updated} analyses were updated for\n `cnv_statusmaps`\n `cnv_stats`\n `cnv_chro_stats`\nusing {byc["genomic_interval_count"]} bins ({form.get("genome_binning", "")})')
 
 ################################################################################
 ################################################################################
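Besides the same initialization change, the refresher now captures the return value of `execute_bycon_queries` instead of reading results back out of the shared `byc` object. A sketch of just that calling pattern, with a mocked query function; the return shape `{dataset_id: {result_key: ...}}` is inferred from the diff, not confirmed from the library:

def execute_bycon_queries(ds_id, record_queries, byc):
    # mock stand-in; only the return shape mirrors the hunk above
    return {ds_id: {"analyses._id": ["a1", "a2"]}}

byc = {}
ds_id = "progenetix"
res = execute_bycon_queries(ds_id, {}, byc)      # new: use the returned results
ds_results = res.get(ds_id, {})                  # safe per-dataset lookup
has_analyses = ds_results.get("analyses._id")    # falsy when nothing matched

if not has_analyses:
    print("no matched analyses - the script then falls back to the full collection")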
53 changes: 16 additions & 37 deletions bin/collationsCreator.py

@@ -30,34 +30,30 @@ def main():
 ################################################################################
 
 def collations_creator():
-
-    initialize_bycon_service(byc)
-    select_dataset_ids(byc)
+    initialize_bycon_service(byc, "collations_creator")
+    run_beacon_init_stack(byc)
 
     if len(byc["dataset_ids"]) > 1:
         print("Please give only one dataset using -d")
         exit()
 
     ds_id = byc["dataset_ids"][0]
-
-    print( "Creating collations for " + ds_id)
+    print(f'Creating collations for {ds_id}')
 
     set_collation_types(byc)
 
     for coll_type, coll_defs in byc["filter_definitions"].items():
-
         collationed = coll_defs.get("collationed")
         if not collationed:
             continue
 
         pre = coll_defs["namespace_prefix"]
         pre_h_f = path.join( pkg_path, "rsrc", "classificationTrees", coll_type, "numbered_hierarchies.tsv" )
         collection = coll_defs["scope"]
         db_key = coll_defs["db_key"]
-
-        if "PMID" in coll_type:
+        if "pubmed" in coll_type:
             hier = _make_dummy_publication_hierarchy(byc)
-        elif path.exists( pre_h_f ):
+        elif path.exists( pre_h_f ):
             print( "Creating hierarchy for " + coll_type)
             hier = get_prefix_hierarchy( ds_id, coll_type, pre_h_f, byc)
         else:
@@ -87,39 +83,27 @@ def collations_creator():
             onto_keys.update(child_ids)
-
         sel_hiers = [ ]
-
         no = len(hier.keys())
         matched = 0
-
-        if not byc["test_mode"]:
-            bar = Bar("Writing "+pre, max = no, suffix='%(percent)d%%'+" of "+str(no) )
-
+        if not BYC["TEST_MODE"]:
+            bar = Bar("Writing "+pre, max = no, suffix='%(percent)d%%'+" of "+str(no) )
         for count, code in enumerate(hier.keys(), start=1):
-
-            if not byc["test_mode"]:
+            if not BYC["TEST_MODE"]:
                 bar.next()
-
             children = list( set( hier[ code ][ "child_terms" ] ) & onto_keys )
-
             hier[ code ].update( { "child_terms": children } )
-
             if len( children ) < 1:
-                if byc["test_mode"]:
+                if BYC["TEST_MODE"]:
                     print(code+" w/o children")
                 continue
-
             code_no = data_coll.count_documents( { db_key: code } )
-
             if code_no < 1:
                 code_no = 0
-
             if len( children ) < 2:
                 child_no = code_no
             else:
                 child_no = data_coll.count_documents( { db_key: { "$in": children } } )
 
             if child_no > 0:
-
                 # sub_id = re.sub(pre, coll_type, code)
                 sub_id = code
                 update_obj = hier[ code ].copy()
@@ -138,23 +122,19 @@ def collations_creator():
                     "updated": datetime.datetime.now().isoformat(),
                     "db_key": db_key
                 })
 
                 if "reference" in coll_defs:
                     url = coll_defs["reference"].get("root", "https://progenetix.org/services/ids/")
                     r = coll_defs["reference"].get("replace", ["___nothing___", ""])
                     ref = url+re.sub(r[0], r[1], code)
                     update_obj.update({"reference": ref })
-
                 matched += 1
-
-                if not byc["test_mode"]:
+                if not BYC["TEST_MODE"]:
                     sel_hiers.append( update_obj )
                 else:
                     print("{}:\t{} ({} deep) samples - {} / {} {}".format(sub_id, code_no, child_no, count, no, pre))
-
-
         # UPDATE
-        if not byc["test_mode"]:
+        if not BYC["TEST_MODE"]:
             bar.finish()
             print("==> Updating database ...")
             if matched > 0:
@@ -169,18 +149,15 @@ def get_prefix_hierarchy( ds_id, coll_type, pre_h_f, byc):
 
     coll_defs = byc["filter_definitions"][coll_type]
     hier = hierarchy_from_file(ds_id, coll_type, pre_h_f, byc)
-
     no = len(hier.keys())
 
     # now adding terms missing from the tree ###################################
-
     print("Looking for missing {} codes in {}.{} ...".format(coll_type, ds_id, coll_defs["scope"]))
     data_client = MongoClient(host=environ.get("BYCON_MONGO_HOST", "localhost"))
     data_db = data_client[ ds_id ]
     data_coll = data_db[coll_defs["scope"]]
 
-    db_key = coll_defs.get("db_key", "")
-
+    db_key = coll_defs.get("db_key", "")
     onto_ids = _get_ids_for_prefix( data_coll, coll_defs )
 
     added_no = 0
@@ -273,7 +250,7 @@
 
 def _make_dummy_publication_hierarchy(byc):
 
-    coll_type = "PMID"
+    coll_type = "pubmed"
     coll_defs = byc["filter_definitions"][coll_type]
     data_db = "progenetix"
     data_coll = MongoClient(host=environ.get("BYCON_MONGO_HOST", "localhost"))[ data_db ][ "publications" ]
@@ -311,7 +288,7 @@ def _make_dummy_publication_hierarchy(byc):
 
 def _get_dummy_hierarchy(ds_id, coll_type, coll_defs, byc):
 
-    data_client = MongoClient(host=environ.get("BYCON_MONGO_HOST", "localhost"))
+    data_client = MongoClient(host=DB_MONGOHOST)
     data_db = data_client[ ds_id ]
     data_coll = data_db[ coll_defs["scope"] ]
     data_pat = coll_defs["pattern"]
@@ -369,8 +346,10 @@ def _get_ids_for_prefix(data_coll, coll_defs):
     db_key = coll_defs["db_key"]
     pre_re = re.compile( coll_defs["pattern"] )
 
+    prdbug(f'_get_ids_for_prefix ... : "{db_key}"" - pattern {pre_re}')
     pre_ids = data_coll.distinct( db_key, { db_key: { "$regex": pre_re } } )
     pre_ids = list(filter(lambda d: pre_re.match(d), pre_ids))
+    prdbug(f'_get_ids_for_prefix ... : found {len(pre_ids)}')
 
     return pre_ids
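One small algorithm worth pulling out of the collationsCreator hunks: the `reference` URL for a collation is built from a `root` URL plus the code, run through an optional regex replacement. A runnable sketch with illustrative `coll_defs` content; the keys and defaults match the diff, while the example code value is made up:

import re

coll_defs = {
    "reference": {
        "root": "https://progenetix.org/services/ids/",
        "replace": ["___nothing___", ""],   # default pattern matches nothing
    }
}

code = "NCIT:C3262"   # illustrative collation code
url = coll_defs["reference"].get("root", "https://progenetix.org/services/ids/")
r = coll_defs["reference"].get("replace", ["___nothing___", ""])
ref = url + re.sub(r[0], r[1], code)   # a no-op substitution with the default
print(ref)   # https://progenetix.org/services/ids/NCIT:C3262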
3 changes: 1 addition & 2 deletions bin/collationsPlotter.py

@@ -27,8 +27,7 @@ def main():
 ################################################################################
 
 def collations_plotter():
-
-    initialize_bycon_service(byc)
+    initialize_bycon_service(byc, "collations_plotter")
     run_beacon_init_stack(byc)
     generate_genome_bins(byc)
 
1 change: 0 additions & 1 deletion bin/config/iscn_segmenter.yaml

@@ -1,6 +1,5 @@
 ---
 defaults:
-  genome_binning: 1Mb
   import_files_root:
     - imports
 
3 changes: 1 addition & 2 deletions bin/databaseArchiver.py

@@ -24,8 +24,7 @@ def main():
 ################################################################################
 
 def database_archiver():
-
-    initialize_bycon_service(byc)
+    initialize_bycon_service(byc, "database_archiver")
     run_beacon_init_stack(byc)
 
     if len(byc["dataset_ids"]) != 1:
1 change: 0 additions & 1 deletion bin/examplezSampler.py

@@ -6,7 +6,6 @@
 biosample_id_list=[]
 client = MongoClient(host=environ.get("BYCON_MONGO_HOST", "localhost"))
 
-
 progenetix_biosample_id_list=[]
 progenetix_individual_id_list=[]
 progenetix_variant_id_list=[]
2 changes: 0 additions & 2 deletions bin/examplezUpdater.py

@@ -24,7 +24,6 @@ def main():
 ################################################################################
 
 def examplez_updater():
-
     # Note: This doesn't use the standard `bycon` dataset id argument input since
     # you may want to create a new database not in the configuration list ...
     parser = argparse.ArgumentParser()
@@ -43,7 +42,6 @@ def examplez_updater():
     }
 
     # collecting the actions
-
     print('Database to create/update:', e_ds_id)
     if e_ds_id in db_names:
         ddb_resp = input(f'Drop existing {e_ds_id} database first?\n(Y|n): ')
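As the retained comment notes, this updater bypasses the standard `bycon` dataset-id handling so that new, unlisted databases can be created. A condensed sketch of its confirm-before-drop flow; the `--database` flag name, the fallback value, and the `db_names` list are guesses, while the prompt text comes from the diff:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-d", "--database", help="database to create/update")
args = parser.parse_args()

e_ds_id = args.database or "examplez"        # fallback name is illustrative
db_names = ["progenetix", "examplez"]        # would come from the MongoDB client

print('Database to create/update:', e_ds_id)
if e_ds_id in db_names:
    ddb_resp = input(f'Drop existing {e_ds_id} database first?\n(Y|n): ')
    if ddb_resp.lower().startswith("y"):
        print(f"... {e_ds_id} would be dropped and recreated here")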
(Diff truncated here; the remaining changed files are not shown.)