Skip to content

Commit

Permalink
1.4.1
Browse files Browse the repository at this point in the history
  • Loading branch information
mbaudis committed Feb 2, 2024
1 parent f1ffa1b commit d9d518d
Show file tree
Hide file tree
Showing 16 changed files with 34 additions and 55 deletions.
7 changes: 3 additions & 4 deletions bin/ISCNsegmenter.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,13 @@ def main():
def iscn_segmenter():

initialize_bycon_service(byc)
set_processing_modes(byc)
parse_variants(byc)
set_genome_rsrc_path(byc)
run_beacon_init_stack(byc)
generate_genome_bins(byc)

group_parameter = byc["form_data"].get("groupBy", "histological_diagnosis_id")
input_file = byc["form_data"].get("inputfile")
output_file = byc["form_data"].get("outputfile")
dt_m = byc.get("datatable_mappings", {})

technique = "cCGH"
iscn_field = "iscn_ccgh"
Expand Down Expand Up @@ -88,7 +87,7 @@ def iscn_segmenter():
"callset_id": s.get("callset_id", "exp-"+n),
"individual_id": s.get("individual_id", "ind-"+n),
}
update_bs = import_datatable_dict_line(byc, update_bs, fieldnames, s, "biosample")
update_bs = import_datatable_dict_line(dt_m, update_bs, fieldnames, s, "biosample")
h_line = pgxseg_biosample_meta_line(byc, update_bs, group_parameter)
pgxseg.write( "{}\n".format(h_line) )

Expand Down
3 changes: 0 additions & 3 deletions bin/analysesStatusmapsRefresher.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,6 @@ def callsets_refresher():
ds_id = byc["dataset_ids"][0]
print(f'=> Using data values from {ds_id}')

# re-doing the interval generation for non-standard CNV binning
# genome_binning_from_args(byc)
set_genome_rsrc_path(byc)
generate_genome_bins(byc)

data_client = MongoClient(host=byc["mongohost"])
Expand Down
3 changes: 0 additions & 3 deletions bin/frequencymapsCreator.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,6 @@ def frequencymaps_creator():

set_collation_types(byc)

# re-doing the interval generation for non-standard CNV binning
# genome_binning_from_args(byc)
set_genome_rsrc_path(byc)
generate_genome_bins(byc)

print(f'=> Using data values from {ds_id} for {byc.get("genomic_interval_count", 0)} intervals...')
Expand Down
2 changes: 1 addition & 1 deletion bin/local/local_paths.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ server_callsets_dir_loc:
- arraymap
- grch38

callset_probefile_name: probes,cn.tsv
probefile_name: probes,cn.tsv

test_domains:
- progenetix.test
Expand Down
4 changes: 1 addition & 3 deletions bin/pgxProbesPlotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,8 @@ def main():
################################################################################

def pgx_probes_plotter():

initialize_bycon_service(byc)
parse_variants(byc)
set_genome_rsrc_path(byc)
run_beacon_init_stack(byc)
generate_genome_bins(byc)

input_file = byc["form_data"].get("inputfile")
Expand Down
4 changes: 2 additions & 2 deletions bin/publicationsInserter.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ def main():
def publications_inserter():

initialize_bycon_service(byc, "publications_inserter")
set_processing_modes(byc)

run_beacon_init_stack(byc)
g_url = byc["service_config"]["google_spreadsheet_tsv_url"]
skip_cols = byc["service_config"]["skipped_columns"]

Expand Down
3 changes: 2 additions & 1 deletion bin/variantsInserter.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def variantsInserter():

ds_id = byc["dataset_ids"][0]
input_file = byc["form_data"].get("inputfile")
dt_m = byc.get("datatable_mappings", {})

if not input_file:
print("No input file file specified (-i, --inputfile) => quitting ...")
Expand Down Expand Up @@ -121,7 +122,7 @@ def variantsInserter():
"individual_id": v.get("individual_id", re.sub("pgxbs-", "pgxind-", bs_id))
})

insert_v = import_datatable_dict_line(byc, insert_v, variants.fieldnames, v, "genomicVariant")
insert_v = import_datatable_dict_line(dt_m, insert_v, variants.fieldnames, v, "genomicVariant")
prdbug(insert_v, byc.get("debug_mode"))
insert_v = ByconVariant(byc).pgxVariant(insert_v)
insert_v.update({"updated": datetime.datetime.now().isoformat()})
Expand Down
7 changes: 0 additions & 7 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,3 @@ mongorestore --db $database .../mongodump/examplez/

Please see the [helper apps documentation](applications/#data-transformation-database-maintenance).

## Utility apps

### `ISCNsegmenter`

#### Examples

* `bin/ISCNsegmenter.py -i imports/ccghtest.tab -o exports/cghtest-with-histo.pgxseg`
2 changes: 1 addition & 1 deletion install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@ bycon_instance_pars:
- arraymap
- grch38

callset_probefile_name: probes,cn.tsv
probefile_name: probes,cn.tsv
2 changes: 1 addition & 1 deletion local/local_paths.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ server_callsets_dir_loc:
- arraymap
- grch38

callset_probefile_name: probes,cn.tsv
probefile_name: probes,cn.tsv

test_domains:
- progenetix.test
Expand Down
25 changes: 11 additions & 14 deletions services/lib/bycon_bundler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@
from pymongo import MongoClient
from copy import deepcopy

from cgi_parsing import prdbug
from interval_utils import interval_cnv_arrays, interval_counts_from_callsets
from variant_mapping import ByconVariant
from bycon_helpers import return_paginated_list
from bycon_helpers import return_paginated_list, prdbug

services_lib_path = path.join( path.dirname( path.abspath(__file__) ) )
sys.path.append( services_lib_path )
Expand All @@ -30,12 +29,14 @@ class ByconBundler:
"""

def __init__(self, byc):

self.byc = byc
self.byc = byc # needed for some called classes (ByconVariant...)
self.errors = []
self.debug_mode = byc.get("debug_mode", False)
self.filepath = None
self.local_paths = byc.get("local_paths", {})
self.datasets_results = None
self.dataset_ids = self.byc.get("dataset_ids", [])
self.dataset_ids = byc.get("dataset_ids", [])
self.datatable_mappings = byc.get("datatable_mappings", {})
self.filters = byc.get("filters", [])
self.min_number = byc["form_data"].get("min_number", 0)
self.method = byc.get("method", "___none___")
Expand Down Expand Up @@ -176,10 +177,6 @@ def callsets_variants_bundles(self):
p_o.update({
"variants": list(filter(lambda v: v.get("callset_id", "___none___") == cs_id, bb["variants"]))
})

# for v in bb["variants"]:
# if v.get("callset_id", "___none___") == cs_id:
# p_o["variants"].append(ByconVariant(self.byc).byconVariant(v))
c_p_l.append(p_o)

self.callsetVariantsBundles = c_p_l
Expand Down Expand Up @@ -245,16 +242,14 @@ def __deparse_pgxseg_samples_header(self):
continue

bios = {"id": bs_id}
bios = import_datatable_dict_line(self.byc, bios, fieldnames, bios_d, "biosample")
bios = import_datatable_dict_line(self.datatable_mappings, bios, fieldnames, bios_d, "biosample")
cs_id = bios.get("callset_id", re.sub("pgxbs", "pgxcs", bs_id) )
ind_id = bios.get("individual_id", re.sub("pgxbs", "pgxind", bs_id) )
ind = {"id": ind_id}
cs = {"id": cs_id, "biosample_id": bs_id, "individual_id": ind_id}

bios.update({"individual_id": ind_id})

# b_k_b["callsets_by_id"].update({ cs_id: import_datatable_dict_line(self.byc, cs, fieldnames, bios_d, "analysis") })
# b_k_b["individuals_by_id"].update({ ind_id: import_datatable_dict_line(self.byc, ind, fieldnames, bios_d, "individual") })
b_k_b["callsets_by_id"].update({ cs_id: cs })
b_k_b["individuals_by_id"].update({ ind_id: ind })
b_k_b["biosamples_by_id"].update({ bs_id: bios })
Expand All @@ -267,6 +262,7 @@ def __deparse_pgxseg_samples_header(self):

def __callsets_bundle_from_result_set(self):
for ds_id, ds_res in self.datasets_results.items():
# prdbug(f'{ds_id} - {ds_res}', self.debug_mode)
if not ds_res:
continue
if not "analyses._id" in ds_res:
Expand All @@ -287,6 +283,7 @@ def __callsets_bundle_from_result_set(self):

cnv_chro_stats = cs.get("cnv_chro_stats", False)
cnv_statusmaps = cs.get("cnv_statusmaps", False)
prdbug(f'{cs__id} - {cnv_chro_stats}, {cnv_statusmaps}', self.debug_mode)

if cnv_chro_stats is False or cnv_statusmaps is False:
continue
Expand All @@ -297,7 +294,7 @@ def __callsets_bundle_from_result_set(self):
"biosample_id": cs.get("biosample_id", "NA"),
"cnv_chro_stats": cs.get("cnv_chro_stats"),
"cnv_statusmaps": cs.get("cnv_statusmaps"),
"probefile": callset_guess_probefile_path(cs, self.byc),
"probefile": callset_guess_probefile_path(cs, self.local_paths),
"variants": []
}

Expand Down Expand Up @@ -373,7 +370,7 @@ def __keyed_bundle_add_variants_from_lines(self):
"callset_id": cs_id,
}

update_v = import_datatable_dict_line(self.byc, update_v, fieldnames, v, "genomicVariant")
update_v = import_datatable_dict_line(self.datatable_mappings, update_v, fieldnames, v, "genomicVariant")
update_v = ByconVariant(self.byc).pgxVariant(update_v)

update_v.update({
Expand Down
3 changes: 1 addition & 2 deletions services/lib/bycon_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
from os import environ, path
from PIL import Image, ImageColor, ImageDraw

from cgi_parsing import prjsonnice, test_truthy, prdbug
from genome_utils import GeneInfo, ChroNames
from bycon import prjsonnice, test_truthy, prdbug, GeneInfo, ChroNames

services_lib_path = path.join( path.dirname( path.abspath(__file__) ) )
sys.path.append( services_lib_path )
Expand Down
2 changes: 1 addition & 1 deletion services/lib/collation_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def set_collation_types(byc):
continue
s_p.update({p:byc["filter_definitions"][p]})
if len(s_p.keys()) < 1:
print("No existing collation type was provided with -c ...")
print("No existing collation type was provided with `--collationTypes` ...")
exit()

byc.update({"filter_definitions":s_p})
Expand Down
7 changes: 3 additions & 4 deletions services/lib/datatable_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
from random import sample as randomSamples

# bycon
from cgi_parsing import prdbug, prjsonnice
from bycon_helpers import assign_nested_value, get_nested_value
from bycon import assign_nested_value, get_nested_value, prdbug, prjsonnice

################################################################################

Expand Down Expand Up @@ -79,9 +78,9 @@ def export_datatable_download(results, byc):

################################################################################

def import_datatable_dict_line(byc, parent, fieldnames, lineobj, primary_scope="biosample"):
def import_datatable_dict_line(datatable_mappings, parent, fieldnames, lineobj, primary_scope="biosample"):

dt_m = byc["datatable_mappings"]
dt_m = datatable_mappings

if not primary_scope in dt_m["definitions"]:
return
Expand Down
13 changes: 6 additions & 7 deletions services/lib/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@
from copy import deepcopy
from random import sample as random_samples

from cgi_parsing import prjsonnice
from bycon import ByconVariant, prjsonnice, return_paginated_list

from datatable_utils import import_datatable_dict_line
from interval_utils import interval_cnv_arrays, interval_counts_from_callsets
from variant_mapping import ByconVariant
from bycon_helpers import return_paginated_list

################################################################################

Expand Down Expand Up @@ -58,17 +57,17 @@ def read_www_tsv_to_dictlist(www, max_count=0):

################################################################################

def callset_guess_probefile_path(callset, byc):
local_paths = byc.get("local_paths")
def callset_guess_probefile_path(callset, local_paths):

if not local_paths:
return False
if not "server_callsets_dir_loc" in local_paths:
return False
if not "analysis_info" in callset:
return False

d = Path( path.join( *byc["local_paths"]["server_callsets_dir_loc"]))
n = byc.get("callset_probefile_name", "___none___")
d = Path( path.join( *local_paths["server_callsets_dir_loc"]))
n = local_paths.get("probefile_name", "___none___")

if not d.is_dir():
return False
Expand Down
2 changes: 1 addition & 1 deletion services/local/local_paths.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ server_callsets_dir_loc:
- arraymap
- grch38

callset_probefile_name: probes,cn.tsv
probefile_name: probes,cn.tsv

test_domains:
- progenetix.test
Expand Down

0 comments on commit d9d518d

Please sign in to comment.