Skip to content

Commit

Permalink
mostly compound variants indexes
Browse files Browse the repository at this point in the history
  • Loading branch information
mbaudis committed Feb 21, 2024
1 parent aac4a08 commit 6b9f444
Show file tree
Hide file tree
Showing 5 changed files with 332 additions and 21 deletions.
52 changes: 48 additions & 4 deletions bin/config/housekeeping.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@ indexed_special_collections:
db_key: id

indexed_special_dbs:

cellz:

biosamples:
location:
db_key: provenance.geo_location.geometry
Expand All @@ -40,10 +38,32 @@ indexed_special_dbs:
location:
db_key: provenance.geo_location.geometry
type: 2dsphere

variants:
variantbracketquery:
db_key:
variant_state.id: 1
location.sequence_id: 1
location.start: 1
location.end: 1
info.var_length: 1
type: compound
variantallelequery:
db_key:
location.sequence_id: 1
location.start: 1
sequence: 1
reference_sequence: 1
type: compound
variantrangequery:
db_key:
location.sequence_id: 1
location.start: 1
location.end: 1
variant_state.id: 1
sequence: 1
type: compound

progenetix:

biosamples:
location:
db_key: provenance.geo_location.geometry
Expand All @@ -52,6 +72,30 @@ indexed_special_dbs:
location:
db_key: provenance.geo_location.geometry
type: 2dsphere
variants:
variantbracketquery:
db_key:
variant_state.id: 1
location.sequence_id: 1
location.start: 1
location.end: 1
info.var_length: 1
type: compound
variantallelequery:
db_key:
location.sequence_id: 1
location.start: 1
sequence: 1
reference_sequence: 1
type: compound
variantrangequery:
db_key:
location.sequence_id: 1
location.start: 1
location.end: 1
variant_state.id: 1
sequence: 1
type: compound

publications:
id:
Expand Down
12 changes: 4 additions & 8 deletions bin/lib/mongodb_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,10 @@ def mongodb_update_indexes(ds_id, byc):
data_db = mongo_client[ds_id]
coll_names = data_db.list_collection_names()
for r_t, r_d in b_rt_s.items():

collname = r_d.get("collection", False)
if collname not in coll_names:
print(f"¡¡¡ Collection {collname} does not exist in {ds_id} !!!")
continue

i_coll = data_db[ collname ]
io_params = dt_m["definitions"][ r_t ]["parameters"]

Expand Down Expand Up @@ -51,7 +49,6 @@ def mongodb_update_indexes(ds_id, byc):
################################################################################

def __index_by_colldef(ds_id, coll_defs):

mongo_client = MongoClient(host=DB_MONGOHOST)
i_db = mongo_client[ds_id]
coll_names = i_db.list_collection_names()
Expand All @@ -61,14 +58,17 @@ def __index_by_colldef(ds_id, coll_defs):
continue

i_coll = i_db[ collname ]

for p_k, p_v in io_params.items():
special = p_v.get("type", "___none___")
k = p_v["db_key"]
if "2dsphere" in special:
print(f'Creating GEOSPHERE index "{k}" in {collname} from {ds_id}')
i_coll.create_index([(k, GEOSPHERE)])
pass
elif "compound" in special:
print(f'Creating compound index "{k}" in {collname} from {ds_id}')
i_coll.create_index(k)
pass
else:
print(f'Creating index "{k}" in {collname} from {ds_id}')
try:
Expand All @@ -77,7 +77,3 @@ def __index_by_colldef(ds_id, coll_defs):
except Exception:
print(f'¡¡¡ Index "{k}" in {collname} from {ds_id} has one with same id !!!')





4 changes: 1 addition & 3 deletions bin/pgxProbesPlotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
################################################################################

def main():

pgx_probes_plotter()


################################################################################

def pgx_probes_plotter():
Expand All @@ -34,7 +34,6 @@ def pgx_probes_plotter():
if not input_file:
print("No input file specified (-i, --inputfile) => read_probedata_file(filepath, byc):quitting ...")
exit()

if not "probe" in input_file:
print('Only probe files are accepted (should have "...probes..." in name).')
exit()
Expand All @@ -43,7 +42,6 @@ def pgx_probes_plotter():

# TODO: method for multiple?
cs_probes = pb.read_probedata_file(input_file)

plot_data_bundle = {
"callsets_probes_bundles": [ {"id": "TBD", "probes": cs_probes }]
}
Expand Down
Loading

0 comments on commit 6b9f444

Please sign in to comment.