Skip to content

Commit

Permalink
fix namespace updates
Browse files Browse the repository at this point in the history
  • Loading branch information
JMante1 committed Oct 12, 2022
1 parent cd1100b commit dc08169
Show file tree
Hide file tree
Showing 6 changed files with 178 additions and 804 deletions.
196 changes: 98 additions & 98 deletions SBOL3_simple_library4.nt

Large diffs are not rendered by default.

Binary file modified SBOL3_simple_library4.xlsx
Binary file not shown.
50 changes: 50 additions & 0 deletions excelutils/excel_sbol_utils/helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
import string
import rdflib
from openpyxl.worksheet import cell_range, worksheet
from pathlib import Path

Expand Down Expand Up @@ -146,3 +147,52 @@ def read_variant_table(excel_file: Path) -> tuple[str, str, list[list]]:
variant_lists = [[v for v in column if v] for column in column_iterator] # drop the empty cells from each range

return library_name, base_sequence, variant_lists

def update_uri_refs(doc, update_dict, use_derived=True, derived_ls=None):
    """Rewrite references to renamed uris throughout a document's RDF graph.

    Every triple whose object is a key of ``update_dict`` has that object
    replaced by the mapped uri. When ``use_derived`` is true, "derived" uris
    (an old uri with one of the ``derived_ls`` suffixes appended, e.g.
    ``<old>_sequence``) are rewritten to ``<new><suffix>`` both where they
    appear as an object and where they appear as a subject.

    NOTE(review): subjects that match a key of ``update_dict`` exactly are
    left untouched, only derived subjects are rewritten. Presumably the
    caller has already renamed those objects itself - confirm.

    Args:
        doc (SBOL3 Document): document to be updated. It must provide
            ``graph()`` and ``_parse_graph()``.
        update_dict (dict): dictionary of the form {old_uri: new_uri}
        use_derived (bool, optional): Whether or not to also update derived
            uris. Defaults to True.
        derived_ls (list, optional): List of suffixes that form derived uris,
            e.g. the version of the uri with _sequence added to the end.
            Defaults to ['_sequence'].

    Returns:
        doc (SBOL3 Document): the same document object, after the modified
        graph has been handed back to it via ``_parse_graph``.
    """
    if derived_ls is None:
        derived_ls = ['_sequence']

    # Map every derived old uri straight to its derived new uri. Building the
    # mapping up front avoids having to recover the suffix from the uri text
    # later (which is fragile when the suffix contains several underscores or
    # the suffix text also occurs elsewhere in the uri) and gives O(1) lookup.
    derived_map = {}
    if use_derived:
        for deriv in derived_ls:
            for old_uri, new_uri in update_dict.items():
                derived_map[f'{old_uri}{deriv}'] = f'{new_uri}{deriv}'

    # pull the graph from the document
    g = doc.graph()

    # Iterate over a snapshot: triples are removed from and added to the graph
    # inside the loop, which is not safe while iterating the live graph.
    for subject, predicate, _object in list(g):
        new_subject = subject
        new_object = _object

        # an exact match takes priority over a derived match for the object
        object_key = str(_object)
        if object_key in update_dict:
            new_object = rdflib.URIRef(update_dict[object_key])
        elif object_key in derived_map:
            new_object = rdflib.URIRef(derived_map[object_key])

        # update any derived subjects
        subject_key = str(subject)
        if subject_key in derived_map:
            new_subject = rdflib.URIRef(derived_map[subject_key])

        # Replace the triple once, with both changes applied together, so a
        # triple that needs its subject AND object rewritten does not leave a
        # half-updated copy behind.
        if new_subject is not subject or new_object is not _object:
            g.remove((subject, predicate, _object))
            g.add((new_subject, predicate, new_object))

    # NOTE(review): relies on a private Document method; whether it replaces
    # or merges with the document's existing contents is not visible here.
    doc._parse_graph(g)
    return doc
28 changes: 19 additions & 9 deletions excelutils/excel_sbol_utils/library3.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,12 +154,13 @@ def dataSource(rowobj):
val = vals[list(vals.keys())[colnum]]

datasource_dict = {'GenBank':{'Replace Example':'https://www.ncbi.nlm.nih.gov/nuccore/{REPLACE_HERE}', 'Literal Part':'TRUE', 'Namespace':'https://www.ncbi.nlm.nih.gov/nuccore', 'Prefix':'gb'},
'PubMed':{'Replace Example':'https://pubmed.ncbi.nlm.nih.gov/{REPLACE_HERE}/', 'Literal Part':'FALSE', 'Namespace':'', 'Prefix':''},
'PubMed':{'Replace Example':'https://pubmed.ncbi.nlm.nih.gov/{REPLACE_HERE}/', 'Literal Part':'FALSE', 'Namespace':'', 'Prefix':'', 'derived_from':''},
'iGEM registry':{'Replace Example':'http://parts.igem.org/Part:{REPLACE_HERE}', 'Literal Part':'TRUE', 'Namespace':'http://parts.igem.org', 'Prefix':'igem'},
'AddGene':{'Replace Example':'https://www.addgene.org/{REPLACE_HERE}/', 'Literal Part':'FALSE', 'Namespace':'', 'Prefix':''},
'Seva plasmids':{'Replace Example':'http://www.sevahub.es/public/Canonical/{REPLACE_HERE}/1', 'Literal Part':'TRUE', 'Namespace':'', 'Prefix':''},
'Tax_id':{'Replace Example':'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id={REPLACE_HERE}', 'Literal Part':'FALSE', 'Namespace':'', 'Prefix':''},
'SynBioHub':{'Replace Example':'{REPLACE_HERE}', 'Literal Part':'TRUE', 'Namespace':'', 'Prefix':''},
'URL':{'Replace Example':'{REPLACE_HERE}', 'Literal Part':'FALSE', 'Namespace':val, 'Prefix':'', 'derived_from':f'{val}/{rowobj.obj.displayId}'},
'Local Sequence File':{'Replace Example':'', 'Literal Part':'FALSE', 'Namespace':'', 'Prefix':''},
'URL for GenBank file':{'Replace Example':'{REPLACE_HERE}', 'Literal Part':'TRUE', 'Namespace':'', 'Prefix':''},
'URL for FASTA file':{'Replace Example':'{REPLACE_HERE}', 'Literal Part':'TRUE', 'Namespace':'', 'Prefix':''}
Expand All @@ -168,28 +169,37 @@ def dataSource(rowobj):
literal = datasource_dict[pref]['Literal Part']

if literal == 'FALSE':
rowobj.obj.wasDerivedFrom = val
if len(datasource_dict[pref]['derived_from']) > 0:
rowobj.obj.derived_from = [datasource_dict[pref]['derived_from']]
ns = datasource_dict[pref]['Namespace']
if len(ns) > 0:
if len(datasource_dict[pref]['Prefix']) > 0:
if datasource_dict[pref]['Prefix'] not in rowobj.doc_pref_terms:
rowobj.doc.bind(datasource_dict[pref]['Prefix'], ns)
rowobj.doc_pref_terms.append(datasource_dict[pref]['Prefix'])

old_id = rowobj.obj.identity
rowobj.doc.change_object_namespace([rowobj.obj], ns)
new_id = rowobj.obj.identity
rowobj.data_source_id_to_update[old_id] = new_id

else:
ns = datasource_dict[pref]['Namespace']
if len(ns) > 0:
if datasource_dict[pref]['Prefix'] not in rowobj.doc_pref_terms:
rowobj.doc.bind(datasource_dict[pref]['Prefix'], ns)
rowobj.doc_pref_terms.append(datasource_dict[pref]['Prefix'])
if len(datasource_dict[pref]['Prefix']) > 0:
if datasource_dict[pref]['Prefix'] not in rowobj.doc_pref_terms:
rowobj.doc.bind(datasource_dict[pref]['Prefix'], ns)
rowobj.doc_pref_terms.append(datasource_dict[pref]['Prefix'])

old_id = rowobj.obj.identity
rowobj.doc.change_object_namespace([rowobj.obj], ns)
new_id = rowobj.obj.identity
rowobj.data_source_id_to_update[old_id] = new_id
if val != rowobj.obj.display_id:
# rowobj.data_source_id_to_update[rowobj.obj.identity] = {'current_id': rowobj.obj.display_id,
# 'update_id': val}
new_identity = str(rowobj.obj.identity).replace(rowobj.obj.display_id, helpers.check_name(val))
id_map = {rowobj.obj.identity:new_identity}
# print(str(id_map))
rowobj.obj.set_identity(new_identity)
rowobj.obj.update_all_dependents(id_map) # this function doesn't yet do everything it should
warnings.warn('not yet possible to have display id that is different from source value')
rowobj.data_source_id_to_update[old_id] = new_identity

def sequence(rowobj):
Expand Down
Loading

0 comments on commit dc08169

Please sign in to comment.