Skip to content

Commit

Permalink
EFO: add xref details to node data
Browse files Browse the repository at this point in the history
merges #21
refs #18

Co-authored-by: Bartek Foltyn <[email protected]>
  • Loading branch information
bfoltyn and Bartek Foltyn authored Nov 9, 2023
1 parent 1f3ba0b commit c8e0502
Showing 1 changed file with 63 additions and 8 deletions.
71 changes: 63 additions & 8 deletions nxontology_data/efo/efo.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,68 @@ def update_term(old_term: str) -> str:
)
return {k: sorted(v) for k, v in current_to_old.items()}

def get_xref_details(self) -> dict[str, list[dict[str, str | list[str] | None]]]:
    """
    Collect cross-reference details for every EFO term, keyed by ``efo_id``.

    Three tables are combined with outer merges on ``(efo_id, xref_id)``:
    the xrefs table (``xref_bioregistry`` renamed to ``xref_id``), the
    mapping properties (collapsed to a single ``relation``), and the xref
    sources (``axiom_source`` values collected into a ``sources`` list).
    Rows whose ``xref_id`` equals their own ``efo_id`` are dropped.

    Returns:
        A dict mapping each ``efo_id`` to a list of records, one per xref.
        Each record has the keys ``xref_id``, ``relation``
        (``"skos:exactMatch"``, ``"skos:closeMatch"`` or ``None``) and
        ``sources`` (list of ``axiom_source`` values, or ``None``).
        Values left missing by the outer merges are reported as ``None``
        rather than float NaN.
    """
    # Function-scope import: only needed for the NaN cleanup of the records.
    import math

    def get_relation(x: list[str]) -> str | None:
        """Reduce a term-xref pair's mapping properties to one relation."""
        # Exact match takes precedence over close match; the mondo:*
        # properties are reported under their skos:* names.
        if "skos:exactMatch" in x or "mondo:exactMatch" in x:
            return "skos:exactMatch"
        if "skos:closeMatch" in x or "mondo:closeMatch" in x:
            return "skos:closeMatch"
        return None

    def nan_to_none(value: Any) -> Any:
        """Replace the float NaN that pandas uses for missing cells by None."""
        if isinstance(value, float) and math.isnan(value):
            return None
        return value

    xrefs = self.get_xrefs_df()[["efo_id", "xref_bioregistry"]].rename(
        columns={"xref_bioregistry": "xref_id"}
    )

    # NOTE(review): the split has no n=1 limit, so only the text between the
    # first and second ":" is passed on as the accession -- confirm that no
    # xref accession is expected to contain a colon.
    xref_sources = (
        self.get_xref_sources_df()
        .assign(
            xref_id=lambda df: df["xref"]
            .str.split(":", expand=True)
            .apply(
                lambda row: normalize_parsed_curie(
                    xref_prefix=row[0],
                    xref_accession=row[1],
                    collapse_orphanet=True,
                ),
                axis="columns",
            )
        )
        .groupby(["efo_id", "xref_id"])["axiom_source"]
        .apply(list)
        .reset_index()
        .rename(columns={"axiom_source": "sources"})
    )

    mapping_properties = (
        self.get_mapping_properties_df()
        .groupby(["efo_id", "xref_id"])["mapping_property_id"]
        .apply(list)
        .reset_index()
        .rename(columns={"mapping_property_id": "mapping_properties"})
        .assign(
            relation=lambda x: x["mapping_properties"].apply(get_relation),
        )
    )

    # Outer merges keep xrefs that appear in only some of the three tables.
    xref_details = (
        xrefs.merge(
            mapping_properties,
            how="outer",
            on=["efo_id", "xref_id"],
        )
        .merge(
            xref_sources,
            how="outer",
            on=["efo_id", "xref_id"],
        )
        .query("efo_id != xref_id")
    )

    record_columns = ["xref_id", "relation", "sources"]
    return {
        efo_id: [
            {column: nan_to_none(value) for column, value in record.items()}
            for record in group[record_columns].to_dict(orient="records")
        ]
        for efo_id, group in xref_details.groupby("efo_id")
    }

def get_nodes(self) -> list[dict[str, Any]]:
logger.info("Generating nodes")
node_df = self.get_terms_df()
Expand All @@ -265,6 +327,7 @@ def get_nodes(self) -> list[dict[str, Any]]:
.apply(lambda df: sorted(set(df.xref_bioregistry.dropna())))
)
node_df["subsets"] = node_df.efo_id.map(self.get_subsets())
node_df["xref_details"] = node_df.efo_id.map(self.get_xref_details())
# Use .to_json and not .to_dict to convert NaN to None
return json.loads(node_df.to_json(orient="records")) # type: ignore [no-any-return]

Expand Down Expand Up @@ -319,14 +382,6 @@ def write_outputs(self) -> None:
write_dataframe(
self.get_obsolete_df(), output_dir.joinpath(f"{self.name}_obsolete.json.gz")
)
write_dataframe(
self.get_mapping_properties_df(),
output_dir.joinpath(f"{self.name}_mapping_properties.json.gz"),
)
write_dataframe(
self.get_xref_sources_df(),
output_dir.joinpath(f"{self.name}_xref_sources.json.gz"),
)
if nxo.name == "efo_otar_profile":
nxo_slim = self.create_slim_nxo(nxo)
# classify EFO node/disease precision using nxontology-ml
Expand Down

0 comments on commit c8e0502

Please sign in to comment.