From 372f13dc5ad89af45e1c602bc8f0a08a6b428ef2 Mon Sep 17 00:00:00 2001 From: Kim Rutherford Date: Mon, 13 Nov 2023 18:56:06 +1300 Subject: [PATCH] Add a condition_details vec to OntAnnotationDetail So we can show the condition details on the website. Refs pombase/website#2029 --- src/pombase/data_types.rs | 4 +++- src/pombase/web/data_build.rs | 33 +++++++++++++++++++++++++++++++++ tests/test_data_build.rs | 8 ++++++-- 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/src/pombase/data_types.rs b/src/pombase/data_types.rs index a6a8c84f..8476fa48 100644 --- a/src/pombase/data_types.rs +++ b/src/pombase/data_types.rs @@ -1,4 +1,4 @@ -use std::collections::{HashMap, HashSet, BTreeMap}; +use std::collections::{HashMap, HashSet, BTreeMap, BTreeSet}; use std::fmt::Display; use std::fmt; @@ -754,6 +754,8 @@ pub struct OntAnnotationDetail { pub genotype_background: Option, #[serde(skip_serializing_if="HashSet::is_empty", default)] pub conditions: HashSet, + #[serde(skip_serializing_if="BTreeSet::is_empty", default)] + pub condition_details: BTreeSet<(TermId, Option)>, #[serde(skip_serializing_if="Option::is_none")] pub date: Option, #[serde(skip_serializing_if="Option::is_none")] diff --git a/src/pombase/web/data_build.rs b/src/pombase/web/data_build.rs index 3eb8ea0f..acba95cc 100644 --- a/src/pombase/web/data_build.rs +++ b/src/pombase/web/data_build.rs @@ -841,6 +841,27 @@ fn set_has_protein_features(genes: &mut UniquenameGeneMap, protein_view_data: &H } } + +lazy_static! 
{ + static ref CONDITION_DETAIL_RE: Regex = + Regex::new(r"^([A-Z][\w]+:\d\d\d+)(?:\((.+)\))?$").unwrap(); +} + +// parse something like "FYECO:0000005(32C)" or "FYECO:0000329(2% (v/v))" into a term ID +// and a String containing the details from the brackets +// parse "FYECO:0000005" into (TermId, None) +fn parse_condition_with_detail(condition_string: &str) -> (TermId, Option) { + let captures = CONDITION_DETAIL_RE.captures(condition_string).unwrap(); + + let term_id = captures.get(1).as_ref().unwrap().as_str().into(); + + if let Some(detail) = captures.get(2) { + (term_id, Some(detail.as_str().into())) + } else { + (term_id, None) + } +} + impl <'a> WebDataBuild<'a> { pub fn new(raw: &'a Raw, domain_data: &'a HashMap, @@ -3776,6 +3797,7 @@ phenotypes, so just the first part of this extension will be used: let publication = &feature_cvterm.publication; let mut extra_props: HashMap = HashMap::new(); let mut conditions: HashSet = HashSet::new(); + let mut condition_details: BTreeSet<(TermId, Option)> = BTreeSet::new(); let mut withs: HashSet = HashSet::new(); let mut froms: HashSet = HashSet::new(); let mut qualifiers: Vec = vec![]; @@ -3838,6 +3860,16 @@ phenotypes, so just the first part of this extension will be used: value, feature.uniquename, termid); } }, + "condition_detail" => + if let Some(ref value) = prop.value { + if value.contains(':') { + let parsed_value = parse_condition_with_detail(value); + condition_details.insert(parsed_value); + } else { + eprintln!(r#"ignoring condition that doesn't contain a term ID "{}" (from annotation of {} with {})"#, + value, feature.uniquename, termid); + } + }, "qualifier" => if let Some(value) = prop.value.clone() { qualifiers.push(value); @@ -3998,6 +4030,7 @@ phenotypes, so just the first part of this extension will be used: evidence, eco_evidence, conditions, + condition_details, extension, date, assigned_by, diff --git a/tests/test_data_build.rs b/tests/test_data_build.rs index ac701cae..912788d1 100644 --- 
a/tests/test_data_build.rs +++ b/tests/test_data_build.rs @@ -1,4 +1,4 @@ -use std::collections::{HashMap, HashSet}; +use std::collections::{HashMap, HashSet, BTreeSet}; use std::iter::FromIterator; use std::cmp::Ordering; @@ -402,6 +402,9 @@ fn make_one_detail(id: i32, gene_uniquename: &str, reference_uniquename: &str, maybe_genotype_uniquename: Option<&str>, evidence: &str, extension: Vec, conditions: HashSet) -> OntAnnotationDetail { + let condition_details: BTreeSet<_> = + conditions.iter().map(|cond| (cond.clone(), None)).collect(); + OntAnnotationDetail { id: id, genes: vec![gene_uniquename.into()], @@ -419,7 +422,8 @@ fn make_one_detail(id: i32, gene_uniquename: &str, reference_uniquename: &str, qualifiers: vec![], extension: extension, gene_ex_props: None, - conditions: conditions, + conditions, + condition_details, assigned_by: Some("PomBase".to_shared_str()), throughput: Some(Throughput::HighThroughput), }