Skip to content

Commit

Permalink
rework ontologyannotation localID field
Browse files Browse the repository at this point in the history
  • Loading branch information
HLWeil committed Aug 11, 2023
1 parent ec68408 commit f879b74
Show file tree
Hide file tree
Showing 10 changed files with 91 additions and 132 deletions.
25 changes: 7 additions & 18 deletions src/ISA/ISA.Json/Ontology.fs
Original file line number Diff line number Diff line change
Expand Up @@ -109,27 +109,16 @@ module OntologyAnnotation =
|> GEncode.choose
|> Encode.object

let localIDDecoder : Decoder<string> =
fun s json ->
match Decode.string s json with
| Ok (Regex.ActivePatterns.TermAnnotation tan) ->
Ok (tan.TermSourceREF)
| _ -> Ok ""
//| Ok s -> Error (DecoderError(s,ErrorReason.FailMessage "Could not parse local ID from string"))
//| Error e -> Error e


let decoder (options : ConverterOptions) : Decoder<OntologyAnnotation> =
Decode.object (fun get ->
{
ID = get.Optional.Field "@id" GDecode.uri
Name = get.Optional.Field "annotationValue" (AnnotationValue.decoder options)
TermSourceREF = get.Optional.Field "termSource" Decode.string
//LocalID = try get.Optional.Field "termAccession" localIDDecoder with | _ -> None
LocalID = get.Optional.Field "termAccession" localIDDecoder |> Option.bind (fun s -> if s = "" then None else Some s)
TermAccessionNumber = get.Optional.Field "termAccession" Decode.string
Comments = get.Optional.Field "comments" (Decode.array (Comment.decoder options))
}
OntologyAnnotation.create(
?Id = get.Optional.Field "@id" GDecode.uri,
?Name = get.Optional.Field "annotationValue" (AnnotationValue.decoder options),
?TermSourceREF = get.Optional.Field "termSource" Decode.string,
?TermAccessionNumber = get.Optional.Field "termAccession" Decode.string,
?Comments = get.Optional.Field "comments" (Decode.array (Comment.decoder options))
)
)

let fromString (s:string) =
Expand Down
10 changes: 5 additions & 5 deletions src/ISA/ISA.Spreadsheet/CompositeHeader.fs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ module ActivePattern =

open ARCtrl.ISA.Regex.ActivePatterns

let mergeTerms tsr1 tan1 tsr2 tan2 =
if tsr1 <> tsr2 then failwithf "TermSourceRef %s and %s do not match" tsr1 tsr2
if tan1 <> tan2 then failwithf "TermAccessionNumber %s and %s do not match" tan1 tan2
{|TermSourceRef = tsr1; TermAccessionNumber = tan1|}
/// Merges the ID information of two parsed term reference headers into a single term reference.
///
/// Both inputs must carry the same id space and the same local id; otherwise this fails
/// with an exception naming the two mismatching values.
///
/// Returns an anonymous record with `TermSourceRef` set to the id space and
/// `TermAccessionNumber` set to "idspace:localid". If both parts are empty (no annotation
/// could be parsed from the headers), `TermAccessionNumber` is the empty string instead of
/// a bare ":".
let mergeIDInfo idSpace1 localID1 idSpace2 localID2 =
    if idSpace1 <> idSpace2 then failwithf "TermSourceRef %s and %s do not match" idSpace1 idSpace2
    if localID1 <> localID2 then failwithf "LocalID %s and %s do not match" localID1 localID2
    // Empty parts mean "no annotation"; do not fabricate the accession ":" from them.
    let termAccessionNumber =
        if idSpace1 = "" && localID1 = "" then ""
        else $"{idSpace1}:{localID1}"
    {|TermSourceRef = idSpace1; TermAccessionNumber = termAccessionNumber|}

let (|Term|_|) (categoryParser : string -> string option) (f : OntologyAnnotation -> CompositeHeader) (cells : FsCell list) =
let (|AC|_|) s =
Expand All @@ -25,7 +25,7 @@ module ActivePattern =
//| [AC name; TermAccessionNumber term1; TermSourceREF term2]
//| [AC name; Unit; TermAccessionNumber term1; TermSourceREF term2]
| [AC name; UnitColumnHeader; TSRColumnHeader term1; TANColumnHeader term2] ->
let term = mergeTerms term1.TermSourceREF term1.TermAccessionNumber term2.TermSourceREF term2.TermAccessionNumber
let term = mergeIDInfo term1.IDSpace term1.LocalID term2.IDSpace term2.LocalID
let ont = OntologyAnnotation.fromString(name, term.TermSourceRef, term.TermAccessionNumber)
f ont
|> Some
Expand Down
4 changes: 2 additions & 2 deletions src/ISA/ISA/ArcTypes/CompositeCell.fs
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@ type CompositeCell =
// TODO: i would really love to have an overload here accepting string input
static member createTerm (oa:OntologyAnnotation) = Term oa
static member createTermFromString (?name: string, ?tsr: string, ?tan: string) =
Term <| OntologyAnnotation.fromString(?term = name, ?tsr = tsr, ?tan = tan)
Term <| OntologyAnnotation.fromString(?termName = name, ?tsr = tsr, ?tan = tan)
static member createUnitized (value: string, ?oa:OntologyAnnotation) = Unitized (value, Option.defaultValue (OntologyAnnotation.empty) oa)
static member createUnitizedFromString (value: string, ?name: string, ?tsr: string, ?tan: string) =
Unitized <| (value, OntologyAnnotation.fromString(?term = name, ?tsr = tsr, ?tan = tan))
Unitized <| (value, OntologyAnnotation.fromString(?termName = name, ?tsr = tsr, ?tan = tan))
static member createFreeText (value: string) = FreeText value

static member emptyTerm = Term OntologyAnnotation.empty
Expand Down
4 changes: 2 additions & 2 deletions src/ISA/ISA/ArcTypes/CompositeHeader.fs
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,10 @@ type CompositeHeader =
// Input/Output have similar naming to Term, but are more specific.
// So they have to be called first.
| Regex.ActivePatterns.Regex Regex.Pattern.InputPattern r ->
let iotype = r.Groups.["iotype"].Value
let iotype = r.Groups.[Regex.Pattern.MatchGroups.iotype].Value
Input <| IOType.ofString (iotype)
| Regex.ActivePatterns.Regex Regex.Pattern.OutputPattern r ->
let iotype = r.Groups.["iotype"].Value
let iotype = r.Groups.[Regex.Pattern.MatchGroups.iotype].Value
Output <| IOType.ofString (iotype)
// Is term column
| Regex.ActivePatterns.TermColumn r ->
Expand Down
2 changes: 1 addition & 1 deletion src/ISA/ISA/JsonTypes/Component.fs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ type Component =

/// Create a ISAJson Component from ISATab string entries
static member fromString (?name: string, ?term:string, ?source:string, ?accession:string, ?comments : Comment []) =
let cType = OntologyAnnotation.fromString (?term = term, ?tsr=source, ?tan=accession, ?comments = comments) |> Option.fromValueWithDefault OntologyAnnotation.empty
let cType = OntologyAnnotation.fromString (?termName = term, ?tsr=source, ?tan=accession, ?comments = comments) |> Option.fromValueWithDefault OntologyAnnotation.empty
match name with
| Some n ->
let v,u = Component.decomposeName n
Expand Down
41 changes: 18 additions & 23 deletions src/ISA/ISA/JsonTypes/OntologyAnnotation.fs
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,31 @@ type OntologyAnnotation =
ID : URI option
Name : AnnotationValue option
TermSourceREF : string option
LocalID : string option
TermAccessionNumber : URI option
Comments : Comment [] option
}

static member make id name termSourceREF localID termAccessionNumber comments=
static member make id name termSourceREF termAccessionNumber comments=
{
ID = id
Name = name
TermSourceREF = termSourceREF
LocalID = localID
TermAccessionNumber = termAccessionNumber
Comments = comments
}

/// This function creates the type exactly as given. If you want a more streamlined approach use `OntologyAnnotation.fromString`.
static member create(?Id,?Name,?TermSourceREF,?LocalID,?TermAccessionNumber,?Comments) : OntologyAnnotation =
OntologyAnnotation.make Id Name TermSourceREF LocalID TermAccessionNumber Comments
static member create(?Id,?Name,?TermSourceREF,?TermAccessionNumber,?Comments) : OntologyAnnotation =
OntologyAnnotation.make Id Name TermSourceREF TermAccessionNumber Comments

static member empty =
OntologyAnnotation.create()


/// Parsed id information of this annotation: the result of running
/// `Regex.tryParseTermAnnotation` on `TermAccessionNumber`.
/// `None` if no term accession number is set or if the parser returns `None`.
member this.IDInfo =
    match this.TermAccessionNumber with
    | Some accession -> Regex.tryParseTermAnnotation accession
    | None -> None

/// Returns the name of the ontology as string
// TODO: Why is this called Text, while everything else is called string?
member this.NameText =
Expand Down Expand Up @@ -76,20 +79,12 @@ type OntologyAnnotation =
///</summary>
///<param name="tsr">Term source reference</param>
///<param name="tan">Term accession number</param>
static member fromString (?term:string, ?tsr:string, ?tan:string, ?comments : Comment []) =

let tsr,localID =
match tan with
| Some (Regex.ActivePatterns.TermAnnotation tan) ->
(if tsr.IsSome then tsr else Some tan.TermSourceREF),
Some tan.LocalTAN
| _ -> tsr,None
static member fromString (?termName:string, ?tsr:string, ?tan:string, ?comments : Comment []) =

OntologyAnnotation.make
None
(term |> Option.map AnnotationValue.fromString)
(termName |> Option.map AnnotationValue.fromString)
tsr
localID
tan
comments

Expand All @@ -99,20 +94,20 @@ type OntologyAnnotation =
|> Regex.tryParseTermAnnotation
|> Option.get
|> fun r ->
let accession = r.TermSourceREF + ":" + r.LocalTAN
OntologyAnnotation.fromString ("", r.TermSourceREF, accession)
let accession = r.IDSpace + ":" + r.LocalID
OntologyAnnotation.fromString ("", r.IDSpace, accession)

/// Parses the value of `TermAccessionNumber` to the short term accession format "idspace:localid". Example: "MS:000001".
///
/// If `TermAccessionNumber` cannot be parsed to this format, returns an empty string!
member this.TermAccessionShort =
match this.TermSourceREF, this.LocalID with
| Some tsr, Some id -> $"{tsr}:{id}"
match this.IDInfo with
| Some id -> $"{id.IDSpace}:{id.LocalID}"
| _ -> ""

member this.TermAccessionOntobeeUrl =
match this.TermSourceREF, this.LocalID with
| Some tsr, Some id -> OntologyAnnotation.createUriAnnotation tsr id
match this.IDInfo with
| Some id -> OntologyAnnotation.createUriAnnotation id.IDSpace id.LocalID
| _ -> ""

member this.TermAccessionAndOntobeeUrlIfShort =
Expand Down Expand Up @@ -234,4 +229,4 @@ type OntologyAnnotation =

member this.Copy() =
let nextComments = this.Comments |> Option.map (Array.map (fun c -> c.Copy()))
OntologyAnnotation.make this.ID this.Name this.TermSourceREF this.LocalID this.TermAccessionNumber nextComments
OntologyAnnotation.make this.ID this.Name this.TermSourceREF this.TermAccessionNumber nextComments
71 changes: 39 additions & 32 deletions src/ISA/ISA/Regex.fs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,20 @@ open System

module Pattern =

/// Names of the named capture groups used by the regex patterns in this module.
/// Pattern definitions and group lookups refer to these constants instead of repeating the raw strings.
module MatchGroups =

    /// Capture group holding the text found between the quotes of an Excel number format.
    let [<Literal>] numberFormat = "numberFormat"

    /// Capture group holding the local id part of a term annotation.
    let [<Literal>] localID = "localid"

    /// Capture group holding the id space part of a term annotation.
    let [<Literal>] idspace = "idspace"

    /// Capture group holding the IOType of an Input/Output column header.
    let [<Literal>] iotype = "iotype"


/// This pattern is only used to remove any leftover #id attributes from previous Swate version.
/// `"Parameter [biological replicate#2]"` This #id is deprecated but the pattern can still be used to remove any from files.
/// Was deprecated before 2023.
Expand All @@ -21,8 +35,7 @@ module Pattern =
let SquaredBracketsTermNamePattern = "\[.*\]" // @"(?<= \[)[^#\]]*(?=[\]#])" <- Cannot be used in IE11

/// Used to get unit name from Excel numberFormat: 0.00 "degree Celsius" --> degree Celsius
[<LiteralAttribute>]
let ExcelNumberFormat = "\"(?<numberFormat>(.*?))\""
let ExcelNumberFormat = $"\"(?<{MatchGroups.numberFormat}>(.*?))\""

/// Hits Unit column header
[<LiteralAttribute>]
Expand Down Expand Up @@ -55,29 +68,23 @@ module Pattern =
let TermAccessionNumberColumnPattern = @"Term Accession Number\s\((?<id>.*)\)"

/// Hits term accession, without id: ENVO:01001831
[<LiteralAttribute>]
let TermAnnotationShortPattern = @"(?<termsourceref>\w+?):(?<localtan>\w+)" //prev: @"[\w]+?:[\d]+"
let TermAnnotationShortPattern = $@"(?<{MatchGroups.idspace}>\w+?):(?<{MatchGroups.localID}>\w+)" //prev: @"[\w]+?:[\d]+"

// https://obofoundry.org/id-policy.html#mapping-of-owl-ids-to-obo-format-ids
/// <summary>Regex pattern is designed to hit only Foundry-compliant URIs.</summary>
[<LiteralAttribute>]
let TermAnnotationURIPattern = @"http://purl.obolibrary.org/obo/(?<termsourceref>\w+?)_(?<localtan>\w+)"
let TermAnnotationURIPattern = $@"http://purl.obolibrary.org/obo/(?<{MatchGroups.idspace}>\w+?)_(?<{MatchGroups.localID}>\w+)"

/// Watch this closely, this could hit some edge cases we do not want to cover.
[<LiteralAttribute>]
let TermAnnotationURIPattern_lessRestrictive = @".*\/(?<termsourceref>\w+?)[:_](?<localtan>\w+)"
let TermAnnotationURIPattern_lessRestrictive = $@".*\/(?<{MatchGroups.idspace}>\w+?)[:_](?<{MatchGroups.localID}>\w+)"

/// This pattern is used to match both Input and Output columns and capture the IOType as `iotype` group.
[<LiteralAttribute>]
let IOTypePattern = @"(Input|Output)\s\[(?<iotype>.+)\]"
let IOTypePattern = $@"(Input|Output)\s\[(?<{MatchGroups.iotype}>.+)\]"

/// This pattern is used to match Input column and capture the IOType as `iotype` group.
[<LiteralAttribute>]
let InputPattern = @"Input\s\[(?<iotype>.+)\]"
let InputPattern = $@"Input\s\[(?<{MatchGroups.iotype}>.+)\]"

/// This pattern is used to match Output column and capture the IOType as `iotype` group.
[<LiteralAttribute>]
let OutputPattern = @"Output\s\[(?<iotype>.+)\]"
let OutputPattern = $@"Output\s\[(?<{MatchGroups.iotype}>.+)\]"

/// This pattern matches any column header starting with some text, followed by one whitespace and a term name inside squared brackets.
///
Expand Down Expand Up @@ -176,9 +183,9 @@ module ActivePatterns =
let (|TermAnnotationShort|_|) input =
match input with
| Regex Pattern.TermAnnotationShortPattern value ->
let termsourceref = value.Groups.["termsourceref"].Value
let localtan = value.Groups.["localtan"].Value
{|TermSourceREF = termsourceref; LocalTAN = localtan|}
let idspace = value.Groups.[Pattern.MatchGroups.idspace].Value
let localID = value.Groups.[Pattern.MatchGroups.localID].Value
{|IDSpace = idspace; LocalID = localID|}
|> Some
| _ ->
None
Expand All @@ -193,9 +200,9 @@ module ActivePatterns =
| Regex Pattern.TermAnnotationShortPattern value
| Regex Pattern.TermAnnotationURIPattern value
| Regex Pattern.TermAnnotationURIPattern_lessRestrictive value ->
let termsourceref = value.Groups.["termsourceref"].Value
let localtan = value.Groups.["localtan"].Value
{|TermSourceREF = termsourceref; LocalTAN = localtan; TermAccessionNumber = input|}
let idspace = value.Groups.[Pattern.MatchGroups.idspace].Value
let localID = value.Groups.[Pattern.MatchGroups.localID].Value
{|IDSpace = idspace; LocalID = localID|}
|> Some
| _ ->
None
Expand All @@ -208,7 +215,7 @@ module ActivePatterns =
| Regex Pattern.TermSourceREFColumnPattern r ->
match r.Groups.["id"].Value with
| TermAnnotation r -> Some r
| _ -> Some {|LocalTAN = ""; TermAccessionNumber = ""; TermSourceREF = ""|}
| _ -> Some {|IDSpace = ""; LocalID = ""|}
| _ -> None

/// Matches a "Term Accession Number (ShortTerm)" column header and returns the ShortTerm split into its IDSpace and LocalID.
Expand All @@ -219,21 +226,21 @@ module ActivePatterns =
| Regex Pattern.TermAccessionNumberColumnPattern r ->
match r.Groups.["id"].Value with
| TermAnnotation r -> Some r
| _ -> Some {|LocalTAN = ""; TermAccessionNumber = ""; TermSourceREF = ""|}
| _ -> Some {|IDSpace = ""; LocalID = ""|}
| _ -> None

/// Matches a "Input [InputType]" column header and returns the InputType as string.
let (|InputColumnHeader|_|) input =
match input with
| Regex Pattern.InputPattern r ->
Some r.Groups.["iotype"].Value
Some r.Groups.[Pattern.MatchGroups.iotype].Value
| _ -> None

/// Matches a "Output [OutputType]" column header and returns the OutputType as string.
let (|OutputColumnHeader|_|) input =
match input with
| Regex Pattern.OutputPattern r ->
Some r.Groups.["iotype"].Value
Some r.Groups.[Pattern.MatchGroups.iotype].Value
| _ -> None


Expand Down Expand Up @@ -261,9 +268,9 @@ let tryParseReferenceColumnHeader (str : string) =
let tryParseTermAnnotationShort (str:string) =
match str.Trim() with
| Regex TermAnnotationShortPattern value ->
let termsourceref = value.Groups.["termsourceref"].Value
let localtan = value.Groups.["localtan"].Value
{|TermSourceREF = termsourceref; LocalTAN = localtan|}
let idspace = value.Groups.[Pattern.MatchGroups.idspace].Value
let localid = value.Groups.[Pattern.MatchGroups.localID].Value
{|IDSpace = idspace; LocalID = localid|}
|> Some
| _ -> None

Expand All @@ -279,17 +286,17 @@ let tryParseTermAnnotation (str:string) =
| Regex TermAnnotationShortPattern value
| Regex TermAnnotationURIPattern value
| Regex TermAnnotationURIPattern_lessRestrictive value ->
let termsourceref = value.Groups.["termsourceref"].Value
let localtan = value.Groups.["localtan"].Value
{|TermSourceREF = termsourceref; LocalTAN = localtan|}
let idspace = value.Groups.[Pattern.MatchGroups.idspace].Value
let localid = value.Groups.[Pattern.MatchGroups.localID].Value
{|IDSpace = idspace; LocalID = localid|}
|> Some
| _ ->
None

/// Tries to parse 'str' to term accession and returns it in the format `Some "idspace:localid"`. Example: `Some "MS:000001"`
let tryGetTermAnnotationShortString (str:string) =
tryParseTermAnnotation str
|> Option.map (fun r -> r.TermSourceREF + ":" + r.LocalTAN)
|> Option.map (fun r -> r.IDSpace + ":" + r.LocalID)

/// Parses 'str' to term accession and returns it in the format "idspace:localid". Example: "MS:000001"
let getTermAnnotationShortString (str:string) =
Expand Down Expand Up @@ -320,7 +327,7 @@ let tryParseIOTypeHeader (headerStr: string) =
match headerStr.Trim() with
| Regex IOTypePattern value ->
// read the IOType captured by the `iotype` group
let numberFormat = value.Groups.["iotype"].Value
let numberFormat = value.Groups.[Pattern.MatchGroups.iotype].Value
Some numberFormat
| _ ->
None
Expand Down
Loading

0 comments on commit f879b74

Please sign in to comment.