Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arcctrl spreadsheet parsing fixes #144

Merged
merged 3 commits into from
Aug 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/ISA/ISA.Spreadsheet/ArcAssay.fs
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ let fromFsWorkbook (doc:FsWorkbook) =
ArcAssay.create(Identifier.createMissingIdentifier())
let sheets =
doc.GetWorksheets()
|> List.choose ArcTable.tryFromFsWorksheet
if sheets.IsEmpty then
|> Seq.choose ArcTable.tryFromFsWorksheet
if sheets |> Seq.isEmpty then
assayMetaData
else
assayMetaData.Tables <- ResizeArray(sheets)
Expand Down
4 changes: 2 additions & 2 deletions src/ISA/ISA.Spreadsheet/ArcStudy.fs
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ let fromFsWorkbook (doc:FsWorkbook) =

let sheets =
doc.GetWorksheets()
|> List.choose ArcTable.tryFromFsWorksheet
if sheets.IsEmpty then
|> Seq.choose ArcTable.tryFromFsWorksheet
if sheets |> Seq.isEmpty then
studyMetadata
else
studyMetadata.Tables <- ResizeArray(sheets)
Expand Down
6 changes: 3 additions & 3 deletions src/ISA/ISA.Spreadsheet/ArcTable.fs
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ let annotationTablePrefix = "annotationTable"

let groupColumnsByHeader (columns : list<FsColumn>) =
columns
|> Aux.List.groupWhen (fun c ->
ISA.Regex.tryParseTermAnnotation c.[1].Value
|> Aux.List.groupWhen (fun c ->
ISA.Regex.tryParseReferenceColumnHeader c.[1].Value
|> Option.isNone
&&
c.[1].Value <> "Unit"
(c.[1].Value.StartsWith "Unit" |> not)
)

/// Returns the annotation table of the worksheet if it exists, else returns None
Expand Down
10 changes: 9 additions & 1 deletion src/ISA/ISA.Spreadsheet/CompositeHeader.fs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ module ActivePattern =
Some r
| _ -> None

/// Active pattern for component header cells.
/// Delegates to the `Term` pattern, handing it the component column header parser and the
/// `CompositeHeader.Component` constructor, and yields whatever `Term` produces on a match.
let (|Component|_|) (cells : FsCell list) =
    match cells with
    | Term Regex.tryParseComponentColumnHeader CompositeHeader.Component header -> Some header
    | _ -> None

let (|Input|_|) (cells : FsCell list) =
let cellValues = cells |> List.map (fun c -> c.Value)
match cellValues with
Expand Down Expand Up @@ -95,11 +101,13 @@ let fromFsCells (cells : list<FsCell>) : CompositeHeader =
| Parameter p -> p
| Factor f -> f
| Characteristic c -> c
| Component c -> c
| Input i -> i
| Output o -> o
| ProtocolHeader ph -> ph
| FreeText ft -> ft
| _ -> raise (System.NotImplementedException("parseCompositeHeader"))
| _ -> failwithf "Could not parse header group %O" cells


let toFsCells (hasUnit : bool) (header : CompositeHeader) : list<FsCell> =
if header.IsSingleColumn then
Expand Down
2 changes: 1 addition & 1 deletion src/ISA/ISA.Spreadsheet/ISA.Spreadsheet.fsproj
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
</ItemGroup>

<ItemGroup>
<PackageReference Include="FsSpreadsheet" Version="3.1.1" />
<PackageReference Include="FsSpreadsheet" Version="3.2.0" />
</ItemGroup>

<ItemGroup>
Expand Down
2 changes: 1 addition & 1 deletion src/ISA/ISA.Spreadsheet/InvestigationFile/Investigation.fs
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ module ArcInvestigation =
let fromFsWorkbook (doc:FsWorkbook) =
try
doc.GetWorksheets()
|> List.head
|> Seq.head
|> FsWorksheet.getRows
|> Seq.map SparseRow.fromFsRow
|> fromRows
Expand Down
3 changes: 1 addition & 2 deletions src/ISA/ISA/ArcTypes/CompositeColumn.fs
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ type CompositeColumn = {
true
| h, c ->
if raiseExeption then
let n = System.Math.Min(c.Length,3)
let exampleCells = c.[n]
let exampleCells = c.[0]
let msg = $"Invalid combination of header `{h}` and cells `{exampleCells}`"
failwith msg
// Maybe still return `msg` somehow if `raiseExeption` is false?
Expand Down
38 changes: 33 additions & 5 deletions src/ISA/ISA/Regex.fs
Original file line number Diff line number Diff line change
Expand Up @@ -36,23 +36,23 @@ module Pattern =
///
/// the id part "MS:1003022" is captured as `id` group.
[<LiteralAttribute>]
let ReferenceColumnPattern = @"(Term Source REF|Term Accession Number)\s\((?<id>.+)\)"
let ReferenceColumnPattern = @"(Term Source REF|Term Accession Number)\s\((?<id>.*)\)"

/// Hits Term Source REF column header
///
/// Example 1: "Term Source REF (MS:1003022)"
///
/// the id part "MS:1003022" is captured as `id` group.
[<LiteralAttribute>]
let TermSourceREFColumnPattern = @"Term Source REF\s\((?<id>.+)\)"
let TermSourceREFColumnPattern = @"Term Source REF\s\((?<id>.*)\)"

/// Hits Term Accession Number column header
///
/// Example 1: "Term Accession Number (MS:1003022)"
///
/// the id part "MS:1003022" is captured as `id` group.
[<LiteralAttribute>]
let TermAccessionNumberColumnPattern = @"Term Accession Number\s\((?<id>.+)\)"
let TermAccessionNumberColumnPattern = @"Term Accession Number\s\((?<id>.*)\)"

/// Hits term accession, without id: ENVO:01001831
[<LiteralAttribute>]
Expand Down Expand Up @@ -107,6 +107,14 @@ module ActivePatterns =
if m.Success then Some(m)
else None

/// Matches a reference column header, i.e. "Term Source REF (id)" or "Term Accession Number (id)",
/// using `Pattern.ReferenceColumnPattern`.
///
/// Returns the text captured by the `id` group (the content of the parentheses, possibly empty) as `Annotation`.
let (|ReferenceColumnHeader|_|) input =
    match input with
    | Regex Pattern.ReferenceColumnPattern m ->
        Some {| Annotation = m.Groups.["id"].Value |}
    | _ -> None

/// Matches any column header starting with some text, followed by one whitespace and a term name inside square brackets.
let (|TermColumn|_|) input =
match input with
Expand Down Expand Up @@ -152,6 +160,15 @@ module ActivePatterns =
| _ -> None
| _ -> None

/// Matches a term column header whose column type is "Component" or "Component Value"
/// (e.g. "Component [Term]") and returns the term name.
let (|ComponentColumnHeader|_|) input =
    match input with
    | TermColumn col when col.TermColumnType = "Component" || col.TermColumnType = "Component Value" ->
        Some col.TermName
    | _ -> None

/// Matches a short term string and returns the term source ref and the annotation number strings.
///
Expand Down Expand Up @@ -191,7 +208,7 @@ module ActivePatterns =
| Regex Pattern.TermSourceREFColumnPattern r ->
match r.Groups.["id"].Value with
| TermAnnotation r -> Some r
| _ -> None
| _ -> Some {|LocalTAN = ""; TermAccessionNumber = ""; TermSourceREF = ""|}
| _ -> None

/// Matches a "Term Accession Number (ShortTerm)" column header and returns the ShortTerm as Term Source Ref and Annotation Number.
Expand All @@ -202,7 +219,7 @@ module ActivePatterns =
| Regex Pattern.TermAccessionNumberColumnPattern r ->
match r.Groups.["id"].Value with
| TermAnnotation r -> Some r
| _ -> None
| _ -> Some {|LocalTAN = ""; TermAccessionNumber = ""; TermSourceREF = ""|}
| _ -> None

/// Matches a "Input [InputType]" column header and returns the InputType as string.
Expand Down Expand Up @@ -235,6 +252,11 @@ open System
open System.Text.RegularExpressions


/// Trims `str` and tries to match it with the `ReferenceColumnHeader` active pattern.
///
/// Returns `Some` of the pattern's result on a match, otherwise `None`.
let tryParseReferenceColumnHeader (str : string) =
    let trimmed = str.Trim()
    match trimmed with
    | ReferenceColumnHeader annotation -> Some annotation
    | _ -> None

let tryParseTermAnnotationShort (str:string) =
match str.Trim() with
Expand Down Expand Up @@ -334,6 +356,12 @@ let tryParseCharacteristicColumnHeader input =
| CharacteristicColumnHeader r -> Some r
| _ -> None

/// Matches a "Component [Term]" or "Component Value [Term]" column header and returns the Term string.
///
/// Returns `None` when `input` is not matched by the `ComponentColumnHeader` active pattern.
let tryParseComponentColumnHeader input =
    match input with
    | ComponentColumnHeader termName -> Some termName
    | _ -> None

/// Matches a "Term Source REF (ShortTerm)" column header and returns the ShortTerm as Term Source Ref and Annotation Number.
///
/// Example: "Term Source REF (MS:1003022)" --> term source ref: "MS"; annotation number: "1003022"
Expand Down
5 changes: 4 additions & 1 deletion tests/ISA/ISA.Spreadsheet.Tests/ArcTableTests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ let private simpleTable =
[
Protocol.REF.appendLolColumn 1
Protocol.Type.appendCollectionColumn 1
Protocol.Component.appendInstrumentColumn 1
Parameter.appendTemperatureColumn 1
Parameter.appendInstrumentColumn 1
Characteristic.appendOrganismColumn 1
Expand All @@ -65,13 +66,14 @@ let private simpleTable =
let table = table.Value

Expect.equal table.Name wsName "Name did not match"
Expect.equal table.ColumnCount 6 "Wrong number of columns"
Expect.equal table.ColumnCount 7 "Wrong number of columns"
Expect.equal table.RowCount 1 "Wrong number of rows"

let expectedHeaders =
[
Protocol.REF.lolHeader
Protocol.Type.collectionHeader
Protocol.Component.instrumentHeader
Parameter.temperatureHeader
Parameter.instrumentHeader
Characteristic.organismHeader
Expand All @@ -83,6 +85,7 @@ let private simpleTable =
[
Protocol.REF.lolValue
Protocol.Type.collectionValue
Protocol.Component.instrumentValue
Parameter.temperatureValue
Parameter.instrumentValue
Characteristic.organismValue
Expand Down
8 changes: 4 additions & 4 deletions tests/ISA/ISA.Spreadsheet.Tests/FableTests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,18 @@ let tests_typeTranspilation = testList "type transpilation" [
let wb = new FsWorkbook()
let ws = FsWorksheet.init ("My Worksheet")
wb.AddWorksheet(ws)
Expect.equal (wb.GetWorksheets().Length) 1 "length"
Expect.equal (wb.GetWorksheets().Count) 1 "length"
Expect.equal (wb.GetWorksheets().[0].Name) "My Worksheet" "length"
testCase "FsTable" <| fun _ ->
let wb = new FsWorkbook()
let ws = FsWorksheet.init ("My Worksheet")
wb.AddWorksheet(ws)
Expect.equal (wb.GetWorksheets().Length) 1 "length"
Expect.equal (wb.GetWorksheets().Count) 1 "length"
Expect.equal (wb.GetWorksheets().[0].Name) "My Worksheet" "length"
let table = FsTable("My Table",FsRangeAddress(FsAddress(1,1),FsAddress(5,5)))
let table = FsTable("MyTable",FsRangeAddress(FsAddress(1,1),FsAddress(5,5)))
ws.AddTable(table) |> ignore
Expect.equal (wb.GetTables().Length) 1 "table length"
Expect.equal (wb.GetTables().[0].Name) "My Table" "table name"
Expect.equal (wb.GetTables().[0].Name) "MyTable" "table name"
]


Expand Down
29 changes: 29 additions & 0 deletions tests/ISA/ISA.Spreadsheet.Tests/TestObjects/ArcTable.fs
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,35 @@ module Protocol =
t.Cell(FsAddress(i, colCount + 2),c).SetValueAs collectionValueV2
t.Cell(FsAddress(i, colCount + 3),c).SetValueAs collectionValueV3

/// Test fixture for a "Component [instrument model]" column: the expected parsed header and cell,
/// the raw spreadsheet strings they are written as, and a function appending them to a table.
module Component =

    /// Expected parsed header of the component column.
    let instrumentHeader =
        OntologyAnnotation.fromString("instrument model","MS","MS:1000031")
        |> CompositeHeader.Component

    /// Expected parsed term cell of every data row of the component column.
    let instrumentValue =
        CompositeCell.createTermFromString
            ("Thermo Fisher Scientific instrument model","MS","http://purl.obolibrary.org/obo/MS_1000483")

    // Raw header strings, one per spreadsheet column.
    let instrumentHeaderV1 = "Component [instrument model]"
    let instrumentHeaderV2 = "Term Source REF (MS:1000031)"
    let instrumentHeaderV3 = "Term Accession Number (MS:1000031)"

    // Raw cell strings, one per spreadsheet column.
    let instrumentValueV1 = "Thermo Fisher Scientific instrument model"
    let instrumentValueV2 = "MS"
    let instrumentValueV3 = "http://purl.obolibrary.org/obo/MS_1000483"

    /// Writes the three header strings into row 1 and the three value strings into
    /// rows 2 .. l + 1, in the three columns following the columns `t` already has.
    let appendInstrumentColumn l (c : FsCellsCollection) (t : FsTable) =
        // The column offset is read once, before any cell is written.
        let offset = if t.IsEmpty(c) then 0 else t.ColumnCount()
        let writeRow row (v1 : string) (v2 : string) (v3 : string) =
            t.Cell(FsAddress(row, offset + 1),c).SetValueAs v1
            t.Cell(FsAddress(row, offset + 2),c).SetValueAs v2
            t.Cell(FsAddress(row, offset + 3),c).SetValueAs v3
        writeRow 1 instrumentHeaderV1 instrumentHeaderV2 instrumentHeaderV3
        for row in 2 .. l + 1 do
            writeRow row instrumentValueV1 instrumentValueV2 instrumentValueV3



let initTable (appendOperations : (FsCellsCollection -> FsTable -> unit) list)=
let c = FsCellsCollection()
let t = FsTable(ArcTable.annotationTablePrefix, FsRangeAddress("A1:A1"))
Expand Down
2 changes: 1 addition & 1 deletion tests/ISA/ISA.Tests/Identifier.Tests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,6 @@ let private tests_checkValidCharacters = testList "checkValidCharacters" [


let main =
testList "ArcInvestigation" [
testList "Identifier" [
tests_checkValidCharacters
]
88 changes: 87 additions & 1 deletion tests/ISA/ISA.Tests/Regex.Tests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ open Fable.Mocha
open Expecto
#endif

let tests_AutoGeneratedTableName =
let private tests_AutoGeneratedTableName =
testList "AutoGeneratedTableName" [
testCase "match" (fun () ->
let testString = @"New Table 10"
Expand Down Expand Up @@ -53,8 +53,94 @@ let tests_AutoGeneratedTableName =
)
]

/// Tests for the active patterns recognising the "Term Source REF (...)" and
/// "Term Accession Number (...)" reference column headers, including the empty "()" form.
let private tests_AnnotationTableColums =
    testList "AnnotationTableColumns" [
        testCase "Term Source REF" (fun () ->
            let localID = "12345"
            let space = "UO"
            let testString = $"Term Source REF ({space}:{localID})"
            let r =
                match testString with
                | Regex.ActivePatterns.TSRColumnHeader result -> Some result
                | _ -> None
            Expect.isSome r "Could not match TSRColumnHeader"
            let rv = r.Value
            Expect.equal rv.LocalTAN localID "LocalId did not match"
            Expect.equal rv.TermSourceREF space "TermSourceREF did not match"
            Expect.equal rv.TermAccessionNumber $"{space}:{localID}" "TermAccessionNumber did not match"
        )
        testCase "Term Source REF Empty" (fun () ->
            let testString = "Term Source REF ()"
            let r =
                match testString with
                | Regex.ActivePatterns.TSRColumnHeader result -> Some result
                | _ -> None
            Expect.isSome r "Could not match TSRColumnHeader"
            let rv = r.Value
            Expect.equal rv.LocalTAN "" "LocalID should be empty"
            Expect.equal rv.TermSourceREF "" "TermSourceREF should be empty"
            Expect.equal rv.TermAccessionNumber "" "TermAccessionNumber should be empty"
        )
        testCase "Term Accession Number" (fun () ->
            let localID = "12345"
            let space = "UO"
            let testString = $"Term Accession Number ({space}:{localID})"
            let r =
                match testString with
                | Regex.ActivePatterns.TANColumnHeader result -> Some result
                | _ -> None
            Expect.isSome r "Could not match TANColumnHeader"
            let rv = r.Value
            Expect.equal rv.LocalTAN localID "LocalId did not match"
            Expect.equal rv.TermSourceREF space "TermSourceREF did not match"
            Expect.equal rv.TermAccessionNumber $"{space}:{localID}" "TermAccessionNumber did not match"
        )
        testCase "Term Accession Number Empty" (fun () ->
            let testString = "Term Accession Number ()"
            let r =
                match testString with
                | Regex.ActivePatterns.TANColumnHeader result -> Some result
                | _ -> None
            Expect.isSome r "Could not match TANColumnHeader"
            let rv = r.Value
            Expect.equal rv.LocalTAN "" "LocalID should be empty"
            Expect.equal rv.TermSourceREF "" "TermSourceREF should be empty"
            Expect.equal rv.TermAccessionNumber "" "TermAccessionNumber should be empty"
        )
        testCase "Reference Column Header Empty" (fun () ->
            // One matcher shared by the three inputs, instead of re-binding the same names per input.
            let tryMatch (header : string) =
                match header with
                | Regex.ActivePatterns.ReferenceColumnHeader result -> Some result
                | _ -> None

            // Failure messages name the pattern and input under test, so a failing run points at the right case.
            let tan = tryMatch "Term Accession Number ()"
            Expect.isSome tan "Could not match ReferenceColumnHeader on empty Term Accession Number header"
            Expect.equal tan.Value.Annotation "" "Annotation should be empty"

            let tsr = tryMatch "Term Source REF ()"
            Expect.isSome tsr "Could not match ReferenceColumnHeader on empty Term Source REF header"
            Expect.equal tsr.Value.Annotation "" "Annotation should be empty"

            let other = tryMatch "Any Other String ()"
            Expect.isNone other "Should not match other String"
        )
    ]



/// Groups the annotation table column tests and the auto generated table name tests
/// under the "Regex" test list.
let main =
    [
        tests_AnnotationTableColums
        tests_AutoGeneratedTableName
    ]
    |> testList "Regex"