Skip to content

Commit

Permalink
Merge pull request #184 from nfdi4plants/developer_TopLevelAssays
Browse files Browse the repository at this point in the history
ArcInvestigation level assays
  • Loading branch information
HLWeil authored Sep 18, 2023
2 parents 4e676ea + f812f4c commit b18400f
Show file tree
Hide file tree
Showing 36 changed files with 3,567 additions and 1,637 deletions.
72 changes: 36 additions & 36 deletions src/ARCtrl/ARCtrl.fs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
open ARCtrl.FileSystem
open ARCtrl.Contract
open ARCtrl.ISA
open ARCtrl.ISA.Spreadsheet
open FsSpreadsheet
open Fable.Core

Expand All @@ -18,7 +19,7 @@ module ARCAux =
let getArcStudiesFromContracts (contracts: Contract []) =
contracts
|> Array.choose Contract.ArcStudy.tryFromContract
|> Array.map (fun x -> x :?> FsWorkbook |> ISA.Spreadsheet.ArcStudy.fromFsWorkbook)
|> Array.map (fun x -> x :?> FsWorkbook |> ArcStudy.fromFsWorkbook)

let getArcInvestigationFromContracts (contracts: Contract []) =
contracts
Expand All @@ -30,12 +31,7 @@ module ARCAux =
let (studyNames,assayNames) =
match isa with
| Some inv ->
inv.Studies
|> Seq.fold (fun (studyNames,assayNames) s ->
Array.append studyNames [|s.Identifier|],
Array.append assayNames (s.Assays |> Seq.map (fun a -> a.Identifier) |> Array.ofSeq)

) ([||],[||])
inv.StudyIdentifiers |> Seq.toArray, inv.AssayIdentifiers |> Seq.toArray
| None -> ([||],[||])
let assays = FileSystemTree.createAssaysFolder (assayNames |> Array.map FileSystemTree.createAssayFolder)
let studies = FileSystemTree.createStudiesFolder (studyNames |> Array.map FileSystemTree.createStudyFolder)
Expand Down Expand Up @@ -165,31 +161,34 @@ type ARC(?isa : ISA.ArcInvestigation, ?cwl : CWL.CWL, ?fs : FileSystem.FileSyste
/// get investigation from xlsx
let investigation = ARCAux.getArcInvestigationFromContracts contracts
/// get studies from xlsx
let studies = ARCAux.getArcStudiesFromContracts contracts
let studies = ARCAux.getArcStudiesFromContracts contracts |> Array.map fst
/// get assays from xlsx
let assays = ARCAux.getArcAssaysFromContracts contracts

investigation.Studies |> Seq.iter (fun registeredStudy ->
studies |> Array.iter (fun study ->
/// Try find registered study in parsed READ contracts
let studyOpt = studies |> Array.tryFind (fun s -> s.Identifier = registeredStudy.Identifier)
match studyOpt with
| Some study -> // This study element is parsed from FsWorkbook and has no registered assays, yet

if enableLogging then printfn "Found study: %s" registeredStudy.Identifier
registeredStudy.Assays |> Seq.iter (fun registeredAssay ->
/// Try find registered assay in parsed READ contracts
let assayOpt = assays |> Array.tryFind (fun a -> a.Identifier = registeredAssay.Identifier)
match assayOpt with
| Some assay ->
if enableLogging then printfn "Found assay: %s - %s" registeredStudy.Identifier registeredAssay.Identifier
registeredAssay.AddTables(assay.Tables)
| None ->
if enableLogging then printfn "Unable to find registered assay '%s' in fullfilled READ contracts!" registeredAssay.Identifier
)
study.Tables
|> Seq.iter (fun table -> registeredStudy.SetTable(table.Name , table))
let registeredStudyOpt = investigation.Studies |> Seq.tryFind (fun s -> s.Identifier = study.Identifier)
match registeredStudyOpt with
| Some registeredStudy ->
registeredStudy.UpdateReferenceByStudyFile(study,true)
| None ->
investigation.AddStudy(study)
)
assays |> Array.iter (fun assay ->
/// Try to find an assay with the same identifier among the assays registered in the investigation
let registeredAssayOpt = investigation.Assays |> Seq.tryFind (fun a -> a.Identifier = assay.Identifier)
match registeredAssayOpt with
| Some registeredAssay -> // An assay with this identifier is already registered in the investigation
registeredAssay.UpdateReferenceByAssayFile(assay,true)
| None ->
if enableLogging then printfn "Unable to find registered study '%s' in fullfilled READ contracts!" registeredStudy.Identifier
investigation.AddAssay(assay)
let assay = investigation.Assays |> Seq.find (fun a -> a.Identifier = assay.Identifier)
let updatedTables =
assay.StudiesRegisteredIn
|> Array.fold (fun tables study ->
ArcTables.updateReferenceTablesBySheets(ArcTables(study.Tables),tables,false)
) (ArcTables(assay.Tables))
assay.Tables <- updatedTables.Tables
)
this.ISA <- Some investigation

Expand All @@ -212,18 +211,19 @@ type ARC(?isa : ISA.ArcInvestigation, ?cwl : CWL.CWL, ?fs : FileSystem.FileSyste
workbooks.Add (Path.InvestigationFileName, (DTOType.ISA_Investigation, ISA.Spreadsheet.ArcInvestigation.toFsWorkbook inv))
inv.Studies
|> Seq.iter (fun s ->

workbooks.Add (
Identifier.Study.fileNameFromIdentifier s.Identifier,
(DTOType.ISA_Study, ISA.Spreadsheet.ArcStudy.toFsWorkbook s))
s.Assays
|> Seq.iter (fun a ->
let key = Identifier.Assay.fileNameFromIdentifier a.Identifier
if workbooks.ContainsKey key |> not then
workbooks.Add (
key,
(DTOType.ISA_Assay, ISA.Spreadsheet.ArcAssay.toFsWorkbook a))
(DTOType.ISA_Study, ArcStudy.toFsWorkbook s)
)
)
inv.Assays
|> Seq.iter (fun a ->
workbooks.Add (
Identifier.Assay.fileNameFromIdentifier a.Identifier,
(DTOType.ISA_Assay, ISA.Spreadsheet.ArcAssay.toFsWorkbook a))
)

| None ->
workbooks.Add (Path.InvestigationFileName, (DTOType.ISA_Investigation, ISA.Spreadsheet.ArcInvestigation.toFsWorkbook (ArcInvestigation.create(Identifier.MISSING_IDENTIFIER))))
printfn "ARC contains no ISA part."
Expand Down Expand Up @@ -257,7 +257,7 @@ type ARC(?isa : ISA.ArcInvestigation, ?cwl : CWL.CWL, ?fs : FileSystem.FileSyste

let registeredAssays =
registeredStudies
|> Array.map (fun s -> s.Assays.ToArray()) // to-do: s.RegisteredAssays
|> Array.map (fun s -> s.RegisteredAssays.ToArray()) // to-do: s.RegisteredAssays
|> Array.concat

let includeRootFiles : Set<string> =
Expand Down
8 changes: 4 additions & 4 deletions src/ISA/ISA.Json/Study.fs
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,11 @@ module ArcStudy =
GDecode.fromString (decoder (ConverterOptions())) s
|> ArcStudy.fromStudy

let toString (a:ArcStudy) =
encoder (ConverterOptions()) (a.ToStudy())
/// Serializes the study `a` to a JSON string.
/// The study is converted with `a.ToStudy(assays)`, encoded by `encoder` using
/// default `ConverterOptions`, and rendered with `Encode.toString 2`
/// (the `2` is presumably the indentation width — TODO confirm).
let toString (a:ArcStudy) (assays: ResizeArray<ArcAssay>) =
    let options = ConverterOptions()
    let study = a.ToStudy(assays)
    Encode.toString 2 (encoder options study)

/// exports in json-ld format
let toStringLD (a:ArcStudy) =
encoder (ConverterOptions(SetID=true,IncludeType=true)) (a.ToStudy())
/// Serializes the study `a` to a JSON-LD string.
/// Works like the plain JSON export, but the encoder is configured with
/// `SetID=true` and `IncludeType=true`.
let toStringLD (a:ArcStudy) (assays: ResizeArray<ArcAssay>) =
    let options = ConverterOptions(SetID=true,IncludeType=true)
    let study = a.ToStudy(assays)
    Encode.toString 2 (encoder options study)
15 changes: 12 additions & 3 deletions src/ISA/ISA.Spreadsheet/ArcInvestigation.fs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ module ArcInvestigation =
|> InvestigationInfo.ToSparseTable
|> SparseTable.ToRows

let fromParts (investigationInfo:InvestigationInfo) (ontologySourceReference:OntologySourceReference list) (publications: Publication list) (contacts: Person list) (studies: ArcStudy list) (remarks: Remark list) =
let fromParts (investigationInfo:InvestigationInfo) (ontologySourceReference:OntologySourceReference list) (publications: Publication list) (contacts: Person list) (studies: ArcStudy list) (assays: ArcAssay list) (remarks: Remark list) =
let studyIdentifiers = studies |> List.map (fun s -> s.Identifier)
ArcInvestigation.make
(investigationInfo.Identifier)
(Option.fromValueWithDefault "" investigationInfo.Title)
Expand All @@ -108,7 +109,9 @@ module ArcInvestigation =
(Array.ofList ontologySourceReference)
(Array.ofList publications)
(Array.ofList contacts)
(ResizeArray(assays))
(ResizeArray(studies))
(ResizeArray(studyIdentifiers))
(Array.ofList investigationInfo.Comments)
(Array.ofList remarks)

Expand Down Expand Up @@ -145,7 +148,13 @@ module ArcInvestigation =
loop currentLine ontologySourceReferences investigationInfo publications contacts studies (List.append remarks newRemarks) lineNumber

| k ->
fromParts investigationInfo ontologySourceReferences publications contacts (List.rev studies) remarks
let studies,assays =
studies
|> List.unzip
|> fun (s,a) ->
s |> List.rev,
a |> List.concat |> List.distinctBy (fun a -> a.Identifier)
fromParts investigationInfo ontologySourceReferences publications contacts studies assays remarks

if en.MoveNext () then
let currentLine = en.Current |> SparseRow.tryGetValueAt 0
Expand Down Expand Up @@ -187,7 +196,7 @@ module ArcInvestigation =

for study in (List.ofSeq investigation.Studies) do
yield SparseRow.fromValues [studyLabel]
yield! Studies.toRows study
yield! Studies.toRows study None
}
|> insertRemarks (List.ofArray investigation.Remarks)
|> seq
Expand Down
122 changes: 67 additions & 55 deletions src/ISA/ISA.Spreadsheet/ArcStudy.fs
Original file line number Diff line number Diff line change
@@ -1,69 +1,81 @@
module ARCtrl.ISA.Spreadsheet.ArcStudy
namespace ARCtrl.ISA.Spreadsheet

open ARCtrl.ISA
open FsSpreadsheet

let [<Literal>] obsoleteStudiesLabel = "STUDY METADATA"
let [<Literal>] studiesLabel = "STUDY"

let [<Literal>] obsoleteMetaDataSheetName = "Study"
let [<Literal>] metaDataSheetName = "isa_study"
module ArcStudy =

let [<Literal>] obsoleteStudiesLabel = "STUDY METADATA"
let [<Literal>] studiesLabel = "STUDY"

/// Builds the study metadata worksheet named `metaDataSheetName`.
/// The first row holds `studiesLabel`; the rows produced by
/// `Studies.StudyInfo.toRows study` follow it.
let toMetadataSheet (study : ArcStudy) : FsWorksheet =
    let worksheet = FsWorksheet(metaDataSheetName)
    let metadataRows =
        seq {
            yield SparseRow.fromValues [studiesLabel]
            yield! Studies.StudyInfo.toRows study
        }
    // The zero-based sequence index is shifted by one, so the first row is written at index 1.
    for (index, row) in Seq.indexed metadataRows do
        SparseRow.writeToSheet (index + 1) row worksheet
    worksheet
let [<Literal>] obsoleteMetaDataSheetName = "Study"
let [<Literal>] metaDataSheetName = "isa_study"

/// Parses a study metadata worksheet into an `ArcStudy`.
///
/// The sheet rows are converted to `SparseRow`s and handed to `Studies.fromRows`.
/// If that yields no study, a new study with a generated missing identifier is returned.
let fromMetadataSheet (sheet : FsWorksheet) : ArcStudy =
    let fromRows (rows: seq<SparseRow>) =
        // `use` disposes the enumerator once parsing has finished.
        use en = rows.GetEnumerator()
        // Advance past the first row (presumably the section label — confirm) before parsing;
        // `2` is passed as the first argument of `Studies.fromRows`.
        en.MoveNext() |> ignore
        let _,_,_,study = Studies.fromRows 2 en
        study
    sheet.Rows
    |> Seq.map SparseRow.fromFsRow
    |> fromRows
    // Build the fallback lazily so no identifier is generated when a study was parsed.
    |> Option.defaultWith (fun () -> ArcStudy.create(Identifier.createMissingIdentifier()))
/// Builds the study metadata worksheet named `metaDataSheetName`.
/// The first row holds `studiesLabel`; the rows produced by
/// `Studies.toRows study assays` follow it.
let toMetadataSheet (study : ArcStudy) (assays : ArcAssay list option) : FsWorksheet =
    let worksheet = FsWorksheet(metaDataSheetName)
    let studyRows = Studies.toRows study assays
    let labelRow = SparseRow.fromValues [studiesLabel]
    // Label row first, then the study rows; the zero-based index is shifted by one
    // so the first row is written at index 1.
    Seq.append [labelRow] studyRows
    |> Seq.iteri (fun index row -> SparseRow.writeToSheet (index + 1) row worksheet)
    worksheet

/// Reads a study from a spreadsheet
let fromFsWorkbook (doc:FsWorkbook) =
// Read the study metadata sheet, falling back to the obsolete sheet name.
let studyMetadata =
/// Parses a study metadata worksheet into an `ArcStudy` together with the list of
/// assays that `Studies.fromRows` returns alongside it.
///
/// The sheet rows are converted to `SparseRow`s and handed to `Studies.fromRows`.
/// If that yields nothing, a new study with a generated missing identifier and an
/// empty assay list is returned.
let fromMetadataSheet (sheet : FsWorksheet) : ArcStudy*ArcAssay list =
    let fromRows (rows: seq<SparseRow>) =
        // `use` disposes the enumerator once parsing has finished.
        use en = rows.GetEnumerator()
        // Advance past the first row (presumably the section label — confirm) before parsing;
        // `2` is passed as the first argument of `Studies.fromRows`.
        en.MoveNext() |> ignore
        let _,_,_,study = Studies.fromRows 2 en
        study
    sheet.Rows
    |> Seq.map SparseRow.fromFsRow
    |> fromRows
    // Build the fallback lazily so no identifier is generated when a study was parsed.
    |> Option.defaultWith (fun () -> (ArcStudy.create(Identifier.createMissingIdentifier()), []))

[<AutoOpen>]
module Extensions =

type ArcStudy with

/// Reads a study from a spreadsheet
static member fromFsWorkbook (doc:FsWorkbook) =
// Read the study metadata sheet, falling back to the obsolete sheet name.
let studyMetadata,assays =

match doc.TryGetWorksheetByName metaDataSheetName with
| Option.Some sheet ->
fromMetadataSheet sheet
| None ->
match doc.TryGetWorksheetByName obsoleteMetaDataSheetName with
| Option.Some sheet ->
fromMetadataSheet sheet
| None ->
printfn "Cannot retrieve metadata: Study file does not contain \"%s\" or \"%s\" sheet." metaDataSheetName obsoleteMetaDataSheetName
ArcStudy.create(Identifier.createMissingIdentifier())
match doc.TryGetWorksheetByName ArcStudy.metaDataSheetName with
| Option.Some sheet ->
ArcStudy.fromMetadataSheet sheet
| None ->
match doc.TryGetWorksheetByName ArcStudy.obsoleteMetaDataSheetName with
| Option.Some sheet ->
ArcStudy.fromMetadataSheet sheet
| None ->
printfn "Cannot retrieve metadata: Study file does not contain \"%s\" or \"%s\" sheet." ArcStudy.metaDataSheetName ArcStudy.obsoleteMetaDataSheetName
ArcStudy.create(Identifier.createMissingIdentifier()),[]

let sheets =
doc.GetWorksheets()
|> Seq.choose ArcTable.tryFromFsWorksheet
if sheets |> Seq.isEmpty then
studyMetadata
else
studyMetadata.Tables <- ResizeArray(sheets)
studyMetadata
let sheets =
doc.GetWorksheets()
|> Seq.choose ArcTable.tryFromFsWorksheet
if sheets |> Seq.isEmpty |> not then
let updatedTables =
ArcTables.updateReferenceTablesBySheets(
(ArcTables studyMetadata.Tables),
(ArcTables (ResizeArray sheets)),
keepUnusedRefTables = true
)
studyMetadata.Tables <- updatedTables.Tables
studyMetadata
,assays

let toFsWorkbook (study : ArcStudy) =
let doc = new FsWorkbook()
let metaDataSheet = toMetadataSheet study
doc.AddWorksheet metaDataSheet
static member toFsWorkbook (study : ArcStudy,?assays : ArcAssay list) =
let doc = new FsWorkbook()
let metaDataSheet = ArcStudy.toMetadataSheet study assays
doc.AddWorksheet metaDataSheet

study.Tables
|> Seq.iter (ArcTable.toFsWorksheet >> doc.AddWorksheet)
study.Tables
|> Seq.iter (ArcTable.toFsWorksheet >> doc.AddWorksheet)

doc
doc
12 changes: 6 additions & 6 deletions src/ISA/ISA.Spreadsheet/Metadata/Assays.fs
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ module Assays =


let fromString measurementType measurementTypeTermSourceREF measurementTypeTermAccessionNumber technologyType technologyTypeTermSourceREF technologyTypeTermAccessionNumber technologyPlatform fileName comments : ArcAssay =
let measurementType = OntologyAnnotation.fromString(measurementType,?tan = measurementTypeTermAccessionNumber,?tsr = measurementTypeTermSourceREF)
let technologyType = OntologyAnnotation.fromString(technologyType,?tan = technologyTypeTermAccessionNumber,?tsr = technologyTypeTermSourceREF)
let measurementType = OntologyAnnotation.fromString(?termName = measurementType,?tan = measurementTypeTermAccessionNumber,?tsr = measurementTypeTermSourceREF)
let technologyType = OntologyAnnotation.fromString(?termName = technologyType,?tan = technologyTypeTermAccessionNumber,?tsr = technologyTypeTermSourceREF)
ArcAssay.make
(fileName)
(Option.fromValueWithDefault OntologyAnnotation.empty measurementType)
(Option.fromValueWithDefault OntologyAnnotation.empty technologyType)
(Option.fromValueWithDefault "" technologyPlatform |> Option.map ArcAssay.decomposeTechnologyPlatform)
(technologyPlatform |> Option.map ArcAssay.decomposeTechnologyPlatform)
(ResizeArray())
[||]
(comments)
Expand All @@ -50,13 +50,13 @@ module Assays =
|> Array.ofList

fromString
(matrix.TryGetValueDefault("",(measurementTypeLabel,i)))
(matrix.TryGetValue(measurementTypeLabel,i))
(matrix.TryGetValue((measurementTypeTermSourceREFLabel,i)))
(matrix.TryGetValue((measurementTypeTermAccessionNumberLabel,i)))
(matrix.TryGetValueDefault("",(technologyTypeLabel,i)))
(matrix.TryGetValue(technologyTypeLabel,i))
(matrix.TryGetValue((technologyTypeTermSourceREFLabel,i)))
(matrix.TryGetValue((technologyTypeTermAccessionNumberLabel,i)))
(matrix.TryGetValueDefault("",(technologyPlatformLabel,i)))
(matrix.TryGetValue(technologyPlatformLabel,i))
(matrix.TryGetValueDefault(Identifier.createMissingIdentifier(),(fileNameLabel,i)) |> Identifier.Assay.identifierFromFileName)
comments
)
Expand Down
Loading

0 comments on commit b18400f

Please sign in to comment.