Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ArcInvestigation level assays #184

Merged
merged 17 commits into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 36 additions & 36 deletions src/ARCtrl/ARCtrl.fs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
open ARCtrl.FileSystem
open ARCtrl.Contract
open ARCtrl.ISA
open ARCtrl.ISA.Spreadsheet
open FsSpreadsheet
open Fable.Core

Expand All @@ -18,7 +19,7 @@ module ARCAux =
let getArcStudiesFromContracts (contracts: Contract []) =
contracts
|> Array.choose Contract.ArcStudy.tryFromContract
|> Array.map (fun x -> x :?> FsWorkbook |> ISA.Spreadsheet.ArcStudy.fromFsWorkbook)
|> Array.map (fun x -> x :?> FsWorkbook |> ArcStudy.fromFsWorkbook)

let getArcInvestigationFromContracts (contracts: Contract []) =
contracts
Expand All @@ -30,12 +31,7 @@ module ARCAux =
let (studyNames,assayNames) =
match isa with
| Some inv ->
inv.Studies
|> Seq.fold (fun (studyNames,assayNames) s ->
Array.append studyNames [|s.Identifier|],
Array.append assayNames (s.Assays |> Seq.map (fun a -> a.Identifier) |> Array.ofSeq)

) ([||],[||])
inv.StudyIdentifiers |> Seq.toArray, inv.AssayIdentifiers |> Seq.toArray
| None -> ([||],[||])
let assays = FileSystemTree.createAssaysFolder (assayNames |> Array.map FileSystemTree.createAssayFolder)
let studies = FileSystemTree.createStudiesFolder (studyNames |> Array.map FileSystemTree.createStudyFolder)
Expand Down Expand Up @@ -165,31 +161,34 @@ type ARC(?isa : ISA.ArcInvestigation, ?cwl : CWL.CWL, ?fs : FileSystem.FileSyste
/// get investigation from xlsx
let investigation = ARCAux.getArcInvestigationFromContracts contracts
/// get studies from xlsx
let studies = ARCAux.getArcStudiesFromContracts contracts
let studies = ARCAux.getArcStudiesFromContracts contracts |> Array.map fst
/// get assays from xlsx
let assays = ARCAux.getArcAssaysFromContracts contracts

investigation.Studies |> Seq.iter (fun registeredStudy ->
studies |> Array.iter (fun study ->
/// Try find registered study in parsed READ contracts
let studyOpt = studies |> Array.tryFind (fun s -> s.Identifier = registeredStudy.Identifier)
match studyOpt with
| Some study -> // This study element is parsed from FsWorkbook and has no registered assays, yet

if enableLogging then printfn "Found study: %s" registeredStudy.Identifier
registeredStudy.Assays |> Seq.iter (fun registeredAssay ->
/// Try find registered assay in parsed READ contracts
let assayOpt = assays |> Array.tryFind (fun a -> a.Identifier = registeredAssay.Identifier)
match assayOpt with
| Some assay ->
if enableLogging then printfn "Found assay: %s - %s" registeredStudy.Identifier registeredAssay.Identifier
registeredAssay.AddTables(assay.Tables)
| None ->
if enableLogging then printfn "Unable to find registered assay '%s' in fullfilled READ contracts!" registeredAssay.Identifier
)
study.Tables
|> Seq.iter (fun table -> registeredStudy.SetTable(table.Name , table))
let registeredStudyOpt = investigation.Studies |> Seq.tryFind (fun s -> s.Identifier = study.Identifier)
match registeredStudyOpt with
| Some registeredStudy ->
registeredStudy.UpdateReferenceByStudyFile(study,true)
| None ->
investigation.AddStudy(study)
)
assays |> Array.iter (fun assay ->
/// Try to find the parsed assay among the assays already registered in the investigation
let registeredAssayOpt = investigation.Assays |> Seq.tryFind (fun a -> a.Identifier = assay.Identifier)
match registeredAssayOpt with
| Some registeredAssay -> // Assay is already registered in the investigation: update it from the parsed assay file
registeredAssay.UpdateReferenceByAssayFile(assay,true)
| None ->
if enableLogging then printfn "Unable to find registered study '%s' in fullfilled READ contracts!" registeredStudy.Identifier
investigation.AddAssay(assay)
let assay = investigation.Assays |> Seq.find (fun a -> a.Identifier = assay.Identifier)
let updatedTables =
assay.StudiesRegisteredIn
|> Array.fold (fun tables study ->
ArcTables.updateReferenceTablesBySheets(ArcTables(study.Tables),tables,false)
) (ArcTables(assay.Tables))
assay.Tables <- updatedTables.Tables
)
this.ISA <- Some investigation

Expand All @@ -212,18 +211,19 @@ type ARC(?isa : ISA.ArcInvestigation, ?cwl : CWL.CWL, ?fs : FileSystem.FileSyste
workbooks.Add (Path.InvestigationFileName, (DTOType.ISA_Investigation, ISA.Spreadsheet.ArcInvestigation.toFsWorkbook inv))
inv.Studies
|> Seq.iter (fun s ->

workbooks.Add (
Identifier.Study.fileNameFromIdentifier s.Identifier,
(DTOType.ISA_Study, ISA.Spreadsheet.ArcStudy.toFsWorkbook s))
s.Assays
|> Seq.iter (fun a ->
let key = Identifier.Assay.fileNameFromIdentifier a.Identifier
if workbooks.ContainsKey key |> not then
workbooks.Add (
key,
(DTOType.ISA_Assay, ISA.Spreadsheet.ArcAssay.toFsWorkbook a))
(DTOType.ISA_Study, ArcStudy.toFsWorkbook s)
)
)
inv.Assays
|> Seq.iter (fun a ->
workbooks.Add (
Identifier.Assay.fileNameFromIdentifier a.Identifier,
(DTOType.ISA_Assay, ISA.Spreadsheet.ArcAssay.toFsWorkbook a))
)

| None ->
workbooks.Add (Path.InvestigationFileName, (DTOType.ISA_Investigation, ISA.Spreadsheet.ArcInvestigation.toFsWorkbook (ArcInvestigation.create(Identifier.MISSING_IDENTIFIER))))
printfn "ARC contains no ISA part."
Expand Down Expand Up @@ -257,7 +257,7 @@ type ARC(?isa : ISA.ArcInvestigation, ?cwl : CWL.CWL, ?fs : FileSystem.FileSyste

let registeredAssays =
registeredStudies
|> Array.map (fun s -> s.Assays.ToArray()) // to-do: s.RegisteredAssays
|> Array.map (fun s -> s.RegisteredAssays.ToArray()) // to-do: s.RegisteredAssays
|> Array.concat

let includeRootFiles : Set<string> =
Expand Down
8 changes: 4 additions & 4 deletions src/ISA/ISA.Json/Study.fs
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,11 @@ module ArcStudy =
GDecode.fromString (decoder (ConverterOptions())) s
|> ArcStudy.fromStudy

let toString (a:ArcStudy) =
encoder (ConverterOptions()) (a.ToStudy())
let toString (a:ArcStudy) (assays: ResizeArray<ArcAssay>) =
encoder (ConverterOptions()) (a.ToStudy(assays))
|> Encode.toString 2

/// exports in json-ld format
let toStringLD (a:ArcStudy) =
encoder (ConverterOptions(SetID=true,IncludeType=true)) (a.ToStudy())
let toStringLD (a:ArcStudy) (assays: ResizeArray<ArcAssay>) =
encoder (ConverterOptions(SetID=true,IncludeType=true)) (a.ToStudy(assays))
|> Encode.toString 2
15 changes: 12 additions & 3 deletions src/ISA/ISA.Spreadsheet/ArcInvestigation.fs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ module ArcInvestigation =
|> InvestigationInfo.ToSparseTable
|> SparseTable.ToRows

let fromParts (investigationInfo:InvestigationInfo) (ontologySourceReference:OntologySourceReference list) (publications: Publication list) (contacts: Person list) (studies: ArcStudy list) (remarks: Remark list) =
let fromParts (investigationInfo:InvestigationInfo) (ontologySourceReference:OntologySourceReference list) (publications: Publication list) (contacts: Person list) (studies: ArcStudy list) (assays: ArcAssay list) (remarks: Remark list) =
let studyIdentifiers = studies |> List.map (fun s -> s.Identifier)
ArcInvestigation.make
(investigationInfo.Identifier)
(Option.fromValueWithDefault "" investigationInfo.Title)
Expand All @@ -108,7 +109,9 @@ module ArcInvestigation =
(Array.ofList ontologySourceReference)
(Array.ofList publications)
(Array.ofList contacts)
(ResizeArray(assays))
(ResizeArray(studies))
(ResizeArray(studyIdentifiers))
(Array.ofList investigationInfo.Comments)
(Array.ofList remarks)

Expand Down Expand Up @@ -145,7 +148,13 @@ module ArcInvestigation =
loop currentLine ontologySourceReferences investigationInfo publications contacts studies (List.append remarks newRemarks) lineNumber

| k ->
fromParts investigationInfo ontologySourceReferences publications contacts (List.rev studies) remarks
let studies,assays =
studies
|> List.unzip
|> fun (s,a) ->
s |> List.rev,
a |> List.concat |> List.distinctBy (fun a -> a.Identifier)
fromParts investigationInfo ontologySourceReferences publications contacts studies assays remarks

if en.MoveNext () then
let currentLine = en.Current |> SparseRow.tryGetValueAt 0
Expand Down Expand Up @@ -187,7 +196,7 @@ module ArcInvestigation =

for study in (List.ofSeq investigation.Studies) do
yield SparseRow.fromValues [studyLabel]
yield! Studies.toRows study
yield! Studies.toRows study None
}
|> insertRemarks (List.ofArray investigation.Remarks)
|> seq
Expand Down
122 changes: 67 additions & 55 deletions src/ISA/ISA.Spreadsheet/ArcStudy.fs
Original file line number Diff line number Diff line change
@@ -1,69 +1,81 @@
module ARCtrl.ISA.Spreadsheet.ArcStudy
namespace ARCtrl.ISA.Spreadsheet

open ARCtrl.ISA
open FsSpreadsheet

let [<Literal>] obsoleteStudiesLabel = "STUDY METADATA"
let [<Literal>] studiesLabel = "STUDY"

let [<Literal>] obsoleteMetaDataSheetName = "Study"
let [<Literal>] metaDataSheetName = "isa_study"
module ArcStudy =

let [<Literal>] obsoleteStudiesLabel = "STUDY METADATA"
let [<Literal>] studiesLabel = "STUDY"

let toMetadataSheet (study : ArcStudy) : FsWorksheet =
let toRows (study:ArcStudy) =
seq {
yield SparseRow.fromValues [studiesLabel]
yield! Studies.StudyInfo.toRows study
}
let sheet = FsWorksheet(metaDataSheetName)
study
|> toRows
|> Seq.iteri (fun rowI r -> SparseRow.writeToSheet (rowI + 1) r sheet)
sheet
let [<Literal>] obsoleteMetaDataSheetName = "Study"
let [<Literal>] metaDataSheetName = "isa_study"

let fromMetadataSheet (sheet : FsWorksheet) : ArcStudy =
let fromRows (rows: seq<SparseRow>) =
let en = rows.GetEnumerator()
en.MoveNext() |> ignore
let _,_,_,study = Studies.fromRows 2 en
study
sheet.Rows
|> Seq.map SparseRow.fromFsRow
|> fromRows
|> Option.defaultValue (ArcStudy.create(Identifier.createMissingIdentifier()))
let toMetadataSheet (study : ArcStudy) (assays : ArcAssay list option) : FsWorksheet =
//let toRows (study:ArcStudy) assays =
// seq {
// yield SparseRow.fromValues [studiesLabel]
// yield! Studies.StudyInfo.toRows study
// }
let sheet = FsWorksheet(metaDataSheetName)
Studies.toRows study assays
|> Seq.append [SparseRow.fromValues [studiesLabel]]
|> Seq.iteri (fun rowI r -> SparseRow.writeToSheet (rowI + 1) r sheet)
sheet

/// Reads a study from a spreadsheet
let fromFsWorkbook (doc:FsWorkbook) =
// Reading the study metadata sheet (current sheet name first, then the obsolete one as a fallback).
let studyMetadata =
let fromMetadataSheet (sheet : FsWorksheet) : ArcStudy*ArcAssay list =
let fromRows (rows: seq<SparseRow>) =
let en = rows.GetEnumerator()
en.MoveNext() |> ignore
let _,_,_,study = Studies.fromRows 2 en
study
sheet.Rows
|> Seq.map SparseRow.fromFsRow
|> fromRows
|> Option.defaultValue (ArcStudy.create(Identifier.createMissingIdentifier()),[])

[<AutoOpen>]
module Extensions =

type ArcStudy with

/// Reads a study, together with the assays listed in its metadata sheet, from a spreadsheet
static member fromFsWorkbook (doc:FsWorkbook) =
// Reading the study metadata sheet (current sheet name first, then the obsolete one as a fallback).
let studyMetadata,assays =

match doc.TryGetWorksheetByName metaDataSheetName with
| Option.Some sheet ->
fromMetadataSheet sheet
| None ->
match doc.TryGetWorksheetByName obsoleteMetaDataSheetName with
| Option.Some sheet ->
fromMetadataSheet sheet
| None ->
printfn "Cannot retrieve metadata: Study file does not contain \"%s\" or \"%s\" sheet." metaDataSheetName obsoleteMetaDataSheetName
ArcStudy.create(Identifier.createMissingIdentifier())
match doc.TryGetWorksheetByName ArcStudy.metaDataSheetName with
| Option.Some sheet ->
ArcStudy.fromMetadataSheet sheet
| None ->
match doc.TryGetWorksheetByName ArcStudy.obsoleteMetaDataSheetName with
| Option.Some sheet ->
ArcStudy.fromMetadataSheet sheet
| None ->
printfn "Cannot retrieve metadata: Study file does not contain \"%s\" or \"%s\" sheet." ArcStudy.metaDataSheetName ArcStudy.obsoleteMetaDataSheetName
ArcStudy.create(Identifier.createMissingIdentifier()),[]

let sheets =
doc.GetWorksheets()
|> Seq.choose ArcTable.tryFromFsWorksheet
if sheets |> Seq.isEmpty then
studyMetadata
else
studyMetadata.Tables <- ResizeArray(sheets)
studyMetadata
let sheets =
doc.GetWorksheets()
|> Seq.choose ArcTable.tryFromFsWorksheet
if sheets |> Seq.isEmpty |> not then
let updatedTables =
ArcTables.updateReferenceTablesBySheets(
(ArcTables studyMetadata.Tables),
(ArcTables (ResizeArray sheets)),
keepUnusedRefTables = true
)
studyMetadata.Tables <- updatedTables.Tables
studyMetadata
,assays

let toFsWorkbook (study : ArcStudy) =
let doc = new FsWorkbook()
let metaDataSheet = toMetadataSheet study
doc.AddWorksheet metaDataSheet
static member toFsWorkbook (study : ArcStudy,?assays : ArcAssay list) =
let doc = new FsWorkbook()
let metaDataSheet = ArcStudy.toMetadataSheet study assays
doc.AddWorksheet metaDataSheet

study.Tables
|> Seq.iter (ArcTable.toFsWorksheet >> doc.AddWorksheet)
study.Tables
|> Seq.iter (ArcTable.toFsWorksheet >> doc.AddWorksheet)

doc
doc
12 changes: 6 additions & 6 deletions src/ISA/ISA.Spreadsheet/Metadata/Assays.fs
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ module Assays =


let fromString measurementType measurementTypeTermSourceREF measurementTypeTermAccessionNumber technologyType technologyTypeTermSourceREF technologyTypeTermAccessionNumber technologyPlatform fileName comments : ArcAssay =
let measurementType = OntologyAnnotation.fromString(measurementType,?tan = measurementTypeTermAccessionNumber,?tsr = measurementTypeTermSourceREF)
let technologyType = OntologyAnnotation.fromString(technologyType,?tan = technologyTypeTermAccessionNumber,?tsr = technologyTypeTermSourceREF)
let measurementType = OntologyAnnotation.fromString(?termName = measurementType,?tan = measurementTypeTermAccessionNumber,?tsr = measurementTypeTermSourceREF)
let technologyType = OntologyAnnotation.fromString(?termName = technologyType,?tan = technologyTypeTermAccessionNumber,?tsr = technologyTypeTermSourceREF)
ArcAssay.make
(fileName)
(Option.fromValueWithDefault OntologyAnnotation.empty measurementType)
(Option.fromValueWithDefault OntologyAnnotation.empty technologyType)
(Option.fromValueWithDefault "" technologyPlatform |> Option.map ArcAssay.decomposeTechnologyPlatform)
(technologyPlatform |> Option.map ArcAssay.decomposeTechnologyPlatform)
(ResizeArray())
[||]
(comments)
Expand All @@ -50,13 +50,13 @@ module Assays =
|> Array.ofList

fromString
(matrix.TryGetValueDefault("",(measurementTypeLabel,i)))
(matrix.TryGetValue(measurementTypeLabel,i))
(matrix.TryGetValue((measurementTypeTermSourceREFLabel,i)))
(matrix.TryGetValue((measurementTypeTermAccessionNumberLabel,i)))
(matrix.TryGetValueDefault("",(technologyTypeLabel,i)))
(matrix.TryGetValue(technologyTypeLabel,i))
(matrix.TryGetValue((technologyTypeTermSourceREFLabel,i)))
(matrix.TryGetValue((technologyTypeTermAccessionNumberLabel,i)))
(matrix.TryGetValueDefault("",(technologyPlatformLabel,i)))
(matrix.TryGetValue(technologyPlatformLabel,i))
(matrix.TryGetValueDefault(Identifier.createMissingIdentifier(),(fileNameLabel,i)) |> Identifier.Assay.identifierFromFileName)
comments
)
Expand Down
Loading