From b6e82703d47faba1d3574e7d0777d4c4cb39c6c0 Mon Sep 17 00:00:00 2001 From: Heinrich Lukas Weil Date: Tue, 12 Sep 2023 16:19:25 +0200 Subject: [PATCH] finish up first version of updated arc read in --- src/ARCtrl/ARCtrl.fs | 61 +++++++++++------------ src/ISA/ISA/ArcTypes/ArcTable.fs | 6 +++ src/ISA/ISA/ArcTypes/ArcTableAux.fs | 10 ++++ src/ISA/ISA/ArcTypes/ArcTypes.fs | 70 +++++++++++++++++++-------- tests/ARCtrl/ARCtrl.Tests.fs | 2 +- tests/ISA/ISA.Tests/ArcStudy.Tests.fs | 13 +++-- 6 files changed, 101 insertions(+), 61 deletions(-) diff --git a/src/ARCtrl/ARCtrl.fs b/src/ARCtrl/ARCtrl.fs index 1d47f94f..66e94591 100644 --- a/src/ARCtrl/ARCtrl.fs +++ b/src/ARCtrl/ARCtrl.fs @@ -31,12 +31,7 @@ module ARCAux = let (studyNames,assayNames) = match isa with | Some inv -> - inv.Studies - |> Seq.fold (fun (studyNames,assayNames) s -> - Array.append studyNames [|s.Identifier|], - Array.append assayNames (s.Assays |> Seq.map (fun a -> a.Identifier) |> Array.ofSeq) - - ) ([||],[||]) + inv.StudyIdentifiers |> Seq.toArray, inv.AssayIdentifiers |> Seq.toArray | None -> ([||],[||]) let assays = FileSystemTree.createAssaysFolder (assayNames |> Array.map FileSystemTree.createAssayFolder) let studies = FileSystemTree.createStudiesFolder (studyNames |> Array.map FileSystemTree.createStudyFolder) @@ -166,32 +161,29 @@ type ARC(?isa : ISA.ArcInvestigation, ?cwl : CWL.CWL, ?fs : FileSystem.FileSyste /// get investigation from xlsx let investigation = ARCAux.getArcInvestigationFromContracts contracts /// get studies from xlsx - let studies = ARCAux.getArcStudiesFromContracts contracts + let studies = ARCAux.getArcStudiesFromContracts contracts |> Array.map fst /// get assays from xlsx let assays = ARCAux.getArcAssaysFromContracts contracts - investigation.Studies |> Seq.iter (fun registeredStudy -> + studies |> Seq.iter (fun study -> /// Try find registered study in parsed READ contracts - let studyOpt = studies |> Array.tryFind (fun s -> s.Identifier = registeredStudy.Identifier) - match studyOpt with - | Some study -> // This study element is parsed from FsWorkbook and has no regsitered assays, yet - - if enableLogging then printfn "Found study: %s" registeredStudy.Identifier - registeredStudy.Assays |> Seq.iter (fun registeredAssay -> - /// Try find registered assay in parsed READ contracts - let assayOpt = assays |> Array.tryFind (fun a -> a.Identifier = registeredAssay.Identifier) - match assayOpt with - | Some assay -> - if enableLogging then printfn "Found assay: %s - %s" registeredStudy.Identifier registeredAssay.Identifier - registeredAssay.AddTables(assay.Tables) - | None -> - if enableLogging then printfn "Unable to find registered assay '%s' in fullfilled READ contracts!" registeredAssay.Identifier - ) - study.Tables - |> Seq.iter (fun table -> registeredStudy.SetTable(table.Name , table)) + let registeredStudyOpt = investigation.Studies |> Seq.tryFind (fun s -> s.Identifier = study.Identifier) + match registeredStudyOpt with + | Some registeredStudy -> // This study element is parsed from FsWorkbook and has no regsitered assays, yet + registeredStudy.UpdateReferenceByStudyFile(study,true) + | None -> + investigation.AddRegisteredStudy(study) + ) + assays |> Seq.iter (fun assay -> + /// Try find registered study in parsed READ contracts + let registeredAssayOpt = investigation.Assays |> Seq.tryFind (fun a -> a.Identifier = assay.Identifier) + match registeredAssayOpt with + | Some registeredAssay -> // This study element is parsed from FsWorkbook and has no regsitered assays, yet + registeredAssay.UpdateReferenceByAssayFile(assay,true) | None -> - if enableLogging then printfn "Unable to find registered study '%s' in fullfilled READ contracts!" registeredStudy.Identifier + investigation.AddAssay(assay) ) + this.ISA <- Some investigation member this.UpdateFileSystem() = @@ -213,18 +205,19 @@ type ARC(?isa : ISA.ArcInvestigation, ?cwl : CWL.CWL, ?fs : FileSystem.FileSyste workbooks.Add (Path.InvestigationFileName, (DTOType.ISA_Investigation, ISA.Spreadsheet.ArcInvestigation.toFsWorkbook inv)) inv.Studies |> Seq.iter (fun s -> + workbooks.Add ( Identifier.Study.fileNameFromIdentifier s.Identifier, - (DTOType.ISA_Study, ISA.Spreadsheet.ArcStudy.toFsWorkbook s)) - s.Assays - |> Seq.iter (fun a -> - let key = Identifier.Assay.fileNameFromIdentifier a.Identifier - if workbooks.ContainsKey key |> not then - workbooks.Add ( - key, - (DTOType.ISA_Assay, ISA.Spreadsheet.ArcAssay.toFsWorkbook a)) + (DTOType.ISA_Study, ArcStudy.toFsWorkbook s) ) ) + inv.Assays + |> Seq.iter (fun a -> + workbooks.Add ( + Identifier.Assay.fileNameFromIdentifier a.Identifier, + (DTOType.ISA_Assay, ISA.Spreadsheet.ArcAssay.toFsWorkbook a)) + ) + | None -> workbooks.Add (Path.InvestigationFileName, (DTOType.ISA_Investigation, ISA.Spreadsheet.ArcInvestigation.toFsWorkbook (ArcInvestigation.create(Identifier.MISSING_IDENTIFIER)))) printfn "ARC contains no ISA part." diff --git a/src/ISA/ISA/ArcTypes/ArcTable.fs b/src/ISA/ISA/ArcTypes/ArcTable.fs index 4e970871..dc53f9f4 100644 --- a/src/ISA/ISA/ArcTypes/ArcTable.fs +++ b/src/ISA/ISA/ArcTypes/ArcTable.fs @@ -535,6 +535,12 @@ type ArcTable = |> fun rows -> ProcessParsing.alignByHeaders rows |> fun (headers, rows) -> ArcTable.create(name,headers,rows) + /// This method is meant to update an ArcTable stored as a protocol in a study or investigation file with the information from an ArcTable actually stored as an annotation table + member this.UpdateReferenceByAnnotationTable(table:ArcTable) = + ArcTableAux.Unchecked.extendToRowCount table.RowCount this.Headers this.Values + for c in table.Columns do + this.AddColumn(c.Header, cells = c.Cells,forceReplace = true) + /// Pretty printer override this.ToString() = [ diff --git a/src/ISA/ISA/ArcTypes/ArcTableAux.fs b/src/ISA/ISA/ArcTypes/ArcTableAux.fs index 30f1e607..58e749db 100644 --- a/src/ISA/ISA/ArcTypes/ArcTableAux.fs +++ b/src/ISA/ISA/ArcTypes/ArcTableAux.fs @@ -275,6 +275,16 @@ module Unchecked = for missingColumn,missingRow in missingKeys do setCellAt (missingColumn,missingRow,empty) values + /// Increases the table size to the given new row count and fills the new rows with the last value of the column + let extendToRowCount rowCount (headers: ResizeArray) (values:Dictionary) = + let columnCount = getColumnCount headers + let previousRowCount = getRowCount values + // iterate over columns + for columnIndex = 0 to columnCount - 1 do + let lastValue = values[columnIndex,previousRowCount-1] + for rowIndex = previousRowCount - 1 to rowCount - 1 do + setCellAt (columnIndex,rowIndex,lastValue) values + let addRow (index:int) (newCells:CompositeCell []) (headers: ResizeArray) (values:Dictionary) = /// Store start rowCount here, so it does not get changed midway through let rowCount = getRowCount values diff --git a/src/ISA/ISA/ArcTypes/ArcTypes.fs b/src/ISA/ISA/ArcTypes/ArcTypes.fs index e746fc55..07395580 100644 --- a/src/ISA/ISA/ArcTypes/ArcTypes.fs +++ b/src/ISA/ISA/ArcTypes/ArcTypes.fs @@ -530,6 +530,23 @@ type ArcAssay(identifier: string, ?measurementType : OntologyAnnotation, ?techno member internal this.RemoveFromInvestigation () = this.Investigation <- None + /// Updates given assay stored in an study or investigation file with values from an assay file. + member this.UpdateReferenceByAssayFile(assay:ArcAssay,?onlyReplaceExisting : bool) = + let onlyReplaceExisting = defaultArg onlyReplaceExisting false + let updateAlways = onlyReplaceExisting |> not + if assay.MeasurementType.IsSome || updateAlways then + this.MeasurementType <- assay.MeasurementType + if assay.TechnologyPlatform.IsSome || updateAlways then + this.TechnologyPlatform <- assay.TechnologyPlatform + if assay.TechnologyType.IsSome || updateAlways then + this.TechnologyType <- assay.TechnologyType + if assay.Tables.Count <> 0 || updateAlways then + this.Tables <- assay.Tables + if assay.Comments.Length <> 0 || updateAlways then + this.Comments <- assay.Comments + if assay.Performers.Length <> 0 || updateAlways then + this.Performers <- assay.Performers + /// Copies ArcAssay object without the pointer to the parent ArcInvestigation /// /// In order to copy the pointer to the parent ArcInvestigation as well, use the Copy() method of the ArcInvestigation instead. @@ -1055,7 +1072,7 @@ type ArcStudy(identifier : string, ?title, ?description, ?submissionDate, ?publi this.Investigation <- None - /// Copies ArcStudy objec without the pointer to the parent ArcInvestigation + /// Copies ArcStudy object without the pointer to the parent ArcInvestigation /// /// This copy does only contain the identifiers of the registered ArcAssays and not the actual objects. /// @@ -1087,14 +1104,12 @@ type ArcStudy(identifier : string, ?title, ?description, ?submissionDate, ?publi ) /// - /// Updates given study with another study, Identifier will never be updated. By default update is full replace. Optional Parameters can be used to specify update logic. + /// Updates given study from an investigation file against a study from a study file. Identifier will never be updated. /// /// The study used for updating this study. /// If true, this will only update fields which are `Some` or non-empty lists. Default: **false** - /// If true, this will append lists instead of replacing. Will return only distinct elements. Default: **false** - member this.UpdateBy(study:ArcStudy,?onlyReplaceExisting : bool,?appendSequences : bool) = + member this.UpdateReferenceByStudyFile(study:ArcStudy,?onlyReplaceExisting : bool) = let onlyReplaceExisting = defaultArg onlyReplaceExisting false - let appendSequences = defaultArg appendSequences false let updateAlways = onlyReplaceExisting |> not if study.Title.IsSome || updateAlways then this.Title <- study.Title @@ -1105,26 +1120,31 @@ type ArcStudy(identifier : string, ?title, ?description, ?submissionDate, ?publi if study.PublicReleaseDate.IsSome || updateAlways then this.PublicReleaseDate <- study.PublicReleaseDate if study.Publications.Length <> 0 || updateAlways then - let s = ArcTypesAux.updateAppendArray appendSequences this.Publications study.Publications - this.Publications <- s + this.Publications <- study.Publications if study.Contacts.Length <> 0 || updateAlways then - let s = ArcTypesAux.updateAppendArray appendSequences this.Contacts study.Contacts - this.Contacts <- s + this.Contacts <- study.Contacts if study.StudyDesignDescriptors.Length <> 0 || updateAlways then - let s = ArcTypesAux.updateAppendArray appendSequences this.StudyDesignDescriptors study.StudyDesignDescriptors - this.StudyDesignDescriptors <- s + this.StudyDesignDescriptors <- study.StudyDesignDescriptors if study.Tables.Count <> 0 || updateAlways then - let s = ArcTypesAux.updateAppendResizeArray appendSequences this.Tables study.Tables + let s = + study.Tables + |> Seq.append this.Tables + |> Seq.groupBy (fun t -> t.Name) + |> Seq.map (fun (_,ts) -> + if Seq.length ts = 2 then + (Seq.item 0 ts).UpdateReferenceByAnnotationTable (Seq.item 1 ts) + Seq.head ts + else + Seq.head ts + ) + |> ResizeArray this.Tables <- s if study.RegisteredAssayIdentifiers.Count <> 0 || updateAlways then - let s = ArcTypesAux.updateAppendResizeArray appendSequences this.RegisteredAssayIdentifiers study.RegisteredAssayIdentifiers - this.RegisteredAssayIdentifiers <- s - if study.Factors.Length <> 0 || updateAlways then - let s = ArcTypesAux.updateAppendArray appendSequences this.Factors study.Factors - this.Factors <- s + this.RegisteredAssayIdentifiers <- study.RegisteredAssayIdentifiers + if study.Factors.Length <> 0 || updateAlways then + this.Factors <- study.Factors if study.Comments.Length <> 0 || updateAlways then - let s = ArcTypesAux.updateAppendArray appendSequences this.Comments study.Comments - this.Comments <- s + this.Comments <- study.Comments /// /// Creates an ISA-Json compatible Study from ArcStudy. @@ -1399,6 +1419,18 @@ type ArcInvestigation(identifier : string, ?title : string, ?description : strin copy.RegisterStudy(studyIdentifier) copy + // - Study API - CRUD // + member this.AddRegisteredStudy (study: ArcStudy) = + this.AddStudy study + this.RegisterStudy(study.Identifier) + + static member addRegisteredStudy(study: ArcStudy) = + fun (inv: ArcInvestigation) -> + let copy = inv.Copy() + let study = study.Copy() + copy.AddRegisteredStudy(study) + copy + // - Study API - CRUD // member this.RemoveStudyAt(index: int) = this.Studies.RemoveAt(index) diff --git a/tests/ARCtrl/ARCtrl.Tests.fs b/tests/ARCtrl/ARCtrl.Tests.fs index 74199296..56046244 100644 --- a/tests/ARCtrl/ARCtrl.Tests.fs +++ b/tests/ARCtrl/ARCtrl.Tests.fs @@ -168,7 +168,7 @@ let private test_writeContracts = testList "write_contracts" [ let inv = ArcInvestigation("MyInvestigation", "BestTitle") let assay = ArcAssay("MyAssay") inv.InitStudy("Study1").AddRegisteredAssay(assay) |> ignore - inv.InitStudy("Study2").AddRegisteredAssay(assay) |> ignore + inv.InitStudy("Study2").RegisterAssay(assay.Identifier) |> ignore let arc = ARC(isa = inv) let contracts = arc.GetWriteContracts() let contractPathsString = contracts |> Array.map (fun c -> c.Path) |> String.concat ", " diff --git a/tests/ISA/ISA.Tests/ArcStudy.Tests.fs b/tests/ISA/ISA.Tests/ArcStudy.Tests.fs index 1b9586f0..f9068e02 100644 --- a/tests/ISA/ISA.Tests/ArcStudy.Tests.fs +++ b/tests/ISA/ISA.Tests/ArcStudy.Tests.fs @@ -314,7 +314,7 @@ let tests_copy = () ] -let tests_UpdateBy = testList "UpdateBy" [ +let tests_UpdateBy = testList "UpdateReferenceByStudyFile" [ let createFullStudy() = let identifier = "MyIdentifier" let title = "Study Title" @@ -329,7 +329,6 @@ let tests_UpdateBy = testList "UpdateBy" [ let assay_identifiers = getAssayIdentifiers assays let factors = [|Factor.create("Factor 1")|] let comments = [|Comment.create("Comment 1")|] - ArcStudy( identifier = identifier, title = title, @@ -361,7 +360,7 @@ let tests_UpdateBy = testList "UpdateBy" [ factors = [||], comments = [||] ) - actual.UpdateBy(next) + actual.UpdateReferenceByStudyFile(next) Expect.notEqual actual next "not equal" Expect.notEqual actual.Identifier next.Identifier "Identifier" Expect.equal actual.Title next.Title "Title" @@ -381,7 +380,7 @@ let tests_UpdateBy = testList "UpdateBy" [ let actual = createFullStudy() let next = ArcStudy.init("NextIdentifier") let expected = createFullStudy() - actual.UpdateBy(next, true) + actual.UpdateReferenceByStudyFile(next, true) Expect.notEqual actual next "not equal" Expect.notEqual actual.Identifier next.Identifier "Identifier" Expect.equal actual.Title expected.Title "Title" @@ -412,7 +411,7 @@ let tests_UpdateBy = testList "UpdateBy" [ factors = [|Factor.create(Name="NextFactor")|], comments = [|Comment.create(Name="NextCommentName", Value="NextCommentValue")|] ) - actual.UpdateBy(next, true) + actual.UpdateReferenceByStudyFile(next, true) Expect.notEqual actual next "not equal" Expect.notEqual actual.Identifier next.Identifier "Identifier" Expect.equal actual.Title next.Title "Title" @@ -441,7 +440,7 @@ let tests_UpdateBy = testList "UpdateBy" [ tables = ResizeArray([ArcTable.init("NextTable")]) ) let original = createFullStudy() - actual.UpdateBy(next, true, true) + actual.UpdateReferenceByStudyFile(next, true) Expect.notEqual actual next "not equal" Expect.notEqual actual.Identifier next.Identifier "Identifier" Expect.equal actual.Title next.Title "Title" @@ -470,7 +469,7 @@ let tests_UpdateBy = testList "UpdateBy" [ tables = ResizeArray([ArcTable.init("NextTable")]) ) let original = createFullStudy() - actual.UpdateBy(next, false, true) + actual.UpdateReferenceByStudyFile(next, false) Expect.notEqual actual next "not equal" Expect.notEqual actual.Identifier next.Identifier "Identifier" Expect.equal actual.Title next.Title "Title"