From 095114dab4404ee2e8d4f2dc81e8220f474a10f2 Mon Sep 17 00:00:00 2001
From: Kevin Schneider
Date: Tue, 12 Sep 2023 14:50:53 +0200
Subject: [PATCH] implement #187

- Add `GetRegisteredPayload` and `GetAdditionalPayload`
- Add tests
---
 src/ARCtrl/ARCtrl.fs         | 112 ++++++++++++++++++++++++
 tests/ARCtrl/ARCtrl.Tests.fs | 192 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 304 insertions(+)

diff --git a/src/ARCtrl/ARCtrl.fs b/src/ARCtrl/ARCtrl.fs
index 3b799d60..37dc2230 100644
--- a/src/ARCtrl/ARCtrl.fs
+++ b/src/ARCtrl/ARCtrl.fs
@@ -242,7 +242,119 @@ type ARC(?isa : ISA.ArcInvestigation, ?cwl : CWL.CWL, ?fs : FileSystem.FileSyste
         let fsCopy = _fs.Copy()
         new ARC(?isa = isaCopy, ?cwl = _cwl, fs = fsCopy)
 
+    /// <summary>
+    /// Returns the FileSystemTree of the ARC with only the registered files and folders included.
+    ///
+    /// Kept are the investigation file and README in the ARC root, everything inside the 'workflows' and 'runs' folders,
+    /// and - for every study of the investigation and every assay of those studies - the ISA file, the README and
+    /// every path that can be constructed from the cell values of any of its tables.
+    /// </summary>
+    /// <param name="IgnoreHidden">Whether or not to ignore hidden files and folders starting with '.'. If true, no hidden files are included in the result. (default: true)</param>
+    member this.GetRegisteredPayload(?IgnoreHidden:bool) =
+
+        let isaCopy = _isa |> Option.map (fun i -> i.Copy()) // not sure if needed, but let's be safe
+
+        let registeredStudies =
+            isaCopy
+            |> Option.map (fun isa -> isa.Studies.ToArray()) // to-do: isa.RegisteredStudies
+            |> Option.defaultValue [||]
+
+        let registeredAssays =
+            registeredStudies
+            |> Array.map (fun s -> s.Assays.ToArray()) // to-do: s.RegisteredAssays
+            |> Array.concat
+
+        let includeRootFiles : Set<string> =
+            set [
+                "isa.investigation.xlsx"
+                "README.md"
+            ]
+
+        let includeStudyFiles =
+            registeredStudies
+            |> Array.map (fun s ->
+                let studyFoldername = $"studies/{s.Identifier}"
+
+                set [
+                    yield $"{studyFoldername}/isa.study.xlsx"
+                    yield $"{studyFoldername}/README.md"
+
+                    //just allow any constructed path from cell values. there may be occasions where this includes wrong files, but its good enough for now.
+                    // every table is scanned: a study without tables contributes no extra paths instead of failing on Tables[0]
+                    for table in s.Tables do
+                        for kv in table.Values do
+                            let cellText = kv.Value.AsFreeText
+                            yield cellText // from arc root
+                            yield $"{studyFoldername}/resources/{cellText}" // from study root > resources
+                            yield $"{studyFoldername}/protocols/{cellText}" // from study root > protocols
+                ]
+            )
+            |> Set.unionMany
+
+        let includeAssayFiles =
+            registeredAssays
+            |> Array.map (fun a ->
+                let assayFoldername = $"assays/{a.Identifier}"
+
+                set [
+                    yield $"{assayFoldername}/isa.assay.xlsx"
+                    yield $"{assayFoldername}/README.md"
+
+                    //just allow any constructed path from cell values. there may be occasions where this includes wrong files, but its good enough for now.
+                    // every table is scanned: an assay without tables contributes no extra paths instead of failing on Tables[0]
+                    for table in a.Tables do
+                        for kv in table.Values do
+                            let cellText = kv.Value.AsFreeText
+                            yield cellText // from arc root
+                            yield $"{assayFoldername}/dataset/{cellText}" // from assay root > dataset
+                            yield $"{assayFoldername}/protocols/{cellText}" // from assay root > protocols
+                ]
+            )
+            |> Set.unionMany
+
+        let includeFiles = Set.unionMany [includeRootFiles; includeStudyFiles; includeAssayFiles]
+
+        // true for the folder entry itself and for everything below it, but not for siblings that merely share the prefix (e.g. 'workflows_old/...')
+        let isInFolder (folder : string) (path : string) = path = folder || path.StartsWith(folder + "/")
+
+        let ignoreHidden = defaultArg IgnoreHidden true
+        let fsCopy = _fs.Copy() // not sure if needed, but let's be safe
+
+        fsCopy.Tree
+        |> FileSystemTree.toFilePaths()
+        |> Array.filter (fun p ->
+            isInFolder "workflows" p
+            || isInFolder "runs" p
+            || includeFiles.Contains(p)
+        )
+        |> FileSystemTree.fromFilePaths
+        |> fun tree -> if ignoreHidden then tree |> FileSystemTree.filterFiles (fun n -> not (n.StartsWith("."))) else Some tree
+        |> Option.bind (fun tree -> if ignoreHidden then tree |> FileSystemTree.filterFolders (fun n -> not (n.StartsWith("."))) else Some tree)
+        |> Option.defaultValue (FileSystemTree.fromFilePaths [||])
+
+    /// <summary>
+    /// Returns the FileSystemTree of the ARC with only the files and folders included that are considered additional payload,
+    /// i.e. every file of the ARC that is not part of the result of GetRegisteredPayload.
+    /// </summary>
+    /// <param name="IgnoreHidden">Whether or not to ignore hidden files and folders starting with '.'. If true, no hidden files are included in the result. (default: true)</param>
+    member this.GetAdditionalPayload(?IgnoreHidden:bool) =
+        let ignoreHidden = defaultArg IgnoreHidden true
+        // the complement is built against the unfiltered registered payload, otherwise hidden registered files
+        // (e.g. 'runs/.gitkeep') would be reported as additional payload when IgnoreHidden is false
+        let registeredPayload =
+            this.GetRegisteredPayload(IgnoreHidden = false)
+            |> FileSystemTree.toFilePaths()
+            |> set
+
+        _fs.Copy().Tree
+        |> FileSystemTree.toFilePaths()
+        |> Array.filter (fun p -> not (registeredPayload.Contains(p)))
+        |> FileSystemTree.fromFilePaths
+        |> fun tree -> if ignoreHidden then tree |> FileSystemTree.filterFiles (fun n -> not (n.StartsWith("."))) else Some tree
+        |> Option.bind (fun tree -> if ignoreHidden then tree |> FileSystemTree.filterFolders (fun n -> not (n.StartsWith("."))) else Some tree)
+        |> Option.defaultValue (FileSystemTree.fromFilePaths [||])
+
 
 //-Pseudo code-//
 
 //// Option 1
diff --git a/tests/ARCtrl/ARCtrl.Tests.fs b/tests/ARCtrl/ARCtrl.Tests.fs
index a89e1377..12298f8f 100644
--- a/tests/ARCtrl/ARCtrl.Tests.fs
+++ b/tests/ARCtrl/ARCtrl.Tests.fs
@@ -249,9 +249,201 @@ let private test_updateFileSystem = testList "update_Filesystem" [
     )
 ]
 
+open ARCtrl.FileSystem
+
+let private ``payload_file_filters`` =
+
+    let orderFST (fs : FileSystemTree) =
+        fs
+        |> FileSystemTree.toFilePaths()
+        |> Array.sort
+        |> FileSystemTree.fromFilePaths
+
+    testList "payload file filters" [
+        let inv = ArcInvestigation("MyInvestigation", "BestTitle")
+
+        let assay = ArcAssay("registered_assay")
+        let assayTable = assay.InitTable("MyAssayTable")
+        assayTable.AppendColumn(CompositeHeader.Input (IOType.RawDataFile), [|CompositeCell.createFreeText "registered_assay_input.txt"|])
+        assayTable.AppendColumn(CompositeHeader.ProtocolREF, [|CompositeCell.createFreeText "assay_protocol.rtf"|])
+        assayTable.AppendColumn(CompositeHeader.Output (IOType.DerivedDataFile), [|CompositeCell.createFreeText "registered_assay_output.txt"|])
+
+        let study = ArcStudy("registered_study")
+        let studyTable = study.InitTable("MyStudyTable")
+        studyTable.AppendColumn(CompositeHeader.Input (IOType.Sample), [|CompositeCell.createFreeText "some_study_input_material"|])
+        studyTable.AppendColumn(CompositeHeader.FreeText "Some File", [|CompositeCell.createFreeText "xd/some_file_that_lies_in_slashxd.txt"|])
+        studyTable.AppendColumn(CompositeHeader.ProtocolREF, [|CompositeCell.createFreeText "study_protocol.pdf"|])
+        studyTable.AppendColumn(CompositeHeader.Output (IOType.RawDataFile), [|CompositeCell.createFreeText "registered_study_output.txt"|])
+        study.AddAssay(assay)
+
+        inv.AddStudy(study)
+
+        let fs =
+            Folder("root",[|
+                File "isa.investigation.xlsx"; // this should be included
+                File "README.md"; // this should be included
+                Folder("xd", [|File "some_file_that_lies_in_slashxd.txt"|]); // this should be included
+                Folder(".arc", [|File ".gitkeep"|]);
+                Folder(".git",[|
+                    File "config"; File "description"; File "HEAD";
+                    Folder("hooks",[|
+                        File "applypatch-msg.sample"; File "commit-msg.sample";
+                        File "fsmonitor-watchman.sample"; File "post-update.sample";
+                        File "pre-applypatch.sample"; File "pre-commit.sample";
+                        File "pre-merge-commit.sample"; File "pre-push.sample";
+                        File "pre-rebase.sample"; File "pre-receive.sample";
+                        File "prepare-commit-msg.sample";
+                        File "push-to-checkout.sample"; File "update.sample"
+                    |]);
+                    Folder ("info", [|File "exclude"|])
+                |]);
+                Folder("assays",[|
+                    File ".gitkeep";
+                    Folder("registered_assay",[|
+                        File "isa.assay.xlsx"; // this should be included
+                        File "README.md"; // this should be included
+                        Folder ("dataset", [|
+                            File "registered_assay_input.txt" // this should be included
+                            File "registered_assay_output.txt" // this should be included
+                            File "unregistered_file.txt"
+                        |]);
+                        Folder ("protocols", [|File "assay_protocol.rtf"|]) // this should be included
+                    |]);
+                    Folder
+                        ("unregistered_assay",[|
+                            File "isa.assay.xlsx"; File "README.md";
+                            Folder ("dataset", [|File ".gitkeep"|]);
+                            Folder ("protocols", [|File ".gitkeep"|])
+                        |])
+                |]);
+                Folder("runs", [|File ".gitkeep"|]); // this folder should be included (empty)
+                Folder("studies",[|
+                    File ".gitkeep";
+                    Folder("registered_study",[|
+                        File "isa.study.xlsx"; // this should be included
+                        File "README.md"; // this should be included
+                        Folder ("protocols", [|File "study_protocol.pdf"|]); // this should be included
+                        Folder ("resources", [|File "registered_study_output.txt"|]) // this should be included
+                    |]);
+                    Folder("unregistered_study",[|
+                        File "isa.study.xlsx"; File "README.md";
+                        Folder ("protocols", [|File ".gitkeep"|]);
+                        Folder ("resources", [|File ".gitkeep"|])
+                    |]);
+                |]);
+                Folder ("workflows", [|File ".gitkeep"|]) // this folder should be included (empty)
+            |])
+
+        let arc = ARC(isa = inv, fs = FileSystem.create(fs))
+
+        test "GetRegisteredPayload" {
+            let expected =
+                Folder("root",[|
+                    File "isa.investigation.xlsx"; // this should be included
+                    File "README.md"; // this should be included
+                    Folder("xd", [|File "some_file_that_lies_in_slashxd.txt"|]); // this should be included
+                    Folder("assays",[|
+                        Folder("registered_assay",[|
+                            File "isa.assay.xlsx"; // this should be included
+                            File "README.md"; // this should be included
+                            Folder ("dataset", [|
+                                File "registered_assay_input.txt" // this should be included
+                                File "registered_assay_output.txt" // this should be included
+                            |]);
+                            Folder ("protocols", [|File "assay_protocol.rtf"|]) // this should be included
+                        |]);
+                    |]);
+                    Folder("runs", [||]); // this folder should be included (empty)
+                    Folder("studies",[|
+                        Folder("registered_study",[|
+                            File "isa.study.xlsx"; // this should be included
+                            File "README.md"; // this should be included
+                            Folder ("protocols", [|File "study_protocol.pdf"|]); // this should be included
+                            Folder ("resources", [|File "registered_study_output.txt"|]) // this should be included
+                        |]);
+                    |]);
+                    Folder ("workflows", [||]) // this folder should be included (empty)
+                |])
+
+            let actual = arc.GetRegisteredPayload()
+            Expect.equal (orderFST actual) (orderFST expected) "incorrect payload."
+        }
+        test "GetAdditionalPayload" {
+            let expected =
+                Folder("root",[|
+                    Folder("assays",[|
+                        Folder("registered_assay",[|
+                            Folder ("dataset", [|
+                                File "unregistered_file.txt"
+                            |]);
+                        |]);
+                        Folder
+                            ("unregistered_assay",[|
+                                File "isa.assay.xlsx"; File "README.md";
+                                Folder ("dataset", [||]);
+                                Folder ("protocols", [||])
+                            |])
+                    |]);
+                    Folder("studies",[|
+                        Folder("unregistered_study",[|
+                            File "isa.study.xlsx"; File "README.md";
+                            Folder ("protocols", [||]);
+                            Folder ("resources", [||])
+                        |]);
+                    |]);
+                |])
+            let actual = arc.GetAdditionalPayload()
+            Expect.equal (orderFST actual) (orderFST expected) "incorrect payload."
+        }
+        test "GetAdditionalPayload_includeHidden" {
+            // hidden files below 'runs' and 'workflows' are registered payload and must not show up here,
+            // hidden files outside of the registered payload must
+            let actual = arc.GetAdditionalPayload(IgnoreHidden = false) |> FileSystemTree.toFilePaths()
+            Expect.isFalse (actual |> Array.contains "runs/.gitkeep") "runs/.gitkeep is registered payload."
+            Expect.isFalse (actual |> Array.contains "workflows/.gitkeep") "workflows/.gitkeep is registered payload."
+            Expect.isTrue (actual |> Array.contains "assays/.gitkeep") "assays/.gitkeep is additional payload."
+        }
+        test "GetRegisteredPayload_studyWithoutTables" {
+            let tablelessInv = ArcInvestigation("MyInvestigation", "BestTitle")
+            tablelessInv.AddStudy(ArcStudy("tableless_study"))
+            let tablelessFs =
+                Folder("root",[|
+                    File "isa.investigation.xlsx";
+                    Folder("studies",[|
+                        Folder("tableless_study", [|File "isa.study.xlsx"|])
+                    |])
+                |])
+            let tablelessArc = ARC(isa = tablelessInv, fs = FileSystem.create(tablelessFs))
+            let actual = tablelessArc.GetRegisteredPayload()
+            Expect.equal (orderFST actual) (orderFST tablelessFs) "a study without tables must not break the filter."
+        }
+        test "GetRegisteredPayload_allTablesConsidered" {
+            let multiInv = ArcInvestigation("MyInvestigation", "BestTitle")
+            let multiStudy = ArcStudy("multi_table_study")
+            multiStudy.InitTable("EmptyFirstTable") |> ignore
+            let secondTable = multiStudy.InitTable("SecondTable")
+            secondTable.AppendColumn(CompositeHeader.ProtocolREF, [|CompositeCell.createFreeText "second_table_protocol.md"|])
+            multiInv.AddStudy(multiStudy)
+            let multiFs =
+                Folder("root",[|
+                    File "isa.investigation.xlsx";
+                    Folder("studies",[|
+                        Folder("multi_table_study",[|
+                            File "isa.study.xlsx";
+                            Folder ("protocols", [|File "second_table_protocol.md"|])
+                        |])
+                    |])
+                |])
+            let multiArc = ARC(isa = multiInv, fs = FileSystem.create(multiFs))
+            let actual = multiArc.GetRegisteredPayload()
+            Expect.equal (orderFST actual) (orderFST multiFs) "files referenced from any table are registered payload."
+        }
+    ]
+
 let main = testList "main" [
     test_model
     test_updateFileSystem
     test_isaFromContracts
     test_writeContracts
+    payload_file_filters
 ]
\ No newline at end of file