diff --git a/.gitignore b/.gitignore index 1bb922e7..cfc29395 100644 --- a/.gitignore +++ b/.gitignore @@ -69,4 +69,4 @@ docs/source/_templates/ !/pyproject.toml !/requirements.txt !/setup.cfg -!/.github \ No newline at end of file +!/.github diff --git a/README.md b/README.md index 4f7eee22..e5e7e6c6 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,8 @@ you do not have powerful GPUs available, it is possible to run using only CPUs. For instance, the tutorial data set consisting of simulated drug, metabolomics and proteomics data for 500 individuals runs fine on a standard macbook. +> Note: The pip installation of `move-dl` does not set up your local GPU automatically. + # The MOVE pipeline MOVE has five-six steps: diff --git a/src/move/conf/schema.py b/src/move/conf/schema.py index 8a91fe58..3c4576f6 100644 --- a/src/move/conf/schema.py +++ b/src/move/conf/schema.py @@ -28,6 +28,10 @@ class InputConfig: name: str weight: int = 1 +@dataclass +class ContinuousInputConfig(InputConfig): + scale: bool = True + @dataclass class ContinuousInputConfig(InputConfig): diff --git a/tutorial/README.md b/tutorial/README.md index a047b0eb..557ebb70 100644 --- a/tutorial/README.md +++ b/tutorial/README.md @@ -4,7 +4,7 @@ We have provided a tutorial. In this first tutorial, we inspect datasets reporting whether 500 fictitious individuals have taken one of 20 imaginary -drugs. We have included a pair of pretend omics datasets, with measurements +drugs. We have included a pair of simulated omics datasets, with measurements for each sample (individual). All these measurements were generated randomly, but we have added 200 associations between different pairs of drugs and omics features. Let us find them with MOVE! @@ -146,10 +146,11 @@ reconstructing our input data and generating an informative latent space. 
Run: >>> move-dl data=random_small task=random_small__latent ``` -:arrow_up: This command will create four types of plot: +:arrow_up: This command will create four types of plot in the `results/latent_space` folder: -- Loss curve shows the overall loss, KLD term, binary cross-entropy term, and -sum of squared errors term over number of training epochs. +- Loss curve shows the overall loss and each of its three components: + Kullback-Leibler divergence (KLD) term, binary cross-entropy term, + and sum of squared errors term over number of training epochs. - Reconstructions metrics boxplot shows a score (accuracy or cosine similarity for categorical and continuous datasets, respectively) per reconstructed dataset. @@ -171,7 +172,8 @@ and the omics features. Run: >>> move-dl data=random_small task=random_small__id_assoc_ttest ``` -:arrow_up: This command will create a `results_sig_assoc.tsv` file, listing +:arrow_up: This command will create a `results_sig_assoc.tsv` +file in `results/identify_associations`, listing each pair of associated features and the corresponding median p-value for such association. There should be ~120 associations found.