diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 30624191c..236c4c82b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -96,7 +96,7 @@ jobs: - name: Migrations run: | tox -e migrations - + - name: Run entire test suite if: github.event_name != 'pull_request' timeout-minutes: 120 @@ -109,6 +109,20 @@ jobs: run: | tox -e ${{ matrix.toxenv }}-partial + - name: List files in test data directory + if: always() + run: | + ls -a -R ${{ github.workspace }}/tests/.test_data + + - name: Upload artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + path: ${{ github.workspace }}/tests/.test_data/* + retention-days: 1 + name: test_files + include-hidden-files: true + build: runs-on: arc-runner needs: test diff --git a/docs/CHANGELOG.rst b/docs/CHANGELOG.rst index 9f3bf8022..9788b420b 100644 --- a/docs/CHANGELOG.rst +++ b/docs/CHANGELOG.rst @@ -13,9 +13,11 @@ Unreleased Added ----- +- Save test folder data using actions/upload-artifact Changed ------- +- Calculate up to 5 principal components in ``pca`` process Fixed ----- diff --git a/resolwe_bio/processes/clustering/pca.py b/resolwe_bio/processes/clustering/pca.py index 28b4a4485..144941bac 100644 --- a/resolwe_bio/processes/clustering/pca.py +++ b/resolwe_bio/processes/clustering/pca.py @@ -30,7 +30,7 @@ def component_top_factors(component, allgenes_array, max_size=20): return list(zip(np.array(allgenes_array)[ixs].tolist(), component[ixs].tolist())) -def get_pca(expressions=pd.DataFrame(), gene_labels=[]): +def get_pca(expressions=pd.DataFrame(), gene_labels=[], n_components=5): """Compute PCA.""" if not gene_labels: gene_labels = expressions.index @@ -41,11 +41,13 @@ def get_pca(expressions=pd.DataFrame(), gene_labels=[]): all_components = [[], []] all_explained_variance_ratios = [0.0, 0.0] else: - pca = PCA(n_components=2, whiten=True) + pca_components = min(expressions.shape[0], expressions.shape[1], n_components) + pca = PCA(n_components=pca_components, whiten=True) pca_expressions = pca.fit_transform(expressions.transpose()) coordinates = [ - t[:2].tolist() if len(t) > 1 else [t[0], 0.0] for t in pca_expressions + t[:pca_components].tolist() if len(t) > 1 else [t[0], 0.0] + for t in pca_expressions ] all_components = [ component_top_factors(component=component, allgenes_array=gene_labels) @@ -197,7 +199,7 @@ class PrinicipalComponentAnalysis(Process): }, } data_name = "PCA" - version = "3.0.0" + version = "3.1.0" process_type = "data:pca" category = "Enrichment and Clustering" scheduling_class = SchedulingClass.INTERACTIVE diff --git a/resolwe_bio/tests/files/exp_6_rc.tab.gz b/resolwe_bio/tests/files/exp_6_rc.tab.gz new file mode 100644 index 000000000..9e2c7655d Binary files /dev/null and b/resolwe_bio/tests/files/exp_6_rc.tab.gz differ diff --git a/resolwe_bio/tests/files/exp_6_tpm.tab.gz b/resolwe_bio/tests/files/exp_6_tpm.tab.gz new file mode 100644 index 000000000..2494de3cc Binary files /dev/null and b/resolwe_bio/tests/files/exp_6_tpm.tab.gz differ diff --git a/resolwe_bio/tests/files/exp_7_rc.tab.gz b/resolwe_bio/tests/files/exp_7_rc.tab.gz new file mode 100644 index 000000000..631b29397 Binary files /dev/null and b/resolwe_bio/tests/files/exp_7_rc.tab.gz differ diff --git a/resolwe_bio/tests/files/exp_7_tpm.tab.gz b/resolwe_bio/tests/files/exp_7_tpm.tab.gz new file mode 100644 index 000000000..0c436c4b8 Binary files /dev/null and b/resolwe_bio/tests/files/exp_7_tpm.tab.gz differ diff --git a/resolwe_bio/tests/files/exp_8_rc.tab.gz b/resolwe_bio/tests/files/exp_8_rc.tab.gz new file mode 100644 index 000000000..5025d213e Binary files /dev/null and b/resolwe_bio/tests/files/exp_8_rc.tab.gz differ diff --git a/resolwe_bio/tests/files/exp_8_tpm.tab.gz b/resolwe_bio/tests/files/exp_8_tpm.tab.gz new file mode 100644 index 000000000..5786c1f67 Binary files /dev/null and b/resolwe_bio/tests/files/exp_8_tpm.tab.gz differ diff --git a/resolwe_bio/tests/files/pca_plot.json.gz b/resolwe_bio/tests/files/pca_plot.json.gz index 9e864980c..64001776d 100644 Binary files a/resolwe_bio/tests/files/pca_plot.json.gz and b/resolwe_bio/tests/files/pca_plot.json.gz differ diff --git a/resolwe_bio/tests/files/pca_plot_2.json.gz b/resolwe_bio/tests/files/pca_plot_2.json.gz index 159d0e620..8bc69bc35 100644 Binary files a/resolwe_bio/tests/files/pca_plot_2.json.gz and b/resolwe_bio/tests/files/pca_plot_2.json.gz differ diff --git a/resolwe_bio/tests/files/pca_plot_3.json.gz b/resolwe_bio/tests/files/pca_plot_3.json.gz index 6de8fa108..ce1f41de8 100644 Binary files a/resolwe_bio/tests/files/pca_plot_3.json.gz and b/resolwe_bio/tests/files/pca_plot_3.json.gz differ diff --git a/resolwe_bio/tests/files/pca_plot_4.json.gz b/resolwe_bio/tests/files/pca_plot_4.json.gz index 1e8f08e59..5a797678f 100644 Binary files a/resolwe_bio/tests/files/pca_plot_4.json.gz and b/resolwe_bio/tests/files/pca_plot_4.json.gz differ diff --git a/resolwe_bio/tests/processes/test_pca.py b/resolwe_bio/tests/processes/test_pca.py index 8e135092b..56b4a521f 100644 --- a/resolwe_bio/tests/processes/test_pca.py +++ b/resolwe_bio/tests/processes/test_pca.py @@ -53,17 +53,62 @@ def test_pca(self): source="DICTYBASE", species="Dictyostelium discoideum", ) + expression_5 = self.prepare_expression( + f_rc="exp_6_rc.tab.gz", + f_exp="exp_6_tpm.tab.gz", + f_type="TPM", + source="DICTYBASE", + species="Dictyostelium discoideum", + ) + expression_6 = self.prepare_expression( + f_rc="exp_7_rc.tab.gz", + f_exp="exp_7_tpm.tab.gz", + f_type="TPM", + source="DICTYBASE", + species="Dictyostelium discoideum", + ) + expression_7 = self.prepare_expression( + f_rc="exp_8_rc.tab.gz", + f_exp="exp_8_tpm.tab.gz", + f_type="TPM", + source="DICTYBASE", + species="Dictyostelium discoideum", + ) inputs = { - "exps": [expression_1.pk, expression_2.pk], + "exps": [ + expression_1.pk, + expression_2.pk, + expression_5.pk, + expression_6.pk, + expression_7.pk, + ], "source": "DICTYBASE", "species": "Dictyostelium discoideum", } pca = self.run_process("pca", inputs) saved_json, test_json = self.get_json("pca_plot.json.gz", pca.output["pca"]) + # returns 4 PCA components. Last component differs when testing on different systems + # and is not tested here. self.assertAlmostEqualGeneric( - round_elements(test_json["flot"]["data"]), - round_elements(saved_json["flot"]["data"]), + round_elements(test_json["flot"]["data"][0][:3]), + round_elements(saved_json["flot"]["data"][0][:3]), + ) + self.assertAlmostEqualGeneric( + round_elements(test_json["flot"]["data"][1][:3]), + round_elements(saved_json["flot"]["data"][1][:3]), + ) + self.assertAlmostEqualGeneric( + round_elements(test_json["flot"]["data"][2][:3]), + round_elements(saved_json["flot"]["data"][2][:3]), + ) + self.assertAlmostEqualGeneric( + round_elements(test_json["flot"]["data"][3][:3]), + round_elements(saved_json["flot"]["data"][3][:3]), + ) + self.assertAlmostEqualGeneric( + round_elements(test_json["flot"]["data"][4][:3]), + round_elements(saved_json["flot"]["data"][4][:3]), ) self.assertAlmostEqualGeneric( round_elements(test_json["explained_variance_ratios"]), @@ -84,8 +129,8 @@ def test_pca(self): pca = self.run_process("pca", inputs) saved_json, test_json = self.get_json("pca_plot_2.json.gz", pca.output["pca"]) self.assertAlmostEqualGeneric( - round_elements(test_json["flot"]["data"]), - round_elements(saved_json["flot"]["data"]), + round_elements(test_json["flot"]["data"][0][:3]), + round_elements(saved_json["flot"]["data"][0][:3]), ) self.assertEqual(len(pca.process_warning), 0) @@ -95,8 +140,8 @@ def test_pca(self): pca = self.run_process("pca", inputs) saved_json, test_json = self.get_json("pca_plot_3.json.gz", pca.output["pca"]) self.assertAlmostEqualGeneric( - round_elements(test_json["flot"]["data"]), - round_elements(saved_json["flot"]["data"]), + round_elements(test_json["flot"]["data"][0][:3]), + round_elements(saved_json["flot"]["data"][0][:3]), ) self.assertEqual(len(pca.process_warning), 0) diff --git a/tox.ini b/tox.ini index 4a263a08a..89cb5cb7c 100644 --- a/tox.ini +++ b/tox.ini @@ -35,7 +35,7 @@ commands = partial: --only-changes-to \ partial: {env:RESOLWE_TEST_ONLY_CHANGES_TO:master} \ partial: --changes-file-types .resolwebio-filetypes.yml \ - --verbosity 2 --parallel + --verbosity 2 --parallel --keep-data # Check types. [testenv:mypy]