From eb23885b3b710803143061dc4d6f85809a2a7cc4 Mon Sep 17 00:00:00 2001 From: Anthony Bretaudeau Date: Thu, 22 Aug 2024 10:01:50 +0200 Subject: [PATCH 01/11] ci + lint doc --- .github/workflows/ci.yml | 25 ++++- README.md | 229 ++++++++++++++++++++------------------- setup.py | 1 - 3 files changed, 144 insertions(+), 111 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bf59355..2dff33b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,6 +17,16 @@ jobs: pip install flake8 - name: Flake8 run: flake8 --ignore=E501,W504 biomaj_download/*.py biomaj_download/download + - name: Install Protoc + uses: arduino/setup-protoc@v3 + - name: Check that protobuf-compiler has been run + run: | + cd biomaj_download/message + protoc --python_out=. downmessage.proto + if [[ `git status --porcelain` ]]; then + echo "Protobuf-compiler has not been run, please do it and commit the modifed python files" + exit 1 + fi test: runs-on: ubuntu-latest @@ -35,9 +45,22 @@ jobs: run: | LOCAL_IRODS=0 NETWORK=0 pytest -v tests/biomaj_tests.py + pkg_build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - uses: actions/setup-python@v1 + with: + python-version: 3.8 + - name: Check that the package build works + run: | + pip install -U pip setuptools build + python -m build --sdist --wheel --outdir dist/ . + pypi: runs-on: ubuntu-latest - needs: [lint, test] + needs: [lint, test, pkg_build] name: Deploy release to Pypi steps: - name: Checkout diff --git a/README.md b/README.md index e25a7b1..de2f92d 100644 --- a/README.md +++ b/README.md @@ -1,66 +1,77 @@ -# About +# Biomaj-download + +## About [![PyPI version](https://badge.fury.io/py/biomaj-download.svg)](https://badge.fury.io/py/biomaj-download) Microservice to manage the downloads of biomaj. -A protobuf interface is available in biomaj_download/message/message_pb2.py to exchange messages between BioMAJ and the download service. 
+A protobuf interface is available in `biomaj_download/message/message_pb2.py` to exchange messages between BioMAJ and the download service. Messages go through RabbitMQ (to be installed). Python3 support only, python2 support is dropped -# Protobuf +## Protobuf To compile protobuf, in biomaj_download/message: - protoc --python_out=. downmessage.proto - -# Development +```bash +cd biomaj_download/message/ +protoc --python_out=. downmessage.proto +``` - flake8 --ignore E501 biomaj_download/\*.py biomaj_download/download +## Development -# Test +## Test To run the test suite, use: - LOCAL_IRODS=0 pytest -v tests/biomaj_tests.py +```bash +LOCAL_IRODS=0 pytest -v tests/biomaj_tests.py +``` This command skips the test that need a local iRODS server. Some test might fail due to network connection. You can skip them with: - NETWORK=0 pytest -v tests/biomaj_tests.py +```bash +NETWORK=0 pytest -v tests/biomaj_tests.py +``` +## Run -# Run +### Message consumer -## Message consumer: +```bash +export BIOMAJ_CONFIG=path_to_config.yml +python bin/biomaj_download_consumer.py +``` - export BIOMAJ_CONFIG=path_to_config.yml - python bin/biomaj_download_consumer.py - -## Web server +### Web server If package is installed via pip, you need a file named *gunicorn_conf.py* containing somehwhere on local server: - def worker_exit(server, worker): - from prometheus_client import multiprocess - multiprocess.mark_process_dead(worker.pid) +```python +def worker_exit(server, worker): + from prometheus_client import multiprocess + multiprocess.mark_process_dead(worker.pid) +``` If you cloned the repository and installed it via python setup.py install, just refer to the *gunicorn_conf.py* in the cloned repository. 
- - export BIOMAJ_CONFIG=path_to_config.yml - rm -rf ..path_to/prometheus-multiproc - mkdir -p ..path_to/prometheus-multiproc - export prometheus_multiproc_dir=..path_to/prometheus-multiproc - gunicorn -c gunicorn_conf.py biomaj_download.biomaj_download_web:app +```bash +export BIOMAJ_CONFIG=path_to_config.yml +rm -rf ..path_to/prometheus-multiproc +mkdir -p ..path_to/prometheus-multiproc +export prometheus_multiproc_dir=..path_to/prometheus-multiproc +gunicorn -c gunicorn_conf.py biomaj_download.biomaj_download_web:app +``` Web processes should be behind a proxy/load balancer, API base url /api/download Prometheus endpoint metrics are exposed via /metrics on web server -# Retrying +## Retrying A common problem when downloading a large number of files is the handling of temporary failures (network issues, server too busy to answer, etc.). Since version 3.1.2, `biomaj-download` uses the [Tenacity library](https://github.com/jd/tenacity) which is designed to handle this. @@ -73,14 +84,14 @@ For bank configuration those options also parse strings read from the configurat This parsing is based on the [Simple Eval library](https://github.com/danthedeckie/simpleeval). The rules are straightforward: - * All concrete stop and wait classes defined in Tenacity (i.e. classes inheriting from `stop_base` and `wait_base` respectively) can be used - by calling their constructor with the expected parameters. - For example, the string `"stop_after_attempt(5)"` will create the desired object. - Note that stop and wait classes that need no argument must be used as constants (i.e. use `"stop_never"` and not `"stop_never()"`). - Currently, this is the case for `"stop_never"` (as in Tenacity) and `"wait_none"` (this slightly differs from Tenacity where it is `"wait_none()"`). - * You can use classes that allow to combine other stop conditions (namely `stop_all` and `stop_any`) or wait policies (namely `wait_combine`). 
- * Operator `+` can be used to add wait policies (similar to `wait_combine`). - * Operators `&` and `|` can be used to compose stop conditions (similar to `wait_all` and `wait_none` respectively). +* All concrete stop and wait classes defined in Tenacity (i.e. classes inheriting from `stop_base` and `wait_base` respectively) can be used + by calling their constructor with the expected parameters. + For example, the string `"stop_after_attempt(5)"` will create the desired object. +Note that stop and wait classes that need no argument must be used as constants (i.e. use `"stop_never"` and not `"stop_never()"`). +Currently, this is the case for `"stop_never"` (as in Tenacity) and `"wait_none"` (this slightly differs from Tenacity where it is `"wait_none()"`). +* You can use classes that allow to combine other stop conditions (namely `stop_all` and `stop_any`) or wait policies (namely `wait_combine`). +* Operator `+` can be used to add wait policies (similar to `wait_combine`). +* Operators `&` and `|` can be used to compose stop conditions (similar to `stop_all` and `stop_any` respectively). However, in this case, you can't use your own conditions. The complete list of stop conditions is: @@ -107,10 +118,10 @@ Please refer to [Tenacity doc](https://tenacity.readthedocs.io/en/latest/) for t Examples (inspired by Tenacity doc): - * `"wait_fixed(3) + wait_random(0, 2)"` and `"wait_combine(wait_fixed(3), wait_random(0, 2))"` are equivalent and will wait 3 seconds + up to 2 seconds of random delay - * `"wait_chain(*([wait_fixed(3) for i in range(3)] + [wait_fixed(7) for i in range(2)] + [wait_fixed(9)]))"` will wait 3s for 3 attempts, 7s for the next 2 attempts and 9s for all attempts thereafter (here `+` is the list concatenation). - * `"wait_none + wait_random(1,2)"` will wait between 1s and 2s (since `wait_none` doesn't wait). - * `"stop_never | stop_after_attempt(5)"` will stop after 5 attempts (since `stop_never` never stops). 
+* `"wait_fixed(3) + wait_random(0, 2)"` and `"wait_combine(wait_fixed(3), wait_random(0, 2))"` are equivalent and will wait 3 seconds + up to 2 seconds of random delay +* `"wait_chain(*([wait_fixed(3) for i in range(3)] + [wait_fixed(7) for i in range(2)] + [wait_fixed(9)]))"` will wait 3s for 3 attempts, 7s for the next 2 attempts and 9s for all attempts thereafter (here `+` is the list concatenation). +* `"wait_none + wait_random(1,2)"` will wait between 1s and 2s (since `wait_none` doesn't wait). +* `"stop_never | stop_after_attempt(5)"` will stop after 5 attempts (since `stop_never` never stops). Note that some protocols (e.g. FTP) classify errors as temporary or permanent (for example trying to download inexisting file). More generally, we could distinguish permanent errors based on error codes, etc. and not retry in this case. @@ -118,96 +129,96 @@ However in our experience, so called permanent errors may well be temporary. Therefore downloaders always retry whatever the error. In some cases, this is a waste of time but generally this is worth it. -# Host keys +## Host keys When using the `sftp` protocol, `biomaj-download` must check the host key. Those keys are stored in a file (for instance `~/.ssh/known_hosts`). Two options are available to configure this: - - **ssh_hosts_file** which sets the file to use - - **ssh_new_host** which sets what to do for a new host +* **ssh_hosts_file** which sets the file to use +* **ssh_new_host** which sets what to do for a new host When the host and the key are found in the file, the connection is accepted. If the host is found but the key missmatches, the connection is rejected (this usually indicates a problem or a change of configuration on the remote server). 
When the host is not found, the decision depends on the value of **ssh_new_host**: - - `reject` means that the connection is rejected - - `accept` means that the connection is accepted - - `add` means that the connection is accepted and the key is added to the file +* `reject` means that the connection is rejected +* `accept` means that the connection is accepted +* `add` means that the connection is accepted and the key is added to the file See the description of the options in [Download options](#download-options). -# Download options +## Download options Since version 3.0.26, you can use the `set_options` method to pass a dictionary of downloader-specific options. The following list shows some options and their effect (the option to set is the key and the parameter is the associated value): - * **stop_condition**: - * parameter: an instance of Tenacity `stop_base` or a string (see [Retrying](#retrying)). - * downloader(s): all (except `LocalDownload`). - * effect: sets the condition on which we should stop retrying to download a file. - * default: `stop_after_attempt(3)` (i.e. stop after 3 attempts). - * note: introduced in version 3.2.1. - * **wait_policy**: - * parameter: an instance of Tenacity `wait_base` or a string (see [Retrying](#retrying)). - * downloader(s): all (except `LocalDownload`). - * effect: sets the wait policy between download attempts. - * default: `wait_fixed(3)` (i.e. wait 3 seconds between attempts). - * note: introduced in version 3.2.1. - * **skip_check_uncompress**: - * parameter: bool. - * downloader(s): all (except `LocalDownload`). - * effect: if true, don't test the archives after download. - * default: false (i.e. test the archives). - * **ssl_verifyhost**: - * parameter: bool. - * downloader(s): `CurlDownload` (and derived classes: `DirectFTPDownload`, `DirectHTTPDownload`). - * effect: if false, don't check that the name of the remote server is the same than in the SSL certificate. - * default: true (i.e. check host name). 
- * note: it's generally a bad idea to disable this verification. However some servers are badly configured. See [here](https://curl.haxx.se/libcurl/c/CURLOPT_SSL_VERIFYHOST.html) for the corresponding cURL option. - * **ssl_verifypeer**: - * parameter: bool. - * downloader(s): `CurlDownload` (and derived classes: `DirectFTPDownload`, `DirectHTTPDownload`). - * effect: if false, don't check the authenticity of the peer's certificate. - * default: true (i.e. check authenticity). - * note: it's generally a bad idea to disable this verification. However some servers are badly configured. See [here](https://curl.haxx.se/libcurl/c/CURLOPT_SSL_VERIFYPEER.html) for the corresponding cURL option. - * **ssl_server_cert**: - * parameter: path of the certificate file. - * downloader(s): `CurlDownload` (and derived classes: `DirectFTPDownload`, `DirectHTTPDownload`). - * effect: use the certificate(s) in this file to verify the peer with. - * default: use OS certificates. - * note: see [here](https://curl.haxx.se/libcurl/c/CURLOPT_CAINFO.html) for the corresponding cURL option. - * parameter: int. - * downloader(s): `CurlDownload` (and derived classes: `DirectFTPDownload`, `DirectHTTPDownload`). - * effect: sets the interval, in seconds, that the operating system will wait between sending keepalive probes. - * default: cURL default (60s at the time of this writing). - * note: see [here](https://curl.haxx.se/libcurl/c/CURLOPT_TCP_KEEPINTVL.html) for the corresponding cURL option. - * **ftp_method**: - * parameter: one of `default`, `multicwd`, `nocwd`, `singlecwd` (case insensitive). - * downloader(s): `CurlDownload` (and derived classes: `DirectFTPDownload`, `DirectHTTPDownload`) - only used for `FTP(S)`. - * effect: sets the method used to reach a file on a FTP(S) server (`nocwd` and `singlecwd` are usually faster but not always supported). - * default: `default` (which is `multicwd` at the time of this writing as in cURL). 
- * note: see [here](https://curl.haxx.se/libcurl/c/CURLOPT_FTP_FILEMETHOD.html) for the corresponding cURL option; introduced in version 3.1.2. - * **ssh_hosts_file**: - * parameter: path of the known hosts file. - * downloader(s): `CurlDownload` (and derived classes: `DirectFTPDownload`, `DirectHTTPDownload`) - only used for `SFTP`. - * effect: sets the file used to read/store host keys for `SFTP`. - * default: `~/.ssh/known_hosts` (where `~` is the home directory of the current user). - * note: see [here](https://curl.haxx.se/libcurl/c/CURLOPT_SSH_KNOWNHOSTS.html) for the corresponding cURL option and the option below; introduced in version 3.2.1. - * **ssh_new_host**: - * parameter: one of `reject`, `accept`, `add`. - * downloader(s): `CurlDownload` (and derived classes: `DirectFTPDownload`, `DirectHTTPDownload`) - only used for `SFTP`. - * effect: sets the policy to use for an unknown host. - * default: `reject` (i.e. refuse new hosts - you must add them in the file for instance with `ssh` or `sftp`). - * note: see [here](https://curl.haxx.se/libcurl/c/CURLOPT_SSH_KEYFUNCTION.html) for the corresponding cURL option and the option above; introduced in version 3.2.1. - * *allow_redirections*: - * parameter: bool. - * downloader(s): `CurlDownload` (and derived classes: `DirectFTPDownload`, `DirectHTTPDownload`) - only used for `HTTPS(S)`. - * effect: sets the policy for `HTTP` redirections. - * default: `true` (i.e. follow redirections). - * note: see [here](https://curl.haxx.se/libcurl/c/CURLOPT_FOLLOWLOCATION.html) for the corresponding cURL option; introduced in version 3.2.3. +* **stop_condition**: + * parameter: an instance of Tenacity `stop_base` or a string (see [Retrying](#retrying)). + * downloader(s): all (except `LocalDownload`). + * effect: sets the condition on which we should stop retrying to download a file. + * default: `stop_after_attempt(3)` (i.e. stop after 3 attempts). + * note: introduced in version 3.2.1. 
+* **wait_policy**: + * parameter: an instance of Tenacity `wait_base` or a string (see [Retrying](#retrying)). + * downloader(s): all (except `LocalDownload`). + * effect: sets the wait policy between download attempts. + * default: `wait_fixed(3)` (i.e. wait 3 seconds between attempts). + * note: introduced in version 3.2.1. +* **skip_check_uncompress**: + * parameter: bool. + * downloader(s): all (except `LocalDownload`). + * effect: if true, don't test the archives after download. + * default: false (i.e. test the archives). +* **ssl_verifyhost**: + * parameter: bool. + * downloader(s): `CurlDownload` (and derived classes: `DirectFTPDownload`, `DirectHTTPDownload`). + * effect: if false, don't check that the name of the remote server is the same than in the SSL certificate. + * default: true (i.e. check host name). + * note: it's generally a bad idea to disable this verification. However some servers are badly configured. See [here](https://curl.haxx.se/libcurl/c/CURLOPT_SSL_VERIFYHOST.html) for the corresponding cURL option. +* **ssl_verifypeer**: + * parameter: bool. + * downloader(s): `CurlDownload` (and derived classes: `DirectFTPDownload`, `DirectHTTPDownload`). + * effect: if false, don't check the authenticity of the peer's certificate. + * default: true (i.e. check authenticity). + * note: it's generally a bad idea to disable this verification. However some servers are badly configured. See [here](https://curl.haxx.se/libcurl/c/CURLOPT_SSL_VERIFYPEER.html) for the corresponding cURL option. +* **ssl_server_cert**: + * parameter: path of the certificate file. + * downloader(s): `CurlDownload` (and derived classes: `DirectFTPDownload`, `DirectHTTPDownload`). + * effect: use the certificate(s) in this file to verify the peer with. + * default: use OS certificates. + * note: see [here](https://curl.haxx.se/libcurl/c/CURLOPT_CAINFO.html) for the corresponding cURL option. + * parameter: int. 
+ * downloader(s): `CurlDownload` (and derived classes: `DirectFTPDownload`, `DirectHTTPDownload`). + * effect: sets the interval, in seconds, that the operating system will wait between sending keepalive probes. + * default: cURL default (60s at the time of this writing). + * note: see [here](https://curl.haxx.se/libcurl/c/CURLOPT_TCP_KEEPINTVL.html) for the corresponding cURL option. +* **ftp_method**: + * parameter: one of `default`, `multicwd`, `nocwd`, `singlecwd` (case insensitive). + * downloader(s): `CurlDownload` (and derived classes: `DirectFTPDownload`, `DirectHTTPDownload`) - only used for `FTP(S)`. + * effect: sets the method used to reach a file on a FTP(S) server (`nocwd` and `singlecwd` are usually faster but not always supported). + * default: `default` (which is `multicwd` at the time of this writing as in cURL). + * note: see [here](https://curl.haxx.se/libcurl/c/CURLOPT_FTP_FILEMETHOD.html) for the corresponding cURL option; introduced in version 3.1.2. +* **ssh_hosts_file**: + * parameter: path of the known hosts file. + * downloader(s): `CurlDownload` (and derived classes: `DirectFTPDownload`, `DirectHTTPDownload`) - only used for `SFTP`. + * effect: sets the file used to read/store host keys for `SFTP`. + * default: `~/.ssh/known_hosts` (where `~` is the home directory of the current user). + * note: see [here](https://curl.haxx.se/libcurl/c/CURLOPT_SSH_KNOWNHOSTS.html) for the corresponding cURL option and the option below; introduced in version 3.2.1. +* **ssh_new_host**: + * parameter: one of `reject`, `accept`, `add`. + * downloader(s): `CurlDownload` (and derived classes: `DirectFTPDownload`, `DirectHTTPDownload`) - only used for `SFTP`. + * effect: sets the policy to use for an unknown host. + * default: `reject` (i.e. refuse new hosts - you must add them in the file for instance with `ssh` or `sftp`). 
+ * note: see [here](https://curl.haxx.se/libcurl/c/CURLOPT_SSH_KEYFUNCTION.html) for the corresponding cURL option and the option above; introduced in version 3.2.1. +* **allow_redirections**: + * parameter: bool. + * downloader(s): `CurlDownload` (and derived classes: `DirectFTPDownload`, `DirectHTTPDownload`) - only used for `HTTP(S)`. + * effect: sets the policy for `HTTP` redirections. + * default: `true` (i.e. follow redirections). + * note: see [here](https://curl.haxx.se/libcurl/c/CURLOPT_FOLLOWLOCATION.html) for the corresponding cURL option; introduced in version 3.2.3. Those options can be set in bank properties. See file `global.properties.example` in [biomaj module](https://github.com/genouest/biomaj). diff --git a/setup.py b/setup.py index 9db6dec..4d2c203 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,6 @@ except ImportError: from distutils.core import setup -from distutils.command.install import install import os From 470d500c9a826da5e0d77684409126817f06b6d7 Mon Sep 17 00:00:00 2001 From: Anthony Bretaudeau Date: Thu, 22 Aug 2024 10:02:52 +0200 Subject: [PATCH 02/11] debug --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2dff33b..998fac5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,6 +23,8 @@ jobs: run: | cd biomaj_download/message protoc --python_out=. 
downmessage.proto + git status --porcelain + git diff if [[ `git status --porcelain` ]]; then echo "Protobuf-compiler has not been run, please do it and commit the modifed python files" exit 1 From 1202062a3f922cc4477fb246245dff76df464770 Mon Sep 17 00:00:00 2001 From: Anthony Bretaudeau Date: Thu, 22 Aug 2024 10:06:26 +0200 Subject: [PATCH 03/11] pin --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 998fac5..2409eb4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,8 @@ jobs: - name: Flake8 run: flake8 --ignore=E501,W504 biomaj_download/*.py biomaj_download/download - name: Install Protoc - uses: arduino/setup-protoc@v3 + uses: arduino/setup-protoc@v3 with: + version: "3.27.2" - name: Check that protobuf-compiler has been run run: | cd biomaj_download/message From c9e993230b0c1d64d0d19647b8422548e79c97e5 Mon Sep 17 00:00:00 2001 From: Anthony Bretaudeau Date: Thu, 22 Aug 2024 10:07:10 +0200 Subject: [PATCH 04/11] syntax --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2409eb4..a36fe03 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,8 @@ jobs: - name: Flake8 run: flake8 --ignore=E501,W504 biomaj_download/*.py biomaj_download/download - name: Install Protoc - uses: arduino/setup-protoc@v3 with: + uses: arduino/setup-protoc@v3 + with: version: "3.27.2" - name: Check that protobuf-compiler has been run run: | From 110f305db25501b41e4e9c9e6ae9c37d9aff7559 Mon Sep 17 00:00:00 2001 From: Anthony Bretaudeau Date: Thu, 22 Aug 2024 10:10:46 +0200 Subject: [PATCH 05/11] try --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a36fe03..2fb4799 100644 --- a/.github/workflows/ci.yml +++ 
b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: - name: Install Protoc uses: arduino/setup-protoc@v3 with: - version: "3.27.2" + version: "3.27.x" - name: Check that protobuf-compiler has been run run: | cd biomaj_download/message From 653d43d076fe5d02a28fa60b1a8976ff8a289059 Mon Sep 17 00:00:00 2001 From: Anthony Bretaudeau Date: Thu, 22 Aug 2024 10:12:15 +0200 Subject: [PATCH 06/11] try --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2fb4799..63556de 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: - name: Install Protoc uses: arduino/setup-protoc@v3 with: - version: "3.27.x" + version: "19.x" - name: Check that protobuf-compiler has been run run: | cd biomaj_download/message From 33552edf6551505cceace16cce813529fd0820d2 Mon Sep 17 00:00:00 2001 From: Anthony Bretaudeau Date: Thu, 22 Aug 2024 10:14:19 +0200 Subject: [PATCH 07/11] try --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 63556de..fa5966f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: - name: Install Protoc uses: arduino/setup-protoc@v3 with: - version: "19.x" + version: "19.6" - name: Check that protobuf-compiler has been run run: | cd biomaj_download/message From 4461abb965ba64d52d05c2fe3c13c731651c15c4 Mon Sep 17 00:00:00 2001 From: Anthony Bretaudeau Date: Thu, 22 Aug 2024 10:18:33 +0200 Subject: [PATCH 08/11] try --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fa5966f..3542ec6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,6 +21,7 @@ jobs: uses: arduino/setup-protoc@v3 with: version: "19.6" + repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Check that protobuf-compiler has been run run: 
| cd biomaj_download/message From 93449134735ecc7a6baa607b0ee8efd9e7cbbc1c Mon Sep 17 00:00:00 2001 From: Anthony Bretaudeau Date: Thu, 22 Aug 2024 10:19:32 +0200 Subject: [PATCH 09/11] c'mon --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3542ec6..e28c8bb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,6 @@ jobs: - name: Install Protoc uses: arduino/setup-protoc@v3 with: - version: "19.6" repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Check that protobuf-compiler has been run run: | From 16858b4bcc2caf361e95e8617fcf54dce4a30d35 Mon Sep 17 00:00:00 2001 From: Anthony Bretaudeau Date: Thu, 22 Aug 2024 10:56:01 +0200 Subject: [PATCH 10/11] protobuf: doc + update compiled version --- README.md | 10 +- biomaj_download/message/downmessage_pb2.py | 174 ++++----------------- 2 files changed, 41 insertions(+), 143 deletions(-) diff --git a/README.md b/README.md index de2f92d..8f5536f 100644 --- a/README.md +++ b/README.md @@ -13,11 +13,15 @@ Python3 support only, python2 support is dropped ## Protobuf -To compile protobuf, in biomaj_download/message: +If you make changes to protobuf code, you need to compile it to python code: ```bash -cd biomaj_download/message/ -protoc --python_out=. downmessage.proto +cd /tmp/protoc/ +PB_REL="https://github.com/protocolbuffers/protobuf/releases" +curl -LO $PB_REL/download/v23.2/protoc-23.2-linux-x86_64.zip # Version used by GitHub Actions currently +unzip protoc-23.2-linux-x86_64.zip +cd ..../biomaj_download/message/ +/tmp/protoc/bin/protoc --python_out=. 
downmessage.proto ``` ## Development diff --git a/biomaj_download/message/downmessage_pb2.py b/biomaj_download/message/downmessage_pb2.py index a9d6767..f5d7f49 100644 --- a/biomaj_download/message/downmessage_pb2.py +++ b/biomaj_download/message/downmessage_pb2.py @@ -4,9 +4,8 @@ """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() @@ -16,147 +15,42 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x11\x64ownmessage.proto\x12\x0f\x62iomaj.download\"\x9d\x02\n\x04\x46ile\x12\x0c\n\x04name\x18\x01 \x02(\t\x12\x0c\n\x04root\x18\x02 \x01(\t\x12\x0f\n\x07save_as\x18\x03 \x01(\t\x12\x0b\n\x03url\x18\x04 \x01(\t\x12\x30\n\x08metadata\x18\x05 \x01(\x0b\x32\x1e.biomaj.download.File.MetaData\x1a\xa8\x01\n\x08MetaData\x12\x13\n\x0bpermissions\x18\x01 \x01(\t\x12\r\n\x05group\x18\x02 \x01(\t\x12\x0c\n\x04size\x18\x03 \x01(\x03\x12\x0c\n\x04hash\x18\x04 \x01(\t\x12\x0c\n\x04year\x18\x05 \x01(\x05\x12\r\n\x05month\x18\x06 \x01(\x05\x12\x0b\n\x03\x64\x61y\x18\x07 \x01(\x05\x12\x0e\n\x06\x66ormat\x18\x08 \x01(\t\x12\x0b\n\x03md5\x18\t \x01(\t\x12\x15\n\rdownload_time\x18\n \x01(\x03\"0\n\x08\x46ileList\x12$\n\x05\x66iles\x18\x01 \x03(\x0b\x32\x15.biomaj.download.File\"\xaa\x02\n\tOperation\x12\x32\n\x04type\x18\x01 \x02(\x0e\x32$.biomaj.download.Operation.OPERATION\x12/\n\x08\x64ownload\x18\x02 \x01(\x0b\x32\x1d.biomaj.download.DownloadFile\x12)\n\x07process\x18\x03 \x01(\x0b\x32\x18.biomaj.download.Process\x12/\n\x05trace\x18\x04 \x01(\x0b\x32 .biomaj.download.Operation.Trace\x1a*\n\x05Trace\x12\x10\n\x08trace_id\x18\x01 \x02(\t\x12\x0f\n\x07span_id\x18\x02 
\x02(\t\"0\n\tOPERATION\x12\x08\n\x04LIST\x10\x00\x12\x0c\n\x08\x44OWNLOAD\x10\x01\x12\x0b\n\x07PROCESS\x10\x02\"\x17\n\x07Process\x12\x0c\n\x04\x65xec\x18\x01 \x02(\t\"\x94\x0b\n\x0c\x44ownloadFile\x12\x0c\n\x04\x62\x61nk\x18\x01 \x02(\t\x12\x0f\n\x07session\x18\x02 \x02(\t\x12\x11\n\tlocal_dir\x18\x03 \x02(\t\x12\x18\n\x10timeout_download\x18\x04 \x01(\x05\x12=\n\x0bremote_file\x18\x05 \x02(\x0b\x32(.biomaj.download.DownloadFile.RemoteFile\x12\x32\n\x05proxy\x18\x06 \x01(\x0b\x32#.biomaj.download.DownloadFile.Proxy\x12\x43\n\x0bhttp_method\x18\x08 \x01(\x0e\x32).biomaj.download.DownloadFile.HTTP_METHOD:\x03GET\x12;\n\x07options\x18\t \x03(\x0b\x32*.biomaj.download.DownloadFile.OptionsEntry\x1a$\n\x05Param\x12\x0c\n\x04name\x18\x01 \x02(\t\x12\r\n\x05value\x18\x02 \x02(\t\x1a\xcd\x03\n\tHttpParse\x12\x91\x01\n\x08\x64ir_line\x18\x01 \x02(\t:\x7f[\\s]*.*([\\d]{2}-[\\w\\d]{2,5}-[\\d]{4}\\s[\\d]{2}:[\\d]{2})\x12\xa5\x01\n\tfile_line\x18\x02 \x02(\t:\x91\x01[\\s] Date: Thu, 22 Aug 2024 10:58:14 +0200 Subject: [PATCH 11/11] doc --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8f5536f..95f0e57 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Microservice to manage the downloads of biomaj. -A protobuf interface is available in `biomaj_download/message/message_pb2.py` to exchange messages between BioMAJ and the download service. +A protobuf interface is available in `biomaj_download/message/downmessage_pb2.py` to exchange messages between BioMAJ and the download service. Messages go through RabbitMQ (to be installed). Python3 support only, python2 support is dropped