From f7c4a8331fba7d185665649a36c1c67b8df61202 Mon Sep 17 00:00:00 2001 From: Provost Simon Date: Thu, 4 Jul 2024 02:41:38 +0100 Subject: [PATCH 1/3] refactor(docs): update readme's library's logo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7d17d1cf..a5243200 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@


- Scikit-longitudinal + Scikit-longitudinal
Scikit-longitudinal
From 880b83ce25d14c2a706a63f1d5b5c02a7fe74a19 Mon Sep 17 00:00:00 2001 From: Provost Simon Date: Thu, 4 Jul 2024 02:49:46 +0100 Subject: [PATCH 2/3] refactor(core): update install / docker install --- dockerfile | 101 +++--------------- docs/contribution.md | 41 ++++++- pdm.lock | 8 +- pyproject.toml | 6 +- .../docker_scikit_longitudinal_installs.sh | 25 ----- scripts/linux/docker_start_pdm_env.sh | 8 -- 6 files changed, 62 insertions(+), 127 deletions(-) delete mode 100644 scripts/linux/docker_scikit_longitudinal_installs.sh delete mode 100644 scripts/linux/docker_start_pdm_env.sh diff --git a/dockerfile b/dockerfile index 5c0c2152..7732183b 100644 --- a/dockerfile +++ b/dockerfile @@ -1,24 +1,10 @@ -ARG CONDA_VER=2023.09-0 -ARG OS_TYPE +ARG PYTHON_BASE=3.9-slim +FROM python:$PYTHON_BASE AS builder -FROM python:3.9.8 AS builder +RUN pip install -U pdm +ENV PDM_CHECK_UPDATE=false -# ========================== -# Dockerfile for Scikit Longitudinal Project -# System: Linux under Python 3.9.8 lightweight image -# Python: 3.9.8 -# ========================== - -RUN echo "==========================\nStage 1: The Build Process\n==========================" - -# ----------------------------------- -# 🛠 System-level Setup and Libraries 🛠 -# ----------------------------------- -RUN apt-get update && apt-get install -y libomp-dev - -# ------------------------ -# 🛠 Compiler Configurations 🛠 -# ------------------------ +RUN apt-get update && apt-get install -y build-essential libomp-dev libc-dev && apt-get clean && rm -rf /var/lib/apt/lists/* ENV CC=gcc ENV CXX=g++ ENV CPPFLAGS="-I/usr/local/include" @@ -26,81 +12,26 @@ ENV CFLAGS="-Wall" ENV CXXFLAGS="-Wall" ENV LDFLAGS="-L/usr/local/lib" -# ------------------- -# 🛠 Python Utilities 🛠 -# ------------------- -RUN echo "🛠 Python Utilities 🛠" RUN pip install -U pip setuptools wheel -RUN pip install pdm -# --------------------------- -# 📦 Python Dependency Setup 📦 -# --------------------------- COPY pyproject.toml pdm.lock 
/scikit_longitudinal/ -WORKDIR /scikit_longitudinal -RUN mkdir __pypackages__ - -FROM python:3.9 -RUN echo "==========================\nStage 2: The Run-Time Setup\n==========================" - -# ----------------------------------- -# 🛠 System-level Setup and Libraries 🛠 -# ----------------------------------- -RUN echo "🛠 System-level Setup and Libraries 🛠" -RUN apt-get update && apt-get install -y libomp-dev build-essential wget curl libc-dev && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# ------------------------- -# 🐍 Anaconda Installation 🐍 -# ------------------------- -RUN echo "🐍 Anaconda Installation 🐍" -ARG CONDA_VER -ARG OS_TYPE -RUN if [ -z "${OS_TYPE}" ]; then echo "OS_TYPE argument not provided"; exit 1; fi -RUN wget -q "https://repo.anaconda.com/archive/Anaconda3-${CONDA_VER}-Linux-${OS_TYPE}.sh" -O ~/Anaconda.sh -RUN dpkg --add-architecture arm64 -RUN apt update -y -RUN apt install -y libc6:arm64 -RUN bash ~/Anaconda.sh -b -p /anaconda -RUN rm ~/Anaconda.sh -ENV PATH=/anaconda/bin:${PATH} -RUN conda update --quiet -y conda +COPY scikit_longitudinal/ /scikit_longitudinal/scikit_longitudinal/ +COPY data/ /scikit_longitudinal/data/ +COPY scripts/ /scikit_longitudinal/scripts/ +COPY .env README.md .coveragerc /scikit_longitudinal/ -# ------------------------ -# 🛠 Compiler Configurations 🛠 -# ------------------------ -RUN echo "🛠 Compiler Configurations 🛠" -ENV CC=gcc -ENV CXX=g++ -ENV CPPFLAGS="-I/usr/local/include" -ENV CFLAGS="-Wall" -ENV CXXFLAGS="-Wall" -ENV LDFLAGS="-L/usr/local/lib" +WORKDIR /scikit_longitudinal +RUN pdm install --check --with :all --no-editable -# --------------------------- -# 🐍 Python Environment Setup 🐍 -# --------------------------- -RUN echo "🐍 Python Environment Setup 🐍" -ENV PYTHONPATH=/scikit_longitudinal/pkgs +FROM python:$PYTHON_BASE -# ---------------------- -# 📦 Project File Setup 📦 -# ---------------------- -RUN echo "📦 Project File Setup 📦" +COPY --from=builder /scikit_longitudinal/.venv/ 
/scikit_longitudinal/.venv +ENV PATH="/scikit_longitudinal/.venv/bin:$PATH" COPY pyproject.toml pdm.lock /scikit_longitudinal/ COPY scikit_longitudinal/ /scikit_longitudinal/scikit_longitudinal/ -COPY scikit-learn/ /scikit_longitudinal/scikit-learn/ COPY data/ /scikit_longitudinal/data/ COPY scripts/ /scikit_longitudinal/scripts/ -COPY .env README.md .coveragerc /scripts/linux/docker_scikit_longitudinal_installs.sh /scripts/linux/docker_start_pdm_env.sh /scikit_longitudinal/ +COPY .env README.md .coveragerc /scikit_longitudinal/ -# ------------------------------- -# 🚀 Scikit Longitudinal Installation 🚀 -# ------------------------------- -RUN echo "🚀 Scikit Longitudinal Installation 🚀" WORKDIR /scikit_longitudinal -RUN pip install pdm -ENV PDM_IN_ENV=in-project -RUN chmod +x /scikit_longitudinal/scripts/linux/docker_scikit_longitudinal_installs.sh /scikit_longitudinal/scripts/linux/docker_start_pdm_env.sh -RUN /scikit_longitudinal/scripts/linux/docker_scikit_longitudinal_installs.sh \ No newline at end of file +CMD ["/bin/bash"] \ No newline at end of file diff --git a/docs/contribution.md b/docs/contribution.md index 6d56c8b0..a17bcff6 100644 --- a/docs/contribution.md +++ b/docs/contribution.md @@ -69,7 +69,13 @@ Please follow the instructions below for setting up your development environment !!! warning "Fully-working environment setup is not guaranteed on Windows. We recommend using a Unix-based system for development. Such as MacOS or Linux. On Windows, Docker is recommended having been tested on Windows 10 & 11." -To manually configure your environment, please adhere to the following procedure meticulously: +First of all, open the `.env` file at the root of the project and set: +```bash +SKLONG_PYTHON_VERSION= # e.g. 3.9.8 +SKLONG_PYTHON_PATH= # e.g. /usr/bin/python3.9 +``` + +Next, to manually configure your environment, please adhere to the following procedure meticulously: 1. 
**Setting up the package manager:** - Initialise the package manager with Conda as the backend for virtual environments: @@ -208,7 +214,38 @@ feel free to open an issue on the GitHub repository for additional support. ```bash git config --global core.autocrlf true ``` - + +!!! tip "Docker and JetBrains" + If you are using JetBrains, you should be able to leverage the `.run/` configurations at the root of the folder. + They should be automatically detected by your JetBrains IDE (e.g. PyCharm) and you can run the tests from there. + Make sure to edit the configuration to adapt to your use-case. + + Configs available: + - `Scikit_longitudinal_ARM_architecture.run.xml`: If you are on an ARM architecture, such as a MacBook with an Apple Silicon chip. + - `Scikit_longitudinal_Intel_architecture.run.xml`: If you are on an Intel architecture, such as most Windows and Linux machines or a MacBook with an Intel chip. + +!!! warning "Docker with Apple Silicon" + If you are on an Apple Silicon chip, note that the current library is `x86_64`-based. Therefore, you should configure Docker + so that it runs on that architecture. From the root of the project, run the following commands: + + 1. **Prepare [QUS](https://github.com/dbhi/qus) for Docker:** + ```bash + docker run --rm --privileged aptman/qus -- -r + docker run --rm --privileged aptman/qus -s -- -p x86_64 + ``` + 2. **Build the Docker Image:** + ```bash + docker buildx create --use + docker buildx build --platform linux/amd64 -t scikit_longitudinal:latest . + ``` + 3. **Run the Docker Container:** + ```bash + docker run -it scikit_longitudinal:latest /bin/bash + ``` + 4. 
**Run the tests:** + ```bash + pytest scikit_longitudinal/ --cov=./ --cov-report=html --cov-config=.coveragerc --cov-report=html:htmlcov/scikit_longitudinal -s -vv --capture=no + ``` ## ⚙️ How To Build The Distribution Packages To build the distribution packages for the project, follow these steps: diff --git a/pdm.lock b/pdm.lock index 083ad7f2..08437a1b 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "doc", "lint", "test"] strategy = ["cross_platform", "inherit_metadata"] lock_version = "4.4.2" -content_hash = "sha256:c1129c24423921fe206e4c4be38c7e4b9d5d8f37da7506940fe29e74d7296d48" +content_hash = "sha256:8af2f61ffab8b573d6ce05c07bcbe24da49c372a20e18f0a73c4787ea828b050" [[package]] name = "aiosignal" @@ -301,13 +301,13 @@ files = [ [[package]] name = "certifi" -version = "2024.6.2" +version = "2024.7.4" requires_python = ">=3.6" summary = "Python package for providing Mozilla's CA Bundle." groups = ["default", "doc", "test"] files = [ - {file = "certifi-2024.6.2-py3-none-any.whl", hash = "sha256:ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56"}, - {file = "certifi-2024.6.2.tar.gz", hash = "sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516"}, + {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"}, + {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"}, ] [[package]] diff --git a/pyproject.toml b/pyproject.toml index 7a10995b..29333a90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dependencies = [ "joblib>=0.11", "deep-forest>=0.1.7", "starboost==0.0.2", - "scikit-lexicographical-trees>=0.0.2", + "scikit-lexicographical-trees==0.0.2", ] requires-python = ">=3.9,<3.10" readme = "README.md" @@ -75,12 +75,12 @@ doc = [ _set_pdm_use_venv = { cmd = "pdm use --venv ${PDM_IN_ENV}" } _use_python39.shell = "pdm use \"${SKLONG_PYTHON_PATH}\"" 
_use_python39.env_file = ".env" -_create_env.shell = "pdm venv create --with-pip ${SKLONG_PYTHON_VERSION} --force" +_create_env.shell = "pdm venv create --with-pip ${SKLONG_PYTHON_VERSION}" _create_env.env_file = ".env" _check_sklong_vars = {shell = "echo 'Make sure to set the SKLONG_PYTHON_PATH and SKLONG_PYTHON_VERSION environment variables'"} _set_env_var = {shell = "echo 'could you please run `export PDM_IN_ENV=in-project`'"} _activate_env = {cmd = "echo 'could you please activate the environment via `eval $(pdm venv activate (dollar-sign)PDM_IN_ENV)`'"} -setup_sklong = {composite = ["_check_sklong_vars", "_use_python39", "_create_env", "_set_env_var", "_activate_env"]} +setup_sklong = {composite = ["_check_sklong_vars", "_create_env", "_set_env_var", "_activate_env"]} remove_env = { cmd = "pdm venv remove ${PDM_IN_ENV}" } _clean_project = {cmd = "echo could you please deactivate the environment via `conda deactivate` then run `pdm run remove_env`"} clean = {composite = ["_check_pdm_vars", "_clean_project"] } diff --git a/scripts/linux/docker_scikit_longitudinal_installs.sh b/scripts/linux/docker_scikit_longitudinal_installs.sh deleted file mode 100644 index f41adfc9..00000000 --- a/scripts/linux/docker_scikit_longitudinal_installs.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -TEMP_PYTHON_PATH=/usr/local/bin/python -if [ ! -f ".env" ]; then - echo "Creating .env file..." 
- touch .env -fi -if grep -q "SKLONG_PYTHON_PATH" ".env"; then - sed -i "s|SKLONG_PYTHON_PATH=.*|SKLONG_PYTHON_PATH=${TEMP_PYTHON_PATH}|" .env -else - echo "SKLONG_PYTHON_PATH=${TEMP_PYTHON_PATH}" >> .env -fi - -pdm config venv.backend conda -pdm use 3.9 - -pdm run setup_sklong -export PDM_IN_ENV=in-project - -conda init bash -source ~/.bashrc - -eval $(pdm venv activate $PDM_IN_ENV) -pdm run install_prod -pdm run install_dev diff --git a/scripts/linux/docker_start_pdm_env.sh b/scripts/linux/docker_start_pdm_env.sh deleted file mode 100644 index 11556cb6..00000000 --- a/scripts/linux/docker_start_pdm_env.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -export PDM_IN_ENV=in-project - -conda init bash -source ~/.bashrc - -eval $(pdm venv activate $PDM_IN_ENV) From da4150d752911fd643869dd717098c149dc5540c Mon Sep 17 00:00:00 2001 From: Provost Simon Date: Thu, 4 Jul 2024 03:18:07 +0100 Subject: [PATCH 3/3] refactor(versions): add v0.0.4 Scikit-Longitudinal changelog [cd build] [cd tests] --- CHANGELOG.MD | 34 ++++++++++++++++++++++++++++++---- docs/examples/index.md | 7 ++++++- pyproject.toml | 3 +-- setup.py | 2 +- 4 files changed, 38 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.MD b/CHANGELOG.MD index 990b9bf3..0d592faf 100644 --- a/CHANGELOG.MD +++ b/CHANGELOG.MD @@ -5,13 +5,39 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [v0.0.4] - 2024-07-04 - First Public Release and Major Enhancements + +### Added + +- **Documentation**: Comprehensive new documentation with Material for MKDocs. This includes a detailed tutorial on understanding vectors of waves in longitudinal datasets, a contribution guide, an FAQ section, and complete API references for all estimators, preprocessors, data preparations, and the pipeline manager. 
+- **Docker Installation**: Added new Docker installation process. +- **Windows Support**: Windows is now supported via Docker. +- **New Classifiers/Regressors**: Introduced Lexico Deep Forest, Lexico Gradient Boosting, and Lexico Decision Tree Regressor. +- **PyPI Availability**: Scikit-Longitudinal is now available on PyPI. +- **Continuous Integration**: Integrated unit testing, documentation, and PyPI publishing within the CI pipeline. + +### Improved + +- **PDM Setup and Installation**: Enhanced setup and installation processes using PDM. +- **Testing Coverage**: Improved testing coverage, ensuring that nearly 90% of the library is tested. +- **Scikit-Lexicographical-Trees**: Extracted the lexicographical scikit-learn tree node splitting function into its own repository and published it to PyPI as Scikit-Lexicographical-Trees. This is now leveraged by our lexico-based estimators. +- **.env Management**: Improved management of environment variables. +- **Lexicographical Enhancements**: Integrated lexicographical enhancements of the waves vector within the variant of scikit-learn, scikit-lexicographical-trees, improving memory and time efficiency by handling algorithmic temporality directly in C++. + +### To-Do + +- **Docstrings Alignment**: Ensure that docstrings in the codebase align with the official documentation to avoid confusion. +- **Native Windows Compatibility**: Achieve Windows compatibility without relying on Docker (requires access to a Windows machine). +- **Future Enhancements**: Ongoing improvements and new features as they are identified. +- **Documentation examples**: Add examples to the documentation to help users understand how to use the library with Jupyter notebooks. + ## [v0.0.3] - 2023-10-31 - Usability, Maintainability, and Compliance Enhancements ### Added - Features Group Missing Waves Handling: Introduced mechanisms for gracefully handling missing waves in features groups. 
- Readiness Descriptions: New readiness indicators provide detailed descriptions of temporal data management across the library. -- AutoLD Compliance: The library is now compliant with AutoLD standards. +- Auto-Sklong Compliance: The library is now compliant with Auto-Sklong standards. - Package Management Transition: Switched from Poetry to PDM for improved package and dependency management. - Docker Support: Linux-based Docker environment setup for streamlined installation and deployment. - Platform Testing: Library is tested on both Mac and Linux, with Windows support nearing completion. @@ -21,7 +47,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed -- Irrelevant Scripts: Removed scripts related to visualisations not core to the library's functionality. +- Irrelevant Scripts: Removed scripts related to visualizations not core to the library's functionality. - Experiments Branch: Moved all experiment-related codes to a dedicated `Experiments` branch. ## [v0.0.2] - 2023-05-17 - Enhanced Longitudinal Analysis and Parallelization Features @@ -47,7 +73,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - CFS per Group for Longitudinal Data: Python implementation with parallelism for better performance. 
[Unreleased]: https://github.com/simonprovost/scikit-longitudinal/compare/v0.0.3...HEAD +[v0.0.4]: https://github.com/simonprovost/scikit-longitudinal/releases/tag/v0.0.4 [v0.0.3]: https://github.com/simonprovost/scikit-longitudinal/releases/tag/v0.0.3 [v0.0.2]: https://github.com/simonprovost/scikit-longitudinal/releases/tag/v0.0.2 -[v0.0.1]: https://github.com/simonprovost/scikit-longitudinal/releases/tag/v0.0.1 - +[v0.0.1]: https://github.com/simonprovost/scikit-longitudinal/releases/tag/v0.0.1 \ No newline at end of file diff --git a/docs/examples/index.md b/docs/examples/index.md index ea5bad3c..31a9a85a 100644 --- a/docs/examples/index.md +++ b/docs/examples/index.md @@ -6,7 +6,12 @@ hide: # :construction:Coming Soon! # :construction: Coming Soon! -We're currently working on creating a comprehensive examples page for you. This page will feature `Jupyter notebooks` that demonstrate how to use `Scikit-Longitudinal` effectively. +We're currently working on creating a comprehensive examples page for you. +This page will feature `Jupyter notebooks` that demonstrate how to use `Scikit-Longitudinal` effectively. + +!!! note + In the meantime, for each estimator, preprocessor, data preparation, and the pipeline manager, + you can find some starting-point examples at the end of the corresponding documentation page.