diff --git a/README.md b/README.md
index 0e5ae456..b4907e3d 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,10 @@ UVVisML
Predict optical properties of molecules with machine learning.
-## Setup
+## Colab Examples
+A [Google Colab notebook](https://colab.research.google.com/github/learningmatter-mit/uvvisml/blob/main/uvvisml_demo.ipynb) demonstrates how to use the various types of models and make predictions with them. Alternatively, you can follow the command line instructions below.
+
+## Command Line Setup
0. Install [Anaconda or Miniconda](https://docs.conda.io/projects/continuumio-conda/en/latest/user-guide/install/index.html) if you have not yet done so.
1. `git clone git@github.com:learningmatter-mit/uvvisml.git`
2. `cd uvvisml`
diff --git a/uvvisml_demo.ipynb b/uvvisml_demo.ipynb
new file mode 100644
index 00000000..c9383afc
--- /dev/null
+++ b/uvvisml_demo.ipynb
@@ -0,0 +1,3984 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "uvvisml_demo.ipynb",
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
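+ "<a href=\"https://colab.research.google.com/github/learningmatter-mit/uvvisml/blob/main/uvvisml_demo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"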
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Setup"
+ ],
+ "metadata": {
+ "id": "6jZnD-vU9Fd-"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "WGZxehx_44Rj",
+ "outputId": "bbf17a4d-3e75-4f9b-ea11-d42c39cad146"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "--2022-02-05 05:19:39-- https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.3-Linux-x86_64.sh\n",
+ "Resolving repo.anaconda.com (repo.anaconda.com)... 104.16.131.3, 104.16.130.3, 2606:4700::6810:8303, ...\n",
+ "Connecting to repo.anaconda.com (repo.anaconda.com)|104.16.131.3|:443... connected.\n",
+ "HTTP request sent, awaiting response... 416 Requested Range Not Satisfiable\n",
+ "\n",
+ " The file is already fully retrieved; nothing to do.\n",
+ "\n",
+ "PREFIX=/usr/local\n",
+ "Unpacking payload ...\n",
+ "Collecting package metadata (current_repodata.json): - \b\b\\ \b\b| \b\b/ \b\b- \b\bdone\n",
+ "Solving environment: | \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \n",
+ "The environment is inconsistent, please check the package plan carefully\n",
+ "The following packages are causing the inconsistency:\n",
+ "\n",
+ " - conda-forge/linux-64::rdkit==2020.09.2=py37h713bca6_0\n",
+ " - conda-forge/noarch::sphinxcontrib-qthelp==1.0.3=py_0\n",
+ " - defaults/noarch::urllib3==1.26.6=pyhd3eb1b0_1\n",
+ " - defaults/linux-64::python==3.7.7=hcff3b4d_5\n",
+ " - defaults/linux-64::ncurses==6.2=he6710b0_1\n",
+ " - conda-forge/noarch::hyperopt==0.2.5=pyh9f0ad1d_0\n",
+ " - conda-forge/linux-64::pcre==8.45=h9c3ff4c_0\n",
+ " - defaults/noarch::tqdm==4.61.2=pyhd3eb1b0_1\n",
+ " - defaults/linux-64::chardet==4.0.0=py37h06a4308_1003\n",
+ " - conda-forge/linux-64::libpng==1.6.37=h21135ba_2\n",
+ " - conda-forge/linux-64::tornado==6.1=py37h5e8e339_1\n",
+ " - conda-forge/linux-64::pymongo==3.10.1=py37h3340039_2\n",
+ " - defaults/linux-64::libmklml==2019.0.5=h06a4308_0\n",
+ " - defaults/linux-64::tk==8.6.10=hbc83047_0\n",
+ " - conda-forge/linux-64::click==8.0.3=py37h89c1867_1\n",
+ " - conda-forge/linux-64::docutils==0.16=py37h89c1867_3\n",
+ " - defaults/linux-64::kiwisolver==1.3.1=py37h2531618_0\n",
+ " - conda-forge/linux-64::reportlab==3.5.68=py37h69800bb_0\n",
+ " - conda-forge/noarch::flask==2.0.2=pyhd8ed1ab_0\n",
+ " - conda-forge/linux-64::bzip2==1.0.8=h7f98852_4\n",
+ " - defaults/noarch::requests==2.25.1=pyhd3eb1b0_0\n",
+ " - conda-forge/linux-64::boost-cpp==1.74.0=h9359b55_0\n",
+ " - conda-forge/linux-64::scikit-learn==0.23.2=py37hddcf8d6_3\n",
+ " - conda-forge/noarch::python-dateutil==2.8.2=pyhd8ed1ab_0\n",
+ " - conda-forge/linux-64::libcblas==3.9.0=5_h92ddd45_netlib\n",
+ " - conda-forge/noarch::werkzeug==2.0.2=pyhd8ed1ab_0\n",
+ " - conda-forge/noarch::snowballstemmer==2.2.0=pyhd8ed1ab_0\n",
+ " - conda-forge/noarch::sphinxcontrib-jsmath==1.0.1=py_0\n",
+ " - conda-forge/linux-64::freetype==2.10.4=h0708190_1\n",
+ " - conda-forge/linux-64::lz4-c==1.9.3=h9c3ff4c_1\n",
+ " - defaults/noarch::pycparser==2.20=py_2\n",
+ " - conda-forge/noarch::pandas-flavor==0.2.0=py_0\n",
+ " - defaults/linux-64::yaml==0.2.5=h7b6447c_0\n",
+ " - conda-forge/noarch::pygments==2.11.2=pyhd8ed1ab_0\n",
+ " - conda-forge/noarch::pyparsing==3.0.7=pyhd8ed1ab_0\n",
+ " - conda-forge/linux-64::liblapack==3.9.0=5_h92ddd45_netlib\n",
+ " - conda-forge/noarch::chemprop==1.4.1=pyhd8ed1ab_0\n",
+ " - defaults/linux-64::libuuid==1.0.3=h7f8727e_2\n",
+ " - conda-forge/linux-64::certifi==2021.10.8=py37h89c1867_1\n",
+ " - conda-forge/linux-64::python_abi==3.7=2_cp37m\n",
+ " - conda-forge/noarch::cloudpickle==2.0.0=pyhd8ed1ab_0\n",
+ " - conda-forge/noarch::networkx==2.6.3=pyhd8ed1ab_1\n",
+ " - conda-forge/noarch::typed-argument-parser==1.7.2=pyhd8ed1ab_0\n",
+ " - conda-forge/linux-64::importlib-metadata==4.10.1=py37h89c1867_0\n",
+ " - conda-forge/noarch::packaging==21.3=pyhd8ed1ab_0\n",
+ " - defaults/linux-64::brotlipy==0.7.0=py37h27cfd23_1003\n",
+ " - defaults/linux-64::pysocks==1.7.1=py37_1\n",
+ " - defaults/linux-64::pip==21.1.3=py37h06a4308_0\n",
+ " - defaults/noarch::pyopenssl==20.0.1=pyhd3eb1b0_1\n",
+ " - conda-forge/noarch::sphinxcontrib-htmlhelp==2.0.0=pyhd8ed1ab_0\n",
+ " - conda-forge/noarch::dataclasses==0.8=pyhc8e2a94_3\n",
+ " - defaults/linux-64::xz==5.2.5=h7b6447c_0\n",
+ " - defaults/linux-64::cairo==1.16.0=hf32fb01_1\n",
+ " - defaults/linux-64::cryptography==3.4.7=py37hd23ed53_0\n",
+ " - conda-forge/noarch::colorama==0.4.4=pyh9f0ad1d_0\n",
+ " - defaults/linux-64::pycosat==0.6.3=py37h27cfd23_0\n",
+ " - conda-forge/linux-64::libiconv==1.16=h516909a_0\n",
+ " - conda-forge/linux-64::pycairo==1.20.1=py37hfff247e_0\n",
+ " - conda-forge/linux-64::pixman==0.40.0=h36c2ea0_0\n",
+ " - conda-forge/noarch::babel==2.9.1=pyh44b312d_0\n",
+ " - conda-forge/linux-64::sqlalchemy==1.3.23=py37h5e8e339_0\n",
+ " - conda-forge/noarch::threadpoolctl==3.1.0=pyh8a188c0_0\n",
+ " - defaults/linux-64::fontconfig==2.13.1=h6c09931_0\n",
+ " - defaults/linux-64::libedit==3.1.20181209=hc058e9b_0\n",
+ " - defaults/linux-64::readline==8.1=h27cfd23_0\n",
+ " - conda-forge/noarch::alabaster==0.7.12=py_0\n",
+ " - conda-forge/noarch::xarray==0.20.2=pyhd8ed1ab_0\n",
+ " - conda-forge/noarch::jinja2==3.0.3=pyhd8ed1ab_0\n",
+ " - conda-forge/noarch::itsdangerous==2.0.1=pyhd8ed1ab_0\n",
+ " - defaults/noarch::six==1.16.0=pyhd3eb1b0_0\n",
+ " - conda-forge/linux-64::zstd==1.4.9=ha95c52a_0\n",
+ " - defaults/linux-64::libxcb==1.14=h7b6447c_0\n",
+ " - defaults/noarch::wheel==0.36.2=pyhd3eb1b0_0\n",
+ " - conda-forge/noarch::typing_inspect==0.7.1=pyh6c4a22f_0\n",
+ " - defaults/linux-64::libstdcxx-ng==9.3.0=hd4cf53a_17\n",
+ " - defaults/linux-64::libgcc-ng==9.3.0=h5101ec6_17\n",
+ " - conda-forge/linux-64::pandas==1.2.3=py37hdc94413_0\n",
+ " - conda-forge/noarch::joblib==1.1.0=pyhd8ed1ab_0\n",
+ " - conda-forge/linux-64::libblas==3.9.0=1_h86c2bf4_netlib\n",
+ " - conda-forge/noarch::sphinxcontrib-applehelp==1.0.2=py_0\n",
+ " - conda-forge/linux-64::conda==4.11.0=py37h89c1867_0\n",
+ " - conda-forge/linux-64::scipy==1.5.3=py37h14a347d_0\n",
+ " - conda-forge/noarch::importlib_metadata==4.10.1=hd8ed1ab_0\n",
+ " - conda-forge/noarch::sphinx==4.4.0=pyh6c4a22f_1\n",
+ " - conda-forge/linux-64::mypy_extensions==0.4.3=py37h89c1867_4\n",
+ " - conda-forge/linux-64::protobuf==3.15.8=py37hcd2ae1e_0\n",
+ " - conda-forge/linux-64::boost==1.74.0=py37h6dcda5c_3\n",
+ " - conda-forge/noarch::sphinxcontrib-serializinghtml==1.1.5=pyhd8ed1ab_1\n",
+ " - defaults/linux-64::libffi==3.3=he6710b0_2\n",
+ " - conda-forge/noarch::olefile==0.46=pyh9f0ad1d_1\n",
+ " - conda-forge/linux-64::matplotlib-base==3.3.4=py37h0c9df89_0\n",
+ " - defaults/linux-64::jpeg==9d=h7f8727e_0\n",
+ " - defaults/linux-64::pytorch==1.8.1=cpu_py37h60491be_0\n",
+ " - conda-forge/noarch::pytz==2021.3=pyhd8ed1ab_0\n",
+ " - conda-forge/linux-64::openssl==1.1.1k=h7f98852_0\n",
+ " - conda-forge/linux-64::libxml2==2.9.10=h68273f3_2\n",
+ " - conda-forge/linux-64::numpy==1.20.3=py37h038b26d_1\n",
+ " - conda-forge/linux-64::ninja==1.10.2=h4bd325d_0\n",
+ " - conda-forge/noarch::typing-extensions==4.0.1=hd8ed1ab_0\n",
+ " - conda-forge/linux-64::libopenblas==0.3.17=pthreads_h8fe5266_1\n",
+ " - defaults/noarch::idna==2.10=pyhd3eb1b0_0\n",
+ " - conda-forge/linux-64::libprotobuf==3.15.8=h780b84a_0\n",
+ " - defaults/linux-64::ruamel_yaml==0.15.100=py37h27cfd23_0\n",
+ " - defaults/linux-64::glib==2.69.1=h4ff587b_1\n",
+ " - conda-forge/noarch::cycler==0.11.0=pyhd8ed1ab_0\n",
+ " - conda-forge/noarch::tensorboardx==2.4=pyhd8ed1ab_0\n",
+ " - conda-forge/linux-64::markupsafe==2.0.1=py37h5e8e339_0\n",
+ " - conda-forge/noarch::zipp==3.7.0=pyhd8ed1ab_1\n",
+ " - defaults/linux-64::zlib==1.2.11=h7b6447c_3\n",
+ " - conda-forge/linux-64::icu==67.1=he1b5a44_0\n",
+ " - conda-forge/noarch::sphinxcontrib-devhelp==1.0.2=py_0\n",
+ " - conda-forge/linux-64::pillow==6.2.1=py37h6b7be26_0\n",
+ " - defaults/linux-64::cffi==1.14.6=py37h400218f_0\n",
+ " - defaults/linux-64::sqlite==3.36.0=hc218d9a_0\n",
+ " - conda-forge/linux-64::libtiff==4.0.10=hc3755c2_1005\n",
+ " - defaults/linux-64::conda-package-handling==1.7.3=py37h27cfd23_1\n",
+ " - defaults/linux-64::setuptools==52.0.0=py37h06a4308_0\n",
+ " - conda-forge/linux-64::future==0.18.2=py37h89c1867_4\n",
+ " - conda-forge/noarch::imagesize==1.3.0=pyhd8ed1ab_0\n",
+ " - conda-forge/noarch::typing_extensions==4.0.1=pyha770c72_0\n",
+ "\b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\b| \b\b/ \b\b- \b\b\\ \b\bdone\n",
+ "\n",
+ "## Package Plan ##\n",
+ "\n",
+ " environment location: /usr/local\n",
+ "\n",
+ " added / updated specs:\n",
+ " - _libgcc_mutex==0.1=main\n",
+ " - ca-certificates==2020.1.1=0\n",
+ " - certifi==2020.4.5.1=py37_0\n",
+ " - cffi==1.14.0=py37he30daa8_1\n",
+ " - chardet==3.0.4=py37_1003\n",
+ " - conda-package-handling==1.6.1=py37h7b6447c_0\n",
+ " - conda==4.8.3=py37_0\n",
+ " - cryptography==2.9.2=py37h1ba5d50_0\n",
+ " - idna==2.9=py_1\n",
+ " - ld_impl_linux-64==2.33.1=h53a641e_7\n",
+ " - libedit==3.1.20181209=hc058e9b_0\n",
+ " - libffi==3.3=he6710b0_1\n",
+ " - libgcc-ng==9.1.0=hdf63c60_0\n",
+ " - libstdcxx-ng==9.1.0=hdf63c60_0\n",
+ " - ncurses==6.2=he6710b0_1\n",
+ " - openssl==1.1.1g=h7b6447c_0\n",
+ " - pip==20.0.2=py37_3\n",
+ " - pycosat==0.6.3=py37h7b6447c_0\n",
+ " - pycparser==2.20=py_0\n",
+ " - pyopenssl==19.1.0=py37_0\n",
+ " - pysocks==1.7.1=py37_0\n",
+ " - python==3.7.7=hcff3b4d_5\n",
+ " - readline==8.0=h7b6447c_0\n",
+ " - requests==2.23.0=py37_0\n",
+ " - ruamel_yaml==0.15.87=py37h7b6447c_0\n",
+ " - setuptools==46.4.0=py37_0\n",
+ " - six==1.14.0=py37_0\n",
+ " - sqlite==3.31.1=h62c20be_1\n",
+ " - tk==8.6.8=hbc83047_0\n",
+ " - tqdm==4.46.0=py_0\n",
+ " - urllib3==1.25.8=py37_0\n",
+ " - wheel==0.34.2=py37_0\n",
+ " - xz==5.2.5=h7b6447c_0\n",
+ " - yaml==0.1.7=had09818_2\n",
+ " - zlib==1.2.11=h7b6447c_3\n",
+ "\n",
+ "\n",
+ "The following packages will be UPDATED:\n",
+ "\n",
+ " rdkit 2020.09.2-py37h713bca6_0 --> 2020.09.5-py37he53b9e1_0\n",
+ "\n",
+ "The following packages will be SUPERSEDED by a higher-priority channel:\n",
+ "\n",
+ " ca-certificates conda-forge::ca-certificates-2021.10.~ --> pkgs/main::ca-certificates-2020.1.1-0\n",
+ "\n",
+ "\n",
+ "Preparing transaction: / \b\b- \b\bdone\n",
+ "Executing transaction: | \b\b/ \b\b- \b\bdone\n",
+ "installation finished.\n",
+ "WARNING:\n",
+ " You currently have a PYTHONPATH environment variable set. This may cause\n",
+ " unexpected behavior when running the Python interpreter in Miniconda3.\n",
+ " For best results, please verify that your PYTHONPATH only points to\n",
+ " directories of packages that are compatible with the Python interpreter\n",
+ " in Miniconda3: /usr/local\n",
+ "Collecting package metadata (current_repodata.json): ...working... done\n",
+ "Solving environment: ...working... failed with initial frozen solve. Retrying with flexible solve.\n",
+ "Collecting package metadata (repodata.json): ...working... done\n",
+ "Solving environment: ...working... done\n",
+ "\n",
+ "## Package Plan ##\n",
+ "\n",
+ " environment location: /usr/local\n",
+ "\n",
+ " added / updated specs:\n",
+ " - rdkit==2020.09.2\n",
+ "\n",
+ "\n",
+ "The following packages will be UPDATED:\n",
+ "\n",
+ " ca-certificates pkgs/main::ca-certificates-2020.1.1-0 --> conda-forge::ca-certificates-2021.10.8-ha878542_0\n",
+ "\n",
+ "The following packages will be DOWNGRADED:\n",
+ "\n",
+ " rdkit 2020.09.5-py37he53b9e1_0 --> 2020.09.2-py37h713bca6_0\n",
+ "\n",
+ "\n",
+ "Preparing transaction: ...working... done\n",
+ "Verifying transaction: ...working... done\n",
+ "Executing transaction: ...working... done\n",
+ "Collecting package metadata (current_repodata.json): ...working... done\n",
+ "Solving environment: ...working... done\n",
+ "\n",
+ "# All requested packages already installed.\n",
+ "\n",
+ "fatal: destination path 'uvvisml' already exists and is not an empty directory.\n",
+ "--2022-02-05 05:24:58-- https://zenodo.org/record/5573027/files/models.tar.gz\n",
+ "Resolving zenodo.org (zenodo.org)... 137.138.76.77\n",
+ "Connecting to zenodo.org (zenodo.org)|137.138.76.77|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 651010218 (621M) [application/octet-stream]\n",
+ "Saving to: ‘models.tar.gz’\n",
+ "\n",
+ "models.tar.gz 100%[===================>] 620.85M 7.17MB/s in 1m 55s \n",
+ "\n",
+ "2022-02-05 05:26:55 (5.39 MB/s) - ‘models.tar.gz’ saved [651010218/651010218]\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "!wget -c https://repo.anaconda.com/miniconda/Miniconda3-py37_4.8.3-Linux-x86_64.sh\n",
+ "!chmod +x Miniconda3-py37_4.8.3-Linux-x86_64.sh\n",
+ "!bash ./Miniconda3-py37_4.8.3-Linux-x86_64.sh -b -f -p /usr/local\n",
+ "!conda install -q -y -c conda-forge rdkit==2020.09.2\n",
+ "!conda install -q -y -c conda-forge chemprop==1.4.1\n",
+ "![ -e get_model_files.sh ] || [ -d uvvisml ] || git clone https://github.com/learningmatter-mit/uvvisml\n",
+ "# Above: installs Miniconda into /usr/local, adds pinned rdkit and chemprop, and clones the repo only when no checkout is present.\n",
+ "import os\n",
+ "import sys\n",
+ "import pandas as pd\n",
+ "sys.path.append('/usr/local/lib/python3.7/site-packages/')  # site-packages of the /usr/local Miniconda prefix, so chemprop can be imported\n",
+ "import chemprop\n",
+ "if not os.path.isfile('get_model_files.sh'):  # skip on re-runs that are already inside uvvisml/uvvisml\n",
+ "    os.chdir('uvvisml/uvvisml')\n",
+ "# NOTE(review): presumably fetches the pretrained models that later cells read from models/ - confirm against the script.\n",
+ "!bash get_model_files.sh"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Data"
+ ],
+ "metadata": {
+ "id": "MQ67V6hr_6Yz"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "test_file = 'data/splits/lambda_max_abs/deep4chem/group_by_smiles/smiles_target_test.csv'  # relative to the working directory; reused as --test_path below\n",
+ "df = pd.read_csv(filepath_or_buffer=test_file)\n",
+ "df"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 424
+ },
+ "id": "pqtogu2T7OTd",
+ "outputId": "e85776ac-c71d-45cd-def3-61f1cc5d4cb5"
+ },
+ "execution_count": 3,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " smiles | \n",
+ " solvent | \n",
+ " peakwavs_max | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " C1CCCCC1 | \n",
+ " 376.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CCOC(C)=O | \n",
+ " 392.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CC#N | \n",
+ " 396.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CCO | \n",
+ " 400.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " OCC(O)CO | \n",
+ " 413.0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1705 | \n",
+ " c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 | \n",
+ " C[N+](=O)[O-] | \n",
+ " 424.0 | \n",
+ "
\n",
+ " \n",
+ " 1706 | \n",
+ " c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 | \n",
+ " CS(C)=O | \n",
+ " 432.0 | \n",
+ "
\n",
+ " \n",
+ " 1707 | \n",
+ " COc1cc(C)c(-c2cc(-c3c(C)cc(OC)cc3C)c3ccc4c(-c5... | \n",
+ " ClCCl | \n",
+ " 367.0 | \n",
+ "
\n",
+ " \n",
+ " 1708 | \n",
+ " N#Cc1c(N2CCCCC2)cc(-c2cccc3ccccc23)c2c1-c1cccc... | \n",
+ " C1CCOC1 | \n",
+ " 358.0 | \n",
+ "
\n",
+ " \n",
+ " 1709 | \n",
+ " N#Cc1c(N2CCCC2)cc(-c2ccccc2)c2c1Cc1ccccc1-2 | \n",
+ " C1CCOC1 | \n",
+ " 382.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1710 rows × 3 columns
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " smiles ... peakwavs_max\n",
+ "0 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 376.0\n",
+ "1 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 392.0\n",
+ "2 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 396.0\n",
+ "3 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 400.0\n",
+ "4 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 413.0\n",
+ "... ... ... ...\n",
+ "1705 c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 ... 424.0\n",
+ "1706 c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 ... 432.0\n",
+ "1707 COc1cc(C)c(-c2cc(-c3c(C)cc(OC)cc3C)c3ccc4c(-c5... ... 367.0\n",
+ "1708 N#Cc1c(N2CCCCC2)cc(-c2cccc3ccccc23)c2c1-c1cccc... ... 358.0\n",
+ "1709 N#Cc1c(N2CCCC2)cc(-c2ccccc2)c2c1Cc1ccccc1-2 ... 382.0\n",
+ "\n",
+ "[1710 rows x 3 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 3
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Make Predictions"
+ ],
+ "metadata": {
+ "id": "WZktZ7DsAAMn"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Predict experimental peak with model trained on combined training set"
+ ],
+ "metadata": {
+ "id": "9qWycAVkKlS0"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Equivalent to command line:**\n",
+ "\n",
+ "`python uvvisml/predict.py --test_file uvvisml/data/splits/lambda_max_abs/deep4chem/group_by_smiles/smiles_target_test.csv --property absorption_peak_nm_expt --method chemprop --preds_file test_preds.csv`"
+ ],
+ "metadata": {
+ "id": "-g-bD5g6JCCy"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Run the chemprop checkpoints stored under fold_0 on test_file and attach the predictions to df.\n",
+ "arguments = [\n",
+ "    '--test_path', test_file,\n",
+ "    '--preds_path', '/dev/null',  # discard the predictions file; the values are taken from the return value instead\n",
+ "    '--checkpoint_dir', 'models/lambda_max_abs/chemprop/combined/production/fold_0',\n",
+ "    '--number_of_molecules', '2',  # presumably the smiles and solvent columns - TODO confirm\n",
+ "    #'--gpu', '0'  (optional: uncomment to pass a GPU index)\n",
+ "]\n",
+ "args = chemprop.args.PredictArgs().parse_args(arguments)\n",
+ "# Keep only the first value of each prediction row (presumably a single target per row - TODO confirm).\n",
+ "preds = [row[0] for row in chemprop.train.make_predictions(args=args)]\n",
+ "# Adds the column to df in place, so every later cell sees the extended frame.\n",
+ "df['peakwavs_max_pred'] = preds\n",
+ "df"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "5a1-7UXcJCss",
+ "outputId": "eaedecae-627b-4006-e481-7eefc5df90b5"
+ },
+ "execution_count": 4,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading training args\n",
+ "Setting molecule featurization parameters to default.\n",
+ "Loading data\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "1710it [00:00, 126450.28it/s]\n",
+ "100%|██████████| 1710/1710 [00:00<00:00, 221927.71it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Validating SMILES\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:481: UserWarning: This DataLoader will create 8 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.\n",
+ " cpuset_checked))\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Test size = 1,710\n",
+ "Predicting with an ensemble of 5 models\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n",
+ "Loading pretrained parameter \"ffn.7.weight\".\n",
+ "Loading pretrained parameter \"ffn.7.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:49, 1.46s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:02<00:34, 1.05s/it]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:02<00:19, 1.61it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:13, 2.26it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:09, 3.04it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:07, 3.71it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:06, 4.03it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:03<00:05, 4.68it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:03<00:04, 5.34it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:03<00:04, 6.22it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:03, 6.01it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:03<00:04, 4.70it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:04<00:07, 2.85it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:05<00:09, 2.13it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:05<00:10, 1.90it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:06<00:08, 2.35it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:06<00:07, 2.46it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:06<00:07, 2.37it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:07<00:07, 2.28it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:07<00:06, 2.33it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:08<00:05, 2.70it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:08<00:04, 3.21it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:08<00:03, 3.42it/s]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [00:08<00:02, 4.17it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:08<00:02, 4.96it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:08<00:01, 5.78it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:09<00:01, 7.00it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:09<00:00, 7.12it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:09<00:00, 7.68it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:09<00:00, 7.75it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:09<00:00, 7.78it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:09<00:00, 8.18it/s]\u001b[A\n",
+ "100%|██████████| 35/35 [00:09<00:00, 10.62it/s]\u001b[A\n",
+ " 20%|██ | 1/5 [00:10<00:40, 10.13s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n",
+ "Loading pretrained parameter \"ffn.7.weight\".\n",
+ "Loading pretrained parameter \"ffn.7.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:02<01:22, 2.43s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:02<00:39, 1.19s/it]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:02<00:22, 1.43it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:03<00:15, 2.06it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:03<00:10, 2.81it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:03<00:08, 3.54it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:03<00:07, 3.88it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:03<00:05, 4.72it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:03<00:04, 5.39it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:03<00:04, 5.64it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:04<00:04, 5.99it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:04<00:03, 5.93it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:04<00:04, 5.17it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:04<00:03, 5.84it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:04<00:03, 5.53it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:04<00:03, 6.05it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:05<00:02, 6.00it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:05<00:02, 5.78it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:05<00:02, 6.01it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:05<00:02, 6.28it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:05<00:01, 8.17it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:05<00:01, 7.96it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:06<00:01, 9.70it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:06<00:00, 9.15it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:06<00:00, 9.18it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:06<00:00, 8.92it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:06<00:00, 9.34it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:06<00:00, 9.15it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:06<00:00, 9.07it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:07<00:00, 9.11it/s]\u001b[A\n",
+ " 40%|████ | 2/5 [00:17<00:25, 8.49s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n",
+ "Loading pretrained parameter \"ffn.7.weight\".\n",
+ "Loading pretrained parameter \"ffn.7.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:51, 1.51s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:01<00:24, 1.33it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:01<00:15, 2.13it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:11, 2.77it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:08, 3.64it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:06, 4.23it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:06, 4.32it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:02<00:05, 5.06it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:02<00:04, 5.62it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:02<00:03, 6.30it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:03, 6.90it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:03<00:04, 5.63it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:03<00:04, 5.42it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:03<00:03, 5.39it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:03<00:03, 5.58it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:03<00:03, 6.11it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:04<00:03, 5.87it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:04<00:03, 5.66it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:04<00:02, 6.06it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:04<00:02, 5.85it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:04<00:01, 7.68it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:04<00:01, 7.78it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:05<00:01, 9.49it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:05<00:00, 10.03it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:05<00:00, 9.11it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:05<00:00, 9.64it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:05<00:00, 9.41it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:05<00:00, 9.35it/s]\u001b[A\n",
+ "100%|██████████| 35/35 [00:06<00:00, 11.24it/s]\u001b[A\n",
+ " 60%|██████ | 3/5 [00:23<00:15, 7.59s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n",
+ "Loading pretrained parameter \"ffn.7.weight\".\n",
+ "Loading pretrained parameter \"ffn.7.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:49, 1.47s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:01<00:26, 1.26it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:01<00:15, 2.04it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:11, 2.75it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:08, 3.60it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:06, 4.16it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:06, 4.39it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:02<00:05, 5.16it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:02<00:04, 5.76it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:02<00:03, 6.54it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:03, 6.67it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:03<00:03, 6.44it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:03<00:03, 5.90it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:03<00:03, 5.38it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:03<00:03, 5.15it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:04<00:03, 5.85it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:04<00:03, 5.85it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:04<00:02, 5.82it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:04<00:02, 5.67it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:04<00:02, 6.19it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:04<00:01, 7.58it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:05<00:01, 7.44it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:05<00:01, 8.96it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:05<00:00, 9.53it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:05<00:00, 8.83it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:05<00:00, 8.44it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:05<00:00, 8.55it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:05<00:00, 8.54it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:05<00:00, 8.40it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:06<00:00, 8.62it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:06<00:00, 8.74it/s]\u001b[A\n",
+ " 80%|████████ | 4/5 [00:30<00:07, 7.17s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n",
+ "Loading pretrained parameter \"ffn.7.weight\".\n",
+ "Loading pretrained parameter \"ffn.7.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:50, 1.48s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:01<00:26, 1.25it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:01<00:15, 2.05it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:11, 2.77it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:08, 3.62it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:06, 4.27it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:05, 4.68it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:02<00:05, 5.19it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:02<00:04, 5.76it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:02<00:04, 6.04it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:03, 6.71it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:03<00:03, 5.95it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:03<00:04, 5.24it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:03<00:03, 5.69it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:03<00:03, 5.57it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:03<00:03, 6.28it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:04<00:02, 6.64it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:04<00:03, 5.18it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:04<00:02, 5.79it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:04<00:02, 6.46it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:04<00:01, 8.11it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:04<00:01, 8.08it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:05<00:01, 9.59it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:05<00:00, 9.09it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:05<00:00, 8.95it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:05<00:00, 8.38it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:05<00:00, 8.28it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:05<00:00, 9.04it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:05<00:00, 8.79it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:06<00:00, 8.45it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:06<00:00, 8.61it/s]\u001b[A\n",
+ "100%|██████████| 5/5 [00:37<00:00, 7.41s/it]\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Saving predictions to /dev/null\n",
+ "Elapsed time = 0:00:38\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " smiles | \n",
+ " solvent | \n",
+ " peakwavs_max | \n",
+ " peakwavs_max_pred | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " C1CCCCC1 | \n",
+ " 376.0 | \n",
+ " 378.089791 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CCOC(C)=O | \n",
+ " 392.0 | \n",
+ " 388.387075 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CC#N | \n",
+ " 396.0 | \n",
+ " 394.557472 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CCO | \n",
+ " 400.0 | \n",
+ " 400.817724 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " OCC(O)CO | \n",
+ " 413.0 | \n",
+ " 410.902339 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1705 | \n",
+ " c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 | \n",
+ " C[N+](=O)[O-] | \n",
+ " 424.0 | \n",
+ " 425.084342 | \n",
+ "
\n",
+ " \n",
+ " 1706 | \n",
+ " c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 | \n",
+ " CS(C)=O | \n",
+ " 432.0 | \n",
+ " 428.209718 | \n",
+ "
\n",
+ " \n",
+ " 1707 | \n",
+ " COc1cc(C)c(-c2cc(-c3c(C)cc(OC)cc3C)c3ccc4c(-c5... | \n",
+ " ClCCl | \n",
+ " 367.0 | \n",
+ " 370.200150 | \n",
+ "
\n",
+ " \n",
+ " 1708 | \n",
+ " N#Cc1c(N2CCCCC2)cc(-c2cccc3ccccc23)c2c1-c1cccc... | \n",
+ " C1CCOC1 | \n",
+ " 358.0 | \n",
+ " 355.987562 | \n",
+ "
\n",
+ " \n",
+ " 1709 | \n",
+ " N#Cc1c(N2CCCC2)cc(-c2ccccc2)c2c1Cc1ccccc1-2 | \n",
+ " C1CCOC1 | \n",
+ " 382.0 | \n",
+ " 368.792145 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1710 rows × 4 columns
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " smiles ... peakwavs_max_pred\n",
+ "0 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 378.089791\n",
+ "1 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 388.387075\n",
+ "2 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 394.557472\n",
+ "3 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 400.817724\n",
+ "4 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 410.902339\n",
+ "... ... ... ...\n",
+ "1705 c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 ... 425.084342\n",
+ "1706 c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 ... 428.209718\n",
+ "1707 COc1cc(C)c(-c2cc(-c3c(C)cc(OC)cc3C)c3ccc4c(-c5... ... 370.200150\n",
+ "1708 N#Cc1c(N2CCCCC2)cc(-c2cccc3ccccc23)c2c1-c1cccc... ... 355.987562\n",
+ "1709 N#Cc1c(N2CCCC2)cc(-c2ccccc2)c2c1Cc1ccccc1-2 ... 368.792145\n",
+ "\n",
+ "[1710 rows x 4 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 4
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Predict TDDFT peak in vacuum"
+ ],
+ "metadata": {
+ "id": "GE9Tof7UK8cI"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Equivalent to command line:**\n",
+ "\n",
+ "`python uvvisml/predict.py --test_file uvvisml/data/splits/lambda_max_abs/deep4chem/group_by_smiles/smiles_target_test.csv --property vertical_excitation_eV_tddft --method chemprop --preds_file test_preds.csv`"
+ ],
+ "metadata": {
+ "id": "zypfNmVzJKEc"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "arguments = [\n",
+ "    '--test_path', test_file,\n",
+ "    '--preds_path', '/dev/null',\n",
+ "    '--checkpoint_dir', 'models/lambda_max_abs_wb97xd3/chemprop/all_wb97xd3/production/fold_0',\n",
+ "    '--number_of_molecules', '1',\n",
+ "    #'--gpu', '0'\n",
+ "]\n",
+ "# Parse the CLI-style argument list, then predict with the checkpoint ensemble.\n",
+ "args = chemprop.args.PredictArgs().parse_args(arguments)\n",
+ "preds = chemprop.train.make_predictions(args=args)\n",
+ "# Keep the first value of each prediction row; these predictions are in eV.\n",
+ "preds = [row[0] for row in preds]\n",
+ "df['peakwavs_max_pred'] = preds\n",
+ "df['peakwavs_max_pred'] = df['peakwavs_max_pred'].rdiv(1240) # convert from eV to nm (1240 / eV)\n",
+ "df"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "kswx6y_uJHqW",
+ "outputId": "bd9ea495-c3ef-47bb-a138-508aeb281a5e"
+ },
+ "execution_count": 5,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading training args\n",
+ "Setting molecule featurization parameters to default.\n",
+ "Loading data\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "1710it [00:00, 80922.70it/s]\n",
+ "100%|██████████| 1710/1710 [00:00<00:00, 161116.45it/s]\n",
+ "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:481: UserWarning: This DataLoader will create 8 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.\n",
+ " cpuset_checked))\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Validating SMILES\n",
+ "Test size = 1,710\n",
+ "Predicting with an ensemble of 5 models\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:48, 1.44s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:01<00:24, 1.34it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:01<00:15, 2.07it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:11, 2.62it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:09, 3.21it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:08, 3.56it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:08, 3.49it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:02<00:06, 3.94it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:03<00:05, 4.39it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:03<00:05, 4.76it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:04, 4.92it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:03<00:04, 4.83it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:04<00:05, 4.19it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:04<00:04, 4.48it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:04<00:04, 4.28it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:04<00:04, 4.68it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:04<00:03, 4.69it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:05<00:03, 4.74it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:05<00:03, 4.88it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:05<00:02, 5.15it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:05<00:02, 5.58it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:05<00:02, 5.98it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:05<00:02, 5.14it/s]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [00:06<00:01, 5.62it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:06<00:01, 6.21it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:06<00:01, 6.26it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:06<00:01, 6.22it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:06<00:01, 5.57it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:07<00:01, 5.17it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:07<00:00, 5.41it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:07<00:00, 5.42it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:07<00:00, 5.16it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:07<00:00, 5.22it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:07<00:00, 5.35it/s]\u001b[A\n",
+ " 20%|██ | 1/5 [00:08<00:33, 8.27s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:48, 1.42s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:01<00:23, 1.42it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:01<00:14, 2.17it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:11, 2.64it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:09, 3.24it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:08, 3.59it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:07, 3.52it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:02<00:06, 4.02it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:03<00:05, 4.43it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:03<00:05, 4.71it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:04, 4.92it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:03<00:04, 4.77it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:03<00:05, 4.24it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:04<00:04, 4.51it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:04<00:04, 4.31it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:04<00:04, 4.65it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:04<00:03, 4.85it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:05<00:03, 4.42it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:05<00:03, 4.89it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:05<00:02, 5.22it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:05<00:02, 5.64it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:05<00:02, 6.07it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:05<00:02, 5.37it/s]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [00:06<00:01, 5.80it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:06<00:01, 6.41it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:06<00:01, 6.38it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:06<00:01, 6.32it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:06<00:01, 5.53it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:06<00:01, 5.19it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:07<00:00, 5.41it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:07<00:00, 5.41it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:07<00:00, 5.18it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:07<00:00, 5.29it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:07<00:00, 5.35it/s]\u001b[A\n",
+ " 40%|████ | 2/5 [00:16<00:24, 8.21s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:48, 1.42s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:01<00:23, 1.41it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:01<00:14, 2.16it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:11, 2.66it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:09, 3.27it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:07, 3.66it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:07, 3.62it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:02<00:06, 4.12it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:03<00:05, 4.57it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:03<00:05, 4.94it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:04, 5.08it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:03<00:04, 4.91it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:03<00:05, 4.35it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:04<00:04, 4.55it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:04<00:04, 4.27it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:04<00:04, 4.60it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:04<00:03, 4.89it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:04<00:03, 4.64it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:05<00:03, 4.82it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:05<00:02, 5.17it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:05<00:02, 5.52it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:05<00:02, 5.90it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:05<00:02, 5.32it/s]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [00:05<00:01, 5.80it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:06<00:01, 6.31it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:06<00:01, 6.36it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:06<00:01, 6.23it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:06<00:01, 5.52it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:06<00:01, 5.18it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:07<00:00, 5.42it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:07<00:00, 5.31it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:07<00:00, 5.11it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:07<00:00, 5.23it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:07<00:00, 5.36it/s]\u001b[A\n",
+ " 60%|██████ | 3/5 [00:24<00:16, 8.18s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:48, 1.42s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:01<00:23, 1.40it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:01<00:14, 2.15it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:11, 2.67it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:09, 3.27it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:07, 3.64it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:07, 3.66it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:02<00:06, 4.19it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:03<00:05, 4.62it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:03<00:05, 4.80it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:04, 4.98it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:03<00:04, 4.69it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:03<00:05, 4.20it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:04<00:04, 4.43it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:04<00:04, 4.42it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:04<00:03, 4.79it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:04<00:03, 5.04it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:05<00:03, 4.51it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:05<00:03, 4.45it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:05<00:03, 4.62it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:05<00:02, 5.12it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:05<00:02, 5.58it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:05<00:02, 5.26it/s]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [00:06<00:01, 5.77it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:06<00:01, 6.22it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:06<00:01, 6.25it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:06<00:01, 5.98it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:06<00:01, 5.36it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:06<00:01, 5.09it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:07<00:00, 5.25it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:07<00:00, 5.48it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:07<00:00, 5.16it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:07<00:00, 5.24it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:07<00:00, 5.35it/s]\u001b[A\n",
+ " 80%|████████ | 4/5 [00:32<00:08, 8.21s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:48, 1.44s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:01<00:23, 1.39it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:01<00:14, 2.15it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:11, 2.68it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:09, 3.29it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:08, 3.50it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:08, 3.47it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:02<00:06, 4.00it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:03<00:05, 4.46it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:03<00:05, 4.83it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:04, 4.94it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:03<00:04, 4.79it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:04<00:05, 3.69it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:04<00:05, 3.76it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:04<00:05, 3.65it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:04<00:04, 3.89it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:05<00:04, 3.91it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:05<00:04, 4.06it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:05<00:03, 4.07it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:05<00:03, 4.45it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:05<00:02, 5.06it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:06<00:02, 5.56it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:06<00:02, 5.07it/s]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [00:06<00:01, 5.59it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:06<00:01, 6.26it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:06<00:01, 6.28it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:06<00:01, 6.22it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:07<00:01, 5.45it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:07<00:01, 5.12it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:07<00:00, 5.37it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:07<00:00, 5.29it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:07<00:00, 5.11it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:08<00:00, 5.23it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:08<00:00, 5.29it/s]\u001b[A\n",
+ "100%|██████████| 5/5 [00:41<00:00, 8.28s/it]\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Saving predictions to /dev/null\n",
+ "Elapsed time = 0:00:42\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " smiles | \n",
+ " solvent | \n",
+ " peakwavs_max | \n",
+ " peakwavs_max_pred | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " C1CCCCC1 | \n",
+ " 376.0 | \n",
+ " 309.772465 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CCOC(C)=O | \n",
+ " 392.0 | \n",
+ " 309.772465 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CC#N | \n",
+ " 396.0 | \n",
+ " 309.772465 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CCO | \n",
+ " 400.0 | \n",
+ " 309.772465 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " OCC(O)CO | \n",
+ " 413.0 | \n",
+ " 309.772465 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1705 | \n",
+ " c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 | \n",
+ " C[N+](=O)[O-] | \n",
+ " 424.0 | \n",
+ " 346.755560 | \n",
+ "
\n",
+ " \n",
+ " 1706 | \n",
+ " c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 | \n",
+ " CS(C)=O | \n",
+ " 432.0 | \n",
+ " 346.755560 | \n",
+ "
\n",
+ " \n",
+ " 1707 | \n",
+ " COc1cc(C)c(-c2cc(-c3c(C)cc(OC)cc3C)c3ccc4c(-c5... | \n",
+ " ClCCl | \n",
+ " 367.0 | \n",
+ " 318.639569 | \n",
+ "
\n",
+ " \n",
+ " 1708 | \n",
+ " N#Cc1c(N2CCCCC2)cc(-c2cccc3ccccc23)c2c1-c1cccc... | \n",
+ " C1CCOC1 | \n",
+ " 358.0 | \n",
+ " 310.870571 | \n",
+ "
\n",
+ " \n",
+ " 1709 | \n",
+ " N#Cc1c(N2CCCC2)cc(-c2ccccc2)c2c1Cc1ccccc1-2 | \n",
+ " C1CCOC1 | \n",
+ " 382.0 | \n",
+ " 317.668449 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1710 rows × 4 columns
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " smiles ... peakwavs_max_pred\n",
+ "0 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 309.772465\n",
+ "1 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 309.772465\n",
+ "2 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 309.772465\n",
+ "3 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 309.772465\n",
+ "4 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 309.772465\n",
+ "... ... ... ...\n",
+ "1705 c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 ... 346.755560\n",
+ "1706 c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 ... 346.755560\n",
+ "1707 COc1cc(C)c(-c2cc(-c3c(C)cc(OC)cc3C)c3ccc4c(-c5... ... 318.639569\n",
+ "1708 N#Cc1c(N2CCCCC2)cc(-c2cccc3ccccc23)c2c1-c1cccc... ... 310.870571\n",
+ "1709 N#Cc1c(N2CCCC2)cc(-c2ccccc2)c2c1Cc1ccccc1-2 ... 317.668449\n",
+ "\n",
+ "[1710 rows x 4 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 5
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Predict experimental peak with model trained on Deep4Chem training set"
+ ],
+ "metadata": {
+ "id": "54x-eGwxLEZ1"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Equivalent to command line:**\n",
+ "\n",
+ "`python uvvisml/predict.py --test_file uvvisml/data/splits/lambda_max_abs/deep4chem/group_by_smiles/smiles_target_test.csv --property absorption_peak_nm_expt --method chemprop --preds_file test_preds.csv --train_dataset deep4chem`"
+ ],
+ "metadata": {
+ "id": "_68WpdSDI110"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "arguments = [\n",
+ "    '--test_path', test_file,\n",
+ "    '--preds_path', '/dev/null',\n",
+ "    '--checkpoint_dir', 'models/lambda_max_abs/chemprop/deep4chem/production/fold_0',\n",
+ "    '--number_of_molecules', '2',\n",
+ "    #'--gpu', '0'\n",
+ "]\n",
+ "# Parse the CLI-style argument list, then predict with the checkpoint ensemble.\n",
+ "args = chemprop.args.PredictArgs().parse_args(arguments)\n",
+ "preds = chemprop.train.make_predictions(args=args)\n",
+ "# Keep the first value of each prediction row and store it next to the reference column.\n",
+ "preds = [row[0] for row in preds]\n",
+ "df['peakwavs_max_pred'] = preds\n",
+ "df"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "ICmfelGD7rcf",
+ "outputId": "ebc7e05e-b415-40bb-b4d4-5f5fc4284a86"
+ },
+ "execution_count": 6,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading training args\n",
+ "Setting molecule featurization parameters to default.\n",
+ "Loading data\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "1710it [00:00, 141559.62it/s]\n",
+ "100%|██████████| 1710/1710 [00:00<00:00, 153911.16it/s]\n",
+ "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:481: UserWarning: This DataLoader will create 8 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.\n",
+ " cpuset_checked))\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Validating SMILES\n",
+ "Test size = 1,710\n",
+ "Predicting with an ensemble of 5 models\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:02<01:34, 2.79s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:03<00:44, 1.35s/it]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:03<00:27, 1.16it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:03<00:20, 1.49it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:04<00:16, 1.85it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:04<00:14, 2.06it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:04<00:13, 2.08it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:05<00:11, 2.39it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:05<00:09, 2.64it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:05<00:08, 2.86it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:06<00:07, 3.03it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:06<00:07, 2.89it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:06<00:08, 2.64it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:07<00:07, 2.82it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:07<00:07, 2.67it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:07<00:06, 2.86it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:08<00:06, 2.90it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:08<00:05, 2.92it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:08<00:04, 3.25it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:09<00:04, 3.35it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:09<00:03, 3.51it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:09<00:03, 3.61it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:10<00:03, 3.21it/s]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [00:10<00:03, 3.47it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:10<00:02, 3.68it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:10<00:02, 3.68it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:11<00:02, 3.59it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:11<00:02, 3.18it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:11<00:02, 2.99it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:12<00:01, 3.12it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:12<00:01, 3.12it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:12<00:01, 3.00it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:13<00:00, 3.02it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:13<00:00, 3.12it/s]\u001b[A\n",
+ " 20%|██ | 1/5 [00:13<00:55, 13.86s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<01:02, 1.83s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:02<00:31, 1.04it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:02<00:20, 1.54it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:17, 1.77it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:03<00:14, 2.11it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:03<00:12, 2.28it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:04<00:12, 2.24it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:04<00:10, 2.57it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:04<00:09, 2.81it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:04<00:08, 2.95it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:05<00:07, 3.06it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:05<00:07, 2.93it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:06<00:08, 2.52it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:06<00:07, 2.71it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:06<00:07, 2.66it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:07<00:06, 2.82it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:07<00:06, 2.85it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:07<00:05, 2.91it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:07<00:04, 3.21it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:08<00:04, 3.34it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:08<00:04, 3.35it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:08<00:03, 3.46it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:09<00:03, 3.19it/s]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [00:09<00:03, 3.42it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:09<00:02, 3.68it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:09<00:02, 3.67it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:10<00:02, 3.62it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:10<00:02, 3.21it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:11<00:02, 2.92it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:11<00:01, 3.07it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:11<00:01, 3.16it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:11<00:01, 2.99it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:12<00:00, 3.02it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:12<00:00, 3.09it/s]\u001b[A\n",
+ " 40%|████ | 2/5 [00:27<00:40, 13.45s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<01:01, 1.81s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:02<00:31, 1.05it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:02<00:20, 1.55it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:16, 1.83it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:03<00:13, 2.15it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:03<00:12, 2.30it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:04<00:12, 2.18it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:04<00:10, 2.49it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:04<00:09, 2.73it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:04<00:08, 2.92it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:05<00:07, 3.02it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:05<00:07, 2.93it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:06<00:08, 2.62it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:06<00:07, 2.77it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:06<00:07, 2.67it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:07<00:06, 2.81it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:07<00:06, 2.90it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:07<00:05, 2.97it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:07<00:04, 3.28it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:08<00:04, 3.39it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:08<00:03, 3.59it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:08<00:03, 3.69it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:09<00:03, 3.14it/s]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [00:09<00:03, 3.42it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:09<00:02, 3.71it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:09<00:02, 3.68it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:10<00:02, 3.59it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:10<00:02, 3.18it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:10<00:01, 3.04it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:11<00:01, 3.16it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:11<00:01, 3.15it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:11<00:00, 3.01it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:12<00:00, 3.03it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:12<00:00, 3.12it/s]\u001b[A\n",
+ " 60%|██████ | 3/5 [00:39<00:26, 13.22s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<01:01, 1.82s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:02<00:31, 1.05it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:02<00:20, 1.54it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:17, 1.81it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:03<00:13, 2.16it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:03<00:12, 2.32it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:03<00:12, 2.31it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:04<00:10, 2.61it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:04<00:09, 2.82it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:04<00:08, 3.00it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:05<00:07, 3.10it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:05<00:07, 2.89it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:05<00:08, 2.59it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:06<00:07, 2.75it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:06<00:07, 2.71it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:07<00:06, 2.84it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:07<00:06, 2.91it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:07<00:05, 2.96it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:07<00:05, 3.13it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:08<00:04, 3.28it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:08<00:04, 3.28it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:08<00:03, 3.44it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:09<00:03, 3.20it/s]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [00:09<00:03, 3.48it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:09<00:02, 3.74it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:09<00:02, 3.73it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:10<00:02, 3.67it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:10<00:02, 3.26it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:10<00:02, 2.99it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:11<00:01, 3.13it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:11<00:01, 3.23it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:11<00:00, 3.04it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:12<00:00, 3.03it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:12<00:00, 3.11it/s]\u001b[A\n",
+ " 80%|████████ | 4/5 [00:52<00:13, 13.10s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<01:01, 1.79s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:02<00:31, 1.05it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:02<00:20, 1.55it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:16, 1.83it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:03<00:13, 2.17it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:03<00:12, 2.32it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:03<00:12, 2.26it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:04<00:10, 2.56it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:04<00:09, 2.82it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:04<00:08, 2.95it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:05<00:08, 2.91it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:05<00:08, 2.86it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:06<00:08, 2.62it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:06<00:07, 2.78it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:06<00:07, 2.73it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:07<00:06, 2.87it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:07<00:06, 2.92it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:07<00:05, 2.99it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:07<00:05, 3.08it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:08<00:04, 3.23it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:08<00:04, 3.31it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:08<00:03, 3.46it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:09<00:03, 3.11it/s]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [00:09<00:03, 3.37it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:09<00:02, 3.63it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:09<00:02, 3.60it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:10<00:02, 3.48it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:10<00:02, 3.12it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:11<00:02, 2.92it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:11<00:01, 3.07it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:11<00:01, 3.16it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:11<00:00, 3.01it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:12<00:00, 3.04it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:12<00:00, 3.11it/s]\u001b[A\n",
+ "100%|██████████| 5/5 [01:05<00:00, 13.18s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Saving predictions to /dev/null\n",
+ "Elapsed time = 0:01:06\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " smiles | \n",
+ " solvent | \n",
+ " peakwavs_max | \n",
+ " peakwavs_max_pred | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " C1CCCCC1 | \n",
+ " 376.0 | \n",
+ " 382.903437 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CCOC(C)=O | \n",
+ " 392.0 | \n",
+ " 395.478472 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CC#N | \n",
+ " 396.0 | \n",
+ " 400.821401 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CCO | \n",
+ " 400.0 | \n",
+ " 418.106349 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " OCC(O)CO | \n",
+ " 413.0 | \n",
+ " 426.706045 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1705 | \n",
+ " c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 | \n",
+ " C[N+](=O)[O-] | \n",
+ " 424.0 | \n",
+ " 474.872657 | \n",
+ "
\n",
+ " \n",
+ " 1706 | \n",
+ " c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 | \n",
+ " CS(C)=O | \n",
+ " 432.0 | \n",
+ " 466.555822 | \n",
+ "
\n",
+ " \n",
+ " 1707 | \n",
+ " COc1cc(C)c(-c2cc(-c3c(C)cc(OC)cc3C)c3ccc4c(-c5... | \n",
+ " ClCCl | \n",
+ " 367.0 | \n",
+ " 365.403474 | \n",
+ "
\n",
+ " \n",
+ " 1708 | \n",
+ " N#Cc1c(N2CCCCC2)cc(-c2cccc3ccccc23)c2c1-c1cccc... | \n",
+ " C1CCOC1 | \n",
+ " 358.0 | \n",
+ " 349.835862 | \n",
+ "
\n",
+ " \n",
+ " 1709 | \n",
+ " N#Cc1c(N2CCCC2)cc(-c2ccccc2)c2c1Cc1ccccc1-2 | \n",
+ " C1CCOC1 | \n",
+ " 382.0 | \n",
+ " 352.427928 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1710 rows × 4 columns
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " smiles ... peakwavs_max_pred\n",
+ "0 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 382.903437\n",
+ "1 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 395.478472\n",
+ "2 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 400.821401\n",
+ "3 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 418.106349\n",
+ "4 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 426.706045\n",
+ "... ... ... ...\n",
+ "1705 c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 ... 474.872657\n",
+ "1706 c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 ... 466.555822\n",
+ "1707 COc1cc(C)c(-c2cc(-c3c(C)cc(OC)cc3C)c3ccc4c(-c5... ... 365.403474\n",
+ "1708 N#Cc1c(N2CCCCC2)cc(-c2cccc3ccccc23)c2c1-c1cccc... ... 349.835862\n",
+ "1709 N#Cc1c(N2CCCC2)cc(-c2ccccc2)c2c1Cc1ccccc1-2 ... 352.427928\n",
+ "\n",
+ "[1710 rows x 4 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 6
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Predict experimental peak with multi-fidelity model"
+ ],
+ "metadata": {
+ "id": "ha40pwkQKZ2F"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Equivalent to command line:**\n",
+ "\n",
+ "`python uvvisml/predict.py --test_file uvvisml/data/splits/lambda_max_abs/deep4chem/group_by_smiles/smiles_target_test.csv --property absorption_peak_nm_expt --method chemprop_tddft --preds_file test_preds.csv`"
+ ],
+ "metadata": {
+ "id": "8IpT4M5CJjln"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# TDDFT Predictions\n",
+ "# Stage 1: run the checkpoints under lambda_max_abs_wb97xd3 on the test file and\n",
+ "# save the output to test_tddft_preds.csv; the returned list is not used here.\n",
+ "arguments = [\n",
+ " '--test_path', test_file,\n",
+ " '--preds_path', 'test_tddft_preds.csv',\n",
+ " '--checkpoint_dir', 'models/lambda_max_abs_wb97xd3/chemprop/all_wb97xd3/production/fold_0',\n",
+ " '--number_of_molecules', '1',\n",
+ " #'--gpu', '0'\n",
+ "]\n",
+ "\n",
+ "args = chemprop.args.PredictArgs().parse_args(arguments)\n",
+ "_ = chemprop.train.make_predictions(args=args)\n",
+ "\n",
+ "# Convert Predictions to Features File\n",
+ "# NOTE(review): presumably this script reads test_tddft_preds.csv and writes\n",
+ "# features_test.csv, which stage 2 consumes below - confirm against the script.\n",
+ "!python models/tddft_to_features_file.py\n",
+ "\n",
+ "# Experimental Predictions\n",
+ "# Stage 2: run the chemprop_tddft checkpoints with features_test.csv supplied as\n",
+ "# extra features; predictions are only needed in memory, so the file output is\n",
+ "# discarded via /dev/null.\n",
+ "arguments = [\n",
+ " '--test_path', test_file,\n",
+ " '--preds_path', '/dev/null',\n",
+ " '--checkpoint_dir', 'models/lambda_max_abs/chemprop_tddft/combined/production/fold_0',\n",
+ " '--number_of_molecules', '2',\n",
+ " # Trailing comma is required: without it, un-commenting the '--gpu' line below\n",
+ " # would silently concatenate the adjacent string literals into one argument.\n",
+ " '--features_path', 'features_test.csv',\n",
+ " #'--gpu', '0'\n",
+ "]\n",
+ "\n",
+ "args = chemprop.args.PredictArgs().parse_args(arguments)\n",
+ "preds = chemprop.train.make_predictions(args=args)\n",
+ "\n",
+ "# Keep the first value of each prediction row and attach it to df for display.\n",
+ "preds = [x[0] for x in preds]\n",
+ "df['peakwavs_max_pred'] = preds\n",
+ "df"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "6OuRAYrKJjaa",
+ "outputId": "31d1f5a5-4355-4bbb-d3ea-d0be94169167"
+ },
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading training args\n",
+ "Setting molecule featurization parameters to default.\n",
+ "Loading data\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "1710it [00:00, 134827.05it/s]\n",
+ "100%|██████████| 1710/1710 [00:00<00:00, 136684.77it/s]\n",
+ "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:481: UserWarning: This DataLoader will create 8 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.\n",
+ " cpuset_checked))\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Validating SMILES\n",
+ "Test size = 1,710\n",
+ "Predicting with an ensemble of 5 models\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:49, 1.45s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:01<00:24, 1.35it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:01<00:15, 2.11it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:11, 2.60it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:09, 3.20it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:08, 3.61it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:07, 3.52it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:02<00:06, 4.04it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:03<00:05, 4.52it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:03<00:05, 4.92it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:04, 5.04it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:03<00:05, 4.58it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:04<00:05, 4.22it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:04<00:04, 4.47it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:04<00:04, 4.41it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:04<00:04, 4.74it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:04<00:03, 4.82it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:05<00:03, 4.55it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:05<00:03, 4.99it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:05<00:03, 4.90it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:05<00:02, 5.44it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:05<00:02, 5.83it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:05<00:02, 5.39it/s]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [00:06<00:01, 5.88it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:06<00:01, 6.42it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:06<00:01, 6.13it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:06<00:01, 6.04it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:06<00:01, 5.22it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:07<00:01, 5.03it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:07<00:00, 5.29it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:07<00:00, 5.50it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:07<00:00, 5.22it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:07<00:00, 5.28it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:07<00:00, 5.36it/s]\u001b[A\n",
+ " 20%|██ | 1/5 [00:08<00:32, 8.24s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:48, 1.44s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:01<00:24, 1.36it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:01<00:15, 2.11it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:11, 2.69it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:09, 3.28it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:07, 3.63it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:07, 3.54it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:02<00:06, 4.08it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:03<00:05, 4.51it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:03<00:05, 4.88it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:04, 4.97it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:03<00:04, 4.74it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:03<00:05, 4.33it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:04<00:04, 4.57it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:04<00:04, 4.31it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:04<00:04, 4.69it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:04<00:03, 4.68it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:05<00:03, 4.50it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:05<00:03, 5.04it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:05<00:02, 5.36it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:05<00:02, 5.76it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:05<00:02, 6.08it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:05<00:02, 5.07it/s]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [00:06<00:01, 5.56it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:06<00:01, 6.09it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:06<00:01, 6.19it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:06<00:01, 6.17it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:06<00:01, 5.38it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:06<00:01, 5.15it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:07<00:00, 5.46it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:07<00:00, 5.60it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:07<00:00, 5.33it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:07<00:00, 5.39it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:07<00:00, 5.47it/s]\u001b[A\n",
+ " 40%|████ | 2/5 [00:16<00:24, 8.20s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:07<04:04, 7.21s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:07<01:47, 3.25s/it]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:07<00:59, 1.87s/it]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:08<00:38, 1.23s/it]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:08<00:25, 1.17it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:08<00:18, 1.57it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:08<00:14, 1.88it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:09<00:11, 2.40it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:09<00:08, 2.97it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:09<00:07, 3.51it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:09<00:06, 3.86it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:09<00:05, 4.04it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:10<00:06, 3.66it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:10<00:05, 4.00it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:10<00:05, 3.95it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:10<00:04, 4.40it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:10<00:03, 4.59it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:11<00:03, 4.34it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:11<00:03, 4.72it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:11<00:02, 5.13it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:11<00:02, 5.43it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:11<00:02, 5.86it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:12<00:02, 5.23it/s]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [00:12<00:01, 5.77it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:12<00:01, 6.45it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:12<00:01, 6.46it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:12<00:01, 6.39it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:12<00:01, 5.63it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:13<00:01, 5.12it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:13<00:00, 5.33it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:13<00:00, 5.45it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:13<00:00, 5.21it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:13<00:00, 5.24it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:14<00:00, 5.32it/s]\u001b[A\n",
+ " 60%|██████ | 3/5 [00:31<00:22, 11.33s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:53, 1.57s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:01<00:26, 1.25it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:02<00:16, 1.95it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:12, 2.49it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:09, 3.09it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:08, 3.46it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:08, 3.36it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:03<00:06, 3.90it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:03<00:05, 4.35it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:03<00:05, 4.57it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:05, 4.78it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:03<00:04, 4.67it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:04<00:05, 4.09it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:04<00:04, 4.33it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:04<00:05, 3.98it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:04<00:04, 4.02it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:05<00:04, 4.01it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:05<00:04, 3.57it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:05<00:04, 3.90it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:06<00:03, 3.94it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:06<00:03, 4.52it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:06<00:02, 4.97it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:06<00:02, 4.65it/s]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [00:06<00:02, 5.18it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:06<00:01, 5.72it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:07<00:01, 5.71it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:07<00:01, 5.63it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:07<00:01, 5.09it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:07<00:01, 4.84it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:07<00:00, 5.08it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:08<00:00, 4.99it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:08<00:00, 4.78it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:08<00:00, 4.92it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:08<00:00, 4.98it/s]\u001b[A\n",
+ " 80%|████████ | 4/5 [00:40<00:10, 10.40s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:49, 1.46s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:01<00:24, 1.37it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:01<00:15, 2.09it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:12, 2.55it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:09, 3.13it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:08, 3.49it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:08, 3.40it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:03<00:07, 3.55it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:03<00:06, 4.04it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:03<00:05, 4.48it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:05, 4.69it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:04<00:06, 3.75it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:04<00:06, 3.39it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:04<00:06, 3.35it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:05<00:06, 3.06it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:05<00:05, 3.52it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:05<00:04, 3.99it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:05<00:05, 3.36it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:06<00:04, 3.35it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:06<00:04, 3.39it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:06<00:03, 4.00it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:06<00:02, 4.57it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:06<00:02, 4.53it/s]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [00:07<00:02, 5.06it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:07<00:01, 5.56it/s]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [00:07<00:01, 5.68it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:07<00:01, 5.68it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:07<00:01, 4.88it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:08<00:01, 4.74it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:08<00:01, 4.90it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:08<00:00, 5.15it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:08<00:00, 4.86it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:08<00:00, 4.97it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:09<00:00, 5.05it/s]\u001b[A\n",
+ "100%|██████████| 5/5 [00:49<00:00, 9.98s/it]\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Saving predictions to test_tddft_preds.csv\n",
+ "Elapsed time = 0:00:50\n",
+ "Loading training args\n",
+ "Setting molecule featurization parameters to default.\n",
+ "Loading data\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "1710it [00:00, 121014.04it/s]\n",
+ "100%|██████████| 1710/1710 [00:00<00:00, 82896.17it/s]\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Validating SMILES\n",
+ "Test size = 1,710\n",
+ "Predicting with an ensemble of 5 models\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n",
+ "Loading pretrained parameter \"ffn.7.weight\".\n",
+ "Loading pretrained parameter \"ffn.7.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:04<02:26, 4.30s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:07<01:56, 3.53s/it]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:09<01:38, 3.09s/it]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:13<01:41, 3.27s/it]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:16<01:34, 3.14s/it]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:19<01:34, 3.26s/it]\u001b[A\n",
+ " 20%|██ | 7/35 [00:23<01:38, 3.50s/it]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:26<01:25, 3.18s/it]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:28<01:18, 3.01s/it]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:31<01:10, 2.83s/it]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:33<01:06, 2.76s/it]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:37<01:06, 2.87s/it]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:41<01:11, 3.26s/it]\u001b[A\n",
+ " 40%|████ | 14/35 [00:44<01:05, 3.13s/it]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:47<01:04, 3.25s/it]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:50<00:57, 3.01s/it]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:52<00:51, 2.87s/it]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:55<00:47, 2.82s/it]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:57<00:42, 2.67s/it]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [01:00<00:41, 2.73s/it]\u001b[A\n",
+ " 60%|██████ | 21/35 [01:03<00:37, 2.68s/it]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [01:05<00:34, 2.69s/it]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [01:09<00:36, 3.08s/it]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [01:12<00:31, 2.90s/it]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [01:14<00:27, 2.71s/it]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [01:17<00:24, 2.76s/it]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [01:20<00:22, 2.83s/it]\u001b[A\n",
+ " 80%|████████ | 28/35 [01:24<00:22, 3.25s/it]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [01:28<00:20, 3.49s/it]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [01:31<00:16, 3.34s/it]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [01:34<00:13, 3.26s/it]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [01:38<00:10, 3.45s/it]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [01:42<00:06, 3.43s/it]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [01:45<00:03, 3.37s/it]\u001b[A\n",
+ "100%|██████████| 35/35 [01:45<00:00, 2.57s/it]\u001b[A\n",
+ " 20%|██ | 1/5 [01:46<07:06, 106.73s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n",
+ "Loading pretrained parameter \"ffn.7.weight\".\n",
+ "Loading pretrained parameter \"ffn.7.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:04<02:42, 4.79s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:07<02:02, 3.72s/it]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:10<01:41, 3.18s/it]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:13<01:42, 3.32s/it]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:16<01:34, 3.16s/it]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:20<01:35, 3.29s/it]\u001b[A\n",
+ " 20%|██ | 7/35 [00:24<01:38, 3.52s/it]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:26<01:26, 3.19s/it]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:29<01:18, 3.02s/it]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:31<01:11, 2.85s/it]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:34<01:07, 2.79s/it]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:37<01:06, 2.89s/it]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:41<01:11, 3.26s/it]\u001b[A\n",
+ " 40%|████ | 14/35 [00:44<01:05, 3.12s/it]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:48<01:04, 3.25s/it]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:50<00:56, 2.99s/it]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:53<00:51, 2.86s/it]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:55<00:47, 2.81s/it]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:58<00:42, 2.66s/it]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [01:00<00:40, 2.73s/it]\u001b[A\n",
+ " 60%|██████ | 21/35 [01:03<00:37, 2.68s/it]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [01:06<00:35, 2.69s/it]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [01:10<00:36, 3.07s/it]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [01:12<00:31, 2.90s/it]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [01:14<00:27, 2.72s/it]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [01:17<00:25, 2.78s/it]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [01:20<00:22, 2.85s/it]\u001b[A\n",
+ " 80%|████████ | 28/35 [01:25<00:22, 3.28s/it]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [01:29<00:21, 3.50s/it]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [01:32<00:16, 3.36s/it]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [01:35<00:13, 3.28s/it]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [01:39<00:10, 3.48s/it]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [01:42<00:06, 3.46s/it]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [01:45<00:03, 3.40s/it]\u001b[A\n",
+ "100%|██████████| 35/35 [01:46<00:00, 2.59s/it]\u001b[A\n",
+ " 40%|████ | 2/5 [03:34<05:21, 107.19s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n",
+ "Loading pretrained parameter \"ffn.7.weight\".\n",
+ "Loading pretrained parameter \"ffn.7.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:04<02:26, 4.31s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:07<01:55, 3.51s/it]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:09<01:37, 3.06s/it]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:13<01:41, 3.27s/it]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:16<01:33, 3.13s/it]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:19<01:34, 3.25s/it]\u001b[A\n",
+ " 20%|██ | 7/35 [00:23<01:37, 3.47s/it]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:26<01:24, 3.14s/it]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:28<01:16, 2.96s/it]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:31<01:09, 2.79s/it]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:33<01:05, 2.73s/it]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:36<01:05, 2.84s/it]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:40<01:10, 3.22s/it]\u001b[A\n",
+ " 40%|████ | 14/35 [00:43<01:04, 3.09s/it]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:47<01:04, 3.22s/it]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:49<00:56, 2.96s/it]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:52<00:50, 2.83s/it]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:54<00:46, 2.75s/it]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:56<00:41, 2.61s/it]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:59<00:40, 2.70s/it]\u001b[A\n",
+ " 60%|██████ | 21/35 [01:02<00:37, 2.64s/it]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [01:04<00:34, 2.65s/it]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [01:09<00:37, 3.11s/it]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [01:11<00:32, 2.92s/it]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [01:13<00:27, 2.71s/it]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [01:16<00:24, 2.76s/it]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [01:19<00:22, 2.84s/it]\u001b[A\n",
+ " 80%|████████ | 28/35 [01:24<00:22, 3.27s/it]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [01:28<00:20, 3.49s/it]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [01:31<00:16, 3.34s/it]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [01:34<00:13, 3.26s/it]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [01:37<00:10, 3.44s/it]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [01:41<00:06, 3.42s/it]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [01:44<00:03, 3.35s/it]\u001b[A\n",
+ "100%|██████████| 35/35 [01:45<00:00, 2.55s/it]\u001b[A\n",
+ " 60%|██████ | 3/5 [05:20<03:33, 106.69s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n",
+ "Loading pretrained parameter \"ffn.7.weight\".\n",
+ "Loading pretrained parameter \"ffn.7.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:04<02:27, 4.33s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:07<01:55, 3.50s/it]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:09<01:37, 3.05s/it]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:13<01:41, 3.26s/it]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:16<01:33, 3.12s/it]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:19<01:34, 3.26s/it]\u001b[A\n",
+ " 20%|██ | 7/35 [00:23<01:36, 3.45s/it]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:26<01:24, 3.14s/it]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:28<01:17, 2.97s/it]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:31<01:10, 2.82s/it]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:33<01:05, 2.75s/it]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:36<01:05, 2.85s/it]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:40<01:10, 3.22s/it]\u001b[A\n",
+ " 40%|████ | 14/35 [00:43<01:04, 3.09s/it]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:47<01:04, 3.22s/it]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:49<00:56, 2.98s/it]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:52<00:50, 2.83s/it]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:54<00:47, 2.77s/it]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:57<00:42, 2.63s/it]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:59<00:40, 2.72s/it]\u001b[A\n",
+ " 60%|██████ | 21/35 [01:02<00:37, 2.65s/it]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [01:05<00:34, 2.65s/it]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [01:09<00:36, 3.05s/it]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [01:11<00:31, 2.86s/it]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [01:13<00:26, 2.67s/it]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [01:16<00:24, 2.73s/it]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [01:19<00:22, 2.80s/it]\u001b[A\n",
+ " 80%|████████ | 28/35 [01:23<00:22, 3.24s/it]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [01:27<00:20, 3.47s/it]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [01:30<00:16, 3.34s/it]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [01:33<00:13, 3.27s/it]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [01:37<00:10, 3.46s/it]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [01:41<00:06, 3.43s/it]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [01:44<00:03, 3.35s/it]\u001b[A\n",
+ "100%|██████████| 35/35 [01:45<00:00, 2.55s/it]\u001b[A\n",
+ " 80%|████████ | 4/5 [07:06<01:46, 106.39s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n",
+ "Loading pretrained parameter \"ffn.7.weight\".\n",
+ "Loading pretrained parameter \"ffn.7.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:04<02:27, 4.35s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:07<01:55, 3.50s/it]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:09<01:37, 3.06s/it]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:13<01:43, 3.34s/it]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:16<01:35, 3.17s/it]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:19<01:35, 3.28s/it]\u001b[A\n",
+ " 20%|██ | 7/35 [00:23<01:36, 3.46s/it]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:26<01:24, 3.14s/it]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:28<01:16, 2.96s/it]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:31<01:09, 2.79s/it]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:33<01:05, 2.72s/it]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:36<01:04, 2.81s/it]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:40<01:09, 3.18s/it]\u001b[A\n",
+ " 40%|████ | 14/35 [00:43<01:04, 3.05s/it]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:47<01:04, 3.21s/it]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:49<00:56, 2.97s/it]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:52<00:51, 2.83s/it]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:54<00:46, 2.76s/it]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:56<00:41, 2.62s/it]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:59<00:40, 2.69s/it]\u001b[A\n",
+ " 60%|██████ | 21/35 [01:02<00:36, 2.63s/it]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [01:04<00:34, 2.64s/it]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [01:08<00:36, 3.03s/it]\u001b[A\n",
+ " 69%|██████▊ | 24/35 [01:11<00:31, 2.85s/it]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [01:13<00:26, 2.66s/it]\u001b[A\n",
+ " 74%|███████▍ | 26/35 [01:16<00:24, 2.72s/it]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [01:19<00:22, 2.79s/it]\u001b[A\n",
+ " 80%|████████ | 28/35 [01:23<00:22, 3.22s/it]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [01:27<00:20, 3.44s/it]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [01:30<00:16, 3.30s/it]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [01:33<00:12, 3.22s/it]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [01:37<00:10, 3.42s/it]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [01:40<00:06, 3.40s/it]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [01:43<00:03, 3.33s/it]\u001b[A\n",
+ "100%|██████████| 35/35 [01:44<00:00, 2.54s/it]\u001b[A\n",
+ "100%|██████████| 5/5 [08:51<00:00, 106.35s/it]\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Saving predictions to /dev/null\n",
+ "Elapsed time = 0:08:52\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " smiles | \n",
+ " solvent | \n",
+ " peakwavs_max | \n",
+ " peakwavs_max_pred | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " C1CCCCC1 | \n",
+ " 376.0 | \n",
+ " 375.545257 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CCOC(C)=O | \n",
+ " 392.0 | \n",
+ " 390.993980 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CC#N | \n",
+ " 396.0 | \n",
+ " 397.488817 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CCO | \n",
+ " 400.0 | \n",
+ " 400.081324 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " OCC(O)CO | \n",
+ " 413.0 | \n",
+ " 412.967337 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1705 | \n",
+ " c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 | \n",
+ " C[N+](=O)[O-] | \n",
+ " 424.0 | \n",
+ " 424.124035 | \n",
+ "
\n",
+ " \n",
+ " 1706 | \n",
+ " c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 | \n",
+ " CS(C)=O | \n",
+ " 432.0 | \n",
+ " 428.538180 | \n",
+ "
\n",
+ " \n",
+ " 1707 | \n",
+ " COc1cc(C)c(-c2cc(-c3c(C)cc(OC)cc3C)c3ccc4c(-c5... | \n",
+ " ClCCl | \n",
+ " 367.0 | \n",
+ " 355.781207 | \n",
+ "
\n",
+ " \n",
+ " 1708 | \n",
+ " N#Cc1c(N2CCCCC2)cc(-c2cccc3ccccc23)c2c1-c1cccc... | \n",
+ " C1CCOC1 | \n",
+ " 358.0 | \n",
+ " 358.098561 | \n",
+ "
\n",
+ " \n",
+ " 1709 | \n",
+ " N#Cc1c(N2CCCC2)cc(-c2ccccc2)c2c1Cc1ccccc1-2 | \n",
+ " C1CCOC1 | \n",
+ " 382.0 | \n",
+ " 380.867901 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1710 rows × 4 columns
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " smiles ... peakwavs_max_pred\n",
+ "0 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 375.545257\n",
+ "1 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 390.993980\n",
+ "2 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 397.488817\n",
+ "3 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 400.081324\n",
+ "4 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 412.967337\n",
+ "... ... ... ...\n",
+ "1705 c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 ... 424.124035\n",
+ "1706 c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 ... 428.538180\n",
+ "1707 COc1cc(C)c(-c2cc(-c3c(C)cc(OC)cc3C)c3ccc4c(-c5... ... 355.781207\n",
+ "1708 N#Cc1c(N2CCCCC2)cc(-c2cccc3ccccc23)c2c1-c1cccc... ... 358.098561\n",
+ "1709 N#Cc1c(N2CCCC2)cc(-c2ccccc2)c2c1Cc1ccccc1-2 ... 380.867901\n",
+ "\n",
+ "[1710 rows x 4 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 7
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Predict experimental peak with model trained on combined training set (with ensemble variance)"
+ ],
+ "metadata": {
+ "id": "IHGJvB47P8lI"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Equivalent to command line:**\n",
+ "\n",
+ "`python uvvisml/predict.py --test_file uvvisml/data/splits/lambda_max_abs/deep4chem/group_by_smiles/smiles_target_test.csv --property absorption_peak_nm_expt --method chemprop --preds_file test_preds.csv`"
+ ],
+ "metadata": {
+ "id": "FBPuuTzeQCfW"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "arguments = [\n",
+ " '--test_path', test_file,\n",
+ " '--preds_path', 'test_preds.csv',\n",
+ " '--checkpoint_dir', 'models/lambda_max_abs/chemprop/combined/production/fold_0',\n",
+ " '--number_of_molecules', '2',\n",
+ " '--ensemble_variance',\n",
+ " #'--gpu', '0'\n",
+ "]\n",
+ "\n",
+ "args = chemprop.args.PredictArgs().parse_args(arguments)\n",
+ "_ = chemprop.train.make_predictions(args=args)\n",
+ "\n",
+ "df = pd.read_csv('test_preds.csv')\n",
+ "df"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "t--C7XlAP8bD",
+ "outputId": "525fb972-f0e0-4ebb-969b-53cb2706a24f"
+ },
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading training args\n",
+ "Setting molecule featurization parameters to default.\n",
+ "Loading data\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "1710it [00:00, 129826.41it/s]\n",
+ "100%|██████████| 1710/1710 [00:00<00:00, 108043.62it/s]\n",
+ "/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py:481: UserWarning: This DataLoader will create 8 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.\n",
+ " cpuset_checked))\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Validating SMILES\n",
+ "Test size = 1,710\n",
+ "Predicting with an ensemble of 5 models\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\r 0%| | 0/5 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n",
+ "Loading pretrained parameter \"ffn.7.weight\".\n",
+ "Loading pretrained parameter \"ffn.7.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:51, 1.51s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:01<00:25, 1.28it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:01<00:15, 2.05it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:11, 2.74it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:08, 3.57it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:07, 4.02it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:06, 4.36it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:02<00:05, 5.05it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:02<00:04, 5.66it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:02<00:03, 6.50it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:03, 6.54it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:03<00:03, 6.14it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:03<00:04, 5.18it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:03<00:03, 5.74it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:03<00:03, 5.55it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:03<00:02, 6.36it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:04<00:03, 5.86it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:04<00:02, 6.31it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:04<00:02, 6.25it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:04<00:02, 6.61it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:04<00:01, 8.23it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:04<00:01, 7.60it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:05<00:01, 9.40it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:05<00:00, 10.14it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:05<00:00, 8.80it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:05<00:00, 9.18it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:05<00:00, 9.00it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:05<00:00, 9.14it/s]\u001b[A\n",
+ "100%|██████████| 35/35 [00:06<00:00, 11.09it/s]\u001b[A\n",
+ " 20%|██ | 1/5 [00:06<00:25, 6.42s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n",
+ "Loading pretrained parameter \"ffn.7.weight\".\n",
+ "Loading pretrained parameter \"ffn.7.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:52, 1.53s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:01<00:26, 1.26it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:01<00:15, 2.08it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:11, 2.77it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:08, 3.60it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:06, 4.29it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:06, 4.40it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:02<00:05, 5.27it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:02<00:04, 5.96it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:02<00:03, 6.64it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:03, 6.57it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:03<00:03, 6.64it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:03<00:03, 5.96it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:03<00:03, 5.34it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:03<00:03, 5.43it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:03<00:03, 5.97it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:04<00:02, 6.53it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:04<00:03, 5.28it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:04<00:03, 4.98it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:04<00:02, 5.75it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:04<00:02, 6.58it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:05<00:01, 7.65it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:05<00:01, 9.24it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:05<00:00, 9.80it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:05<00:00, 8.85it/s]\u001b[A\n",
+ " 86%|████████▌ | 30/35 [00:05<00:00, 9.04it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:05<00:00, 8.99it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:05<00:00, 8.84it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:06<00:00, 9.08it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:06<00:00, 9.29it/s]\u001b[A\n",
+ " 40%|████ | 2/5 [00:12<00:19, 6.48s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n",
+ "Loading pretrained parameter \"ffn.7.weight\".\n",
+ "Loading pretrained parameter \"ffn.7.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:50, 1.49s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:01<00:26, 1.23it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:01<00:15, 2.04it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:11, 2.74it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:08, 3.60it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:06, 4.29it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:06, 4.55it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:02<00:05, 5.35it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:02<00:04, 5.71it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:02<00:03, 6.45it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:03, 6.37it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:03<00:03, 6.21it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:03<00:04, 5.30it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:03<00:04, 5.09it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:03<00:04, 4.97it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:04<00:03, 5.45it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:04<00:03, 5.58it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:04<00:02, 5.74it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:04<00:02, 5.86it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:04<00:02, 6.25it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:04<00:01, 7.98it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:05<00:01, 7.65it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:05<00:01, 9.38it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:05<00:00, 9.90it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:05<00:00, 8.96it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:05<00:00, 8.56it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:05<00:00, 9.08it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:05<00:00, 8.90it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:06<00:00, 8.99it/s]\u001b[A\n",
+ "100%|██████████| 35/35 [00:06<00:00, 10.95it/s]\u001b[A\n",
+ " 60%|██████ | 3/5 [00:19<00:13, 6.51s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n",
+ "Loading pretrained parameter \"ffn.7.weight\".\n",
+ "Loading pretrained parameter \"ffn.7.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:50, 1.48s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:01<00:26, 1.24it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:01<00:15, 2.04it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:11, 2.70it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:08, 3.55it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:07, 4.11it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:06, 4.39it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:02<00:05, 5.12it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:02<00:04, 5.83it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:02<00:03, 6.43it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:04, 5.76it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:03<00:04, 5.02it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:03<00:04, 4.88it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:03<00:04, 5.20it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:04<00:05, 3.85it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:04<00:04, 4.32it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:04<00:03, 4.52it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:04<00:04, 4.20it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:05<00:03, 4.56it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:05<00:03, 4.22it/s]\u001b[A\n",
+ " 63%|██████▎ | 22/35 [00:05<00:02, 6.00it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:05<00:01, 6.13it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:05<00:01, 7.98it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:05<00:00, 8.94it/s]\u001b[A\n",
+ " 80%|████████ | 28/35 [00:06<00:00, 8.33it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:06<00:00, 8.26it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:06<00:00, 8.70it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:06<00:00, 8.43it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:06<00:00, 8.62it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:06<00:00, 8.90it/s]\u001b[A\n",
+ " 80%|████████ | 4/5 [00:26<00:06, 6.77s/it]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Loading pretrained parameter \"encoder.encoder.0.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.0.W_o.bias\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.cached_zero_vector\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_i.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_h.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.weight\".\n",
+ "Loading pretrained parameter \"encoder.encoder.1.W_o.bias\".\n",
+ "Loading pretrained parameter \"ffn.1.weight\".\n",
+ "Loading pretrained parameter \"ffn.1.bias\".\n",
+ "Loading pretrained parameter \"ffn.4.weight\".\n",
+ "Loading pretrained parameter \"ffn.4.bias\".\n",
+ "Loading pretrained parameter \"ffn.7.weight\".\n",
+ "Loading pretrained parameter \"ffn.7.bias\".\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\n",
+ " 0%| | 0/35 [00:00, ?it/s]\u001b[A\n",
+ " 3%|▎ | 1/35 [00:01<00:50, 1.49s/it]\u001b[A\n",
+ " 6%|▌ | 2/35 [00:01<00:26, 1.27it/s]\u001b[A\n",
+ " 9%|▊ | 3/35 [00:01<00:16, 1.92it/s]\u001b[A\n",
+ " 11%|█▏ | 4/35 [00:02<00:13, 2.23it/s]\u001b[A\n",
+ " 14%|█▍ | 5/35 [00:02<00:10, 2.83it/s]\u001b[A\n",
+ " 17%|█▋ | 6/35 [00:02<00:08, 3.31it/s]\u001b[A\n",
+ " 20%|██ | 7/35 [00:02<00:07, 3.79it/s]\u001b[A\n",
+ " 23%|██▎ | 8/35 [00:03<00:05, 4.59it/s]\u001b[A\n",
+ " 26%|██▌ | 9/35 [00:03<00:04, 5.27it/s]\u001b[A\n",
+ " 29%|██▊ | 10/35 [00:03<00:04, 5.90it/s]\u001b[A\n",
+ " 31%|███▏ | 11/35 [00:03<00:04, 5.82it/s]\u001b[A\n",
+ " 34%|███▍ | 12/35 [00:03<00:04, 5.01it/s]\u001b[A\n",
+ " 37%|███▋ | 13/35 [00:03<00:04, 4.79it/s]\u001b[A\n",
+ " 40%|████ | 14/35 [00:04<00:04, 5.04it/s]\u001b[A\n",
+ " 43%|████▎ | 15/35 [00:04<00:03, 5.15it/s]\u001b[A\n",
+ " 46%|████▌ | 16/35 [00:04<00:03, 5.94it/s]\u001b[A\n",
+ " 49%|████▊ | 17/35 [00:04<00:03, 5.98it/s]\u001b[A\n",
+ " 51%|█████▏ | 18/35 [00:04<00:03, 4.96it/s]\u001b[A\n",
+ " 54%|█████▍ | 19/35 [00:05<00:03, 4.82it/s]\u001b[A\n",
+ " 57%|█████▋ | 20/35 [00:05<00:02, 5.07it/s]\u001b[A\n",
+ " 60%|██████ | 21/35 [00:05<00:02, 5.62it/s]\u001b[A\n",
+ " 66%|██████▌ | 23/35 [00:05<00:01, 7.00it/s]\u001b[A\n",
+ " 71%|███████▏ | 25/35 [00:05<00:01, 8.75it/s]\u001b[A\n",
+ " 77%|███████▋ | 27/35 [00:05<00:00, 9.64it/s]\u001b[A\n",
+ " 83%|████████▎ | 29/35 [00:06<00:00, 8.71it/s]\u001b[A\n",
+ " 89%|████████▊ | 31/35 [00:06<00:00, 9.26it/s]\u001b[A\n",
+ " 91%|█████████▏| 32/35 [00:06<00:00, 9.09it/s]\u001b[A\n",
+ " 94%|█████████▍| 33/35 [00:06<00:00, 9.08it/s]\u001b[A\n",
+ " 97%|█████████▋| 34/35 [00:06<00:00, 9.14it/s]\u001b[A\n",
+ "100%|██████████| 5/5 [00:33<00:00, 6.75s/it]\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Saving predictions to test_preds.csv\n",
+ "Elapsed time = 0:00:34\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " smiles | \n",
+ " solvent | \n",
+ " peakwavs_max | \n",
+ " peakwavs_max_epi_unc | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " C1CCCCC1 | \n",
+ " 378.089791 | \n",
+ " 5.984684 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CCOC(C)=O | \n",
+ " 388.387075 | \n",
+ " 3.289096 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CC#N | \n",
+ " 394.557472 | \n",
+ " 6.647632 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " CCO | \n",
+ " 400.817724 | \n",
+ " 7.351677 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 | \n",
+ " OCC(O)CO | \n",
+ " 410.902339 | \n",
+ " 5.109207 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1705 | \n",
+ " c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 | \n",
+ " C[N+](=O)[O-] | \n",
+ " 425.084342 | \n",
+ " 8.989382 | \n",
+ "
\n",
+ " \n",
+ " 1706 | \n",
+ " c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 | \n",
+ " CS(C)=O | \n",
+ " 428.209718 | \n",
+ " 9.853369 | \n",
+ "
\n",
+ " \n",
+ " 1707 | \n",
+ " COc1cc(C)c(-c2cc(-c3c(C)cc(OC)cc3C)c3ccc4c(-c5... | \n",
+ " ClCCl | \n",
+ " 370.200150 | \n",
+ " 128.172913 | \n",
+ "
\n",
+ " \n",
+ " 1708 | \n",
+ " N#Cc1c(N2CCCCC2)cc(-c2cccc3ccccc23)c2c1-c1cccc... | \n",
+ " C1CCOC1 | \n",
+ " 355.987562 | \n",
+ " 2.114018 | \n",
+ "
\n",
+ " \n",
+ " 1709 | \n",
+ " N#Cc1c(N2CCCC2)cc(-c2ccccc2)c2c1Cc1ccccc1-2 | \n",
+ " C1CCOC1 | \n",
+ " 368.792145 | \n",
+ " 11.947813 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1710 rows × 4 columns
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " smiles ... peakwavs_max_epi_unc\n",
+ "0 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 5.984684\n",
+ "1 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 3.289096\n",
+ "2 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 6.647632\n",
+ "3 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 7.351677\n",
+ "4 CCN(CC)c1ccc2c(C(F)(F)F)cc(=O)oc2c1 ... 5.109207\n",
+ "... ... ... ...\n",
+ "1705 c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 ... 8.989382\n",
+ "1706 c1cc2c3ccc[n+]4cccc(c5ccc[n+](c1)c25)c34 ... 9.853369\n",
+ "1707 COc1cc(C)c(-c2cc(-c3c(C)cc(OC)cc3C)c3ccc4c(-c5... ... 128.172913\n",
+ "1708 N#Cc1c(N2CCCCC2)cc(-c2cccc3ccccc23)c2c1-c1cccc... ... 2.114018\n",
+ "1709 N#Cc1c(N2CCCC2)cc(-c2ccccc2)c2c1Cc1ccccc1-2 ... 11.947813\n",
+ "\n",
+ "[1710 rows x 4 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 8
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ ""
+ ],
+ "metadata": {
+ "id": "VKVIvJcXR5S5"
+ },
+ "execution_count": 8,
+ "outputs": []
+ }
+ ]
+}