diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
index 50a618e..24c939c 100644
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -9,28 +9,30 @@ jobs:
   build:
     runs-on: ubuntu-latest
     name: Build the Sphinx docs
+    strategy:
+      matrix:
+        python-version: ["3.11"]
     steps:
-    - uses: actions/checkout@v3
-    - name: Set up Python 3.8
-      uses: actions/setup-python@v3
-      with:
-        python-version: 3.8
-    - name: Install package dependencies
-      run: pip install -e .[rdkit]
-    - name: Install sphinx dependencies
-      run: pip install -r docs/requirements.txt
-    - name: Make docs
-      working-directory: ./docs
-      run: make html
-    - name: Upload artifacts
-      uses: actions/upload-artifact@v3
-      with:
-        name: html-docs
-        path: docs/build/html/
-    - name: Deploy
-      uses: peaceiris/actions-gh-pages@v3
-      if: github.ref == 'refs/heads/main'
-      with:
-        github_token: ${{ secrets.GITHUB_TOKEN }}
-        publish_dir: docs/build/html
-
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}  # value from the job's strategy matrix
+      - name: Install package dependencies
+        run: pip install -e .[rdkit]
+      - name: Install sphinx dependencies
+        run: pip install -r docs/requirements.txt
+      - name: Make docs
+        working-directory: ./docs
+        run: make html
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4  # v3 is retired; GitHub fails jobs that still use it
+        with:
+          name: html-docs
+          path: docs/build/html/
+      - name: Deploy
+        uses: peaceiris/actions-gh-pages@v3
+        if: github.ref == 'refs/heads/main'  # deploy only when the ref is the main branch
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: docs/build/html
diff --git a/.github/workflows/pypi.yaml b/.github/workflows/pypi.yaml
index 6553ea9..3cd7159 100644
--- a/.github/workflows/pypi.yaml
+++ b/.github/workflows/pypi.yaml
@@ -3,26 +3,28 @@ name: Build and publish rxn-onmt-models on PyPI
 on:
   push:
     tags:
-    - 'v*'
+      - "v*"
 
 jobs:
   build-and-publish:
     name: Build and publish rxn-onmt-models on PyPI
     runs-on: ubuntu-latest
-
+    strategy:
+      matrix:
+        python-version: ["3.11"]
     steps:
-    - uses: actions/checkout@master
-    - name: Python setup 3.9
-      uses: actions/setup-python@v1
-      with:
-        python-version: 3.9
-    - name: Install build package (for packaging)
-      run: pip install --upgrade build
-    - name: Build dist
-      run: python -m build
-    - name: Publish to PyPI
-      uses: pypa/gh-action-pypi-publish@release/v1
-      with:
-        user: __token__
-        password: ${{ secrets.PYPI_TOKEN }}
-        skip_existing: true
+      - uses: actions/checkout@v3  # release tag instead of the moving master branch
+      - name: Python setup ${{ matrix.python-version }}
+        uses: actions/setup-python@v3  # same major version as the docs and tests workflows
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install build package (for packaging)
+        run: pip install --upgrade build
+      - name: Build dist
+        run: python -m build
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_TOKEN }}
+          skip_existing: true
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index e10201b..79a5bfa 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -6,23 +6,27 @@ jobs:
   tests:
     runs-on: ubuntu-latest
     name: Style, mypy, pytest
+    strategy:
+      matrix:
+        python-version: ["3.11"]
     steps:
-    - uses: actions/checkout@v3
-    - name: Set up Python 3.7
-      uses: actions/setup-python@v3
-      with:
-        python-version: 3.7
-    - name: Install Dependencies
-      run: pip install -e .[dev,rdkit]
-    - name: Check black
-      run: python -m black --check --diff --color .
-    - name: Check isort
-      run: python -m isort --check --diff .
-    - name: Check flake8
-      run: python -m flake8 .
-    - name: Check mypy (on the package)
-      run: python -m mypy --namespace-packages -p rxn.onmt_models
-    - name: Check mypy (on the tests)
-      run: python -m mypy tests
-    - name: Run pytests
-      run: python -m pytest
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}  # value from the job's strategy matrix
+      - name: Install Dependencies
+        run: pip install -e .[dev,rdkit]  # editable install with the dev and rdkit extras
+      - name: Check black
+        run: python -m black --check --diff --color .  # formatting check
+      - name: Check isort
+        run: python -m isort --check --diff .  # import-order check
+      - name: Check flake8
+        run: python -m flake8 .
+      - name: Check mypy (on the package)
+        run: python -m mypy --namespace-packages -p rxn.onmt_models
+      - name: Check mypy (on the tests)
+        run: python -m mypy tests
+      - name: Run pytests
+        run: python -m pytest
+
diff --git a/pyproject.toml b/pyproject.toml
index afd23ab..9cad47e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,6 +11,7 @@ module = [
     "numpy.*",
     "pandas.*",
     "pytest.*",
+    "yaml.*",
 ]
 ignore_missing_imports = true
 
diff --git a/setup.cfg b/setup.cfg
index c9f9c7d..d34f092 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -28,9 +28,10 @@ install_requires =
     attrs>=21.2.0
     click>=8.0
     rxn-chem-utils>=1.1.4
-    rxn-onmt-utils>=1.0.3
     rxn-reaction-preprocessing>=2.0.2
     rxn-utils>=1.1.9
+    rxn-onmt-utils @ git+https://github.com/rxn4chemistry/rxn-onmt-utils.git@0058c723c7371c6ff3b88647247c9e44cf1ffaa7 # rxn-onmt-utils without rxn-opennmt-py dependency
+    OpenNMT-py>=3.5.1 # official onmt
 
 [options.packages.find]
 where = src
diff --git a/src/rxn/onmt_models/scripts/rxn_onmt_continue_training.py b/src/rxn/onmt_models/scripts/rxn_onmt_continue_training.py
index 1004f92..e993557 100644
--- a/src/rxn/onmt_models/scripts/rxn_onmt_continue_training.py
+++ b/src/rxn/onmt_models/scripts/rxn_onmt_continue_training.py
@@ -1,4 +1,5 @@
 import logging
+from pathlib import Path
 from typing import Optional, Tuple
 
 import click
@@ -20,6 +21,12 @@
 logger.addHandler(logging.NullHandler())
 
 
+def get_src_tgt_vocab(data: Path) -> Tuple[Path, Path]:
+    """Return the paths ``<data>.vocab.src`` and ``<data>.vocab.tgt``."""
+    folder, stem = data.parent, data.name
+    return folder / f"{stem}.vocab.src", folder / f"{stem}.vocab.tgt"
+
+
 @click.command(context_settings=dict(show_default=True))
 @click.option("--batch_size", default=defaults.BATCH_SIZE)
 @click.option(
@@ -57,6 +64,7 @@
     default=100000,
     help="Number of steps, including steps from the initial training run.",
 )
+@click.option("--model_task", type=str, required=True)
 def main(
     batch_size: int,
     data_weights: Tuple[int, ...],
@@ -66,6 +74,7 @@ def main(
     preprocess_dir: str,
     train_from: Optional[str],
     train_num_steps: int,
+    model_task: str,
 ) -> None:
     """Continue training for an OpenNMT model.
 
@@ -100,9 +109,15 @@ def main(
     dropout = get_model_dropout(train_from)
     seed = get_model_seed(train_from)
 
+    src_vocab, tgt_vocab = get_src_tgt_vocab(
+        data=onmt_preprocessed_files.preprocess_prefix
+    )
+
     train_cmd = OnmtTrainCommand.continue_training(
         batch_size=batch_size,
         data=onmt_preprocessed_files.preprocess_prefix,
+        src_vocab=src_vocab,
+        tgt_vocab=tgt_vocab,
         keep_checkpoint=keep_checkpoint,
         dropout=dropout,
         save_model=model_files.model_prefix,
@@ -111,11 +126,11 @@ def main(
         train_steps=train_num_steps,
         no_gpu=no_gpu,
         data_weights=data_weights,
+        model_task=model_task,
     )
 
     # Write config file
-    command_and_args = train_cmd.save_to_config_cmd(config_file)
-    run_command(command_and_args)
+    train_cmd.save_to_config_cmd(config_file)
 
     # Actual training config file
     command_and_args = train_cmd.execute_from_config_cmd(config_file)
diff --git a/src/rxn/onmt_models/scripts/rxn_onmt_finetune.py b/src/rxn/onmt_models/scripts/rxn_onmt_finetune.py
index c30fed8..4137238 100644
--- a/src/rxn/onmt_models/scripts/rxn_onmt_finetune.py
+++ b/src/rxn/onmt_models/scripts/rxn_onmt_finetune.py
@@ -54,6 +54,7 @@
 @click.option("--warmup_steps", default=defaults.WARMUP_STEPS)
 @click.option("--report_every", default=1000)
 @click.option("--save_checkpoint_steps", default=5000)
+@click.option("--model_task", type=str, required=True)
 def main(
     batch_size: int,
     data_weights: Tuple[int, ...],
@@ -69,6 +70,7 @@ def main(
     warmup_steps: int,
     report_every: int,
     save_checkpoint_steps: int,
+    model_task: str,
 ) -> None:
     """Finetune an OpenNMT model."""
 
@@ -112,7 +114,7 @@ def main(
         dropout=dropout,
         keep_checkpoint=keep_checkpoint,
         learning_rate=learning_rate,
-        rnn_size=rnn_size,
+        hidden_size=rnn_size,
         save_model=model_files.model_prefix,
         seed=seed,
         train_from=train_from,
@@ -122,11 +124,11 @@ def main(
         data_weights=data_weights,
         report_every=report_every,
         save_checkpoint_steps=save_checkpoint_steps,
+        model_task=model_task,
     )
 
     # Write config file
-    command_and_args = train_cmd.save_to_config_cmd(config_file)
-    run_command(command_and_args)
+    train_cmd.save_to_config_cmd(config_file)
 
     # Actual training config file
     command_and_args = train_cmd.execute_from_config_cmd(config_file)
diff --git a/src/rxn/onmt_models/scripts/rxn_onmt_preprocess.py b/src/rxn/onmt_models/scripts/rxn_onmt_preprocess.py
index b2afc1c..7105a3a 100644
--- a/src/rxn/onmt_models/scripts/rxn_onmt_preprocess.py
+++ b/src/rxn/onmt_models/scripts/rxn_onmt_preprocess.py
@@ -1,9 +1,10 @@
 import logging
 import random
 from pathlib import Path
-from typing import List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 import click
+import yaml
 from rxn.chemutils.tokenization import ensure_tokenized_file
 from rxn.onmt_utils import __version__ as onmt_utils_version
 from rxn.onmt_utils.train_command import preprocessed_id_names
@@ -51,6 +52,89 @@ def determine_train_dataset(
     return src, tgt
 
 
+def get_build_vocab_config_file(
+    train_srcs: List[PathLike],
+    train_tgts: List[PathLike],
+    valid_src: PathLike,
+    valid_tgt: PathLike,
+    save_data: Path,
+    share_vocab: bool = True,
+    overwrite: bool = True,
+    src_seq_length: int = 3000,
+    tgt_seq_length: int = 3000,
+    src_vocab_size: int = 3000,
+    tgt_vocab_size: int = 3000,
+) -> Path:
+    """Write the YAML config file to be passed to `onmt_build_vocab -config`.
+
+    The arguments mirror the ones of the legacy `onmt_preprocess` cli; they are
+    dumped into a YAML file compatible with `onmt_build_vocab` (ONMT v.3.5.1).
+
+    Args:
+        train_srcs (List[PathLike]): Train source data files, one per corpus.
+        train_tgts (List[PathLike]): Train target data files, one per corpus.
+        valid_src (PathLike): Validation source data file.
+        valid_tgt (PathLike): Validation target data file.
+        save_data (Path): Prefix used to name the vocab and config files.
+        share_vocab (bool, optional): Share vocab. Defaults to True.
+        overwrite (bool, optional): Overwrite output directory. Defaults to True.
+        src_seq_length (int, optional): src_seq_length. Defaults to 3000.
+        tgt_seq_length (int, optional): tgt_seq_length. Defaults to 3000.
+        src_vocab_size (int, optional): src_vocab_size. Defaults to 3000.
+        tgt_vocab_size (int, optional): tgt_vocab_size. Defaults to 3000.
+
+    Returns:
+        Path: `<save_data>_build_vocab_config.yaml`, i.e. a sibling of `save_data`.
+
+    Raises:
+        ValueError: If `train_srcs` and `train_tgts` have different lengths.
+    """
+    # Sources and targets are paired by position; fail early on a length mismatch.
+    if len(train_srcs) != len(train_tgts):
+        raise ValueError(
+            f"Mismatch: {len(train_srcs)} train source vs {len(train_tgts)} train target files."
+        )
+
+    # Config content; see https://opennmt.net/OpenNMT-py/quickstart.html (Step 1: Prepare the data)
+    build_vocab_config: Dict[str, Any] = {}
+
+    # Arguments save data (note: `save_data` is set to the parent directory)
+    build_vocab_config["save_data"] = str(save_data.parent)
+    build_vocab_config["src_vocab"] = str(
+        save_data.parent / (save_data.name + ".vocab.src")
+    )
+    build_vocab_config["tgt_vocab"] = str(
+        save_data.parent / (save_data.name + ".vocab.tgt")
+    )
+
+    # Other arguments (all stored as strings)
+    build_vocab_config["overwrite"] = str(overwrite)
+    build_vocab_config["share_vocab"] = str(share_vocab)
+    build_vocab_config["src_seq_length"] = str(src_seq_length)
+    build_vocab_config["tgt_seq_length"] = str(tgt_seq_length)
+    build_vocab_config["src_vocab_size"] = str(src_vocab_size)
+    build_vocab_config["tgt_vocab_size"] = str(tgt_vocab_size)
+
+    # Arguments data paths (train): one "corpus_<i>" entry per pair, counted from 1
+    build_vocab_config["data"] = {
+        f"corpus_{i}": {"path_src": str(src), "path_tgt": str(tgt)}
+        for i, (src, tgt) in enumerate(zip(train_srcs, train_tgts), start=1)
+    }
+
+    # Arguments data paths (valid)
+    build_vocab_config["data"]["valid"] = {
+        "path_src": str(valid_src),
+        "path_tgt": str(valid_tgt),
+    }
+
+    # Save file that will be -config argument of onmt_build_vocab
+    config_file_path = save_data.parent / (save_data.name + "_build_vocab_config.yaml")
+    with open(config_file_path, "w") as file:
+        yaml.dump(build_vocab_config, file)
+
+    return config_file_path
+
+
 @click.command()
 @click.option(
     "--input_dir",
@@ -180,21 +264,28 @@ def main(
     valid_src = ensure_tokenized_file(valid_src)
     valid_tgt = ensure_tokenized_file(valid_tgt)
 
+    # Create config file for onmt_build_vocab for OpenNMT v.3.5.1
+    # Dump train_srcs, train_tgts, valid_src, valid_tgt etc and return path
+    config_file_path = get_build_vocab_config_file(
+        train_srcs=train_srcs,
+        train_tgts=train_tgts,
+        valid_src=valid_src,
+        valid_tgt=valid_tgt,
+        save_data=onmt_preprocessed_files.preprocess_prefix,
+        share_vocab=True,
+        overwrite=True,
+        src_seq_length=3000,
+        tgt_seq_length=3000,
+        src_vocab_size=3000,
+        tgt_vocab_size=3000,
+    )
+
     # yapf: disable
     command_and_args = [
         str(e) for e in [
-            'onmt_preprocess',
-            '-train_src', *train_srcs,
-            '-train_tgt', *train_tgts,
-            '-valid_src', valid_src,
-            '-valid_tgt', valid_tgt,
-            '-save_data', onmt_preprocessed_files.preprocess_prefix,
-            '-src_seq_length', 3000,
-            '-tgt_seq_length', 3000,
-            '-src_vocab_size', 3000,
-            '-tgt_vocab_size', 3000,
-            '-share_vocab',
-            '-overwrite',
+            'onmt_build_vocab',
+            '-config', config_file_path,
+            '-n_sample', -1,
         ]
     ]
     # yapf: enable
diff --git a/src/rxn/onmt_models/scripts/rxn_onmt_train.py b/src/rxn/onmt_models/scripts/rxn_onmt_train.py
index dc33a28..f29fda5 100644
--- a/src/rxn/onmt_models/scripts/rxn_onmt_train.py
+++ b/src/rxn/onmt_models/scripts/rxn_onmt_train.py
@@ -1,4 +1,6 @@
 import logging
+import warnings
+from pathlib import Path
 from typing import Tuple
 
 import click
@@ -15,6 +17,30 @@
 logger.addHandler(logging.NullHandler())
 
 
+def get_src_tgt_vocab(data: Path) -> Tuple[Path, Path]:
+    """Return the ``<data>.vocab.src`` and ``<data>.vocab.tgt`` paths (in that order)."""
+    vocab_prefix = data.name + ".vocab."
+    return data.parent / (vocab_prefix + "src"), data.parent / (vocab_prefix + "tgt")
+
+
+def check_rnn_vs_hidden_size(hidden_size: int, rnn_size: int) -> int:
+    """
+    Decide whether hidden_size or rnn_size is used, and warn about the choice.
+    rnn_size always has a value (default defaults.RNN_SIZE) and is returned when no
+    hidden_size is given. Otherwise hidden_size is returned, converted to int because
+    the untyped --hidden_size click option delivers it as a string.
+    """
+    if hidden_size is None:
+        warnings.warn(
+            f"Argument hidden_size is not given, rnn_size with value {rnn_size} will be used"
+        )
+        return rnn_size
+    warnings.warn(
+        f"Argument hidden_size was given with value {hidden_size}, rnn_size argument will be overwritten."
+    )
+    return int(hidden_size)
+
+
 @click.command(context_settings=dict(show_default=True))
 @click.option("--batch_size", default=defaults.BATCH_SIZE)
 @click.option(
@@ -44,11 +70,13 @@
     help="Directory with OpenNMT-preprocessed files",
 )
 @click.option("--rnn_size", default=defaults.RNN_SIZE)
+@click.option("--hidden_size")
 @click.option("--seed", default=defaults.SEED)
 @click.option("--train_num_steps", default=100000)
 @click.option("--transformer_ff", default=defaults.TRANSFORMER_FF)
 @click.option("--warmup_steps", default=defaults.WARMUP_STEPS)
 @click.option("--word_vec_size", default=defaults.WORD_VEC_SIZE)
+@click.option("--model_task", type=str, required=True)
 def main(
     batch_size: int,
     data_weights: Tuple[int, ...],
@@ -61,11 +89,13 @@ def main(
     no_gpu: bool,
     preprocess_dir: str,
     rnn_size: int,
+    hidden_size: int,
     seed: int,
     train_num_steps: int,
     transformer_ff: int,
     warmup_steps: int,
     word_vec_size: int,
+    model_task: str,
 ) -> None:
     """Train an OpenNMT model.
 
@@ -73,6 +103,9 @@ def main(
     `data_weights` parameters are given (Note: needs to be consistent with the
     rxn-onmt-preprocess command executed before training.
     """
+    # Check rnn_size or hidden_size given, not both
+    # NOTE: rnn_size argument is kept for compatibility
+    hidden_size = check_rnn_vs_hidden_size(hidden_size=hidden_size, rnn_size=rnn_size)
 
     # set up paths
     model_files = ModelFiles(model_output_dir)
@@ -88,15 +121,22 @@ def main(
 
     config_file = model_files.next_config_file()
 
+    src_vocab, tgt_vocab = get_src_tgt_vocab(
+        data=onmt_preprocessed_files.preprocess_prefix
+    )
+
+    # Init
     train_cmd = OnmtTrainCommand.train(
         batch_size=batch_size,
         data=onmt_preprocessed_files.preprocess_prefix,
+        src_vocab=src_vocab,
+        tgt_vocab=tgt_vocab,
         dropout=dropout,
         heads=heads,
         keep_checkpoint=keep_checkpoint,
         layers=layers,
         learning_rate=learning_rate,
-        rnn_size=rnn_size,
+        hidden_size=hidden_size,
         save_model=model_files.model_prefix,
         seed=seed,
         train_steps=train_num_steps,
@@ -105,11 +145,11 @@ def main(
         word_vec_size=word_vec_size,
         no_gpu=no_gpu,
         data_weights=data_weights,
+        model_task=model_task,
     )
 
     # Write config file
-    command_and_args = train_cmd.save_to_config_cmd(config_file)
-    run_command(command_and_args)
+    train_cmd.save_to_config_cmd(config_file)
 
     # Actual training config file
     command_and_args = train_cmd.execute_from_config_cmd(config_file)
diff --git a/src/rxn/onmt_models/training_files.py b/src/rxn/onmt_models/training_files.py
index 1157480..9eb5954 100644
--- a/src/rxn/onmt_models/training_files.py
+++ b/src/rxn/onmt_models/training_files.py
@@ -94,7 +94,7 @@ def preprocess_prefix(self) -> Path:
 
     @property
     def vocab_file(self) -> Path:
-        return self.preprocess_prefix.with_suffix(".vocab.pt")
+        return self.preprocess_prefix.with_suffix(".vocab.src")
 
 
 class RxnPreprocessingFiles: