-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
27 lines (18 loc) · 901 Bytes
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
"""Preprocess the data and train the model."""
from pathlib import Path
import hydra
from epochalyst.logging.section_separator import print_section_separator
from omegaconf import DictConfig
from transformers import set_seed
from src.pipeline.setup_pipeline import setup_pipeline
@hydra.main(version_base=None, config_path="conf", config_name="main")
def run_train(cfg: DictConfig) -> None:  # TODO(Jeffrey): Use TrainConfig instead of DictConfig
    """Build the pipeline from the configuration and run it for one dossier.

    This is the Hydra entry point: the decorator points at the ``main`` config
    under ``conf`` and hands the resulting configuration to this function.

    :param cfg: Configuration for the training script. The keys read here are
        ``dossier_id``, ``raw_data_path``, ``processed_data_path`` and
        ``final_data_path``; the whole object is also passed to ``setup_pipeline``.
    """
    print_section_separator("Q3 National Archive - Preparing the data.")

    # Fixed seed, applied before the pipeline is constructed.
    set_seed(42)

    dossier_pipeline = setup_pipeline(cfg)

    # The path settings are wrapped in Path objects before being handed over.
    dossier_id = cfg.dossier_id
    raw_path = Path(cfg.raw_data_path)
    processed_path = Path(cfg.processed_data_path)
    final_path = Path(cfg.final_data_path)

    dossier_pipeline.run_dossier_pipeline(
        dossier_id,
        raw_path,
        processed_path,
        final_path,
    )
if __name__ == "__main__":
    # Called without arguments: the @hydra.main decorator on run_train is what
    # provides the cfg parameter.
    run_train()