run.py
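"""Command-line entry point for the model pipeline.

Each subcommand runs one step of the pipeline defined below: downloading or
uploading raw data to/from S3, preprocessing, featurization, exploratory data
analysis, random forest training and evaluation, database creation, initial
ingestion, and reproducibility tests.
"""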
import argparse
import logging.config

from config.flaskconfig import LOGGING_CONFIG, SQLALCHEMY_DATABASE_URI

# Configure logging before importing the project modules so their
# module-level loggers pick up this configuration.
logging.config.fileConfig(LOGGING_CONFIG)
logger = logging.getLogger(__name__)

from src.upload_download_data import upload_data, download_data
from src.data_preprocess import preprocess_data
from src.featurize import featurize
from src.eda import eda
from src.model_training import random_forest
from src.model_evaluation import evaluate_model
from src.customer_db import create_db, initial_ingest
from test.reproducibility_test import run_reproducibility_tests
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Run Components of Model Source Code")
    subparsers = parser.add_subparsers()

    # Sub-parser for downloading raw data from S3 bucket
    sb_download = subparsers.add_parser("download_data", description="Download data from S3 bucket")
    sb_download.add_argument('--file_path', default='data/raw.csv', help='path to the raw data')
    sb_download.add_argument('--config', default='config/config.yml', help='path to yaml file with configurations')
    sb_download.set_defaults(func=download_data)

    # Sub-parser for cleaning raw data
    sb_preprocess = subparsers.add_parser("preprocess_data", description="Clean the raw data")
    sb_preprocess.add_argument('--in_file_path', default='data/raw.csv', help='path to the raw data')
    sb_preprocess.add_argument('--out_file_path', default='data/preprocessed.csv', help='path to the preprocessed data')
    sb_preprocess.add_argument('--config', default='config/config.yml', help='path to yaml file with configurations')
    sb_preprocess.set_defaults(func=preprocess_data)

    # Sub-parser for featurizing preprocessed data
    sb_feature = subparsers.add_parser("featurize", description="Featurize the preprocessed data")
    sb_feature.add_argument('--in_file_path', default='data/preprocessed.csv', help='path to the preprocessed data')
    sb_feature.add_argument('--out_file_path', default='data/featurized.csv', help='path to the featurized data')
    sb_feature.add_argument('--config', default='config/config.yml', help='path to yaml file with configurations')
    sb_feature.set_defaults(func=featurize)

    # Sub-parser for exploratory data analysis
    sb_eda = subparsers.add_parser("eda", description="Exploratory data analysis")
    sb_eda.add_argument('--in_file_preprocessed', default='data/preprocessed.csv', help='path to preprocessed data')
    sb_eda.add_argument('--in_file_featurized', default='data/featurized.csv', help='path to featurized data')
    sb_eda.add_argument('--out_file_path', default='eda', help='path to eda outputs')
    sb_eda.add_argument('--config', default='config/config.yml', help='path to yaml file with configurations')
    sb_eda.set_defaults(func=eda)

    # Sub-parser for training the random forest model
    sb_random_forest = subparsers.add_parser("random_forest", description="Train the random forest model")
    sb_random_forest.add_argument('--in_file_path', default='data/featurized.csv', help='path to the featurized data')
    sb_random_forest.add_argument('--out_file_path', default='models', help='path to model-related outputs')
    sb_random_forest.add_argument('--config', default='config/config.yml', help='path to yaml file with configurations')
    sb_random_forest.set_defaults(func=random_forest)

    # Sub-parser for evaluating the random forest model
    sb_eval = subparsers.add_parser("evaluate_model",
                                    description="Evaluate the model and save evaluations in .csv files")
    sb_eval.add_argument('--in_file_path', default='models/predictions.csv', help='path to the model predictions file')
    sb_eval.add_argument('--out_file_path', default='models', help='path to evaluation-related outputs')
    sb_eval.add_argument('--config', default='config/config.yml', help='path to yaml file with configurations')
    sb_eval.set_defaults(func=evaluate_model)

    # Sub-parser for creating a database to store user input
    sb_create_db = subparsers.add_parser("create_db", description="Create the database")
    sb_create_db.add_argument("--engine_string", default=SQLALCHEMY_DATABASE_URI,
                              help="SQLAlchemy connection URI for database")
    sb_create_db.set_defaults(func=create_db)

    # Sub-parser for conducting initial ingestion into the database
    sb_init_ingest = subparsers.add_parser("initial_ingest", description="Initial ingestion into the database")
    sb_init_ingest.add_argument("--engine_string", default=SQLALCHEMY_DATABASE_URI,
                                help="SQLAlchemy connection URI for database")
    sb_init_ingest.add_argument('--config', default='config/config.yml', help='path to yaml file with configurations')
    sb_init_ingest.add_argument('--num_records', default=5, type=int, help='the number of records to ingest')
    sb_init_ingest.set_defaults(func=initial_ingest)

    # Sub-parser for conducting reproducibility tests
    sb_reproducibility_test = subparsers.add_parser("run_reproducibility_tests",
                                                    description="Run reproducibility tests")
    sb_reproducibility_test.add_argument('--config', default='test/reproducibility_test_config.yml',
                                         help='path to yaml file with configurations')
    sb_reproducibility_test.set_defaults(func=run_reproducibility_tests)

    # Sub-parser for uploading the data to S3 bucket
    sb_upload = subparsers.add_parser("upload_data", description="Upload data into S3")
    sb_upload.add_argument('--local_file_path', help="Local folder containing data to be uploaded")
    sb_upload.add_argument('--file_name', help="File name of the data file")
    sb_upload.add_argument('--bucket_name', help="AWS S3 bucket where the data will be stored")
    sb_upload.set_defaults(func=upload_data)

    args = parser.parse_args()
    if hasattr(args, 'func'):
        args.func(args)
    else:
        # No subcommand was given; print usage instead of failing on args.func.
        parser.print_help()
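# Example invocations (illustrative; the defaults above mean most flags can be
# omitted, and the paths assume the repository's default data/, config/, and
# models/ layout -- the bucket name below is a placeholder):
#
#   python run.py download_data --config config/config.yml
#   python run.py preprocess_data --in_file_path data/raw.csv --out_file_path data/preprocessed.csv
#   python run.py featurize
#   python run.py eda --out_file_path eda
#   python run.py random_forest --out_file_path models
#   python run.py evaluate_model --in_file_path models/predictions.csv
#   python run.py create_db
#   python run.py initial_ingest --num_records 5
#   python run.py run_reproducibility_tests
#   python run.py upload_data --local_file_path data --file_name raw.csv --bucket_name <your-s3-bucket>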