-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtraining_optimisation_loop.py
91 lines (80 loc) · 3.71 KB
/
training_optimisation_loop.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import argparse
from sigopt import Connection
import glob
import re
import subprocess
import os
def _build_parser():
    """Create the command line parser for the training optimisation loop."""
    # (flag, default, help text) for every supported option.
    option_specs = (
        ('-node', None, 'node to use, in format boole-n021'),
        ('-gpu', '0', 'which gpu to use'),
        ('-experiment_id', None, 'the folder containing the preprocessed data'),
    )
    cli = argparse.ArgumentParser(description='training')
    for flag, default_value, help_text in option_specs:
        cli.add_argument(flag, default=default_value, help=help_text)
    return cli


# Parsed once at import time; main() reads its settings from `opts`.
parser = _build_parser()
opts = parser.parse_args()
# Locations of the experiment data and of the bash helper scripts.
_EXPERIMENTS_ROOT = '/home/henrye/projects/E2E/experiments/'
_TRAINING_SCRIPT = \
    '/home/henrye/projects/E2E/bash_training_scripts/start_training_template.sh'
_EVALUATION_SCRIPT = \
    '/home/henrye/projects/E2E/bash_training_scripts/evaluate_model_checkpoint.sh'


def _format_training_opts(assignments):
    """Render suggested assignments as one double-quoted option string."""
    parts = []
    for key, value in assignments.items():
        # Workaround: opennmt-py doesn't like vector sizes with odd numbers,
        # so the suggested sizes are doubled to keep them even.
        if 'rnn_size' in key or 'word_vec_size' in key:
            value = value * 2
        if 'dropout' in key:
            value = "{0:.2f}".format(value)
        parts += [key, str(value)]
    # The literal quotes are part of the argument handed to the bash script.
    return '"' + ' '.join(parts) + '"'


def _run_script(script, *script_args):
    """Run a bash helper script on the selected node/gpu, capturing stdout."""
    command = ['bash', script, '-n', opts.node, '-g', opts.gpu]
    command += list(script_args)
    return subprocess.run(command, stdout=subprocess.PIPE)


def _extract_marker(pattern, output):
    """Return group 1 of `pattern` in `output`, or raise if it is missing."""
    match = re.search(pattern, output)
    if match is None:
        raise RuntimeError(
            'script output did not contain the expected marker: ' + pattern)
    return match.group(1)


def _best_bleu(score_files):
    """Return the highest BLEU value (as a string) found in the file names."""
    scores = []
    for path in score_files:
        match = re.search('BLEU,(.*),NIST', path)
        if match is not None:
            scores.append(match.group(1))
    if not scores:
        raise RuntimeError('no BLEU score files were found')
    # Compare numerically: sorting the names as text would rank
    # '9.8' above '10.2'.
    return max(scores, key=float)


def main(num_suggestions=20):
    """Run the suggest -> train -> evaluate -> report optimisation loop.

    For each iteration a suggestion is requested from the SigOpt experiment
    `opts.experiment_id`, the training script is started with the suggested
    options, the resulting checkpoint is evaluated, and the best BLEU score
    found among the generated 'BLEU*dev*' files is reported back as an
    observation.

    The SigOpt API token is read from the SIGOPT_API_TOKEN environment
    variable so that no credential is stored in the source file.

    Args:
        num_suggestions: number of suggestions to train and evaluate.

    Raises:
        SystemExit: if the node or experiment id was not supplied, the API
            token is not set, or no experiment folder matches the id.
        RuntimeError: if a script does not print the expected
            '... is <dir> end transmission' marker, or no BLEU score file
            is produced.
    """
    if not opts.node or not opts.experiment_id:
        raise SystemExit('please supply all the command line inputs')

    api_token = os.environ.get('SIGOPT_API_TOKEN')
    if not api_token:
        raise SystemExit(
            'please set the SIGOPT_API_TOKEN environment variable')
    conn = Connection(client_token=api_token)

    # The preprocessed data folder does not depend on the suggestion, so it
    # is resolved (and validated) once, before any suggestion is opened.
    matching_dirs = glob.glob(
        _EXPERIMENTS_ROOT + '*' + opts.experiment_id + '*')
    if not matching_dirs:
        raise SystemExit(
            'no experiment folder found matching: ' + opts.experiment_id)
    preprocessed_dir = matching_dirs[0]

    for _ in range(num_suggestions):
        suggestion = conn.experiments(opts.experiment_id).suggestions().create(
            metadata={
                'node': opts.node,
                'gpu': str(opts.gpu),
            }
        )
        training_opts = _format_training_opts(suggestion.assignments)

        print('starting training with suggestion: ', training_opts)
        training_run = _run_script(
            _TRAINING_SCRIPT, '-p', preprocessed_dir, '-t', training_opts)
        print('here is the bash command for testing training:\n',
              ' '.join(training_run.args))
        print('finished training, starting evaluation')
        checkpoint_dir = _extract_marker(
            'the checkpoint directory is (.*) end transmission',
            training_run.stdout.decode())

        evaluation_opts = 'empty'
        evaluation_run = _run_script(
            _EVALUATION_SCRIPT, '-c', checkpoint_dir, '-e', evaluation_opts)
        print('here is the bash command for testing evaluation:\n',
              ' '.join(evaluation_run.args))
        generated_outputs_directory = _extract_marker(
            'the generated outputs directory is (.*) end transmission',
            evaluation_run.stdout.decode())
        print('outputs directory: ', generated_outputs_directory,
              '\n there should be familiar looking files in here')

        best_bleu = _best_bleu(
            glob.glob(os.path.join(generated_outputs_directory, 'BLEU*dev*')))

        # submit bleu in observation
        print('best bleu: ', best_bleu)
        conn.experiments(opts.experiment_id).observations().create(
            suggestion=suggestion.id,
            value=float(best_bleu),
            metadata={
                'node': opts.node,
                'gpu': str(opts.gpu),
                'output_dir': generated_outputs_directory,
            }
        )
# Start the optimisation loop only when this file is executed as a script.
if __name__ == "__main__":
    main()