-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
62 lines (50 loc) · 1.65 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import tensorflow as tf
import numpy as np
import gym
from collections import deque
from matplotlib import pyplot as plt
import random
from utils import NormalizedActions
from NAF import NAF
# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------

# Environment and bookkeeping.
ENV_NAME = 'Pendulum-v0'               # id handed to gym.make() in main()
RANDOM_SEED = 42                       # seeds numpy, TensorFlow, the env and NAF
RENDER_ENV = False                     # when False the Monitor is built with video_callable=False
GYM_MONITOR_EN = True                  # wrap the env in gym.wrappers.Monitor when True
MONITOR_DIR = './results/qnaf_7'       # Monitor output dir; also given to NAF and plot_rewards()
SUMMARY_DIR = './results/tf_qnaf_7'    # not referenced in the visible part of this script

# Episode schedule: main() runs EXPLORATION_TIME episodes first and then
# MAX_EPISODES - EXPLORATION_TIME more, i.e. MAX_EPISODES in total.
MAX_EPISODES = 200
EXPLORATION_TIME = 50
MAX_EP_STEPS = 200                     # second positional arg of run_n_episodes();
                                       # presumably the per-episode step cap - TODO confirm

# Values forwarded to the NAF constructor / run_n_episodes().
LEARNING_RATE = 0.001
GAMMA = 0.99                           # presumably the discount factor - confirm in NAF
TAU = 0.001                            # presumably the target-update rate - confirm in NAF
BUFFER_SIZE = 800000
MINIBATCH_SIZE = 64

# Noise settings. None of these are read in the visible part of this script;
# the names suggest Gaussian and Ornstein-Uhlenbeck exploration noise, but that
# should be verified against whichever module consumes them.
NOISE_MEAN = 0
NOISE_VAR = 1
OU_THETA = 0.15
OU_MU = 0.0
OU_SIGMA = 0.3
def main(_):
    """Train a NAF agent on ``ENV_NAME`` and plot the rewards it collected.

    Steps performed:
      1. Seed numpy, TensorFlow and the environment with ``RANDOM_SEED``.
      2. Build the environment wrapped in ``NormalizedActions`` and, when
         ``GYM_MONITOR_EN`` is set, in ``gym.wrappers.Monitor`` writing to
         ``MONITOR_DIR``.
      3. Run ``EXPLORATION_TIME`` episodes, then the remaining
         ``MAX_EPISODES - EXPLORATION_TIME`` episodes.
      4. Call ``naf.plot_rewards`` with the monitor directory.

    The environment is always closed on exit, including when training raises,
    so the Monitor wrapper gets a chance to finalize its output.

    Args:
        _: Ignored. Present because ``tf.app.run()`` calls ``main`` with one
            positional argument.
    """
    np.random.seed(RANDOM_SEED)
    tf.set_random_seed(RANDOM_SEED)

    env = NormalizedActions(gym.make(ENV_NAME))
    try:
        env.seed(RANDOM_SEED)

        if GYM_MONITOR_EN:
            # force=True is always passed; video recording is switched off
            # explicitly only when rendering is disabled.
            monitor_kwargs = {'force': True}
            if not RENDER_ENV:
                monitor_kwargs['video_callable'] = False
            env = gym.wrappers.Monitor(env, MONITOR_DIR, **monitor_kwargs)

        with tf.Session() as sess:
            monitor_dir = MONITOR_DIR
            # NOTE(review): the positional False after monitor_dir and the
            # keyword flags are defined by NAF.__init__, which is not visible
            # here; see NAF.py for their meaning.
            naf = NAF(sess, env, LEARNING_RATE, TAU, GAMMA,
                      BUFFER_SIZE, RANDOM_SEED, monitor_dir, False,
                      det=False, pg=False, qnaf=False,
                      scope='qn', hn=0, ac=True,
                      sep_V=True, per_st=False)

            # Both phases share the same update settings.
            update_kwargs = dict(num_updates=5, eta=1, num_updates_ac=3, T=5)

            # Phase 1: the first EXPLORATION_TIME episodes, using the default
            # value of run_n_episodes' fourth positional parameter.
            naf.run_n_episodes(EXPLORATION_TIME, MAX_EP_STEPS,
                               MINIBATCH_SIZE, **update_kwargs)

            # Phase 2: the remaining episodes with that fourth parameter set
            # to False (presumably disables exploration - confirm in NAF).
            naf.run_n_episodes(MAX_EPISODES - EXPLORATION_TIME, MAX_EP_STEPS,
                               MINIBATCH_SIZE, False, **update_kwargs)

            naf.plot_rewards(monitor_dir)
    finally:
        # Release the environment (and the Monitor wrapper, if any) even when
        # training fails part-way through.
        env.close()
# Script entry point: let TensorFlow's app runner invoke main().
if __name__ == "__main__":
    tf.app.run(main=main)