-
Notifications
You must be signed in to change notification settings - Fork 27
/
Copy pathrun_pendulum.py
35 lines (31 loc) · 1.36 KB
/
run_pendulum.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import numpy as np
import torch
from agents import PPO
from curiosity import ICM, MlpICMModel
from envs import MultiEnv
from models import MLP
from reporters import TensorBoardReporter
from rewards import GeneralizedAdvantageEstimation, GeneralizedRewardEstimation
if __name__ == '__main__':
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
reporter = TensorBoardReporter()
agent = PPO(MultiEnv('Pendulum-v0', 10, reporter),
reporter=reporter,
normalize_state=True,
normalize_reward=True,
model_factory=MLP.factory(),
curiosity_factory=ICM.factory(MlpICMModel.factory(), policy_weight=1, reward_scale=0.01, weight=0.2,
intrinsic_reward_integration=0.01, reporter=reporter),
reward=GeneralizedRewardEstimation(gamma=0.95, lam=0.15),
advantage=GeneralizedAdvantageEstimation(gamma=0.95, lam=0.15),
learning_rate=4e-4,
clip_range=0.3,
v_clip_range=0.5,
c_entropy=1e-2,
c_value=0.5,
n_mini_batches=32,
n_optimization_epochs=10,
clip_grad_norm=0.5)
agent.to(device, torch.float32, np.float32)
agent.learn(epochs=30, n_steps=200)
agent.eval(n_steps=600, render=True)