# Training & playing the default goal task with the Agent Learning Framework (Alf):
# python -m alf.bin.train --root_dir=~/tmp/gro_ppo --gin_file=grocery_alf_ppo.gin --alsologtostderr
# python -m alf.bin.play --root_dir=~/tmp/gro_ppo --gin_file=grocery_alf_ppo.gin --record_file=grocery.mp4
# Training with another task, e.g. 'kickball':
# python -m alf.bin.train --root_dir=~/tmp/gro_ppo --gin_file=grocery_alf_ppo.gin --alsologtostderr --gin_param="GroceryGround.task_name='kickball'"
# If you are not recording video and the observation does not contain an image, you can add
# 'DISPLAY=null' to skip camera rendering, which speeds up the simulation a lot:
# DISPLAY=null python -m alf.bin.train ...

# Training with this config file:
# python alf/bin/train.py --gin_file=alf/examples/misc_playground_two_balls.gin --root_dir=logs/misc_playground_two_balls-tmp
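# Playing a policy trained with this config (a sketch by analogy with the play
# command above; the root_dir and record_file names are only examples):
# python -m alf.bin.play --root_dir=logs/misc_playground_two_balls-tmp --gin_file=alf/examples/misc_playground_two_balls.gin --record_file=misc_playground_two_balls.mp4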
include 'alf/examples/ppo.gin'

import social_bot
import alf.environments.suite_socialbot

# environment config
batch_size=3 # 16
create_environment.env_name="SocialBot-PlayGround-v0"
create_environment.num_parallel_environments=%batch_size
create_environment.env_load_fn=@suite_socialbot.load
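# Note on the environment config above: %batch_size is a gin macro reference, so
# batch_size=3 means 3 SocialBot playground environments are simulated in
# parallel (the trailing '# 16' presumably records a larger value used elsewhere);
# suite_socialbot.load builds each Gazebo-based environment.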
# task config (GoalTask)
GoalTask.move_goal_during_episode=False
GoalTask.success_with_angle_requirement=False
GoalTask.distraction_list=['coke_can', 'table', 'car_wheel', 'plastic_cup', 'beer', 'blue_ball']
GoalTask.additional_observation_list=['blue_ball']
GoalTask.fail_distance_thresh=1000
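# Note on the GoalTask settings above: the blue ball's state is added to the
# observation through additional_observation_list, the listed objects are spawned
# as distractions, and the very large fail_distance_thresh (1000) is presumably
# meant to effectively disable failing an episode by straying too far (see
# social_bot's GoalTask for the exact semantics).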
# algorithm config
observation_spec=@get_observation_spec()
action_spec=@get_action_spec()
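# get_observation_spec() / get_action_spec() query the tensor specs of the
# environment created above, so the networks below automatically match the
# playground's observation and action spaces.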
actor/ActorDistributionNetwork.input_tensor_spec=%observation_spec
actor/ActorDistributionNetwork.output_tensor_spec=%action_spec
actor/ActorDistributionNetwork.fc_layer_params=(128, 64)
actor/ActorDistributionNetwork.activation_fn=@tf.nn.tanh
actor/ActorDistributionNetwork.continuous_projection_net=@NormalProjectionNetwork
NormalProjectionNetwork.init_means_output_factor=1e-10
NormalProjectionNetwork.std_bias_initializer_value=0.0
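# The actor is a (128, 64) MLP whose continuous actions are produced by a
# Normal (Gaussian) projection; init_means_output_factor=1e-10 makes the
# initial action means start close to zero.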
value/ValueNetwork.input_tensor_spec=%observation_spec
value/ValueNetwork.fc_layer_params=(128, 64)
value/ValueNetwork.activation_fn=@tf.nn.tanh
ac/Adam.learning_rate=3e-4
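# This Adam optimizer (lr=3e-4) is shared by the actor and value networks via
# Agent.optimizer=@ac/Adam() below.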
# MISC (intrinsic reward) config
misc/soi/TensorSpec.shape=(3,)
misc/soc/TensorSpec.shape=(3,)
MISCAlgorithm.observation_spec=%observation_spec
MISCAlgorithm.action_spec=%action_spec
MISCAlgorithm.batch_size=%batch_size
MISCAlgorithm.soi_spec=@misc/soi/TensorSpec()
MISCAlgorithm.soc_spec=@misc/soc/TensorSpec()
MISCAlgorithm.n_objects=2
MISCAlgorithm.hidden_size=128 # 256
import alf.examples.misc_playground_two_balls
MISCAlgorithm.split_observation_fn=@misc_playground_two_balls.split_observation_fn
Agent.intrinsic_curiosity_module=@MISCAlgorithm()
Agent.extrinsic_reward_coef=0.0
Agent.intrinsic_reward_coef=1.0
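# Notes on the MISC block above (MISC: Mutual Information-based State Control,
# alf's mutual-information intrinsic reward module): the (3,)-shaped soi/soc
# TensorSpecs presumably describe the 3-D "state of interest" / "state of
# context" inputs, n_objects=2 matches the two balls this config is about, and
# split_observation_fn (from the companion module
# alf/examples/misc_playground_two_balls.py imported above) splits the flat
# observation into those parts. With extrinsic_reward_coef=0.0 and
# intrinsic_reward_coef=1.0, training is driven purely by the intrinsic reward.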
# ActorCriticAlgorithm
ActorCriticAlgorithm.actor_network=@actor/ActorDistributionNetwork()
ActorCriticAlgorithm.value_network=@value/ValueNetwork()
Agent.optimizer=@ac/Adam()
Agent.gradient_clipping=0.5
Agent.clip_by_global_norm=True
PPOLoss.entropy_regularization=5e-3
PPOLoss.gamma=0.99
PPOLoss.normalize_advantages=True
PPOLoss.td_lambda=0.95
PPOLoss.td_error_loss_fn=@element_wise_squared_loss
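# Standard PPO loss settings: discount gamma=0.99, GAE with td_lambda=0.95,
# advantage normalization, element-wise squared TD error, and a small entropy
# bonus (5e-3).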
TrainerConfig.mini_batch_length=1
TrainerConfig.unroll_length=1024
TrainerConfig.mini_batch_size=4096
TrainerConfig.num_iterations=500
TrainerConfig.summary_interval=1
TrainerConfig.num_updates_per_train_step=20
TrainerConfig.eval_interval=100
TrainerConfig.summarize_grads_and_vars=True
TrainerConfig.debug_summaries=True
TrainerConfig.checkpoint_interval=1
TrainerConfig.use_rollout_state=True
TrainerConfig.use_tf_functions=True
TrainerConfig.random_seed=None
ReplayBuffer.max_length=2048
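# Rollout/update schedule: each of the 500 iterations unrolls 1024 steps per
# environment and then runs 20 update passes over the collected experience;
# summaries and checkpoints are written every iteration and evaluation runs
# every 100 iterations. ReplayBuffer.max_length=2048 should be large enough to
# hold one 1024-step unroll per environment.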