cleanup and tuning, adding osc mini_cheetahs

sheim committed Feb 8, 2024
1 parent d4378f6 commit ce6efd0
Showing 6 changed files with 828 additions and 145 deletions (diffs for the first two files shown below).
17 changes: 16 additions & 1 deletion gym/envs/__init__.py
@@ -8,13 +8,14 @@
-# * 3. add the runner confg name and location to the runner config dict
 # * 3. register the task experiment name to the env/config/ppo classes

 # * from y import x where {y:x}
 class_dict = {
     "LeggedRobot": ".base.legged_robot",
     "FixedRobot": ".base.fixed_robot",
     "Cartpole": ".cartpole.cartpole",
     "MiniCheetah": ".mini_cheetah.mini_cheetah",
     "MiniCheetahRef": ".mini_cheetah.mini_cheetah_ref",
+    "MiniCheetahOsc": ".mini_cheetah.mini_cheetah_osc",
+    "MiniCheetahOscIK": ".mini_cheetah.mini_cheetah_osc_IK",
     "MIT_Humanoid": ".mit_humanoid.mit_humanoid",
     "Anymal": ".anymal_c.anymal",
     "A1": ".a1.a1",
@@ -25,6 +26,8 @@
"CartpoleCfg": ".cartpole.cartpole_config",
"MiniCheetahCfg": ".mini_cheetah.mini_cheetah_config",
"MiniCheetahRefCfg": ".mini_cheetah.mini_cheetah_ref_config",
"MiniCheetahOscCfg": ".mini_cheetah.mini_cheetah_osc_config",
"MiniCheetahOscIKCfg": ".mini_cheetah.mini_cheetah_osc_IK_config",
"MITHumanoidCfg": ".mit_humanoid.mit_humanoid_config",
"A1Cfg": ".a1.a1_config",
"AnymalCFlatCfg": ".anymal_c.flat.anymal_c_flat_config",
@@ -35,6 +38,8 @@
"CartpoleRunnerCfg": ".cartpole.cartpole_config",
"MiniCheetahRunnerCfg": ".mini_cheetah.mini_cheetah_config",
"MiniCheetahRefRunnerCfg": ".mini_cheetah.mini_cheetah_ref_config",
"MiniCheetahOscRunnerCfg": ".mini_cheetah.mini_cheetah_osc_config",
"MiniCheetahOscIKRunnerCfg": ".mini_cheetah.mini_cheetah_osc_IK_config",
"MITHumanoidRunnerCfg": ".mit_humanoid.mit_humanoid_config",
"A1RunnerCfg": ".a1.a1_config",
"AnymalCFlatRunnerCfg": ".anymal_c.flat.anymal_c_flat_config",
@@ -49,6 +54,16 @@
"MiniCheetahRefCfg",
"MiniCheetahRefRunnerCfg",
],
"mini_cheetah_osc": [
"MiniCheetahOsc",
"MiniCheetahOscCfg",
"MiniCheetahOscRunnerCfg",
],
"mini_cheetah_osc_IK": [
"MiniCheetahOscIK",
"MiniCheetahOscIKCfg",
"MiniCheetahOscIKRunnerCfg",
],
"humanoid": ["MIT_Humanoid", "MITHumanoidCfg", "MITHumanoidRunnerCfg"],
"humanoid_running": [
"HumanoidRunning",
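The class_dict registry maps each class name to its module path relative to gym.envs, per the "from y import x where {y:x}" comment, and task_dict bundles an environment with its config and runner config under one task name. As a minimal sketch of how such a registry can be resolved lazily with importlib — the helper name resolve_class is illustrative, not from this repo:

    # Hypothetical helper (not in the repo): resolve a registry entry such as
    # {"MiniCheetahOsc": ".mini_cheetah.mini_cheetah_osc"} into the class itself.
    import importlib

    def resolve_class(name, registry, package="gym.envs"):
        # The relative module path is anchored at the package; the module is
        # expected to define a class with the registered name.
        module = importlib.import_module(registry[name], package=package)
        return getattr(module, name)

With the entries added here, registering "mini_cheetah_osc" makes MiniCheetahOsc, MiniCheetahOscCfg, and MiniCheetahOscRunnerCfg all resolvable from a single task name.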
125 changes: 60 additions & 65 deletions gym/envs/mini_cheetah/mini_cheetah_config.py
@@ -3,14 +3,14 @@
     LeggedRobotRunnerCfg,
 )

-BASE_HEIGHT_REF = 0.33
+BASE_HEIGHT_REF = 0.3


 class MiniCheetahCfg(LeggedRobotCfg):
     class env(LeggedRobotCfg.env):
         num_envs = 2**12
         num_actuators = 12
-        episode_length_s = 10
+        episode_length_s = 4

     class terrain(LeggedRobotCfg.terrain):
         mesh_type = "plane"
@@ -25,67 +25,63 @@ class init_state(LeggedRobotCfg.init_state):
         # * reset setup chooses how the initial conditions are chosen.
         # * "reset_to_basic" = a single position
         # * "reset_to_range" = uniformly random from a range defined below
-        reset_mode = "reset_to_basic"
+        reset_mode = "reset_to_range"

         # * default COM for basic initialization
-        pos = [0.0, 0.0, 0.33]  # x,y,z [m]
+        pos = [0.0, 0.0, 0.35]  # x,y,z [m]
         rot = [0.0, 0.0, 0.0, 1.0]  # x,y,z,w [quat]
         lin_vel = [0.0, 0.0, 0.0]  # x,y,z [m/s]
         ang_vel = [0.0, 0.0, 0.0]  # x,y,z [rad/s]

         # * initialization for random range setup
         dof_pos_range = {
-            "haa": [-0.05, 0.05],
-            "hfe": [-0.85, -0.6],
-            "kfe": [-1.45, 1.72],
-        }
-        dof_vel_range = {
-            "haa": [0.0, 0.0],
-            "hfe": [0.0, 0.0],
-            "kfe": [0.0, 0.0],
+            "haa": [-0.01, 0.01],
+            "hfe": [-0.785398, -0.785398],
+            "kfe": [1.596976, 1.596976],
         }
+        dof_vel_range = {"haa": [0.0, 0.0], "hfe": [0.0, 0.0], "kfe": [0.0, 0.0]}
         root_pos_range = [
             [0.0, 0.0],  # x
             [0.0, 0.0],  # y
-            [0.37, 0.4],  # z
+            [0.35, 0.35],  # z
             [0.0, 0.0],  # roll
             [0.0, 0.0],  # pitch
-            [0.0, 0.0],
-        ]  # yaw
+            [0.0, 0.0],  # yaw
+        ]
         root_vel_range = [
-            [-0.05, 0.05],  # x
+            [-0.5, 2.0],  # x
             [0.0, 0.0],  # y
             [-0.05, 0.05],  # z
             [0.0, 0.0],  # roll
             [0.0, 0.0],  # pitch
-            [0.0, 0.0],
-        ]  # yaw
+            [0.0, 0.0],  # yaw
+        ]

     class control(LeggedRobotCfg.control):
         # * PD Drive parameters:
         stiffness = {"haa": 20.0, "hfe": 20.0, "kfe": 20.0}
         damping = {"haa": 0.5, "hfe": 0.5, "kfe": 0.5}
         ctrl_frequency = 100
-        desired_sim_frequency = 1000
+        desired_sim_frequency = 500

     class commands:
         # * time before command are changed[s]
-        resampling_time = 10.0
+        resampling_time = 3.0

         class ranges:
-            lin_vel_x = [-1.0, 1.0]  # min max [m/s]
+            lin_vel_x = [-2.0, 3.0]  # min max [m/s]
             lin_vel_y = 1.0  # max [m/s]
-            yaw_vel = 1  # max [rad/s]
+            yaw_vel = 3  # max [rad/s]

     class push_robots:
-        toggle = False
-        interval_s = 1
-        max_push_vel_xy = 0.5
+        toggle = True
+        interval_s = 15
+        max_push_vel_xy = 0.05
+        push_box_dims = [0.3, 0.1, 0.1]  # x,y,z [m]

     class domain_rand:
-        randomize_friction = False
-        friction_range = [0.5, 1.25]
+        randomize_friction = True
+        friction_range = [0.5, 1.0]
         randomize_base_mass = False
         added_mass_range = [-1.0, 1.0]
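The reset_mode switch from "reset_to_basic" to "reset_to_range" means initial conditions are now drawn uniformly from the ranges above, e.g. a forward root velocity anywhere in [-0.5, 2.0] m/s instead of a single pose at rest. A minimal sketch of that sampling, assuming torch tensors (the helper is illustrative, not the repo's implementation):

    import torch

    def sample_uniform(lo, hi, num_envs):
        # Uniform sample in [lo, hi], one value per environment.
        return lo + (hi - lo) * torch.rand(num_envs)

    # e.g. the x entry of root_vel_range above:
    vx0 = sample_uniform(-0.5, 2.0, num_envs=4096)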

@@ -95,16 +91,13 @@ class asset(LeggedRobotCfg.asset):
+ "mini_cheetah/urdf/mini_cheetah_simple.urdf"
)
foot_name = "foot"
penalize_contacts_on = []
terminate_after_contacts_on = ["base", "thigh"]
penalize_contacts_on = ["shank"]
terminate_after_contacts_on = ["base"]
end_effector_names = ["foot"]
# * merge bodies connected by fixed joints.
collapse_fixed_joints = False
# * 1 to disable, 0 to enable...bitwise filter
self_collisions = 1
flip_visual_attachments = False
disable_gravity = False
# * set all torques set to 0
disable_motors = False
joint_damping = 0.1
rotor_inertia = [0.002268, 0.002268, 0.005484] * 4
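The contact changes are a behavioral tweak: thigh contact no longer terminates an episode (only base contact does), while shank contact is now penalized rather than ignored. As a rough sketch, contact-based termination is typically a threshold test on per-body contact forces; the shapes, names, and threshold below are assumptions, not taken from this repo:

    import torch

    def contact_termination(contact_forces, term_body_ids, thresh=1.0):
        # contact_forces: (num_envs, num_bodies, 3) net contact force per body
        force_mag = contact_forces[:, term_body_ids, :].norm(dim=-1)
        return (force_mag > thresh).any(dim=1)  # (num_envs,) done flags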
@@ -118,34 +111,34 @@ class reward_settings(LeggedRobotCfg.reward_settings):
         tracking_sigma = 0.25

     class scaling(LeggedRobotCfg.scaling):
-        base_ang_vel = 1.0
-        base_lin_vel = 1.0
-        commands = 1
-        dof_vel = 100.0  # ought to be roughly max expected speed.
-        base_height = 1
-        dof_pos = 1
+        base_ang_vel = 0.3
+        base_lin_vel = BASE_HEIGHT_REF
+        dof_vel = 4 * [2.0, 2.0, 4.0]
+        base_height = 0.3
+        dof_pos = 4 * [0.2, 0.3, 0.3]
         dof_pos_obs = dof_pos
-        dof_pos_target = dof_pos
-        # tau_ff = 4*[18, 18, 28] # hip-abad, hip-pitch, knee
+        dof_pos_target = 4 * [0.2, 0.3, 0.3]
+        tau_ff = 4 * [18, 18, 28]
+        commands = [3, 1, 3]


 class MiniCheetahRunnerCfg(LeggedRobotRunnerCfg):
     seed = -1

     class policy(LeggedRobotRunnerCfg.policy):
-        actor_hidden_dims = [256, 256, 256]
-        critic_hidden_dims = [256, 256, 256]
+        actor_hidden_dims = [256, 256, 128]
+        critic_hidden_dims = [128, 64]
         # * can be elu, relu, selu, crelu, lrelu, tanh, sigmoid
         activation = "elu"

         actor_obs = [
             "base_height",
             "base_lin_vel",
             "base_ang_vel",
             "projected_gravity",
             "commands",
             "dof_pos_obs",
             "dof_vel",
             "dof_pos_target",
         ]
         critic_obs = [
             "base_height",
@@ -155,33 +148,38 @@ class policy(LeggedRobotRunnerCfg.policy):
"commands",
"dof_pos_obs",
"dof_vel",
"dof_pos_target",
]

actions = ["dof_pos_target"]
add_noise = False
add_noise = True

class noise:
dof_pos_obs = 0.005 # can be made very low
scale = 1.0
dof_pos_obs = 0.01
base_ang_vel = 0.01
dof_pos = 0.005
dof_vel = 0.005
base_ang_vel = 0.05
projected_gravity = 0.02
lin_vel = 0.05
ang_vel = [0.3, 0.15, 0.4]
gravity_vec = 0.1

class reward(LeggedRobotRunnerCfg.policy.reward):
class weights(LeggedRobotRunnerCfg.policy.reward.weights):
tracking_lin_vel = 5.0
tracking_ang_vel = 5.0
tracking_lin_vel = 4.0
tracking_ang_vel = 2.0
lin_vel_z = 0.0
ang_vel_xy = 0.0
ang_vel_xy = 0.01
orientation = 1.0
torques = 5.0e-7
dof_vel = 1.0
base_height = 1.0
action_rate = 0.001
action_rate2 = 0.0001
dof_vel = 0.0
min_base_height = 1.5
action_rate = 0.01
action_rate2 = 0.001
stand_still = 0.0
dof_pos_limits = 0.0
feet_contact_forces = 0.0
dof_near_home = 1.0
dof_near_home = 0.0

class termination_weight:
termination = 0.01
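With add_noise now True, each observation listed in the noise class presumably receives additive noise modulated by the new global scale factor. A sketch of the usual pattern, assuming uniform noise in [-1, 1] times the per-observation level (the distribution is an assumption, not taken from this repo):

    import torch

    def add_obs_noise(obs, level, scale=1.0):
        # level sets the per-observation magnitude; scale is a global
        # multiplier like the new noise.scale = 1.0 above.
        return obs + scale * level * (2.0 * torch.rand_like(obs) - 1.0)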
@@ -191,23 +189,20 @@ class algorithm(LeggedRobotRunnerCfg.algorithm):
         value_loss_coef = 1.0
         use_clipped_value_loss = True
         clip_param = 0.2
-        entropy_coef = 0.01
-        num_learning_epochs = 6
+        entropy_coef = 0.02
+        num_learning_epochs = 4
         # * mini batch size = num_envs*nsteps / nminibatches
-        num_mini_batches = 6
-        learning_rate = 1.0e-4
+        num_mini_batches = 8
+        learning_rate = 1.0e-5
         schedule = "adaptive"  # can be adaptive or fixed
         discount_horizon = 1.0  # [s]
-        GAE_bootstrap_horizon = 1.0  # [s]
+        GAE_bootstrap_horizon = 2.0  # [s]
         desired_kl = 0.01
         max_grad_norm = 1.0

     class runner(LeggedRobotRunnerCfg.runner):
         run_name = ""
         experiment_name = "mini_cheetah"
         # * number of policy updates
-        max_iterations = 1000
+        max_iterations = 500
         algorithm_class_name = "PPO"
         # * per iteration
         # * (n_steps in Rudin 2021 paper - batch_size = n_steps * n_robots)
-        num_steps_per_env = 24
+        num_steps_per_env = 32
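Plugging the tuned values into the mini-batch comment above makes the numbers concrete:

    num_envs = 2**12              # 4096, from MiniCheetahCfg.env
    num_steps_per_env = 32        # rollout length per iteration
    num_mini_batches = 8

    batch_size = num_envs * num_steps_per_env         # 131072 samples/iteration
    mini_batch_size = batch_size // num_mini_batches  # 16384 samples/update

So each of the 4 learning epochs sweeps the 131072-sample batch in 8 mini-batches of 16384 samples.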