Implementing encoding networks using Sequential #989

Merged · 5 commits · Aug 27, 2021
2 changes: 1 addition & 1 deletion alf/algorithms/generator_test.py
@@ -142,7 +142,7 @@ def _train():
inputs=None, loss_func=_neglogprob, batch_size=batch_size)
generator.update_with_gradient(alg_step.info)

- for i in range(2000):
+ for i in range(2100):
Contributor Author

This is because the input preprocessor is not copied, which causes different parameter initialization. See the comment for the encoding network.

_train()
if functional_gradient:
weight = net._fc_layers[0].weight
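A side note on the iteration bump above (2000 to 2100): as the author's comment explains, the input preprocessor is no longer copied, so the networks consume the random stream differently and start from different parameters. A plain-PyTorch sketch of the mechanism (not ALF's actual code path; the layer sizes are illustrative):

```python
import torch
import torch.nn as nn

torch.manual_seed(0)
layer = nn.Linear(4, 4)     # first module built under seed 0

torch.manual_seed(0)
preproc = nn.Linear(4, 4)   # an extra module (e.g. an uncopied preprocessor)
layer2 = nn.Linear(4, 4)    # same layer, now drawn from a shifted RNG stream

assert torch.equal(preproc.weight, layer.weight)     # extra module ate layer's draws
assert not torch.equal(layer2.weight, layer.weight)  # so layer2 starts differently
```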
8 changes: 4 additions & 4 deletions alf/algorithms/hypernetwork_algorithm_test.py
@@ -139,12 +139,12 @@ def _train(train_batch=None, entropy_regularization=None):

def _test(i, sampled_predictive=False):
print("-" * 68)
- weight = algorithm._generator._net._fc_layers[0].weight
+ weight = algorithm._generator._net[0].weight
learned_cov = weight @ weight.t()
print("norm of generator weight: {}".format(weight.norm()))
print("norm of learned_cov: {}".format(learned_cov.norm()))

- learned_mean = algorithm._generator._net._fc_layers[0].bias
+ learned_mean = algorithm._generator._net[0].bias
predicts = inputs @ learned_mean # [batch]
pred_err = torch.norm(predicts - targets.squeeze())
print("train_iter {}: pred err {}".format(i, pred_err))
@@ -183,10 +183,10 @@ def _test(i, sampled_predictive=False):
if i % 1000 == 0:
_test(i)

- learned_mean = algorithm._generator._net._fc_layers[0].bias
+ learned_mean = algorithm._generator._net[0].bias
mean_err = torch.norm(learned_mean - true_mean.squeeze())
mean_err = mean_err / torch.norm(true_mean)
- weight = algorithm._generator._net._fc_layers[0].weight
+ weight = algorithm._generator._net[0].weight
learned_cov = weight @ weight.t()
cov_err = torch.norm(learned_cov - true_cov)
cov_err = cov_err / torch.norm(true_cov)
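The accessor changes in this test follow directly from the PR's theme: once the generator's network is built with the Sequential container, sublayers are addressed by index (`_net[0]`) instead of through the private `_fc_layers` attribute of the old encoding network. A rough `torch.nn.Sequential` analogy (the layer sizes are made up):

```python
import torch.nn as nn

net = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 1))
first = net[0]             # indexing replaces net._fc_layers[0]
print(first.weight.shape)  # torch.Size([16, 8])
print(first.bias.shape)    # torch.Size([16])
```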
4 changes: 2 additions & 2 deletions alf/algorithms/icm_algorithm.py
@@ -179,8 +179,8 @@ def _step(self, time_step: TimeStep, state, calc_rewards=True):
prev_feature = state

forward_pred, _ = self._forward_net(
- inputs=[prev_feature.detach(),
-         self._encode_action(prev_action)])
+ [prev_feature.detach(),
+  self._encode_action(prev_action)])
# nn.MSELoss doesn't support reducing along a dim
forward_loss = 0.5 * torch.mean(
math_ops.square(forward_pred - feature.detach()), dim=-1)
38 changes: 26 additions & 12 deletions alf/layers.py
@@ -636,26 +636,40 @@ def __init__(self,
        else:
            self._bias = None

-        for i in range(n):
-            if kernel_initializer is None:
-                variance_scaling_init(
-                    self._weight.data[i],
-                    gain=kernel_init_gain,
-                    nonlinearity=self._activation)
-            else:
-                kernel_initializer(self._weight.data[i])
-
-        if use_bias:
-            nn.init.constant_(self._bias.data, bias_init_value)
+        self._n = n
+        self._kernel_initializer = kernel_initializer
+        self._kernel_init_gain = kernel_init_gain
+        self._bias_init_value = bias_init_value
+        self._use_bias = use_bias
+        self._use_bn = use_bn
+        self._use_ln = use_ln
        if use_bn:
            self._bn = nn.BatchNorm1d(n * output_size)
        else:
            self._bn = None

        if use_ln:
            self._ln = nn.GroupNorm(n, n * output_size)
        else:
            self._ln = None
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        for i in range(self._n):
+            if self._kernel_initializer is None:
+                variance_scaling_init(
+                    self._weight.data[i],
+                    gain=self._kernel_init_gain,
+                    nonlinearity=self._activation)
+            else:
+                self._kernel_initializer(self._weight.data[i])
+
+        if self._use_bias:
+            nn.init.constant_(self._bias.data, self._bias_init_value)
+
+        if self._use_ln:
+            self._ln.reset_parameters()
+        if self._use_bn:
+            self._bn.reset_parameters()

    def forward(self, inputs):
        """Forward
6 changes: 4 additions & 2 deletions alf/networks/actor_distribution_networks_test.py
@@ -52,12 +52,13 @@ def _init(self, lstm_hidden_size):
actor_fc_layer_params=(64, 32))
if isinstance(lstm_hidden_size, int):
lstm_hidden_size = [lstm_hidden_size]
- state = []
+ state = [()]
for size in lstm_hidden_size:
state.append((torch.randn((
1,
size,
), dtype=torch.float32), ) * 2)
+ state.append(())
else:
network_ctor = ActorDistributionNetwork
state = ()
@@ -157,7 +158,8 @@ def test_mixed_actor_distributions(self, lstm_hidden_size):
if lstm_hidden_size is None:
self.assertEqual(state, ())
else:
- self.assertEqual(len(state), len(lstm_hidden_size))
+ self.assertEqual(
+     len(alf.nest.flatten(state)), 2 * len(lstm_hidden_size))

def test_make_parallel(self):
obs_spec = TensorSpec((3, 20, 20), torch.float32)
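The new expectations reflect how a Sequential-built network reports recurrent state: every element of the container contributes an entry to the state nest, and stateless elements contribute `()`, so only the LSTM cells' `(h, c)` pairs survive `alf.nest.flatten`; that is why the test now expects `2 * len(lstm_hidden_size)` flattened tensors. The same `state = [()]` / `state.append(())` pattern appears in actor_networks_test.py below. An illustrative nest for one 64-unit LSTM cell (the exact spec layout is an assumption):

```python
import torch

# Illustrative state nest for [stateless layers, LSTMCell(64), stateless layers]:
state = [
    (),                                        # stateless input layers
    (torch.randn(1, 64), torch.randn(1, 64)),  # (h, c) of the LSTM cell
    (),                                        # stateless output layers
]
# Flattening drops the empty tuples, leaving 2 tensors per LSTM cell.
```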
3 changes: 2 additions & 1 deletion alf/networks/actor_networks_test.py
@@ -32,12 +32,13 @@ def _init(self, lstm_hidden_size):
actor_fc_layer_params=actor_fc_layer_params)
if isinstance(lstm_hidden_size, int):
lstm_hidden_size = [lstm_hidden_size]
- state = []
+ state = [()]
for size in lstm_hidden_size:
state.append((torch.randn((
1,
size,
), dtype=torch.float32), ) * 2)
+ state.append(())
else:
network_ctor = actor_network.ActorNetwork
state = ()
8 changes: 6 additions & 2 deletions alf/networks/containers.py
@@ -115,8 +115,12 @@ def Sequential(*modules,


class _Sequential(Network):
- def __init__(self, elements, element_dict, output, input_tensor_spec,
-              name):
+ def __init__(self,
+              elements=(),
+              element_dict={},
+              output='',
+              input_tensor_spec=None,
+              name='Sequential'):
state_spec = []
modules = []
inputs = []
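Every `_Sequential.__init__` argument now has a default, so the class is constructible with no arguments. One plausible motivation (an assumption; the diff only shows the signature change) is that generic copy or clone helpers first create an empty instance via `type(obj)()` and then fill in its attributes:

```python
import copy


class Box:
    # Same pattern as _Sequential: all-default arguments keep type(obj)()
    # from raising when a generic helper builds an empty instance first.
    def __init__(self, items=(), name='Box'):
        self._items = tuple(items)
        self._name = name


b = Box([1, 2, 3])
clone = type(b)()  # only works because every argument has a default
clone.__dict__.update(copy.deepcopy(b.__dict__))
```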
5 changes: 3 additions & 2 deletions alf/networks/critic_networks.py
@@ -224,8 +224,9 @@ def __init__(self,
"""
super().__init__(
input_tensor_spec=critic_network.input_tensor_spec, name=name)
- self._obs_encoder = critic_network._obs_encoder.make_parallel(n)
- self._action_encoder = critic_network._action_encoder.make_parallel(n)
+ self._obs_encoder = critic_network._obs_encoder.make_parallel(n, True)
+ self._action_encoder = critic_network._action_encoder.make_parallel(
+     n, True)
self._joint_encoder = critic_network._joint_encoder.make_parallel(n)
self._output_spec = TensorSpec((n, ) +
critic_network.output_spec.shape)
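This hunk and the dynamics_networks.py change below both pass a new second positional argument to `make_parallel`. Its name and semantics are not visible in this diff; the sketch below is purely hypothetical (the parameter name is a placeholder marking the unknown) and should not be read as ALF's actual signature:

```python
# Hypothetical signature consistent with the call sites; `option_flag` is a
# stand-in, not ALF's real parameter name.
def make_parallel(self, n: int, option_flag: bool = False):
    """Return an n-replica parallel version of this network."""
    ...
```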
3 changes: 2 additions & 1 deletion alf/networks/dynamics_networks.py
@@ -162,7 +162,8 @@ def __init__(self,
"""
super().__init__(
input_tensor_spec=dynamics_network.input_tensor_spec, name=name)
- self._joint_encoder = dynamics_network._joint_encoder.make_parallel(n)
+ self._joint_encoder = dynamics_network._joint_encoder.make_parallel(
+     n, True)
self._prob = dynamics_network._prob
if self._prob:
self._projection_net = \