My policy:

PPOPolicy(
  (actor): Recurrent(
    (nn): LSTM(128, 128, batch_first=True)
    (fc1): Linear(in_features=2, out_features=128, bias=True)
    (fc2): Linear(in_features=128, out_features=3, bias=True)
  )
  (critic): Net(
    (model): MLP(
      (model): Sequential(
        (0): Linear(in_features=2, out_features=128, bias=True)
        (1): ReLU()
      )
    )
  )
  (_actor_critic): ActorCritic(
    (actor): Recurrent(
      (nn): LSTM(128, 128, batch_first=True)
      (fc1): Linear(in_features=2, out_features=128, bias=True)
      (fc2): Linear(in_features=128, out_features=3, bias=True)
    )
    (critic): Net(
      (model): MLP(
        (model): Sequential(
          (0): Linear(in_features=2, out_features=128, bias=True)
          (1): ReLU()
        )
      )
    )
  )
)

Exception:

---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
...
---> trainer.run()
File d:\Miniconda3\lib\site-packages\tianshou\trainer\base.py:439, in BaseTrainer.run(self)
437 try:
438 self.is_run = True
--> 439 deque(self, maxlen=0) # feed the entire iterator into a zero-length deque
440 info = gather_info(
441 self.start_time, self.train_collector, self.test_collector,
442 self.best_reward, self.best_reward_std
443 )
444 finally:
File d:\Miniconda3\lib\site-packages\tianshou\trainer\base.py:251, in BaseTrainer.__iter__(self)
250 def __iter__(self): # type: ignore
--> 251 self.reset()
252 return self
File d:\Miniconda3\lib\site-packages\tianshou\trainer\base.py:236, in BaseTrainer.reset(self)
234 assert self.episode_per_test is not None
235 self.test_collector.reset_stat()
--> 236 test_result = test_episode(
237 self.policy, self.test_collector, self.test_fn, self.start_epoch,
238 self.episode_per_test, self.logger, self.env_step, self.reward_metric
239 )
240 self.best_epoch = self.start_epoch
241 self.best_reward, self.best_reward_std = \
242 test_result["rew"], test_result["rew_std"]
File d:\Miniconda3\lib\site-packages\tianshou\trainer\utils.py:27, in test_episode(policy, collector, test_fn, epoch, n_episode, logger, global_step, reward_metric)
25 if test_fn:
26 test_fn(epoch, global_step)
---> 27 result = collector.collect(n_episode=n_episode)
28 if reward_metric:
29 rew = reward_metric(result["rews"])
File d:\Miniconda3\lib\site-packages\tianshou\data\collector.py:287, in Collector.collect(self, n_step, n_episode, random, render, no_grad, gym_reset_kwargs)
284 if no_grad:
285 with torch.no_grad(): # faster than retain_grad version
286 # self.data.obs will be used by agent to get result
--> 287 result = self.policy(self.data, last_state)
288 else:
289 result = self.policy(self.data, last_state)
File d:\Miniconda3\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
1126 # If we don't have any hooks, we want to skip the rest of the logic in
1127 # this function, and just call forward.
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []
File d:\Miniconda3\lib\site-packages\tianshou\policy\modelfree\pg.py:112, in PGPolicy.forward(self, batch, state, **kwargs)
110 dist = self.dist_fn(*logits)
111 else:
--> 112 dist = self.dist_fn(logits)
113 if self._deterministic_eval and not self.training:
114 if self.action_type == "discrete":
File d:\Miniconda3\lib\site-packages\torch\distributions\categorical.py:64, in Categorical.__init__(self, probs, logits, validate_args)
62 self._num_events = self._param.size()[-1]
63 batch_shape = self._param.size()[:-1] if self._param.ndimension() > 1 else torch.Size()
---> 64 super(Categorical, self).__init__(batch_shape, validate_args=validate_args)
File d:\Miniconda3\lib\site-packages\torch\distributions\distribution.py:55, in Distribution.__init__(self, batch_shape, event_shape, validate_args)
53 valid = constraint.check(value)
54 if not valid.all():
---> 55 raise ValueError(
56 f"Expected parameter {param} "
57 f"({type(value).__name__} of shape {tuple(value.shape)}) "
58 f"of distribution {repr(self)} "
59 f"to satisfy the constraint {repr(constraint)}, "
60 f"but found invalid values:\n{value}"
61 )
62 super(Distribution, self).__init__()
ValueError: Expected parameter probs (Tensor of shape (24, 3)) of distribution Categorical(probs: torch.Size([24, 3])) to satisfy the constraint Simplex(), but found invalid values:
tensor([[ 0.1664, 0.2217, 0.6119],
[ 0.1788, 0.2127, 0.6085],
[ 0.2399, 0.1794, 0.5808],
[ 0.2340, 0.1823, 0.5837],
[ 0.1779, 0.2133, 0.6088],
[-0.1317, 0.4470, 0.6847],
[ 0.1711, 0.2180, 0.6109],
[-0.0800, 0.3624, 0.7176],
[ 0.1664, 0.2217, 0.6119],
[ 0.1652, 0.2361, 0.5987],
[ 0.1692, 0.2194, 0.6113],
[ 0.1650, 0.2230, 0.6121],
[ 0.1675, 0.2208, 0.6117],
[ 0.1949, 0.2346, 0.5704],
[ 0.1968, 0.2344, 0.5688],
[ 0.1657, 0.2362, 0.5982],
[ 0.1777, 0.2363, 0.5859],
[-0.1306, 0.4481, 0.6825],
[ 0.1691, 0.2365, 0.5945],
[-0.1050, 0.3797, 0.7254],
[ 0.1600, 0.2341, 0.6058],
[ 0.1592, 0.2330, 0.6078],
[ 0.1629, 0.2356, 0.6015],
        [-0.1306, 0.4481, 0.6825]])
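
For reference, the Simplex() constraint in the error means that each row passed to torch.distributions.Categorical(probs=...) must be a valid probability vector: non-negative entries that sum to 1. Several rows above contain negative entries, which is why the check fails. Below is a minimal sketch of the constraint itself, assuming the actor is emitting unnormalized scores rather than probabilities; it only illustrates the difference between the probs= and logits= arguments and is not the actual resolution of this thread.

```python
import torch
from torch.distributions import Categorical

# One of the offending rows from the error above: it contains a negative
# entry, so it is not a valid probability vector even though it sums to 1.
raw = torch.tensor([[-0.1317, 0.4470, 0.6847]])

# Categorical(probs=...) validates against the Simplex constraint and would
# raise the same ValueError as in the traceback:
# Categorical(probs=raw)  # ValueError: ... to satisfy the constraint Simplex() ...

# Unnormalized scores can instead be treated as logits, or normalized first:
dist_from_logits = Categorical(logits=raw)
dist_from_probs = Categorical(probs=raw.softmax(dim=-1))

print(dist_from_logits.probs)  # each row is now non-negative and sums to 1
print(dist_from_probs.probs)
```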
Answered by GF-Huang, Jul 19, 2022
Sorry for my mistake, just ignore this discussion.