My policy:

PPOPolicy(
  (actor): Recurrent(
    (nn): LSTM(128, 128, batch_first=True)
    (fc1): Linear(in_features=2, out_features=128, bias=True)
    (fc2): Linear(in_features=128, out_features=3, bias=True)
  )
  (critic): Net(
    (model): MLP(
      (model): Sequential(
        (0): Linear(in_features=2, out_features=128, bias=True)
        (1): ReLU()
      )
    )
  )
  (_actor_critic): ActorCritic(
    (actor): Recurrent(
      (nn): LSTM(128, 128, batch_first=True)
      (fc1): Linear(in_features=2, out_features=128, bias=True)
      (fc2): Linear(in_features=128, out_features=3, bias=True)
    )
    (critic): Net(
      (model): MLP(
        (model): Sequential(
          (0): Linear(in_features=2, out_features=128, bias=True)
          (1): ReLU()
        )
      )
    )
  )
)

Exception:

---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
...
---> trainer.run()
File d:\Miniconda3\lib\site-packages\tianshou\trainer\base.py:439, in BaseTrainer.run(self)
437 try:
438 self.is_run = True
--> 439 deque(self, maxlen=0) # feed the entire iterator into a zero-length deque
440 info = gather_info(
441 self.start_time, self.train_collector, self.test_collector,
442 self.best_reward, self.best_reward_std
443 )
444 finally:
File d:\Miniconda3\lib\site-packages\tianshou\trainer\base.py:251, in BaseTrainer.__iter__(self)
250 def __iter__(self): # type: ignore
--> 251 self.reset()
252 return self
File d:\Miniconda3\lib\site-packages\tianshou\trainer\base.py:236, in BaseTrainer.reset(self)
234 assert self.episode_per_test is not None
235 self.test_collector.reset_stat()
--> 236 test_result = test_episode(
237 self.policy, self.test_collector, self.test_fn, self.start_epoch,
238 self.episode_per_test, self.logger, self.env_step, self.reward_metric
239 )
240 self.best_epoch = self.start_epoch
241 self.best_reward, self.best_reward_std = \
242 test_result["rew"], test_result["rew_std"]
File d:\Miniconda3\lib\site-packages\tianshou\trainer\utils.py:27, in test_episode(policy, collector, test_fn, epoch, n_episode, logger, global_step, reward_metric)
25 if test_fn:
26 test_fn(epoch, global_step)
---> 27 result = collector.collect(n_episode=n_episode)
28 if reward_metric:
29 rew = reward_metric(result["rews"])
File d:\Miniconda3\lib\site-packages\tianshou\data\collector.py:287, in Collector.collect(self, n_step, n_episode, random, render, no_grad, gym_reset_kwargs)
284 if no_grad:
285 with torch.no_grad(): # faster than retain_grad version
286 # self.data.obs will be used by agent to get result
--> 287 result = self.policy(self.data, last_state)
288 else:
289 result = self.policy(self.data, last_state)
File d:\Miniconda3\lib\site-packages\torch\nn\modules\module.py:1130, in Module._call_impl(self, *input, **kwargs)
1126 # If we don't have any hooks, we want to skip the rest of the logic in
1127 # this function, and just call forward.
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []
File d:\Miniconda3\lib\site-packages\tianshou\policy\modelfree\pg.py:112, in PGPolicy.forward(self, batch, state, **kwargs)
110 dist = self.dist_fn(*logits)
111 else:
--> 112 dist = self.dist_fn(logits)
113 if self._deterministic_eval and not self.training:
114 if self.action_type == "discrete":
File d:\Miniconda3\lib\site-packages\torch\distributions\categorical.py:64, in Categorical.__init__(self, probs, logits, validate_args)
62 self._num_events = self._param.size()[-1]
63 batch_shape = self._param.size()[:-1] if self._param.ndimension() > 1 else torch.Size()
---> 64 super(Categorical, self).__init__(batch_shape, validate_args=validate_args)
File d:\Miniconda3\lib\site-packages\torch\distributions\distribution.py:55, in Distribution.__init__(self, batch_shape, event_shape, validate_args)
53 valid = constraint.check(value)
54 if not valid.all():
---> 55 raise ValueError(
56 f"Expected parameter {param} "
57 f"({type(value).__name__} of shape {tuple(value.shape)}) "
58 f"of distribution {repr(self)} "
59 f"to satisfy the constraint {repr(constraint)}, "
60 f"but found invalid values:\n{value}"
61 )
62 super(Distribution, self).__init__()
ValueError: Expected parameter probs (Tensor of shape (24, 3)) of distribution Categorical(probs: torch.Size([24, 3])) to satisfy the constraint Simplex(), but found invalid values:
tensor([[ 0.1664, 0.2217, 0.6119],
[ 0.1788, 0.2127, 0.6085],
[ 0.2399, 0.1794, 0.5808],
[ 0.2340, 0.1823, 0.5837],
[ 0.1779, 0.2133, 0.6088],
[-0.1317, 0.4470, 0.6847],
[ 0.1711, 0.2180, 0.6109],
[-0.0800, 0.3624, 0.7176],
[ 0.1664, 0.2217, 0.6119],
[ 0.1652, 0.2361, 0.5987],
[ 0.1692, 0.2194, 0.6113],
[ 0.1650, 0.2230, 0.6121],
[ 0.1675, 0.2208, 0.6117],
[ 0.1949, 0.2346, 0.5704],
[ 0.1968, 0.2344, 0.5688],
[ 0.1657, 0.2362, 0.5982],
[ 0.1777, 0.2363, 0.5859],
[-0.1306, 0.4481, 0.6825],
[ 0.1691, 0.2365, 0.5945],
[-0.1050, 0.3797, 0.7254],
[ 0.1600, 0.2341, 0.6058],
[ 0.1592, 0.2330, 0.6078],
[ 0.1629, 0.2356, 0.6015],
        [-0.1306, 0.4481, 0.6825]])
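
For reference, the Simplex() constraint in the error means that each row passed to torch.distributions.Categorical(probs=...) must be a valid probability vector: non-negative entries that sum to 1. Several rows above contain negative entries, which is why the check fails. Below is a minimal sketch of the constraint itself, assuming the actor is emitting unnormalized scores rather than probabilities; it only illustrates the difference between the probs= and logits= arguments and is not the actual resolution of this thread.

```python
import torch
from torch.distributions import Categorical

# One of the offending rows from the error above: it contains a negative
# entry, so it is not a valid probability vector even though it sums to 1.
raw = torch.tensor([[-0.1317, 0.4470, 0.6847]])

# Categorical(probs=...) validates against the Simplex constraint and would
# raise the same ValueError as in the traceback:
# Categorical(probs=raw)  # ValueError: ... to satisfy the constraint Simplex() ...

# Unnormalized scores can instead be treated as logits, or normalized first:
dist_from_logits = Categorical(logits=raw)
dist_from_probs = Categorical(probs=raw.softmax(dim=-1))

print(dist_from_logits.probs)  # each row is now non-negative and sums to 1
print(dist_from_probs.probs)
```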
Answered by GF-Huang, Jul 19, 2022
Sorry for my mistake, just ignore this discussion.