# Source code for layeredrl.policies.uniform_policy

from typing import Dict, Tuple, Optional

import torch

from .policy import Policy



# [docs]
class UniformPolicy(Policy):
    """Policy whose actions are random draws from ``self.action_space``.

    The observation, the level input and the level state are all ignored;
    every call simply asks the action space for a fresh sample.
    """

    def reset(self) -> None:
        """Do nothing; this class defines no state of its own to reset."""
        return None

    def _get_raw_action(
        self,
        mapped_env_obs: torch.Tensor,
        level_input: Optional[torch.Tensor],
        level_state: Optional[Dict],
        deterministic: bool,
    ) -> Tuple[torch.Tensor, Dict]:
        """Draw a random raw action, independent of all arguments.

        NOTE(review): the sample is returned without any rescaling. If raw
        actions are expected to lie in [-1, 1] for continuous action spaces,
        that has to hold for ``self.action_space`` already -- confirm against
        the ``Policy`` base class.

        Args:
            mapped_env_obs: The observation from the environment after the
                env_obs_map has been applied. Unused here.
            level_input: The input to this level, i.e., the action from the
                level above. Unused here.
            level_state: The state of the level. Unused here.
            deterministic: Whether a deterministic action is requested.
                Unused here; a new random sample is drawn on every call.

        Returns:
            A tuple of the sampled action as a tensor on ``self.device`` and
            an empty info dict.
        """
        sampled_action = self.action_space.sample()
        raw_action = torch.tensor(sampled_action, device=self.device)
        action_info: Dict = {}
        return raw_action, action_info