Module TeachMyAgent.students.openai_baselines.common.wrappers
Expand source code
import gym
import numpy as np
class TimeLimit(gym.Wrapper):
    """Limit the number of env steps per episode.

    Forces ``done = True`` once ``max_episode_steps`` steps have elapsed
    since the last ``reset()``, and marks ``info['TimeLimit.truncated']``
    so callers can distinguish a time-out from a natural episode end.

    Args:
        env: The environment to wrap.
        max_episode_steps: Maximum number of steps per episode, or ``None``
            for no limit. (The original code raised a ``TypeError`` on the
            ``int >= None`` comparison when the limit was left unset.)
    """

    def __init__(self, env, max_episode_steps=None):
        super(TimeLimit, self).__init__(env)
        self._max_episode_steps = max_episode_steps  # None -> no limit
        self._elapsed_steps = 0  # steps taken since the last reset()

    def step(self, ac):
        """Step the wrapped env, forcing termination at the step limit."""
        observation, reward, done, info = self.env.step(ac)
        self._elapsed_steps += 1
        # Guard the comparison: `int >= None` is a TypeError in Python 3,
        # so only enforce the limit when one was actually configured.
        if (self._max_episode_steps is not None
                and self._elapsed_steps >= self._max_episode_steps):
            done = True
            info['TimeLimit.truncated'] = True
        return observation, reward, done, info

    def reset(self, **kwargs):
        """Reset the step counter, then reset the wrapped environment."""
        self._elapsed_steps = 0
        return self.env.reset(**kwargs)
class ClipActionsWrapper(gym.Wrapper):
    """Sanitize actions before forwarding them to the wrapped env.

    Replaces non-finite entries (NaN / +-inf) and clips the action to the
    environment's action-space bounds, so a policy emitting slightly
    out-of-range or non-finite actions cannot crash the underlying env.
    """

    def step(self, action):
        """Clip ``action`` to ``self.action_space`` bounds and step the env."""
        # nan_to_num maps NaN -> 0 and +-inf -> large finite values; the
        # subsequent clip then pulls everything into the valid action range.
        # (The per-call `import numpy` was hoisted to module level.)
        action = np.clip(np.nan_to_num(action),
                         self.action_space.low, self.action_space.high)
        return self.env.step(action)

    def reset(self, **kwargs):
        """Reset the wrapped environment, forwarding any kwargs."""
        return self.env.reset(**kwargs)
Classes
class ClipActionsWrapper (env: gym.core.Env)
-
Wraps an environment to allow a modular transformation of the :meth:
step
and :meth:reset
methods. This class is the base class for all wrappers. The subclass could override some methods to change the behavior of the original environment without touching the original code.
Note
Don't forget to call
super().__init__(env)
if the subclass overrides :meth:`__init__`.
Wraps an environment to allow a modular transformation of the :meth:
step
and :meth:reset
methods.
Args
env
- The environment to wrap
Expand source code
class ClipActionsWrapper(gym.Wrapper): def step(self, action): import numpy as np action = np.nan_to_num(action) action = np.clip(action, self.action_space.low, self.action_space.high) return self.env.step(action) def reset(self, **kwargs): return self.env.reset(**kwargs)
Ancestors
- gym.core.Wrapper
- gym.core.Env
- typing.Generic
Instance variables
var action_space : gym.spaces.space.Space[~ActType]
-
Returns the action space of the environment.
Expand source code
@property def action_space(self) -> spaces.Space[ActType]: """Returns the action space of the environment.""" if self._action_space is None: return self.env.action_space return self._action_space
var metadata : dict
-
Returns the environment metadata.
Expand source code
@property def metadata(self) -> dict: """Returns the environment metadata.""" if self._metadata is None: return self.env.metadata return self._metadata
var observation_space : gym.spaces.space.Space
-
Returns the observation space of the environment.
Expand source code
@property def observation_space(self) -> spaces.Space: """Returns the observation space of the environment.""" if self._observation_space is None: return self.env.observation_space return self._observation_space
var render_mode : Union[str, NoneType]
-
Returns the environment render_mode.
Expand source code
@property def render_mode(self) -> Optional[str]: """Returns the environment render_mode.""" return self.env.render_mode
var spec
-
Returns the environment specification.
Expand source code
@property def spec(self): """Returns the environment specification.""" return self.env.spec
Methods
def reset(self, **kwargs)
-
Resets the environment with kwargs.
Expand source code
def reset(self, **kwargs): return self.env.reset(**kwargs)
def step(self, action)
-
Steps through the environment with action.
Expand source code
def step(self, action): import numpy as np action = np.nan_to_num(action) action = np.clip(action, self.action_space.low, self.action_space.high) return self.env.step(action)
class TimeLimit (env, max_episode_steps=None)
-
Wraps an environment to allow a modular transformation of the :meth:
step
and :meth:reset
methods. This class is the base class for all wrappers. The subclass could override some methods to change the behavior of the original environment without touching the original code.
Note
Don't forget to call
super().__init__(env)
if the subclass overrides :meth:`__init__`.
Wraps an environment to allow a modular transformation of the :meth:
step
and :meth:reset
methods.
Args
env
- The environment to wrap
Expand source code
class TimeLimit(gym.Wrapper): def __init__(self, env, max_episode_steps=None): super(TimeLimit, self).__init__(env) self._max_episode_steps = max_episode_steps self._elapsed_steps = 0 def step(self, ac): observation, reward, done, info = self.env.step(ac) self._elapsed_steps += 1 if self._elapsed_steps >= self._max_episode_steps: done = True info['TimeLimit.truncated'] = True return observation, reward, done, info def reset(self, **kwargs): self._elapsed_steps = 0 return self.env.reset(**kwargs)
Ancestors
- gym.core.Wrapper
- gym.core.Env
- typing.Generic
Instance variables
var action_space : gym.spaces.space.Space[~ActType]
-
Returns the action space of the environment.
Expand source code
@property def action_space(self) -> spaces.Space[ActType]: """Returns the action space of the environment.""" if self._action_space is None: return self.env.action_space return self._action_space
var metadata : dict
-
Returns the environment metadata.
Expand source code
@property def metadata(self) -> dict: """Returns the environment metadata.""" if self._metadata is None: return self.env.metadata return self._metadata
var observation_space : gym.spaces.space.Space
-
Returns the observation space of the environment.
Expand source code
@property def observation_space(self) -> spaces.Space: """Returns the observation space of the environment.""" if self._observation_space is None: return self.env.observation_space return self._observation_space
var render_mode : Union[str, NoneType]
-
Returns the environment render_mode.
Expand source code
@property def render_mode(self) -> Optional[str]: """Returns the environment render_mode.""" return self.env.render_mode
var spec
-
Returns the environment specification.
Expand source code
@property def spec(self): """Returns the environment specification.""" return self.env.spec
Methods
def reset(self, **kwargs)
-
Resets the environment with kwargs.
Expand source code
def reset(self, **kwargs): self._elapsed_steps = 0 return self.env.reset(**kwargs)
def step(self, ac)
-
Steps through the environment with action.
Expand source code
def step(self, ac): observation, reward, done, info = self.env.step(ac) self._elapsed_steps += 1 if self._elapsed_steps >= self._max_episode_steps: done = True info['TimeLimit.truncated'] = True return observation, reward, done, info