Module TeachMyAgent.teachers.algos.AbstractTeacher
Expand source code
import numpy as np
class AbstractTeacher(object):
    '''
    Base class for ACL methods.

    This will be used to sample tasks for the DeepRL student given a task space provided at the beginning of training.
    '''

    def __init__(self, mins, maxs, env_reward_lb, env_reward_ub, seed=None, **args):
        '''
        Creates an ACL teacher.

        Args:
            mins: Lower bounds of task space
            maxs: Upper bounds of task space
            env_reward_lb: Minimum return possible of the environment (used only if `scale_reward` is activated on the `TeacherController`)
            env_reward_ub: Maximum return possible of the environment (used only if `scale_reward` is activated on the `TeacherController`)
            seed: Seed of the teacher's random state. If `None`, a seed is drawn from numpy's global random generator
            **args: Additional kwargs specific to the ACL method (not used by this base class)
        '''
        # `seed=0` is a valid seed: only fall back to a random one when no seed was given.
        if seed is None:
            seed = np.random.randint(42, 424242)
        self.seed = seed
        self.random_state = np.random.RandomState(self.seed)

        # Task space boundaries
        self.mins = np.array(mins)
        self.maxs = np.array(maxs)

        # Student's value estimator (placeholder returning None for any state)
        self.value_estimator = lambda state: None

        # If reward should be normalized
        self.env_reward_lb = env_reward_lb
        self.env_reward_ub = env_reward_ub

        # Book-keeping logs
        self.bk = {}

    def create_dist_from_bounds(self, mins, maxs, subspace):
        '''
        Create a gaussian distribution from bounds (either over the whole space or only a subspace if `subspace == True`)

        Args:
            mins: Lower bounds of task space
            maxs: Upper bounds of task space
            subspace (bool): Whether the gaussian distribution should be over a subspace
                             (with mean randomly sampled and std equal to 10% of each dimension) or spread over the whole
                             task space

        Returns:
            Tuple `(mean, covariance)` where `mean` is a 1-D array and `covariance` a diagonal matrix.
        '''
        bounds = list(zip(mins, maxs))
        if subspace:
            # One draw per dimension, in order, from the teacher's own random state
            mean = np.array([self.random_state.uniform(low, high) for low, high in bounds])
            variance = [(abs(high - low) * 0.1) ** 2 for low, high in bounds]  # std = 10 % of each dimension
        else:
            mean = np.array([np.mean([low, high]) for low, high in bounds])
            # std = 0.25 * range => ~95.5% of samples are between the bounds
            variance = [(abs(high - low) / 4) ** 2 for low, high in bounds]
        variance = [1e-6 if v == 0 else v for v in variance]  # avoid errors with null variance
        covariance = np.diag(variance)
        return mean, covariance

    def get_or_create_dist(self, dist_dict, mins, maxs, subspace=False):
        '''
        Get distribution if `dist_dict` is not None else create a new one (Gaussian).

        Args:
            dist_dict: Dictionary containing a gaussian distribution (read through its "mean" and "variance" keys)
            mins: Lower bounds of task space
            maxs: Upper bounds of task space
            subspace (bool): Whether the gaussian distribution should be over a subspace
                             (with mean randomly sampled and std equal to 10% of each dimension) or spread over the whole
                             task space

        Returns:
            Tuple `(mean, variance)`; when created here, the second item is the diagonal covariance matrix
            returned by `create_dist_from_bounds`.
        '''
        if dist_dict is not None:
            dist_mean = dist_dict["mean"]
            dist_variance = dist_dict["variance"]
        else:
            dist_mean, dist_variance = self.create_dist_from_bounds(mins, maxs, subspace)
        return dist_mean, dist_variance

    def rescale_task(self, task, original_space=(0, 1)):
        '''
        Maps a task from a n-dimensional `original_space` ([0, 1] by default) towards the n-dimensional task space.

        Args:
            task: Task that has to be mapped
            original_space: Source space bounds (shared by every dimension)

        Returns:
            Array holding the mapped task (values outside the source bounds are clamped by `np.interp`).
        '''
        return np.array([np.interp(task[i], original_space, (low, high))
                         for i, (low, high) in enumerate(zip(self.mins, self.maxs))])

    def inverse_rescale_task(self, task, original_space=(0, 1)):
        '''
        Maps a task from the n-dimensional task space towards a n-dimensional `original_space` ([0, 1] by default).

        Args:
            task: Task that has to be mapped
            original_space: Target space bounds (shared by every dimension)

        Returns:
            Array holding the mapped task (values outside the task space bounds are clamped by `np.interp`).
        '''
        return np.array([np.interp(task[i], (low, high), original_space)
                         for i, (low, high) in enumerate(zip(self.mins, self.maxs))])

    def record_initial_state(self, task, state):
        '''
        Record initial state of the environment given a task.

        Does nothing in this base class.
        '''
        pass

    def episodic_update(self, task, reward, is_success):
        '''
        Get the episodic reward and binary success reward of a task.

        Does nothing in this base class.
        '''
        pass

    def step_update(self, state, action, reward, next_state, done):
        '''
        Get step-related information.

        Does nothing in this base class.
        '''
        pass

    def sample_task(self):
        '''
        Sample a new task.

        Does nothing in this base class (returns None).
        '''
        pass

    def non_exploratory_task_sampling(self):
        '''
        Sample a task without exploration (used to visualize the curriculum)

        Returns:
            Dictionary with the result of `sample_task()` under "task" and None under "infos".
        '''
        return {"task": self.sample_task(), "infos": None}

    def is_non_exploratory_task_sampling_available(self):
        '''
        Whether `non_exploratory_task_sampling` can be called.
        '''
        return True

    def dump(self, dump_dict):
        '''
        Save the teacher.

        Args:
            dump_dict: Dictionary storing what must be saved. It is updated in place with the book-keeping logs.

        Returns:
            The same `dump_dict` object.
        '''
        dump_dict.update(self.bk)
        return dump_dict
Classes
class AbstractTeacher (mins, maxs, env_reward_lb, env_reward_ub, seed=None, **args)
-
Base class for ACL methods.
This will be used to sample tasks for the DeepRL student given a task space provided at the beginning of training.
Creates an ACL teacher.
Args
mins
- Lower bounds of task space
maxs
- Upper bounds of task space
env_reward_lb
- Minimum return possible of the environment (used only if `scale_reward` is activated on the `TeacherController`)
env_reward_ub
- Maximum return possible of the environment (used only if `scale_reward` is activated on the `TeacherController`)
seed
- Seed
**args
- Additional kwargs specific to the ACL method
Expand source code
class AbstractTeacher(object):
    '''
    Base class for ACL methods.

    This will be used to sample tasks for the DeepRL student given a task space provided at the beginning of training.
    '''

    def __init__(self, mins, maxs, env_reward_lb, env_reward_ub, seed=None, **args):
        '''
        Creates an ACL teacher.

        Args:
            mins: Lower bounds of task space
            maxs: Upper bounds of task space
            env_reward_lb: Minimum return possible of the environment (used only if `scale_reward` is activated on the `TeacherController`)
            env_reward_ub: Maximum return possible of the environment (used only if `scale_reward` is activated on the `TeacherController`)
            seed: Seed of the teacher's random state. If `None`, a seed is drawn from numpy's global random generator
            **args: Additional kwargs specific to the ACL method (not used by this base class)
        '''
        # `seed=0` is a valid seed: only fall back to a random one when no seed was given.
        if seed is None:
            seed = np.random.randint(42, 424242)
        self.seed = seed
        self.random_state = np.random.RandomState(self.seed)

        # Task space boundaries
        self.mins = np.array(mins)
        self.maxs = np.array(maxs)

        # Student's value estimator (placeholder returning None for any state)
        self.value_estimator = lambda state: None

        # If reward should be normalized
        self.env_reward_lb = env_reward_lb
        self.env_reward_ub = env_reward_ub

        # Book-keeping logs
        self.bk = {}

    def create_dist_from_bounds(self, mins, maxs, subspace):
        '''
        Create a gaussian distribution from bounds (either over the whole space or only a subspace if `subspace == True`)

        Args:
            mins: Lower bounds of task space
            maxs: Upper bounds of task space
            subspace (bool): Whether the gaussian distribution should be over a subspace
                             (with mean randomly sampled and std equal to 10% of each dimension) or spread over the whole
                             task space

        Returns:
            Tuple `(mean, covariance)` where `mean` is a 1-D array and `covariance` a diagonal matrix.
        '''
        bounds = list(zip(mins, maxs))
        if subspace:
            # One draw per dimension, in order, from the teacher's own random state
            mean = np.array([self.random_state.uniform(low, high) for low, high in bounds])
            variance = [(abs(high - low) * 0.1) ** 2 for low, high in bounds]  # std = 10 % of each dimension
        else:
            mean = np.array([np.mean([low, high]) for low, high in bounds])
            # std = 0.25 * range => ~95.5% of samples are between the bounds
            variance = [(abs(high - low) / 4) ** 2 for low, high in bounds]
        variance = [1e-6 if v == 0 else v for v in variance]  # avoid errors with null variance
        covariance = np.diag(variance)
        return mean, covariance

    def get_or_create_dist(self, dist_dict, mins, maxs, subspace=False):
        '''
        Get distribution if `dist_dict` is not None else create a new one (Gaussian).

        Args:
            dist_dict: Dictionary containing a gaussian distribution (read through its "mean" and "variance" keys)
            mins: Lower bounds of task space
            maxs: Upper bounds of task space
            subspace (bool): Whether the gaussian distribution should be over a subspace
                             (with mean randomly sampled and std equal to 10% of each dimension) or spread over the whole
                             task space

        Returns:
            Tuple `(mean, variance)`; when created here, the second item is the diagonal covariance matrix
            returned by `create_dist_from_bounds`.
        '''
        if dist_dict is not None:
            dist_mean = dist_dict["mean"]
            dist_variance = dist_dict["variance"]
        else:
            dist_mean, dist_variance = self.create_dist_from_bounds(mins, maxs, subspace)
        return dist_mean, dist_variance

    def rescale_task(self, task, original_space=(0, 1)):
        '''
        Maps a task from a n-dimensional `original_space` ([0, 1] by default) towards the n-dimensional task space.

        Args:
            task: Task that has to be mapped
            original_space: Source space bounds (shared by every dimension)

        Returns:
            Array holding the mapped task (values outside the source bounds are clamped by `np.interp`).
        '''
        return np.array([np.interp(task[i], original_space, (low, high))
                         for i, (low, high) in enumerate(zip(self.mins, self.maxs))])

    def inverse_rescale_task(self, task, original_space=(0, 1)):
        '''
        Maps a task from the n-dimensional task space towards a n-dimensional `original_space` ([0, 1] by default).

        Args:
            task: Task that has to be mapped
            original_space: Target space bounds (shared by every dimension)

        Returns:
            Array holding the mapped task (values outside the task space bounds are clamped by `np.interp`).
        '''
        return np.array([np.interp(task[i], (low, high), original_space)
                         for i, (low, high) in enumerate(zip(self.mins, self.maxs))])

    def record_initial_state(self, task, state):
        '''
        Record initial state of the environment given a task.

        Does nothing in this base class.
        '''
        pass

    def episodic_update(self, task, reward, is_success):
        '''
        Get the episodic reward and binary success reward of a task.

        Does nothing in this base class.
        '''
        pass

    def step_update(self, state, action, reward, next_state, done):
        '''
        Get step-related information.

        Does nothing in this base class.
        '''
        pass

    def sample_task(self):
        '''
        Sample a new task.

        Does nothing in this base class (returns None).
        '''
        pass

    def non_exploratory_task_sampling(self):
        '''
        Sample a task without exploration (used to visualize the curriculum)

        Returns:
            Dictionary with the result of `sample_task()` under "task" and None under "infos".
        '''
        return {"task": self.sample_task(), "infos": None}

    def is_non_exploratory_task_sampling_available(self):
        '''
        Whether `non_exploratory_task_sampling` can be called.
        '''
        return True

    def dump(self, dump_dict):
        '''
        Save the teacher.

        Args:
            dump_dict: Dictionary storing what must be saved. It is updated in place with the book-keeping logs.

        Returns:
            The same `dump_dict` object.
        '''
        dump_dict.update(self.bk)
        return dump_dict
Subclasses
Methods
def create_dist_from_bounds(self, mins, maxs, subspace)
-
Create a gaussian distribution from bounds (either over the whole space or only a subspace if `subspace == True`).
Args
mins
- Lower bounds of task space
maxs
- Upper bounds of task space
subspace
:bool
- Whether the gaussian distribution should be over a subspace (with mean randomly sampled and std equal to 10% of each dimension) or spread over the whole task space
Expand source code
def create_dist_from_bounds(self, mins, maxs, subspace):
    '''
    Create a gaussian distribution from bounds (either over the whole space or only a subspace if `subspace == True`)

    Args:
        mins: Lower bounds of task space
        maxs: Upper bounds of task space
        subspace (bool): Whether the gaussian distribution should be over a subspace
                         (with mean randomly sampled and std equal to 10% of each dimension) or spread over the whole
                         task space

    Returns:
        Tuple `(mean, covariance)` where `mean` is a 1-D array and `covariance` a diagonal matrix.
    '''
    # `low`/`high` instead of `min`/`max` so the builtins are not shadowed
    bounds = list(zip(mins, maxs))
    if subspace:
        # One draw per dimension, in order, from the teacher's own random state
        mean = np.array([self.random_state.uniform(low, high) for low, high in bounds])
        variance = [(abs(high - low) * 0.1) ** 2 for low, high in bounds]  # std = 10 % of each dimension
    else:
        mean = np.array([np.mean([low, high]) for low, high in bounds])
        # std = 0.25 * range => ~95.5% of samples are between the bounds
        variance = [(abs(high - low) / 4) ** 2 for low, high in bounds]
    variance = [1e-6 if v == 0 else v for v in variance]  # avoid errors with null variance
    covariance = np.diag(variance)
    return mean, covariance
def dump(self, dump_dict)
-
Save the teacher.
Args
dump_dict
- Dictionary storing what must be saved.
Expand source code
def dump(self, dump_dict): ''' Save the teacher. Args: dump_dict: Dictionary storing what must be saved. ''' dump_dict.update(self.bk) return dump_dict
def episodic_update(self, task, reward, is_success)
-
Get the episodic reward and binary success reward of a task.
Expand source code
def episodic_update(self, task, reward, is_success):
    '''
    Get the episodic reward and binary success reward of a task.

    This implementation ignores its arguments and returns None.
    '''
    return None
def get_or_create_dist(self, dist_dict, mins, maxs, subspace=False)
-
Get distribution if `dist_dict` is not None else create a new one (Gaussian).
Args
dist_dict
- Dictionary containing a gaussian distribution
mins
- Lower bounds of task space
maxs
- Upper bounds of task space
subspace
:bool
- Whether the gaussian distribution should be over a subspace (with mean randomly sampled and std equal to 10% of each dimension) or spread over the whole task space
Expand source code
def get_or_create_dist(self, dist_dict, mins, maxs, subspace=False): ''' Get distribution if `dist_dict` is not None else create a new one (Gaussian). Args: dist_dict: Dictionary containing a gaussian distribution mins: Lower bounds of task space max: Upper bounds of task space subspace (bool): Whether the gaussian distribution should be over a subspace (with mean randomly sampled and std equal to 10% of each dimension) or spread over the whole task space ''' if dist_dict is not None: dist_mean = dist_dict["mean"] dist_variance = dist_dict["variance"] else: dist_mean, dist_variance = self.create_dist_from_bounds(mins, maxs, subspace) return dist_mean, dist_variance
def inverse_rescale_task(self, task, original_space=(0, 1))
-
Maps a task from the n-dimensional task space towards a n-dimensional [0, 1] space.
Args
task
- Task that has to be mapped
original_space
- Target space bounds
Expand source code
def inverse_rescale_task(self, task, original_space=(0, 1)): ''' Maps a task from a n-dimensional [0, 1] space towards the n-dimensional task space. Args: task: Task that has to be mapped original_space: Source space bounds ''' return np.array([np.interp(task[i], (self.mins[i], self.maxs[i]), original_space) for i in range(len(self.mins))])
def is_non_exploratory_task_sampling_available(self)
-
Whether `non_exploratory_task_sampling` can be called.
Expand source code
def is_non_exploratory_task_sampling_available(self):
    '''
    Whether `non_exploratory_task_sampling` can be called.

    Returns:
        Always True in this implementation.
    '''
    is_available = True
    return is_available
def non_exploratory_task_sampling(self)
-
Sample a task without exploration (used to visualize the curriculum)
Expand source code
def non_exploratory_task_sampling(self): ''' Sample a task without exploration (used to visualize the curriculum) ''' return {"task": self.sample_task(), "infos": None}
def record_initial_state(self, task, state)
-
Record initial state of the environment given a task.
Expand source code
def record_initial_state(self, task, state):
    '''
    Record initial state of the environment given a task.

    This implementation ignores its arguments and returns None.
    '''
    return None
def rescale_task(self, task, original_space=(0, 1))
-
Maps a task from a n-dimensional [0, 1] space towards the n-dimensional task space.
Args
task
- Task that has to be mapped
original_space
- Source space bounds
Expand source code
def rescale_task(self, task, original_space=(0, 1)): ''' Maps a task from the n-dimensional task space towards a n-dimensional [0, 1] space. Args: task: Task that has to be mapped original_space: Target space bounds ''' return np.array([np.interp(task[i], original_space, (self.mins[i], self.maxs[i])) for i in range(len(self.mins))])
def sample_task(self)
-
Sample a new task.
Expand source code
def sample_task(self):
    '''
    Sample a new task.

    This implementation samples nothing and returns None.
    '''
    return None
def step_update(self, state, action, reward, next_state, done)
-
Get step-related information.
Expand source code
def step_update(self, state, action, reward, next_state, done):
    '''
    Get step-related information.

    This implementation ignores its arguments and returns None.
    '''
    return None