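# NOTE: "Expand source code" appears to be a documentation-viewer label picked up during extraction; it is not part of the script.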
import sys
import itertools
if __name__ == '__main__':
'''
Generate scripts to perform grid search on teachers' hyperparameters.
Defines the values to test for each hyperparameter.
'''
tuning_dict = {
"ALP-GMM": {
"fit": [50, 150, 250, 350],
"max_k": [5, 10, 15, 20],
"random_percentage": [5, 10, 20, 30]
},
"Covar-GMM": {
"fit": [50, 150, 250, 350],
"max_k": [5, 10, 15, 20],
"random_percentage": [5, 10, 20, 30]
},
"ADR": {
"min_reward_thr": [0, 50],
"max_reward_thr": [180, 230, 280],
"boundary_sampling_p": [0.3, 0.5, 0.7],
"queue_len": [10, 20],
"step_size": [0.05, 0.1]
},
"RIAC": {
"max_region_size": [50, 150, 250, 350],
"nb_split_attempts": [25, 50, 75, 100],
"min_dims_range_ratio": [0.0667, 0.1, 0.1667, 0.2],
},
"Self-Paced": {
"sp_update_offset": [100000, 200000],
"sp_update_frequency": [50000, 100000],
"alpha_offset": [0, 5, 10],
"zeta": [0.05, 0.25, 0.5],
"max_kl": [0.1, 0.8],
"use_avg_performance": [None]
},
"Setter-Solver": {
"ss_update_frequency": [50, 100, 200, 300],
"setter_loss_noise_ub": [0.005, 0.01, 0.05, 0.1],
"setter_hidden_size": [64, 128, 256, 512],
},
"GoalGAN": {
"state_noise_level": [0.01, 0.05, 0.1],
"gg_update_size": [100, 200, 300],
"p_old": [0.1, 0.2, 0.3],
"n_rollouts": [2, 5, 10],
"use_pretrained_samples": [None]
},
}
with open("hp_tuning_teachers.txt", 'w') as f:
for teacher in tuning_dict:
f.write('## {}\n'.format(teacher))
current_teacher_parameters = list(tuning_dict[teacher].keys())
current_teacher_hyperparams = tuning_dict[teacher].values()
for point in itertools.product(*current_teacher_hyperparams):
current_arguments = '--*teacher ' + teacher
for i in range(len(current_teacher_parameters)):
current_arguments += ' --*' + current_teacher_parameters[i]
current_arguments += ' ' + str(point[i]) if point[i] is not None else ''
f.write(
'--slurm_conf jeanzay_medium --nb_seeds 16 --exp_name teachers_hp_tuning --allow_expert_knowledge original '
'--test_set parametric_stumps_test_set --env parametric-continuous-stump-tracks-v0 --max_stump_h 3.0 '
'--max_obstacle_spacing 6.0 --walker_type old_classic_bipedal --student sac_v0.1.1 --backend tf1 '
'--steps_per_ep 500000 --nb_test_episode 100 --nb_env_steps 7 {} --keep_periodical_task_samples 250000\n'
.format(current_arguments)
)