Module TeachMyAgent.students.package_to_hub

from huggingface_hub import HfApi, upload_folder
from huggingface_hub.repocard import metadata_eval_result, metadata_save
import tempfile
from pathlib import Path
import subprocess
import json

def package_to_hub(repo_id,
                   ta_config,
                   model_path,
                   mean_reward,
                   std_reward,
                   hyperparameters,
                   token=None
                   ):
    # Step 1: Create the repo on the Hub (or reuse it if it already exists)
    repo_url = HfApi().create_repo(
        repo_id=repo_id,
        repo_type="model",
        token=token,
        private=False,
        exist_ok=True,
    )

    with tempfile.TemporaryDirectory() as tmpdirname:
        tmpdirname = Path(tmpdirname)

        # Step 2: Convert the TensorFlow SavedModel to TensorFlow.js format
        subprocess.run('tensorflowjs_converter --input_format=tf_saved_model --output_node_names="parkour_walker" ' +
                       f'--saved_model_tags=serve --skip_op_check {model_path}/tf1_save {tmpdirname}',
                       shell=True, check=True)

        # Step 3: Write TeachMyAgent's config file
        with open(tmpdirname / "ta-config.json", "w") as outfile:
            json.dump(ta_config, outfile)

        # Step 4: Generate the model card
        generated_model_card, metadata = _generate_model_card(ta_config["name"], mean_reward, std_reward, hyperparameters)
        _save_model_card(tmpdirname, generated_model_card, metadata)

        # Step 5: Upload the folder's content to the Hub
        repo_url = upload_folder(
            repo_id=repo_id,
            folder_path=tmpdirname,
            path_in_repo="",
            commit_message=f"Uploading {repo_id}",
            token=token,
        )

    return repo_url

def _generate_model_card(model_name, mean_reward, std_reward, hyperparameters):
    """
    Generate the model card for the Hub
    :param model_name: name of the model
    :param mean_reward: mean reward of the agent
    :param std_reward: standard deviation of the mean reward of the agent
    :param hyperparameters: training arguments
    """
    # Step 1: Select the tags
    metadata = generate_metadata(model_name, mean_reward, std_reward)

    # Turn the hyperparameters (e.g. an argparse Namespace) into one entry per line
    converted_str = '\n'.join(str(hyperparameters).split(", "))

    # Step 2: Generate the model card
    model_card = f"""
  # Deep RL Agent Playing TeachMyAgent's parkour.
  You can find more info about TeachMyAgent [here](https://developmentalsystems.org/TeachMyAgent/).
  
  Results of our benchmark can be found in our [paper](https://arxiv.org/pdf/2103.09815.pdf).
  
  You can test this policy [here](https://huggingface.co/spaces/flowers-team/Interactive_DeepRL_Demo).
  
  ## Results
  Percentage of mastered tasks (i.e. reward >= 230) after 20 million steps on the Parkour track.
  
  Results are averaged over 16 seeds per morphology (standard deviation in parentheses); the *Overall* column aggregates all 48 seeds.
  
  We highlight the best results in bold.
  
  | Algorithm     | BipedalWalker  | Fish          | Climber      | Overall       |
  |---------------|----------------|---------------|--------------|---------------|
  | Random        | 27.25 (± 10.7) | 23.6 (± 21.3) | 0.0 (± 0.0)  | 16.9 (± 18.3) |
  | ADR           | 14.7 (± 19.4)  | 5.3 (± 20.6)  | 0.0 (± 0.0)  | 6.7 (± 17.4)  |
  | ALP-GMM       | **42.7** (± 11.2)  | 36.1 (± 28.5) | 0.4 (± 1.2)  | **26.4** (± 25.7) |
  | Covar-GMM     | 35.7 (± 15.9)  | 29.9 (± 27.9) | 0.5 (± 1.9)  | 22.1 (± 24.2) |
  | GoalGAN       | 25.4 (± 24.7)  | 34.7 (± 37.0) | 0.8 (± 2.7)  | 20.3 (± 29.5) |
  | RIAC          | 31.2 (± 8.2)   | **37.4** (± 25.4) | 0.4 (± 1.4) | 23.0 (± 22.4) |
  | SPDL          | 30.6 (± 22.8)  | 9.0 (± 24.2)  | **1.0** (± 3.4)  | 13.5 (± 23.0) |
  | Setter-Solver | 28.75 (± 20.7) | 5.1 (± 7.6)   | 0.0 (± 0.0)  | 11.3 (± 17.9) |

  # Hyperparameters
  ```python
  {converted_str}
  ```
  """
    return model_card, metadata


def generate_metadata(model_name, mean_reward, std_reward):
    """
    Define the tags for the model card
    :param model_name: name of the model
    :param mean_reward: mean reward of the agent
    :param std_reward: standard deviation of the mean reward of the agent
    """
    metadata = {}
    metadata["tags"] = [
        "sac",
        "deep-reinforcement-learning",
        "reinforcement-learning",
        "teach-my-agent-parkour"
    ]

    # Add metrics
    eval_results = metadata_eval_result(
        model_pretty_name=model_name,
        task_pretty_name="reinforcement-learning",
        task_id="reinforcement-learning",
        metrics_pretty_name="mean_reward",
        metrics_id="mean_reward",
        metrics_value=f"{mean_reward:.2f} +/- {std_reward:.2f}",
        dataset_pretty_name="teach-my-agent-parkour",
        dataset_id="teach-my-agent-parkour"
    )

    # Merge both dictionaries
    metadata = {**metadata, **eval_results}

    return metadata


def _save_model_card(local_path, generated_model_card, metadata):
    """Saves a model card for the repository.
    :param local_path: repository directory
    :param generated_model_card: model card generated by _generate_model_card()
    :param metadata: metadata
    """
    readme_path = local_path / "README.md"
    readme = ""
    if readme_path.exists():
        with readme_path.open("r", encoding="utf8") as f:
            readme = f.read()
    else:
        readme = generated_model_card

    with readme_path.open("w", encoding="utf-8") as f:
        f.write(readme)

    # Save our metrics to Readme metadata
    metadata_save(readme_path, metadata)
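
As a usage note, the card helpers above can be exercised locally without pushing anything to the Hub. The sketch below is illustrative only: the model name, rewards and hyperparameters are placeholders, and it assumes it runs in this module's namespace so that the private helpers are in scope.

# Hypothetical local check of the card helpers; all values are illustrative.
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    card, meta = _generate_model_card("sac_demo", mean_reward=250.0, std_reward=12.0,
                                      hyperparameters={"lr": 3e-4, "batch_size": 256})
    _save_model_card(Path(tmp), card, meta)
    # metadata_save() has written the tags and metrics as a YAML block at the top of
    # README.md, followed by the generated markdown body.
    print((Path(tmp) / "README.md").read_text(encoding="utf-8")[:300])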

Functions

def generate_metadata(model_name, mean_reward, std_reward)

Define the tags for the model card.
:param model_name: name of the model
:param mean_reward: mean reward of the agent
:param std_reward: standard deviation of the mean reward of the agent
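For reference, a hypothetical call is sketched below; the exact shape of the "model-index" entry is produced by huggingface_hub's metadata_eval_result and is only summarised in the comments, not guaranteed.

# Illustrative call; the model name and reward values are made up.
metadata = generate_metadata("sac_demo", mean_reward=250.0, std_reward=12.0)

# metadata["tags"] carries the fixed tag list, while metadata_eval_result() adds a
# "model-index" entry describing the reinforcement-learning task, the
# teach-my-agent-parkour dataset and the "250.00 +/- 12.00" mean_reward metric.
print(metadata["tags"])
print(list(metadata.keys()))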

def package_to_hub(repo_id, ta_config, model_path, mean_reward, std_reward, hyperparameters, token=None)
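Create the model repo on the Hub (or reuse an existing one), convert the TF SavedModel found in model_path/tf1_save to TensorFlow.js via tensorflowjs_converter, write ta-config.json and a generated model card, and upload the resulting folder. A hypothetical invocation is sketched below; the repo id, config, paths and token are placeholders, and tensorflowjs_converter must be available on the PATH.

# Hypothetical call; replace the repo id, config, paths and token with real values.
url = package_to_hub(
    repo_id="my-username/sac-parkour-demo",
    ta_config={"name": "sac_parkour_demo"},      # "name" is used in the model card
    model_path="./models/sac_parkour_demo",      # must contain a tf1_save/ SavedModel
    mean_reward=250.0,
    std_reward=12.0,
    hyperparameters={"lr": 3e-4, "batch_size": 256},
    token="hf_xxx",                              # Hugging Face access token, or None if already logged in
)
print(url)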