Source code for alf.environments.suite_dmc

# Copyright (c) 2021 Horizon Robotics and ALF Contributors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import gym

import alf
from alf.environments.dmc_gym_wrapper import DMCGYMWrapper, dm_control
from alf.environments.suite_gym import wrap_env


[docs]def is_available():
    """
    Check if the required environment is installed.
    """
    return dm_control is not None


[docs]@alf.configurable
def load(environment_name='cheetah:run',
         from_pixels=True,
         image_size=100,
         env_id=None,
         discount=1.0,
         visualize_reward=False,
         max_episode_steps=1000,
         control_timestep=None,
         gym_env_wrappers=(),
         alf_env_wrappers=()):
    """ Load a MuJoCo environment.

    For installation of DMControl, see https://github.com/deepmind/dm_control.
    For installation of MuJoCo210, see https://mujoco.org.

    Args:
        environment_name (str): this string must have the format
            "domain_name:task_name", where "domain_name" is defined by DM control as
            the physical model name, and "task_name" is an instance of the model
            with a parcular MDP structure.
        from_pixels (boolean): Output image if set to True.
        image_size (int): The height and width of the output
            image from the environment.
        env_id (int): (optional) ID of the environment.
        discount (float): Discount to use for the environment.
        visualize_reward: if True, then the rendered frame will have
            a highlighted color when the agent achieves a reward.
        max_episode_steps (int): The maximum episode step in the environment.
        control_timestep (float): the time duration between two agent actions. If
            this is greater than the agent's primitive physics timestep, then
            multiple physics simulation steps might be performed between two actions.
            The difference between multi-physics steps and "action repeats"/FrameSkip
            is that the intermediate physics step won't need to render an observation
            (which might save time if rendering is costly). However, this also
            means that unlike "action repeats"/FrameSkip which accumulates rewards
            of several repeated steps, only a single-step reward is obtained after
            all the physics simulation steps are done. The total number of
            physics simulation steps in an episode is
            ``control_timestep / physics_timestep * frame_skip * max_episode_steps``.
            If None, the default control timstep defined by DM control suite will
            be used.
        gym_env_wrappers (Iterable): Iterable with references to gym_wrappers
            classes to use directly on the gym environment.
        alf_env_wrappers (Iterable): Iterable with references to alf_wrappers
            classes to use on the ALF environment. There will be an
            AlfEnvironmentDMC2GYMWrapper added before any alf_wrappers.

    Returns:
        A wrapped AlfEnvironment
    """
    names = environment_name.split(":")
    assert len(names) == 2, (
        "environment_name must be in the format 'domain_name:task_name'!"
        f" Provided environment_name: {environment_name}")

    domain_name, task_name = names
    gym_env = DMCGYMWrapper(
        domain_name=domain_name,
        task_name=task_name,
        visualize_reward=visualize_reward,
        from_pixels=from_pixels,
        control_timestep=control_timestep,
        height=image_size,
        width=image_size)
    return wrap_env(
        gym_env,
        env_id=env_id,
        discount=discount,
        max_episode_steps=max_episode_steps,
        gym_env_wrappers=gym_env_wrappers,
        alf_env_wrappers=alf_env_wrappers,
        image_channel_first=False)