Source code for alf.environments.simple.noisy_array

# Copyright (c) 2019 Horizon Robotics. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gym
from gym import spaces
import numpy as np

import cv2


[docs]class NoisyArray(gym.Env):
    """
    A synthetic noisy array to test the agent's robustness to random noises. The
    binary array has a length of (K+M), where the subarray of length K is a
    onehot vector with 1 representing the agent's current location, and the
    remaining M bits constitute a noise vector in {0,1}^M. For example (K=5,
    M=3):

        0 0 1 0 0 | 0 1 1

    and the agent is at i==2 now.

    The agent always starts from i==0. The goal is to reach i==K-1 (it cannot
    step on the noise vector). It has three actions: LEFT, RIGHT, and FIRE. The
    FIRE action changes the noise vector into some random M bits, without
    changing the agent's position. Both LEFT and RIGHT won't change the noise
    vector.

    In the example above, if the next action is FIRE, then the resulting array
    might be

        0 0 1 0 0 | 1 1 0

    If the next action is RIGHT, then the resulting array should be:

        0 0 0 1 0 | 0 1 1

    The game ends whether the array looks like

        0 0 0 0 1 | X X X
    """
    LEFT = 0
    FIRE = 1
    RIGHT = 2

    def __init__(self, K=11, M=100, auto_noise=False):
        """
        Args:
            K (int): K-1 will be the minimum steps that take the agent from left
                to right and get a reward of 1
            M (int): the length of the noisy vector. The total observation length
                would be K+M
            auto_noise (bool): if True, the noise vector will change automatically
                at every step, and FIRE becomes "no-operation".
        """
        super().__init__()
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(K + M, ), dtype=np.float32)
        self.action_space = spaces.Discrete(3)
        self._K = K
        self._M = M
        self._auto_noise = auto_noise
        self.reset()

[docs]    def reset(self):
        # reset to leftmost
        self._position = 0
        self._action = None
        self._noise_vector = np.random.randint(2, size=self._M)
        self._game_over = False
        self._obs = self._gen_observation(act=self.FIRE)[0]
        return self._obs

[docs]    def step(self, action):
        # auto_reset will be called by wrappers
        self._action = action
        self._obs, r = self._gen_observation(act=self._action)
        return self._obs, r, self._game_over, {}

[docs]    def render(self, mode="human", close=False):
        # first convert obs to an RGB array
        obs = np.copy(1 - self._obs)
        obs[self._K:] *= 0.5  # turn the noise portion to gray
        obs *= 255
        obs = obs.astype("uint8")

        grid_size = 16
        length = obs.shape[0]
        rgb_array = np.expand_dims(obs, axis=0)
        rgb_array = cv2.resize(
            rgb_array,
            dsize=(length * grid_size, grid_size),
            interpolation=cv2.INTER_NEAREST)
        rgb_array = cv2.cvtColor(rgb_array, cv2.COLOR_GRAY2RGB)

        if mode == "rgb_array":
            return rgb_array
        else:
            if self._action is not None:
                print("action: ", self._action)
            cv2.imshow("NoisyArray", rgb_array)
            cv2.waitKey(100)

    def _gen_observation(self, act):
        movement = act - 1
        # If the current position is beyond the right boundary, put the agent
        # back to the left
        self._position = max(self._position + movement, 0)
        self._position %= self._K

        self._game_over = (self._position == self._K - 1)

        reward = 1 if self._game_over else 0

        if act == self.FIRE or self._auto_noise:
            self._noise_vector = np.random.randint(2, size=self._M)

        position_array = np.zeros(self._K, dtype=np.float32)
        position_array[self._position] = 1

        observation = np.concatenate((position_array,
                                      self._noise_vector.astype(np.float32)))
        return observation, reward