Source code for alf.utils.action_quantizer

# Copyright (c) 2020 Horizon Robotics and ALF Contributors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Action Quantizer."""

from absl import logging
import numpy as np
import functools

import torch

import alf


[docs]@alf.configurable class ActionQuantizer(object): def __init__(self, action_spec, sampling_method="uniform", action_bins=7, rep_mode="center"): """Quantize actions in a specified way. Args: action_spec (BoundedTensorSpec): action spec sampling_method (str): sampling space, uniform or log space: - "uniform": the original space - "log": the logarithm space action_bins (int): number of bins used for discretization rep_mode (str): the mode of representation for quantization: - "center": linspace(lb + bin-size/2, ub - bin_size/2, bin_num) - "boundary": linspace(lower_bound, upper_bound, bin_num) """ super().__init__() self._action_spec = action_spec self._action_bins = action_bins self._sampling_method = sampling_method self._rep_mode = rep_mode # action_dim: the length of the continious control vector # action_bins: the number of elements per action_dim self._action_dim = action_spec.shape[0] assert (() == action_spec.maximum.shape) and \ (() == action_spec.minimum.shape), \ "Only support scalar action maximum and minimum bound" self._upper_bound = action_spec.maximum.item() self._lower_bound = action_spec.minimum.item() # TODO: currently use the same quantization across action dims; # can make it different for different dims in the future if self._sampling_method == "uniform": if self._rep_mode == "center": bin_size = ( self._upper_bound - self._lower_bound) / self._action_bins # [lb + bin_size/2, up - bin_size/2] # center value representation LUT_BA = torch.linspace( self._lower_bound + bin_size / 2, self._upper_bound - bin_size / 2, steps=self._action_bins) elif self._rep_mode == "boundary": LUT_BA = torch.linspace( self._lower_bound, self._upper_bound, steps=self._action_bins) else: raise NotImplementedError("Unimplemented sampling method!") # look-up-table, bin-to-action mapping self._LUT_BA = LUT_BA self._bin_size = self._LUT_BA[1] - self._LUT_BA[0]
[docs] def ind_to_action(self, action_ind): action = action_ind * self._bin_size + self._LUT_BA[0] return action
[docs] def action_to_ind(self, action): bin_size = self._bin_size if self._rep_mode == "center": action_ind = torch.round((action - self._LUT_BA[0]) / bin_size) elif self._rep_mode == "boundary": action_ind = (action - self._LUT_BA[0]) // (bin_size) else: raise NotImplementedError("Unsupported representation mode!") return action_ind
@property def action_bins(self): return self._action_bins