# Copyright (c) 2020 Horizon Robotics and ALF Contributors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Action Quantizer."""
from absl import logging
import numpy as np
import functools
import torch
import alf
@alf.configurable
class ActionQuantizer(object):
    """Convert between continuous action values and discrete bin indices.

    A single evenly spaced look-up table of representative action values is
    built from the scalar bounds of the action spec and is shared by all
    action dimensions.
    """

    def __init__(self,
                 action_spec,
                 sampling_method="uniform",
                 action_bins=7,
                 rep_mode="center"):
        """Quantize actions in a specified way.

        Args:
            action_spec (BoundedTensorSpec): action spec. Its ``maximum`` and
                ``minimum`` must be scalars (shape ``()``).
            sampling_method (str): sampling space. Only "uniform" is
                currently implemented:
                - "uniform": the original space
                - "log": the logarithm space (not implemented yet)
            action_bins (int): number of bins used for discretization. Must
                be at least 2 so that a bin spacing can be derived from the
                look-up table.
            rep_mode (str): the mode of representation for quantization:
                - "center": linspace(lb + bin_size/2, ub - bin_size/2, bin_num)
                - "boundary": linspace(lower_bound, upper_bound, bin_num)

        Raises:
            ValueError: if ``action_bins`` is smaller than 2.
            NotImplementedError: if ``sampling_method`` or ``rep_mode`` is
                not one of the implemented options.
        """
        super().__init__()

        self._action_spec = action_spec
        self._action_bins = action_bins
        self._sampling_method = sampling_method
        self._rep_mode = rep_mode

        # action_dim: the length of the continuous control vector
        # action_bins: the number of elements per action_dim
        self._action_dim = action_spec.shape[0]

        assert (() == action_spec.maximum.shape) and \
            (() == action_spec.minimum.shape), \
            "Only support scalar action maximum and minimum bound"

        self._upper_bound = action_spec.maximum.item()
        self._lower_bound = action_spec.minimum.item()

        # The bin spacing below is read from the first two table entries, so
        # fewer than two bins cannot be supported. Fail early with a clear
        # message instead of an IndexError on the look-up table.
        if self._action_bins < 2:
            raise ValueError(
                "action_bins must be at least 2, got {}".format(
                    self._action_bins))

        # Reject unsupported options explicitly; otherwise the look-up table
        # would never be assigned and a confusing UnboundLocalError would be
        # raised further down.
        if self._sampling_method != "uniform":
            raise NotImplementedError("Unimplemented sampling method!")

        # TODO: currently use the same quantization across action dims;
        # can make it different for different dims in the future
        if self._rep_mode == "center":
            bin_size = (
                self._upper_bound - self._lower_bound) / self._action_bins
            # [lb + bin_size/2, ub - bin_size/2]
            # center value representation
            LUT_BA = torch.linspace(
                self._lower_bound + bin_size / 2,
                self._upper_bound - bin_size / 2,
                steps=self._action_bins)
        elif self._rep_mode == "boundary":
            LUT_BA = torch.linspace(
                self._lower_bound,
                self._upper_bound,
                steps=self._action_bins)
        else:
            raise NotImplementedError("Unsupported representation mode!")

        # look-up-table, bin-to-action mapping
        self._LUT_BA = LUT_BA
        # spacing between two neighboring representative values
        self._bin_size = self._LUT_BA[1] - self._LUT_BA[0]

    def ind_to_action(self, action_ind):
        """Map bin indices to their representative action values.

        Args:
            action_ind (Tensor|number): bin indices; presumably within
                ``[0, action_bins - 1]`` (not checked here).

        Returns:
            Tensor: ``action_ind * bin_size + LUT_BA[0]``, i.e. the value of
            the look-up table at the given indices.
        """
        action = action_ind * self._bin_size + self._LUT_BA[0]
        return action

    def action_to_ind(self, action):
        """Map action values to bin indices.

        In "center" mode the offset from the first representative value is
        rounded to the nearest bin; in "boundary" mode it is floor-divided by
        the bin spacing. The result is neither clamped to the valid index
        range nor cast to an integer dtype.

        Args:
            action (Tensor|number): action values to quantize.

        Returns:
            Tensor: the bin indices of ``action``.

        Raises:
            NotImplementedError: if the representation mode is unsupported.
        """
        bin_size = self._bin_size
        if self._rep_mode == "center":
            action_ind = torch.round((action - self._LUT_BA[0]) / bin_size)
        elif self._rep_mode == "boundary":
            action_ind = (action - self._LUT_BA[0]) // (bin_size)
        else:
            raise NotImplementedError("Unsupported representation mode!")
        return action_ind

    @property
    def action_bins(self):
        """int: the number of bins used for discretization."""
        return self._action_bins