Source code for alf.networks.preprocessors

# Copyright (c) 2020 Horizon Robotics and ALF Contributors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains input preprocessors as stateless Networks, used for the
purpose of preprocessing input and making gin files more convenient to configure.

Example:
In your gin file, a structure like the following can be configured:
input1 (img) -> preprocessor1 -> embed1    ----> EncodingNetwork
input2 (action) -> preprocessor2 -> embed2   /   (with `NestCombiner`)

"""
import abc

import torch
import torch.nn as nn

import alf
from alf.tensor_specs import TensorSpec, BoundedTensorSpec
from alf.nest.utils import get_outer_rank
from alf.networks.network import Network
import alf.utils.math_ops as math_ops


@alf.configurable
class EmbeddingPreprocessor(Network):
    """A preprocessor that converts the input to an embedding vector.

    This can be used when the input is a discrete scalar, or a continuous
    vector to be projected to a different dimension (to have the same length
    with other vectors). In the former case, ``torch.nn.Embedding`` is used
    without any activation. In the latter case, an ``EncodingNetwork`` is used
    with the specified network hyperparameters.
    """

    def __init__(self,
                 input_tensor_spec,
                 embedding_dim,
                 conv_layer_params=None,
                 fc_layer_params=None,
                 activation=torch.relu_,
                 last_activation=math_ops.identity,
                 name="EmbeddingPreproc"):
        """
        Args:
            input_tensor_spec (TensorSpec): the input spec. If it is discrete,
                it must be a ``BoundedTensorSpec`` so that the number of
                embedding entries can be derived from its bounds.
            embedding_dim (int): output embedding size
            conv_layer_params (tuple[tuple]): a tuple of tuples where each
                tuple takes a format ``(filters, kernel_size, strides, padding)``,
                where ``padding`` is optional. Only used when the input is
                continuous.
            fc_layer_params (tuple[int]): a tuple of integers representing FC
                layer sizes. Only used when the input is continuous.
            activation (torch.nn.functional): activation of hidden layers if
                the input is a continuous vector.
            last_activation (nn.functional): activation function of the
                last layer specified by embedding_dim. ``math_ops.identity`` is
                used by default. Only used when the input is continuous.
            name (str): name of this preprocessor network.
        """
        super().__init__(input_tensor_spec, name=name)
        if input_tensor_spec.is_discrete:
            assert isinstance(input_tensor_spec, BoundedTensorSpec)
            # One embedding row per value in the closed range
            # [minimum, maximum].
            num_embeddings = (
                input_tensor_spec.maximum - input_tensor_spec.minimum + 1)
            # use nn.Embedding to support a large dictionary
            self._embedding_net = nn.Embedding(num_embeddings, embedding_dim)
        else:
            # Only use an MLP for embedding a continuous input
            # Manually specify all arguments to avoid being overwritten by gin
            # configuration accidentally
            self._embedding_net = alf.networks.EncodingNetwork(
                input_tensor_spec=input_tensor_spec,
                input_preprocessors=None,
                preprocessing_combiner=None,
                conv_layer_params=conv_layer_params,
                fc_layer_params=fc_layer_params,
                activation=activation,
                last_layer_size=embedding_dim,
                last_activation=last_activation,
                name="preprocessor_embedding_net")

    def _preprocess(self, tensor):
        """Embed a batched input tensor.

        Args:
            tensor (Tensor): input with exactly one outer (batch) dimension
                relative to the input tensor spec.

        Returns:
            Tensor: the output of the embedding network.
        """
        assert get_outer_rank(tensor, self._input_tensor_spec) == 1, \
            "Only supports one outer rank (batch dim)!"
        # NOTE(review): for discrete inputs the values are fed to nn.Embedding
        # as-is, without subtracting the spec's ``minimum``; this presumably
        # assumes ``minimum == 0`` -- confirm against callers.
        ret = self._embedding_net(tensor)
        # EncodingNetwork returns a pair, whereas nn.Embedding returns the
        # embedding tensor directly.
        if self._input_tensor_spec.is_discrete:
            return ret
        return ret[0]

    def forward(self, inputs, state=()):
        """Preprocess the input and pass the (empty) state through.

        Args:
            inputs (Tensor): the input to be embedded.
                NOTE(review): earlier documentation also mentioned
                ``TensorSpec`` inputs, but nothing in this class handles a
                spec specially -- confirm before relying on it.
            state (tuple): must be an empty tuple, as this preprocessor is
                stateless.

        Returns:
            tuple:
            - Tensor: the preprocessed result.
            - tuple: the unchanged (empty) ``state``.
        """
        # Check emptiness by type and length rather than ``state is ()``:
        # identity against a literal relies on an interpreter implementation
        # detail and triggers a SyntaxWarning on Python >= 3.8.
        assert isinstance(state, tuple) and len(state) == 0, \
            "The preprocessor is assumed to be stateless currently."
        ret = self._preprocess(inputs)
        return ret, state