Source code for alf.networks.preprocessors

# Copyright (c) 2020 Horizon Robotics and ALF Contributors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This file contains input preprocessors as stateless Networks, used for the
purpose of preprocessing input and making gin files more convenient to configure.

Example:
In your gin file, the following pipeline can be configured:
input1 (img) -> preprocessor1 -> embed1    ----> EncodingNetwork
input2 (action) -> preprocessor2 -> embed2   /   (with `NestCombiner`)

"""
import abc

import torch
import torch.nn as nn

import alf
from alf.tensor_specs import TensorSpec, BoundedTensorSpec
from alf.nest.utils import get_outer_rank
from alf.networks.network import Network
import alf.utils.math_ops as math_ops


@alf.configurable
class EmbeddingPreprocessor(Network):
    """A stateless preprocessor that maps its input to an embedding vector.

    Two kinds of input are handled, selected by ``input_tensor_spec.is_discrete``:

    * Discrete input: a ``torch.nn.Embedding`` lookup table is used, without
      any activation.
    * Continuous input: an ``EncodingNetwork`` built from the given network
      hyperparameters projects the input to ``embedding_dim`` (e.g. so that
      it has the same length as other vectors).
    """

    def __init__(self,
                 input_tensor_spec,
                 embedding_dim,
                 conv_layer_params=None,
                 fc_layer_params=None,
                 activation=torch.relu_,
                 last_activation=math_ops.identity,
                 name="EmbeddingPreproc"):
        """
        Args:
            input_tensor_spec (TensorSpec): the input spec. If it is discrete,
                it must be a ``BoundedTensorSpec``.
            embedding_dim (int): output embedding size
            conv_layer_params (tuple[tuple]): a tuple of tuples where each
                tuple takes a format ``(filters, kernel_size, strides, padding)``,
                where ``padding`` is optional. Only used when the input is
                continuous.
            fc_layer_params (tuple[int]): a tuple of integers representing FC
                layer sizes. Only used when the input is continuous.
            activation (torch.nn.functional): activation of hidden layers if the
                input is a continuous vector.
            last_activation (nn.functional): activation function of the
                last layer specified by embedding_dim. ``math_ops.identity`` is
                used by default. Only used when the input is continuous.
            name (str): name of this network.
        """
        super().__init__(input_tensor_spec, name=name)
        if input_tensor_spec.is_discrete:
            assert isinstance(input_tensor_spec, BoundedTensorSpec)
            # One embedding row per value in the inclusive [minimum, maximum]
            # range of the spec.
            num_embeddings = (
                input_tensor_spec.maximum - input_tensor_spec.minimum + 1)
            # use nn.Embedding to support a large dictionary
            # NOTE(review): ``_preprocess`` feeds the raw input values to this
            # table without subtracting ``minimum``, so a spec with a non-zero
            # minimum would presumably index the wrong rows (or go out of
            # range) -- confirm that callers only use ``minimum == 0``.
            self._embedding_net = nn.Embedding(num_embeddings, embedding_dim)
        else:
            # Only use an MLP for embedding a continuous input
            # Manually specify all arguments to avoid being overwritten by gin
            # configuration accidentally
            self._embedding_net = alf.networks.EncodingNetwork(
                input_tensor_spec=input_tensor_spec,
                input_preprocessors=None,
                preprocessing_combiner=None,
                conv_layer_params=conv_layer_params,
                fc_layer_params=fc_layer_params,
                activation=activation,
                last_layer_size=embedding_dim,
                last_activation=last_activation,
                name="preprocessor_embedding_net")

    def _preprocess(self, tensor):
        """Run the embedding net on a batched input and return its output.

        Args:
            tensor (Tensor): input with exactly one outer (batch) dimension
                relative to the input tensor spec.

        Returns:
            The output of the embedding net for ``tensor``.
        """
        assert get_outer_rank(tensor, self._input_tensor_spec) == 1, \
            "Only supports one outer rank (batch dim)!"
        ret = self._embedding_net(tensor)
        if self._input_tensor_spec.is_discrete:
            # nn.Embedding returns the embedding directly.
            return ret
        # EncodingNetwork returns a pair; the first element is the output.
        return ret[0]

    def forward(self, inputs, state=()):
        """Preprocess the input.

        Args:
            inputs (Tensor or TensorSpec): the input to preprocess.
                NOTE(review): this method has no TensorSpec-specific handling;
                spec inputs are presumably dealt with by the ``Network`` base
                class -- confirm.
            state (tuple): must be the empty tuple ``()``, since the
                preprocessor is stateless.

        Returns:
            tuple:
            - the preprocessed result
            - the unchanged (empty) ``state``
        """
        # Avoid ``state is ()``: identity comparison with a literal is
        # implementation-dependent and triggers a SyntaxWarning on Python 3.8+.
        assert isinstance(state, tuple) and not state, \
            "The preprocessor is assumed to be stateless currently."

        ret = self._preprocess(inputs)
        return ret, state