Source code for alf.networks.value_networks

# Copyright (c) 2021 Horizon Robotics and ALF Contributors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""ValueNetworkBase, ValueNetwork, ParallelValueNetwork and ValueRNNNetwork."""

import functools
from typing import Callable

import torch
import torch.nn as nn

import alf
from .encoding_networks import EncodingNetwork, LSTMEncodingNetwork
from .preprocessor_networks import PreprocessorNetwork
from alf.networks import Network
from alf.tensor_specs import TensorSpec
import alf.utils.math_ops as math_ops


@alf.configurable
class ValueNetworkBase(Network):
    """Common base of ``ValueNetwork`` and ``ValueRNNNetwork``.

    The class wraps an encoding network whose last layer produces the value
    output. Customized value networks can be built by supplying a different
    encoding network creator.
    """

    def __init__(self,
                 input_tensor_spec: alf.NestedTensorSpec,
                 output_tensor_spec: alf.NestedTensorSpec,
                 encoding_network_ctor: Callable,
                 name="ValueNetworkBase",
                 **encoder_kwargs):
        """
        Args:
            input_tensor_spec: the tensor spec of the input.
            output_tensor_spec: spec of the value output. Its ``numel`` is
                used as the size of the last layer of the encoding network.
            encoding_network_ctor: callable that creates the encoding
                network which does the actual computation. It is called with
                ``input_tensor_spec``, ``last_layer_size``,
                ``last_activation``, ``last_kernel_initializer`` and all of
                ``encoder_kwargs``.
            name: name of the network.
            encoder_kwargs: extra keyword arguments forwarded to
                ``encoding_network_ctor``. If ``kernel_initializer`` is
                absent or ``None``, ``torch.nn.init.xavier_uniform_`` is used.
        """
        super().__init__(input_tensor_spec, name=name)

        # An explicit ``kernel_initializer=None`` is treated the same as a
        # missing one: both fall back to Xavier-uniform initialization.
        if encoder_kwargs.get('kernel_initializer') is None:
            encoder_kwargs['kernel_initializer'] = (
                torch.nn.init.xavier_uniform_)

        # The output layer is initialized uniformly in [-0.03, 0.03].
        output_layer_init = functools.partial(
            torch.nn.init.uniform_, a=-0.03, b=0.03)

        self._encoding_net = encoding_network_ctor(
            input_tensor_spec=input_tensor_spec,
            last_layer_size=output_tensor_spec.numel,
            last_activation=math_ops.identity,
            last_kernel_initializer=output_layer_init,
            **encoder_kwargs)
        self._output_spec = output_tensor_spec

    def forward(self, observation, state=()):
        """Compute the value for a batch of observations.

        Args:
            observation (torch.Tensor): consistent with ``input_tensor_spec``.
            state: state passed to the encoding network; defaults to an
                empty tuple.

        Returns:
            tuple:
            - value (torch.Tensor): output of the encoding network reshaped
              to ``[B] + output_tensor_spec.shape``, where ``B`` is the size
              of the first dimension of the encoding network output.
            - state: the state returned by the encoding network.
        """
        encoded, next_state = self._encoding_net(observation, state)
        batch_size = encoded.shape[0]
        value = encoded.reshape(batch_size, *self._output_spec.shape)
        return value, next_state

    def make_parallel(self, n):
        """Create a ``ParallelValueNetwork`` with ``n`` replicas of ``self``.

        The returned network is named ``"parallel_" + self._name``. According
        to ``ParallelValueNetwork``, the replicas are initialized differently.

        Args:
            n (int): number of replicas.

        Returns:
            ParallelValueNetwork: the parallelized network.
        """
        parallel_name = "parallel_" + self._name
        return ParallelValueNetwork(self, n, parallel_name)

    @property
    def state_spec(self):
        """The state spec, taken directly from the encoding network."""
        return self._encoding_net.state_spec
@alf.configurable
class ValueNetwork(ValueNetworkBase):
    """Value network without recurrent state, built on ``EncodingNetwork``."""

    def __init__(self,
                 input_tensor_spec,
                 output_tensor_spec=TensorSpec(()),
                 input_preprocessors=None,
                 preprocessing_combiner=None,
                 conv_layer_params=None,
                 fc_layer_params=None,
                 activation=torch.relu_,
                 kernel_initializer=None,
                 use_fc_bn=False,
                 name="ValueNetwork"):
        """Create a value network that estimates the expected return.

        Args:
            input_tensor_spec (TensorSpec): the tensor spec of the input.
            output_tensor_spec (TensorSpec): spec of the value output.
            input_preprocessors (nested Network|nn.Module|None): a nest of
                preprocessors, each applied to the corresponding input. When
                not None, its structure must match ``input_tensor_spec``
                (after reshaping). A None element is treated as
                ``math_ops.identity``. Useful for giving different inputs
                different preprocessing purely through configuration, e.g.
                embedding a discrete input before concatenating it with a
                continuous vector.
            preprocessing_combiner (NestCombiner): combiner applied to
                complex (nested) inputs. It must also accept
                ``input_tensor_spec`` as input so that the processed tensor
                spec can be computed. See ``alf.nest.utils.NestConcat`` for
                an example. Useful for combining inputs purely through
                configuration.
            conv_layer_params (tuple[tuple]): one tuple per conv layer in
                the format ``(filters, kernel_size, strides, padding)``,
                where ``padding`` is optional.
            fc_layer_params (tuple[int]): sizes of the hidden FC layers.
            activation (nn.functional): activation of the hidden layers. The
                last layer is not activated.
            kernel_initializer (Callable): initializer for every layer except
                the last one. If None, a xavier_uniform initializer is used.
            use_fc_bn (bool): whether to use Batch Normalization for the
                internal FC layers (i.e. FC layers other than the last one).
            name (str): name of the network.
        """
        # Everything below is forwarded unchanged to ``EncodingNetwork`` by
        # the base class.
        encoder_kwargs = dict(
            input_preprocessors=input_preprocessors,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=conv_layer_params,
            fc_layer_params=fc_layer_params,
            activation=activation,
            kernel_initializer=kernel_initializer,
            use_fc_bn=use_fc_bn)
        super().__init__(
            input_tensor_spec=input_tensor_spec,
            output_tensor_spec=output_tensor_spec,
            encoding_network_ctor=EncodingNetwork,
            name=name,
            **encoder_kwargs)
class ParallelValueNetwork(Network):
    """Perform ``n`` value computations in parallel."""

    def __init__(self,
                 value_network: ValueNetworkBase,
                 n: int,
                 name="ParallelValueNetwork"):
        """Create a parallelized version of ``value_network``.

        Args:
            value_network (ValueNetworkBase): the non-parallelized value
                network. Any ``ValueNetworkBase`` is accepted, since
                ``ValueNetworkBase.make_parallel`` passes ``self`` here.
            n (int): make ``n`` replicas from ``value_network`` with different
                initialization.
            name (str): name of the network.
        """
        super().__init__(
            input_tensor_spec=value_network.input_tensor_spec, name=name)
        # NOTE(review): the meaning of the second positional argument
        # (``True``) is defined by the encoding network's ``make_parallel``;
        # confirm against that method's signature.
        self._encoding_net = value_network._encoding_net.make_parallel(n, True)
        # The parallel output gains a leading replica dimension of size ``n``.
        self._output_spec = TensorSpec((n, ) + value_network.output_spec.shape)

    def forward(self, observation, state=()):
        """Compute values for a batch of observations.

        Args:
            observation: input consistent with ``input_tensor_spec``.
            state: state passed to the parallel encoding network; defaults to
                an empty tuple.

        Returns:
            tuple:
            - value (torch.Tensor): output of the parallel encoding network
              reshaped to ``[B, n] + output_spec.shape`` of the original
              value network, where ``B`` is the size of the first dimension
              of the encoding network output.
            - state: the state returned by the parallel encoding network.
        """
        value, state = self._encoding_net(observation, state)
        value = value.reshape(value.shape[0], *self._output_spec.shape)
        return value, state

    @property
    def state_spec(self):
        """Return the state spec of the value network.

        It is simply the state spec of the (parallel) encoding network.
        """
        return self._encoding_net.state_spec
@alf.configurable
class ValueRNNNetwork(ValueNetworkBase):
    """Recurrent value network, built on ``LSTMEncodingNetwork``."""

    def __init__(self,
                 input_tensor_spec,
                 output_tensor_spec=TensorSpec(()),
                 input_preprocessors=None,
                 preprocessing_combiner=None,
                 conv_layer_params=None,
                 fc_layer_params=None,
                 lstm_hidden_size=100,
                 value_fc_layer_params=None,
                 activation=torch.relu_,
                 kernel_initializer=None,
                 name="ValueRNNNetwork"):
        """Create an instance of ``ValueRNNNetwork``.

        Args:
            input_tensor_spec (TensorSpec): the tensor spec of the input.
            output_tensor_spec (TensorSpec): spec of the value output.
            input_preprocessors (nested Network|nn.Module|None): a nest of
                preprocessors, each applied to the corresponding input. When
                not None, its structure must match ``input_tensor_spec``
                (after reshaping). A None element is treated as
                ``math_ops.identity``. Useful for giving different inputs
                different preprocessing purely through configuration, e.g.
                embedding a discrete input before concatenating it with a
                continuous vector.
            preprocessing_combiner (NestCombiner): combiner applied to
                complex (nested) inputs. It must also accept
                ``input_tensor_spec`` as input so that the processed tensor
                spec can be computed. See ``alf.nest.utils.NestConcat`` for
                an example. Useful for combining inputs purely through
                configuration.
            conv_layer_params (tuple[tuple]): one tuple per conv layer in
                the format ``(filters, kernel_size, strides, padding)``,
                where ``padding`` is optional.
            fc_layer_params (tuple[int]): sizes of the hidden FC layers that
                encode the observation. Forwarded to the encoding network as
                ``pre_fc_layer_params``.
            lstm_hidden_size (int or tuple[int]): hidden size(s) of the LSTM
                cell(s). Each size corresponds to one cell; with several
                sizes the cells are stacked. Forwarded as ``hidden_size``.
            value_fc_layer_params (tuple[int]): sizes of the hidden FC layers
                applied after the LSTM output. Forwarded as
                ``post_fc_layer_params``.
            activation (nn.functional): activation of the hidden layers. The
                last layer is not activated.
            kernel_initializer (Callable): initializer for every layer except
                the last one. If None, a xavier_uniform initializer is used.
            name (str): name of the network.
        """
        # Translate this class's argument names to the keyword names that
        # ``LSTMEncodingNetwork`` is called with by the base class.
        lstm_encoder_kwargs = dict(
            input_preprocessors=input_preprocessors,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=conv_layer_params,
            pre_fc_layer_params=fc_layer_params,
            hidden_size=lstm_hidden_size,
            post_fc_layer_params=value_fc_layer_params,
            activation=activation,
            kernel_initializer=kernel_initializer)
        super().__init__(
            input_tensor_spec=input_tensor_spec,
            output_tensor_spec=output_tensor_spec,
            encoding_network_ctor=LSTMEncodingNetwork,
            name=name,
            **lstm_encoder_kwargs)