Source code for alf.networks.param_networks

# Copyright (c) 2020 Horizon Robotics and ALF Contributors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Networks with input parameters."""

import functools
import torch
import torch.nn as nn

import alf
from alf.initializers import variance_scaling_init
from alf.layers import ParamFC, ParamConv2D
from alf.networks.network import Network
from alf.tensor_specs import TensorSpec
from alf.utils import common


@alf.configurable
class ParamConvNet(Network):
    """A 2D conv network whose parameters are supplied by the caller."""

    def __init__(self,
                 input_channels,
                 input_size,
                 conv_layer_params,
                 same_padding=False,
                 activation=torch.relu_,
                 use_bias=False,
                 use_ln=False,
                 n_groups=None,
                 kernel_initializer=None,
                 flatten_output=False,
                 name="ParamConvNet"):
        """A fully 2D conv network that does not maintain its own network
        parameters, but accepts them from users. If the given parameter tensor
        has an extra batch dimension (first dimension), it performs parallel
        operations.

        Args:
            input_channels (int): number of channels in the input image
            input_size (int or tuple): the input image size (height, width)
            conv_layer_params (tuple[tuple]): a non-empty tuple of tuples where
                each tuple takes a format
                ``(filters, kernel_size, strides, padding, pooling_kernel)``,
                where ``padding`` and ``pooling_kernel`` are optional.
            same_padding (bool): similar to TF's conv2d ``same`` padding mode.
                If True, the user provided paddings in ``conv_layer_params``
                will be replaced by automatically calculated ones; if False,
                it corresponds to TF's ``valid`` padding mode (the user can
                still provide custom paddings though)
            activation (torch.nn.functional): activation for all the layers
            use_bias (bool): whether use bias.
            use_ln (bool): whether use layer normalization
            n_groups (int): number of parallel groups, must be specified if
                ``use_ln``
            kernel_initializer (Callable): initializer for all the layers.
                Defaults to a truncated-normal, fan-in variance scaling
                initializer matched to ``activation``.
            flatten_output (bool): If False, the output will be an image
                structure of shape ``(B, n, C, H, W)``; otherwise the output
                will be flattened into a feature of shape ``(B, n, C*H*W)``.
            name (str):
        """
        input_size = common.tuplify2d(input_size)
        super().__init__(
            input_tensor_spec=TensorSpec((input_channels, ) + input_size),
            name=name)

        assert isinstance(conv_layer_params, tuple)
        assert len(conv_layer_params) > 0

        if kernel_initializer is None:
            kernel_initializer = functools.partial(
                variance_scaling_init,
                mode='fan_in',
                distribution='truncated_normal',
                nonlinearity=activation)

        self._flatten_output = flatten_output
        self._conv_layer_params = conv_layer_params
        self._conv_layers = nn.ModuleList()
        # Computed lazily by the ``param_length`` property.
        self._param_length = None

        for paras in conv_layer_params:
            filters, kernel_size, strides = paras[:3]
            padding = paras[3] if len(paras) > 3 else 0
            pooling_kernel = paras[4] if len(paras) > 4 else None
            if same_padding:  # overwrite paddings
                kernel_size = common.tuplify2d(kernel_size)
                padding = ((kernel_size[0] - 1) // 2,
                           (kernel_size[1] - 1) // 2)
            self._conv_layers.append(
                ParamConv2D(
                    input_channels,
                    filters,
                    kernel_size,
                    activation=activation,
                    strides=strides,
                    pooling_kernel=pooling_kernel,
                    padding=padding,
                    use_bias=use_bias,
                    use_ln=use_ln,
                    n_groups=n_groups,
                    kernel_initializer=kernel_initializer))
            # The next layer consumes this layer's output channels.
            input_channels = filters

    @property
    def param_length(self):
        """Get total number of parameters for all layers. """
        if self._param_length is None:
            self._param_length = sum(
                conv_l.param_length for conv_l in self._conv_layers)
        return self._param_length

    def set_parameters(self, theta, reinitialize=False):
        """Distribute parameters to corresponding layers.

        Args:
            theta (torch.Tensor): with shape ``[D] (groups=1)``
                                        or ``[B, D] (groups=B)``
                where the meaning of the symbols are:
                - ``B``: batch size
                - ``D``: length of parameters, should be self.param_length
                When the shape of inputs is ``[D]``, it will be unsqueezed
                to ``[1, D]``.
            reinitialize (bool): whether to reinitialize parameters of
                each layer.

        Raises:
            AssertionError: if ``theta`` is not of shape ``[D]`` or ``[B, D]``
                with ``D == self.param_length``.
        """
        if theta.ndim == 1:
            theta = theta.unsqueeze(0)
        # Both placeholders must be filled; previously only ``param_length``
        # was supplied, which turned a shape mismatch into a ``TypeError``.
        assert (theta.ndim == 2 and theta.shape[1] == self.param_length), (
            "Input theta has wrong shape %s. Expecting shape (, %d)" %
            (theta.shape, self.param_length))

        # Hand each layer its own contiguous slice of ``theta``.
        pos = 0
        for conv_l in self._conv_layers:
            param_length = conv_l.param_length
            conv_l.set_parameters(
                theta[:, pos:pos + param_length], reinitialize=reinitialize)
            pos = pos + param_length
        # Clear the stored output spec; presumably the base class re-derives
        # it on next access (e.g. for a different number of groups) -- confirm
        # against ``Network.output_spec``.
        self._output_spec = None

    def forward(self, inputs, state=()):
        """Apply all conv layers to ``inputs``.

        Args:
            inputs (Tensor):
            state: not used, just keeps the interface same with other networks.

        Returns:
            tuple: ``(output, state)`` where ``output`` has its last three
            dimensions merged into one if ``flatten_output`` is True.
        """
        x = inputs
        # Intermediate layers are asked not to keep the group dimension; only
        # the last layer is called with its default behavior.
        for conv_l in self._conv_layers[:-1]:
            x = conv_l(x, keep_group_dim=False)
        x = self._conv_layers[-1](x)
        if self._flatten_output:
            x = x.reshape(*x.shape[:-3], -1)
        return x, state
@alf.configurable
class ParamNetwork(Network):
    """A conv + FC network whose parameters are supplied by the caller."""

    def __init__(self,
                 input_tensor_spec,
                 conv_layer_params=None,
                 fc_layer_params=None,
                 use_conv_bias=False,
                 use_conv_ln=False,
                 use_fc_bias=True,
                 use_fc_ln=False,
                 n_groups=None,
                 activation=torch.relu_,
                 kernel_initializer=None,
                 last_layer_size=None,
                 last_activation=None,
                 last_use_bias=True,
                 last_use_ln=False,
                 name="ParamNetwork"):
        """A network with Fc and conv2D layers that does not maintain its own
        network parameters, but accepts them from users. If the given parameter
        tensor has an extra batch dimension (first dimension), it performs
        parallel operations.

        Args:
            input_tensor_spec (TensorSpec): the tensor spec of the input. It
                must have rank 3 ``(C, H, W)`` when ``conv_layer_params`` is
                given, and rank 1 ``(N,)`` otherwise.
            conv_layer_params (tuple[tuple]): a tuple of tuples where each
                tuple takes a format
                ``(filters, kernel_size, strides, padding, pooling_kernel)``,
                where ``padding`` and ``pooling_kernel`` are optional.
            fc_layer_params (tuple[int]): a tuple of integers
                representing FC layer sizes.
            use_conv_bias (bool): whether use bias for conv layers.
            use_conv_ln (bool): whether use layer normalization for conv
                layers.
            use_fc_bias (bool): whether use bias for fc layers.
            use_fc_ln (bool): whether use layer normalization for fc layers.
            n_groups (int): number of parallel groups, must be specified if
                layer normalization is used (``use_conv_ln``, ``use_fc_ln``
                or ``last_use_ln``).
            activation (torch.nn.functional): activation for all the layers
            kernel_initializer (Callable): initializer for all the layers.
                Defaults to a truncated-normal, fan-in variance scaling
                initializer matched to ``activation``.
            last_layer_size (int): an optional size of an additional layer
                appended at the very end. Note that if ``last_activation`` is
                specified, ``last_layer_size`` has to be specified explicitly.
            last_activation (nn.functional): activation function of the
                additional layer specified by ``last_layer_size``. Note that if
                ``last_layer_size`` is not None, ``last_activation`` has to be
                specified explicitly.
            last_use_bias (bool): whether use bias for the additional layer.
            last_use_ln (bool): whether use layer normalization for the
                additional layer.
            name (str):
        """
        super().__init__(input_tensor_spec=input_tensor_spec, name=name)

        if kernel_initializer is None:
            kernel_initializer = functools.partial(
                variance_scaling_init,
                mode='fan_in',
                distribution='truncated_normal',
                nonlinearity=activation)

        # Computed lazily by the ``param_length`` property.
        self._param_length = None
        self._conv_net = None
        if conv_layer_params:
            assert isinstance(conv_layer_params, tuple), \
                "The input params {} should be tuple".format(conv_layer_params)
            assert input_tensor_spec.ndim == 3, \
                "The input shape {} should be like (C,H,W)!".format(
                    input_tensor_spec.shape)
            input_channels, height, width = input_tensor_spec.shape
            self._conv_net = ParamConvNet(
                input_channels, (height, width),
                conv_layer_params,
                activation=activation,
                use_bias=use_conv_bias,
                use_ln=use_conv_ln,
                n_groups=n_groups,
                kernel_initializer=kernel_initializer,
                flatten_output=True)
            # The conv output is flattened, so its last dim feeds the FC stack.
            input_size = self._conv_net.output_spec.shape[-1]
        else:
            assert input_tensor_spec.ndim == 1, \
                "The input shape {} should be like (N,)!".format(
                    input_tensor_spec.shape)
            input_size = input_tensor_spec.shape[0]

        self._fc_layers = nn.ModuleList()
        if fc_layer_params is None:
            fc_layer_params = ()
        else:
            assert isinstance(fc_layer_params, tuple)

        for size in fc_layer_params:
            self._fc_layers.append(
                ParamFC(
                    input_size,
                    size,
                    activation=activation,
                    use_bias=use_fc_bias,
                    use_ln=use_fc_ln,
                    n_groups=n_groups,
                    kernel_initializer=kernel_initializer))
            input_size = size

        if last_layer_size is not None or last_activation is not None:
            assert last_layer_size is not None and last_activation is not None, \
                "Both last_layer_size and last_activation need to be specified!"
            self._fc_layers.append(
                ParamFC(
                    input_size,
                    last_layer_size,
                    activation=last_activation,
                    use_bias=last_use_bias,
                    use_ln=last_use_ln,
                    n_groups=n_groups,
                    kernel_initializer=kernel_initializer))
            input_size = last_layer_size

        self._output_spec = TensorSpec((input_size, ),
                                       dtype=self._input_tensor_spec.dtype)

    @property
    def param_length(self):
        """Get total number of parameters for all layers. """
        if self._param_length is None:
            length = 0
            if self._conv_net is not None:
                length += self._conv_net.param_length
            length += sum(fc_l.param_length for fc_l in self._fc_layers)
            self._param_length = length
        return self._param_length

    def set_parameters(self, theta, reinitialize=False):
        """Distribute parameters to corresponding layers.

        The leading ``self._conv_net.param_length`` entries of ``theta`` (if a
        conv net exists) go to the conv net; the remainder is split among the
        FC layers in order.

        Args:
            theta (torch.Tensor): with shape ``[D] (groups=1)``
                                        or ``[B, D] (groups=B)``
                where the meaning of the symbols are:
                - ``B``: batch size
                - ``D``: length of parameters, should be self.param_length
                When the shape of inputs is ``[D]``, it will be unsqueezed
                to ``[1, D]``.
            reinitialize (bool): whether to reinitialize parameters of
                each layer.

        Raises:
            AssertionError: if ``theta`` is not of shape ``[D]`` or ``[B, D]``
                with ``D == self.param_length``.
        """
        if theta.ndim == 1:
            theta = theta.unsqueeze(0)
        # Both placeholders must be filled; previously only ``param_length``
        # was supplied, which turned a shape mismatch into a ``TypeError``.
        assert (theta.ndim == 2 and theta.shape[1] == self.param_length), (
            "Input theta has wrong shape %s. Expecting shape (, %d)" %
            (theta.shape, self.param_length))

        if self._conv_net is not None:
            split = self._conv_net.param_length
            self._conv_net.set_parameters(
                theta[:, :split], reinitialize=reinitialize)
            fc_theta = theta[:, split:]
        else:
            fc_theta = theta

        # Hand each FC layer its own contiguous slice of the remainder.
        pos = 0
        for fc_l in self._fc_layers:
            param_length = fc_l.param_length
            fc_l.set_parameters(
                fc_theta[:, pos:pos + param_length], reinitialize=reinitialize)
            pos = pos + param_length

    def forward(self, inputs, state=()):
        """Apply the optional conv net followed by all FC layers.

        Args:
            inputs (Tensor):
            state: not used, just keeps the interface same with other networks.

        Returns:
            tuple: ``(output, state)``.
        """
        x = inputs
        if self._conv_net is not None:
            x, state = self._conv_net(x, state=state)
        for fc_l in self._fc_layers:
            x = fc_l(x)
        return x, state