from typing import Callable, List, Optional, Tuple, Union
|
|
| import torch |
| from torch import Tensor |
|
|
| from .vision_transformer_utils import _log_api_usage_once |
|
|
|
|
# Expose torch's functional ``interpolate`` as a module-level name of this module.
from torch.nn.functional import interpolate  # noqa: F401
|
|
|
|
| |
class FrozenBatchNorm2d(torch.nn.Module):
    """
    2D batch normalization whose statistics and affine terms are held fixed.

    ``weight``, ``bias``, ``running_mean`` and ``running_var`` are registered as
    buffers (not parameters), and ``forward`` only reads them.

    Args:
        num_features (int): Number of features ``C`` from an expected input of size ``(N, C, H, W)``
        eps (float): a value added to the denominator for numerical stability. Default: 1e-5
    """

    def __init__(
        self,
        num_features: int,
        eps: float = 1e-5,
    ):
        super().__init__()
        _log_api_usage_once(self)
        self.eps = eps
        # Identity affine transform and unit-Gaussian statistics until a state dict is loaded.
        # The registration order is significant: it determines the state-dict key order.
        initial_buffers = (
            ("weight", torch.ones),
            ("bias", torch.zeros),
            ("running_mean", torch.zeros),
            ("running_var", torch.ones),
        )
        for buffer_name, make_tensor in initial_buffers:
            self.register_buffer(buffer_name, make_tensor(num_features))

    def _load_from_state_dict(
        self,
        state_dict: dict,
        prefix: str,
        local_metadata: dict,
        strict: bool,
        missing_keys: List[str],
        unexpected_keys: List[str],
        error_msgs: List[str],
    ):
        # This module has no ``num_batches_tracked`` buffer, so drop that entry (if any)
        # before delegating; otherwise it would be reported as an unexpected key.
        state_dict.pop(prefix + "num_batches_tracked", None)

        super()._load_from_state_dict(
            state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
        )

    def forward(self, x: Tensor) -> Tensor:
        # Bring each per-channel buffer to shape (1, C, 1, 1) first so that the
        # arithmetic below broadcasts over the batch and spatial dimensions.
        mean = self.running_mean.reshape(1, -1, 1, 1)
        var = self.running_var.reshape(1, -1, 1, 1)
        gamma = self.weight.reshape(1, -1, 1, 1)
        beta = self.bias.reshape(1, -1, 1, 1)

        # Fold normalization and affine transform into a single multiply-add:
        #   gamma * (x - mean) / sqrt(var + eps) + beta == x * scale + shift
        scale = gamma * torch.rsqrt(var + self.eps)
        shift = beta - mean * scale
        return x * scale + shift

    def __repr__(self) -> str:
        num_features = self.weight.shape[0]
        return f"{type(self).__name__}({num_features}, eps={self.eps})"
|
|
|
|
class ConvNormActivation(torch.nn.Sequential):
    """
    Configurable block used for Convolution-Normalization-Activation blocks.

    The block is a ``torch.nn.Sequential`` made of a ``torch.nn.Conv2d``, followed by the
    normalization layer (if any) and then the activation layer (if any).

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block
        kernel_size: (int or tuple, optional): Size of the convolving kernel. Default: 3
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None,
            in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation``
            (per dimension when ``kernel_size`` or ``dilation`` is a tuple)
        groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1
        norm_layer (Callable[..., torch.nn.Module], optional): Norm layer that will be stacked on top of the
            convolution layer. If ``None`` this layer won't be used. Default: ``torch.nn.BatchNorm2d``
        activation_layer (Callable[..., torch.nn.Module], optional): Activation function which will be stacked
            on top of the normalization layer (if not None), otherwise on top of the conv layer.
            If ``None`` this layer won't be used. Default: ``torch.nn.ReLU``
        dilation (int or tuple): Spacing between kernel elements. Default: 1
        inplace (bool, optional): Parameter for the activation layer, which can optionally do the operation
            in-place. If ``None``, the activation layer is constructed without an ``inplace`` argument.
            Default ``True``
        bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included
            if ``norm_layer is None``.

    Raises:
        ValueError: If ``padding`` is ``None`` and ``kernel_size`` and ``dilation`` are both sequences
            of different lengths.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, ...]] = 3,
        stride: Union[int, Tuple[int, ...]] = 1,
        padding: Optional[Union[int, Tuple[int, ...], str]] = None,
        groups: int = 1,
        norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d,
        activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU,
        dilation: Union[int, Tuple[int, ...]] = 1,
        inplace: Optional[bool] = True,
        bias: Optional[bool] = None,
    ) -> None:
        if padding is None:
            padding = self._auto_padding(kernel_size, dilation)
        if bias is None:
            # A convolution bias is only added by default when no norm layer follows.
            bias = norm_layer is None

        layers = [
            torch.nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                dilation=dilation,
                groups=groups,
                bias=bias,
            )
        ]
        if norm_layer is not None:
            layers.append(norm_layer(out_channels))
        if activation_layer is not None:
            # ``inplace=None`` means the activation constructor takes no ``inplace`` argument.
            params = {} if inplace is None else {"inplace": inplace}
            layers.append(activation_layer(**params))

        super().__init__(*layers)
        _log_api_usage_once(self)
        self.out_channels = out_channels

    @staticmethod
    def _auto_padding(
        kernel_size: Union[int, Tuple[int, ...]],
        dilation: Union[int, Tuple[int, ...]],
    ) -> Union[int, Tuple[int, ...]]:
        """Return ``(kernel_size - 1) // 2 * dilation``, per dimension if either argument is a sequence."""
        kernel_is_seq = isinstance(kernel_size, (tuple, list))
        dilation_is_seq = isinstance(dilation, (tuple, list))

        # Scalar path: exactly the original formula, for any non-sequence operands.
        if not (kernel_is_seq or dilation_is_seq):
            return (kernel_size - 1) // 2 * dilation

        # Broadcast a scalar operand to the length of the sequence operand.
        ndim = len(kernel_size) if kernel_is_seq else len(dilation)
        kernels = tuple(kernel_size) if kernel_is_seq else (kernel_size,) * ndim
        dilations = tuple(dilation) if dilation_is_seq else (dilation,) * ndim
        if len(kernels) != len(dilations):
            raise ValueError(
                f"kernel_size and dilation must have the same length, got {len(kernels)} and {len(dilations)}"
            )
        return tuple((k - 1) // 2 * d for k, d in zip(kernels, dilations))
|
|
|
|
class SqueezeExcitation(torch.nn.Module):
    """
    Squeeze-and-Excitation block, as described in https://arxiv.org/abs/1709.01507 (see Fig. 1).
    Parameters ``activation`` and ``scale_activation`` correspond to ``delta`` and ``sigma`` in eq. 3.

    Args:
        input_channels (int): Number of channels in the input image
        squeeze_channels (int): Number of squeeze channels
        activation (Callable[..., torch.nn.Module], optional): ``delta`` activation. Default: ``torch.nn.ReLU``
        scale_activation (Callable[..., torch.nn.Module]): ``sigma`` activation. Default: ``torch.nn.Sigmoid``
    """

    def __init__(
        self,
        input_channels: int,
        squeeze_channels: int,
        activation: Callable[..., torch.nn.Module] = torch.nn.ReLU,
        scale_activation: Callable[..., torch.nn.Module] = torch.nn.Sigmoid,
    ) -> None:
        super().__init__()
        _log_api_usage_once(self)
        # Submodules are registered in this fixed order, which determines the order
        # of children and of state-dict keys.
        self.avgpool = torch.nn.AdaptiveAvgPool2d(1)
        # The two "fully connected" layers are 1x1 convolutions: channels -> squeeze -> channels.
        self.fc1 = torch.nn.Conv2d(in_channels=input_channels, out_channels=squeeze_channels, kernel_size=1)
        self.fc2 = torch.nn.Conv2d(in_channels=squeeze_channels, out_channels=input_channels, kernel_size=1)
        self.activation = activation()
        self.scale_activation = scale_activation()

    def _scale(self, input: Tensor) -> Tensor:
        """Compute the scale tensor: pool to 1x1, then fc1 -> activation -> fc2 -> scale_activation."""
        squeezed = self.avgpool(input)
        excited = self.fc2(self.activation(self.fc1(squeezed)))
        return self.scale_activation(excited)

    def forward(self, input: Tensor) -> Tensor:
        """Return ``input`` multiplied by the scale tensor computed from it."""
        return self._scale(input) * input
|
|