
Source code for tllib.regularization.delta

"""
@author: Yifei Ji
@contact: jiyf990330@163.com
"""
import torch
import torch.nn as nn

import functools
from collections import OrderedDict


class L2Regularization(nn.Module):
    r"""The L2 regularization of parameters :math:`w` can be described as:

    .. math::
        {\Omega} (w) = \dfrac{1}{2} \Vert w\Vert_2^2 ,

    Args:
        model (torch.nn.Module): The model to apply L2 penalty.

    Shape:
        - Output: scalar.
    """

    def __init__(self, model: nn.Module):
        super(L2Regularization, self).__init__()
        self.model = model

    def forward(self):
        output = 0.0
        for param in self.model.parameters():
            output += 0.5 * torch.norm(param) ** 2
        return output
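
A minimal usage sketch (not part of the module source): the penalty is simply added to the task loss with a trade-off weight. The toy linear model and the weight 0.01 are illustrative assumptions.

import torch
import torch.nn as nn
from tllib.regularization.delta import L2Regularization

model = nn.Linear(16, 4)            # stand-in for the network being fine-tuned (assumption)
l2_reg = L2Regularization(model)

x, labels = torch.randn(8, 16), torch.randint(0, 4, (8,))
loss = nn.CrossEntropyLoss()(model(x), labels) + 0.01 * l2_reg()   # 0.01 is an arbitrary weight
loss.backward()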
class SPRegularization(nn.Module):
    r"""
    The SP (Starting Point) regularization from `Explicit inductive bias for transfer learning with convolutional networks
    (ICML 2018) <https://arxiv.org/abs/1802.01483>`_

    The SP regularization of parameters :math:`w` can be described as:

    .. math::
        {\Omega} (w) = \dfrac{1}{2} \Vert w-w_0\Vert_2^2 ,

    where :math:`w_0` is the parameter vector of the model pretrained on the source problem,
    acting as the starting point (SP) in fine-tuning.

    Args:
        source_model (torch.nn.Module): The source (starting point) model.
        target_model (torch.nn.Module): The target (fine-tuning) model.

    Shape:
        - Output: scalar.
    """

    def __init__(self, source_model: nn.Module, target_model: nn.Module):
        super(SPRegularization, self).__init__()
        self.target_model = target_model
        self.source_weight = {}
        for name, param in source_model.named_parameters():
            self.source_weight[name] = param.detach()

    def forward(self):
        output = 0.0
        for name, param in self.target_model.named_parameters():
            output += 0.5 * torch.norm(param - self.source_weight[name]) ** 2
        return output
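
A minimal usage sketch (not part of the module source), assuming a torchvision ResNet-18: a deep copy of the model serves as the frozen starting point w_0, and the penalty measures how far fine-tuning has drifted from it.

import copy
import torch
from torchvision.models import resnet18
from tllib.regularization.delta import SPRegularization

target_model = resnet18()                    # the model being fine-tuned (assumption)
source_model = copy.deepcopy(target_model)   # frozen copy acting as the starting point w_0
sp_reg = SPRegularization(source_model, target_model)

print(sp_reg().item())   # 0.0 before any update; grows as the weights drift from w_0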
class BehavioralRegularization(nn.Module):
    r"""
    The behavioral regularization from `DELTA: DEep Learning Transfer using Feature Map with Attention
    for convolutional networks (ICLR 2019) <https://openreview.net/pdf?id=rkgbwsAcYm>`_

    It can be described as:

    .. math::
        {\Omega} (w) = \sum_{j=1}^{N} \Vert FM_j(w, \boldsymbol x)-FM_j(w^0, \boldsymbol x)\Vert_2^2 ,

    where :math:`w^0` is the parameter vector of the model pretrained on the source problem,
    acting as the starting point (SP) in fine-tuning, and :math:`FM_j(w, \boldsymbol x)` are the feature maps
    generated from the :math:`j`-th layer of the model parameterized with :math:`w`, given the input :math:`\boldsymbol x`.

    Inputs:
        layer_outputs_source (OrderedDict): The dictionary for the source model, where the keys are layer names
            and the values are the corresponding feature maps.
        layer_outputs_target (OrderedDict): The dictionary for the target model, where the keys are layer names
            and the values are the corresponding feature maps.

    Shape:
        - Output: scalar.
    """

    def __init__(self):
        super(BehavioralRegularization, self).__init__()

    def forward(self, layer_outputs_source, layer_outputs_target):
        output = 0.0
        for fm_src, fm_tgt in zip(layer_outputs_source.values(), layer_outputs_target.values()):
            output += 0.5 * (torch.norm(fm_tgt - fm_src.detach()) ** 2)
        return output
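
A minimal usage sketch (not part of the module source) with dummy feature maps standing in for real layer outputs; in practice the two OrderedDicts would come from the IntermediateLayerGetter defined further below.

import torch
from collections import OrderedDict
from tllib.regularization.delta import BehavioralRegularization

# Dummy feature maps (batch 2, 64 channels, 56x56) standing in for real layer outputs (assumption).
fm_source = OrderedDict(layer1=torch.randn(2, 64, 56, 56))
fm_target = OrderedDict(layer1=torch.randn(2, 64, 56, 56))

behavior_reg = BehavioralRegularization()
penalty = behavior_reg(fm_source, fm_target)   # scalar; source maps are detached inside forward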
class AttentionBehavioralRegularization(nn.Module):
    r"""
    The behavioral regularization with attention from `DELTA: DEep Learning Transfer using Feature Map with Attention
    for convolutional networks (ICLR 2019) <https://openreview.net/pdf?id=rkgbwsAcYm>`_

    It can be described as:

    .. math::
        {\Omega} (w) = \sum_{j=1}^{N} W_j(w) \Vert FM_j(w, \boldsymbol x)-FM_j(w^0, \boldsymbol x)\Vert_2^2 ,

    where :math:`w^0` is the parameter vector of the model pretrained on the source problem,
    acting as the starting point (SP) in fine-tuning. :math:`FM_j(w, \boldsymbol x)` are the feature maps
    generated from the :math:`j`-th layer of the model parameterized with :math:`w`, given the input :math:`\boldsymbol x`.
    :math:`W_j(w)` is the channel attention of the :math:`j`-th layer of the model parameterized with :math:`w`.

    Args:
        channel_attention (list): The channel attentions of the feature maps generated by each selected layer.
            For a layer with C channels, the channel attention is a tensor of shape [C].

    Inputs:
        layer_outputs_source (OrderedDict): The dictionary for the source model, where the keys are layer names
            and the values are the corresponding feature maps.
        layer_outputs_target (OrderedDict): The dictionary for the target model, where the keys are layer names
            and the values are the corresponding feature maps.

    Shape:
        - Output: scalar.
    """

    def __init__(self, channel_attention):
        super(AttentionBehavioralRegularization, self).__init__()
        self.channel_attention = channel_attention

    def forward(self, layer_outputs_source, layer_outputs_target):
        output = 0.0
        for i, (fm_src, fm_tgt) in enumerate(zip(layer_outputs_source.values(), layer_outputs_target.values())):
            b, c, h, w = fm_src.shape
            fm_src = fm_src.reshape(b, c, h * w)
            fm_tgt = fm_tgt.reshape(b, c, h * w)

            distance = torch.norm(fm_tgt - fm_src.detach(), 2, 2)
            distance = c * torch.mul(self.channel_attention[i], distance ** 2) / (h * w)
            output += 0.5 * torch.sum(distance)

        return output
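
A minimal usage sketch (not part of the module source). The uniform channel-attention vector is an illustrative assumption only; in DELTA the per-channel weights are learned rather than uniform.

import torch
from collections import OrderedDict
from tllib.regularization.delta import AttentionBehavioralRegularization

num_channels = 64
# Uniform attention over channels is an assumption for illustration; one vector per selected layer.
channel_attention = [torch.full((num_channels,), 1.0 / num_channels)]
att_reg = AttentionBehavioralRegularization(channel_attention)

fm_source = OrderedDict(layer1=torch.randn(2, num_channels, 56, 56))
fm_target = OrderedDict(layer1=torch.randn(2, num_channels, 56, 56))
penalty = att_reg(fm_source, fm_target)   # scalar; per-channel distances weighted by the attention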
def get_attribute(obj, attr, *args):
    """Resolve a dotted attribute path, e.g. ``get_attribute(model, 'layer1.0.conv1')``."""
    def _getattr(obj, attr):
        return getattr(obj, attr, *args)
    return functools.reduce(_getattr, [obj] + attr.split('.'))
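
A small sketch (not part of the module source) of how the dotted path resolves nested sub-modules, assuming a torchvision ResNet-18:

from torchvision.models import resnet18
from tllib.regularization.delta import get_attribute

model = resnet18()
conv = get_attribute(model, 'layer1.0.conv1')   # same module as model.layer1[0].conv1
print(type(conv))                               # <class 'torch.nn.modules.conv.Conv2d'>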
class IntermediateLayerGetter:
    r"""
    Wraps a model to get intermediate output values of selected layers.

    Args:
        model (torch.nn.Module): The model to collect intermediate layer feature maps from.
        return_layers (list): The names of the selected modules whose outputs are returned.
        keep_output (bool): If True, `model_output` contains the final model's output, else it is None. Default: True

    Returns:
        - An OrderedDict of intermediate outputs. The keys are the selected layer names in `return_layers`
          and the values are the corresponding feature maps. The order is the same as `return_layers`.
        - The model's final output. If `keep_output` is False, return None.
    """

    def __init__(self, model, return_layers, keep_output=True):
        self._model = model
        self.return_layers = return_layers
        self.keep_output = keep_output

    def __call__(self, *args, **kwargs):
        ret = OrderedDict()
        handles = []
        for name in self.return_layers:
            layer = get_attribute(self._model, name)

            # Record the layer's output under its name each time the model runs.
            def hook(module, input, output, name=name):
                ret[name] = output

            try:
                h = layer.register_forward_hook(hook)
            except AttributeError as e:
                raise AttributeError(f'Module {name} not found')
            handles.append(h)

        if self.keep_output:
            output = self._model(*args, **kwargs)
        else:
            self._model(*args, **kwargs)
            output = None

        # Remove the hooks so repeated calls do not accumulate handlers.
        for h in handles:
            h.remove()

        return ret, output
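
A minimal end-to-end sketch (not part of the module source) showing roughly how these pieces compose in a DELTA-style fine-tuning step; the backbone, the selected layer names, and the loss weight are illustrative assumptions.

import copy
import torch
import torch.nn as nn
from torchvision.models import resnet18
from tllib.regularization.delta import BehavioralRegularization, IntermediateLayerGetter

target_model = resnet18(num_classes=10)            # model being fine-tuned (assumption)
source_model = copy.deepcopy(target_model).eval()  # frozen pretrained reference

return_layers = ['layer3', 'layer4']               # layers whose feature maps are regularized (assumption)
source_getter = IntermediateLayerGetter(source_model, return_layers)
target_getter = IntermediateLayerGetter(target_model, return_layers)
behavior_reg = BehavioralRegularization()

x, labels = torch.randn(2, 3, 224, 224), torch.randint(0, 10, (2,))
with torch.no_grad():
    fm_source, _ = source_getter(x)
fm_target, logits = target_getter(x)

loss = nn.CrossEntropyLoss()(logits, labels) + 0.01 * behavior_reg(fm_source, fm_target)
loss.backward()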
