diff --git a/cv/semantic_segmentation/icnet/pytorch/model/__init__.py b/cv/semantic_segmentation/icnet/pytorch/model/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0f4f51982c3ac6b1ea547959809a48f799c248bf --- /dev/null +++ b/cv/semantic_segmentation/icnet/pytorch/model/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2022, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# Copyright (c) SegmenTron. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/cv/semantic_segmentation/icnet/pytorch/model/builder.py b/cv/semantic_segmentation/icnet/pytorch/model/builder.py index bc53306c39b2d60207f07bbb43dba90a094b6d6b..d9ddd746138ee48034b5b3a7bf2ced238ddf5e4b 100644 --- a/cv/semantic_segmentation/icnet/pytorch/model/builder.py +++ b/cv/semantic_segmentation/icnet/pytorch/model/builder.py @@ -1,6 +1,6 @@ # Copyright (c) 2022, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. # All Rights Reserved. -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright (c) SegmenTron. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -161,5 +161,4 @@ def icnet_resnet50(num_classes, **kwargs): ) cfg = build_segmentron_config(_cfg) cfg.MODEL.NUM_CLASS = num_classes - return ICNet(cfg, **kwargs) - + return ICNet(cfg, **kwargs) \ No newline at end of file diff --git a/cv/semantic_segmentation/icnet/pytorch/model/models/__init__.py b/cv/semantic_segmentation/icnet/pytorch/model/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3d0438ffa4b1041cc616d4a4cb5f48c4e63720e5 --- /dev/null +++ b/cv/semantic_segmentation/icnet/pytorch/model/models/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2022, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
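For context (this note sits outside the patch itself): icnet_resnet50 above is a thin factory that merges ICNet's default SegmenTron-style settings into a config, overrides MODEL.NUM_CLASS, and instantiates the network. A minimal usage sketch, assuming the pytorch/ directory of this tree is on PYTHONPATH so the package imports as `model`:

import torch
from model.builder import icnet_resnet50

net = icnet_resnet50(num_classes=19).eval()  # e.g. 19 Cityscapes classes
with torch.no_grad():
    out = net(torch.randn(1, 3, 512, 512))
# With cfg.SOLVER.AUX disabled, forward() returns full-resolution logits;
# with AUX enabled it returns a tuple of cascade outputs instead.
logits = out[0] if isinstance(out, tuple) else out
print(logits.shape)  # torch.Size([1, 19, 512, 512])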
+ +"""Model Zoo""" + + +from .icnet import * diff --git a/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/__init__.py b/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3062701f259eb862a2c407efa95382876decd8d2 --- /dev/null +++ b/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/__init__.py @@ -0,0 +1,6 @@ +from .build import BACKBONE_REGISTRY, get_segmentation_backbone +from .xception import * +from .mobilenet import * +from .resnet import * +from .hrnet import * +from .eespnet import * diff --git a/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/build.py b/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/build.py new file mode 100644 index 0000000000000000000000000000000000000000..ec1c34cbd0387df61109dd1134b7cb18457360c2 --- /dev/null +++ b/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/build.py @@ -0,0 +1,52 @@ +# Copyright (c) 2022, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# Copyright (c) SegmenTron. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import os +import torch +import logging +import torch.utils.model_zoo as model_zoo + +from ...utils.registry import Registry + +BACKBONE_REGISTRY = Registry("BACKBONE") +BACKBONE_REGISTRY.__doc__ = """ +Registry for backbone, i.e. resnet. + +The registered object will be called with `obj()` +and expected to return a `nn.Module` object. +""" + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', + 'resnet50c': 'https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/resnet50-25c4b509.pth', + 'resnet101c': 'https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/resnet101-2a57e44d.pth', + 'resnet152c': 'https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/resnet152-0d43d698.pth', + 'xception65': 'https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/tf-xception65-270e81cf.pth', + 'hrnet_w18_small_v1': 'https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/hrnet-w18-small-v1-08f8ae64.pth', + 'mobilenet_v2': 'https://github.com/LikeLy-Journey/SegmenTron/releases/download/v0.1.0/mobilenetV2-15498621.pth', +} + + +def get_segmentation_backbone(backbone, cfg=None, norm_layer=torch.nn.BatchNorm2d): + """ + Built the backbone model, defined by `cfg.MODEL.BACKBONE`. 
+ """ + model = BACKBONE_REGISTRY.get(backbone)(cfg=cfg, norm_layer=norm_layer) + return model diff --git a/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/eespnet.py b/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/eespnet.py new file mode 100644 index 0000000000000000000000000000000000000000..603415dfc397d80fdcea94e3024cd7500e297e64 --- /dev/null +++ b/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/eespnet.py @@ -0,0 +1,184 @@ +# Copyright (c) 2022, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# Copyright (c) SegmenTron. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import math +import sys + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ...modules import _ConvBNPReLU, _ConvBN, _BNPReLU, EESP +from .build import BACKBONE_REGISTRY + +__all__ = ['EESPNet', 'eespnet'] + + +def padding_channel(): + return "--padding-channel" in sys.argv + + +class DownSampler(nn.Module): + + def __init__(self, in_channels, out_channels, k=4, r_lim=9, reinf=True, inp_reinf=3, norm_layer=None): + super(DownSampler, self).__init__() + channels_diff = out_channels - in_channels + self.eesp = EESP(in_channels, channels_diff, stride=2, k=k, + r_lim=r_lim, down_method='avg', norm_layer=norm_layer) + self.avg = nn.AvgPool2d(kernel_size=3, padding=1, stride=2) + if reinf: + self.inp_reinf = nn.Sequential( + _ConvBNPReLU(inp_reinf, inp_reinf, 3, 1, 1), + _ConvBN(inp_reinf, out_channels, 1, 1)) + self.act = nn.PReLU(out_channels) + + def forward(self, x, x2=None): + avg_out = self.avg(x) + eesp_out = self.eesp(x) + output = torch.cat([avg_out, eesp_out], 1) + if x2 is not None: + w1 = avg_out.size(2) + while True: + x2 = F.avg_pool2d(x2, kernel_size=3, padding=1, stride=2) + w2 = x2.size(2) + if w2 == w1: + break + output = output + self.inp_reinf(x2) + + return self.act(output) + + +class EESPNet(nn.Module): + def __init__(self, cfg=None, num_classes=1000, scale=1, reinf=True, norm_layer=nn.BatchNorm2d): + super(EESPNet, self).__init__() + inp_reinf = 4 if padding_channel() else 3 + reps = [0, 3, 7, 3] + r_lim = [13, 11, 9, 7, 5] + K = [4] * len(r_lim) + + # set out_channels + base, levels, base_s = 32, 5, 0 + out_channels = [base] * levels + for i in range(levels): + if i == 0: + base_s = int(base * scale) + base_s = math.ceil(base_s / K[0]) * K[0] + out_channels[i] = base if base_s > base else base_s + else: + out_channels[i] = base_s * pow(2, i) + if scale <= 1.5: + out_channels.append(1024) + elif scale in [1.5, 2]: + out_channels.append(1280) + else: + raise ValueError("Unknown scale value.") + + self.level1 = _ConvBNPReLU(3, out_channels[0], 3, 2, 1, norm_layer=norm_layer) + + self.level2_0 = DownSampler(out_channels[0], out_channels[1], k=K[0], r_lim=r_lim[0], + reinf=reinf, inp_reinf=inp_reinf, norm_layer=norm_layer) + + self.level3_0 = DownSampler(out_channels[1], out_channels[2], k=K[1], r_lim=r_lim[1], + reinf=reinf, inp_reinf=inp_reinf, norm_layer=norm_layer) + self.level3 = 
nn.ModuleList() + for i in range(reps[1]): + self.level3.append(EESP(out_channels[2], out_channels[2], k=K[2], r_lim=r_lim[2], + norm_layer=norm_layer)) + + self.level4_0 = DownSampler(out_channels[2], out_channels[3], k=K[2], r_lim=r_lim[2], + reinf=reinf, inp_reinf=inp_reinf, norm_layer=norm_layer) + self.level4 = nn.ModuleList() + for i in range(reps[2]): + self.level4.append(EESP(out_channels[3], out_channels[3], k=K[3], r_lim=r_lim[3], + norm_layer=norm_layer)) + + self.level5_0 = DownSampler(out_channels[3], out_channels[4], k=K[3], r_lim=r_lim[3], + reinf=reinf, inp_reinf=inp_reinf, norm_layer=norm_layer) + self.level5 = nn.ModuleList() + for i in range(reps[2]): + self.level5.append(EESP(out_channels[4], out_channels[4], k=K[4], r_lim=r_lim[4], + norm_layer=norm_layer)) + + self.level5.append(_ConvBNPReLU(out_channels[4], out_channels[4], 3, 1, 1, + groups=out_channels[4], norm_layer=norm_layer)) + self.level5.append(_ConvBNPReLU(out_channels[4], out_channels[5], 1, 1, 0, + groups=K[4], norm_layer=norm_layer)) + + self.fc = nn.Linear(out_channels[5], num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, std=0.001) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + def forward(self, x, seg=True): + out_l1 = self.level1(x) + + out_l2 = self.level2_0(out_l1, x) + + out_l3_0 = self.level3_0(out_l2, x) + for i, layer in enumerate(self.level3): + if i == 0: + out_l3 = layer(out_l3_0) + else: + out_l3 = layer(out_l3) + + out_l4_0 = self.level4_0(out_l3, x) + for i, layer in enumerate(self.level4): + if i == 0: + out_l4 = layer(out_l4_0) + else: + out_l4 = layer(out_l4) + + if not seg: + out_l5_0 = self.level5_0(out_l4) # down-sampled + for i, layer in enumerate(self.level5): + if i == 0: + out_l5 = layer(out_l5_0) + else: + out_l5 = layer(out_l5) + + output_g = F.adaptive_avg_pool2d(out_l5, output_size=1) + output_g = F.dropout(output_g, p=0.2, training=self.training) + output_1x1 = output_g.view(output_g.size(0), -1) + + return self.fc(output_1x1) + return out_l1, out_l2, out_l3, out_l4 + + +@BACKBONE_REGISTRY.register() +def eespnet(cfg=None, norm_layer=nn.BatchNorm2d): + return EESPNet(cfg=cfg, norm_layer=norm_layer) + +# def eespnet(pretrained=False, **kwargs): +# model = EESPNet(**kwargs) +# if pretrained: +# raise ValueError("Don't support pretrained") +# return model + + +if __name__ == '__main__': + img = torch.randn(1, 3, 224, 224) + model = eespnet() + out = model(img) diff --git a/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/hrnet.py b/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..fb6936b9ce55251f0f1eda9c0ba1e1a5f879f5cf --- /dev/null +++ b/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/hrnet.py @@ -0,0 +1,521 @@ +# Copyright (c) 2022, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# Copyright (c) SegmenTron. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +# this code is heavily based on https://github.com/HRNet + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import logging + +import torch +import torch.nn as nn +import torch._utils +import numpy as np + +from .build import BACKBONE_REGISTRY + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, + bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class HighResolutionModule(nn.Module): + def __init__(self, num_branches, blocks, num_blocks, num_inchannels, + num_channels, fuse_method, multi_scale_output=True): + super(HighResolutionModule, self).__init__() + self._check_branches( + num_branches, blocks, num_blocks, num_inchannels, num_channels) + + self.num_inchannels = num_inchannels + self.fuse_method = fuse_method + self.num_branches = num_branches + + self.multi_scale_output = multi_scale_output + + self.branches = self._make_branches( + num_branches, blocks, num_blocks, num_channels) + self.fuse_layers = self._make_fuse_layers() + self.relu = nn.ReLU(False) + + def _check_branches(self, num_branches, blocks, num_blocks, + num_inchannels, num_channels): + if num_branches != len(num_blocks): + error_msg = 'NUM_BRANCHES({}) <> NUM_BLOCKS({})'.format( + num_branches, len(num_blocks)) + logging.error(error_msg) + raise ValueError(error_msg) + + if num_branches != len(num_channels): + 
error_msg = 'NUM_BRANCHES({}) <> NUM_CHANNELS({})'.format( + num_branches, len(num_channels)) + logging.error(error_msg) + raise ValueError(error_msg) + + if num_branches != len(num_inchannels): + error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format( + num_branches, len(num_inchannels)) + logging.error(error_msg) + raise ValueError(error_msg) + + def _make_one_branch(self, branch_index, block, num_blocks, num_channels, + stride=1): + downsample = None + if stride != 1 or \ + self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.num_inchannels[branch_index], + num_channels[branch_index] * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(num_channels[branch_index] * block.expansion), + ) + + layers = [] + layers.append(block(self.num_inchannels[branch_index], + num_channels[branch_index], stride, downsample)) + self.num_inchannels[branch_index] = \ + num_channels[branch_index] * block.expansion + for i in range(1, num_blocks[branch_index]): + layers.append(block(self.num_inchannels[branch_index], + num_channels[branch_index])) + + return nn.Sequential(*layers) + + def _make_branches(self, num_branches, block, num_blocks, num_channels): + branches = [] + + for i in range(num_branches): + branches.append( + self._make_one_branch(i, block, num_blocks, num_channels)) + + return nn.ModuleList(branches) + + def _make_fuse_layers(self): + if self.num_branches == 1: + return None + + num_branches = self.num_branches + num_inchannels = self.num_inchannels + fuse_layers = [] + for i in range(num_branches if self.multi_scale_output else 1): + fuse_layer = [] + for j in range(num_branches): + if j > i: + fuse_layer.append(nn.Sequential( + nn.Conv2d(num_inchannels[j], + num_inchannels[i], + 1, + 1, + 0, + bias=False), + nn.BatchNorm2d(num_inchannels[i]), + nn.Upsample(scale_factor=2 ** (j - i), mode='nearest'))) + elif j == i: + fuse_layer.append(None) + else: + conv3x3s = [] + for k in range(i - j): + if k == i - j - 1: + num_outchannels_conv3x3 = num_inchannels[i] + conv3x3s.append(nn.Sequential( + nn.Conv2d(num_inchannels[j], + num_outchannels_conv3x3, + 3, 2, 1, bias=False), + nn.BatchNorm2d(num_outchannels_conv3x3))) + else: + num_outchannels_conv3x3 = num_inchannels[j] + conv3x3s.append(nn.Sequential( + nn.Conv2d(num_inchannels[j], + num_outchannels_conv3x3, + 3, 2, 1, bias=False), + nn.BatchNorm2d(num_outchannels_conv3x3), + nn.ReLU(False))) + fuse_layer.append(nn.Sequential(*conv3x3s)) + fuse_layers.append(nn.ModuleList(fuse_layer)) + + return nn.ModuleList(fuse_layers) + + def get_num_inchannels(self): + return self.num_inchannels + + def forward(self, x): + if self.num_branches == 1: + return [self.branches[0](x[0])] + + for i in range(self.num_branches): + x[i] = self.branches[i](x[i]) + + x_fuse = [] + for i in range(len(self.fuse_layers)): + y = x[0] if i == 0 else self.fuse_layers[i][0](x[0]) + for j in range(1, self.num_branches): + if i == j: + y = y + x[j] + else: + y = y + self.fuse_layers[i][j](x[j]) + x_fuse.append(self.relu(y)) + + return x_fuse + + +blocks_dict = { + 'BASIC': BasicBlock, + 'BOTTLENECK': Bottleneck +} + + +class HighResolutionNet(nn.Module): + + def __init__(self, cfg=None, norm_layer=nn.BatchNorm2d): + super(HighResolutionNet, self).__init__() + + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, + bias=False) + self.bn1 = norm_layer(64) + self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, + bias=False) + self.bn2 = 
norm_layer(64) + self.relu = nn.ReLU(inplace=True) + + self.stage1_cfg = cfg.MODEL.HRNET.STAGE1 + num_channels = self.stage1_cfg['NUM_CHANNELS'][0] + block = blocks_dict[self.stage1_cfg['BLOCK']] + num_blocks = self.stage1_cfg['NUM_BLOCKS'][0] + self.layer1 = self._make_layer(block, 64, num_channels, num_blocks, norm_layer=norm_layer) + stage1_out_channel = block.expansion * num_channels + + self.stage2_cfg = cfg.MODEL.HRNET.STAGE2 + num_channels = self.stage2_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage2_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels))] + self.transition1 = self._make_transition_layer( + [stage1_out_channel], num_channels, norm_layer=norm_layer) + self.stage2, pre_stage_channels = self._make_stage( + self.stage2_cfg, num_channels) + + self.stage3_cfg = cfg.MODEL.HRNET.STAGE3 + num_channels = self.stage3_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage3_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels))] + self.transition2 = self._make_transition_layer( + pre_stage_channels, num_channels) + self.stage3, pre_stage_channels = self._make_stage( + self.stage3_cfg, num_channels) + + self.stage4_cfg = cfg.MODEL.HRNET.STAGE4 + num_channels = self.stage4_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage4_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels))] + self.transition3 = self._make_transition_layer( + pre_stage_channels, num_channels) + self.stage4, pre_stage_channels = self._make_stage( + self.stage4_cfg, num_channels, multi_scale_output=True) + + self.last_inp_channels = np.int(np.sum(pre_stage_channels)) + # Classification Head + # self.incre_modules, self.downsamp_modules, \ + # self.final_layer = self._make_head(pre_stage_channels) + # + # self.classifier = nn.Linear(2048, 1000) + + def _make_head(self, pre_stage_channels): + head_block = Bottleneck + head_channels = [32, 64, 128, 256] + + # Increasing the #channels on each resolution + # from C, 2C, 4C, 8C to 128, 256, 512, 1024 + incre_modules = [] + for i, channels in enumerate(pre_stage_channels): + incre_module = self._make_layer(head_block, + channels, + head_channels[i], + 1, + stride=1) + incre_modules.append(incre_module) + incre_modules = nn.ModuleList(incre_modules) + + # downsampling modules + downsamp_modules = [] + for i in range(len(pre_stage_channels) - 1): + in_channels = head_channels[i] * head_block.expansion + out_channels = head_channels[i + 1] * head_block.expansion + + downsamp_module = nn.Sequential( + nn.Conv2d(in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=2, + padding=1), + nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True) + ) + + downsamp_modules.append(downsamp_module) + downsamp_modules = nn.ModuleList(downsamp_modules) + + final_layer = nn.Sequential( + nn.Conv2d( + in_channels=head_channels[3] * head_block.expansion, + out_channels=2048, + kernel_size=1, + stride=1, + padding=0 + ), + nn.BatchNorm2d(2048), + nn.ReLU(inplace=True) + ) + + return incre_modules, downsamp_modules, final_layer + + def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer, + norm_layer=nn.BatchNorm2d): + num_branches_cur = len(num_channels_cur_layer) + num_branches_pre = len(num_channels_pre_layer) + + transition_layers = [] + for i in range(num_branches_cur): + if i < num_branches_pre: + if num_channels_cur_layer[i] != num_channels_pre_layer[i]: + 
transition_layers.append(nn.Sequential( + nn.Conv2d(num_channels_pre_layer[i], + num_channels_cur_layer[i], + 3, + 1, + 1, + bias=False), + norm_layer( + num_channels_cur_layer[i]), + nn.ReLU(inplace=True))) + else: + transition_layers.append(None) + else: + conv3x3s = [] + for j in range(i + 1 - num_branches_pre): + inchannels = num_channels_pre_layer[-1] + outchannels = num_channels_cur_layer[i] \ + if j == i - num_branches_pre else inchannels + conv3x3s.append(nn.Sequential( + nn.Conv2d( + inchannels, outchannels, 3, 2, 1, bias=False), + norm_layer(outchannels), + nn.ReLU(inplace=True))) + transition_layers.append(nn.Sequential(*conv3x3s)) + + return nn.ModuleList(transition_layers) + + def _make_layer(self, block, inplanes, planes, blocks, stride=1, norm_layer=nn.BatchNorm2d): + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + norm_layer(planes * block.expansion), + ) + + layers = [] + layers.append(block(inplanes, planes, stride, downsample)) + inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(inplanes, planes)) + + return nn.Sequential(*layers) + + def _make_stage(self, layer_config, num_inchannels, + multi_scale_output=True): + num_modules = layer_config['NUM_MODULES'] + num_branches = layer_config['NUM_BRANCHES'] + num_blocks = layer_config['NUM_BLOCKS'] + num_channels = layer_config['NUM_CHANNELS'] + block = blocks_dict[layer_config['BLOCK']] + fuse_method = layer_config['FUSE_METHOD'] + + modules = [] + for i in range(num_modules): + # multi_scale_output is only used last module + if not multi_scale_output and i == num_modules - 1: + reset_multi_scale_output = False + else: + reset_multi_scale_output = True + + modules.append( + HighResolutionModule(num_branches, + block, + num_blocks, + num_inchannels, + num_channels, + fuse_method, + reset_multi_scale_output) + ) + num_inchannels = modules[-1].get_num_inchannels() + + return nn.Sequential(*modules), num_inchannels + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.bn2(x) + x = self.relu(x) + x = self.layer1(x) + + x_list = [] + for i in range(self.stage2_cfg['NUM_BRANCHES']): + if self.transition1[i] is not None: + x_list.append(self.transition1[i](x)) + else: + x_list.append(x) + y_list = self.stage2(x_list) + + x_list = [] + for i in range(self.stage3_cfg['NUM_BRANCHES']): + if self.transition2[i] is not None: + x_list.append(self.transition2[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage3(x_list) + + x_list = [] + for i in range(self.stage4_cfg['NUM_BRANCHES']): + if self.transition3[i] is not None: + x_list.append(self.transition3[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage4(x_list) + + # Classification Head + # y = self.incre_modules[0](y_list[0]) + # for i in range(len(self.downsamp_modules)): + # y = self.incre_modules[i + 1](y_list[i + 1]) + \ + # self.downsamp_modules[i](y) + # + # y = self.final_layer(y) + # + # if torch._C._get_tracing_state(): + # y = y.flatten(start_dim=2).mean(dim=2) + # else: + # y = F.avg_pool2d(y, kernel_size=y.size() + # [2:]).view(y.size(0), -1) + # + # y = self.classifier(y) + + return tuple(y_list) + + def init_weights(self, pretrained='', ): + logging.info('=> init weights from normal distribution') + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + 
m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + if os.path.isfile(pretrained): + pretrained_dict = torch.load(pretrained) + logging.info('=> loading pretrained model {}'.format(pretrained)) + model_dict = self.state_dict() + pretrained_dict = {k: v for k, v in pretrained_dict.items() + if k in model_dict.keys()} + for k, _ in pretrained_dict.items(): + logging.info( + '=> loading {} pretrained model {}'.format(k, pretrained)) + model_dict.update(pretrained_dict) + self.load_state_dict(model_dict) + + +@BACKBONE_REGISTRY.register() +def hrnetv1_w18_small(cfg=None, norm_layer=nn.BatchNorm2d): + return HighResolutionNet(cfg=cfg, norm_layer=norm_layer) diff --git a/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/mobilenet.py b/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/mobilenet.py new file mode 100644 index 0000000000000000000000000000000000000000..3541a4cde7fb57ca60ba60ef98855962df473c5c --- /dev/null +++ b/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/mobilenet.py @@ -0,0 +1,168 @@ +# Copyright (c) 2022, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# Copyright (c) SegmenTron. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
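For reference (outside the patch): every backbone in this directory is exposed through the same @BACKBONE_REGISTRY.register() decorator, keyed by the decorated function's name, which is what lets get_segmentation_backbone resolve cfg.MODEL.BACKBONE strings such as hrnetv1_w18_small above. A sketch of how a further backbone could be plugged in, assuming the Registry in model/utils/registry.py behaves as build.py implies (TinyNet/tiny_net are hypothetical names):

import torch.nn as nn

from .build import BACKBONE_REGISTRY


class TinyNet(nn.Module):
    def __init__(self, cfg=None, norm_layer=nn.BatchNorm2d):
        super(TinyNet, self).__init__()
        self.stem = nn.Sequential(
            nn.Conv2d(3, 32, 3, 2, 1, bias=False),
            norm_layer(32),
            nn.ReLU(True))

    def forward(self, x):
        c1 = self.stem(x)
        # The backbones above return a 4-tuple of feature maps (c1..c4);
        # a real implementation would compute four distinct stages.
        return c1, c1, c1, c1


@BACKBONE_REGISTRY.register()
def tiny_net(cfg=None, norm_layer=nn.BatchNorm2d):
    return TinyNet(cfg=cfg, norm_layer=norm_layer)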
+ +"""MobileNet and MobileNetV2.""" +import torch.nn as nn + +from .build import BACKBONE_REGISTRY +from ...modules import _ConvBNReLU, _DepthwiseConv, InvertedResidual + +__all__ = ['MobileNet', 'MobileNetV2'] + + +class MobileNet(nn.Module): + def __init__(self, cfg=None, num_classes=1000, norm_layer=nn.BatchNorm2d): + super(MobileNet, self).__init__() + multiplier = cfg.MODEL.BACKBONE_SCALE + conv_dw_setting = [ + [64, 1, 1], + [128, 2, 2], + [256, 2, 2], + [512, 6, 2], + [1024, 2, 2]] + input_channels = int(32 * multiplier) if multiplier > 1.0 else 32 + features = [_ConvBNReLU(3, input_channels, 3, 2, 1, norm_layer=norm_layer)] + + for c, n, s in conv_dw_setting: + out_channels = int(c * multiplier) + for i in range(n): + stride = s if i == 0 else 1 + features.append(_DepthwiseConv(input_channels, out_channels, stride, norm_layer)) + input_channels = out_channels + self.last_inp_channels = int(1024 * multiplier) + features.append(nn.AdaptiveAvgPool2d(1)) + self.features = nn.Sequential(*features) + + self.classifier = nn.Linear(int(1024 * multiplier), num_classes) + + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + + def forward(self, x): + x = self.features(x) + x = self.classifier(x.view(x.size(0), x.size(1))) + return x + + +class MobileNetV2(nn.Module): + def __init__(self, cfg=None, num_classes=1000, norm_layer=nn.BatchNorm2d): + super(MobileNetV2, self).__init__() + output_stride = cfg.MODEL.OUTPUT_STRIDE + self.multiplier = cfg.MODEL.BACKBONE_SCALE + if output_stride == 32: + dilations = [1, 1] + elif output_stride == 16: + dilations = [1, 2] + elif output_stride == 8: + dilations = [2, 4] + else: + raise NotImplementedError + inverted_residual_setting = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1]] + # building first layer + input_channels = int(32 * self.multiplier) if self.multiplier > 1.0 else 32 + # last_channels = int(1280 * multiplier) if multiplier > 1.0 else 1280 + self.conv1 = _ConvBNReLU(3, input_channels, 3, 2, 1, relu6=True, norm_layer=norm_layer) + + # building inverted residual blocks + self.planes = input_channels + self.block1 = self._make_layer(InvertedResidual, self.planes, inverted_residual_setting[0:1], + norm_layer=norm_layer) + self.block2 = self._make_layer(InvertedResidual, self.planes, inverted_residual_setting[1:2], + norm_layer=norm_layer) + self.block3 = self._make_layer(InvertedResidual, self.planes, inverted_residual_setting[2:3], + norm_layer=norm_layer) + self.block4 = self._make_layer(InvertedResidual, self.planes, inverted_residual_setting[3:5], + dilations[0], norm_layer=norm_layer) + self.block5 = self._make_layer(InvertedResidual, self.planes, inverted_residual_setting[5:], + dilations[1], norm_layer=norm_layer) + self.last_inp_channels = self.planes + + # building last several layers + # features = list() + # features.append(_ConvBNReLU(input_channels, last_channels, 1, relu6=True, norm_layer=norm_layer)) + # features.append(nn.AdaptiveAvgPool2d(1)) + # self.features = nn.Sequential(*features) + # + # self.classifier = nn.Sequential( + # nn.Dropout2d(0.2), + # nn.Linear(last_channels, num_classes)) + + # weight initialization + for m 
in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + if m.bias is not None: + nn.init.zeros_(m.bias) + + def _make_layer(self, block, planes, inverted_residual_setting, dilation=1, norm_layer=nn.BatchNorm2d): + features = list() + for t, c, n, s in inverted_residual_setting: + out_channels = int(c * self.multiplier) + stride = s if dilation == 1 else 1 + features.append(block(planes, out_channels, stride, t, dilation, norm_layer)) + planes = out_channels + for i in range(n - 1): + features.append(block(planes, out_channels, 1, t, norm_layer=norm_layer)) + planes = out_channels + self.planes = planes + return nn.Sequential(*features) + + def forward(self, x): + x = self.conv1(x) + x = self.block1(x) + c1 = self.block2(x) + c2 = self.block3(c1) + c3 = self.block4(c2) + c4 = self.block5(c3) + + # x = self.features(x) + # x = self.classifier(x.view(x.size(0), x.size(1))) + return c1, c2, c3, c4 + + +@BACKBONE_REGISTRY.register() +def mobilenet_v1(cfg=None, norm_layer=nn.BatchNorm2d): + return MobileNet(cfg=cfg, norm_layer=norm_layer) + + +@BACKBONE_REGISTRY.register() +def mobilenet_v2(cfg=None, norm_layer=nn.BatchNorm2d): + return MobileNetV2(cfg=cfg, norm_layer=norm_layer) diff --git a/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/resnet.py b/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..b62d8dfda9ba7dceaa36f25ffbe24cbbba11b56b --- /dev/null +++ b/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/resnet.py @@ -0,0 +1,263 @@ +# Copyright (c) 2022, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# Copyright (c) SegmenTron. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
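An aside on the shared convention (not part of the diff): MobileNetV2 above and ResNetV1 below both translate cfg.MODEL.OUTPUT_STRIDE into per-stage strides and dilations, trading stride for dilation in the deepest stages so the feature maps keep the requested resolution. A self-contained illustration of that mapping; stage_plan is an invented helper that mirrors the constructor logic:

def stage_plan(output_stride):
    """Return (strides, dilations) for the last two stages."""
    if output_stride == 32:
        return [2, 2], [1, 1]  # plain ImageNet-style downsampling
    if output_stride == 16:
        return [2, 1], [1, 2]  # last stage dilated instead of strided
    if output_stride == 8:
        return [1, 1], [2, 4]  # last two stages dilated
    raise NotImplementedError(output_stride)


print(stage_plan(8))  # ([1, 1], [2, 4]) -> features stay at 1/8 resolution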
+ + +import torch.nn as nn + +from .build import BACKBONE_REGISTRY + +__all__ = ['ResNetV1'] + + +class BasicBlockV1b(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, + previous_dilation=1, norm_layer=nn.BatchNorm2d): + super(BasicBlockV1b, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, 3, stride, + dilation, dilation, bias=False) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU(True) + self.conv2 = nn.Conv2d(planes, planes, 3, 1, previous_dilation, + dilation=previous_dilation, bias=False) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class BottleneckV1b(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, + previous_dilation=1, norm_layer=nn.BatchNorm2d): + super(BottleneckV1b, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, 1, bias=False) + self.bn1 = norm_layer(planes) + self.conv2 = nn.Conv2d(planes, planes, 3, stride, + dilation, dilation, bias=False) + self.bn2 = norm_layer(planes) + self.conv3 = nn.Conv2d(planes, planes * self.expansion, 1, bias=False) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class ResNetV1(nn.Module): + + def __init__(self, block, layers, cfg=None, num_classes=1000, deep_stem=False, + zero_init_residual=False, norm_layer=nn.BatchNorm2d): + output_stride = cfg.MODEL.OUTPUT_STRIDE + scale = cfg.MODEL.BACKBONE_SCALE + if output_stride == 32: + dilations = [1, 1] + strides = [2, 2] + elif output_stride == 16: + dilations = [1, 2] + strides = [2, 1] + elif output_stride == 8: + dilations = [2, 4] + strides = [1, 1] + else: + raise NotImplementedError + self.inplanes = int((128 if deep_stem else 64) * scale) + super(ResNetV1, self).__init__() + if deep_stem: + # resnet vc + mid_channel = int(64 * scale) + self.conv1 = nn.Sequential( + nn.Conv2d(3, mid_channel, 3, 2, 1, bias=False), + norm_layer(mid_channel), + nn.ReLU(True), + nn.Conv2d(mid_channel, mid_channel, 3, 1, 1, bias=False), + norm_layer(mid_channel), + nn.ReLU(True), + nn.Conv2d(mid_channel, self.inplanes, 3, 1, 1, bias=False) + ) + else: + self.conv1 = nn.Conv2d(3, self.inplanes, 7, 2, 3, bias=False) + self.bn1 = norm_layer(self.inplanes) + self.relu = nn.ReLU(True) + self.maxpool = nn.MaxPool2d(3, 2, 1) + self.layer1 = self._make_layer(block, int(64 * scale), layers[0], norm_layer=norm_layer) + self.layer2 = self._make_layer(block, int(128 * scale), layers[1], stride=2, norm_layer=norm_layer) + + self.layer3 = self._make_layer(block, int(256 * scale), layers[2], stride=strides[0], dilation=dilations[0], + norm_layer=norm_layer) + self.layer4 = self._make_layer(block, int(512 * scale), layers[3], stride=strides[1], dilation=dilations[1], + norm_layer=norm_layer, multi_grid=cfg.MODEL.MULTI_GRID, + 
multi_dilation=cfg.MODEL.MULTI_DILATION) + + self.last_inp_channels = int(512 * block.expansion * scale) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(int(512 * block.expansion * scale), num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + if zero_init_residual: + for m in self.modules(): + if isinstance(m, BottleneckV1b): + nn.init.constant_(m.bn3.weight, 0) + elif isinstance(m, BasicBlockV1b): + nn.init.constant_(m.bn2.weight, 0) + + def _make_layer(self, block, planes, blocks, stride=1, dilation=1, norm_layer=nn.BatchNorm2d, + multi_grid=False, multi_dilation=None): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, 1, stride, bias=False), + norm_layer(planes * block.expansion), + ) + + layers = [] + if not multi_grid: + if dilation in (1, 2): + layers.append(block(self.inplanes, planes, stride, dilation=1, downsample=downsample, + previous_dilation=dilation, norm_layer=norm_layer)) + elif dilation == 4: + layers.append(block(self.inplanes, planes, stride, dilation=2, downsample=downsample, + previous_dilation=dilation, norm_layer=norm_layer)) + else: + raise RuntimeError("=> unknown dilation size: {}".format(dilation)) + else: + layers.append(block(self.inplanes, planes, stride, dilation=multi_dilation[0], + downsample=downsample, previous_dilation=dilation, norm_layer=norm_layer)) + self.inplanes = planes * block.expansion + + if multi_grid: + div = len(multi_dilation) + for i in range(1, blocks): + layers.append(block(self.inplanes, planes, dilation=multi_dilation[i % div], + previous_dilation=dilation, norm_layer=norm_layer)) + else: + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, dilation=dilation, + previous_dilation=dilation, norm_layer=norm_layer)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + c1 = self.layer1(x) + c2 = self.layer2(c1) + c3 = self.layer3(c2) + c4 = self.layer4(c3) + + # for classification + # x = self.avgpool(c4) + # x = x.view(x.size(0), -1) + # x = self.fc(x) + + return c1, c2, c3, c4 + + +@BACKBONE_REGISTRY.register() +def resnet18(cfg=None, norm_layer=nn.BatchNorm2d): + num_block = [2, 2, 2, 2] + return ResNetV1(BasicBlockV1b, num_block, cfg=cfg, norm_layer=norm_layer) + + +@BACKBONE_REGISTRY.register() +def resnet34(cfg=None, norm_layer=nn.BatchNorm2d): + num_block = [3, 4, 6, 3] + return ResNetV1(BasicBlockV1b, num_block, cfg=cfg, norm_layer=norm_layer) + + +@BACKBONE_REGISTRY.register() +def resnet50(cfg=None, norm_layer=nn.BatchNorm2d): + num_block = [3, 4, 6, 3] + return ResNetV1(BottleneckV1b, num_block, cfg=cfg, norm_layer=norm_layer) + + +@BACKBONE_REGISTRY.register() +def resnet101(cfg=None, norm_layer=nn.BatchNorm2d): + num_block = [3, 4, 23, 3] + return ResNetV1(BottleneckV1b, num_block, cfg=cfg, norm_layer=norm_layer) + + +@BACKBONE_REGISTRY.register() +def resnet152(cfg=None, norm_layer=nn.BatchNorm2d): + num_block = [3, 8, 36, 3] + return ResNetV1(BottleneckV1b, num_block, cfg=cfg, norm_layer=norm_layer) + + +@BACKBONE_REGISTRY.register() +def resnet50c(cfg=None, norm_layer=nn.BatchNorm2d): + num_block = [3, 4, 6, 3] + return ResNetV1(BottleneckV1b, num_block, cfg=cfg, norm_layer=norm_layer, 
deep_stem=True) + + +@BACKBONE_REGISTRY.register() +def resnet101c(cfg=None, norm_layer=nn.BatchNorm2d): + num_block = [3, 4, 23, 3] + return ResNetV1(BottleneckV1b, num_block, cfg=cfg, norm_layer=norm_layer, deep_stem=True) + + +@BACKBONE_REGISTRY.register() +def resnet152c(cfg=None, norm_layer=nn.BatchNorm2d): + num_block = [3, 8, 36, 3] + return ResNetV1(BottleneckV1b, num_block, cfg=cfg, norm_layer=norm_layer, deep_stem=True) diff --git a/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/xception.py b/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/xception.py new file mode 100644 index 0000000000000000000000000000000000000000..e9d0da4a28567f7cd914b37797671c3fb5849cab --- /dev/null +++ b/cv/semantic_segmentation/icnet/pytorch/model/models/backbones/xception.py @@ -0,0 +1,297 @@ +# Copyright (c) 2022, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# Copyright (c) SegmenTron. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import torch.nn as nn + +from ...modules import SeparableConv2d +from .build import BACKBONE_REGISTRY + +__all__ = ['Xception65', 'Enc', 'FCAttention'] + + +class XceptionBlock(nn.Module): + def __init__(self, channel_list, stride=1, dilation=1, skip_connection_type='conv', relu_first=True, + low_feat=False, norm_layer=nn.BatchNorm2d): + super().__init__() + + assert len(channel_list) == 4 + self.skip_connection_type = skip_connection_type + self.relu_first = relu_first + self.low_feat = low_feat + + if self.skip_connection_type == 'conv': + self.conv = nn.Conv2d(channel_list[0], channel_list[-1], 1, stride=stride, bias=False) + self.bn = norm_layer(channel_list[-1]) + + self.sep_conv1 = SeparableConv2d(channel_list[0], channel_list[1], dilation=dilation, + relu_first=relu_first, norm_layer=norm_layer) + self.sep_conv2 = SeparableConv2d(channel_list[1], channel_list[2], dilation=dilation, + relu_first=relu_first, norm_layer=norm_layer) + self.sep_conv3 = SeparableConv2d(channel_list[2], channel_list[3], dilation=dilation, + relu_first=relu_first, stride=stride, norm_layer=norm_layer) + self.last_inp_channels = channel_list[3] + + def forward(self, inputs): + sc1 = self.sep_conv1(inputs) + sc2 = self.sep_conv2(sc1) + residual = self.sep_conv3(sc2) + + if self.skip_connection_type == 'conv': + shortcut = self.conv(inputs) + shortcut = self.bn(shortcut) + outputs = residual + shortcut + elif self.skip_connection_type == 'sum': + outputs = residual + inputs + elif self.skip_connection_type == 'none': + outputs = residual + else: + raise ValueError('Unsupported skip connection type.') + + if self.low_feat: + return outputs, sc2 + else: + return outputs + + +class Xception65(nn.Module): + def __init__(self, cfg=None, norm_layer=nn.BatchNorm2d): + super().__init__() + output_stride = cfg.MODEL.OUTPUT_STRIDE + if output_stride == 32: + entry_block3_stride = 2 + middle_block_dilation = 1 + exit_block_dilations = (1, 1) + exit_block_stride = 2 + elif output_stride == 16: + entry_block3_stride = 2 + 
middle_block_dilation = 1 + exit_block_dilations = (1, 2) + exit_block_stride = 1 + elif output_stride == 8: + entry_block3_stride = 1 + middle_block_dilation = 2 + exit_block_dilations = (2, 4) + exit_block_stride = 1 + else: + raise NotImplementedError + + # Entry flow + self.conv1 = nn.Conv2d(3, 32, 3, stride=2, padding=1, bias=False) + self.bn1 = norm_layer(32) + self.relu = nn.ReLU() + + self.conv2 = nn.Conv2d(32, 64, 3, stride=1, padding=1, bias=False) + self.bn2 = norm_layer(64) + + self.block1 = XceptionBlock([64, 128, 128, 128], stride=2, norm_layer=norm_layer) + self.block2 = XceptionBlock([128, 256, 256, 256], stride=2, low_feat=True, norm_layer=norm_layer) + self.block3 = XceptionBlock([256, 728, 728, 728], stride=entry_block3_stride, low_feat=True, + norm_layer=norm_layer) + + # Middle flow (16 units) + self.block4 = XceptionBlock([728, 728, 728, 728], dilation=middle_block_dilation, + skip_connection_type='sum', norm_layer=norm_layer) + self.block5 = XceptionBlock([728, 728, 728, 728], dilation=middle_block_dilation, + skip_connection_type='sum', norm_layer=norm_layer) + self.block6 = XceptionBlock([728, 728, 728, 728], dilation=middle_block_dilation, + skip_connection_type='sum', norm_layer=norm_layer) + self.block7 = XceptionBlock([728, 728, 728, 728], dilation=middle_block_dilation, + skip_connection_type='sum', norm_layer=norm_layer) + self.block8 = XceptionBlock([728, 728, 728, 728], dilation=middle_block_dilation, + skip_connection_type='sum', norm_layer=norm_layer) + self.block9 = XceptionBlock([728, 728, 728, 728], dilation=middle_block_dilation, + skip_connection_type='sum', norm_layer=norm_layer) + self.block10 = XceptionBlock([728, 728, 728, 728], dilation=middle_block_dilation, + skip_connection_type='sum', norm_layer=norm_layer) + self.block11 = XceptionBlock([728, 728, 728, 728], dilation=middle_block_dilation, + skip_connection_type='sum', norm_layer=norm_layer) + self.block12 = XceptionBlock([728, 728, 728, 728], dilation=middle_block_dilation, + skip_connection_type='sum', norm_layer=norm_layer) + self.block13 = XceptionBlock([728, 728, 728, 728], dilation=middle_block_dilation, + skip_connection_type='sum', norm_layer=norm_layer) + self.block14 = XceptionBlock([728, 728, 728, 728], dilation=middle_block_dilation, + skip_connection_type='sum', norm_layer=norm_layer) + self.block15 = XceptionBlock([728, 728, 728, 728], dilation=middle_block_dilation, + skip_connection_type='sum', norm_layer=norm_layer) + self.block16 = XceptionBlock([728, 728, 728, 728], dilation=middle_block_dilation, + skip_connection_type='sum', norm_layer=norm_layer) + self.block17 = XceptionBlock([728, 728, 728, 728], dilation=middle_block_dilation, + skip_connection_type='sum', norm_layer=norm_layer) + self.block18 = XceptionBlock([728, 728, 728, 728], dilation=middle_block_dilation, + skip_connection_type='sum', norm_layer=norm_layer) + self.block19 = XceptionBlock([728, 728, 728, 728], dilation=middle_block_dilation, + skip_connection_type='sum', norm_layer=norm_layer) + + # Exit flow + self.block20 = XceptionBlock([728, 728, 1024, 1024], stride=exit_block_stride, + dilation=exit_block_dilations[0], norm_layer=norm_layer) + self.block21 = XceptionBlock([1024, 1536, 1536, 2048], dilation=exit_block_dilations[1], + skip_connection_type='none', relu_first=False, norm_layer=norm_layer) + + def forward(self, x): + # Entry flow + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + + x = self.conv2(x) + x = self.bn2(x) + x = self.relu(x) + + x = self.block1(x) + x, c1 = self.block2(x) # 
b, h//4, w//4, 256 + x, c2 = self.block3(x) # b, h//8, w//8, 728 + + # Middle flow + x = self.block4(x) + x = self.block5(x) + x = self.block6(x) + x = self.block7(x) + x = self.block8(x) + x = self.block9(x) + x = self.block10(x) + x = self.block11(x) + x = self.block12(x) + x = self.block13(x) + x = self.block14(x) + x = self.block15(x) + x = self.block16(x) + x = self.block17(x) + x = self.block18(x) + c3 = self.block19(x) + + # Exit flow + x = self.block20(c3) + c4 = self.block21(x) + + return c1, c2, c3, c4 + + +# ------------------------------------------------- +# For DFANet +# ------------------------------------------------- +class BlockA(nn.Module): + def __init__(self, in_channels, out_channels, stride=1, dilation=1, norm_layer=None, start_with_relu=True): + super(BlockA, self).__init__() + if out_channels != in_channels or stride != 1: + self.skip = nn.Conv2d(in_channels, out_channels, 1, stride, bias=False) + self.skipbn = norm_layer(out_channels) + else: + self.skip = None + self.relu = nn.ReLU() + rep = list() + inter_channels = out_channels // 4 + + if start_with_relu: + rep.append(self.relu) + rep.append(SeparableConv2d(in_channels, inter_channels, 3, 1, dilation, norm_layer=norm_layer)) + rep.append(norm_layer(inter_channels)) + + rep.append(self.relu) + rep.append(SeparableConv2d(inter_channels, inter_channels, 3, 1, dilation, norm_layer=norm_layer)) + rep.append(norm_layer(inter_channels)) + + if stride != 1: + rep.append(self.relu) + rep.append(SeparableConv2d(inter_channels, out_channels, 3, stride, norm_layer=norm_layer)) + rep.append(norm_layer(out_channels)) + else: + rep.append(self.relu) + rep.append(SeparableConv2d(inter_channels, out_channels, 3, 1, norm_layer=norm_layer)) + rep.append(norm_layer(out_channels)) + self.rep = nn.Sequential(*rep) + + def forward(self, x): + out = self.rep(x) + if self.skip is not None: + skip = self.skipbn(self.skip(x)) + else: + skip = x + out = out + skip + return out + + +class Enc(nn.Module): + def __init__(self, in_channels, out_channels, blocks, norm_layer=nn.BatchNorm2d): + super(Enc, self).__init__() + block = list() + block.append(BlockA(in_channels, out_channels, 2, norm_layer=norm_layer)) + for i in range(blocks - 1): + block.append(BlockA(out_channels, out_channels, 1, norm_layer=norm_layer)) + self.block = nn.Sequential(*block) + + def forward(self, x): + return self.block(x) + + +class FCAttention(nn.Module): + def __init__(self, in_channels, norm_layer=nn.BatchNorm2d): + super(FCAttention, self).__init__() + self.avgpool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Linear(in_channels, 1000) + self.conv = nn.Sequential( + nn.Conv2d(1000, in_channels, 1, bias=False), + norm_layer(in_channels), + nn.ReLU(True)) + + def forward(self, x): + n, c, _, _ = x.size() + att = self.avgpool(x).view(n, c) + att = self.fc(att).view(n, 1000, 1, 1) + att = self.conv(att) + return x * att.expand_as(x) + + +class XceptionA(nn.Module): + def __init__(self, cfg=None, num_classes=1000, norm_layer=nn.BatchNorm2d): + super(XceptionA, self).__init__() + self.conv1 = nn.Sequential(nn.Conv2d(3, 8, 3, 2, 1, bias=False), + norm_layer(8), + nn.ReLU(True)) + + self.enc2 = Enc(8, 48, 4, norm_layer=norm_layer) + self.enc3 = Enc(48, 96, 6, norm_layer=norm_layer) + self.enc4 = Enc(96, 192, 4, norm_layer=norm_layer) + + self.fca = FCAttention(192, norm_layer=norm_layer) + self.avgpool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Linear(192, num_classes) + + def forward(self, x): + x = self.conv1(x) + + x = self.enc2(x) + x = self.enc3(x) + x = self.enc4(x) + 
x = self.fca(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + + return x + + +@BACKBONE_REGISTRY.register() +def xception_a(cfg=None, norm_layer=nn.BatchNorm2d): + model = XceptionA(cfg=cfg, norm_layer=norm_layer) + return model + + +@BACKBONE_REGISTRY.register() +def xception65(cfg=None, norm_layer=nn.BatchNorm2d): + model = Xception65(cfg=cfg, norm_layer=norm_layer) + return model diff --git a/cv/semantic_segmentation/icnet/pytorch/model/models/icnet.py b/cv/semantic_segmentation/icnet/pytorch/model/models/icnet.py new file mode 100644 index 0000000000000000000000000000000000000000..5ad59c06d8c04ee6ec421c7f768341f89b1c1341 --- /dev/null +++ b/cv/semantic_segmentation/icnet/pytorch/model/models/icnet.py @@ -0,0 +1,114 @@ +# Copyright (c) 2022, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# Copyright (c) SegmenTron. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""Image Cascade Network""" +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .segbase import SegBaseModel + +from ..modules.basic import _ConvBNReLU + + +__all__ = ['ICNet'] + + +class ICNet(SegBaseModel): + """Image Cascade Network""" + + def __init__(self, cfg): + super(ICNet, self).__init__(cfg) + self.conv_sub1 = nn.Sequential( + _ConvBNReLU(3, 32, 3, 2), + _ConvBNReLU(32, 32, 3, 2), + _ConvBNReLU(32, 64, 3, 2) + ) + + self.head = _ICHead(self.nclass, scale=cfg.MODEL.BACKBONE_SCALE) + self.__setattr__('decoder', ['conv_sub1', 'head']) + + def forward(self, x): + size = x.size()[2:] + # sub 1 + x_sub1 = self.conv_sub1(x) + + # sub 2 + x_sub2 = F.interpolate(x, scale_factor=0.5, mode='bilinear', align_corners=True) + _, x_sub2, _, _ = self.encoder(x_sub2) + + # sub 4 + x_sub4 = F.interpolate(x, scale_factor=0.25, mode='bilinear', align_corners=True) + _, _, _, x_sub4 = self.encoder(x_sub4) + + outputs = self.head(x_sub1, x_sub2, x_sub4, size) + + if self.aux: + return tuple(outputs) + else: + return outputs[0] + + +class _ICHead(nn.Module): + def __init__(self, nclass, scale=1.0, norm_layer=nn.BatchNorm2d): + super(_ICHead, self).__init__() + self.cff_12 = CascadeFeatureFusion(int(512 * scale), 64, 128, nclass, norm_layer) + self.cff_24 = CascadeFeatureFusion(int(2048 * scale), int(512 * scale), 128, nclass, norm_layer) + self.conv_cls = nn.Conv2d(128, nclass, 1, bias=False) + + def forward(self, x_sub1, x_sub2, x_sub4, size): + outputs = list() + x_cff_24, x_24_cls = self.cff_24(x_sub4, x_sub2) + outputs.append(x_24_cls) + x_cff_12, x_12_cls = self.cff_12(x_sub2, x_sub1) + outputs.append(x_12_cls) + + up_x2 = F.interpolate(x_cff_12, scale_factor=2, mode='bilinear', align_corners=True) + up_x2 = self.conv_cls(up_x2) + outputs.append(up_x2) + + up_x8 = F.interpolate(up_x2, size, mode='bilinear', align_corners=True) + outputs.append(up_x8) + # 1 -> 1/4 -> 1/8 -> 1/16 + outputs.reverse() + + return outputs + + +class CascadeFeatureFusion(nn.Module): + """CFF Unit""" + + def __init__(self, low_channels, high_channels, out_channels, 
nclass, norm_layer=nn.BatchNorm2d): + super(CascadeFeatureFusion, self).__init__() + self.conv_low = nn.Sequential( + nn.Conv2d(low_channels, out_channels, 3, padding=2, dilation=2, bias=False), + norm_layer(out_channels) + ) + self.conv_high = nn.Sequential( + nn.Conv2d(high_channels, out_channels, 1, bias=False), + norm_layer(out_channels) + ) + self.conv_low_cls = nn.Conv2d(out_channels, nclass, 1, bias=False) + + def forward(self, x_low, x_high): + x_low = F.interpolate(x_low, size=x_high.size()[2:], mode='bilinear', align_corners=True) + x_low = self.conv_low(x_low) + x_high = self.conv_high(x_high) + x = x_low + x_high + x = F.relu(x, inplace=True) + x_low_cls = self.conv_low_cls(x_low) + + return x, x_low_cls \ No newline at end of file diff --git a/cv/semantic_segmentation/icnet/pytorch/model/models/segbase.py b/cv/semantic_segmentation/icnet/pytorch/model/models/segbase.py new file mode 100644 index 0000000000000000000000000000000000000000..eb1523f0ae880ce6093a814df505c708db8736b6 --- /dev/null +++ b/cv/semantic_segmentation/icnet/pytorch/model/models/segbase.py @@ -0,0 +1,143 @@ +# Copyright (c) 2022, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# Copyright (c) SegmenTron. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
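A shape sketch for CascadeFeatureFusion above (outside the patch): the coarse branch is upsampled to the fine branch's grid, both are projected to out_channels, summed and rectified, and a 1x1 side classifier on the projected coarse branch feeds the auxiliary loss. Channel sizes below follow _ICHead at scale 1.0; the import path assumes the pytorch/ directory is on PYTHONPATH:

import torch

from model.models.icnet import CascadeFeatureFusion

cff = CascadeFeatureFusion(low_channels=2048, high_channels=512,
                           out_channels=128, nclass=19)
x_low = torch.randn(1, 2048, 16, 16)   # deeper, coarser branch
x_high = torch.randn(1, 512, 32, 32)   # shallower, finer branch
fused, aux_logits = cff(x_low, x_high)
print(fused.shape)       # torch.Size([1, 128, 32, 32])
print(aux_logits.shape)  # torch.Size([1, 19, 32, 32])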
+
+"""Base Model for Semantic Segmentation"""
+import math
+import numbers
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from .backbones import get_segmentation_backbone
+from ..modules import get_norm
+
+__all__ = ['SegBaseModel']
+
+
+class SegBaseModel(nn.Module):
+    r"""Base Model for Semantic Segmentation
+    """
+    def __init__(self, cfg, need_backbone=True):
+        super(SegBaseModel, self).__init__()
+        self.cfg = cfg
+        self.nclass = cfg.MODEL.NUM_CLASS
+        self.aux = cfg.SOLVER.AUX
+        self.norm_layer = get_norm(cfg.MODEL.BN_TYPE)
+        self.backbone = None
+        self.encoder = None
+        if need_backbone:
+            self.get_backbone()
+
+    def get_backbone(self):
+        self.backbone = self.cfg.MODEL.BACKBONE.lower()
+        self.encoder = get_segmentation_backbone(self.backbone, self.cfg, self.norm_layer)
+
+    def base_forward(self, x):
+        """forwarding backbone network"""
+        c1, c2, c3, c4 = self.encoder(x)
+        return c1, c2, c3, c4
+
+    def demo(self, x):
+        pred = self.forward(x)
+        if self.aux:
+            # when auxiliary heads are enabled, keep only the main prediction
+            pred = pred[0]
+        return pred
+
+    def evaluate(self, image):
+        """Multi-scale (and optionally flipped) inference; sums class scores over scales."""
+        scales = self.cfg.TEST.SCALES
+        flip = self.cfg.TEST.FLIP
+        crop_size = _to_tuple(self.cfg.TEST.CROP_SIZE) if self.cfg.TEST.CROP_SIZE else None
+        batch, _, h, w = image.shape
+        base_size = max(h, w)
+        scores = None
+        for scale in scales:
+            long_size = int(math.ceil(base_size * scale))
+            if h > w:
+                height = long_size
+                width = int(1.0 * w * long_size / h + 0.5)
+            else:
+                width = long_size
+                height = int(1.0 * h * long_size / w + 0.5)
+
+            # resize image to current size
+            cur_img = _resize_image(image, height, width)
+            if crop_size is not None:
+                assert crop_size[0] >= h and crop_size[1] >= w
+                crop_size_scaled = (int(math.ceil(crop_size[0] * scale)),
+                                    int(math.ceil(crop_size[1] * scale)))
+                cur_img = _pad_image(cur_img, crop_size_scaled)
+            outputs = self.forward(cur_img)[0][..., :height, :width]
+            if flip:
+                outputs += _flip_image(self.forward(_flip_image(cur_img))[0])[..., :height, :width]
+
+            score = _resize_image(outputs, h, w)
+
+            if scores is None:
+                scores = score
+            else:
+                scores += score
+        return scores
+
+
+def _resize_image(img, h, w):
+    return F.interpolate(img, size=[h, w], mode='bilinear', align_corners=True)
+
+
+def _pad_image(img, crop_size):
+    b, c, h, w = img.shape
+    assert c == 3
+    padh = crop_size[0] - h if h < crop_size[0] else 0
+    padw = crop_size[1] - w if w < crop_size[1] else 0
+    if padh == 0 and padw == 0:
+        return img
+    img_pad = F.pad(img, (0, padh, 0, padw))
+
+    # TODO clean this code
+    # mean = cfg.DATASET.MEAN
+    # std = cfg.DATASET.STD
+    # pad_values = -np.array(mean) / np.array(std)
+    # img_pad = torch.zeros((b, c, h + padh, w + padw)).to(img.device)
+    # for i in range(c):
+    #     # print(img[:, i, :, :].unsqueeze(1).shape)
+    #     img_pad[:, i, :, :] = torch.squeeze(
+    #         F.pad(img[:, i, :, :].unsqueeze(1), (0, padh, 0, padw),
+    #               'constant', value=pad_values[i]), 1)
+    # assert(img_pad.shape[2] >= crop_size[0] and img_pad.shape[3] >= crop_size[1])
+
+    return img_pad
+
+
+def _crop_image(img, h0, h1, w0, w1):
+    return img[:, :, h0:h1, w0:w1]
+
+
+def _flip_image(img):
+    assert img.ndim == 4
+    return img.flip(3)
+
+
+def _to_tuple(size):
+    if isinstance(size, (list, tuple)):
+        assert len(size) == 2, 'Expected eval crop size to contain two elements, ' \
+                               'but received {}'.format(len(size))
+        return tuple(size)
+    elif isinstance(size, numbers.Number):
+        return tuple((size, size))
+    else:
+        raise ValueError('Unsupported datatype: {}'.format(type(size)))
diff --git a/cv/semantic_segmentation/icnet/pytorch/model/modules/__init__.py b/cv/semantic_segmentation/icnet/pytorch/model/modules/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ce1a1f993b44e914211c0f9e493d816bf99fbe1
--- /dev/null
+++ b/cv/semantic_segmentation/icnet/pytorch/model/modules/__init__.py
@@ -0,0 +1,5 @@
+"""Seg NN Modules"""
+
+from .basic import *
+from .module import *
+from .batch_norm import get_norm
\ No newline at end of file
diff --git a/cv/semantic_segmentation/icnet/pytorch/model/modules/basic.py b/cv/semantic_segmentation/icnet/pytorch/model/modules/basic.py
new file mode 100644
index 0000000000000000000000000000000000000000..32d84268dcb09aa974b66481173f33971bc54b3e
--- /dev/null
+++ b/cv/semantic_segmentation/icnet/pytorch/model/modules/basic.py
@@ -0,0 +1,169 @@
+"""Basic Module for Semantic Segmentation"""
+import torch
+import torch.nn as nn
+
+from collections import OrderedDict
+
+__all__ = ['_ConvBNPReLU', '_ConvBN', '_BNPReLU', '_ConvBNReLU', '_DepthwiseConv', 'InvertedResidual',
+           'SeparableConv2d']
+
+_USE_FIXED_PAD = False
+
+
+def _pytorch_padding(kernel_size, stride=1, dilation=1, **_):
+    if _USE_FIXED_PAD:
+        return 0  # FIXME remove once verified
+    else:
+        padding = ((stride - 1) + dilation * (kernel_size - 1)) // 2
+
+        # FIXME remove once verified
+        fp = _fixed_padding(kernel_size, dilation)
+        assert all(padding == p for p in fp)
+
+        return padding
+
+
+def _fixed_padding(kernel_size, dilation):
+    kernel_size_effective = kernel_size + (kernel_size - 1) * (dilation - 1)
+    pad_total = kernel_size_effective - 1
+    pad_beg = pad_total // 2
+    pad_end = pad_total - pad_beg
+    return [pad_beg, pad_end, pad_beg, pad_end]
+
+
+class SeparableConv2d(nn.Module):
+    def __init__(self, inplanes, planes, kernel_size=3, stride=1, dilation=1, relu_first=True,
+                 bias=False, norm_layer=nn.BatchNorm2d):
+        super().__init__()
+        depthwise = nn.Conv2d(inplanes, inplanes, kernel_size,
+                              stride=stride, padding=dilation,
+                              dilation=dilation, groups=inplanes, bias=bias)
+        bn_depth = norm_layer(inplanes)
+        pointwise = nn.Conv2d(inplanes, planes, 1, bias=bias)
+        bn_point = norm_layer(planes)
+
+        if relu_first:
+            self.block = nn.Sequential(OrderedDict([('relu', nn.ReLU()),
+                                                    ('depthwise', depthwise),
+                                                    ('bn_depth', bn_depth),
+                                                    ('pointwise', pointwise),
+                                                    ('bn_point', bn_point)
+                                                    ]))
+        else:
+            self.block = nn.Sequential(OrderedDict([('depthwise', depthwise),
+                                                    ('bn_depth', bn_depth),
+                                                    ('relu1', nn.ReLU(inplace=True)),
+                                                    ('pointwise', pointwise),
+                                                    ('bn_point', bn_point),
+                                                    ('relu2', nn.ReLU(inplace=True))
+                                                    ]))
+
+    def forward(self, x):
+        return self.block(x)
+
+
+class _ConvBNReLU(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0,
+                 dilation=1, groups=1, relu6=False, norm_layer=nn.BatchNorm2d):
+        super(_ConvBNReLU, self).__init__()
+        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias=False)
+        self.bn = norm_layer(out_channels)
+        self.relu = nn.ReLU6(True) if relu6 else nn.ReLU(True)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.relu(x)
+        return x
+
+
+class _ConvBNPReLU(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0,
+                 dilation=1, groups=1, norm_layer=nn.BatchNorm2d):
+        super(_ConvBNPReLU, self).__init__()
+        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias=False)
+        self.bn = 
norm_layer(out_channels) + self.prelu = nn.PReLU(out_channels) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.prelu(x) + return x + + +class _ConvBN(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, + dilation=1, groups=1, norm_layer=nn.BatchNorm2d, **kwargs): + super(_ConvBN, self).__init__() + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias=False) + self.bn = norm_layer(out_channels) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + return x + + +class _BNPReLU(nn.Module): + def __init__(self, out_channels, norm_layer=nn.BatchNorm2d): + super(_BNPReLU, self).__init__() + self.bn = norm_layer(out_channels) + self.prelu = nn.PReLU(out_channels) + + def forward(self, x): + x = self.bn(x) + x = self.prelu(x) + return x + + +# ----------------------------------------------------------------- +# For MobileNet +# ----------------------------------------------------------------- +class _DepthwiseConv(nn.Module): + """conv_dw in MobileNet""" + + def __init__(self, in_channels, out_channels, stride, norm_layer=nn.BatchNorm2d, **kwargs): + super(_DepthwiseConv, self).__init__() + self.conv = nn.Sequential( + _ConvBNReLU(in_channels, in_channels, 3, stride, 1, groups=in_channels, norm_layer=norm_layer), + _ConvBNReLU(in_channels, out_channels, 1, norm_layer=norm_layer)) + + def forward(self, x): + return self.conv(x) + + +# ----------------------------------------------------------------- +# For MobileNetV2 +# ----------------------------------------------------------------- +class InvertedResidual(nn.Module): + def __init__(self, in_channels, out_channels, stride, expand_ratio, dilation=1, norm_layer=nn.BatchNorm2d): + super(InvertedResidual, self).__init__() + assert stride in [1, 2] + self.use_res_connect = stride == 1 and in_channels == out_channels + + layers = list() + inter_channels = int(round(in_channels * expand_ratio)) + if expand_ratio != 1: + # pw + layers.append(_ConvBNReLU(in_channels, inter_channels, 1, relu6=True, norm_layer=norm_layer)) + layers.extend([ + # dw + _ConvBNReLU(inter_channels, inter_channels, 3, stride, dilation, dilation, + groups=inter_channels, relu6=True, norm_layer=norm_layer), + # pw-linear + nn.Conv2d(inter_channels, out_channels, 1, bias=False), + norm_layer(out_channels)]) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +if __name__ == '__main__': + x = torch.randn(1, 32, 64, 64) + model = InvertedResidual(32, 64, 2, 1) + out = model(x) diff --git a/cv/semantic_segmentation/icnet/pytorch/model/modules/batch_norm.py b/cv/semantic_segmentation/icnet/pytorch/model/modules/batch_norm.py new file mode 100644 index 0000000000000000000000000000000000000000..590f0a7a0c3e9321b3d21d52ab4e9ede373c17f9 --- /dev/null +++ b/cv/semantic_segmentation/icnet/pytorch/model/modules/batch_norm.py @@ -0,0 +1,183 @@ +# this code heavily based on detectron2 +import logging +import torch +import torch.distributed as dist +from torch import nn +from torch.autograd.function import Function +from ..utils.distributed import get_world_size + + +class FrozenBatchNorm2d(nn.Module): + """ + BatchNorm2d where the batch statistics and the affine parameters are fixed. + + It contains non-trainable buffers called + "weight" and "bias", "running_mean", "running_var", + initialized to perform identity transformation. 
+
+    The pre-trained backbone models from Caffe2 only contain "weight" and "bias",
+    which are computed from the original four parameters of BN.
+    The affine transform `x * weight + bias` will perform the equivalent
+    computation of `(x - running_mean) / sqrt(running_var) * weight + bias`.
+    When loading a backbone model from Caffe2, "running_mean" and "running_var"
+    will be left unchanged as identity transformation.
+
+    Other pre-trained backbone models may contain all 4 parameters.
+
+    The forward is implemented by `F.batch_norm(..., training=False)`.
+    """
+
+    _version = 3
+
+    def __init__(self, num_features, eps=1e-5):
+        super().__init__()
+        self.num_features = num_features
+        self.eps = eps
+        self.register_buffer("weight", torch.ones(num_features))
+        self.register_buffer("bias", torch.zeros(num_features))
+        self.register_buffer("running_mean", torch.zeros(num_features))
+        self.register_buffer("running_var", torch.ones(num_features) - eps)
+
+    def forward(self, x):
+        scale = self.weight * (self.running_var + self.eps).rsqrt()
+        bias = self.bias - self.running_mean * scale
+        scale = scale.reshape(1, -1, 1, 1)
+        bias = bias.reshape(1, -1, 1, 1)
+        return x * scale + bias
+
+    def _load_from_state_dict(
+        self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
+    ):
+        version = local_metadata.get("version", None)
+
+        if version is None or version < 2:
+            # No running_mean/var in early versions; filling them in silences the warnings
+            if prefix + "running_mean" not in state_dict:
+                state_dict[prefix + "running_mean"] = torch.zeros_like(self.running_mean)
+            if prefix + "running_var" not in state_dict:
+                state_dict[prefix + "running_var"] = torch.ones_like(self.running_var)
+
+        if version is not None and version < 3:
+            logging.info("FrozenBatchNorm {} is upgraded to version 3.".format(prefix.rstrip(".")))
+            # In version < 3, running_var was used without +eps.
+            state_dict[prefix + "running_var"] -= self.eps
+
+        super()._load_from_state_dict(
+            state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs
+        )
+
+    def __repr__(self):
+        return "FrozenBatchNorm2d(num_features={}, eps={})".format(self.num_features, self.eps)
+
+    @classmethod
+    def convert_frozen_batchnorm(cls, module):
+        """
+        Convert BatchNorm/SyncBatchNorm in module into FrozenBatchNorm.
+
+        Args:
+            module (torch.nn.Module):
+
+        Returns:
+            If module is BatchNorm/SyncBatchNorm, returns a new module.
+            Otherwise, in-place convert module and return it.
+
+        Similar to convert_sync_batchnorm in
+        https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py
+        """
+        bn_module = nn.modules.batchnorm
+        bn_module = (bn_module.BatchNorm2d, bn_module.SyncBatchNorm)
+        res = module
+        if isinstance(module, bn_module):
+            res = cls(module.num_features)
+            if module.affine:
+                res.weight.data = module.weight.data.clone().detach()
+                res.bias.data = module.bias.data.clone().detach()
+            res.running_mean.data = module.running_mean.data
+            res.running_var.data = module.running_var.data + module.eps
+        else:
+            for name, child in module.named_children():
+                new_child = cls.convert_frozen_batchnorm(child)
+                if new_child is not child:
+                    res.add_module(name, new_child)
+        return res
+
+
+def groupNorm(num_channels, eps=1e-5, momentum=0.1, affine=True):
+    return nn.GroupNorm(min(32, num_channels), num_channels, eps=eps, affine=affine)
+
+
+def get_norm(norm):
+    """
+    Args:
+        norm (str or callable): a norm-type name from
+            ['BN', 'SyncBN', 'FrozenBN', 'GN', 'nnSyncBN'], or a callable that
+            is returned unchanged.
+
+    Returns:
+        nn.Module or None: the normalization layer
+    """
+    if isinstance(norm, str):
+        if len(norm) == 0:
+            return None
+        support_norm_type = ['BN', 'SyncBN', 'FrozenBN', 'GN', 'nnSyncBN']
+        assert norm in support_norm_type, 'Unknown norm type {}, support norm types are {}'.format(
+            norm, support_norm_type)
+        norm = {
+            "BN": nn.BatchNorm2d,
+            "SyncBN": NaiveSyncBatchNorm,
+            "FrozenBN": FrozenBatchNorm2d,
+            "GN": groupNorm,
+            "nnSyncBN": nn.SyncBatchNorm,  # keep for debugging
+        }[norm]
+    return norm
+
+
+class AllReduce(Function):
+    @staticmethod
+    def forward(ctx, input):
+        input_list = [torch.zeros_like(input) for _ in range(dist.get_world_size())]
+        # Use all_gather instead of all_reduce to avoid relying on in-place semantics
+        dist.all_gather(input_list, input, async_op=False)
+        inputs = torch.stack(input_list, dim=0)
+        return torch.sum(inputs, dim=0)
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        dist.all_reduce(grad_output, async_op=False)
+        return grad_output
+
+
+class NaiveSyncBatchNorm(nn.BatchNorm2d):
+    """
+    `torch.nn.SyncBatchNorm` has known issues: it produces significantly worse
+    AP (and sometimes goes NaN) when the batch size on each worker is quite
+    different (e.g., when scale augmentation is used, or when it is applied to
+    a mask head).
+
+    Use this implementation until `nn.SyncBatchNorm` is fixed.
+    It is slower than `nn.SyncBatchNorm`.
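+
+    A minimal usage sketch (illustrative only; per `forward` below, statistics
+    are synchronized only when the world size is greater than 1 and the module
+    is in training mode)::
+
+        bn = NaiveSyncBatchNorm(64)            # drop-in for nn.BatchNorm2d(64)
+        y = bn(torch.randn(8, 64, 32, 32))     # plain BN when world_size == 1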
+ """ + + def forward(self, input): + if get_world_size() == 1 or not self.training: + return super().forward(input) + + assert input.shape[0] > 0, "SyncBatchNorm does not support empty inputs" + C = input.shape[1] + mean = torch.mean(input, dim=[0, 2, 3]) + meansqr = torch.mean(input * input, dim=[0, 2, 3]) + + vec = torch.cat([mean, meansqr], dim=0) + vec = AllReduce.apply(vec) * (1.0 / dist.get_world_size()) + + mean, meansqr = torch.split(vec, C) + var = meansqr - mean * mean + self.running_mean += self.momentum * (mean.detach() - self.running_mean) + self.running_var += self.momentum * (var.detach() - self.running_var) + + invstd = torch.rsqrt(var + self.eps) + scale = self.weight * invstd + bias = self.bias - mean * scale + scale = scale.reshape(1, -1, 1, 1) + bias = bias.reshape(1, -1, 1, 1) + return input * scale + bias diff --git a/cv/semantic_segmentation/icnet/pytorch/model/modules/module.py b/cv/semantic_segmentation/icnet/pytorch/model/modules/module.py new file mode 100644 index 0000000000000000000000000000000000000000..2a0cea95b9e445181027423834113634e8d84669 --- /dev/null +++ b/cv/semantic_segmentation/icnet/pytorch/model/modules/module.py @@ -0,0 +1,223 @@ +# Copyright (c) 2022, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# Copyright (c) SegmenTron. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
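+
+# A shape sketch for PyramidPooling (defined below), assuming a typical
+# in_channels=2048 encoder output: each of the four pooled branches emits
+# in_channels // 4 maps, so concatenating them with the input doubles the
+# channel count while preserving spatial size:
+#
+#   pp = PyramidPooling(2048)
+#   y = pp(torch.randn(1, 2048, 32, 32))   # y.shape == (1, 4096, 32, 32)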
+ + +"""Basic Module for Semantic Segmentation""" +import torch +import torch.nn as nn +import torch.nn.functional as F + +from collections import OrderedDict +from .basic import _ConvBNReLU, SeparableConv2d, _ConvBN, _BNPReLU, _ConvBNPReLU + + +__all__ = ['_FCNHead', '_ASPP', 'PyramidPooling', 'PAM_Module', 'CAM_Module', 'EESP'] + + +class _FCNHead(nn.Module): + def __init__(self, in_channels, channels, norm_layer=nn.BatchNorm2d): + super(_FCNHead, self).__init__() + inter_channels = in_channels // 4 + self.block = nn.Sequential( + nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False), + norm_layer(inter_channels), + nn.ReLU(inplace=True), + nn.Dropout(0.1), + nn.Conv2d(inter_channels, channels, 1) + ) + + def forward(self, x): + return self.block(x) + + +# ----------------------------------------------------------------- +# For deeplab +# ----------------------------------------------------------------- +class _ASPP(nn.Module): + def __init__(self, output_stride, in_channels=2048, out_channels=256): + super().__init__() + if output_stride == 16: + dilations = [6, 12, 18] + elif output_stride == 8: + dilations = [12, 24, 36] + elif output_stride == 32: + dilations = [6, 12, 18] + else: + raise NotImplementedError + + self.aspp0 = nn.Sequential(OrderedDict([('conv', nn.Conv2d(in_channels, out_channels, 1, bias=False)), + ('bn', nn.BatchNorm2d(out_channels)), + ('relu', nn.ReLU(inplace=True))])) + self.aspp1 = SeparableConv2d(in_channels, out_channels, dilation=dilations[0], relu_first=False) + self.aspp2 = SeparableConv2d(in_channels, out_channels, dilation=dilations[1], relu_first=False) + self.aspp3 = SeparableConv2d(in_channels, out_channels, dilation=dilations[2], relu_first=False) + + self.image_pooling = nn.Sequential(OrderedDict([('gap', nn.AdaptiveAvgPool2d((1, 1))), + ('conv', nn.Conv2d(in_channels, out_channels, 1, bias=False)), + ('bn', nn.BatchNorm2d(out_channels)), + ('relu', nn.ReLU(inplace=True))])) + + self.conv = nn.Conv2d(out_channels*5, out_channels, 1, bias=False) + self.bn = nn.BatchNorm2d(out_channels) + self.relu = nn.ReLU(inplace=True) + self.dropout = nn.Dropout2d(p=0.1) + + def forward(self, x): + pool = self.image_pooling(x) + pool = F.interpolate(pool, size=x.shape[2:], mode='bilinear', align_corners=True) + + x0 = self.aspp0(x) + x1 = self.aspp1(x) + x2 = self.aspp2(x) + x3 = self.aspp3(x) + x = torch.cat((pool, x0, x1, x2, x3), dim=1) + + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + x = self.dropout(x) + + return x + +# ----------------------------------------------------------------- +# For PSPNet, fast_scnn +# ----------------------------------------------------------------- +class PyramidPooling(nn.Module): + def __init__(self, in_channels, sizes=(1, 2, 3, 6), norm_layer=nn.BatchNorm2d, **kwargs): + super(PyramidPooling, self).__init__() + out_channels = int(in_channels / 4) + self.avgpools = nn.ModuleList() + self.convs = nn.ModuleList() + for size in sizes: + self.avgpools.append(nn.AdaptiveAvgPool2d(size)) + self.convs.append(_ConvBNReLU(in_channels, out_channels, 1, norm_layer=norm_layer, **kwargs)) + + def forward(self, x): + size = x.size()[2:] + feats = [x] + for (avgpool, conv) in zip(self.avgpools, self.convs): + feats.append(F.interpolate(conv(avgpool(x)), size, mode='bilinear', align_corners=True)) + return torch.cat(feats, dim=1) + + +class PAM_Module(nn.Module): + """ Position attention module""" + def __init__(self, in_dim): + super(PAM_Module, self).__init__() + self.chanel_in = in_dim + + self.query_conv = 
nn.Conv2d(in_channels=in_dim, out_channels=in_dim//8, kernel_size=1)
+        self.key_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim//8, kernel_size=1)
+        self.value_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1)
+        self.gamma = nn.Parameter(torch.zeros(1))
+        self.softmax = nn.Softmax(dim=-1)
+
+    def forward(self, x):
+        """
+        Inputs:
+            x: input feature maps (B x C x H x W)
+        Returns:
+            out: attention value + input feature
+            attention: B x (H*W) x (H*W)
+        """
+        m_batchsize, C, height, width = x.size()
+        proj_query = self.query_conv(x).view(m_batchsize, -1, width*height).permute(0, 2, 1)
+        proj_key = self.key_conv(x).view(m_batchsize, -1, width*height)
+        energy = torch.bmm(proj_query, proj_key)
+        attention = self.softmax(energy)
+        proj_value = self.value_conv(x).view(m_batchsize, -1, width*height)
+
+        out = torch.bmm(proj_value, attention.permute(0, 2, 1))
+        out = out.view(m_batchsize, C, height, width)
+
+        out = self.gamma*out + x
+        return out
+
+
+class CAM_Module(nn.Module):
+    """ Channel attention module"""
+    def __init__(self, in_dim):
+        super(CAM_Module, self).__init__()
+        self.chanel_in = in_dim
+        self.gamma = nn.Parameter(torch.zeros(1))
+        self.softmax = nn.Softmax(dim=-1)
+
+    def forward(self, x):
+        """
+        Inputs:
+            x: input feature maps (B x C x H x W)
+        Returns:
+            out: attention value + input feature
+            attention: B x C x C
+        """
+        m_batchsize, C, height, width = x.size()
+        proj_query = x.view(m_batchsize, C, -1)
+        proj_key = x.view(m_batchsize, C, -1).permute(0, 2, 1)
+        energy = torch.bmm(proj_query, proj_key)
+        energy_new = torch.max(energy, -1, keepdim=True)[0].expand_as(energy) - energy
+        attention = self.softmax(energy_new)
+        proj_value = x.view(m_batchsize, C, -1)
+
+        out = torch.bmm(attention, proj_value)
+        out = out.view(m_batchsize, C, height, width)
+
+        out = self.gamma*out + x
+        return out
+
+
+class EESP(nn.Module):
+
+    def __init__(self, in_channels, out_channels, stride=1, k=4, r_lim=7, down_method='esp', norm_layer=nn.BatchNorm2d):
+        super(EESP, self).__init__()
+        self.stride = stride
+        n = int(out_channels / k)
+        n1 = out_channels - (k - 1) * n
+        assert down_method in ['avg', 'esp'], 'down_method must be one of "avg" or "esp"'
+        assert n == n1, "n(={}) and n1(={}) should be equal for Depth-wise Convolution ".format(n, n1)
+        self.proj_1x1 = _ConvBNPReLU(in_channels, n, 1, stride=1, groups=k, norm_layer=norm_layer)
+
+        # dilation rate for each effective receptive-field (kernel) size
+        map_receptive_ksize = {3: 1, 5: 2, 7: 3, 9: 4, 11: 5, 13: 6, 15: 7, 17: 8}
+        self.k_sizes = list()
+        for i in range(k):
+            ksize = int(3 + 2 * i)
+            ksize = ksize if ksize <= r_lim else 3
+            self.k_sizes.append(ksize)
+        self.k_sizes.sort()
+        self.spp_dw = nn.ModuleList()
+        for i in range(k):
+            dilation = map_receptive_ksize[self.k_sizes[i]]
+            self.spp_dw.append(nn.Conv2d(n, n, 3, stride, dilation, dilation=dilation, groups=n, bias=False))
+        self.conv_1x1_exp = _ConvBN(out_channels, out_channels, 1, 1, groups=k, norm_layer=norm_layer)
+        self.br_after_cat = _BNPReLU(out_channels, norm_layer)
+        self.module_act = nn.PReLU(out_channels)
+        self.downAvg = True if down_method == 'avg' else False
+
+    def forward(self, x):
+        output1 = self.proj_1x1(x)
+        output = [self.spp_dw[0](output1)]
+        # hierarchical feature fusion: add each branch onto the previous one
+        for k in range(1, len(self.spp_dw)):
+            out_k = self.spp_dw[k](output1)
+            out_k = out_k + output[k - 1]
+            output.append(out_k)
+        expanded = self.conv_1x1_exp(self.br_after_cat(torch.cat(output, 1)))
+        del output
+        if self.stride == 2 and self.downAvg:
+            return expanded
+
+        if expanded.size() == x.size():
+            expanded = expanded + x
+
+        return self.module_act(expanded)
\ No newline at end of file
diff --git a/cv/semantic_segmentation/icnet/pytorch/model/modules/sync_bn/syncbn.py b/cv/semantic_segmentation/icnet/pytorch/model/modules/sync_bn/syncbn.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1247af974b96f434653fd496dcfca9e2d1dbfd3
--- /dev/null
+++ b/cv/semantic_segmentation/icnet/pytorch/model/modules/sync_bn/syncbn.py
@@ -0,0 +1,124 @@
+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+## Created by: Hang Zhang
+## ECE Department, Rutgers University
+## Email: zhang.hang@rutgers.edu
+## Copyright (c) 2017
+##
+## This source code is licensed under the MIT-style license found in the
+## LICENSE file in the root directory of this source tree
+##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+"""Synchronized Cross-GPU Batch Normalization Module"""
+import warnings
+import torch
+
+from torch.nn.modules.batchnorm import _BatchNorm
+from queue import Queue
+from .functions import *
+
+__all__ = ['SyncBatchNorm', 'BatchNorm1d', 'BatchNorm2d', 'BatchNorm3d']
+
+
+# Adapted from https://github.com/zhanghang1989/PyTorch-Encoding/blob/master/encoding/nn/syncbn.py
+class SyncBatchNorm(_BatchNorm):
+    """Cross-GPU Synchronized Batch normalization (SyncBN)
+
+    Parameters:
+        num_features: num_features from an expected input of
+            size batch_size x num_features x height x width
+        eps: a value added to the denominator for numerical stability.
+            Default: 1e-5
+        momentum: the value used for the running_mean and running_var
+            computation. Default: 0.1
+        sync: a boolean value that, when set to ``True``, synchronizes across
+            different GPUs. Default: ``True``
+        activation : str
+            Name of the activation functions, one of: `leaky_relu` or `none`.
+        slope : float
+            Negative slope for the `leaky_relu` activation.
+
+    Shape:
+        - Input: :math:`(N, C, H, W)`
+        - Output: :math:`(N, C, H, W)` (same shape as input)
+    Reference:
+        .. [1] Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating deep network training by reducing internal covariate shift." *ICML 2015*
+        .. [2] Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, and Amit Agrawal. "Context Encoding for Semantic Segmentation." *CVPR 2018*
+    Examples:
+        >>> m = SyncBatchNorm(100)
+        >>> net = torch.nn.DataParallel(m)
+        >>> output = net(input)
+    """
+
+    def __init__(self, num_features, eps=1e-5, momentum=0.1, sync=True, activation='none', slope=0.01, inplace=True):
+        super(SyncBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=True)
+        self.activation = activation
+        self.inplace = False if activation == 'none' else inplace
+        self.slope = slope
+        self.devices = list(range(torch.cuda.device_count()))
+        self.sync = sync if len(self.devices) > 1 else False
+        # Initialize queues
+        self.worker_ids = self.devices[1:]
+        self.master_queue = Queue(len(self.worker_ids))
+        self.worker_queues = [Queue(1) for _ in self.worker_ids]
+
+    def forward(self, x):
+        # resize the input to (B, C, -1)
+        input_shape = x.size()
+        x = x.view(input_shape[0], self.num_features, -1)
+        if x.get_device() == self.devices[0]:
+            # Master mode
+            extra = {
+                "is_master": True,
+                "master_queue": self.master_queue,
+                "worker_queues": self.worker_queues,
+                "worker_ids": self.worker_ids
+            }
+        else:
+            # Worker mode
+            extra = {
+                "is_master": False,
+                "master_queue": self.master_queue,
+                "worker_queue": self.worker_queues[self.worker_ids.index(x.get_device())]
+            }
+        if self.inplace:
+            return inp_syncbatchnorm(x, self.weight, self.bias, self.running_mean, self.running_var,
+                                     extra, self.sync, self.training, self.momentum, self.eps,
+                                     self.activation, self.slope).view(input_shape)
+        else:
+            return syncbatchnorm(x, self.weight, self.bias, self.running_mean, self.running_var,
+                                 extra, self.sync, self.training, self.momentum, self.eps,
+                                 self.activation, self.slope).view(input_shape)
+
+    def extra_repr(self):
+        if self.activation == 'none':
+            return 'sync={}'.format(self.sync)
+        else:
+            return 'sync={}, act={}, slope={}, inplace={}'.format(
+                self.sync, self.activation, self.slope, self.inplace)
+
+
+class BatchNorm1d(SyncBatchNorm):
+    """BatchNorm1d is deprecated in favor of :class:`core.nn.sync_bn.SyncBatchNorm`."""
+
+    def __init__(self, *args, **kwargs):
+        warnings.warn("core.nn.sync_bn.{} is now deprecated in favor of core.nn.sync_bn.{}."
+                      .format('BatchNorm1d', SyncBatchNorm.__name__), DeprecationWarning)
+        super(BatchNorm1d, self).__init__(*args, **kwargs)
+
+
+class BatchNorm2d(SyncBatchNorm):
+    """BatchNorm2d is deprecated in favor of :class:`core.nn.sync_bn.SyncBatchNorm`."""
+
+    def __init__(self, *args, **kwargs):
+        warnings.warn("core.nn.sync_bn.{} is now deprecated in favor of core.nn.sync_bn.{}."
+                      .format('BatchNorm2d', SyncBatchNorm.__name__), DeprecationWarning)
+        super(BatchNorm2d, self).__init__(*args, **kwargs)
+
+
+class BatchNorm3d(SyncBatchNorm):
+    """BatchNorm3d is deprecated in favor of :class:`core.nn.sync_bn.SyncBatchNorm`."""
+
+    def __init__(self, *args, **kwargs):
+        warnings.warn("core.nn.sync_bn.{} is now deprecated in favor of core.nn.sync_bn.{}."
+                      .format('BatchNorm3d', SyncBatchNorm.__name__), DeprecationWarning)
+        super(BatchNorm3d, self).__init__(*args, **kwargs)
diff --git a/cv/semantic_segmentation/icnet/pytorch/model/utils/__init__.py b/cv/semantic_segmentation/icnet/pytorch/model/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e4bb41ab1725c976f9ed6c204ded283107f0799
--- /dev/null
+++ b/cv/semantic_segmentation/icnet/pytorch/model/utils/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2022, Shanghai Iluvatar CoreX Semiconductor Co., Ltd.
+# All Rights Reserved.
+# Copyright (c) SegmenTron. All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. diff --git a/cv/semantic_segmentation/icnet/pytorch/model/utils/distributed.py b/cv/semantic_segmentation/icnet/pytorch/model/utils/distributed.py new file mode 100644 index 0000000000000000000000000000000000000000..8c681a1cf4687e17ee0966f7f5cddb7b5801038e --- /dev/null +++ b/cv/semantic_segmentation/icnet/pytorch/model/utils/distributed.py @@ -0,0 +1,251 @@ +""" +code is heavily based on https://github.com/facebookresearch/maskrcnn-benchmark +""" +import math +import pickle +import torch +import torch.utils.data as data +import torch.distributed as dist + +from torch.utils.data.sampler import Sampler, BatchSampler + +__all__ = ['get_world_size', 'get_rank', 'synchronize', 'is_main_process', + 'all_gather', 'make_data_sampler', 'make_batch_data_sampler', + 'reduce_dict', 'reduce_loss_dict'] + + +def get_world_size(): + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank(): + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + return dist.get_rank() + + +def is_main_process(): + return get_rank() == 0 + + +def synchronize(): + """ + Helper function to synchronize (barrier) among all processes when + using distributed training + """ + if not dist.is_available(): + return + if not dist.is_initialized(): + return + world_size = dist.get_world_size() + if world_size == 1: + return + dist.barrier() + + +def all_gather(data): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors) + Args: + data: any picklable object + Returns: + list[data]: list of data gathered from each rank + """ + world_size = get_world_size() + if world_size == 1: + return [data] + + # serialized to a Tensor + buffer = pickle.dumps(data) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to("cuda") + + # obtain Tensor size of each rank + local_size = torch.IntTensor([tensor.numel()]).to("cuda") + size_list = [torch.IntTensor([0]).to("cuda") for _ in range(world_size)] + dist.all_gather(size_list, local_size) + size_list = [int(size.item()) for size in size_list] + max_size = max(size_list) + + # receiving Tensor from all ranks + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + tensor_list = [] + for _ in size_list: + tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) + if local_size != max_size: + padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") + tensor = torch.cat((tensor, padding), dim=0) + dist.all_gather(tensor_list, tensor) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def reduce_dict(input_dict, average=True): + """ + Args: + input_dict (dict): all the values will be reduced + average (bool): whether to do average or sum + Reduce the values in the dictionary from all processes so 
that process with rank + 0 has the averaged results. Returns a dict with the same fields as + input_dict, after reduction. + """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.reduce(values, dst=0) + if dist.get_rank() == 0 and average: + # only main process gets accumulated, so only divide by + # world_size in this case + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict + + +def reduce_loss_dict(loss_dict): + """ + Reduce the loss dictionary from all processes so that process with rank + 0 has the averaged results. Returns a dict with the same fields as + loss_dict, after reduction. + """ + world_size = get_world_size() + if world_size < 2: + return loss_dict + with torch.no_grad(): + loss_names = [] + all_losses = [] + for k in sorted(loss_dict.keys()): + loss_names.append(k) + all_losses.append(loss_dict[k]) + all_losses = torch.stack(all_losses, dim=0) + dist.reduce(all_losses, dst=0) + if dist.get_rank() == 0: + # only main process gets accumulated, so only divide by + # world_size in this case + all_losses /= world_size + reduced_losses = {k: v for k, v in zip(loss_names, all_losses)} + return reduced_losses + + +def make_data_sampler(dataset, shuffle, distributed): + if distributed: + return DistributedSampler(dataset, shuffle=shuffle) + if shuffle: + sampler = data.sampler.RandomSampler(dataset) + else: + sampler = data.sampler.SequentialSampler(dataset) + return sampler + + +def make_batch_data_sampler(sampler, images_per_batch, num_iters=None, start_iter=0, drop_last=True): + batch_sampler = data.sampler.BatchSampler(sampler, images_per_batch, drop_last=drop_last) + if num_iters is not None: + batch_sampler = IterationBasedBatchSampler(batch_sampler, num_iters, start_iter) + return batch_sampler + + +class DistributedSampler(Sampler): + """Sampler that restricts data loading to a subset of the dataset. + It is especially useful in conjunction with + :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each + process can pass a DistributedSampler instance as a DataLoader sampler, + and load a subset of the original dataset that is exclusive to it. + .. note:: + Dataset is assumed to be of constant size. + Arguments: + dataset: Dataset used for sampling. + num_replicas (optional): Number of processes participating in + distributed training. + rank (optional): Rank of the current process within num_replicas. 
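+        shuffle (optional): If ``True`` (default), the sampler shuffles the
+            indices deterministically based on the current epoch.
+
+    Example (a minimal sketch; ``is_distributed``, ``DataLoader`` and
+    ``train_one_epoch`` are placeholders for the caller's own setup)::
+
+        sampler = DistributedSampler(dataset) if is_distributed else None
+        loader = DataLoader(dataset, shuffle=(sampler is None), sampler=sampler)
+        for epoch in range(num_epochs):
+            if is_distributed:
+                sampler.set_epoch(epoch)
+            train_one_epoch(loader)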
+ """ + + def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): + if num_replicas is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available") + num_replicas = dist.get_world_size() + if rank is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available") + rank = dist.get_rank() + self.dataset = dataset + self.num_replicas = num_replicas + self.rank = rank + self.epoch = 0 + self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) + self.total_size = self.num_samples * self.num_replicas + self.shuffle = shuffle + + def __iter__(self): + if self.shuffle: + # deterministically shuffle based on epoch + g = torch.Generator() + g.manual_seed(self.epoch) + indices = torch.randperm(len(self.dataset), generator=g).tolist() + else: + indices = torch.arange(len(self.dataset)).tolist() + + # add extra samples to make it evenly divisible + indices += indices[: (self.total_size - len(indices))] + assert len(indices) == self.total_size + + # subsample + offset = self.num_samples * self.rank + indices = indices[offset: offset + self.num_samples] + assert len(indices) == self.num_samples + + return iter(indices) + + def __len__(self): + return self.num_samples + + def set_epoch(self, epoch): + self.epoch = epoch + + +class IterationBasedBatchSampler(BatchSampler): + """ + Wraps a BatchSampler, resampling from it until + a specified number of iterations have been sampled + """ + + def __init__(self, batch_sampler, num_iterations, start_iter=0): + self.batch_sampler = batch_sampler + self.num_iterations = num_iterations + self.start_iter = start_iter + + def __iter__(self): + iteration = self.start_iter + while iteration <= self.num_iterations: + # if the underlying sampler has a set_epoch method, like + # DistributedSampler, used for making each process see + # a different split of the dataset, then set it + if hasattr(self.batch_sampler.sampler, "set_epoch"): + self.batch_sampler.sampler.set_epoch(iteration) + for batch in self.batch_sampler: + iteration += 1 + if iteration > self.num_iterations: + break + yield batch + + def __len__(self): + return self.num_iterations diff --git a/cv/semantic_segmentation/icnet/pytorch/model/utils/registry.py b/cv/semantic_segmentation/icnet/pytorch/model/utils/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..d564bd2cd85d3df1cce9af4fdedb436b6d687deb --- /dev/null +++ b/cv/semantic_segmentation/icnet/pytorch/model/utils/registry.py @@ -0,0 +1,92 @@ +# Copyright (c) 2022, Shanghai Iluvatar CoreX Semiconductor Co., Ltd. +# All Rights Reserved. +# Copyright (c) SegmenTron. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +# this code heavily based on detectron2 + +import logging +import torch + + +class Registry(object): + """ + The registry that provides name -> object mapping, to support third-party users' custom modules. + + To create a registry (inside segmentron): + + .. 
code-block:: python
+
+        BACKBONE_REGISTRY = Registry('BACKBONE')
+
+    To register an object:
+
+    .. code-block:: python
+
+        @BACKBONE_REGISTRY.register()
+        class MyBackbone():
+            ...
+
+    Or:
+
+    .. code-block:: python
+
+        BACKBONE_REGISTRY.register(MyBackbone)
+    """
+
+    def __init__(self, name):
+        """
+        Args:
+            name (str): the name of this registry
+        """
+        self._name = name
+
+        self._obj_map = {}
+
+    def _do_register(self, name, obj):
+        assert (
+            name not in self._obj_map
+        ), "An object named '{}' was already registered in '{}' registry!".format(name, self._name)
+        self._obj_map[name] = obj
+
+    def register(self, obj=None, name=None):
+        """
+        Register the given object under the name `obj.__name__`.
+        Can be used as either a decorator or not. See docstring of this class for usage.
+        """
+        if obj is None:
+            # used as a decorator
+            def deco(func_or_class, name=name):
+                if name is None:
+                    name = func_or_class.__name__
+                self._do_register(name, func_or_class)
+                return func_or_class
+
+            return deco
+
+        # used as a function call
+        if name is None:
+            name = obj.__name__
+        self._do_register(name, obj)
+
+    def get(self, name):
+        ret = self._obj_map.get(name)
+        if ret is None:
+            raise KeyError("No object named '{}' found in '{}' registry!".format(name, self._name))
+
+        return ret
+
+    def get_list(self):
+        """Return the list of registered names."""
+        return list(self._obj_map.keys())
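+
+
+# A minimal round-trip sketch for Registry (illustrative; `DEMO_REGISTRY` and
+# `demo_backbone` are hypothetical names, not part of this module):
+#
+#   DEMO_REGISTRY = Registry('DEMO')
+#
+#   @DEMO_REGISTRY.register()
+#   def demo_backbone():
+#       return torch.nn.Identity()
+#
+#   DEMO_REGISTRY.get('demo_backbone')()   # -> nn.Identity()
+#   DEMO_REGISTRY.get_list()               # -> ['demo_backbone']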