https://arxiv.org/abs/1603.09382
---------------------------------------------------------------------------------
2021-03-30
随机深度:训练时对每个 mini-batch 随机丢弃(跳过)一部分残差块,只保留恒等映射;测试时使用完整的网络
深层网络的问题:反向传播时梯度消失/爆炸,前向传播时信息丢失,训练时间长
import torch
import torch.nn as nn


class StochasticDepthBottlencek(nn.Module):
    """Bottleneck residual block (1x1 -> 3x3 -> 1x1 convs) with stochastic depth.

    In training mode the residual branch is kept with probability ``prob`` and
    skipped otherwise, in which case only the shortcut is propagated.  In eval
    mode the residual branch is always evaluated and is scaled by ``prob``,
    following the test-time rule of "Deep Networks with Stochastic Depth".

    Args:
        prob: Survival probability of the residual branch, in ``[0, 1]``.
        inchannel: Number of input channels.
        planes: Width of the bottleneck; the block outputs
            ``planes * expansion`` channels.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input to build the
            shortcut when the shape changes; ``None`` means identity.
    """

    expansion = 4

    def __init__(self, prob, inchannel, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(inchannel, planes, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, 3, stride, 1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, 1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        self.prob = prob
        # One Bernoulli draw per forward call decides whether the residual
        # branch survives for the whole mini-batch.
        self.m = torch.distributions.bernoulli.Bernoulli(
            torch.Tensor([self.prob])
        )

    def _residual(self, x):
        """Return the output of the conv/bn residual branch for ``x``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        return self.bn3(self.conv3(out))

    def forward(self, x):
        """Apply the block to ``x`` and return the activated output."""
        shortcut = x if self.downsample is None else self.downsample(x)

        if self.training:
            if not bool(self.m.sample().item()):
                # Block dropped: the conv branch is never evaluated, so its
                # weights receive no gradient and requires_grad needs no
                # toggling.  Use a non-in-place ReLU because ``shortcut`` may
                # be the caller's tensor itself.
                return nn.functional.relu(shortcut)
            out = self._residual(x)
            out += shortcut
        else:
            # Test time: keep every block but weight the residual branch by
            # its survival probability.
            out = self.prob * self._residual(x) + shortcut

        return self.relu(out)


# Correctly spelled alias; the original (misspelled) name is kept so existing
# callers keep working.
StochasticDepthBottleneck = StochasticDepthBottlencek


class StochasticDepthResNet(nn.Module):
    """ResNet built from bottleneck blocks with linearly decaying survival.

    The survival probability starts at ``probs[0]`` for the first block and is
    decreased by a constant step after every block, so that it reaches
    ``probs[1]`` at the last block when every entry of ``layers`` is >= 1.

    Args:
        probs: ``(first, last)`` survival probabilities.
        layers: Number of bottleneck blocks in each of the four stages.
        classes: Number of output features of the final linear layer.
    """

    def __init__(self, probs=(1, 0.5), layers=(3, 4, 6, 3), classes=1000):
        super().__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, 7, 2, 3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.prob = probs[0]
        # Guard the denominator so a single-block configuration does not
        # divide by zero.
        self.prob_step = (probs[0] - probs[1]) / max(sum(layers) - 1, 1)

        block = StochasticDepthBottlencek
        self.layer1 = self.make_layer(block, 64, layers[0], 1)
        self.layer2 = self.make_layer(block, 128, layers[1], 2)
        self.layer3 = self.make_layer(block, 256, layers[2], 2)
        self.layer4 = self.make_layer(block, 512, layers[3], 2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(
                    m.weight, mode="fan_out", nonlinearity="relu"
                )
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Second pass on purpose: modules() yields a block before its
        # children, so zeroing bn3 in the loop above would be overwritten.
        # A zero bn3 scale makes every residual branch start at zero output.
        for m in self.modules():
            if isinstance(m, StochasticDepthBottlencek):
                nn.init.constant_(m.bn3.weight, 0)

    def make_layer(self, block, planes, num, stride=1):
        """Build one stage of ``num`` blocks and advance ``inplanes``/``prob``.

        The first block carries the stride and, when the shape changes, a
        1x1 conv + batch-norm shortcut; the remaining blocks use stride 1 and
        an identity shortcut.  At least one block is always created.
        """
        out_channels = planes * block.expansion
        downsample = None
        if stride != 1 or self.inplanes != out_channels:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_channels, 1, stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

        layers = [block(self.prob, self.inplanes, planes, stride, downsample)]
        self.inplanes = out_channels
        self.prob = self.prob - self.prob_step
        for _ in range(1, num):
            layers.append(block(self.prob, self.inplanes, planes, 1, None))
            self.prob = self.prob - self.prob_step
        return nn.Sequential(*layers)

    def forward(self, x):
        """Map a ``(N, 3, H, W)`` batch to ``(N, classes)`` outputs."""
        out = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avgpool(out)
        return self.fc(out.view(out.size(0), -1))
Deep Networks with Stochastic Depth
原文:https://www.cnblogs.com/shuimobanchengyan/p/14598396.html