https://arxiv.org/abs/2007.12099
---------------------------------------------------------
2021-09-07
Backbone: ResNet50-vd with DCN (deformable convolution)
A 1x1 convolution with stride=2 loses information, so the "vd" variant moves the downsampling to the 3x3 convolution.
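A minimal sketch of that idea (my own illustration, not the PaddleDetection code): the stride-2 downsampling sits on the 3x3 conv rather than the 1x1, the shortcut downsamples with an avg-pool before its 1x1 conv, and the 3x3 conv can optionally be swapped for torchvision's DeformConv2d as in PP-YOLO's last backbone stage. The class name BottleneckVD and the layer widths are illustrative assumptions.

import torch
import torch.nn as nn
from torchvision.ops import DeformConv2d


class BottleneckVD(nn.Module):
    """Stride-2 bottleneck in the ResNet-vd style (illustrative sketch, not the official code)."""

    def __init__(self, inchannel, midchannel, outchannel, use_dcn=False):
        super().__init__()
        # 1x1 conv keeps stride 1, so no information is thrown away here
        self.conv1 = nn.Sequential(
            nn.Conv2d(inchannel, midchannel, 1, 1, bias=False),
            nn.BatchNorm2d(midchannel), nn.ReLU(inplace=True))
        if use_dcn:
            # deformable 3x3 conv (PP-YOLO uses DCN in the last backbone stage);
            # its sampling offsets are predicted by a plain 3x3 conv
            self.offset = nn.Conv2d(midchannel, 2 * 3 * 3, 3, 2, 1)
            self.conv2 = DeformConv2d(midchannel, midchannel, 3, 2, 1, bias=False)
        else:
            self.offset = None
            # the stride-2 downsampling sits on the 3x3 conv instead of the 1x1
            self.conv2 = nn.Conv2d(midchannel, midchannel, 3, 2, 1, bias=False)
        self.bn2 = nn.Sequential(nn.BatchNorm2d(midchannel), nn.ReLU(inplace=True))
        self.conv3 = nn.Sequential(
            nn.Conv2d(midchannel, outchannel, 1, 1, bias=False),
            nn.BatchNorm2d(outchannel))
        # "vd" shortcut: average-pool first, then a stride-1 1x1 conv
        self.shortcut = nn.Sequential(
            nn.AvgPool2d(2, 2, ceil_mode=True),
            nn.Conv2d(inchannel, outchannel, 1, 1, bias=False),
            nn.BatchNorm2d(outchannel))
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out, self.offset(out)) if self.offset is not None else self.conv2(out)
        out = self.bn2(out)
        out = self.conv3(out)
        return self.relu(out + self.shortcut(x))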
Training strategies:
EMA (exponential moving average of the model weights)
DropBlock
Bounding box regression
YOLO:
Bx = sigmoid(Tx) + Cx
By = sigmoid(Ty) + Cy
Bw = Aw * exp(Tw)
Bh = Ah * exp(Th)
R-CNN:
Bx = Aw * Tx + Ax
By = Ah * Ty + Ay
Bw = Aw * exp(Tw)
Bh = Ah * exp(Th)
(T: raw network outputs; C: grid-cell top-left corner; A: anchor/prior box; see the decoding sketch below)
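A small sketch of the two decoding rules (my own illustration; the tensor layout and function names are assumptions, not from the original post):

import torch


def decode_yolo(t, cx, cy, aw, ah):
    # t: (..., 4) raw predictions (Tx, Ty, Tw, Th); cx, cy: grid-cell top-left;
    # aw, ah: anchor width/height (all in the same units, e.g. grid cells)
    bx = torch.sigmoid(t[..., 0]) + cx          # sigmoid keeps the center inside its grid cell
    by = torch.sigmoid(t[..., 1]) + cy
    bw = aw * torch.exp(t[..., 2])              # size is a multiple of the anchor size
    bh = ah * torch.exp(t[..., 3])
    return torch.stack([bx, by, bw, bh], dim=-1)


def decode_rcnn(t, ax, ay, aw, ah):
    # R-CNN style: the center offset is scaled by the anchor size
    # instead of being squashed by a sigmoid
    bx = aw * t[..., 0] + ax
    by = ah * t[..., 1] + ay
    bw = aw * torch.exp(t[..., 2])
    bh = ah * torch.exp(t[..., 3])
    return torch.stack([bx, by, bw, bh], dim=-1)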
CoordConv: addresses the coordinate-transform problem; plain convolution is translation invariant, so explicit coordinate channels are concatenated to the input.
SPP (Spatial Pyramid Pooling); the same idea underlies ROI Pooling.
PyTorch reference implementations of the components above:

import math

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


# MixUp data augmentation: blend two samples and keep both labels plus the mixing weight.
def MixUP(x, y, alpha=1.0, use_cuda=True):
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1.
    batch = x.size()[0]
    if use_cuda:
        idx = torch.randperm(batch).cuda()
    else:
        idx = torch.randperm(batch)
    mixup_x = lam * x + (1 - lam) * x[idx]
    y_a, y_b = y, y[idx]
    return mixup_x, y_a, y_b, lam


def mixup_criterion(y_a, y_b, lam):
    return lambda criterion, pred: lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)


# Exponential moving average of the model weights.
class EMA():
    def __init__(self, model, decay):
        self.model = model
        self.decay = decay
        self.shadow = {}
        self.backup = {}

    def register(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                self.shadow[name] = param.data.clone()

    def update(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                assert name in self.shadow
                new_average = (1.0 - self.decay) * param.data + self.decay * self.shadow[name]
                self.shadow[name] = new_average.clone()

    def apply_shadow(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                assert name in self.shadow
                self.backup[name] = param.data
                param.data = self.shadow[name]

    def restore(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                assert name in self.backup
                param.data = self.backup[name]
        self.backup = {}


# DropBlock: drop contiguous block_size x block_size regions of the feature map.
class DropBlock2D(nn.Module):
    def __init__(self, drop_prob, block_size):
        super(DropBlock2D, self).__init__()
        self.drop_prob = drop_prob
        self.block_size = block_size

    def forward(self, x):
        if not self.training or self.drop_prob < 1e-9:
            return x
        gamma = self.compute_gamma(x)
        mask = (torch.rand(x.shape[0], *x.shape[2:]) < gamma).float()
        mask = mask.to(x.device)
        block_mask = self.compute_block_mask(mask)
        out = x * block_mask[:, None, :, :]
        out = out * block_mask.numel() / block_mask.sum()  # rescale to keep the expected activation
        return out

    def compute_gamma(self, x):
        return self.drop_prob / (self.block_size ** 2)

    def compute_block_mask(self, mask):
        # expand each dropped seed point to a block_size x block_size square
        block_mask = F.max_pool2d(input=mask[:, None, :, :],
                                  kernel_size=(self.block_size, self.block_size),
                                  stride=(1, 1),
                                  padding=self.block_size // 2)
        if self.block_size % 2 == 0:
            block_mask = block_mask[:, :, :-1, :-1]
        block_mask = 1 - block_mask.squeeze(1)
        return block_mask


# Spatial pyramid pooling (classic flattening variant).
class SPPLayer(nn.Module):
    def __init__(self, num_levels):
        super(SPPLayer, self).__init__()
        self.num_levels = num_levels

    def forward(self, x):
        n, c, h, w = x.size()
        for i in range(self.num_levels):
            level = i + 1
            kernel_size = (math.ceil(h / level), math.ceil(w / level))
            stride = (math.ceil(h / level), math.ceil(w / level))
            padding = (math.floor((kernel_size[0] * level - h + 1) / 2),
                       math.floor((kernel_size[1] * level - w + 1) / 2))
            tensor = F.max_pool2d(x, kernel_size=kernel_size, stride=stride, padding=padding)
            if i == 0:
                out = tensor.view(n, -1)
            else:
                out = torch.cat((out, tensor.view(n, -1)), 1)
        return out


# Conv-BN-LeakyReLU building block.
class Conv(nn.Module):
    def __init__(self, inchannel, outchannel, kernel_size, stride=1):
        super(Conv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, kernel_size, stride, kernel_size // 2, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.LeakyReLU(negative_slope=0.1)
        )

    def forward(self, x):
        return self.conv(x)


class Upsample(nn.Module):
    def __init__(self, inchannel, outchannel):
        super(Upsample, self).__init__()
        self.upsample = Conv(inchannel, outchannel, 1)

    def forward(self, x, target_size):
        x = self.upsample(x)
        x = F.interpolate(x, target_size, mode="bilinear", align_corners=False)
        return x


class Downsample(nn.Module):
    def __init__(self, inchannel, outchannel):
        super(Downsample, self).__init__()
        self.downsample = Conv(inchannel, outchannel, 3, 2)

    def forward(self, x):
        return self.downsample(x)


# PAN neck: top-down (FPN-style) pass followed by a bottom-up pass.
class PAN(nn.Module):
    def __init__(self, feature_channels):
        super(PAN, self).__init__()
        self.init_trans = nn.ModuleList(
            [Conv(channel, channel // 2, 1) for channel in feature_channels[:-1]] +
            [nn.Sequential(
                Conv(feature_channels[-1], feature_channels[-1] // 2, 1),
                Conv(feature_channels[-1] // 2, feature_channels[-1], 3),
                Conv(feature_channels[-1], feature_channels[-1] // 2, 1)
            )])
        self.up_trans = nn.ModuleList([self.trans(channel) for channel in feature_channels[:-1]] + [nn.Identity()])
        self.down_trans = nn.ModuleList([nn.Identity()] + [self.trans(channel) for channel in feature_channels[1:]])
        self.upsamples = nn.ModuleList([Upsample(high // 2, low // 2)
                                        for high, low in zip(feature_channels[1:], feature_channels[:-1])])
        self.downsamples = nn.ModuleList([Downsample(low // 2, high // 2)
                                          for low, high in zip(feature_channels[:-1], feature_channels[1:])])

    def trans(self, channel):
        return nn.Sequential(
            Conv(channel, channel // 2, 1),
            Conv(channel // 2, channel, 3),
            Conv(channel, channel // 2, 1),
            Conv(channel // 2, channel, 3),
            Conv(channel, channel // 2, 1)
        )

    def forward(self, features):
        features = [layer(f) for layer, f in zip(self.init_trans, features)]
        # top-down path
        features[-1] = self.up_trans[-1](features[-1])
        for idx in range(len(features) - 1, 0, -1):
            features[idx - 1] = torch.cat(
                [features[idx - 1], self.upsamples[idx - 1](features[idx], features[idx - 1].shape[2:])], dim=1)
            features[idx - 1] = self.up_trans[idx - 1](features[idx - 1])
        # bottom-up path
        features[0] = self.down_trans[0](features[0])
        for idx in range(0, len(features) - 1):
            features[idx + 1] = torch.cat([self.downsamples[idx](features[idx]), features[idx + 1]], dim=1)
            features[idx + 1] = self.down_trans[idx + 1](features[idx + 1])
        return features


# Mish activation: x * tanh(softplus(x)).
class Mish(nn.Module):
    def __init__(self):
        super(Mish, self).__init__()

    def forward(self, x):
        return x * torch.tanh(F.softplus(x))


# Append normalized coordinate channels (and optionally a radius channel) to the input.
class AddCoords(nn.Module):
    def __init__(self, with_r=False):
        super(AddCoords, self).__init__()
        self.with_r = with_r

    def forward(self, input_tensor):
        N, C, H, W = input_tensor.size()
        h_channel = torch.arange(H).repeat(1, W, 1)
        w_channel = torch.arange(W).repeat(1, H, 1).transpose(1, 2)
        h_channel = h_channel.float() / (H - 1)
        w_channel = w_channel.float() / (W - 1)
        h_channel = h_channel * 2 - 1  # scale to [-1, 1]
        w_channel = w_channel * 2 - 1
        h_channel = h_channel.repeat(N, 1, 1, 1).transpose(2, 3)
        w_channel = w_channel.repeat(N, 1, 1, 1).transpose(2, 3)
        out_tensor = torch.cat([input_tensor,
                                h_channel.type_as(input_tensor),
                                w_channel.type_as(input_tensor)], dim=1)
        if self.with_r:
            r = torch.sqrt(torch.pow(h_channel.type_as(input_tensor) - 0.5, 2) +
                           torch.pow(w_channel.type_as(input_tensor) - 0.5, 2))
            out_tensor = torch.cat([out_tensor, r], dim=1)
        return out_tensor


class CoordConv(nn.Module):
    def __init__(self, with_r, inchannel, outchannel, kernel_size):
        super(CoordConv, self).__init__()
        self.addcoord = AddCoords(with_r)
        inchannel += 2
        if with_r:
            inchannel += 1
        self.conv = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, kernel_size, 1, kernel_size // 2, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.LeakyReLU(0.1, inplace=True)
        )

    def forward(self, x):
        x = self.addcoord(x)
        x = self.conv(x)
        return x


def box_area(boxes: torch.Tensor) -> torch.Tensor:
    return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])


# Pairwise IoU between two sets of (x1, y1, x2, y2) boxes.
def box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
    area1 = box_area(boxes1)
    area2 = box_area(boxes2)
    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])
    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[:, :, 0] * wh[:, :, 1]
    iou = inter / (area1[:, None] + area2 - inter)
    return iou


# Soft-NMS: instead of discarding overlapping boxes, decay their scores
# (linearly or with a Gaussian), then keep boxes whose score stays above soft_thre.
def soft_nms(boxes: torch.Tensor, scores: torch.Tensor, soft_thre, iou_thre, weight_method, sigma):
    keep = []
    idxs = scores.argsort()
    while idxs.numel() > 0:
        idxs = scores.argsort()
        if idxs.size(0) == 1:
            keep.append(idxs[-1])
            break
        keep_len = len(keep)
        max_score_idx = idxs[-(keep_len + 1)]  # highest-scoring box not yet kept
        max_score_box = boxes[max_score_idx][None, :]
        idxs = idxs[:-(keep_len + 1)]
        other_boxes = boxes[idxs]
        keep.append(max_score_idx)
        ious = box_iou(max_score_box, other_boxes)
        if weight_method == "linear":
            thre_bool = ious[0] >= iou_thre
            thre_idxs = idxs[thre_bool]
            scores[thre_idxs] *= (1. - ious[0][thre_bool])
        elif weight_method == "gauss":
            scores[idxs] *= torch.exp(-(ious[0] * ious[0]) / sigma)
    keep = torch.stack(keep)
    keep = keep[scores[keep] > soft_thre]
    boxes = boxes[keep]
    scores = scores[keep]
    return boxes, scores
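A hedged sketch of how the MixUp and EMA pieces above could be wired into one training step; model, optimizer, loader and criterion are placeholders, not part of the original post:

# Hypothetical wiring of MixUP + EMA into a training loop.
ema = EMA(model, decay=0.9998)
ema.register()

for images, targets in loader:
    # classification-style criterion used here purely for illustration
    mixed, y_a, y_b, lam = MixUP(images, targets, alpha=1.0, use_cuda=False)
    loss_fn = mixup_criterion(y_a, y_b, lam)
    loss = loss_fn(criterion, model(mixed))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    ema.update()        # track the exponential moving average after each step

ema.apply_shadow()      # evaluate / export with the averaged weights
# ... validation ...
ema.restore()           # go back to the raw weights before resuming training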
PP-YOLO: An Effective and Efficient Implementation of Object Detector
Original post: https://www.cnblogs.com/shuimobanchengyan/p/15240042.html