For background on computation graphs, see [1].

A tensor in PyTorch has a requires_grad attribute. When it is set to True, operations on the tensor are tracked so that gradients can be computed; the flag can be changed in place with tensor.requires_grad_(False), and it defaults to False. A tensor can also be removed from the computation graph with tensor.detach_(); the non-in-place detach() returns a detached copy and leaves the tensor itself unchanged. Finally, the context manager with torch.no_grad(): disables gradient tracking for everything computed inside it, which is useful during model evaluation.
A tensor also has a .grad attribute. When computation is done and .backward() is called on the result, the current gradients of every tensor with requires_grad=True are accumulated into their .grad attributes. If the result is a vector rather than a scalar, backward() additionally needs a gradient argument specifying the direction along which to differentiate.
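A minimal sketch of these mechanics (the tensor values are arbitrary):

import torch

x = torch.ones(2, 2, requires_grad=True)   # tracked tensor
y = (x * 3).sum()                          # scalar result
y.backward()                               # populates x.grad
print(x.grad)                              # all 3s; repeated backward calls accumulate here

v = torch.randn(3, requires_grad=True)
w = v * 2                                  # vector result
w.backward(torch.ones_like(w))             # non-scalar output needs a direction vector

with torch.no_grad():                      # no graph is built inside this block
    z = v * 2
print(z.requires_grad)                     # False
print(v.detach().requires_grad)            # detach() returns an untracked copy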
A custom network must subclass nn.Module and implement the forward function. Once the network is built, every trainable parameter is reachable through parameters(), a method that returns a generator ordered by the network structure. Given an optimizer and a criterion, one parameter update is a call to backward() followed by optimizer.step(). Note that nn.Module layers only accept batched input, i.e. the first dimension is the batch dimension.
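Putting these pieces together, a minimal single update step (the layer sizes and data are made up for illustration):

import torch
import torch.nn as nn

class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 2)  # trainable weights live in self.fc

    def forward(self, x):          # x must be batched: (batch, 4)
        return self.fc(x)

net = TinyNet()
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)  # parameters() yields fc.weight, fc.bias
criterion = nn.MSELoss()

x = torch.randn(8, 4)              # a batch of 8 samples
target = torch.randn(8, 2)
optimizer.zero_grad()              # clear gradients accumulated from earlier steps
loss = criterion(net(x), target)
loss.backward()                    # fill .grad of every parameter
optimizer.step()                   # one parameter update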
A custom autograd function must subclass torch.autograd.Function and implement the two static methods forward and backward.
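For instance, a hand-rolled ReLU (a standard textbook example, not from the original post):

import torch

class MyReLU(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)       # stash the input for the backward pass
        return x.clamp(min=0)

    @staticmethod
    def backward(ctx, grad_output):
        x, = ctx.saved_tensors
        grad_input = grad_output.clone()
        grad_input[x < 0] = 0          # gradient is 0 where the input was negative
        return grad_input

x = torch.randn(5, requires_grad=True)
y = MyReLU.apply(x).sum()              # Functions are invoked through .apply
y.backward()                           # x.grad is now 1 where x > 0, else 0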
A custom dataset must implement the two methods __len__ and __getitem__, and a custom dataloader must implement __len__ and __iter__; both protocols are sketched below, and a full Dataset appears in the FCN code that follows.
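A bare-bones sketch of both protocols (the toy dataset and loader are mine, for illustration only):

import torch
from torch.utils.data import Dataset

class SquaresDataset(Dataset):          # dataset: __len__ + __getitem__
    def __len__(self):
        return 10

    def __getitem__(self, index):
        return torch.tensor([index]), torch.tensor([index ** 2])

class SimpleLoader:                     # loader: __len__ + __iter__
    def __init__(self, dataset, batch_size=4):
        self.dataset, self.batch_size = dataset, batch_size

    def __len__(self):                  # number of batches, rounding up
        return (len(self.dataset) + self.batch_size - 1) // self.batch_size

    def __iter__(self):                 # yield stacked batches in order
        for start in range(0, len(self.dataset), self.batch_size):
            stop = min(start + self.batch_size, len(self.dataset))
            xs, ys = zip(*[self.dataset[i] for i in range(start, stop)])
            yield torch.stack(xs), torch.stack(ys)

for xs, ys in SimpleLoader(SquaresDataset(), batch_size=4):
    print(xs.shape, ys.shape)           # (4, 1) twice, then a final partial batch (2, 1)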
Below is a reproduction of an FCN for image segmentation (I forget where I found it; I will add it to the references once I track it down).
# Custom dataset
from PIL import Image
from torchvision import transforms as T
from torch.utils.data import Dataset
from glob import glob
import os
import numpy as np

class CustomDataset(Dataset):
    def __init__(self, image_path="data/BagImages", mode="train"):
        assert mode in ("train", "val", "test")
        self.image_path = image_path
        self.image_list = glob(os.path.join(self.image_path, "*.jpg"))
        self.mode = mode
        if mode in ("train", "val"):
            self.mask_path = self.image_path + "Masks"
        # the image transform is needed in test mode too, so define it unconditionally
        self.transform_x = T.Compose([
            T.Resize((256, 256)),
            T.ToTensor(),
            T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),  # ImageNet statistics
        ])
        self.transform_mask = T.Compose([T.ToTensor()])

    def __getitem__(self, index):
        if self.mode in ("train", "val"):
            image_name = os.path.splitext(os.path.basename(self.image_list[index]))[0]
            X = Image.open(self.image_list[index])
            # binarize the mask and build a 2-channel (foreground/background) target
            mask = np.array(Image.open(os.path.join(self.mask_path, image_name + ".jpg")).convert("1").resize((256, 256)))
            masks = np.zeros((mask.shape[0], mask.shape[1], 2), dtype=np.uint8)
            masks[:, :, 0] = mask
            masks[:, :, 1] = ~mask
            X = self.transform_x(X)
            masks = self.transform_mask(masks) * 255  # undo ToTensor's /255 scaling to recover 0/1 labels
            return X, masks
        else:
            X = Image.open(self.image_list[index])
            X = self.transform_x(X)
            path = self.image_list[index]
            return X, path

    def __len__(self):
        return len(self.image_list)
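A quick sanity check of the dataset (assuming the data/BagImages and data/BagImagesMasks layout above):

ds = CustomDataset(image_path="data/BagImages", mode="train")
X, masks = ds[0]
print(X.shape)      # expected: torch.Size([3, 256, 256])
print(masks.shape)  # expected: torch.Size([2, 256, 256])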
# Network
import torch
import torch.nn as nn
from model.vgg import VGG

class FCN32s(nn.Module):
    def __init__(self, num_classes, backbone="vgg"):
        super(FCN32s, self).__init__()
        self.num_classes = num_classes
        if backbone == "vgg":
            self.features = VGG()
        # deconv1: 1/32 -> 1/16
        self.deconv1 = nn.ConvTranspose2d(512, 512, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.bn1 = nn.BatchNorm2d(512)
        self.relu1 = nn.ReLU()
        # deconv2: 1/16 -> 1/8
        self.deconv2 = nn.ConvTranspose2d(512, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.bn2 = nn.BatchNorm2d(256)
        self.relu2 = nn.ReLU()
        # deconv3: 1/8 -> 1/4
        self.deconv3 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.relu3 = nn.ReLU()
        # deconv4: 1/4 -> 1/2
        self.deconv4 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.bn4 = nn.BatchNorm2d(64)
        self.relu4 = nn.ReLU()
        # deconv5: 1/2 -> 1/1
        self.deconv5 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.bn5 = nn.BatchNorm2d(32)
        self.relu5 = nn.ReLU()
        self.classifier = nn.Conv2d(32, num_classes, kernel_size=1)

    def forward(self, x):
        # the VGG backbone returns a list of feature maps; index 4 is the 1/32-resolution one
        features = self.features(x)
        y = self.relu1(self.bn1(self.deconv1(features[4])))  # deconv -> BN -> ReLU
        y = self.relu2(self.bn2(self.deconv2(y)))
        y = self.relu3(self.bn3(self.deconv3(y)))
        y = self.relu4(self.bn4(self.deconv4(y)))
        y = self.relu5(self.bn5(self.deconv5(y)))
        y = self.classifier(y)
        return y
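A shape check for the network, assuming the VGG wrapper in model.vgg returns a list of feature maps whose index 4 is the 512-channel, 1/32-resolution map:

model = FCN32s(num_classes=2)
x = torch.randn(1, 3, 256, 256)    # one 256x256 RGB image
y = model(x)
print(y.shape)                     # expected: torch.Size([1, 2, 256, 256])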
# train
import time
import logging

CUDA = torch.cuda.is_available()

def train(**kwargs):
    mymodel = kwargs["mymodel"]
    criterion = kwargs["criterion"]
    data_loader = kwargs["data_loader"]
    optimizer = kwargs["optimizer"]
    epoch = kwargs["epoch"]
    save_freq = kwargs["save_freq"]
    save_dir = kwargs["save_dir"]
    verbose = kwargs["verbose"]
    start_time = time.time()
    logging.info("Epoch %03d, Learning Rate %g" % (epoch + 1, optimizer.param_groups[0]["lr"]))
    mymodel.train()
    epoch_loss = 0.0
    batches = 0
    for i, sample in enumerate(data_loader):
        image, target = sample
        if CUDA:
            image = image.cuda()
            target = target.cuda()
        optimizer.zero_grad()
        output = mymodel(image)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        batches += 1
        if (i + 1) % verbose == 0:
            logging.info("Training Loss: %.6f" % (epoch_loss / batches))
            logging.info("")
    # save checkpoint model
    if epoch % save_freq == 0:
        # unwrap nn.DataParallel if present so the checkpoint loads into a bare model
        model_to_save = mymodel.module if hasattr(mymodel, "module") else mymodel
        state_dict = model_to_save.state_dict()
        for key in state_dict.keys():
            state_dict[key] = state_dict[key].cpu()
        torch.save({
            "epoch": epoch,
            "save_dir": save_dir,
            "state_dict": state_dict,
        }, os.path.join(save_dir, "%03d.ckpt" % (epoch + 1)))
    end_time = time.time()
    logging.info("Batch Loss: %.6f Time: %d s" % (epoch_loss / batches, end_time - start_time))
# main
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm

def main(hyper_parameter=hyper_parameter):  # hyper_parameter: a config dict defined elsewhere
    # training
    start_epoch = 0
    mymodel = FCN32s(hyper_parameter["num_classes"], hyper_parameter["back_bone"])
    if hyper_parameter["ckpt"]:
        ckpt = hyper_parameter["ckpt"]
        if hyper_parameter["initial_training"] == 0:
            # resume: the checkpoint filename encodes the epoch, e.g. 005.ckpt
            epoch_name = (ckpt.split("/")[-1]).split(".")[0]
            start_epoch = int(epoch_name)
        checkpoint = torch.load(ckpt)
        state_dict = checkpoint["state_dict"]
        mymodel.load_state_dict(state_dict)
        logging.info(f'Model loaded from {hyper_parameter["ckpt"]}')
    save_dir = hyper_parameter["save_dir"]
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    # CUDA
    if CUDA:
        mymodel.to(torch.device("cuda"))
        mymodel = nn.DataParallel(mymodel)
    custom_dataset = CustomDataset()
    test_set = CustomDataset("data/testImages", mode="test")
    # 90/10 train/validation split
    train_size = int(0.9 * len(custom_dataset))
    val_size = len(custom_dataset) - train_size
    train_set, val_set = random_split(custom_dataset, [train_size, val_size])
    train_loader = DataLoader(train_set, batch_size=hyper_parameter["batch_size"], shuffle=True)
    val_loader = DataLoader(val_set, batch_size=hyper_parameter["batch_size"], shuffle=False)
    test_loader = DataLoader(test_set, batch_size=hyper_parameter["batch_size"], shuffle=False)
    if hyper_parameter["mode"] == "test":
        test(mymodel=mymodel, data_loader=test_loader)  # test() is defined elsewhere
        return
    optimizer = torch.optim.Adam(mymodel.parameters(), lr=hyper_parameter["lr"])
    criterion = nn.BCEWithLogitsLoss()
    logging.info("Start training: Total epochs: {}, Batch size: {}, Training size: {}, Validation size: {}".
                 format(hyper_parameter["epochs"], hyper_parameter["batch_size"], len(train_set), len(val_set)))
    for epoch in tqdm(range(start_epoch, hyper_parameter["epochs"])):
        train(epoch=epoch,
              data_loader=train_loader,
              mymodel=mymodel,
              criterion=criterion,
              optimizer=optimizer,
              save_freq=hyper_parameter["save_freq"],
              save_dir=hyper_parameter["save_dir"],
              verbose=hyper_parameter["verbose"])
        validate(data_loader=val_loader,  # validate() is defined elsewhere
                 mymodel=mymodel,
                 criterion=criterion,
                 verbose=hyper_parameter["verbose"])
        # scheduler.step()
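The excerpt never defines the hyper_parameter dict that main() expects; judging from the keys it reads, it would look something like this (all values are illustrative guesses):

hyper_parameter = {
    "num_classes": 2,            # foreground + background
    "back_bone": "vgg",
    "ckpt": "",                  # path to a checkpoint, empty for fresh training
    "initial_training": 1,       # 0 = resume the epoch counter from the checkpoint name
    "save_dir": "checkpoints",
    "mode": "train",             # "train" or "test"
    "batch_size": 4,
    "lr": 1e-3,
    "epochs": 50,
    "save_freq": 5,              # checkpoint every N epochs
    "verbose": 10,               # log every N batches
}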
Original post: https://www.cnblogs.com/DemonHunter/p/12815556.html