# Operation                 | New/Shared memory | Still in computation graph
# tensor.clone()            | New               | Yes
# tensor.detach()           | Shared            | No
# tensor.detach().clone()   | New               | No
# Indices of the non-zero elements, then of the zero elements.
torch.nonzero(tensor)
torch.nonzero(tensor == 0)

# Number of non-zero elements, then number of zero elements.
torch.nonzero(tensor).size(0)
torch.nonzero(tensor == 0).size(0)
判断两个张量相等
# Use allclose for float tensors.
torch.allclose(tensor1, tensor2)

# Use equal for int tensors.
torch.equal(tensor1, tensor2)
张量扩展
# Expand tensor of shape 64*512 to shape 64*512*7*7.
tensor = torch.rand(64, 512)
# Append two singleton dimensions, then broadcast them to 7x7. The result is
# bound to a name so it can actually be used afterwards.
expanded = torch.reshape(tensor, (64, 512, 1, 1)).expand(64, 512, 7, 7)
# Common practice for initialization: walk every sub-module of the model and
# initialize it according to its layer type.
for layer in model.modules():
    if isinstance(layer, torch.nn.Conv2d):
        torch.nn.init.kaiming_normal_(layer.weight, mode='fan_out',
                                      nonlinearity='relu')
        if layer.bias is not None:
            torch.nn.init.constant_(layer.bias, val=0.0)
    elif isinstance(layer, torch.nn.BatchNorm2d):
        # A BatchNorm2d built with affine=False has weight and bias set to
        # None, so guard both before initializing them.
        if layer.weight is not None:
            torch.nn.init.constant_(layer.weight, val=1.0)
        if layer.bias is not None:
            torch.nn.init.constant_(layer.bias, val=0.0)
    elif isinstance(layer, torch.nn.Linear):
        torch.nn.init.xavier_normal_(layer.weight)
        if layer.bias is not None:
            torch.nn.init.constant_(layer.bias, val=0.0)

# Initialization with given tensor.
# NOTE(review): `layer` here is whatever module the name currently refers to
# (after the loop above, the last module visited) - confirm the intended layer.
layer.weight = torch.nn.Parameter(tensor)
# Take the first two child modules of the model.
new_model = nn.Sequential(*list(model.children())[:2])

# To extract every convolution layer of the model instead, collect them into
# a fresh container.
conv_model = nn.Sequential()
for name, module in model.named_modules():
    if isinstance(module, nn.Conv2d):
        # named_modules() yields dotted paths for nested modules, but
        # add_module() rejects names that contain ".", so flatten the path.
        conv_model.add_module(name.replace('.', '_'), module)
# model_new is the new model.
# model_saved stands for another model, e.g. a saved model loaded with
# torch.load; it is iterated here as a name -> tensor mapping.
model_new_dict = model_new.state_dict()

# Keep only the saved entries whose names also exist in the new model.
model_common_dict = {
    name: weight
    for name, weight in model_saved.items()
    if name in model_new_dict
}

# Overwrite the matching entries and load the merged state back.
model_new_dict.update(model_common_dict)
model_new.load_state_dict(model_new_dict)
数据处理
计算数据集的均值和标准差
import os import cv2 import numpy as np from torch.utils.data import Dataset from PIL import Image
# Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model.
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move the batch onto the training device.
        images, labels = images.to(device), labels.to(device)

        # Forward pass.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update; gradients are cleared first.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
def _smooth_label(self, target, length, smooth_factor):
    """Turn a batch of class labels into smoothed one-hot rows.

    Args:
        target: labels of the batch, in the form
            [label1, label2, ..., label_batchsize]
        length: width of the one-hot encoding (number of classes)
        smooth_factor: smoothing factor used for label smoothing

    Returns:
        the smoothed labels in one-hot format, moved to target's device
    """
    peak_value = 1 - smooth_factor
    smoothed = self._one_hot(target, length, value=peak_value)
    # NOTE(review): the offset is added to every entry, the labelled one
    # included. If _one_hot writes `value` at the label index and zeros
    # elsewhere, each row sums to 1 + smooth_factor / (length - 1) - confirm
    # this is intended.
    smoothed += smooth_factor / (length - 1)

    return smoothed.to(target.device)
def forward(self, x, target):
    """Compute the label-smoothed cross-entropy loss for a batch.

    Args:
        x: 2-dimensional input tensor; dim 0 is the batch, and x.size(1) is
            used as the one-hot length of the smoothed target.
        target: labels of the batch; target.size(0) must equal x.size(0).

    Returns:
        The per-sample loss when self.reduction is 'none', its sum for
        'sum', or its mean for 'mean'.

    Raises:
        ValueError: if x is not 2-dimensional, if the batch sizes of x and
            target differ, or if self.reduction is not a recognized option.
    """
    # Check the rank first: x.size(0) below is undefined for a 0-dim tensor.
    if x.dim() < 2:
        # Report the number of dimensions, not the batch size.
        raise ValueError('Expected input tensor to have least 2 dimensions(got {})'
                         .format(x.dim()))

    if x.dim() != 2:
        raise ValueError('Only 2 dimension tensor are implemented, (got {})'
                         .format(x.size()))

    if x.size(0) != target.size(0):
        raise ValueError('Expected input batchsize ({}) to match target batch_size({})'
                         .format(x.size(0), target.size(0)))

    smoothed_target = self._smooth_label(target, x.size(1), self.e)
    x = self.log_softmax(x)
    loss = torch.sum(- x * smoothed_target, dim=1)

    # NOTE(review): the branches selecting the reduction were missing from
    # this copy of the file; they are reconstructed from the error message
    # below. Confirm the attribute name `self.reduction` against __init__.
    if self.reduction == 'none':
        return loss
    elif self.reduction == 'sum':
        return torch.sum(loss)
    elif self.reduction == 'mean':
        return torch.mean(loss)
    else:
        raise ValueError('unrecognized option, expect reduction to be one of none, mean, sum')
或者直接在训练文件里做label smoothing
for images, labels in train_loader:
    images = images.cuda()
    labels = labels.cuda()
    N = labels.size(0)

    # C is the number of classes.
    # Every entry starts at 0.1 / (C - 1); the entry at each sample's label
    # index is then overwritten with 0.9.
    smoothed_labels = torch.full(size=(N, C), fill_value=0.1 / (C - 1)).cuda()
    smoothed_labels.scatter_(dim=1, index=labels.unsqueeze(dim=1), value=0.9)

    # Cross entropy against the smoothed targets, averaged over the batch.
    score = model(images)
    log_prob = torch.nn.functional.log_softmax(score, dim=1)
    loss = -(log_prob * smoothed_labels).sum() / N

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
Mixup训练
# Beta(alpha, alpha) distribution object for mixup.
beta_distribution = torch.distributions.beta.Beta(alpha, alpha)
for images, labels in train_loader:
    # Move the batch to the GPU.
    images = images.cuda()
    labels = labels.cuda()
# Reduce the learning rate when the validation accuracy plateaus.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    patience=5,
    verbose=True,
)
for epoch in range(80):
    train(...)
    val(...)
    # The scheduler is stepped with the monitored metric after validation.
    scheduler.step(val_acc)
# The two schedulers below are alternatives; as written, the second
# assignment replaces the first, so keep only the one you need.

# Cosine annealing learning rate.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=80)
# Reduce learning rate by 10 at given epochs.
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[50, 70], gamma=0.1)
for t in range(0, 80):
    train(...)
    val(...)
    # Step the scheduler after the epoch's optimizer updates. Stepping before
    # training skips the first value of the schedule and applies each
    # milestone one epoch early.
    scheduler.step()
# Learning rate warmup by 10 epochs.
# Epoch t trains at (t + 1) / 10 of the base learning rate. The lambda is
# offset by one so that the scheduler can be stepped after training and the
# first epoch does not run with a zero learning rate. The factor is clamped
# at 1.0 so the final step does not overshoot the base learning rate.
scheduler = torch.optim.lr_scheduler.LambdaLR(
    optimizer, lr_lambda=lambda t: min(1.0, (t + 1) / 10))
for t in range(0, 10):
    train(...)
    val(...)
    # Step the scheduler after the epoch's optimizer updates.
    scheduler.step()