From Scratch: A Complete PyTorch Walkthrough of the Kaggle Cats vs. Dogs Image Classification Task
Summary: This article explains in detail how to complete the Kaggle cats-vs-dogs image classification task with the PyTorch framework, covering the full workflow of data loading, model construction, training and optimization, and prediction and deployment. It is aimed at beginners as well as more experienced developers.
Data Preparation and Preprocessing
Dataset Acquisition and Directory Structure
The Kaggle Dogs vs. Cats dataset contains 25,000 training images (12,500 cats / 12,500 dogs) and 12,500 test images. For this walkthrough the data is organized into three subdirectories: train/cat, train/dog, and test. The dataset can be downloaded with the Kaggle API, for example via the command kaggle competitions download -c dogs-vs-cats. A sketch for sorting the raw training images into class subdirectories follows.
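The raw Kaggle archive ships all training images in a single folder, with the class encoded in the filename (e.g. cat.0.jpg, dog.1.jpg). Below is a minimal sketch for sorting them into train/cat and train/dog; the paths raw_dir and out_dir are placeholders for your local layout.

import os
import shutil

raw_dir = "data/dogs-vs-cats/raw_train"   # extracted Kaggle train/ folder (placeholder path)
out_dir = "data/dogs-vs-cats/train"       # target directory with cat/ and dog/ subfolders

for cls in ("cat", "dog"):
    os.makedirs(os.path.join(out_dir, cls), exist_ok=True)

for img_name in os.listdir(raw_dir):
    # Filenames look like "cat.123.jpg" or "dog.456.jpg"; skip anything else.
    cls = img_name.split(".")[0]
    if cls in ("cat", "dog"):
        shutil.copy(os.path.join(raw_dir, img_name), os.path.join(out_dir, cls, img_name))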
Data Augmentation Strategy
To improve the model's ability to generalize, apply the following augmentations:
- Random horizontal flip (probability 0.5)
- Random rotation (-15° to +15°)
- Random resized crop to 224x224
- Color jitter (brightness / contrast / saturation)
- Normalization (mean [0.485, 0.456, 0.406], std [0.229, 0.224, 0.225])
In PyTorch these can be composed with torchvision.transforms.Compose:
from torchvision import transforms

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),  # matches the -15° to +15° rotation listed above
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
Custom Data Loader
Create a custom dataset class based on torch.utils.data.Dataset:
import os
from PIL import Image
from torch.utils.data import Dataset

class CatDogDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = ['cat', 'dog']
        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}
        self.images = []
        for cls in self.classes:
            cls_dir = os.path.join(root_dir, cls)
            for img_name in os.listdir(cls_dir):
                self.images.append((os.path.join(cls_dir, img_name), self.class_to_idx[cls]))

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path, label = self.images[idx]
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label
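As a quick usage check, the dataset can be wrapped in a DataLoader. The directory path below is a placeholder that assumes the train/cat and train/dog layout described earlier.

from torch.utils.data import DataLoader

train_dataset = CatDogDataset('data/dogs-vs-cats/train', transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

images, labels = next(iter(train_loader))
print(images.shape, labels.shape)  # expected: torch.Size([32, 3, 224, 224]) torch.Size([32])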
Model Architecture
A Basic CNN
For beginners, a simple CNN with four convolutional blocks is a good starting point:
import torch.nn as nn

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # After four 2x2 poolings, a 224x224 input is reduced to 14x14 feature maps.
        self.classifier = nn.Sequential(
            nn.Linear(256 * 14 * 14, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1024, 2)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
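A quick shape check with a random batch confirms the 256 * 14 * 14 flattened size assumed by the classifier:

import torch

model = SimpleCNN()
dummy = torch.randn(4, 3, 224, 224)   # batch of 4 fake RGB images
out = model(dummy)
print(out.shape)                      # expected: torch.Size([4, 2])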
Transfer Learning
Using a pretrained ResNet18 for transfer learning is recommended:
from torchvision import models

def create_model(pretrained=True):
    model = models.resnet18(pretrained=pretrained)
    # Freeze all convolutional-layer parameters
    for param in model.parameters():
        param.requires_grad = False
    # Replace the final fully connected layer
    num_ftrs = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(num_ftrs, 512),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(512, 2)
    )
    return model
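Because the backbone is frozen, only the new head needs gradients. A small sketch that verifies this and builds an optimizer over just the trainable parameters (the optimizer choice here is illustrative):

model = create_model(pretrained=True)

trainable = [name for name, p in model.named_parameters() if p.requires_grad]
print(trainable)  # only the parameters of the new model.fc head should appear

# Pass only the trainable parameters to the optimizer.
optimizer = torch.optim.Adam(
    (p for p in model.parameters() if p.requires_grad), lr=0.001
)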
Training Workflow and Optimization
Loss Function and Optimizer
- Loss function: cross-entropy, nn.CrossEntropyLoss()
- Optimizer: Adam (learning rate 0.001) or SGD with momentum (learning rate 0.01, momentum 0.9)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
Training Loop Implementation
def train_model(model, dataloaders, criterion, optimizer, scheduler, num_epochs=25):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training phase and a validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # set the model to training mode
            else:
                model.eval()   # set the model to evaluation mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over the data
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward pass; track gradients only during training
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # Backward pass + optimization only in the training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Keep the best model weights
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                torch.save(model.state_dict(), 'best_model.pth')

    print(f'Best val Acc: {best_acc:.4f}')
    return model
Prediction and Deployment
Test-Set Prediction
def predict_test(model, test_dir, transform):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()

    test_images = []
    for img_name in os.listdir(test_dir):
        img_path = os.path.join(test_dir, img_name)
        image = Image.open(img_path).convert('RGB')
        if transform:
            image = transform(image)
        test_images.append((image, img_name))

    predictions = []
    with torch.no_grad():
        for img, name in test_images:
            img = img.unsqueeze(0).to(device)
            outputs = model(img)
            _, pred = torch.max(outputs, 1)
            predictions.append((name, pred.item()))
    return predictions
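To turn these predictions into a submission file, something along the following lines can be used. This is only a sketch: it assumes the competition expects a CSV with id and label columns, where id is the numeric part of the test filename, and it writes the predicted class index rather than a calibrated probability.

import csv
import os

def write_submission(predictions, out_path='submission.csv'):
    # predictions: list of (filename, predicted_class) as returned by predict_test
    with open(out_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['id', 'label'])
        for name, pred in predictions:
            img_id = os.path.splitext(name)[0]   # e.g. "1.jpg" -> "1"
            writer.writerow([img_id, pred])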
Deployment Recommendations
- Export to TorchScript (a loading sketch follows this list):
traced_script_module = torch.jit.trace(model, example_input)
traced_script_module.save("model.pt")
- Export to ONNX for cross-platform deployment:
dummy_input = torch.randn(1, 3, 224, 224)
torch.onnx.export(model, dummy_input, "model.onnx")
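A brief sketch of loading the exported TorchScript module back for inference, assuming the model.pt file produced above and an input of the same shape used for tracing:

loaded = torch.jit.load("model.pt")
loaded.eval()

example_input = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    output = loaded(example_input)
print(output.shape)  # expected: torch.Size([1, 2])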
Performance Optimization Tips
Mixed-precision training: use torch.cuda.amp for automatic mixed precision:
scaler = torch.cuda.amp.GradScaler()  # create once, outside the training loop

# Inside the per-batch training loop:
optimizer.zero_grad()
with torch.cuda.amp.autocast():
    outputs = model(inputs)
    loss = criterion(outputs, labels)
scaler.scale(loss).backward()
scaler.step(optimizer)
scaler.update()
Multi-GPU training: nn.DataParallel is the simplest single-machine multi-GPU configuration:
model = nn.DataParallel(model)
model = model.to(device)
Learning-rate warmup: can be implemented with torch.optim.lr_scheduler.LambdaLR, as sketched below.
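A minimal sketch of linear warmup with LambdaLR, assuming a 5-epoch warmup followed by a constant learning rate (the warmup length is an illustrative choice):

warmup_epochs = 5

def warmup_lambda(epoch):
    # Ramp the learning rate linearly up to its base value, then hold it constant.
    if epoch < warmup_epochs:
        return float(epoch + 1) / warmup_epochs
    return 1.0

scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=warmup_lambda)
# Call scheduler.step() once per epoch after the optimizer updates.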
Common Problems and Solutions
Overfitting:
- Add Dropout layers (rates of 0.3-0.5 are a reasonable range)
- Use L2 regularization (e.g. weight_decay=0.001)
- Strengthen the data augmentation
Slow convergence:
- Check that the learning rate is appropriate (roughly 1e-3 to 1e-5)
- Try a different optimizer (AdamW often works better; see the sketch after this list)
- Verify that the data preprocessing is correct
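A small sketch, assuming AdamW as the optimizer, that also covers the L2-regularization tip from the overfitting list via its weight_decay argument (the values are illustrative):

# AdamW applies decoupled weight decay, which often behaves better than
# L2 regularization folded into Adam's gradient update.
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-3)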
Out-of-memory errors:
- Reduce the batch_size (32-64 is a common range)
- Use gradient accumulation (accumulate_grad_batches in PyTorch Lightning, or a manual loop; see the sketch after this list)
- Free cached GPU memory (torch.cuda.empty_cache())
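A minimal sketch of manual gradient accumulation in plain PyTorch, accumulating over 4 mini-batches so the effective batch size is 4x the loader's batch_size (the factor is an illustrative choice):

accumulation_steps = 4
optimizer.zero_grad()

for step, (inputs, labels) in enumerate(train_loader):
    inputs, labels = inputs.to(device), labels.to(device)
    outputs = model(inputs)
    # Scale the loss so the accumulated gradient matches that of a larger batch.
    loss = criterion(outputs, labels) / accumulation_steps
    loss.backward()

    if (step + 1) % accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()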
Complete Code Example
import os
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import StepLR
from torchvision import transforms

# Main entry point
def main():
    # Data preprocessing
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }

    # Load the datasets; data_dir is expected to contain train/ and val/
    # subdirectories, each with cat/ and dog/ subfolders.
    data_dir = 'data/dogs-vs-cats'
    image_datasets = {
        x: CatDogDataset(os.path.join(data_dir, x), data_transforms[x])
        for x in ['train', 'val']
    }
    dataloaders = {
        x: torch.utils.data.DataLoader(image_datasets[x], batch_size=32,
                                       shuffle=True, num_workers=4)
        for x in ['train', 'val']
    }

    # Initialize the model
    model = create_model(pretrained=True)

    # Training setup
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

    # Train the model
    model = train_model(model, dataloaders, criterion, optimizer, scheduler, num_epochs=25)

    # Save the model
    torch.save(model.state_dict(), 'cat_dog_classifier.pth')

if __name__ == '__main__':
    main()
With the complete implementation above, developers can work through the full process of building an image classifier with PyTorch. For real projects it is recommended to: 1) prefer the transfer-learning approach; 2) invest in data augmentation; 3) set up a sensible learning-rate schedule; and 4) monitor validation performance to guard against overfitting. This approach can reach an accuracy above 98% on the Kaggle test set.
