Assignment notes: CNN classification

We classify food photos. File names follow the pattern "class_index.jpg", where the class is also given as a number; there are 11 classes: Bread, Dairy product, Dessert, Egg, Fried food, Meat, Noodles/Pasta, Rice, Seafood, Soup, and Vegetable/Fruit. Strictly speaking, we never need to know what the classes actually are: during training we just feed in the images and predict a class id.
File names in training and validation follow the format "class_index.jpg"; for example, 3_100.jpg is a photo of class 3 (the index itself does not matter). Files in testing are named [index].jpg.
The dataset must be downloaded and unpacked in advance. The original sample code fetches it from Google Drive, which is inconvenient if you need a proxy to reach Google.
Reference: sample code for homework 3 of 李宏毅 (Hung-yi Lee)'s machine learning course.
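
If you have already downloaded the archive by hand, here is a minimal sketch for unpacking it; the file name food-11.zip is an assumption, so adjust it to whatever you saved:

import zipfile

with zipfile.ZipFile("food-11.zip") as zf:
    zf.extractall("./")  # expected to yield ./food-11/{training,validation,testing}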

Import the required packages

cv2 has to be installed separately; running pip install opencv-python is enough.
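
A quick way to confirm the install worked (the exact version string will vary):

import cv2
print(cv2.__version__)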

# Import the required packages
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import time

Reading in the data

# Helper that reads all images under a directory
def readfile(path, label):
    # path: the directory to read from
    # label: a boolean flag; True means the labels y are also returned
    #   (training/validation), False means only x is returned (testing)
    # Each image is resized to 128x128 RGB, so a 4-D uint8 array holds them all
    image_dir = sorted(os.listdir(path))
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        img = cv2.imread(os.path.join(path, file))
        x[i, :, :] = cv2.resize(img, (128, 128))
        if label:
            # the class id is the part of the file name before the underscore
            y[i] = int(file.split("_")[0])
    if label:
        return x, y
    else:
        return x

workspace_dir = './food-11'
print("Reading data")
train_x, train_y = readfile(os.path.join(workspace_dir, "training"), True)
print("Size of training data = {}".format(len(train_x)))
val_x, val_y = readfile(os.path.join(workspace_dir, "validation"), True)
print("Size of validation data = {}".format(len(val_x)))
test_x = readfile(os.path.join(workspace_dir, "testing"), False)
print("Size of Testing data = {}".format(len(test_x)))
Reading data
Size of training data = 9866
Size of validation data = 3430
Size of Testing data = 3347
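
Note that cv2.imread returns images in BGR channel order, not RGB. The pipeline here is self-consistent (training and testing both see BGR), but if you visualize the arrays or reuse RGB-pretrained weights, convert first. A minimal sketch, assuming matplotlib is installed:

import matplotlib.pyplot as plt

rgb = cv2.cvtColor(train_x[0], cv2.COLOR_BGR2RGB)  # BGR -> RGB for display
plt.imshow(rgb)
plt.show()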

Dataset

In PyTorch, we can use the Dataset and DataLoader classes from torch.utils.data to "wrap" the data, which makes the subsequent training and testing much more convenient.

# Data augmentation is applied during training
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),  # randomly flip the image horizontally
    transforms.RandomRotation(15),      # randomly rotate the image by up to 15 degrees
    transforms.ToTensor(),              # convert the image to a tensor
])
# No data augmentation during testing
test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
])

class ImgDataset(Dataset):
    def __init__(self, x, y=None, transform=None):
        self.x = x
        # the label is required to be a LongTensor
        self.y = y
        if y is not None:
            self.y = torch.LongTensor(y)
        self.transform = transform
    def __len__(self):
        return len(self.x)
    def __getitem__(self, index):
        X = self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return X, Y
        else:
            return X
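
A quick sanity check of the wrapper, confirming a sample comes out with the shape the model below expects:

sample_set = ImgDataset(train_x, train_y, train_transform)
X0, Y0 = sample_set[0]
print(X0.shape, X0.dtype)  # torch.Size([3, 128, 128]) torch.float32
print(Y0)                  # the class id as a scalar LongTensor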

Training in mini-batches speeds up training and keeps memory usage manageable.

batch_size = 128
train_set = ImgDataset(train_x, train_y, train_transform)
val_set = ImgDataset(val_x, val_y, test_transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
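
On a multi-core machine you can usually speed up loading by giving the DataLoader worker processes; the value below is only a suggestion to tune per machine (worker processes also behave differently on Windows):

# Optional: parallel data loading; num_workers=2 is an assumption, tune as needed
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2)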

Building the model

BatchNorm2d normalizes the activations after each convolution, and the activation function is ReLU. The shape comments in the code track the tensor size layer by layer; see the quick check right after this paragraph.
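
With kernel size 3, stride 1, and padding 1, a convolution preserves the spatial size ((128 + 2*1 - 3)/1 + 1 = 128), and each MaxPool2d(2, 2) halves it. A quick check:

x = torch.randn(1, 3, 128, 128)
conv = nn.Conv2d(3, 64, 3, 1, 1)
pool = nn.MaxPool2d(2, 2, 0)
print(conv(x).shape)        # torch.Size([1, 64, 128, 128])
print(pool(conv(x)).shape)  # torch.Size([1, 64, 64, 64])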

class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input dimensions: [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),    # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1),  # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [512, 8, 8]

            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),        # [512, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)
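
To get a feel for the model size, a quick parameter count (runs on CPU, no data needed):

net = Classifier()
print(sum(p.numel() for p in net.parameters()))  # roughly 12.8 million parameters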

Training the model

Train on the training set, and use the validation set to pick good hyperparameters.
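
One caveat before running: the code below calls .cuda() unconditionally and therefore requires an NVIDIA GPU. A device-agnostic variant is a small change, sketched here:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Classifier().to(device)
# ...then move each batch with data[0].to(device), data[1].to(device)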

model = Classifier().cuda()
loss = nn.CrossEntropyLoss()  # this is a classification task, so use CrossEntropyLoss
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # use Adam; lr is a hyperparameter you can tune
num_epoch = 30

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    model.train()  # make sure the model is in training mode (enables Dropout etc.)
    for i, data in enumerate(train_loader):
        optimizer.zero_grad()  # zero the gradients of the model parameters
        train_pred = model(data[0].cuda())  # get the predicted distribution; this calls the model's forward
        batch_loss = loss(train_pred, data[1].cuda())  # compute the loss (prediction and label must be on the same device)
        batch_loss.backward()  # back-propagate to compute each parameter's gradient
        optimizer.step()       # update the parameters with the gradients

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()

    model.eval()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            val_pred = model(data[0].cuda())
            batch_loss = loss(val_pred, data[1].cuda())

            val_acc += np.sum(np.argmax(val_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
            val_loss += batch_loss.item()

    # print the results
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
        (epoch + 1, num_epoch, time.time()-epoch_start_time, \
        train_acc/train_set.__len__(), train_loss/train_set.__len__(), val_acc/val_set.__len__(), val_loss/val_set.__len__()))
[001/030] 14.86 sec(s) Train Acc: 0.227549 Loss: 0.018395 | Val Acc: 0.243440 loss: 0.016740
[002/030] 14.73 sec(s) Train Acc: 0.340158 Loss: 0.014800 | Val Acc: 0.334111 loss: 0.016127
[003/030] 14.75 sec(s) Train Acc: 0.381512 Loss: 0.013810 | Val Acc: 0.313994 loss: 0.015460
[004/030] 14.77 sec(s) Train Acc: 0.431482 Loss: 0.012794 | Val Acc: 0.317493 loss: 0.015771
[005/030] 14.83 sec(s) Train Acc: 0.467160 Loss: 0.012125 | Val Acc: 0.441691 loss: 0.013532
[006/030] 14.82 sec(s) Train Acc: 0.501824 Loss: 0.011313 | Val Acc: 0.190962 loss: 0.029743
[007/030] 14.82 sec(s) Train Acc: 0.530002 Loss: 0.010675 | Val Acc: 0.351603 loss: 0.018797
[008/030] 14.82 sec(s) Train Acc: 0.550882 Loss: 0.010232 | Val Acc: 0.527697 loss: 0.010849
[009/030] 14.80 sec(s) Train Acc: 0.573383 Loss: 0.009685 | Val Acc: 0.407872 loss: 0.014792
[010/030] 14.84 sec(s) Train Acc: 0.582911 Loss: 0.009314 | Val Acc: 0.359767 loss: 0.019833
[011/030] 14.86 sec(s) Train Acc: 0.617981 Loss: 0.008696 | Val Acc: 0.557434 loss: 0.011074
[012/030] 14.84 sec(s) Train Acc: 0.625481 Loss: 0.008536 | Val Acc: 0.522449 loss: 0.011264
[013/030] 14.86 sec(s) Train Acc: 0.657004 Loss: 0.007838 | Val Acc: 0.580175 loss: 0.010005
[014/030] 14.80 sec(s) Train Acc: 0.674133 Loss: 0.007390 | Val Acc: 0.582799 loss: 0.009706
[015/030] 14.86 sec(s) Train Acc: 0.686600 Loss: 0.007146 | Val Acc: 0.541108 loss: 0.011018
[016/030] 14.83 sec(s) Train Acc: 0.696331 Loss: 0.006872 | Val Acc: 0.569971 loss: 0.010966
[017/030] 14.88 sec(s) Train Acc: 0.716197 Loss: 0.006412 | Val Acc: 0.595335 loss: 0.010484
[018/030] 14.88 sec(s) Train Acc: 0.719136 Loss: 0.006439 | Val Acc: 0.632945 loss: 0.009088
[019/030] 14.94 sec(s) Train Acc: 0.727042 Loss: 0.006230 | Val Acc: 0.532653 loss: 0.012839
[020/030] 14.90 sec(s) Train Acc: 0.736063 Loss: 0.005996 | Val Acc: 0.605831 loss: 0.010434
[021/030] 14.85 sec(s) Train Acc: 0.757754 Loss: 0.005454 | Val Acc: 0.578717 loss: 0.011531
[022/030] 14.88 sec(s) Train Acc: 0.766471 Loss: 0.005285 | Val Acc: 0.632070 loss: 0.009517
[023/030] 14.88 sec(s) Train Acc: 0.784715 Loss: 0.004788 | Val Acc: 0.662391 loss: 0.008359
[024/030] 14.83 sec(s) Train Acc: 0.804987 Loss: 0.004413 | Val Acc: 0.550729 loss: 0.013303
[025/030] 14.85 sec(s) Train Acc: 0.813602 Loss: 0.004159 | Val Acc: 0.634985 loss: 0.010451
[026/030] 14.85 sec(s) Train Acc: 0.821711 Loss: 0.004053 | Val Acc: 0.467055 loss: 0.017780
[027/030] 14.93 sec(s) Train Acc: 0.828198 Loss: 0.003934 | Val Acc: 0.579592 loss: 0.013445
[028/030] 14.85 sec(s) Train Acc: 0.823535 Loss: 0.004012 | Val Acc: 0.616910 loss: 0.011728
[029/030] 14.90 sec(s) Train Acc: 0.832455 Loss: 0.003813 | Val Acc: 0.628863 loss: 0.010894
[030/030] 14.83 sec(s) Train Acc: 0.837726 Loss: 0.003696 | Val Acc: 0.679300 loss: 0.009366

Once the hyperparameters look reasonable, merge the training and validation data and retrain the final model on the full labeled set:

train_val_x = np.concatenate((train_x, val_x), axis=0)
train_val_y = np.concatenate((train_y, val_y), axis=0)
train_val_set = ImgDataset(train_val_x, train_val_y, train_transform)
train_val_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)

model_best = Classifier().cuda()
loss = nn.CrossEntropyLoss()  # classification task, so use CrossEntropyLoss
optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001)  # use Adam as the optimizer
num_epoch = 30

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0

    model_best.train()
    for i, data in enumerate(train_val_loader):
        optimizer.zero_grad()
        train_pred = model_best(data[0].cuda())
        batch_loss = loss(train_pred, data[1].cuda())
        batch_loss.backward()
        optimizer.step()

        train_acc += np.sum(np.argmax(train_pred.cpu().data.numpy(), axis=1) == data[1].numpy())
        train_loss += batch_loss.item()

    # print the results
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' % \
        (epoch + 1, num_epoch, time.time()-epoch_start_time, \
        train_acc/train_val_set.__len__(), train_loss/train_val_set.__len__()))
[001/030] 16.94 sec(s) Train Acc: 0.253234 Loss: 0.016887
[002/030] 16.98 sec(s) Train Acc: 0.377708 Loss: 0.013877
[003/030] 16.97 sec(s) Train Acc: 0.443442 Loss: 0.012409
[004/030] 17.08 sec(s) Train Acc: 0.496916 Loss: 0.011322
[005/030] 17.02 sec(s) Train Acc: 0.539486 Loss: 0.010247
[006/030] 16.99 sec(s) Train Acc: 0.578896 Loss: 0.009436
[007/030] 17.08 sec(s) Train Acc: 0.608303 Loss: 0.008864
[008/030] 17.06 sec(s) Train Acc: 0.630039 Loss: 0.008328
[009/030] 17.02 sec(s) Train Acc: 0.656965 Loss: 0.007704
[010/030] 17.14 sec(s) Train Acc: 0.677873 Loss: 0.007283
[011/030] 17.18 sec(s) Train Acc: 0.699158 Loss: 0.006727
[012/030] 17.19 sec(s) Train Acc: 0.717735 Loss: 0.006340
[013/030] 17.19 sec(s) Train Acc: 0.745262 Loss: 0.005799
[014/030] 17.17 sec(s) Train Acc: 0.758574 Loss: 0.005436
[015/030] 17.14 sec(s) Train Acc: 0.765569 Loss: 0.005278
[016/030] 17.09 sec(s) Train Acc: 0.777828 Loss: 0.004947
[017/030] 17.18 sec(s) Train Acc: 0.795427 Loss: 0.004533
[018/030] 17.12 sec(s) Train Acc: 0.799188 Loss: 0.004513
[019/030] 17.12 sec(s) Train Acc: 0.825887 Loss: 0.003908
[020/030] 17.22 sec(s) Train Acc: 0.831980 Loss: 0.003822
[021/030] 17.12 sec(s) Train Acc: 0.840177 Loss: 0.003572
[022/030] 17.10 sec(s) Train Acc: 0.859582 Loss: 0.003178
[023/030] 17.21 sec(s) Train Acc: 0.868758 Loss: 0.002964
[024/030] 17.14 sec(s) Train Acc: 0.875150 Loss: 0.002801
[025/030] 17.16 sec(s) Train Acc: 0.888613 Loss: 0.002458
[026/030] 17.10 sec(s) Train Acc: 0.892449 Loss: 0.002349
[027/030] 17.05 sec(s) Train Acc: 0.905836 Loss: 0.002034
[028/030] 17.08 sec(s) Train Acc: 0.917043 Loss: 0.001871
[029/030] 17.18 sec(s) Train Acc: 0.914786 Loss: 0.001894
[030/030] 17.16 sec(s) Train Acc: 0.916291 Loss: 0.001863
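
At this point it is worth persisting the weights so the model does not have to be retrained before inference (the file name here is an assumption):

torch.save(model_best.state_dict(), "model_best.pth")
# later: model_best = Classifier(); model_best.load_state_dict(torch.load("model_best.pth"))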

Testing

test_set = ImgDataset(test_x, transform=test_transform)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

model_best.eval()
prediction = []
with torch.no_grad():
    for i, data in enumerate(test_loader):
        test_pred = model_best(data.cuda())
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        for y in test_label:
            prediction.append(y)

with open("predict.csv", 'w') as f:
    f.write('Id,Category\n')
    for i, y in enumerate(prediction):
        # print(i, y)
        f.write('{},{}\n'.format(i, y))
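
pandas is imported at the top but otherwise unused; the same CSV can be written with it in two lines:

df = pd.DataFrame({"Id": range(len(prediction)), "Category": prediction})
df.to_csv("predict.csv", index=False)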