提取前num_class个类别

In [33]:
import os
num_class=100 #使用的汉字种类
def class_txt(root,out_path,num_class=20):
    dirs=os.listdir(root)
    if not num_class:
        num_class=len(dirs)
    if not os.path.exists(out_path):
        f=open(out_path,'w')
        f.close()
    with open(out_path,'r+') as f:
        try:
            end = int(f.readlines()[-1].split('/')[-2]) + 1
        except:
            end = 0
        if end < num_class - 1:
            dirs.sort()
            dirs = dirs[end:num_class]
            for dir in dirs:
                files = os.listdir(os.path.join(root, dir))
                # print(os.path.join(root,dir)+'/'+files[0])
                for file in files:
                    f.write(os.path.join(root, dir)+'/'+file + '\n')

#训练集和测试集的地址
train_path='./../data/train/'
test_path='./../data/test/'

class_txt(train_path,'train_class_txt.txt',num_class=num_class)
class_txt(test_path,'test_class_txt.txt',num_class=num_class)

In [34]:
import torch
dev = torch.device(
    "cuda") if torch.cuda.is_available() else torch.device("cpu")
# dev=torch.device('cpu')
print(dev)

cuda


通过上一步的目录，构建数据集

In [35]:
import matplotlib.pyplot as plt
from torch.utils.data import Dataset,DataLoader
from PIL import Image
class MyDataset(Dataset):
    def __init__(self, txt_path, num_class=20, transforms=None):
        super(MyDataset, self).__init__()
        images = [] # 存储图片路径
        labels = [] # 存储类别名，在本例中是数字
        # 打开上一步生成的txt文件
        with open(txt_path, 'r') as f:
            for line in f:
                # print(line)
                if int(line.split('/')[-2]) >= num_class:  # 只读取前 num_class 个类
                    break
                line = line.strip('\n')
                images.append(line)
                labels.append(int(line.split('/')[-2]))
        self.images = images
        self.labels = labels
        self.transforms = transforms # 图片需要进行的变换，ToTensor()等等

    def __getitem__(self, index):
        image = Image.open(self.images[index]).convert('RGB') # 用PIL.Image读取图像
        label = self.labels[index]
        if self.transforms is not None:
            image = self.transforms(image) # 进行变换
        # plt.imshow(image)
        image=image.float().to(dev)
        label=torch.tensor(label).to(dev)
        return image, label

    def __len__(self):
        return len(self.labels)

图片处理，并做出dataset和dataloader

In [36]:
from torchvision import transforms
trans=transforms.Compose([
                          transforms.PILToTensor(),
                          transforms.Grayscale(1),
                          transforms.Resize((56,56)),
                          transforms.Pad(4,255),
                          transforms.ColorJitter(contrast=(0,255)),
                          ])
train_dataset=MyDataset('train_class_txt.txt',num_class,transforms=trans)
test_dataset=MyDataset('test_class_txt.txt',num_class,transforms=trans)

batch_size=128
train_dl=DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
test_dl=DataLoader(test_dataset,batch_size=2*batch_size)

神经网络

In [37]:
import torch.nn as nn
import torch.nn.functional as F
class block(nn.Module):
    def __init__(self,in_channel,outchannel) -> None:
        super().__init__()
        self.cov=nn.Conv2d(in_channel,outchannel,3,padding=1)
        self.BN=nn.BatchNorm2d(outchannel)
        self.pool=nn.MaxPool2d(2,2)
    
    def forward(self,x):
        x=self.cov(x)
        x=self.BN(x)
        x=F.relu(x)
        x=self.pool(x)
        return x

class VGG(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.cov=nn.Sequential(
            block(1,64),
            block(64,128),
            block(128,256),
            block(256,512)
        )
        self.classifier=nn.Sequential(
            nn.Linear(512*4*4,1024),
            nn.ReLU(),
            nn.Linear(1024,512),
            nn.ReLU(),
            nn.Linear(512,num_class)
        )
    
    def forward(self,x):
        x=self.cov(x)
        x=x.view(-1,512*4*4)
        x=self.classifier(x)
        return x

模型，优化器，损失函数

In [38]:
model=VGG().to(dev)
opt=torch.optim.SGD(model.parameters(),lr=0.01)
loss_func=F.cross_entropy

训练过程

In [39]:
epochs=50
for epoch in range(epochs):
    model.train()
    for xb,yb in train_dl:
        pred=model(xb).to(dev)
        loss=loss_func(pred,yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

    model.eval()
    with torch.no_grad():
        test_loss=sum(loss_func(model(xb),yb) for xb,yb in test_dl)
    
    print(epoch,test_loss/len(test_dl))


0 tensor(3.3825, device='cuda:0')
1 tensor(1.4348, device='cuda:0')
2 tensor(0.8740, device='cuda:0')
3 tensor(0.7558, device='cuda:0')
4 tensor(0.5363, device='cuda:0')
5 tensor(0.5264, device='cuda:0')
6 tensor(0.7382, device='cuda:0')
7 tensor(0.4795, device='cuda:0')
8 tensor(0.5460, device='cuda:0')
9 tensor(0.3940, device='cuda:0')
10 tensor(0.3965, device='cuda:0')
11 tensor(0.6228, device='cuda:0')
12 tensor(0.3312, device='cuda:0')
13 tensor(0.3194, device='cuda:0')
14 tensor(0.3215, device='cuda:0')
15 tensor(0.3852, device='cuda:0')
16 tensor(0.4272, device='cuda:0')
17 tensor(0.4473, device='cuda:0')
18 tensor(0.4242, device='cuda:0')
19 tensor(0.3004, device='cuda:0')
20 tensor(0.3867, device='cuda:0')
21 tensor(0.3377, device='cuda:0')
22 tensor(0.3000, device='cuda:0')
23 tensor(0.4512, device='cuda:0')
24 tensor(0.3190, device='cuda:0')
25 tensor(0.5520, device='cuda:0')
26 tensor(0.2648, device='cuda:0')
27 tensor(0.2692, device='cuda:0')
28 tensor(0.3685, device='cuda

In [40]:
import numpy as np
with torch.no_grad():
    print(sum([np.argmax(model(torch.unsqueeze(xb,0)).cpu())==yb for xb,yb in test_dataset]),len(test_dataset))

tensor(5393, device='cuda:0') 5845
