实验要求
1、读取UC遥感数据
2、调整第八次实验课搭建的CNN网络，使其能够用于UC数据分类
3、测试CNN网络分类效果
4、搭建一个VGG网络（VGG结构见课堂PPT），用于分类UC数据，对比与3层卷积CNN网络的分类性能差异。

## 读取UC数据

In [2]:
import torch
from torch import nn
import numpy as np
import os
import cv2

In [12]:
a=os.listdir('./UCMerced_LandUse/validation/')
b=os.listdir('./UCMerced_LandUse/train/')
print(a)
print(b)

['agricultural', 'airplane', 'baseballdiamond', 'beach', 'buildings', 'chaparral', 'denseresidential', 'forest', 'freeway']
['agricultural', 'airplane', 'baseballdiamond', 'beach', 'buildings', 'chaparral', 'denseresidential', 'forest', 'freeway']


In [9]:
def read_UC(path):
    # 初始化变量
    X = np.zeros((1,3,256,256))
    Y = []
    # 补充 #
    lbcnt=0
    for fp in os.listdir(path):
        for j in os.listdir(path+fp):
            tmp=cv2.imread(path+fp+"/"+j)
            tmp=cv2.resize(tmp,(256,256))
            tmp=np.transpose(tmp,(2,0,1))
            tmp=np.reshape(tmp,(1,3,256,256))
            
            X=np.concatenate((X,tmp))
            Y.append(lbcnt)
        lbcnt+=1
    X=X[1:]
    X=X.astype(np.float32)
    Y=np.array(Y)
    return X,Y

# 读取训练集和测试集数据
[train_img, train_lb] = read_UC('./UCMerced_LandUse/train/')
[test_img, test_lb] = read_UC('./UCMerced_LandUse/validation/')
print(train_img.shape)
print(test_img.shape)

# 将所有数据归一化到0-1之间
train_img =train_img/255.
test_img   =test_img/255.

# 对标签进行热编码
train_lb = np.eye(9)[train_lb]
test_lb = np.eye(9)[test_lb]

index = np.random.permutation(train_img.shape[0])
train_img = train_img[index]
train_lb = train_lb[index]
index = np.random.permutation(test_img.shape[0])
test_img = test_img[index]
test_lb = test_lb[index]

# 打印查看数据集格式
print('训练集图像格式为:', train_img.shape, '训练集标签格式为:', train_lb.shape,'热编码训练集标签格式为:', train_lb.shape)
print('测试集图像格式为:', test_img.shape, '测试集标签格式为:', test_lb.shape,'热编码测试集标签格式为:', test_lb.shape)

(720, 3, 256, 256)
(180, 3, 256, 256)
训练集图像格式为: (720, 3, 256, 256) 训练集标签格式为: (720, 9) 热编码训练集标签格式为: (720, 9)
测试集图像格式为: (180, 3, 256, 256) 测试集标签格式为: (180, 9) 热编码测试集标签格式为: (180, 9)


In [11]:
np.savez("dataset",train_img,train_lb,test_img,test_lb)
print("saved")


[[0. 0. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]]


In [1]:
import torch
from torch import nn
import numpy as np
import os
import cv2
tmp=np.load("dataset.npz")
train_img=tmp["arr_0"]
train_lb=tmp["arr_1"]
test_img=tmp["arr_2"]
test_lb=tmp["arr_3"]
print('训练集图像格式为:', train_img.shape, '训练集标签格式为:', train_lb.shape,'热编码训练集标签格式为:', train_lb.shape)
print('测试集图像格式为:', test_img.shape, '测试集标签格式为:', test_lb.shape,'热编码测试集标签格式为:', test_lb.shape)

训练集图像格式为: (720, 3, 256, 256) 训练集标签格式为: (720, 9) 热编码训练集标签格式为: (720, 9)
测试集图像格式为: (180, 3, 256, 256) 测试集标签格式为: (180, 9) 热编码测试集标签格式为: (180, 9)


## 数据增强处理

In [2]:

from torchvision.transforms import v2
from torchvision.io import read_image
train_img1=torch.tensor(train_img, dtype=torch.float32)
transforms = v2.Compose([
    v2.RandomHorizontalFlip(p=0.5),
    #v2.Normalize(mean=[0.485,0.485,0.485], std=[0.229,0.229,0.229]),
    v2.RandomRotation(degrees=(0, 180))
    ])
normal= v2.Compose([v2.Normalize(mean=[0.485,0.485,0.485], std=[0.229,0.229,0.229])])
augmented_sample = transforms(train_img1)
#train_img1= normal(train_img1)
#test_img = normal(test_img)
train_img = torch.cat((augmented_sample,train_img1))
train_lb  = np.concatenate((train_lb,train_lb),axis=0)

print('训练集图像格式为:', train_img.shape, '训练集标签格式为:', train_lb.shape,'热编码训练集标签格式为:', train_lb.shape)

训练集图像格式为: torch.Size([1440, 3, 256, 256]) 训练集标签格式为: (1440, 9) 热编码训练集标签格式为: (1440, 9)


使用增强处理后的数据，训练网络

## TRAIN

In [17]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1=nn.Conv2d(in_channels=3,out_channels=10, kernel_size=16,stride=2)
        self.conv2=nn.Conv2d(in_channels=10,out_channels=20,kernel_size=15,stride=2)
        self.conv3=nn.Conv2d(in_channels=20,out_channels=10,kernel_size=10,stride=2)
        self.w1 =nn.Linear(23*23*10,100)
        self.w2 =nn.Linear(100,9)
        self.BN1=nn.BatchNorm2d(10)
        self.BN2=nn.BatchNorm2d(20)
        self.BN3=nn.BatchNorm2d(10)
        self.relu=nn.ReLU()
        self.drop=nn.Dropout(p=0.5)

    def forward(self, x):
        x = self.conv1 (x)
        x = self.BN1(x)
        x = self.relu(x)
        x = self.conv2 (x)
        x = self.BN2(x)
        x = self.relu(x)
        x = self.conv3(x)
        x = self.BN3(x)
        x = self.relu(x)
        x = self.drop(x)
        x = x.view(x.size(0), -1)
        x = self.w1 (x)
        x = self.relu(x)
        x = self.drop(x)
        x = self.w2 (x)
        x = self.relu(x)
        
        return x

# Initialize the loss function
gpu=1
model = NeuralNetwork()
loss_fn = nn.CrossEntropyLoss()
learning_rate = 5e-3
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

if gpu:
    if torch.cuda.is_available():
        model= model.cuda()
        loss_fn=loss_fn.cuda()
    else:
        gpu=0

batch_size = 200
epochs = 100
batch_num=int(train_img.shape[0]/batch_size)
size = len(train_img)

model.train()
for t in range(epochs):
    
    correct=0.
    train_mean_loss=0.

    for batch in range(batch_num):
        X=train_img[batch*batch_size:(batch+1)*batch_size,]
        y=train_lb[batch*batch_size:(batch+1)*batch_size,:]
        X=torch.tensor(X, dtype=torch.float32)
        y=torch.tensor(y, dtype=torch.float32)
        if gpu:
            X=X.cuda()
            y=y.cuda()
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        correct += (pred.argmax(1) == y.argmax(1)).type(torch.float).mean().item()
        train_mean_loss+= loss.item()

    train_mean_loss /= batch_num
    correct /= batch_num
    
    print(f" Epoch:{t+1}, loss: {train_mean_loss:>8f},  Accuracy: {(100*correct):>0.1f}%")
    if (t+1)%10==0:
        torch.save(model,str(t+1)+".pt")

  X=torch.tensor(X, dtype=torch.float32)


 Epoch:1, loss: 2.199222,  Accuracy: 12.3%
 Epoch:2, loss: 2.163816,  Accuracy: 14.9%
 Epoch:3, loss: 2.126990,  Accuracy: 17.9%
 Epoch:4, loss: 2.045382,  Accuracy: 22.7%
 Epoch:5, loss: 1.967567,  Accuracy: 25.5%
 Epoch:6, loss: 1.878051,  Accuracy: 28.2%
 Epoch:7, loss: 1.888043,  Accuracy: 28.1%
 Epoch:8, loss: 1.852163,  Accuracy: 30.6%
 Epoch:9, loss: 1.835782,  Accuracy: 34.0%
 Epoch:10, loss: 1.752299,  Accuracy: 34.1%
 Epoch:11, loss: 1.703156,  Accuracy: 39.4%
 Epoch:12, loss: 1.681556,  Accuracy: 38.2%
 Epoch:13, loss: 1.625558,  Accuracy: 41.6%
 Epoch:14, loss: 1.574025,  Accuracy: 43.2%
 Epoch:15, loss: 1.668702,  Accuracy: 42.0%
 Epoch:16, loss: 1.585002,  Accuracy: 42.5%
 Epoch:17, loss: 1.529841,  Accuracy: 43.7%
 Epoch:18, loss: 1.489644,  Accuracy: 46.9%
 Epoch:19, loss: 1.442913,  Accuracy: 48.8%
 Epoch:20, loss: 1.415444,  Accuracy: 51.8%
 Epoch:21, loss: 1.373322,  Accuracy: 52.6%
 Epoch:22, loss: 1.311752,  Accuracy: 55.7%
 Epoch:23, loss: 1.273688,  Accuracy: 55.

## EVAL

In [22]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1=nn.Conv2d(in_channels=3,out_channels=10, kernel_size=16,stride=2)
        self.conv2=nn.Conv2d(in_channels=10,out_channels=20,kernel_size=15,stride=2)
        self.conv3=nn.Conv2d(in_channels=20,out_channels=10,kernel_size=10,stride=2)
        self.w1 =nn.Linear(23*23*10,100)
        self.w2 =nn.Linear(100,9)
        self.BN1=nn.BatchNorm2d(10)
        self.BN2=nn.BatchNorm2d(20)
        self.BN3=nn.BatchNorm2d(10)
        self.relu=nn.ReLU()

    def forward(self, x):
        x = self.conv1 (x)
        x = self.BN1(x)
        x = self.relu(x)
        x = self.conv2 (x)
        x = self.BN2(x)
        x = self.relu(x)
        x = self.conv3 (x)
        x = self.BN3(x)
        x = self.relu(x)
        x = x.view(x.size(0), -1)
        x = self.w1 (x)
        x = self.relu(x)
        x = self.w2 (x)
        x = self.relu(x)        
        return x
model=NeuralNetwork()
model=torch.load("100.pt")
gpu=0
if torch.cuda.is_available():
    gpu=1
    model=model.cuda()
model.eval()
test_loss, correct = 0, 0
with torch.no_grad():
    X=torch.tensor(test_img, dtype=torch.float32)
    y=torch.tensor(test_lb, dtype=torch.float32)
    if gpu:
        X=X.cuda()
        y=y.cuda()
    pred = model(X)
    test_loss = np.mean(loss_fn(pred, y).item())
    correct = (pred.argmax(1) == y.argmax(1)).type(torch.float).mean().item()

print(f"Test Accuracy: {(100*correct):>0.1f}%, Test Avg loss: {test_loss:>8f} \n")


Test Accuracy: 57.2%, Test Avg loss: 1.723464 



## VGGNet

In [1]:
import torch
from torch import nn
import numpy as np
import os
from torchvision.transforms import v2

batch_size = 64
epochs = 50
gpu=1


def continue_to_train():
    maxx=-1
    for i in os.listdir("./"):
        if ".pt" in i and "vggnet" in i:
            tmp=i.replace("vggnet","")
            tmp=tmp.replace(".pt","")
            tmp=int(tmp)
            maxx=max(tmp,maxx)
    return maxx
def autofit_lr(t):
    if t<99:
        return 0.1-(1e-3)*t
    return 1e-3

tmp=np.load("dataset.npz")
train_img=tmp["arr_0"]
train_lb=tmp["arr_1"]
test_img=tmp["arr_2"]
test_lb=tmp["arr_3"]

train_img1=torch.tensor(train_img, dtype=torch.float32)

transforms = v2.Compose([
    v2.RandomHorizontalFlip(p=0.5),
    #v2.Normalize(mean=[0.485,0.485,0.485], std=[0.229,0.229,0.229]),
    v2.RandomRotation(degrees=(0, 180))
    ])
#normal= v2.Compose([v2.Normalize(mean=[0.485,0.485,0.485], std=[0.229,0.229,0.229])])
augmented_sample = transforms(train_img1)
#test_img = normal(test_img)
#train_img1=normal(train_img1)
train_img = torch.cat((augmented_sample,train_img1))
train_lb  = np.concatenate((train_lb,train_lb),axis=0)



class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Sequential(nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(True),
                                   nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(True),
                                   nn.MaxPool2d(2, stride=2))
        self.conv2 = nn.Sequential(nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(True),
                                   nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(True),
                                   nn.MaxPool2d(2, stride=2))
        self.conv3 = nn.Sequential(nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(True),
                                   nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(True),
                                   nn.MaxPool2d(2, stride=2))
        self.conv4 = nn.Sequential(nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(True),
                                   nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(True),
                                   nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(True),
                                   nn.MaxPool2d(2, stride=2))
        self.conv5 = nn.Sequential(nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(True),
                                   nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(True),
                                   nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(True),
                                   nn.MaxPool2d(2, stride=2))
        self.lin = nn.Sequential(nn.Linear(32768, 4096),
                                 nn.ReLU(True),
                                 nn.Dropout(p=0.5),
                                 nn.Linear(4096, 4096),
                                 nn.ReLU(True),
                                 nn.Dropout(p=0.5),
                                 nn.Linear(4096, 9))

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = x.view(x.size(0), -1)
        x = self.lin(x)
        return x
    def init(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.xavier_uniform_(m.weight)
            if isinstance(m, nn.Linear):
                torch.nn.init.xavier_uniform_(m.weight)
# Initialize the loss function
model = NeuralNetwork()
tmp=continue_to_train()
t=0
if tmp != -1:
    model=torch.load("vggnet"+str(tmp)+".pt")
    print("load success")
    print("train from epoch "+str(tmp))
    t=tmp
else:
    model.init()
loss_fn = nn.CrossEntropyLoss()
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,weight_decay=5e-4)

if gpu:
    if not torch.cuda.is_available():
        gpu = 0

if gpu:
    model = model.cuda()
    loss_fn = loss_fn.cuda()


batch_num = int(train_img.shape[0] / batch_size)
size = len(train_img)

model.train()
while 1:
    if t>epochs:
        break

    correct = 0.
    train_mean_loss = 0.

    for batch in range(batch_num):
        X = train_img[batch * batch_size:(batch + 1) * batch_size, ]
        y = train_lb[batch * batch_size:(batch + 1) * batch_size, :]
        X = torch.tensor(X, dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.float32)
        if gpu:
            X = X.cuda()
            y = y.cuda()
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        correct += (pred.argmax(1) == y.argmax(1)).type(torch.float).mean().item()
        train_mean_loss += loss.item()

    train_mean_loss /= batch_num
    correct /= batch_num

    print(f" Epoch:{t + 1}, loss: {train_mean_loss:>8f},  Accuracy: {(100 * correct):>0.1f}%")
    if (t + 1) % 10 == 0:
        print("saving")
        torch.save(model, "vggnet"+str(t + 1) + ".pt")
        print("save complete")
    t+=1


load success
train from epoch 20


  X = torch.tensor(X, dtype=torch.float32)


 Epoch:21, loss: 0.500593,  Accuracy: 85.2%
 Epoch:22, loss: 0.307095,  Accuracy: 90.3%
 Epoch:23, loss: 0.182059,  Accuracy: 93.8%
 Epoch:24, loss: 0.116370,  Accuracy: 95.9%
 Epoch:25, loss: 0.127077,  Accuracy: 96.1%
 Epoch:26, loss: 0.117656,  Accuracy: 95.7%
 Epoch:27, loss: 0.166254,  Accuracy: 93.9%
 Epoch:28, loss: 0.130747,  Accuracy: 95.5%
 Epoch:29, loss: 0.108476,  Accuracy: 96.6%
 Epoch:30, loss: 0.149163,  Accuracy: 94.8%
saving
save complete
 Epoch:31, loss: 0.129268,  Accuracy: 96.1%
 Epoch:32, loss: 0.060697,  Accuracy: 98.2%
 Epoch:33, loss: 0.025975,  Accuracy: 99.4%
 Epoch:34, loss: 0.020746,  Accuracy: 99.1%
 Epoch:35, loss: 0.036502,  Accuracy: 98.9%
 Epoch:36, loss: 0.022289,  Accuracy: 99.1%
 Epoch:37, loss: 0.049174,  Accuracy: 98.6%
 Epoch:38, loss: 0.055279,  Accuracy: 98.2%
 Epoch:39, loss: 0.082887,  Accuracy: 97.2%
 Epoch:40, loss: 0.055619,  Accuracy: 98.6%
saving
save complete
 Epoch:41, loss: 0.012355,  Accuracy: 99.7%
 Epoch:42, loss: 0.032001,  Accura

## EVAL VGGNET

In [6]:
import torch
from torch import nn
import numpy as np
import os
from torchvision.transforms import v2

batch_size = 100
epochs = 10000
gpu=1


def continue_to_train():
    maxx=-1
    for i in os.listdir("./"):
        if ".pt" in i:
            tmp=i.replace("vggnet","")
            tmp=tmp.replace(".pt","")
            tmp=int(tmp)
            maxx=max(tmp,maxx)
    return maxx
def autofit_lr(t):
    if t<99:
        return 0.1-(1e-3)*t
    return 1e-3

tmp=np.load("dataset.npz")
train_img=tmp["arr_0"]
train_lb=tmp["arr_1"]
test_img=tmp["arr_2"]
test_lb=tmp["arr_3"]



class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Sequential(nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(),
                                   nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(),
                                   nn.MaxPool2d(2, stride=2))
        self.conv2 = nn.Sequential(nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(),
                                   nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(),
                                   nn.MaxPool2d(2, stride=2))
        self.conv3 = nn.Sequential(nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(),
                                   nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(),
                                   nn.MaxPool2d(2, stride=2))
        self.conv4 = nn.Sequential(nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(),
                                   nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(),
                                   nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(),
                                   nn.MaxPool2d(2, stride=2))
        self.conv5 = nn.Sequential(nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(),
                                   nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(),
                                   nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
                                   nn.ReLU(),
                                   nn.MaxPool2d(2, stride=2))
        self.lin = nn.Sequential(nn.Linear(32768, 4096),
                                 nn.ReLU(),
                                 nn.Dropout(p=0.5),
                                 nn.Linear(4096, 4096),
                                 nn.ReLU(),
                                 nn.Dropout(p=0.5),
                                 nn.Linear(4096, 9))

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = x.view(x.size(0), -1)
        x = self.lin(x)
        return x


model = NeuralNetwork()
loss_fn = nn.CrossEntropyLoss()
model = torch.load("./vggnet40.pt")
gpu = 1
if gpu:
    if torch.cuda.is_available():
        model = model.cuda()
        loss_fn = loss_fn.cuda()
    else:
        gpu = 0
model.eval()
test_loss, correct = 0, 0
size=len(test_img)
#size=720
for i in range(size):
    with torch.no_grad():
        X = torch.tensor(test_img[i].reshape(1,3,256,256), dtype=torch.float32)
        y = torch.tensor(test_lb[i].reshape(1,9), dtype=torch.float32)
        if gpu:
            X = X.cuda()
            y = y.cuda()
        pred = model(X)
        test_loss += np.mean(loss_fn(pred, y).item())
        correct += (pred.argmax(1) == y.argmax(1)).type(torch.float).mean().item()
correct/=size
test_loss/=size

print(f"Test Accuracy: {(100 * correct):>0.1f}%, Test Avg loss: {test_loss:>8f} \n")


Test Accuracy: 81.1%, Test Avg loss: 1.168858 

