In [96]:
from tensorflow.keras.datasets import mnist
import torch
from torch import nn 
import numpy as np 
from torch.utils.data import TensorDataset,DataLoader
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [151]:
train_data,test_data = mnist.load_data()

In [152]:
# 数据集太大，用一小部分试一试
data,label = train_data[0][0:1000],train_data[1][0:1000]
# 考虑到灰度数据用conv2d处理的时候需要增加一个通道维度，需要对数据集进行一个变换
data = np.expand_dims(data,1)

In [153]:
batch_size = 32

In [154]:
# 撸个简单的卷积神经网络
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1,3,3,1,1)
        self.pooling = nn.MaxPool2d(2)
        self.bn = nn.BatchNorm2d(3)
        self.relu = nn.ReLU()
        self.flatten = nn.Flatten()
        self.fc =  nn.Linear(14*14*3,10)
    def forward(self,x):
        x = self.conv1(x)
        x = self.pooling(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.flatten(x)
        x= self.fc(x)
        return x


In [155]:
# 设置一下Batch_size


batch_size = 32
# 构造数据集
data = torch.Tensor(data)
label = torch.LongTensor(label)
# 划分数据集
x_train,x_test,y_train,y_test = train_test_split(data,label)
x_train = x_train[150:]
y_train = y_train[150:]

x_valid = x_train[0:150]
y_valid = y_train[0:150]

TrainData = TensorDataset(x_train,y_train)
ValidData = TensorDataset(x_valid,y_valid)
TestData = TensorDataset(x_test,y_test)

# 搞个loader
TrainLoader = DataLoader(TrainData,batch_size=batch_size,shuffle=True)
ValidLoader = DataLoader(ValidData,batch_size=batch_size,shuffle=True)
TestLoader = DataLoader(TestData,batch_size=batch_size,shuffle=True)


In [156]:
# 训练(我觉得我有必要写一个Trainer的库，方便之后进行调用，但是这次主要是为了搞wandb)
# 在写主要的训练函数之前，需要先写一个eval函数，用于对验证集和测试集进行评估。
# 再补一个函数，对于分类的数据，计算accuracy,输入为np.array类型的输出值（因此需要在训练过程中进行一个数据转换）
def compute_acc(pred,label):
    return np.equal(pred,label).mean()
    
def eval(model,dataloader,loss_function,device):
    loss = 0
    acc = 0
    model.eval() # 禁用dropout和bn层
    for idx,(data,label) in tqdm(enumerate(dataloader)):
        data = data.to(device)
        label = label.to(device)
        out = model(data)
        # 计算loss值
        loss += loss_function(out,label).item()
        # 计算accuracy
        # argmax这块先留个心眼，我不确定这个axis是不是需要微调
        pred = torch.argmax(out,axis=1)
        pred = pred.cpu().detach().numpy()
        label = label.cpu().detach().numpy()
        acc += compute_acc(pred,label)
    
    avg_loss = loss/ len(dataloader)
    avg_acc = acc / len(dataloader)
    return avg_loss,avg_acc
         
# 现在可以写train函数了   (model需要先放到GPU上才可以)
def train(model,optimizer,loss_function,epoch,trainloader,validloader,testloader,device):
    # 开始训练
    for i in tqdm(range(1,epoch+1)):
        # 进行一个model.train()
        model.train()
        # 对于训练过程中每一个epoch的参数记录
        train_loss = 0
        train_acc = 0
        for idx,(data,label) in tqdm(enumerate(trainloader)):
            data = data.to(device)
            label = label.to(device)
            out = model(data)
            loss = loss_function(out,label)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # 计算出来损失函数
            train_loss += loss.item()
            # 转到cpu上计算准确率
            pred = torch.argmax(out,axis=1)
            pred = pred.cpu().detach().numpy()
            label = label.cpu().detach().numpy()
            train_acc += compute_acc(pred,label)
        # 一个epoch结束后，计算均值
        train_loss = train_loss / len(trainloader)
        train_acc = train_acc  / len(trainloader)
        # 剩下的是验证集和测试集
        valid_loss,valid_acc = eval(model,validloader,loss_function,device=device)
        test_loss,test_acc = eval(model,testloader,loss_function,device=device)            
        print('epoch:',i)
        print('Train loss',train_loss,'train_acc',train_acc)
        print('Valid loss',valid_loss,'valid_acc',valid_acc)
        print('Test loss',test_loss,'test acc',test_acc)

In [159]:
# 调用cuda,同时设置一些参数
device = torch.device(0)
epoch = 20
model = Net()
optimizer = torch.optim.Adam(model.parameters())
loss_function = nn.CrossEntropyLoss()
model = model.to(device)

In [160]:
# 开始第一次debug
train(model,optimizer,loss_function,epoch,TrainLoader,ValidLoader,TestLoader,device)

19it [00:00, 169.59it/s]0:00<?, ?it/s]
5it [00:00, 555.67it/s]
8it [00:00, 380.98it/s]
  5%|▌         | 1/20 [00:00<00:02,  6.50it/s]

epoch: 1
Train loss 2.09795417911128 train_acc 0.31359649122807015
Valid loss 1.7310303926467896 valid_acc 0.6056818181818182
Test loss 1.778806820511818 test acc 0.5108173076923077


19it [00:00, 169.42it/s]
5it [00:00, 525.56it/s]
8it [00:00, 615.50it/s]
 10%|█         | 2/20 [00:00<00:02,  6.68it/s]

epoch: 2
Train loss 1.477916817916067 train_acc 0.7050438596491229
Valid loss 1.1660857439041137 valid_acc 0.7863636363636364
Test loss 1.2636752426624298 test acc 0.7205528846153846


19it [00:00, 237.07it/s]
5it [00:00, 714.39it/s]
8it [00:00, 800.08it/s]
 15%|█▌        | 3/20 [00:00<00:02,  7.65it/s]

epoch: 3
Train loss 0.9774685439310575 train_acc 0.8004385964912281
Valid loss 0.7643369913101197 valid_acc 0.884659090909091
Test loss 0.9172647669911385 test acc 0.7950721153846154


19it [00:00, 249.75it/s]
5it [00:00, 623.54it/s]
8it [00:00, 724.81it/s]
 20%|██        | 4/20 [00:00<00:01,  8.24it/s]

epoch: 4
Train loss 0.6924863692961241 train_acc 0.8601973684210527
Valid loss 0.5671532034873963 valid_acc 0.9045454545454545
Test loss 0.7444128766655922 test acc 0.8179086538461539


19it [00:00, 254.77it/s]
5it [00:00, 625.18it/s]
8it [00:00, 799.89it/s]
 25%|██▌       | 5/20 [00:00<00:01,  8.73it/s]

epoch: 5
Train loss 0.5305022173806241 train_acc 0.8700657894736842
Valid loss 0.42557480931282043 valid_acc 0.934659090909091
Test loss 0.6368088200688362 test acc 0.8575721153846154


19it [00:00, 273.14it/s]
5it [00:00, 830.52it/s]
8it [00:00, 798.61it/s]


epoch: 6
Train loss 0.4247501821894395 train_acc 0.9002192982456141
Valid loss 0.35903558135032654 valid_acc 0.934659090909091
Test loss 0.581682376563549 test acc 0.8587740384615384


19it [00:00, 239.83it/s]
5it [00:00, 997.55it/s]
8it [00:00, 614.43it/s]
 35%|███▌      | 7/20 [00:00<00:01,  9.27it/s]

epoch: 7
Train loss 0.35156616803846863 train_acc 0.9155701754385964
Valid loss 0.3020439982414246 valid_acc 0.947159090909091
Test loss 0.5383906736969948 test acc 0.8780048076923077


19it [00:00, 285.12it/s]
5it [00:00, 831.02it/s]
8it [00:00, 800.00it/s]


epoch: 8
Train loss 0.30090986898070887 train_acc 0.9331140350877194
Valid loss 0.24809558391571046 valid_acc 0.96875
Test loss 0.5097426995635033 test acc 0.8740985576923077


19it [00:00, 296.85it/s]
5it [00:00, 625.03it/s]
8it [00:00, 727.22it/s]
 45%|████▌     | 9/20 [00:01<00:01,  9.86it/s]

epoch: 9
Train loss 0.2585523316734715 train_acc 0.9380482456140351
Valid loss 0.2167094886302948 valid_acc 0.953409090909091
Test loss 0.5063803605735302 test acc 0.8722956730769231


19it [00:00, 271.32it/s]
5it [00:00, 800.78it/s]
8it [00:00, 1142.71it/s]


epoch: 10
Train loss 0.2236514577740117 train_acc 0.9583333333333333
Valid loss 0.18981288075447084 valid_acc 0.978409090909091
Test loss 0.4906037747859955 test acc 0.8677884615384616


19it [00:00, 311.47it/s]
5it [00:00, 832.24it/s]
8it [00:00, 704.88it/s]
 55%|█████▌    | 11/20 [00:01<00:00, 10.26it/s]

epoch: 11
Train loss 0.1913856805155152 train_acc 0.96875
Valid loss 0.15125632286071777 valid_acc 0.99375
Test loss 0.46745049208402634 test acc 0.8897235576923077


19it [00:00, 279.34it/s]
5it [00:00, 999.98it/s]
8it [00:00, 997.31it/s]


epoch: 12
Train loss 0.16732415831402728 train_acc 0.975328947368421
Valid loss 0.1345185309648514 valid_acc 0.99375
Test loss 0.4606877751648426 test acc 0.8888221153846154


19it [00:00, 254.98it/s]
5it [00:00, 416.45it/s]
8it [00:00, 694.62it/s]
 65%|██████▌   | 13/20 [00:01<00:00, 10.20it/s]

epoch: 13
Train loss 0.14749037082258024 train_acc 0.9846491228070174
Valid loss 0.11494898051023483 valid_acc 0.99375
Test loss 0.4608079567551613 test acc 0.8849158653846154


19it [00:00, 275.71it/s]
5it [00:00, 830.72it/s]
8it [00:00, 666.69it/s]


epoch: 14
Train loss 0.12919698420323825 train_acc 0.9868421052631579
Valid loss 0.10325817316770554 valid_acc 0.990909090909091
Test loss 0.451061999425292 test acc 0.8888221153846154


19it [00:00, 301.59it/s]
5it [00:00, 833.39it/s]
8it [00:00, 666.29it/s]
 75%|███████▌  | 15/20 [00:01<00:00, 10.35it/s]

epoch: 15
Train loss 0.11675233315480382 train_acc 0.9868421052631579
Valid loss 0.08611958250403404 valid_acc 0.99375
Test loss 0.45040670968592167 test acc 0.8966346153846154


19it [00:00, 294.92it/s]
5it [00:00, 833.39it/s]
8it [00:00, 1000.22it/s]


epoch: 16
Train loss 0.10422442284853835 train_acc 0.9901315789473685
Valid loss 0.08201652914285659 valid_acc 1.0
Test loss 0.47118449583649635 test acc 0.8852163461538461


19it [00:00, 289.98it/s]
5it [00:00, 999.98it/s]
8it [00:00, 799.89it/s]
 85%|████████▌ | 17/20 [00:01<00:00, 10.59it/s]

epoch: 17
Train loss 0.09066050676138777 train_acc 0.9945175438596491
Valid loss 0.07204568386077881 valid_acc 1.0
Test loss 0.4426756836473942 test acc 0.8975360576923077


19it [00:00, 281.29it/s]
5it [00:00, 623.69it/s]
8it [00:00, 1000.10it/s]


epoch: 18
Train loss 0.08182467186921522 train_acc 0.9983552631578947
Valid loss 0.06482832953333854 valid_acc 1.0
Test loss 0.4498145915567875 test acc 0.8957331730769231


19it [00:00, 233.04it/s]
5it [00:00, 554.49it/s]
8it [00:00, 999.80it/s]
 95%|█████████▌| 19/20 [00:01<00:00, 10.39it/s]

epoch: 19
Train loss 0.07475087252494536 train_acc 0.9983552631578947
Valid loss 0.05722342878580093 valid_acc 1.0
Test loss 0.4502667561173439 test acc 0.9014423076923077


19it [00:00, 275.19it/s]
5it [00:00, 833.29it/s]
8it [00:00, 1000.40it/s]
100%|██████████| 20/20 [00:02<00:00,  9.78it/s]

epoch: 20
Train loss 0.06720493439781039 train_acc 1.0
Valid loss 0.05300777927041054 valid_acc 1.0
Test loss 0.4521252065896988 test acc 0.8987379807692307





In [1]:
from __future__ import print_function
import argparse
import random  # to set the python random seed
import numpy  # to set the numpy random seed
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# Ignore excessive warnings
import logging

logging.propagate = False
logging.getLogger().setLevel(logging.ERROR)

# WandB – Import the wandb library
import wandb


In [161]:
import torchvision

In [164]:
temp = torchvision.datasets.mnist

In [165]:
temp.

<module 'torchvision.datasets.mnist' from 'd:\\anaconda3\\envs\\pytorch\\lib\\site-packages\\torchvision\\datasets\\mnist.py'>

In [2]:
!wandb login

wandb: Currently logged in as: zhijiao. Use `wandb login --relogin` to force relogin


In [7]:
# 定义一个简单的卷积神经网络

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)

        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # 卷积 -> 池化 -> ReLU
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        
        # flatten
        
        x = x.view(-1, 16 * 5 * 5)

        # fc1 -> ReLU -> fc2 -> ReLU -> fc3
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        
        # softmax
        return F.log_softmax(x, dim=1)


In [None]:
def train(config, model, device, train_loader, optimizer, epoch):
    # switch model to training mode. This is necessary for layers like dropout, batchNorm etc.
    # which behave differently in training and evaluation mode.
    model.train()

    # we loop over the data iterator, and feed the inputs to the network and adjust the weights.
    for batch_id, (data, target) in enumerate(train_loader):
        if batch_id > 20:
            break
        # Loop the input features and labels from the training dataset.
        data, target = data.to(device), target.to(device)

        # Reset the gradients to 0 for all learnable weight parameters
        optimizer.zero_grad()

        # Forward pass: Pass image data from training dataset, make predictions
        # about class image belongs to (0-9 in this case).
        output = model(data)

        # Define our loss function, and compute the loss
        loss = F.nll_loss(output, target)

        # Backward pass:compute the gradients of loss,the model's parameters
        loss.backward()

        # update the neural network weights
        optimizer.step()

In [9]:
# wandb.log用来记录一些日志(accuracy,loss and epoch), 便于随时查看网路的性能
def test(args, model, device, test_loader, classes):
    model.eval()
    # switch model to evaluation mode.
    # This is necessary for layers like dropout, batchNorm etc. which behave differently in training and evaluation mode
    test_loss = 0
    correct = 0
    example_images = []

    with torch.no_grad():
        for data, target in test_loader:
            # Load the input features and labels from the test dataset
            data, target = data.to(device), target.to(device)

            # Make predictions: Pass image data from test dataset,
            # make predictions about class image belongs to(0-9 in this case)
            output = model(data)

            # Compute the loss sum up batch loss
            test_loss += F.nll_loss(output, target, reduction='sum').item()

            # Get the index of the max log-probability
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

            # Log images in your test dataset automatically,
            # along with predicted and true labels by passing pytorch tensors with image data into wandb.
            example_images.append(wandb.Image(
                data[0], caption="Pred:{} Truth:{}".format(classes[pred[0].item()], classes[target[0]])))

   # wandb.log(a_dict) logs the keys and values of the dictionary passed in and associates the values with a step.
   # You can log anything by passing it to wandb.log(),
   # including histograms, custom matplotlib objects, images, video, text, tables, html, pointclounds and other 3D objects.
   # Here we use it to log test accuracy, loss and some test images (along with their true and predicted labels).
    wandb.log({
        "Examples": example_images,
        "Test Accuracy": 100. * correct / len(test_loader.dataset),
        "Test Loss": test_loss
    })
