# number detection project

## mnist_number_detection.py file 
- 1. load the train and test datasets
- 2. use torch's `DataLoader` to split the datasets into batches
- 3. initialize the model
- 4. define the loss function
- 5. define the optimizer
- 6. train the model

## model.py 
- 1. initialize the layers we will use in the model
- 2. write the forward pass 

## test.py
- 1. load the model 
- 2. feed in the test dataset and then analyze the correctness

In [None]:
import numpy as np
import torch 
import torch.utils
import torch.utils.data
import torchvision #store the common dataset used in the computer vision

import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.optim as optim
import model
import torch.nn.functional as F

# Hyper-parameters shared by the rest of this script.
batch_size_train = 64
batch_size_test = 1000
learning_rate = 0.001

# Where the MNIST files are cached and where the trained weights are written.
dataset_dir = "/home/yuzhen/Desktop/ml_learn/number_detection_project/dataset"
model_save_dir = "/home/yuzhen/Desktop/ml_learn/number_detection_project/md.pth"

# Convert each image to a tensor, then normalize it with the
# mean (0.1307) and standard deviation (0.3081) given below.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

# Training split: downloaded on first use, reshuffled every epoch.
train_dataset = torchvision.datasets.MNIST(
    root=dataset_dir,
    train=True,
    download=True,
    transform=transform,
)
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size_train,
    shuffle=True,
)

# Test split, served in larger batches.
test_dataset = torchvision.datasets.MNIST(
    root=dataset_dir,
    train=False,
    download=True,
    transform=transform,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size_test,
    shuffle=True,
)


## select the compute device -----------------------------------------------------------
# Prefer CUDA when it is available, otherwise fall back to the CPU.
# The model and each batch are moved onto this device later in the script.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("the device currently use is: ", device)
##------------------------------------------------------------------------------------

## this part is used to display the shape of the test data and label -----------------
# examples = enumerate(test_loader)
# batch_idx, (example_data, example_targets) = next (examples)

# print(example_data.shape)
# print(example_targets.shape)
# plt.imshow(example_data[0][0],cmap="gray")
# plt.show()
# print("the label of the first data is: ", example_targets[0])

##------------------------------------------------------------------------------------

## build the model (its architecture lives in the separate model.py file)

# Instantiate the network and place its parameters on the selected device.
network = model.cnn_model()
network = network.to(device)

# Optimizer: SGD with momentum.
# Momentum makes each update depend on the accumulated past gradients as well
# as the current one, which:
#   1. keeps the update moving in the direction of previous iterations
#   2. speeds up convergence
#   3. damps oscillations, giving a smoother trajectory
optimizer = optim.SGD(
    network.parameters(),
    lr=learning_rate,
    momentum=0.9,
)

# Loss function: negative log-likelihood (NLL) loss.
# This is a multi-class classification problem and the model returns
# log_softmax() outputs, so nll_loss is the matching criterion.
loss = F.nll_loss


#training loop 

def train(epoch):
    """Run one pass over ``train_loader`` and update ``network`` in place.

    Relies on the module-level ``network``, ``optimizer``, ``loss``,
    ``device`` and ``train_loader``. A progress line is printed for every
    10th batch.

    Args:
        epoch: Epoch index; used only in the progress message.
    """
    network.train()  # switch to training mode (dropout active)

    # Loop-invariant, so compute it once instead of once per batch.
    total_data_size = len(train_loader.dataset)
    # Number of samples consumed *before* the current batch. A running count
    # stays correct even when a batch is smaller than the others, unlike
    # ``batch_idx * len(data)``.
    finished_data_size = 0

    for batch_idx, (data, target) in enumerate(train_loader):
        # Move the batch to the same device as the model.
        data = data.to(device)
        target = target.to(device)

        # STEP 1: clear gradients left over from the previous step.
        optimizer.zero_grad()
        # STEP 2: forward pass and loss (the model handles the batch dimension).
        output = network(data)
        loss_output = loss(output, target)
        # STEP 3: back-propagate.
        loss_output.backward()
        # STEP 4: update all model parameters.
        optimizer.step()

        # Only read the loss back when it is actually logged: ``.item()``
        # copies the value to the host, which would otherwise force a
        # device synchronization on every single batch.
        if batch_idx % 10 == 0:
            percentage = (finished_data_size / total_data_size) * 100
            loss_value = loss_output.item()
            print(f"current epoch: {epoch} ({finished_data_size} / {total_data_size}) {percentage:.2f}%  loss is: {loss_value:.4f}")

        finished_data_size += len(data)
 
 
# Train for 6 epochs (0..5). From epoch 4 onwards the learning rate is
# multiplied by 0.1 at the start of each epoch.
for epoch in range(0, 6):
    if epoch > 3:
        learning_rate *= 0.1
        # The optimizer copied the learning rate when it was constructed, so
        # rebinding the Python variable alone has no effect. Push the new
        # value into every parameter group so the decay is actually applied.
        for param_group in optimizer.param_groups:
            param_group["lr"] = learning_rate
    train(epoch)


## persist the trained parameters to disk so they can be reloaded later without retraining
trained_weights = network.state_dict()
torch.save(trained_weights, model_save_dir)




## model.py 
- 1. initialize the layers we will use in the model
- 2. write the forward pass 

In [None]:
import numpy as py 
import torch.nn as nn 
import torch.nn.functional as F

## when we define a model:
# 1. create model class 
#       -> define the structure of the model
#       -> define the forward path 
# 2. define the optimizer 
#       --> optimizer is used to adjust the parameters in the model 
# 3. loss function
#       --> the optimizer updates the parameters in the direction opposite to the gradient of the loss, so that the loss decreases

class cnn_model(nn.Module):
    """Small CNN mapping a batch of 1x28x28 images to 10 log-probabilities.

    Architecture (shapes are per sample, channels first):
        conv1 -> max-pool -> LeakyReLU                 1x28x28  -> 10x14x14
        conv2 -> dropout2d -> max-pool -> LeakyReLU    10x14x14 -> 20x7x7
        flatten -> fc1 -> dropout -> LeakyReLU         980      -> 1280
        fc2 -> log_softmax                             1280     -> 10

    The dropout layers hold no parameters, so the ``state_dict`` keys are
    only those of ``conv1``, ``conv2``, ``fc1`` and ``fc2``.
    """

    def __init__(self):
        super(cnn_model, self).__init__()
        # 3x3 convolutions with padding=1 keep the spatial size unchanged.
        self.conv1 = nn.Conv2d(1, 10, kernel_size=3, stride=1, padding=1)   # 1x28x28  -> 10x28x28
        self.conv2 = nn.Conv2d(10, 20, kernel_size=3, stride=1, padding=1)  # 10x14x14 -> 20x14x14
        # Channel-wise dropout, meant for 4-D convolutional feature maps.
        self.drop_out = nn.Dropout2d(p=0.5)
        # Element-wise dropout for the flat fc1 activations. Dropout2d on a
        # 2-D (batch, features) tensor is deprecated and emits a warning.
        self.fc_drop_out = nn.Dropout(p=0.5)
        self.fc1 = nn.Linear(7 * 7 * 20, 1280)
        self.fc2 = nn.Linear(1280, 10)
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)  # halves height and width
        self.relu = nn.LeakyReLU()

    def conv_combine(self, x, current_conv, if_use_drop_down):
        """Apply ``current_conv``, optional dropout, max-pool and LeakyReLU.

        Args:
            x: Input feature map.
            current_conv: Convolution layer to apply first.
            if_use_drop_down: When true, apply ``self.drop_out`` right after
                the convolution.

        Returns:
            The activated feature map after pooling.
        """
        x = current_conv(x)
        if if_use_drop_down:
            x = self.drop_out(x)  # zeroes whole channels with p=0.5 in training mode
        x = self.max_pool(x)
        x = self.relu(x)
        return x

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, 10)."""
        x = self.conv_combine(x, self.conv1, False)
        x = self.conv_combine(x, self.conv2, True)
        x = x.view(x.size(0), -1)  # flatten everything except the batch dimension
        # Dropout modules are already a no-op in eval mode, so no explicit
        # ``self.training`` branch is needed here.
        x = self.relu(self.fc_drop_out(self.fc1(x)))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)



## test.py
- 1. load the model 
- 2. feed in the test dataset and then analyze the correctness

In [None]:
import numpy as py
import model 
# from mnist_number_detection import model_save_dir, test_loader
import torch 
import torchvision
from torchvision.transforms import transforms
# Locations of the cached MNIST files and of the trained weights.
dataset_dir = "/home/yuzhen/Desktop/ml_learn/number_detection_project/dataset"
model_save_dir = "/home/yuzhen/Desktop/ml_learn/number_detection_project/md.pth"

# Settings mirrored from the training script.
batch_size_train = 64
batch_size_test = 1000
learning_rate = 0.001

# Same preprocessing as in training: convert to tensor, then normalize
# with the mean (0.1307) and standard deviation (0.3081) given below.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

# MNIST test split, served in batches of ``batch_size_test``.
test_dataset = torchvision.datasets.MNIST(
    root=dataset_dir,
    train=False,
    download=True,
    transform=transform,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size_test,
    shuffle=True,
)

## build the model structure from the model file
network = model.cnn_model()
## load the trained parameters from the .pth file
# map_location="cpu": the checkpoint may have been saved from a CUDA model.
# Without remapping, torch.load fails on a machine that has no GPU, and this
# script runs the model on the CPU anyway.
network.load_state_dict(torch.load(model_save_dir, map_location="cpu"))
# Evaluation mode: disables dropout so predictions are deterministic.
network.eval()

# Count correct and wrong predictions over the whole test set.
correct_prediction = 0
wrong_prediction = 0
print("the size of the test_dataset is: ", len(test_loader.dataset))

# Inference only: no_grad() skips building the autograd graph, and each batch
# goes through the network in a single forward pass instead of one call per
# image.
with torch.no_grad():
    for data, target in test_loader:
        result = network(data)
        pred = result.argmax(dim=1)  # index of the highest log-probability per sample
        batch_correct = int((pred == target).sum().item())
        correct_prediction += batch_correct
        wrong_prediction += target.size(0) - batch_correct


total_prediction = correct_prediction + wrong_prediction
# Guard against an empty loader so the summary never divides by zero.
percentage = (correct_prediction / total_prediction) * 100 if total_prediction else 0.0
print("the correct_prediction num is:", correct_prediction)
print("the wrong_prediction num is: ", wrong_prediction)
print("the total_prediction num is: ", total_prediction)
print(f"the correct rate is: {percentage:.2f}%")


