In [0]:
!gdown --id '1WTI8HzfuqWyH3IIeLG955LUgmnjISzgS' --output Dev.tar.xz
!tar -xvf  'Dev.tar.xz' 

!gdown --id '1pL7uF5Ej-IWYwdcXG37JSQFTUnFWgE07' --output train.tar.xz
!tar -xvf  'train.tar.xz'
!ls

In [0]:
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
import torchvision.models as models
import torch
from torch.optim import lr_scheduler
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
from torch.autograd import Variable
import argparse
import os
from torch.utils.data import DataLoader, Dataset
import time
import numpy as np
from pathlib import Path
import torch.nn.functional as F

In [0]:
from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn import svm
from sklearn.model_selection import cross_val_score
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score
import pickle

import sklearn

In [0]:
# List the current working directory.
!ls

dev_1.csv     Dev.tar.xz       sample_data  train_selected
dev_selected  model_res18.pkl  train_1.csv  train.tar.xz


In [0]:
# Preprocessing pipelines applied to the image arrays handed over by the dataset.
# Both pipelines convert to PIL, resize to 224x224, convert to a tensor and
# normalise with the same per-channel mean/std; only the training pipeline adds
# a random rotation (up to 15 degrees) and a random horizontal flip.
_resize_to = (224, 224)
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]

_train_steps = [
    transforms.ToPILImage(),
    transforms.Resize(_resize_to),
    transforms.RandomRotation(degrees=15),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
]
train_transform = transforms.Compose(_train_steps)

_test_steps = [
    transforms.ToPILImage(),
    transforms.Resize(_resize_to),
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
]
test_transform = transforms.Compose(_test_steps)

class MangoDataset(Dataset):
    """Image-classification dataset backed by a CSV index and an image folder.

    Every usable row of the index has the form ``<image file name>,<label>``
    where the label is a single character.  The label is encoded as
    ``ord(label) - ord('A')`` (so ``'A'`` -> 0, ``'B'`` -> 1, ...).  Rows whose
    label field is not exactly one character (e.g. a header row) are ignored,
    and so are blank lines.

    Args:
        txtName: Path of the CSV index file.
        folderName: Directory containing the image files named in the index.
        transform: Optional callable applied to the array loaded by
            ``cv2.imread`` before it is returned.

    Raises:
        ValueError: If a non-blank row does not have exactly two
            comma-separated fields.
    """

    def __init__(self, txtName, folderName, transform=None):
        imgs = []
        with open(txtName, 'r') as txtFile:
            for lineNo, line in enumerate(txtFile, start=1):
                # Strip all surrounding whitespace so that trailing spaces do
                # not turn a valid one-character label into a skipped row.
                line = line.strip()
                if not line:
                    # Blank (often trailing) lines carry no sample.
                    continue
                fields = line.split(',')
                if len(fields) != 2:
                    raise ValueError(
                        '{}:{}: expected "<image>,<label>", got {!r}'.format(
                            txtName, lineNo, line))
                img, label = fields
                if len(label) != 1:
                    # ignore header
                    continue
                imgs.append((img, ord(label) - ord('A')))

        self.imgs = imgs
        self.transform = transform
        self.folder = Path(folderName)

    def __len__(self):
        """Return the number of (image name, label) pairs read from the index."""
        return len(self.imgs)

    def __getitem__(self, index):
        """Load the image at ``index`` and return ``(image, label)``.

        Raises:
            OSError: If ``cv2.imread`` cannot read the image file.
        """
        imgName, label = self.imgs[index]
        imgPath = self.folder / imgName
        # NOTE(review): cv2.imread yields channels in BGR order; the order is
        # deliberately left untouched here so already-trained models keep
        # seeing the same input layout -- confirm before changing.
        img = cv2.imread(str(imgPath))
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError('cv2.imread could not read image: {}'.format(imgPath))

        if self.transform is not None:
            img = self.transform(img)

        return img, label

In [0]:
# Mini-batch size shared by the training and validation loaders.
batch_size = 25

# Training set: augmented transform, reshuffled every epoch.
train_data = MangoDataset(txtName='train_1.csv', folderName='train_selected', transform=train_transform)
data_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

# Validation set: deterministic transform.  Shuffling is switched off because
# the evaluation metrics do not depend on sample order, and an unshuffled
# loader does not draw from the global random state on every pass.
dev_data = MangoDataset(txtName='dev_1.csv', folderName='dev_selected', transform=test_transform)
val_loader = DataLoader(dev_data, batch_size=batch_size, shuffle=False)

In [25]:
# Number of mini-batches in the training loader, then in the validation loader.
for loader in (data_loader, val_loader):
    print(len(loader))

224
32


In [26]:
# Report the library versions in use, then select the compute device:
# CUDA when it is available, otherwise the CPU.
print('The torch version is {}.'.format(torch.__version__))
print('The scikit-learn version is {}.'.format(sklearn.__version__))
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print('device', device)
class ResidualBlock(nn.Module):
    """Residual unit: two 3x3 conv + batch-norm layers plus a skip connection.

    Args:
        inchannel: Number of input channels.
        outchannel: Number of output channels.
        stride: Stride of the first 3x3 convolution (and of the 1x1 projection
            on the skip path when one is needed).
    """

    def __init__(self, inchannel, outchannel, stride=1):
        super().__init__()

        # Main path: conv -> BN -> ReLU -> conv -> BN.
        main_path = [
            nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
        ]
        self.left = nn.Sequential(*main_path)

        # Skip path: an empty Sequential (identity) when input and output
        # shapes already agree, otherwise a 1x1 conv + BN projection.
        shapes_agree = (stride == 1 and inchannel == outchannel)
        if shapes_agree:
            skip_path = []
        else:
            skip_path = [
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannel),
            ]
        self.shortcut = nn.Sequential(*skip_path)

    def forward(self, x):
        """Return ReLU(main_path(x) + skip_path(x))."""
        residual = self.left(x)
        skip = self.shortcut(x)
        # In-place accumulation into the main-path output.
        residual.add_(skip)
        return F.relu(residual)

class ResNet(nn.Module):
    """Convolutional classifier built from four stages of residual blocks.

    Layout: a 3x3 stem convolution (3 -> 64 channels, stride 1) with batch
    norm and ReLU, four stages of two blocks each with 64/128/256/512 output
    channels (the first block of stages 2-4 is created with stride 2), average
    pooling to a 7x7 map, and a linear layer mapping the flattened
    ``512 * 7 * 7`` features to ``num_classes`` logits.

    Args:
        ResidualBlock: Block class; it is instantiated as
            ``block(in_channels, out_channels, stride)``.
        num_classes: Number of output logits.
    """

    def __init__(self, ResidualBlock, num_classes=3):
        super(ResNet, self).__init__()
        # Channel count fed to the next block; updated by make_layer().
        self.inchannel = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        self.layer1 = self.make_layer(ResidualBlock, 64,  2, stride=1)
        self.layer2 = self.make_layer(ResidualBlock, 128, 2, stride=2)
        self.layer3 = self.make_layer(ResidualBlock, 256, 2, stride=2)
        self.layer4 = self.make_layer(ResidualBlock, 512, 2, stride=2)
        self.fc = nn.Linear(512 * 7 * 7, num_classes)

    def make_layer(self, block, channels, num_blocks, stride):
        """Build one stage of ``num_blocks`` blocks with ``channels`` outputs.

        Only the first block receives ``stride``; the remaining blocks use
        stride 1.  ``self.inchannel`` is advanced to ``channels`` as a side
        effect so the next stage starts from the right channel count.
        """
        strides = [stride] + [1] * (num_blocks - 1)   # e.g. [2, 1] for stride=2, num_blocks=2
        layers = []
        for stride in strides:
            layers.append(block(self.inchannel, channels, stride))
            self.inchannel = channels
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for a batch of 3-channel images."""
        out = self.conv1(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        # Pool to the fixed 7x7 map that self.fc expects.  When layer4 yields a
        # 28x28 map this uses the same 4x4 windows as avg_pool2d(out, 4), and
        # it additionally keeps the classifier usable for other input sizes.
        out = F.adaptive_avg_pool2d(out, (7, 7))
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


def ResNet18():
    """Build a ResNet assembled from ResidualBlock units, using ResNet's default arguments."""
    network = ResNet(ResidualBlock)
    return network


The torch version is 1.5.0+cu101.
The scikit-learn version is 0.22.2.post1.
device cuda


## Training:
batch_size = 25, epochs = 40;

In [0]:
# Train ResNet18 with SGD and evaluate on the validation loader after every epoch.
model = ResNet18().to(device)  # move the model to the selected device
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)
# Optional: a torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
# stepped with the epoch's validation loss can be added here.

num_epoch = 40

v_batch = len(val_loader)   # number of validation batches, used to average the validation loss
length = len(data_loader)   # number of training batches per epoch (constant, so computed once)
loss_values = []            # running mean of the training loss within the epoch, one entry per iteration
valid_loss = []             # mean of the per-batch validation losses, one entry per epoch
for epoch in range(num_epoch):
    epoch_start_time = time.time()
    sum_loss = 0.0
    correct = 0
    total = 0

    model.train()  # enable training behaviour (batch-norm statistics are updated)
    for i, data in enumerate(data_loader):
        optimizer.zero_grad()
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        sum_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        # .item() keeps the counters plain Python numbers instead of tensors.
        correct += (predicted == labels).sum().item()
        loss_values.append(sum_loss / (i + 1))
    # After a full pass i + 1 == length; max(..., 1) only guards an empty loader.
    print('TRAINING: [epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% ' % (epoch + 1, (epoch + 1) * length, sum_loss / max(length, 1), 100. * correct / max(total, 1)))

    model.eval()  # switch batch norm to its running statistics
    val_total = 0
    val_correct = 0
    val_loss = 0.0
    with torch.no_grad():
        for data in val_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            predicted = outputs.argmax(dim=1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
            val_loss += loss.item()
    acc = 100. * val_correct / max(val_total, 1)
    # Record one value per epoch (not one cumulative partial sum per batch).
    valid_loss.append(val_loss / max(v_batch, 1))
    print('VAL: [epoch:%d, time:%d] |Acc: %.3f%% ' % (epoch + 1, (time.time() - epoch_start_time), acc))
    print("\n")

# torch.save on the module pickles the whole model object, so loading this file
# later requires the ResNet / ResidualBlock class definitions to be importable.
torch.save(model, 'model_res18_2.pkl')



In [0]:
# Plot the training-loss curve recorded in loss_values (one point per training
# iteration; each point is the running mean of the loss within its epoch).
fig, ax = plt.subplots()
ax.plot(loss_values, color='green')
ax.set_xlabel('Training iteration')
ax.set_ylabel('Running mean training loss')
ax.set_title('Training loss')
plt.show()

Note: validation-set accuracy is about 77-79%.