In [None]:
# !wget https://www.cs.ccu.edu.tw/~wtchu/projects/Weather/Image.zip
# !unzip Image.zip
# !mv Image weather_dataset

# Uncomment to run on Colab 

In [None]:
#from google.colab import drive
#drive.mount('/content/gdrive')

In [None]:
# !pip install pyunpack
# !pip install patool
# from pyunpack import Archive

# Archive('/content/gdrive/My Drive/Colab Notebooks/Project/Image.rar').extractall('./')

# Split Dataset to Train, Val, Test

In [None]:
!pip install split-folders

In [None]:
# import split_folders

In [None]:
# split_folders.ratio("Image", output="weather_dataset", seed=1337, ratio=(0.7,0.15,0.15))

In [None]:
# Uncomment to run on Colab; change the paths below to match your setup.

# !cp gdrive/My\ Drive/Colab\ Notebooks/Project/*.sh  .
# !cp gdrive/My\ Drive/Colab\ Notebooks/Project/*.py  .
# !chmod +x augment_images.sh
# !./augment_images.sh "./weather_dataset/train/"

In [None]:
import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import random_split as rsplit
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
import matplotlib.pyplot as plt
import os
import torchvision.models
import meta_parser

# Seed both NumPy's global RNG and PyTorch's RNG with the same value so that
# weight initialisation and data shuffling are repeatable across runs.
SEED = 1
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
def convert_to_dataset(mdata):
    """Turn parsed weather metadata into a class-balanced list of samples.

    Parameters
    ----------
    mdata : dict
        Maps a sample id to a sequence whose first element is a dict of
        feature values (each convertible with ``float``) and whose second
        element is the class name, one of ``classes`` below.

    Returns
    -------
    list
        ``[features, label]`` pairs grouped by class, in ``classes`` order.
        ``features`` is a 1-D ``float32`` array built from the feature dict's
        values in iteration order; ``label`` is a 0-d ``int32`` array holding
        the class index.  Every class that has at least one sample is
        oversampled, by repeating its samples cyclically, up to the size of
        the largest class.  Classes with no samples contribute nothing.

    Raises
    ------
    ValueError
        If a class name is not in ``classes`` or a feature value cannot be
        converted to ``float``.
    """
    classes = ["cloudy", "foggy", "rain", "snow", "sunny"]

    # Bucket the samples per class in a single pass over the metadata.
    per_class = [[] for _ in classes]
    for weather_feats in mdata.values():
        feats = np.asarray(
            [float(wfeat) for wfeat in weather_feats[0].values()],
            dtype=np.float32,
        )
        class_idx = classes.index(weather_feats[1])
        label = np.asarray(class_idx, dtype=np.int32)
        per_class[class_idx].append([feats, label])

    # Size of the largest class: the length every non-empty class is padded to.
    target = max(len(samples) for samples in per_class)
    print(target)

    # BALANCE DATASET
    new_dataset = []
    for samples in per_class:
        if not samples:
            # Nothing to duplicate. The previous loop indexed into the empty
            # list here and raised IndexError when a class was absent.
            continue
        # Cycle through the original samples until the class reaches `target`.
        new_dataset.extend(samples[k % len(samples)] for k in range(target))

    return new_dataset
    

In [None]:
def get_data_loader(batch_size=8, sets=["train", "val"]):
    """Build one shuffling ``DataLoader`` per requested split.

    For every name in ``sets`` the metadata is fetched through the
    module-level object ``m`` (``m.get_available_metadata(name)``), converted
    with ``convert_to_dataset`` and wrapped in a ``DataLoader``.  ``m`` must
    therefore be defined before this function is called.

    Parameters
    ----------
    batch_size : int
        Batch size passed to every ``DataLoader``.
    sets : list of str
        Split names, forwarded one by one to ``m.get_available_metadata``.

    Returns
    -------
    list
        ``DataLoader`` objects in the same order as ``sets``.
    """

    def _loader_for(split_name):
        # Announce, fetch, convert and wrap a single split.
        print("Getting Data for ", split_name)
        samples = convert_to_dataset(m.get_available_metadata(split_name))
        return torch.utils.data.DataLoader(samples, shuffle=True, batch_size=batch_size)

    return [_loader_for(split_name) for split_name in sets]

In [None]:
def get_accuracy(model, data_loader, conf_matrix=False):
    """Compute classification accuracy of ``model`` over ``data_loader``.

    Parameters
    ----------
    model : torch.nn.Module
        Classifier returning one score per class for every input row.
    data_loader : iterable
        Yields ``(features, labels)`` tensor pairs.  Any batch size is
        accepted, including when ``conf_matrix`` is true.
    conf_matrix : bool
        When true, also return a 5x5 confusion matrix.

    Returns
    -------
    float or (float, numpy.ndarray)
        The fraction of correctly classified samples (``0.0`` when the loader
        yields no samples).  With ``conf_matrix=True`` a tuple
        ``(accuracy, matrix)`` where ``matrix[i][j]`` counts samples of true
        class ``i`` that were predicted as class ``j``.
    """
    classes = ['cloudy', 'foggy', 'rain', 'snow', 'sunny']
    counts = np.zeros(shape=(len(classes), len(classes)))

    # Evaluate on whatever device the model lives on instead of assuming CUDA.
    try:
        device = next(model.parameters()).device
    except StopIteration:
        # Parameter-free model: keep the previous preference for the GPU.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Switch to eval mode for the measurement and restore the caller's mode
    # afterwards, so calling this between training epochs has no side effect.
    was_training = model.training
    model.eval()

    correct, total = 0, 0
    try:
        # No gradients are needed for evaluation; skip building the graph.
        with torch.no_grad():
            for feats, labels in data_loader:
                feats = feats.to(device)
                labels = labels.to(device)

                # Index of the highest score per row, shape (batch, 1).
                pred = model(feats).max(1, keepdim=True)[1]
                truth = labels.view_as(pred)

                if conf_matrix:
                    # Count every (true, predicted) pair; the earlier .item()
                    # calls only worked for single-sample batches.
                    pairs = zip(truth.reshape(-1).tolist(), pred.reshape(-1).tolist())
                    for true_idx, pred_idx in pairs:
                        counts[int(true_idx)][int(pred_idx)] += 1

                correct += pred.eq(truth).sum().item()
                total += feats.shape[0]
    finally:
        if was_training:
            model.train()

    accuracy = (correct / total) if total else 0.0
    if conf_matrix:
        return accuracy, counts
    return accuracy

In [None]:
def train_net(net, batch_size=8, lr=0.01, num_epochs=20, data_set=["train", "val"], weight_decay=0):
    """Train ``net`` with SGD (momentum 0.9) and checkpoint it after every epoch.

    A run directory named after ``net.name`` and the hyper-parameters is
    created in the current working directory.  It receives a text dump of the
    network, one ``epoch_<n>`` state-dict file per epoch and, at the end, CSV
    files with the recorded histories.  Re-running with the same settings
    reuses the directory and overwrites its files.

    Parameters
    ----------
    net : torch.nn.Module
        Model to train; must expose a ``name`` attribute.  Training runs on
        the device the model's parameters already live on.
    batch_size : int
        Mini-batch size for both loaders.
    lr : float
        SGD learning rate.
    num_epochs : int
        Number of passes over the training loader.
    data_set : list of str
        Exactly two split names, ``[training_split, validation_split]``,
        forwarded to ``get_data_loader``.
    weight_decay : float
        L2 penalty passed to the optimizer.

    Returns
    -------
    tuple
        ``(iters, train_loss, val_acc, train_acc)``: four lists with one entry
        per epoch.  ``train_loss`` is the mean cross-entropy per training
        sample seen during the epoch.
    """
    model_path = "{}_BS_{}_LR_{}_dataset_{}_numepochs_{}/".format(net.name,batch_size, lr, "{}".format(data_set[0]),num_epochs)

    # exist_ok=True: re-running the cell must not die with FileExistsError.
    os.makedirs(model_path, exist_ok=True)

    with open('./{}network_params.txt'.format(model_path),'w') as file:
        print(net.parameters, file=file)

    train_loader, val_loader = get_data_loader(batch_size=batch_size, sets=data_set)

    # Follow the model's device instead of assuming CUDA is available.
    device = next(net.parameters()).device

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)

    train_acc, train_loss = [], []
    val_acc = []
    iters = []

    for epoch in range(num_epochs):
        print("Epoch: ", epoch)

        samples_seen = 0
        total_loss = 0.0
        for features, labels in train_loader:
            features = features.to(device)
            labels = labels.to(device)
            batch_n = features.shape[0]

            optimizer.zero_grad()
            outputs = net(features)
            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()

            # CrossEntropyLoss returns the batch *mean*; weight it by the batch
            # size so that total_loss / samples_seen is a true per-sample mean
            # (previously the sum of batch means was divided by the sample
            # count, under-reporting the loss by roughly the batch size).
            total_loss += loss.item() * batch_n
            samples_seen += batch_n

            pred = outputs.max(1, keepdim=True)[1]
            mini_batch_correct = pred.eq(labels.view_as(pred)).sum().item()
            mini_batch_total = batch_n

            print("Epoch {}, Mini Batch Accuracy {:.2f}%, Loss {:.2f}%".format(epoch ,mini_batch_correct/mini_batch_total * 100 , total_loss/samples_seen * 100))

        iters.append(epoch)
        train_acc.append(get_accuracy(net, train_loader))
        val_acc.append(get_accuracy(net, val_loader))
        train_loss.append(total_loss/samples_seen)

        print("\nEpoch {} Summary: Train Accuracy {:.2f}%, Validation Accuracy {:.2f}%, Loss {:.2f}%\n".format(epoch,
                                                                                                          train_acc[-1] * 100,
                                                                                                          val_acc[-1] * 100,
                                                                                                          total_loss/samples_seen * 100))

        # One checkpoint per epoch so any epoch can be restored later.
        torch.save(net.state_dict(), model_path + "epoch_{}".format(str(epoch)))

    np.savetxt("{}train_acc.csv".format(model_path), train_acc)
    np.savetxt("{}train_loss.csv".format(model_path), train_loss)
    np.savetxt("{}val_acc.csv".format(model_path), val_acc)
    np.savetxt("{}iters.csv".format(model_path), iters)

    return iters, train_loss, val_acc, train_acc

In [None]:
def plot_graphs(iters, train_loss, train_acc, val_acc):
    """Show the training-loss curve and the accuracy curves, then print the final accuracies.

    Parameters
    ----------
    iters : sequence
        X-axis values shared by all curves.
    train_loss : sequence
        Loss values plotted in the first figure.
    train_acc, val_acc : sequence
        Accuracy values plotted together in the second figure; their last
        elements are printed as the final accuracies.
    """
    # Figure 1: loss against epochs.
    plt.plot(iters, train_loss, label="Train")
    plt.title("Training Curve")
    plt.ylabel("Loss")
    plt.xlabel("Epochs")
    plt.show()

    # Figure 2: training and validation accuracy on shared axes.
    for series, series_label in ((train_acc, "Training"), (val_acc, "Validation")):
        plt.plot(iters, series, label=series_label)
    plt.title("Training Curve")
    plt.ylabel("Validation Accuracy")
    plt.xlabel("Epochs")
    plt.legend(loc='best')
    plt.show()

    print("Final Training Accuracy: {}".format(train_acc[-1]))
    print("Final Validation Accuracy: {}".format(val_acc[-1]))

In [None]:
def print_conf_matrix(conf_matrix):
    """Print a confusion matrix as a tab-separated table with totals.

    The output consists of a header row with the class names, one row per
    true class followed by that row's sum, a "Guesses" row with the column
    sums, and finally one line per class with the diagonal entry divided by
    the row sum, as a percentage.

    Parameters
    ----------
    conf_matrix : numpy.ndarray
        5x5 array indexed as ``conf_matrix[row][column]``; must support
        ``conf_matrix[i, :]`` and ``conf_matrix[:, i]`` slicing.
    """
    classes = ['cloudy', 'foggy', 'rain', 'snow', 'sunny']
    class_ids = range(len(classes))

    print("\n\n")

    # Header: an empty leading cell, then every class name, tab-terminated.
    print("\t" + "".join(name + "\t" for name in classes), end='')
    print("\tTotal Samples")
    print('\n')

    # One line per true class: name, the row's cells, then the row total.
    for row, name in enumerate(classes):
        cells = "".join(str(conf_matrix[row][col]) + "\t" for col in class_ids)
        print(name + "\t" + cells, end='')
        print("\t", sum(conf_matrix[row, :]))
        print('\n')

    # Column totals: how often each class was predicted.
    column_totals = "".join(str(sum(conf_matrix[:, col])) + "\t" for col in class_ids)
    print("Guesses\t" + column_totals, end='')

    print("\n\n")

    # Per-class share of correctly classified samples.
    for idx, name in enumerate(classes):
        share = conf_matrix[idx][idx] / sum(conf_matrix[idx, :]) * 100
        print("{} : {:.2f}%".format(name, share))
    
    

In [None]:
# class ANN(nn.Module):
#     def __init__(self):
#         super(ANN, self).__init__()
#         self.name="ANN"
#         self.fc1 = nn.Linear(4, 100)
#         self.fc2 = nn.Linear(100, 80)
#         self.fc3 = nn.Linear(80, 60)
#         self.fc4 = nn.Linear(60, 40)
#         self.fc5 = nn.Linear(40, 20)
#         self.final = nn.Linear(20, 5)


#     def forward(self, x):
#         x = F.relu(self.fc1(x))
#         x = F.relu(self.fc2(x))
#         x = F.relu(self.fc3(x))
#         x = F.relu(self.fc4(x))
#         x = F.relu(self.fc5(x))
#         x = self.final(x)
#         return x

# class ANN(nn.Module):
#     def __init__(self):
#         super(ANN, self).__init__()
#         self.name="ANN"
#         self.fc1 = nn.Linear(5, 256)
#         self.fc2 = nn.Linear(256, 128)
#         self.fc3 = nn.Linear(128, 64)
#         self.final = nn.Linear(64, 5)


#     def forward(self, x):
#         x = F.relu(self.fc1(x))
#         x = F.relu(self.fc2(x))
#         x = F.relu(self.fc3(x))
#         x = self.final(x)
#         return x

class ANN(nn.Module):
    """Fully connected classifier: 5 inputs -> 256 -> 128 -> 64 -> 5 outputs.

    A ReLU follows every linear layer except the last one, so ``forward``
    returns the raw output of the final linear layer.  The layers live in
    ``self.encoder`` (an ``nn.Sequential``); ``self.name`` is the label that
    ``train_net`` uses when naming its output directory.
    """

    def __init__(self):
        super().__init__()
        self.name = "ANN"

        widths = [5, 256, 128, 64, 5]
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        # No activation after the output layer.
        layers.pop()

        self.encoder = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` (last dimension of size 5) through the layer stack."""
        return self.encoder(x)




In [None]:
# Module-level metadata object: get_data_loader reads this global `m` and calls
# m.get_available_metadata(<split name>) on it, so this cell must run before any
# training or evaluation cell.
m = meta_parser.metadata_map()

In [None]:
# Build the classifier on the GPU and train it; train_net returns the per-epoch
# histories in the order (iters, train_loss, val_acc, train_acc).
net = ANN().cuda()
iters, train_loss, val_acc, train_acc = train_net(
    net,
    batch_size=128,
    lr=1e-2,
    num_epochs=200,
    data_set=["train", "val"],
)

In [None]:
# Plot the per-epoch loss and accuracy histories returned by train_net above.
plot_graphs(iters, train_loss, train_acc, val_acc)

In [None]:
# Evaluate the trained net on the "train" split, one sample per batch.
# With conf_matrix=True get_accuracy returns a pair: x receives the accuracy
# (a fraction) and y the confusion-matrix array.
data_loader = get_data_loader(batch_size=1, sets=["train"])
x, y = get_accuracy(net, data_loader[0], conf_matrix=True)
# Accuracy-only alternative:
# x = get_accuracy(net, data_loader[0], conf_matrix=False)

In [None]:
# x: accuracy on the "train" split; y: its confusion matrix (both set in the cell above).
print(x)
print_conf_matrix(y)

In [None]:
# Same evaluation on the "val" split, one sample per batch.
# x receives the accuracy (a fraction) and y the confusion-matrix array,
# overwriting the values computed for the "train" split.
data_loader = get_data_loader(batch_size=1, sets=["val"])
x, y = get_accuracy(net, data_loader[0], conf_matrix=True)
# Accuracy-only alternative:
# x = get_accuracy(net, data_loader[0], conf_matrix=False)

In [None]:
# x: accuracy on the "val" split; y: its confusion matrix (both set in the cell above).
print(x)
print_conf_matrix(y)