In [1]:
import os
import random
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl
from sklearn.model_selection import train_test_split
from PIL import Image
# PIL_image = Image.fromarray(ndarray_image)

class WildsDataset(Dataset):    
    def __init__(self, csv_df, mode = "train", transform=None):
        self.mode = mode
        self.transform = transform
        self.data_list = []
        self.label = []
        
        for index, row in csv_df.iterrows():
            self.data_list.append(image_path+row['filename'])
            if self.mode != 'test':
                self.label.append(row['label'])
        
    def __len__(self):
        return len(self.data_list)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
            
        image = np.load(self.data_list[idx])
        image = image.f.x
        if self.transform is not None:
            image = self.transform(image)
        if self.mode == 'test': 
            return image
        label = torch.tensor(int(self.label[idx]))
            
        return image, label

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import RandomSampler
from torch.utils.data import TensorDataset
from torchvision.utils import make_grid
import torchvision.models as models

class Net(nn.Module):
    def __init__(self):
        super().__init__()

        model_ft = models.resnet34()
        model_ft.conv1 = nn.Conv2d(8, 64, kernel_size=7, stride=2, padding=3,bias=False)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, 128)
        
        self.pretrain_model = model_ft 
        self.category = nn.Sequential(
            nn.Linear(128,32),
            nn.Dropout(0.5),
            nn.Linear(32,8),
            nn.Dropout(0.5),
            nn.Linear(8,2)
        )
    def forward(self, x):
        x = self.pretrain_model(x)
        category = self.category(x)
        
        return category

In [3]:
model_urban = Net()
model_urban = model_urban.cuda()
model_urban.load_state_dict(torch.load('/kaggle/input/cse-255-hw5-model-urban/checkpoint_urban.pt'))
model_rural = Net()
model_rural = model_rural.cuda()
model_rural.load_state_dict(torch.load('/kaggle/input/cse-255-hw5-model-rural/checkpoint_rural.pt'))

<All keys matched successfully>

In [4]:
import ntpath
import collections
from tqdm import tqdm
image_path = '/kaggle/input/cse-255-hw4-image/anon_images/'
test_csv_path = '/kaggle/input/cse-255-hw4-table/random_test_reduct.csv'

test_csv_df = pd.read_csv(test_csv_path, index_col=0)
test_csv_df_urban = test_csv_df.loc[test_csv_df['urban']==True]
test_csv_df_rural = test_csv_df.loc[test_csv_df['urban']==False]    

test_batch_size = 64
test_dataset_urban = WildsDataset(test_csv_df_urban, mode = "test")
test_loader_urban = DataLoader(test_dataset_urban, batch_size=test_batch_size)
test_dataset_rural = WildsDataset(test_csv_df_rural, mode = "test")
test_loader_rural = DataLoader(test_dataset_rural, batch_size=test_batch_size)

test_image_names = test_csv_df_urban['filename'].tolist()

name_labels_nn = collections.defaultdict(list)
name_scores_nn = collections.defaultdict(list)
for batch_index, batch in tqdm(enumerate(test_loader_urban)):
    start_index = batch_index * test_batch_size
    
    model_urban = model_urban.to('cuda')
    batch = batch.to('cuda')

    output = model_urban(batch)
    logits = output.softmax(dim=1)
    scores = logits[:, 1]
    preds = logits.argmax(dim=1)

    for pred_index, pred in enumerate(preds):
        name_labels_nn[test_image_names[start_index + pred_index]].append(pred.item())

    for score_index, score in enumerate(scores):
        name_scores_nn[test_image_names[start_index + score_index]].append(score.item())

test_image_names = test_csv_df_rural['filename'].tolist()
for batch_index, batch in tqdm(enumerate(test_loader_rural)):
    start_index = batch_index * test_batch_size
    
    model_rural = model_rural.to('cuda')
    batch = batch.to('cuda')

    output = model_rural(batch)
    logits = output.softmax(dim=1)
    scores = logits[:, 1]
    preds = logits.argmax(dim=1)

    for pred_index, pred in enumerate(preds):
        name_labels_nn[test_image_names[start_index + pred_index]].append(pred.item())

    for score_index, score in enumerate(scores):
        name_scores_nn[test_image_names[start_index + score_index]].append(score.item())

22it [00:51,  2.32s/it]
38it [01:13,  1.93s/it]


In [5]:
def get_preds(threshold=0.7, use_score=True):
    name_preds = []
    for index, row in test_csv_df.iterrows():
        filename = row['filename']
        if use_score:
            mean_output = np.mean(name_scores_nn[filename])
        else:
            mean_output = np.mean(name_labels_nn[filename])

        if mean_output >= threshold:
            pred = 1
        elif mean_output <= 1 - threshold:
            pred = -1
        else:
            pred = 0
        
        pred_accu = 1 if mean_output > 0.5 else -1
        name_preds.append([filename, pred, pred_accu])
    preds_df = pd.DataFrame(name_preds, columns=['filename', 'pred_with_abstention', 'pred_wo_abstention'])
    
    return preds_df
        
preds_df = get_preds()
outputs_df = test_csv_df[['filename', 'urban']].merge(preds_df, on='filename')
outputs_df = outputs_df.astype({'urban': int})
outputs_df.to_csv('results.csv')
outputs_df

Unnamed: 0,filename,urban,pred_with_abstention,pred_wo_abstention
0,image13724.npz,0,-1,-1
1,image19338.npz,1,1,1
2,image19053.npz,1,1,1
3,image12371.npz,0,0,1
4,image19639.npz,0,0,-1
...,...,...,...,...
3783,image9142.npz,0,1,1
3784,image7755.npz,0,-1,-1
3785,image13002.npz,1,0,-1
3786,image14642.npz,1,0,1


In [6]:
image_path = '/kaggle/input/cse-255-hw4-image/anon_images/'
test_csv_path = '/kaggle/input/cse-255-hw4-table/country_test_reduct.csv'

test_csv_df = pd.read_csv(test_csv_path, index_col=0)
test_csv_df_urban = test_csv_df.loc[test_csv_df['urban']==True]
test_csv_df_rural = test_csv_df.loc[test_csv_df['urban']==False]    

test_batch_size = 64
test_dataset_urban = WildsDataset(test_csv_df_urban, mode = "test")
test_loader_urban = DataLoader(test_dataset_urban, batch_size=test_batch_size)
test_dataset_rural = WildsDataset(test_csv_df_rural, mode = "test")
test_loader_rural = DataLoader(test_dataset_rural, batch_size=test_batch_size)

test_image_names = test_csv_df_urban['filename'].tolist()

name_labels_nn = collections.defaultdict(list)
name_scores_nn = collections.defaultdict(list)
for batch_index, batch in tqdm(enumerate(test_loader_urban)):
    start_index = batch_index * test_batch_size
    
    model_urban = model_urban.to('cuda')
    batch = batch.to('cuda')

    output = model_urban(batch)
    logits = output.softmax(dim=1)
    scores = logits[:, 1]
    preds = logits.argmax(dim=1)

    for pred_index, pred in enumerate(preds):
        name_labels_nn[test_image_names[start_index + pred_index]].append(pred.item())

    for score_index, score in enumerate(scores):
        name_scores_nn[test_image_names[start_index + score_index]].append(score.item())

test_image_names = test_csv_df_rural['filename'].tolist()
for batch_index, batch in tqdm(enumerate(test_loader_rural)):
    start_index = batch_index * test_batch_size
    
    model_rural = model_rural.to('cuda')
    batch = batch.to('cuda')

    output = model_rural(batch)
    logits = output.softmax(dim=1)
    scores = logits[:, 1]
    preds = logits.argmax(dim=1)

    for pred_index, pred in enumerate(preds):
        name_labels_nn[test_image_names[start_index + pred_index]].append(pred.item())

    for score_index, score in enumerate(scores):
        name_scores_nn[test_image_names[start_index + score_index]].append(score.item())

20it [00:39,  1.98s/it]
51it [01:41,  1.99s/it]


In [7]:
def get_preds(threshold=0.7, use_score=True):
    name_preds = []
    for index, row in test_csv_df.iterrows():
        filename = row['filename']
        if use_score:
            mean_output = np.mean(name_scores_nn[filename])
        else:
            mean_output = np.mean(name_labels_nn[filename])

        if mean_output >= threshold:
            pred = 1
        elif mean_output <= 1 - threshold:
            pred = -1
        else:
            pred = 0
        
        pred_accu = 1 if mean_output > 0.5 else -1
        name_preds.append([filename, pred, pred_accu])
    preds_df = pd.DataFrame(name_preds, columns=['filename', 'pred_with_abstention', 'pred_wo_abstention'])
    
    return preds_df
        
preds_df = get_preds()
outputs_df = test_csv_df[['filename', 'urban']].merge(preds_df, on='filename')
outputs_df = outputs_df.astype({'urban': int})
outputs_df.to_csv('results_country.csv')
outputs_df

Unnamed: 0,filename,urban,pred_with_abstention,pred_wo_abstention
0,image13747.npz,0,0,-1
1,image14972.npz,1,-1,-1
2,image16964.npz,1,-1,-1
3,image6808.npz,1,-1,-1
4,image4311.npz,1,-1,-1
...,...,...,...,...
4511,image10993.npz,0,0,1
4512,image3709.npz,1,0,-1
4513,image11650.npz,1,0,-1
4514,image13495.npz,1,0,-1
