In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [4]:
# Install the gdown command-line tool used for the download below.
!conda install -y gdown
# Download the dataset archive by file id and save it as data-bin.tar.gz.
# NOTE(review): the gdown version is not pinned; newer releases may take the
# file id positionally instead of via `--id` — confirm against the installed version.
!gdown --id '15XWO-zI-AKW0igfwSydmwSGa8ENb9wCg' --output data-bin.tar.gz 
# Unpack the archive, list the data-bin directory, then delete the tarball.
!tar zxvf data-bin.tar.gz
!ls data-bin
!rm data-bin.tar.gz

In [3]:
## data process
import numpy as np
import random
import torch

from torch.utils.data import DataLoader
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
                              TensorDataset,Dataset)
import torchvision.transforms as transforms

In [5]:
# Load the training and testing arrays from the data-bin directory.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code when the file is untrusted. If these .npy files hold
# plain numeric arrays, allow_pickle=False should work — confirm before changing.
train = np.load('data-bin/trainingset.npy', allow_pickle=True)
test = np.load('data-bin/testingset.npy', allow_pickle=True)

In [None]:
## data process
import numpy as np
import random
import torch

from torch.utils.data import DataLoader
from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
                              TensorDataset,Dataset)

import torchvision.transforms as transforms
# class VAEDataset(TensorDataset):
class VAEDataset(Dataset):
    """Image dataset that serves each sample as a float32 tensor in [-1, 1].

    Args:
        data_tensor: NumPy array or tensor of images. Values are assumed to be
            pixel intensities in [0, 255] (the scaling divides by 255). When the
            last dimension has size 3 the data is treated as channels-last and
            permuted with (0, 3, 1, 2), which requires a 4-D input.
    """

    def __init__(self, data_tensor):
        # torch.as_tensor accepts NumPy arrays and tensors of any dtype; the
        # legacy torch.FloatTensor constructor rejects non-float tensors.
        self.data = torch.as_tensor(data_tensor, dtype=torch.float32)
        if self.data.shape[-1] == 3:
            self.data = self.data.permute(0, 3, 1, 2)  # [sample, 3, H, W]
        # Attribute name (including its spelling) is kept so existing code that
        # reads or overrides `transfomer` keeps working.
        self.transfomer = self._scale_to_unit_range

    @staticmethod
    def _scale_to_unit_range(x):
        """Map values from [0, 255] to [-1, 1].

        No additional Normalize(mean=0.5, std=0.5) step is applied: on data that
        is already in [-1, 1] it would shift the range to [-3, 1], which a
        Tanh-terminated decoder cannot reproduce.
        """
        return 2. * x.to(torch.float32) / 255. - 1.

    def __getitem__(self, index):
        """Return the scaled sample stored at ``index``."""
        x = self.data[index]
        if self.transfomer is not None:
            x = self.transfomer(x)
        return x

    def __len__(self):
        """Return the number of samples (size of the first dimension)."""
        return len(self.data)
    

In [6]:
class CustomTensorDataset(TensorDataset):
    """Dataset over one image tensor that rescales every sample on access.

    If the last dimension of ``tensors`` has size 3, the tensor is permuted
    with (0, 3, 1, 2). Each returned sample is cast to float32 and mapped
    through ``2 * x / 255 - 1`` (i.e. [0, 255] becomes [-1.0, 1.0]).
    """

    def __init__(self, tensors):
        channels_last = tensors.shape[-1] == 3
        self.tensors = tensors.permute(0, 3, 1, 2) if channels_last else tensors

        def to_float(sample):
            return sample.to(torch.float32)

        def rescale(sample):
            return 2. * sample/255. - 1.

        self.transform = transforms.Compose([
            transforms.Lambda(to_float),
            transforms.Lambda(rescale),
            # transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        ])

    def __getitem__(self, index):
        """Return the sample at ``index``, transformed when a transform is set."""
        sample = self.tensors[index]
        return self.transform(sample) if self.transform else sample

    def __len__(self):
        """Return the number of samples along the first dimension."""
        return len(self.tensors)

In [7]:
## dataloader
# Convert the NumPy training array to a tensor, wrap it in CustomTensorDataset,
# and serve it in shuffled mini-batches of `batch_size` samples.
x = torch.from_numpy(train)
batch_size = 256
train_dataset = CustomTensorDataset(x)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

In [8]:
import torch.nn as nn
from torch.autograd import Variable
class VAE(nn.Module):
    """Convolutional variational autoencoder.

    A shared two-layer convolutional encoder feeds two parallel convolutional
    heads that produce ``mu`` and ``logvar`` (48 channels each). A three-layer
    transposed-convolution decoder ending in Tanh reconstructs the input from
    the sampled latent. Every layer uses kernel 4, stride 2, padding 1, so for
    even spatial sizes each encoder layer halves and each decoder layer doubles
    the height and width.
    """

    def __init__(self):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 12, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(12, 24, 4, stride=2, padding=1),
            nn.ReLU(),
        )
        # Head producing the latent mean.
        # NOTE(review): the trailing ReLU restricts mu (and logvar below) to be
        # non-negative; kept as-is so existing behavior does not change —
        # confirm whether that constraint is intended.
        self.enc_out_1 = nn.Sequential(
            nn.Conv2d(24, 48, 4, stride=2, padding=1),
            nn.ReLU(),
        )
        # Head producing the latent log-variance.
        self.enc_out_2 = nn.Sequential(
            nn.Conv2d(24, 48, 4, stride=2, padding=1),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(48, 24, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(24, 12, 4, stride=2, padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(12, 3, 4, stride=2, padding=1),
            nn.Tanh(),
        )

    def encode(self, x):
        """Return ``(mu, logvar)`` for the input batch ``x``."""
        h1 = self.encoder(x)
        return self.enc_out_1(h1), self.enc_out_2(h1)

    def reparameterize(self, mu, logvar):
        """Sample ``z = mu + eps * std`` with ``eps ~ N(0, I)``.

        The noise is created with ``torch.randn_like`` so it always lives on the
        same device and has the same dtype as ``logvar``; choosing the noise
        tensor type from ``torch.cuda.is_available()`` breaks when a CPU model
        runs on a machine that also has a GPU.
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        """Reconstruct an image batch from the latent ``z``."""
        return self.decoder(z)

    def forward(self, x):
        """Return ``(reconstruction, mu, logvar)`` for the input batch ``x``."""
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar

In [9]:
def loss_vae(recon_x, x, mu, logvar, criterion):
    """Return the reconstruction loss plus the VAE KL-divergence term.

    Args:
        recon_x: Reconstructed batch produced by the model.
        x: Original batch the reconstruction is compared against.
        mu: Latent means.
        logvar: Latent log-variances.
        criterion: Callable computing the reconstruction loss from
            ``(recon_x, x)``.

    Returns:
        ``criterion(recon_x, x)`` plus
        ``-0.5 * sum(1 + logvar - mu^2 - exp(logvar))`` summed over all elements.
    """
    reconstruction = criterion(recon_x, x)
    # Per-element KL contribution; none of the inputs is modified.
    kld_terms = -(mu.pow(2) + logvar.exp()) + 1 + logvar
    kld = -0.5 * torch.sum(kld_terms)
    return reconstruction + kld

In [56]:
# Loss and optimizer
# model = VAE()
# NOTE: conv_autoencoder is defined in a later cell of this notebook; that cell
# must be executed first, otherwise the next line raises NameError.
model = conv_autoencoder()
# Mean-squared error, used by the training loop as the reconstruction loss.
criterion = nn.MSELoss()
# Adam over all model parameters with learning rate 1e-3.
optimizer = torch.optim.Adam(
    model.parameters(), lr=1e-3)

In [11]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Bare expression so the notebook displays the selected device.
device

In [57]:
import time

# Train the autoencoder for 30 epochs, recording the mean batch loss per epoch.
ans_loss = []  # mean training loss of each epoch
# Moving the model is loop-invariant, so do it once instead of on every batch.
model = model.to(device)
# The evaluation cell switches the model to eval mode; restore training mode so
# re-running this cell afterwards trains under the intended mode.
model.train()
for epoch in range(30):
    tmp_loss = []  # loss of every batch in this epoch
    start = int(time.time())
    for ele_data in train_dataloader:
        ele_data = ele_data.to(device)
        # VAE variant: y_pred, mu, logvar = model(ele_data)
        y_pred = model(ele_data)
        # VAE variant: ele_loss = loss_vae(y_pred, ele_data, mu, logvar, criterion)
        ele_loss = criterion(y_pred, ele_data)
        optimizer.zero_grad()
        ele_loss.backward()
        optimizer.step()
        tmp_loss.append(ele_loss.item())
    epoch_loss = np.mean(tmp_loss)
    print(f"{epoch} epoch loss is {epoch_loss} cost time {int(time.time())-start}")
    ans_loss.append(epoch_loss)

In [17]:
# Build the test loader: unshuffled, one sample per batch, so downstream losses
# line up one-to-one and in order with the rows of the test array.
# Note: this overwrites the `x` and `batch_size` set by the training loader cell.
x = torch.from_numpy(test)
batch_size = 1
test_dataset = CustomTensorDataset(x)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [65]:
# Reconstruction error of every test batch; later cells plot and threshold
# tmp_loss, so it must stay a list with one entry per batch of test_dataloader.
tmp_loss = []
eval_loss = nn.MSELoss()
model.eval()
# Inference only: disable autograd so no graph is built for each test sample.
with torch.no_grad():
    for ele_data in test_dataloader:
        ele_data = ele_data.to(device)
        # VAE variant: y_pred, mu, logvar = model(ele_data)
        y_pred = model(ele_data)
        ele_loss = eval_loss(y_pred, ele_data)
        tmp_loss.append(ele_loss.item())
# `epoch` is the value left over from the training loop cell.
print(f"{epoch} epoch loss is {np.mean(tmp_loss)}")

In [None]:
# Mean VAE loss over the training loader. This cell applies to the VAE variant
# only: it expects `model(batch)` to return (reconstruction, mu, logvar).
tmp2_loss = []
model = model.to(device)
model.eval()
# Inference only: no autograd graph is needed.
with torch.no_grad():
    for ele_data in train_dataloader:
        # The batch must live on the same device as the model.
        ele_data = ele_data.to(device)
        y_pred, mu, logvar = model(ele_data)
        ele_loss = loss_vae(y_pred, ele_data, mu, logvar, criterion)
        # .item() works for CPU and CUDA tensors alike (.data.numpy() fails on CUDA).
        tmp2_loss.append(ele_loss.item())
# `epoch` is the value left over from the training loop cell.
print(f"{epoch} epoch loss is {np.mean(tmp2_loss)}")

In [70]:
import matplotlib.pyplot as plt
# Plot the values in tmp_loss as a red line; the x-axis is the list index
# (one entry per batch processed by the evaluation cell), not an epoch number.
plt.plot((tmp_loss), "r", label="val_loss")
# plt.plot(tmp2_loss, 'g', label="train_loss")
plt.legend()

In [None]:
# Rebuild the training loader with one sample per batch.
# Note: this overwrites x, batch_size, train_dataset and train_dataloader from
# the earlier dataloader cell, so later cells see the batch-size-1 loader.
x = torch.from_numpy(train)
batch_size = 1
train_dataset = CustomTensorDataset(x)
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

In [71]:
# Cut-off value: the 0.76 quantile of the losses collected in tmp_loss.
# NOTE(review): 0.76 is a hard-coded constant with no stated rationale —
# presumably tuned by hand; confirm and consider naming it.
threshold = np.quantile(tmp_loss,0.76)
# Bare expression so the notebook displays the threshold.
threshold

In [72]:
# One label per entry of tmp_loss: 0 where the loss exceeds the threshold,
# 1 otherwise. tmp_loss is a Python list, so convert it explicitly instead of
# relying on NumPy's reflected comparison (which only works when `threshold`
# happens to be a NumPy scalar).
# NOTE(review): high-loss samples are mapped to 0 — confirm this matches the
# label convention the submission expects.
ans = np.where(np.asarray(tmp_loss) > threshold, 0, 1)
# data = pd.read_csv("/kaggle/input/ml2021spring-hw8/sample.csv")

In [73]:
# Load the sample submission table here so this cell does not depend on a
# `data` variable that no active cell in the notebook defines.
data = pd.read_csv("/kaggle/input/ml2021spring-hw8/sample.csv")
# Fill in the predictions and write the submission file without the index column.
data["Predicted"] = ans
data.to_csv("sample12.csv", index=False)

In [53]:
## other model

class fcn_autoencoder(nn.Module):
    """Fully connected autoencoder over flattened 64 * 64 * 3 inputs.

    The encoder narrows 12288 -> 128 -> 64 -> 12 -> 3 features and the decoder
    mirrors it back to 12288, ending in Tanh so outputs lie in [-1, 1].
    """

    def __init__(self):
        super(fcn_autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(64 * 64 * 3, 128),
            nn.ReLU(True),
            nn.Linear(128, 64),
            nn.ReLU(True),
            nn.Linear(64, 12),
            nn.ReLU(True),
            nn.Linear(12, 3))

        self.decoder = nn.Sequential(
            nn.Linear(3, 12),
            nn.ReLU(True),
            nn.Linear(12, 64),
            nn.ReLU(True),
            nn.Linear(64, 128),
            nn.ReLU(True),
            nn.Linear(128, 64 * 64 * 3),
            nn.Tanh())

    def forward(self, x):
        """Reconstruct ``x`` and return a tensor of the same shape.

        Inputs whose last dimension already equals the encoder's input width are
        passed through unchanged. Any other input (for example an image batch of
        shape (B, 3, 64, 64)) is flattened to rows of that width first, and the
        reconstruction is reshaped back to the input's shape so it can be
        compared directly against the input by a loss function.

        Raises:
            RuntimeError: If the number of elements in ``x`` is not a multiple
                of the encoder's input width.
        """
        in_features = self.encoder[0].in_features
        if x.shape[-1] == in_features:
            return self.decoder(self.encoder(x))
        original_shape = x.shape
        flat = x.reshape(-1, in_features)
        return self.decoder(self.encoder(flat)).reshape(original_shape)
    
class conv_autoencoder(nn.Module):
    """Convolutional autoencoder with four stride-2 stages in each direction.

    The encoder maps 3 -> 12 -> 24 -> 48 -> 96 channels and the decoder mirrors
    it back to 3 channels, ending in Tanh. All layers use kernel size 4,
    stride 2 and padding 1.
    """

    def __init__(self):
        super().__init__()
        # Hyper-parameters shared by every (transposed) convolution below.
        conv = dict(kernel_size=4, stride=2, padding=1)
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 12, **conv),
            nn.ReLU(),
            nn.Conv2d(12, 24, **conv),
            nn.ReLU(),
            nn.Conv2d(24, 48, **conv),
            nn.ReLU(),
            nn.Conv2d(48, 96, **conv),   # medium: remove this layer
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(96, 48, **conv),  # medium: remove this layer
            nn.ReLU(),
            nn.ConvTranspose2d(48, 24, **conv),
            nn.ReLU(),
            nn.ConvTranspose2d(24, 12, **conv),
            nn.ReLU(),
            nn.ConvTranspose2d(12, 3, **conv),
            nn.Tanh(),
        )

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        return self.decoder(self.encoder(x))