## Utils

In [1]:

from sklearn.metrics import cohen_kappa_score, mean_squared_error
from sklearn.metrics import confusion_matrix as sk_cmatrix


In [2]:
def confusion_matrix(rater_a, rater_b, min_rating=None, max_rating=None):
    """
    Returns the confusion matrix between rater's ratings
    """
    assert(len(rater_a) == len(rater_b))
    if min_rating is None:
        min_rating = min(rater_a + rater_b)
    if max_rating is None:
        max_rating = max(rater_a + rater_b)
    num_ratings = int(max_rating - min_rating + 1)
    conf_mat = [[0 for i in range(num_ratings)]
                for j in range(num_ratings)]
    for a, b in zip(rater_a, rater_b):
        conf_mat[a - min_rating][b - min_rating] += 1
    return conf_mat

def histogram(ratings, min_rating=None, max_rating=None):
    """
    Returns the counts of each type of rating that a rater made
    """
    if min_rating is None:
        min_rating = min(ratings)
    if max_rating is None:
        max_rating = max(ratings)
    num_ratings = int(max_rating - min_rating + 1)
    hist_ratings = [0 for x in range(num_ratings)]
    for r in ratings:
        hist_ratings[r - min_rating] += 1
    return hist_ratings

def quadratic_weighted_kappa(y, y_pred):
    min_rating, max_rating =None, None
    rater_a, rater_b = np.array(y, dtype=int), np.array(y_pred, dtype=int)
    assert(len(rater_a) == len(rater_b))
    if min_rating is None:
        min_rating = min(min(rater_a), min(rater_b))
    if max_rating is None:
        max_rating = max(max(rater_a), max(rater_b))
    conf_mat = confusion_matrix(rater_a, rater_b, min_rating, max_rating)
    num_ratings = len(conf_mat)
    num_scored_items = float(len(rater_a))

    hist_rater_a = histogram(rater_a, min_rating, max_rating)
    hist_rater_b = histogram(rater_b, min_rating, max_rating)

    numerator, denominator = 0.0, 0.0

    for i in range(num_ratings):
        for j in range(num_ratings):
            expected_count = (hist_rater_a[i] * hist_rater_b[j] / num_scored_items)
            d = np.square(i - j) / np.square(num_ratings - 1)
            numerator += d * conf_mat[i][j] / num_scored_items
            denominator += d * expected_count / num_scored_items

    return (1.0 - numerator / denominator)

def val_kappa(preds, train_data):
    labels = train_data.get_label()
    preds = np.argmax(preds.reshape((-1,5)), axis=1)
    
    return 'qwk', quadratic_weighted_kappa(labels, preds), True

def val_kappa_reg(preds, train_data, cdf):
    labels = train_data.get_label()
    preds = getTestScore2(preds, cdf)
    return 'qwk', quadratic_weighted_kappa(labels, preds), True

def get_cdf(hist):
    return np.cumsum(hist/np.sum(hist))

def getScore(pred, cdf, valid=False):
    num = pred.shape[0]
    output = np.asarray([4]*num, dtype=int)
    rank = pred.argsort()
    output[rank[:int(num*cdf[0]-1)]] = 0
    output[rank[int(num*cdf[0]):int(num*cdf[1]-1)]] = 1
    output[rank[int(num*cdf[1]):int(num*cdf[2]-1)]] = 2
    output[rank[int(num*cdf[2]):int(num*cdf[3]-1)]] = 3
    if valid:
        cutoff = [ pred[rank[int(num*cdf[i]-1)]] for i in range(4) ]
        return output, cutoff
    return output

def getTestScore(pred, cutoff):
    num = pred.shape[0]
    output = np.asarray([4]*num, dtype=int)
    for i in range(num):
        if pred[i] <= cutoff[0]:
            output[i] = 0
        elif pred[i] <= cutoff[1]:
            output[i] = 1
        elif pred[i] <= cutoff[2]:
            output[i] = 2
        elif pred[i] <= cutoff[3]:
            output[i] = 3
    return output

def getTestScore2(pred, cdf):
    num = pred.shape[0]
    rank = pred.argsort()
    output = np.asarray([4]*num, dtype=int)
    output[rank[:int(num*cdf[0]-1)]] = 0
    output[rank[int(num*cdf[0]):int(num*cdf[1]-1)]] = 1
    output[rank[int(num*cdf[1]):int(num*cdf[2]-1)]] = 2
    output[rank[int(num*cdf[2]):int(num*cdf[3]-1)]] = 3
    return output

def rmse(actual, predicted):
    return np.sqrt(mean_squared_error(actual, predicted))

## Data Preparation

In [3]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

In [4]:
DATA_DIR = '../data/raw/petfinder-adoption-prediction'

In [5]:
import os

In [6]:
df = pd.read_csv(os.path.join(DATA_DIR, 'train', 'train.csv'))

In [7]:
print(df.shape)

(14993, 24)


In [8]:
cat_features = ['Type', 'Breed1', 'Breed2', 'Gender', 'Color1', 'Color2', 'Color3',
               'Vaccinated', 'Dewormed', 'Sterilized', 'Health', 'State',
#                 'RescuerID', 'PetID',
               ]

In [9]:
ordinal_features = ['Age', 'MaturitySize', 'FurLength', 'Quantity', 'Fee', 'VideoAmt', 
                   'PhotoAmt']

In [10]:
cols = ['Age', 'Breed1', 'PhotoAmt', 'VideoAmt', 'AdoptionSpeed']

In [11]:
for col in cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

In [12]:
df = df.dropna(how='any', subset=['AdoptionSpeed'])
df = df[df['AdoptionSpeed'] <= 4]

In [13]:
df['PhotoAmt'] = df['PhotoAmt'].fillna(0)
df['VideoAmt'] = df['VideoAmt'].fillna(0)

In [14]:
from sklearn.preprocessing import MinMaxScaler

In [15]:
df[ordinal_features]

Unnamed: 0,Age,MaturitySize,FurLength,Quantity,Fee,VideoAmt,PhotoAmt
0,3.0,1,1,1,100,0.0,1.0
1,1.0,2,2,1,0,0.0,2.0
2,1.0,2,2,1,0,0.0,7.0
3,4.0,2,1,1,150,0.0,8.0
4,1.0,2,1,1,0,0.0,3.0
...,...,...,...,...,...,...,...
14988,2.0,2,2,4,0,0.0,3.0
14989,60.0,2,2,2,0,0.0,3.0
14990,2.0,3,2,5,30,0.0,5.0
14991,9.0,1,1,1,0,0.0,3.0


In [16]:
scaler = MinMaxScaler()
df[ordinal_features] = scaler.fit_transform(df[ordinal_features].to_numpy())


In [17]:
processed_df = pd.concat([pd.get_dummies(df[cat_features].astype('category'), drop_first=True), df[ordinal_features]], axis=1)

In [18]:
processed_df.head()

Unnamed: 0,Type_2,Breed1_1.0,Breed1_2.0,Breed1_3.0,Breed1_5.0,Breed1_7.0,Breed1_10.0,Breed1_11.0,Breed1_12.0,Breed1_15.0,...,State_41367,State_41401,State_41415,Age,MaturitySize,FurLength,Quantity,Fee,VideoAmt,PhotoAmt
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0.011765,0.142857,0.0,0.0,0.033333,0.0,0.033333
1,1,0,0,0,0,0,0,0,0,0,...,0,1,0,0.003922,0.285714,0.5,0.0,0.0,0.0,0.066667
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0.003922,0.285714,0.5,0.0,0.0,0.0,0.233333
3,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0.015686,0.285714,0.0,0.0,0.05,0.0,0.266667
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0.003922,0.285714,0.0,0.0,0.0,0.0,0.1


In [19]:
processed_df.shape

(14972, 366)

In [20]:
processed_df['Age'].fillna(0, inplace=True)

## Text processsing

In [21]:
descriptions = df['Description'].astype(str)

In [22]:
descriptions

0        Nibble is a 3+ month old ball of cuteness. He ...
1        I just found it alone yesterday near my apartm...
2        Their pregnant mother was dumped by her irresp...
3        Good guard dog, very alert, active, obedience ...
4        This handsome yet cute boy is up for adoption....
                               ...                        
14988    I have 4 kittens that need to be adopt urgentl...
14989    Serato(female cat- 3 color) is 4 years old and...
14990    Mix breed, good temperament kittens. Love huma...
14991    she is very shy..adventures and independent..s...
14992    Fili just loves laying around and also loves b...
Name: Description, Length: 14972, dtype: object

In [23]:
from langdetect import detect_langs

In [24]:
def detecting_languages(x):
    if len(x) == 0:
        return ''
    else:
        try:
            lang = detect_langs(x)[0].lang
            return lang
        except:
            return x

In [25]:
langs = map(lambda x: detecting_languages(x), descriptions)

In [26]:
from tqdm import tqdm



en       14100
id         458
da         102
de          64
zh-cn       32
ro          23
tl          23
no          22
es          15
af          13
nl          11
so          11
ko           9
fr           9
cy           8
-            8
ca           8
fi           8
it           5
sl           5
zh-tw        5
hr           5
sv           4
0            4
sk           4
vi           3
pt           2
et           2
.            2
:)           2
sw           1
tr           1
hu           1
---          1
..           1
dtype: int64

## Dataset Function

In [21]:
import torch.utils.data as data
import torchvision.transforms as transforms
from PIL import Image
import torch

In [22]:
import glob

In [23]:
train_image_files = sorted(glob.glob(os.path.join(DATA_DIR, 'train_images/*.jpg')))
train_metadata_files = sorted(glob.glob(os.path.join(DATA_DIR, 'train_metadata/*.json')))
train_sentiment_files = sorted(glob.glob(os.path.join(DATA_DIR,'train_sentiment/*.json')))

print('num of train images files: {}'.format(len(train_image_files)))
print('num of train metadata files: {}'.format(len(train_metadata_files)))
print('num of train sentiment files: {}'.format(len(train_sentiment_files)))

test_image_files = sorted(glob.glob(os.path.join(DATA_DIR,'test_images/*.jpg')))
test_metadata_files = sorted(glob.glob(os.path.join(DATA_DIR,'test_metadata/*.json')))
test_sentiment_files = sorted(glob.glob(os.path.join(DATA_DIR,'test_sentiment/*.json')))

print('num of test images files: {}'.format(len(test_image_files)))
print('num of test metadata files: {}'.format(len(test_metadata_files)))
print('num of test sentiment files: {}'.format(len(test_sentiment_files)))

num of train images files: 58311
num of train metadata files: 58311
num of train sentiment files: 14442
num of test images files: 14465
num of test metadata files: 14465
num of test sentiment files: 3865


In [24]:
image_df = pd.DataFrame(train_image_files, columns=['path'])

def get_petid(path):
    basename = os.path.basename(path)
    return basename.split('-')[0]
def get_picid(path):
    basename = os.path.splitext(os.path.basename(path))[0]
    return basename.split('-')[1]


image_df['PetID'] = image_df['path'].apply(get_petid)
# image_df['PicID'] = image_df['path'].apply(get_picid)


In [25]:
image_df

Unnamed: 0,path,PetID
0,../data/raw/petfinder-adoption-prediction/trai...,0008c5398
1,../data/raw/petfinder-adoption-prediction/trai...,0008c5398
2,../data/raw/petfinder-adoption-prediction/trai...,0008c5398
3,../data/raw/petfinder-adoption-prediction/trai...,0008c5398
4,../data/raw/petfinder-adoption-prediction/trai...,0008c5398
...,...,...
58306,../data/raw/petfinder-adoption-prediction/trai...,fffa39a6a
58307,../data/raw/petfinder-adoption-prediction/trai...,fffa39a6a
58308,../data/raw/petfinder-adoption-prediction/trai...,fffd78a11
58309,../data/raw/petfinder-adoption-prediction/trai...,fffd78a11


In [26]:
df = df.merge(image_df.drop_duplicates(subset=['PetID']), on='PetID', how='left')

In [27]:
df.shape

(14972, 25)

In [28]:
from torch.utils.data import Dataset, DataLoader


In [29]:
from torchvision import transforms


In [30]:
from torch.utils.data import Dataset, DataLoader

def img_to_torch(image):
    return torch.from_numpy(np.transpose(image, (2, 0, 1))).type(torch.FloatTensor)

def pad_to_square(image):
    h, w = image.shape[0:2]
    new_size = max(h, w)
    delta_top = (new_size-h)//2
    delta_bottom = new_size-h-delta_top
    delta_left = (new_size-w)//2
    delta_right = new_size-delta_left-w
    new_im = cv2.copyMakeBorder(image, delta_top, delta_bottom, delta_left, delta_right, 
                                cv2.BORDER_CONSTANT,  value=[0,0,0])
    return new_im

In [31]:
from nltk.tokenize import TweetTokenizer
import nltk
isascii = lambda s: len(s) == len(s.encode())
tknzr = TweetTokenizer()
import jieba
from nltk.stem import PorterStemmer
ps = PorterStemmer()
from nltk.stem.lancaster import LancasterStemmer
lc = LancasterStemmer()
from nltk.stem import SnowballStemmer
sb = SnowballStemmer("english")

In [32]:
def build_emb_matrix(word_dict, emb_dict):
    embed_size = 300
    nb_words = len(word_dict)+1000
    nb_oov = 0
    embedding_matrix = np.zeros((nb_words, embed_size), dtype=np.float32)
    unknown_vector = np.zeros((embed_size,), dtype=np.float32) - 1.
    for key in tqdm(word_dict):
        word = key
        embedding_vector = emb_dict.get(word)
        if embedding_vector is not None:
            embedding_matrix[word_dict[key]] = embedding_vector
            continue
        word = key.lower()
        embedding_vector = emb_dict.get(word)
        if embedding_vector is not None:
            embedding_matrix[word_dict[key]] = embedding_vector
            continue
        word = key.upper()
        embedding_vector = emb_dict.get(word)
        if embedding_vector is not None:
            embedding_matrix[word_dict[key]] = embedding_vector
            continue
        word = key.capitalize()
        embedding_vector = emb_dict.get(word)
        if embedding_vector is not None:
            embedding_matrix[word_dict[key]] = embedding_vector
            continue
        word = ps.stem(key)
        embedding_vector = emb_dict.get(word)
        if embedding_vector is not None:
            embedding_matrix[word_dict[key]] = embedding_vector
            continue
        word = lc.stem(key)
        embedding_vector = emb_dict.get(word)
        if embedding_vector is not None:
            embedding_matrix[word_dict[key]] = embedding_vector
            continue
        word = sb.stem(key)
        embedding_vector = emb_dict.get(word)
        if embedding_vector is not None:
            embedding_matrix[word_dict[key]] = embedding_vector
            continue
        nb_oov+=1
        embedding_matrix[word_dict[key]] = unknown_vector                    
    return embedding_matrix, nb_words, nb_oov

In [33]:
def custom_tokenizer(text):
    init_doc = tknzr.tokenize(text)
    retval = []
    for t in init_doc:
        if isascii(t): 
            retval.append(t)
        else:
            for w in t:
                retval.append(w)
    return retval

In [34]:
df['Description'] = df['Description'].astype(str)

In [35]:
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

def text_cleaning(text):
    text = re.sub(r'\d+', '', text)
    text = re.sub(r"[^A-Za-z(),!?\'\`]", " ", text)
    text = text.lower()
    text = custom_tokenizer(text)
    stop_words = set(stopwords.words('english'))
    text = [i for i in text if not i in stop_words]
    text = [lemmatizer.lemmatize(token) for token in text]
    return ' '.join(text)

In [36]:
from tqdm import tqdm
tqdm.pandas()

In [37]:
texts = df['Description'].progress_apply(text_cleaning)

100%|██████████| 14972/14972 [00:07<00:00, 2083.95it/s]


In [38]:
english_desc, chinese_desc = [], []
tokens = set()
word_dict = {}
pos_count, word_count = 1, 1 # starts from 1, 0 for padding token
pos_dict = {}
eng_sequences = []
pos_sequences = []
for i in tqdm(range(len(df))):
    e_d, c_d, eng_seq, pos_seq = [], [], [], []
    doc = custom_tokenizer(df['Description'].iloc[i])
    for token in doc:
        if not isascii(token):
            c_d.append(token)
        else:
            e_d.append(token)
            if token not in word_dict:
                word_dict[token] = word_count
                word_count +=1
    english_desc.append(' '.join(e_d))
    chinese_desc.append(' '.join(c_d))
    pos_seq = nltk.pos_tag(e_d)
    for t in pos_seq:
        if t[1] not in pos_dict:
            pos_dict[t[1]] = pos_count
            pos_count += 1
    pos_seq = [pos_dict[t[1]] for t in pos_seq]
    eng_seq = [word_dict[t] for t in e_d]
    if len(eng_seq)==0:
        eng_seq.append(0)
        pos_seq.append(0)
    eng_sequences.append(eng_seq)
    pos_sequences.append(pos_seq)

100%|██████████| 14972/14972 [00:31<00:00, 471.62it/s]


In [39]:
print(len(eng_sequences))

14972


In [40]:
import numpy as np

In [41]:
# build embedding
def load_glove():
    EMBEDDING_FILE = './.vector_cache/glove.840B.300d.txt'

    def get_coefs(word, *arr): return word, np.asarray(arr, dtype='float32')

    embeddings_index = dict(get_coefs(*o.split(" ")) for o in (open(EMBEDDING_FILE)))
    return embeddings_index

glove_emb = load_glove()

embedding_matrix, vocab_size, nb_oov = build_emb_matrix(word_dict, glove_emb)
print(vocab_size, nb_oov)
del glove_emb


100%|██████████| 28597/28597 [00:00<00:00, 131217.11it/s]

29597 4054





In [42]:
import cv2

import torch
import torch.nn as nn


In [43]:
import skimage.transform

In [44]:
class PetDataset(data.Dataset):
    def __init__(self, df, processed_df, tokenized_texts=eng_sequences, image_size=512, mode='train', max_len_text=200):
        self.df = df
        self.processed_df = processed_df
        self.path = df['path'].tolist()
        self.image_size = image_size
        self.max_len_text = max_len_text
        self.tokenized_texts = tokenized_texts
        self.mode = mode
        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        if idx not in range(0, len(self.df)):
            return self.__getitem__(np.random.randint(0, self.__len__()))
        sample = {}
        if os.path.exists(str(self.path[idx])):
            image = cv2.imread(self.path[idx]) / 255.0
            image = pad_to_square(image)
            image = cv2.resize(image, (self.image_size, self.image_size)) 
        else:
            image = np.zeros((self.image_size, self.image_size, 3), dtype=np.uint8)
        
        image = img_to_torch(image)

        row = self.processed_df.iloc[idx].values.astype('float')
        
        sequence = self.tokenized_texts[idx][:self.max_len_text]
        sequence = torch.tensor(sequence)
        label = self.df['AdoptionSpeed'].iloc[idx]
        if self.mode != 'test':
            return image, row, sequence, label
        else:
            return image, row, sequence
        
def nn_collate(batch):
    has_label = len(batch[0]) == 4
    if has_label:
#         print('padding sequences')
        images, rows, sequences, labels = zip(*batch)
        sequences = nn.utils.rnn.pad_sequence(sequences, batch_first=True).type(torch.LongTensor)
        rows = torch.LongTensor(rows)
        return images, rows, sequences, labels
        

    else:
        images, rows, sequences = zip(*batch)
        sequences = nn.utils.rnn.pad_sequence(sequences, batch_first=True).type(torch.LongTensor)
        rows = torch.LongTensor(rows)

        return images, rows, sequences

In [45]:
print(processed_df.shape, df.shape)

(14972, 366) (14972, 25)


In [46]:
# df = df.loc[processed_df.index]

In [47]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [48]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [49]:
import torchvision

In [50]:
model = torchvision.models.vgg19_bn(pretrained=True)
in_features = model.classifier[-1].in_features
in_features

4096

In [51]:
model = torchvision.models.resnet18(pretrained=True)
in_features = model.fc.in_features
in_features.real

512

In [52]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

In [53]:
torchvision.models.densenet121(pretrained=True).classifier

Linear(in_features=1024, out_features=1000, bias=True)

In [54]:
class ImageEncoder(nn.Module):
    def __init__(self, emb_size):
        super().__init__()
        model = torchvision.models.resnet18(pretrained=True)
#         in_features = model.classifier[-1].in_features
        self.feature_extractor = nn.Sequential(*list(model.children())[:-1])

        in_features = model.fc.in_features
#         print(in_features)
        self.fc = nn.Linear(in_features, emb_size)
        
    def forward(self, image):
        with torch.no_grad():
            img_feature = self.feature_extractor(image)
        
#         print(img_feature.shape)
        img_feature = img_feature.view(img_feature.size(0), -1)
        img_feature = self.fc(img_feature)
        
        l2_norm = img_feature.norm(p=2, dim=1, keepdim=True).detach()
        img_feature = img_feature.div(l2_norm)
        return img_feature
    

In [55]:
x = torch.randn([1,3,512,512])
ImageEncoder(128)(x).shape


torch.Size([1, 128])

In [56]:
class TabularAutoencoder(nn.Module):
    def __init__(self, input_shape, latent_dim):
        super().__init__()
        encoder = nn.Sequential(nn.Linear(input_shape, 64),
                            nn.Tanh(),
                            nn.Linear(64, latent_dim),
                            nn.Tanh())
        decoder = nn.Sequential(nn.Linear(latent_dim, 64),
                               nn.Tanh(),
                               nn.Linear(64, input_shape),
                               nn.Tanh())

    def forward(self, x):
        x = encoder(x)
        x = decoder(x)
        return x
    
    def get_latent_feature(self, x):
        return encoder(x)



In [57]:
class TabularFeedforwardNN(nn.Module):
    def __init__(self, input_size, latent_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.fc2 = nn.Linear(256, latent_dim)
    
    def forward(self, x):
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        
        return x
        

In [58]:
row = torch.randn((4, 366))
TabularFeedforwardNN(366, 128)(row).shape

torch.Size([4, 128])

In [59]:
class TextEncoder(nn.Module):
    def turn_on_embedding(self):
        self.embedding.weight.requires_grad = True

    def __init__(self, vocab_size, word_emb_size, embed_size, num_layers, hidden_size, init_embedding=None):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, word_emb_size)
        
        if init_embedding is not None:
            self.embedding.weight.data.copy_(torch.from_numpy(init_embedding))
        self.embedding.weight.requires_grad = False

        self.tanh = nn.Tanh()
        self.lstm = nn.LSTM(word_emb_size, hidden_size, num_layers)
        self.fc = nn.Linear(2*num_layers*hidden_size, embed_size)
        # 2 for hidden and cell states
        
    def forward(self, text):
        text_vec = self.embedding(text) # shape: (BATCH_SIZE, MAX_LEN, word_emb_size)
#         print(text_vec.shape)
        text_vec = self.tanh(text_vec)
        text_vec = text_vec.transpose(0, 1) # shape: (MAX_LEN, BATCH_SIZE, word_emb_size)
        output, (hidden, cell) = self.lstm(text_vec)
#         print(hidden.shape, cell.shape)
        text_vec = torch.cat((hidden, cell), 2) 
#         print(text_vec.shape)
        text_vec = text_vec.reshape(text_vec.size()[1], -1) # shape (BATCH_SIZE, 2*num_layers*hidden_size)
#         print(text_vec.shape)
        
        text_vec = self.tanh(text_vec)
#         print(text_vec.shape)

        text_vec = self.fc(text_vec)
        return text_vec

In [60]:
sequence = torch.randint(0, 255, (4, 200))
TextEncoder(vocab_size, 300, 128, 2, 256, init_embedding=embedding_matrix)(sequence).shape

torch.Size([4, 128])

In [61]:
input_shape = 366
latent_dim = 128
tabular_autoencoder = TabularAutoencoder(input_shape, latent_dim)

In [85]:
class Multimodal(nn.Module):
    def __init__(self, **kwargs):
        super().__init__()

        img_emb_size = kwargs.get('emb_size', 128)

        vocab_size = kwargs.get('vocab_size')
#         print(vocab_size)
        word_emb_size = kwargs.get('word_emb_size', 300)
        num_layers = kwargs.get('num_layers', 2)
        hidden_size = kwargs.get('hidden_size', 256)

        # Use the same emb size for all modals
        embed_size = img_emb_size
        
        n_classes = kwargs.get('n_classes', 5)
        
        self.img_encoder = ImageEncoder(img_emb_size)
        self.text_encoder = TextEncoder(vocab_size, word_emb_size, embed_size, num_layers, hidden_size, 
                                       init_embedding=embedding_matrix)
#         self.tabular_encoder = tabular_autoencoder
        self.tabular_encoder = TabularFeedforwardNN(input_size=366, latent_dim=embed_size)
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout(0.1)
        self.fc1 = nn.Linear(embed_size, 64)
        self.fc2 = nn.Linear(64, n_classes)
        
    def forward(self, img, row, text):
        img_feature = self.img_encoder(img)
        text_feature = self.text_encoder(text)
        tabular_latent_feature = self.tabular_encoder(row)
#         print(img_feature.shape)
#         print(tabular_latent_feature.shape)
#         print(text_feature.shape)
        combined_feature = img_feature * text_feature * tabular_latent_feature
        combined_feature = self.tanh(combined_feature)
        combined_feature = self.fc1(combined_feature)
        combined_feature = self.dropout(combined_feature)
        combined_feature = self.fc2(combined_feature)
#         output = F.softmax(conbined_feature, dim=1)
        return combined_feature

In [86]:
from sklearn.model_selection import StratifiedKFold
n_splits = 5
# kfold = GroupKFold(n_splits=n_splits)
split_index = []
# for train_idx, valid_idx in kfold.split(train, train['AdoptionSpeed'], train['RescuerID']):
#     split_index.append((train_idx, valid_idx))

kfold = StratifiedKFold(n_splits=n_splits, random_state=42, shuffle=True)
for train_idx, valid_idx in kfold.split(df, df['AdoptionSpeed']):
    split_index.append((train_idx, valid_idx))
    
    

In [87]:
learning_rate = 3e-4


In [88]:
import time

In [89]:
n_epochs = 10

In [90]:
batch_size = 64

In [91]:
torch.cuda.empty_cache()

In [94]:
for fold, (train_idx, val_idx) in enumerate(split_index):
    print(f'Fold #{fold}')
    y_train = df['AdoptionSpeed'].iloc[train_idx].values
    y_val = df['AdoptionSpeed'].iloc[val_idx].values
    
    hist = histogram(y_train.astype(int), int(np.min(df['AdoptionSpeed'])), int(np.max(df['AdoptionSpeed'])))
    
    train_cdf = get_cdf(hist)
    
    training_set = PetDataset(df.iloc[train_idx], processed_df.iloc[train_idx])
    validation_set = PetDataset(df.iloc[val_idx], processed_df.iloc[val_idx])

    training_loader = DataLoader(training_set, batch_size=batch_size, shuffle=True, pin_memory=True, collate_fn=nn_collate)
    validating_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=True, pin_memory=True, collate_fn=nn_collate)

    model = Multimodal(vocab_size=vocab_size).cuda()

    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate)
#     scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=7, eta_min=0.0003)

    sparse_cse_loss = nn.CrossEntropyLoss(reduction='mean')
    mse_loss = nn.MSELoss()

    iteration = 0
    min_val_loss = 100
    since = time.time()
    for epoch in range(n_epochs):
        print(f'Epoch #{epoch}')
        model.train()
        for step, (image, row, sequence, y) in enumerate(training_loader):
            iteration += 1
            image = torch.stack(image).cuda()
            row = row.type(torch.FloatTensor).cuda()
#             print(row.shape)
            sequence = sequence.cuda()
            pred = model(image, row, sequence)            
            loss = sparse_cse_loss(pred.type(torch.FloatTensor), torch.LongTensor(y))
            optimizer.zero_grad()
            if step%50 ==0:
                print('step', step, 'loss = ', loss)
            loss.backward()
            optimizer.step()
#             scheduler.step()
        
        model.eval()
        val_predicts = []
        with torch.no_grad():
            for image, row, sequence, y in validating_loader:
                image = torch.stack(image).cuda()
                row = row.type(torch.FloatTensor).cuda()
                sequence = sequence.cuda()
                y = torch.FloatTensor(y).cuda()#.view(-1, 1)
                
                val_predicts.append(model(image, row, sequence).cpu().numpy())
        val_predicts_ = np.concatenate(val_predicts)
        val_predicts_ = val_predicts_.argmax(axis=1)

        pred_test_y_k = getTestScore2(val_predicts_, train_cdf)
        qwk = quadratic_weighted_kappa(y_val, pred_test_y_k)
        val_loss = rmse(y_val, val_predicts_)

        print('val_loss', val_loss)
        print('qwk', qwk)

Fold #0
Epoch #0
step 0 loss =  tensor(1.6222, grad_fn=<NllLossBackward>)
step 50 loss =  tensor(1.5105, grad_fn=<NllLossBackward>)
step 100 loss =  tensor(1.5377, grad_fn=<NllLossBackward>)
step 150 loss =  tensor(1.4314, grad_fn=<NllLossBackward>)
val_loss 1.546712815691299
qwk 0.01893909416664774
Epoch #1
step 0 loss =  tensor(1.3347, grad_fn=<NllLossBackward>)
step 50 loss =  tensor(1.3830, grad_fn=<NllLossBackward>)
step 100 loss =  tensor(1.3997, grad_fn=<NllLossBackward>)
step 150 loss =  tensor(1.4030, grad_fn=<NllLossBackward>)
val_loss 1.5736774663019668
qwk -0.0013462267170447628
Epoch #2
step 0 loss =  tensor(1.2859, grad_fn=<NllLossBackward>)
step 50 loss =  tensor(1.3249, grad_fn=<NllLossBackward>)
step 100 loss =  tensor(1.3991, grad_fn=<NllLossBackward>)
step 150 loss =  tensor(1.2841, grad_fn=<NllLossBackward>)
val_loss 1.7283842706349468
qwk 0.015558207352699083
Epoch #3
step 0 loss =  tensor(1.4247, grad_fn=<NllLossBackward>)
step 50 loss =  tensor(1.3039, grad_fn=<N

In [95]:
torch.save(model, 'model.pth')