In [77]:
from google.colab import drive

drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [78]:
cd /content/gdrive/My Drive/Colab Notebooks/data

/content/gdrive/My Drive/Colab Notebooks/data


In [79]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [80]:
import pandas as pd
import numpy as np
import torch
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader


# Read the train/test file from kaggle
train = pd.read_csv('/content/gdrive/My Drive/Colab Notebooks/data/train_covid.csv')
test = pd.read_csv('/content/gdrive/My Drive/Colab Notebooks/data/test_covid.csv')

In [81]:
# Fill missing value by average
train["new_cases"]=train["new_cases"].fillna(train["new_cases"].mean())
train["new_deaths"]=train["new_deaths"].fillna(train["new_deaths"].mean())
train["stringency_index"]=train["stringency_index"].fillna(train["stringency_index"].mean())
train["population"]=train["population"].fillna(train["population"].mean())
train["population_density"]=train["population_density"].fillna(train["population_density"].mean())
train["median_age"]=train["median_age"].fillna(train["median_age"].mean())
train["aged_65_older"]=train["aged_65_older"].fillna(train["aged_65_older"].mean())
train["aged_70_older"]=train["aged_70_older"].fillna(train["aged_70_older"].mean())
train["female_smokers"]=train["female_smokers"].fillna(train["female_smokers"].mean())
train["male_smokers"]=train["male_smokers"].fillna(train["male_smokers"].mean())
train["gdp_per_capita"]=train["gdp_per_capita"].fillna(train["gdp_per_capita"].mean())
train["handwashing_facilities"]=train["handwashing_facilities"].fillna(train["handwashing_facilities"].mean())
train["hospital_beds_per_thousand"]=train["hospital_beds_per_thousand"].fillna(train["hospital_beds_per_thousand"].mean())
train = train.fillna(train.mean())
test = test.fillna(train.mean())

In [82]:
## 2layer FC layer NN
class myModel(torch.nn.Module):
    def __init__(self):
        super(myModel, self).__init__()
        self.model = torch.nn.Sequential(
                        torch.nn.Linear(30, 10),
                        torch.nn.GELU(),
                        torch.nn.Linear(10, 2),
                        torch.nn.ReLU()
        )
        # [c*7, d*7] --> [c, d]
        
    def forward(self, x):
        c_and_d = self.model(x)
        c = c_and_d[:, 0] # new_case
        d = c_and_d[:, 1] # new_death
        
        #print("c, d: ", c, d)
        return c, d

class myDataset(Dataset):
    def __init__(self, inputs, outputs):
        self.inputs = inputs
        self.outputs = outputs
        
    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return {"inputs": self.inputs[idx], "outputs": self.outputs[idx]}
    
def my_collate(samples):
    inputs = []
    outputs = []
    for sample in samples:
        inputs.append(sample["inputs"])
        outputs.append(sample["outputs"])
        
    return {"inputs": torch.tensor(inputs), "outputs": torch.tensor(outputs)}

In [83]:
# initialize the dataset 
train_case = [0.] * 10
train_death = [0.] * 10
train_age = [0.] * 10
train_input = train_case + train_death + train_age
train_inputs = []
train_outputs = []
locations = []


test_case = [0.] * 10
test_death = [0.] * 10
test_age = [0.] * 10
test_input = test_case + test_death + test_age
test_inputs = []

In [84]:
# Make dataset for training 
# [c*7, d*7] --> [c, d]
# (ex: given: 4/1~4/7 case & death, predict: 4/8 case & death)
for key in tqdm(sorted(set(train["location"]))):
    locations.append(key)
    temp_df = train[train["location"] == key]
    train_case = [0.] * 10
    train_death = [0.] * 10 
    train_age = [0.] * 10

    train_input = train_case + train_death + train_age
    train_inputs.append(train_input)
    
    append_flag = 1
    for index, row in temp_df.iterrows():
        if(append_flag):
            train_outputs.append([float(row["new_cases"]), float(row["new_deaths"])])
            append_flag = 0
            
        train_case = train_case[1:] + [float(row["new_cases"])]
        train_death = train_death[1:] + [float(row["new_deaths"])]
        train_age = train_age[1:] + [float(row["median_age"])]
        train_input = train_case + train_death + train_age
        #train_inputs.append(train_input)
        
        # To handle too many multiple values & the last value
        # (ex: [0, 0, .... , 0, 0] --> [0, 0])
        if train_input in train_inputs or index == temp_df["id"].values[-1]: continue
        else: train_inputs.append(train_input); append_flag = 1

100%|██████████| 208/208 [00:06<00:00, 30.29it/s]


In [85]:
# Evaluation dataset
test_input_dict = {}
for key in tqdm(sorted(set(train["location"]))):
    
    # To predict 5/1 
    temp_df = train[train["location"] == key]
    temp_df = temp_df[(temp_df['date'] >= '2020-04-21') & (temp_df['date'] <= '2020-04-30')]
    
    
    test_case = list(map(lambda x: float(x), temp_df["new_cases"].values))
    test_death = list(map(lambda x: float(x), temp_df["new_deaths"].values))
    test_age = list(map(lambda x: float(x), temp_df["median_age"].values))
    
    if(len(test_case) < 10): test_case = [0.] * (10 - len(test_case)) + test_case
    if(len(test_death) < 10): test_death = [0.] * (10 - len(test_death)) + test_death
    if(len(test_age) < 10): test_age = [0.] * (10 - len(test_age)) + test_age    
   
    test_input = test_case + test_death + test_age
    
    test_input_dict[key] = test_input

100%|██████████| 208/208 [00:00<00:00, 396.34it/s]


In [86]:
batch_size = 100

## Dataset Load
dataset = myDataset(train_inputs, train_outputs)
params = {'batch_size': batch_size,
          'shuffle': True,
          'num_workers':0,
          'drop_last': True,
          'collate_fn': my_collate
         }
train_generator = DataLoader(dataset, **params)

## Model load
model = myModel()

# Loss/Optimizer define
optimizer = torch.optim.AdamW(model.parameters(), amsgrad = True)

class RMSLELoss(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = torch.nn.MSELoss()
        
    def forward(self, pred, actual):
        return torch.sqrt(self.mse(torch.log(pred + 1), torch.log(actual + 1)))
    
# Loss
criterion = RMSLELoss()

In [87]:
# Training
model.train()
for epoch in tqdm(range(100)):
    for k in train_generator:
        pred_c, pred_d = model(k["inputs"])
        
        loss_c = criterion(pred_c, k["outputs"][:, 0])
        loss_d = criterion(pred_d, k["outputs"][:, 1])
        total_loss = (loss_c)*0.9  + (loss_d) * 0.1
        
        
        total_loss.backward()
        torch.nn.utils.clip_grad_value_(model.parameters(), 1)
        optimizer.step()
        
    print("epoch: ", total_loss)

  1%|          | 1/100 [00:00<00:20,  4.85it/s]

epoch:  tensor(1.1868, grad_fn=<AddBackward0>)


  2%|▏         | 2/100 [00:00<00:20,  4.77it/s]

epoch:  tensor(0.8536, grad_fn=<AddBackward0>)


  3%|▎         | 3/100 [00:00<00:20,  4.71it/s]

epoch:  tensor(1.0809, grad_fn=<AddBackward0>)


  4%|▍         | 4/100 [00:00<00:21,  4.51it/s]

epoch:  tensor(0.8265, grad_fn=<AddBackward0>)


  5%|▌         | 5/100 [00:01<00:21,  4.48it/s]

epoch:  tensor(1.0126, grad_fn=<AddBackward0>)


  6%|▌         | 6/100 [00:01<00:21,  4.44it/s]

epoch:  tensor(0.8420, grad_fn=<AddBackward0>)


  7%|▋         | 7/100 [00:01<00:20,  4.43it/s]

epoch:  tensor(1.0558, grad_fn=<AddBackward0>)


  8%|▊         | 8/100 [00:01<00:20,  4.42it/s]

epoch:  tensor(0.9397, grad_fn=<AddBackward0>)


  9%|▉         | 9/100 [00:02<00:20,  4.42it/s]

epoch:  tensor(0.7400, grad_fn=<AddBackward0>)


 10%|█         | 10/100 [00:02<00:20,  4.42it/s]

epoch:  tensor(0.7696, grad_fn=<AddBackward0>)


 11%|█         | 11/100 [00:02<00:20,  4.36it/s]

epoch:  tensor(0.9140, grad_fn=<AddBackward0>)


 12%|█▏        | 12/100 [00:02<00:20,  4.37it/s]

epoch:  tensor(0.6833, grad_fn=<AddBackward0>)


 13%|█▎        | 13/100 [00:02<00:19,  4.37it/s]

epoch:  tensor(0.7965, grad_fn=<AddBackward0>)


 14%|█▍        | 14/100 [00:03<00:19,  4.39it/s]

epoch:  tensor(0.9066, grad_fn=<AddBackward0>)


 15%|█▌        | 15/100 [00:03<00:19,  4.38it/s]

epoch:  tensor(0.8777, grad_fn=<AddBackward0>)


 16%|█▌        | 16/100 [00:03<00:19,  4.30it/s]

epoch:  tensor(0.7627, grad_fn=<AddBackward0>)


 17%|█▋        | 17/100 [00:03<00:19,  4.33it/s]

epoch:  tensor(1.2917, grad_fn=<AddBackward0>)


 18%|█▊        | 18/100 [00:04<00:18,  4.33it/s]

epoch:  tensor(0.7069, grad_fn=<AddBackward0>)


 19%|█▉        | 19/100 [00:04<00:18,  4.33it/s]

epoch:  tensor(0.6671, grad_fn=<AddBackward0>)


 20%|██        | 20/100 [00:04<00:18,  4.34it/s]

epoch:  tensor(0.8671, grad_fn=<AddBackward0>)


 21%|██        | 21/100 [00:04<00:18,  4.36it/s]

epoch:  tensor(1.0810, grad_fn=<AddBackward0>)


 22%|██▏       | 22/100 [00:05<00:17,  4.35it/s]

epoch:  tensor(0.9234, grad_fn=<AddBackward0>)


 23%|██▎       | 23/100 [00:05<00:17,  4.37it/s]

epoch:  tensor(0.7950, grad_fn=<AddBackward0>)


 24%|██▍       | 24/100 [00:05<00:17,  4.41it/s]

epoch:  tensor(0.9268, grad_fn=<AddBackward0>)


 25%|██▌       | 25/100 [00:05<00:17,  4.30it/s]

epoch:  tensor(0.8118, grad_fn=<AddBackward0>)


 26%|██▌       | 26/100 [00:05<00:17,  4.32it/s]

epoch:  tensor(1.0262, grad_fn=<AddBackward0>)


 27%|██▋       | 27/100 [00:06<00:16,  4.35it/s]

epoch:  tensor(0.8368, grad_fn=<AddBackward0>)


 28%|██▊       | 28/100 [00:06<00:16,  4.35it/s]

epoch:  tensor(0.8810, grad_fn=<AddBackward0>)


 29%|██▉       | 29/100 [00:06<00:16,  4.37it/s]

epoch:  tensor(0.7619, grad_fn=<AddBackward0>)


 30%|███       | 30/100 [00:06<00:15,  4.40it/s]

epoch:  tensor(0.8325, grad_fn=<AddBackward0>)


 31%|███       | 31/100 [00:07<00:15,  4.38it/s]

epoch:  tensor(0.8655, grad_fn=<AddBackward0>)


 32%|███▏      | 32/100 [00:07<00:15,  4.38it/s]

epoch:  tensor(0.8305, grad_fn=<AddBackward0>)


 33%|███▎      | 33/100 [00:07<00:15,  4.30it/s]

epoch:  tensor(0.7764, grad_fn=<AddBackward0>)


 34%|███▍      | 34/100 [00:07<00:15,  4.34it/s]

epoch:  tensor(0.8372, grad_fn=<AddBackward0>)


 35%|███▌      | 35/100 [00:08<00:15,  4.33it/s]

epoch:  tensor(0.8639, grad_fn=<AddBackward0>)


 36%|███▌      | 36/100 [00:08<00:14,  4.33it/s]

epoch:  tensor(0.8851, grad_fn=<AddBackward0>)


 37%|███▋      | 37/100 [00:08<00:14,  4.31it/s]

epoch:  tensor(0.8984, grad_fn=<AddBackward0>)


 38%|███▊      | 38/100 [00:08<00:14,  4.31it/s]

epoch:  tensor(0.9403, grad_fn=<AddBackward0>)


 39%|███▉      | 39/100 [00:08<00:14,  4.27it/s]

epoch:  tensor(0.8124, grad_fn=<AddBackward0>)


 40%|████      | 40/100 [00:09<00:13,  4.30it/s]

epoch:  tensor(0.8917, grad_fn=<AddBackward0>)


 41%|████      | 41/100 [00:09<00:13,  4.32it/s]

epoch:  tensor(0.7965, grad_fn=<AddBackward0>)


 42%|████▏     | 42/100 [00:09<00:13,  4.35it/s]

epoch:  tensor(0.8410, grad_fn=<AddBackward0>)


 43%|████▎     | 43/100 [00:09<00:13,  4.36it/s]

epoch:  tensor(0.6730, grad_fn=<AddBackward0>)


 44%|████▍     | 44/100 [00:10<00:13,  4.29it/s]

epoch:  tensor(0.9808, grad_fn=<AddBackward0>)


 45%|████▌     | 45/100 [00:10<00:12,  4.28it/s]

epoch:  tensor(0.8108, grad_fn=<AddBackward0>)


 46%|████▌     | 46/100 [00:10<00:12,  4.29it/s]

epoch:  tensor(0.9454, grad_fn=<AddBackward0>)


 47%|████▋     | 47/100 [00:10<00:12,  4.34it/s]

epoch:  tensor(0.8705, grad_fn=<AddBackward0>)


 48%|████▊     | 48/100 [00:11<00:12,  4.31it/s]

epoch:  tensor(1.0075, grad_fn=<AddBackward0>)


 49%|████▉     | 49/100 [00:11<00:11,  4.32it/s]

epoch:  tensor(0.8099, grad_fn=<AddBackward0>)


 50%|█████     | 50/100 [00:11<00:11,  4.34it/s]

epoch:  tensor(0.8419, grad_fn=<AddBackward0>)


 51%|█████     | 51/100 [00:11<00:11,  4.29it/s]

epoch:  tensor(0.9967, grad_fn=<AddBackward0>)


 52%|█████▏    | 52/100 [00:11<00:11,  4.13it/s]

epoch:  tensor(0.8364, grad_fn=<AddBackward0>)


 53%|█████▎    | 53/100 [00:12<00:11,  4.13it/s]

epoch:  tensor(0.7253, grad_fn=<AddBackward0>)


 54%|█████▍    | 54/100 [00:12<00:11,  4.18it/s]

epoch:  tensor(0.7119, grad_fn=<AddBackward0>)


 55%|█████▌    | 55/100 [00:12<00:10,  4.23it/s]

epoch:  tensor(1.0463, grad_fn=<AddBackward0>)


 56%|█████▌    | 56/100 [00:12<00:10,  4.27it/s]

epoch:  tensor(0.8272, grad_fn=<AddBackward0>)


 57%|█████▋    | 57/100 [00:13<00:10,  4.29it/s]

epoch:  tensor(0.7812, grad_fn=<AddBackward0>)


 58%|█████▊    | 58/100 [00:13<00:09,  4.28it/s]

epoch:  tensor(0.9192, grad_fn=<AddBackward0>)


 59%|█████▉    | 59/100 [00:13<00:09,  4.29it/s]

epoch:  tensor(0.9833, grad_fn=<AddBackward0>)


 60%|██████    | 60/100 [00:13<00:09,  4.24it/s]

epoch:  tensor(0.7599, grad_fn=<AddBackward0>)


 61%|██████    | 61/100 [00:14<00:09,  4.26it/s]

epoch:  tensor(0.7227, grad_fn=<AddBackward0>)


 62%|██████▏   | 62/100 [00:14<00:08,  4.26it/s]

epoch:  tensor(0.9963, grad_fn=<AddBackward0>)


 63%|██████▎   | 63/100 [00:14<00:08,  4.31it/s]

epoch:  tensor(0.9210, grad_fn=<AddBackward0>)


 64%|██████▍   | 64/100 [00:14<00:08,  4.34it/s]

epoch:  tensor(0.8863, grad_fn=<AddBackward0>)


 65%|██████▌   | 65/100 [00:15<00:08,  4.31it/s]

epoch:  tensor(0.7760, grad_fn=<AddBackward0>)


 66%|██████▌   | 66/100 [00:15<00:07,  4.31it/s]

epoch:  tensor(0.7959, grad_fn=<AddBackward0>)


 67%|██████▋   | 67/100 [00:15<00:07,  4.32it/s]

epoch:  tensor(0.8879, grad_fn=<AddBackward0>)


 68%|██████▊   | 68/100 [00:15<00:07,  4.30it/s]

epoch:  tensor(0.7545, grad_fn=<AddBackward0>)


 69%|██████▉   | 69/100 [00:15<00:07,  4.31it/s]

epoch:  tensor(0.7140, grad_fn=<AddBackward0>)


 70%|███████   | 70/100 [00:16<00:07,  4.20it/s]

epoch:  tensor(0.9167, grad_fn=<AddBackward0>)


 71%|███████   | 71/100 [00:16<00:06,  4.23it/s]

epoch:  tensor(0.6610, grad_fn=<AddBackward0>)


 72%|███████▏  | 72/100 [00:16<00:06,  4.28it/s]

epoch:  tensor(0.8098, grad_fn=<AddBackward0>)


 73%|███████▎  | 73/100 [00:16<00:06,  4.29it/s]

epoch:  tensor(0.7358, grad_fn=<AddBackward0>)


 74%|███████▍  | 74/100 [00:17<00:06,  4.25it/s]

epoch:  tensor(0.8537, grad_fn=<AddBackward0>)


 75%|███████▌  | 75/100 [00:17<00:05,  4.27it/s]

epoch:  tensor(0.8125, grad_fn=<AddBackward0>)


 76%|███████▌  | 76/100 [00:17<00:05,  4.30it/s]

epoch:  tensor(0.8673, grad_fn=<AddBackward0>)


 77%|███████▋  | 77/100 [00:17<00:05,  4.30it/s]

epoch:  tensor(0.8616, grad_fn=<AddBackward0>)


 78%|███████▊  | 78/100 [00:18<00:05,  4.28it/s]

epoch:  tensor(0.8963, grad_fn=<AddBackward0>)


 79%|███████▉  | 79/100 [00:18<00:05,  4.19it/s]

epoch:  tensor(0.8226, grad_fn=<AddBackward0>)


 80%|████████  | 80/100 [00:18<00:04,  4.21it/s]

epoch:  tensor(1.0459, grad_fn=<AddBackward0>)


 81%|████████  | 81/100 [00:18<00:04,  4.22it/s]

epoch:  tensor(0.7694, grad_fn=<AddBackward0>)


 82%|████████▏ | 82/100 [00:19<00:04,  4.25it/s]

epoch:  tensor(0.8497, grad_fn=<AddBackward0>)


 83%|████████▎ | 83/100 [00:19<00:04,  4.20it/s]

epoch:  tensor(0.7446, grad_fn=<AddBackward0>)


 84%|████████▍ | 84/100 [00:19<00:03,  4.25it/s]

epoch:  tensor(0.8681, grad_fn=<AddBackward0>)


 85%|████████▌ | 85/100 [00:19<00:03,  4.24it/s]

epoch:  tensor(0.7995, grad_fn=<AddBackward0>)


 86%|████████▌ | 86/100 [00:19<00:03,  4.28it/s]

epoch:  tensor(0.8008, grad_fn=<AddBackward0>)


 87%|████████▋ | 87/100 [00:20<00:03,  4.21it/s]

epoch:  tensor(0.9702, grad_fn=<AddBackward0>)


 88%|████████▊ | 88/100 [00:20<00:02,  4.23it/s]

epoch:  tensor(0.8372, grad_fn=<AddBackward0>)


 89%|████████▉ | 89/100 [00:20<00:02,  4.24it/s]

epoch:  tensor(0.7185, grad_fn=<AddBackward0>)


 90%|█████████ | 90/100 [00:20<00:02,  4.23it/s]

epoch:  tensor(0.8290, grad_fn=<AddBackward0>)


 91%|█████████ | 91/100 [00:21<00:02,  4.24it/s]

epoch:  tensor(0.7407, grad_fn=<AddBackward0>)


 92%|█████████▏| 92/100 [00:21<00:01,  4.24it/s]

epoch:  tensor(1.1111, grad_fn=<AddBackward0>)


 93%|█████████▎| 93/100 [00:21<00:01,  4.21it/s]

epoch:  tensor(0.7615, grad_fn=<AddBackward0>)


 94%|█████████▍| 94/100 [00:21<00:01,  4.24it/s]

epoch:  tensor(0.9687, grad_fn=<AddBackward0>)


 95%|█████████▌| 95/100 [00:22<00:01,  4.29it/s]

epoch:  tensor(0.7584, grad_fn=<AddBackward0>)


 96%|█████████▌| 96/100 [00:22<00:00,  4.25it/s]

epoch:  tensor(0.7910, grad_fn=<AddBackward0>)


 97%|█████████▋| 97/100 [00:22<00:00,  4.25it/s]

epoch:  tensor(1.1910, grad_fn=<AddBackward0>)


 98%|█████████▊| 98/100 [00:22<00:00,  4.28it/s]

epoch:  tensor(0.7371, grad_fn=<AddBackward0>)


 99%|█████████▉| 99/100 [00:23<00:00,  4.29it/s]

epoch:  tensor(0.7631, grad_fn=<AddBackward0>)


100%|██████████| 100/100 [00:23<00:00,  4.30it/s]

epoch:  tensor(0.9016, grad_fn=<AddBackward0>)





In [88]:
## Evaluation
eval_location = []

# save all predictions
predict_case = []
predict_death = []

model.eval()
for idx, row in test.iterrows():
    if(row["location"] not in eval_location):
        #print(row["location"])
        eval_location.append(row["location"])
        eval_input = test_input_dict[row["location"]]
        
    c, d = model(torch.tensor(eval_input).unsqueeze(0))
    predict_case.append(c)
    predict_death.append(d)
    
    # update the last new_case/new_death value
    eval_input[9] = c
    eval_input[19] = d

In [89]:
# Make submission file
predict_all = torch.stack([torch.tensor(predict_case), torch.tensor(predict_death)], dim = 1)
ppredict = predict_all.numpy()
ppredict = pd.DataFrame(ppredict, columns = ["new_cases", "new_deaths"])
ppredict["id"] = ppredict.index

ppredict = ppredict[["id", "new_cases", "new_deaths"]]

In [90]:
# Save submission file
ppredict.to_csv("Submit.csv", index = False)