In [1]:
import os, sys, shutil, glob
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data_utils
import matplotlib.pyplot as plt

In [2]:
# import modules
import sklearn
from sklearn.metrics import make_scorer, mean_squared_log_error
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split as tts
from sklearn import ensemble
from sklearn.preprocessing import OneHotEncoder as OHE
from sklearn.metrics import mean_squared_error as mse
from sklearn.preprocessing import MinMaxScaler, StandardScaler

import scipy as sp
import math

In [3]:
import logging

logger = logging.getLogger(__name__)
logging.basicConfig(format='%(asctime)s : %(message)s', level=logging.INFO)

## DATA Load

In [4]:
base_dir = os.getcwd()
train_dir = os.path.join(base_dir, 'train/train.csv')

In [5]:
base_df = pd.read_csv(train_dir)
base_df.head()

Unnamed: 0,Day,Hour,Minute,DHI,DNI,WS,RH,T,TARGET
0,0,0,0,0,0,1.5,69.08,-12,0.0
1,0,0,30,0,0,1.5,69.06,-12,0.0
2,0,1,0,0,0,1.6,71.78,-12,0.0
3,0,1,30,0,0,1.6,71.75,-12,0.0
4,0,2,0,0,0,1.6,75.2,-12,0.0


## PREPROCESS

In [7]:
base_df['Time'] = base_df['Hour']*60 + base_df['Minute']

In [22]:
# Shift
shift_df = base_df.copy()

shift_df['TARGET1'] = shift_df['TARGET'].shift(-48).fillna(method='ffill')
shift_df['TARGET2'] = shift_df['TARGET'].shift(-96).fillna(method='ffill')

shift_df_result = shift_df.iloc[:-96]

In [23]:
shift_df_result.head(3)

Unnamed: 0,Day,Hour,Minute,DHI,DNI,WS,RH,T,TARGET,Time,TARGET1,TARGET2
0,0,0,0,0,0,1.5,69.08,-12,0.0,0,0.0,0.0
1,0,0,30,0,0,1.5,69.06,-12,0.0,30,0.0,0.0
2,0,1,0,0,0,1.6,71.78,-12,0.0,60,0.0,0.0


In [24]:
# Td, T-Td
b = 17.62
c = 243.12
term1 = b*shift_df_result['T']/(c + shift_df_result['T'])
term2 = np.log(shift_df_result['RH']/100)
gamma = term1 + term2
dp = (c*gamma)/(b-gamma)

shift_df_result['DP'] = dp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shift_df_result['DP'] = dp


In [25]:
shift_df_result.head(3)

Unnamed: 0,Day,Hour,Minute,DHI,DNI,WS,RH,T,TARGET,Time,TARGET1,TARGET2,DP
0,0,0,0,0,0,1.5,69.08,-12,0.0,0,0.0,0.0,-16.522271
1,0,0,30,0,0,1.5,69.06,-12,0.0,30,0.0,0.0,-16.525742
2,0,1,0,0,0,1.6,71.78,-12,0.0,60,0.0,0.0,-16.061776


## Sampling

In [7]:
#shift_df_result = shift_df_result.loc[(shift_df_result.Hour >= 3) & (shift_df_result.Hour <= 22), :]

In [26]:
# Time
# (1) Get rid of time variables
df_vars = shift_df_result[['Time', 'DHI', 'DNI', 'WS', 'RH', 'T', 'DP', 'TARGET']]
df_label = shift_df_result[['TARGET1', 'TARGET2']]

In [27]:
def make_torch_dataset(scale=True, test_size = 0.2, *datasets):
    
    # To torch
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    
    train_x, val_x, train_y, val_y = tts(df_vars, df_label, test_size = test_size, random_state = 2020)
    if scale:
        scaled = StandardScaler()
        scaled.fit(train_x)
        
        train_x = scaled.transform(train_x)
        val_x = scaled.transform(val_x)
        
        train_x_torch = torch.tensor(train_x).float().to(device)
        train_y_torch = torch.tensor(train_y.values).float().to(device)

        val_x_torch = torch.tensor(val_x).float().to(device)
        val_y_torch = torch.tensor(val_y.values).float().to(device)
        
    else:
        train_x_torch = torch.tensor(train_x.values).float().to(device)
        train_y_torch = torch.tensor(train_y.values).float().to(device)

        val_x_torch = torch.tensor(val_x.values).float().to(device)
        val_y_torch = torch.tensor(val_y.values).float().to(device)
    
    
    return train_x_torch, val_x_torch, train_y_torch, val_y_torch, scaled

In [28]:
train_x_torch, val_x_torch, train_y_torch, val_y_torch, scaled = make_torch_dataset(True, 0.25, df_vars, df_label)

In [29]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [36]:
#### batch iterator
batch_size = 128

trainset = data_utils.TensorDataset(train_x_torch, train_y_torch)
train_loader = data_utils.DataLoader(trainset, batch_size = batch_size, shuffle = True)

valset = data_utils.TensorDataset(val_x_torch, val_y_torch)
val_loader = data_utils.DataLoader(valset, batch_size = len(valset), shuffle = True)

In [31]:
use_cuda = torch.cuda.is_available()

class DaconNN(nn.Module):
    
    def __init__(self, x_features, dropout_prob):
        super(DaconNN, self).__init__()
        
        self.dropout_prob = dropout_prob
        self.block1_in = 512
        self.block1_hid = 1024
        
        # block1
        self.block1 = nn.Sequential(
            nn.Linear(x_features, self.block1_in),
            nn.BatchNorm1d(self.block1_in),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
            
            nn.Linear(self.block1_in, self.block1_hid),
            nn.BatchNorm1d(self.block1_hid),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
            
            nn.Linear(self.block1_hid, self.block1_in),
            nn.BatchNorm1d(self.block1_in),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
        )
        
        self.block1_rep = nn.Sequential(
            nn.Linear(self.block1_in, self.block1_in),
            nn.BatchNorm1d(self.block1_in),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
            
            nn.Linear(self.block1_in, self.block1_hid),
            nn.BatchNorm1d(self.block1_hid),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
            
            nn.Linear(self.block1_hid, self.block1_in),
            nn.BatchNorm1d(self.block1_in),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
        )
        
        self.block1_shortcut = nn.Sequential(
            nn.Linear(x_features, self.block1_in),
            nn.BatchNorm1d(self.block1_in),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
        )
        
        # block2
        self.block2 = nn.Sequential(
            nn.Linear(self.block1_in, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
            
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
            
            nn.Linear(64, 9),
        )
        
    def forward(self, x):
        
        res = self.block1_shortcut(x)
        
        # forward block1
        out = self.block1(x)
        #out = out + res

        # forward block2
        out = self.block2(out)
        out = torch.max(torch.zeros_like(out), out)

        return out

In [17]:
use_cuda = torch.cuda.is_available()

class DaconNN_2(nn.Module):
 
    def __init__(self, x_features, dropout_prob):
        super(DaconNN_2, self).__init__()
        
        self.dropout_prob = dropout_prob
        self.block1_in = 512
        self.block1_hid = 1024
        
        # block1
        self.block1 = nn.Sequential(
            nn.Linear(x_features, self.block1_in),
            nn.BatchNorm1d(self.block1_in),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
            
            nn.Linear(self.block1_in, self.block1_hid),
            nn.BatchNorm1d(self.block1_hid),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
            
            nn.Linear(self.block1_hid, self.block1_in),
            nn.BatchNorm1d(self.block1_in),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
        )
        
        self.block1_rep = nn.Sequential(
            nn.Linear(self.block1_in, self.block1_in),
            nn.BatchNorm1d(self.block1_in),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
            
            nn.Linear(self.block1_in, self.block1_hid),
            nn.BatchNorm1d(self.block1_hid),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
            
            nn.Linear(self.block1_hid, self.block1_in),
            nn.BatchNorm1d(self.block1_in),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
        )
        
        self.block1_shortcut = nn.Sequential(
            nn.Linear(x_features, self.block1_in),
            nn.BatchNorm1d(self.block1_in),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
        )
        
        # block2
        self.block2 = nn.Sequential(
            nn.Linear(self.block1_in, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
            
            nn.Linear(256, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(p = self.dropout_prob),
            
            nn.Linear(64, 9),
        )
        
    def forward(self, x):
        
        res = self.block1_shortcut(x)
        
        # forward block1
        out = self.block1(x)
        #out = out + res

        # forward block2
        out = self.block2(out)
        out = torch.max(torch.zeros_like(out), out)
        return out

In [183]:
# # using gpu (if available)
# model = DaconNN(x_features = train_x_torch.shape[1], dropout_prob = 0.1)
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# print(device) # cuda:0

# model.to(device)

In [184]:
# # Define Loss function (Quantile Loss)
# def QuantileLoss(pred, gt, *qs):
#     q1, q2, q3 = qs
    
#     e1 = pred[:, 0:1] - gt
#     e2 = pred[:, 1:2] - gt
#     e3 = pred[:, 2:3] - gt
    
#     eq1 = torch.max(q1*e1, (q1-1)*e1)
#     eq2 = torch.max(q2*e2, (q2-1)*e2)
#     eq3 = torch.max(q3*e3, (q3-1)*e3)
    
#     loss = torch.mean(torch.sum(eq1, eq2, eq3))
#     return loss

In [37]:
# Define Loss function (Quantile Loss)
# def QuantileLoss(q, pred, gt):
#     loss = torch.mean(torch.max((gt-pred)*q, (pred-gt)*(1-q)))
#     return loss

# def QuantileLoss(q, pred, gt):
#     loss = (gt-pred)*q if gt >= pred else (pred-gt)*(1-q)
#     #loss = torch.mean(torch.max((gt-pred)*q, (pred-gt)*(1-q)))
#     return loss

def QuantileLoss(qs, pred, gt):
    #qs = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]
    qs = qs if isinstance(qs, list) else [qs]
    
    sum_loss = 0
    for i, q in enumerate(qs):
        loss = pred[:, i] - gt
        loss = torch.max(q*loss, (q-1)*loss)
        sum_loss += torch.mean(loss)
    
#     symloss1 = torch.mean(torch.abs(torch.abs((pred[:, 4] - pred[:, 0]) - torch.abs(pred[:, 4] - pred[:, 8]))))
#     symloss2 = torch.mean(torch.abs(torch.abs((pred[:, 4] - pred[:, 1]) - torch.abs(pred[:, 4] - pred[:, 7]))))
#     symloss3 = torch.mean(torch.abs(torch.abs((pred[:, 4] - pred[:, 2]) - torch.abs(pred[:, 4] - pred[:, 6]))))
#     symloss4 = torch.mean(torch.abs(torch.abs((pred[:, 4] - pred[:, 3]) - torch.abs(pred[:, 4] - pred[:, 5]))))
    
#     symloss = symloss1 + symloss2 + symloss3 + symloss4
    
    fin_loss = sum_loss/len(qs)# + 0.05*symloss/4
    return fin_loss

def train_dnn(epochs, target, qs, dropout, model_type, *loaders):
    
    train_loader = loaders[0]
    val_loader = loaders[1]
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    if model_type == '2':
        model = DaconNN_2(x_features = train_x_torch.shape[1], dropout_prob = dropout)
    else:
        model = DaconNN(x_features = train_x_torch.shape[1], dropout_prob = dropout)
        
    model.to(device)
    
    learning_rate = 0.005
    optimizer = optim.Adam(model.parameters(), lr = learning_rate)

    decayRate = 0.998
    lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer = optimizer, gamma = decayRate)

    train_loss_sum = 0.0
    val_loss_sum = 0.0
    
    for epoch in range(epochs):
        
        model.train()
        
        for i, train_block in enumerate(train_loader):

            train_X, train_Y = train_block[0], train_block[1]
            optimizer.zero_grad()

            # loss 계산
            train_pred = model(train_X)
            train_loss = QuantileLoss(qs, train_pred, train_Y[:, target-1])
            train_loss.backward()

            optimizer.step()

            train_loss_sum += train_loss
        
        with torch.no_grad():
            val_set = iter(val_loader).next()
            val_X, val_Y = val_set[0], val_set[1]
            
            val_pred = model(val_X)
            val_loss = QuantileLoss(qs, val_pred, val_Y[:, target-1])
            val_loss = torch.sum(val_loss)
        
        print(f"Epoch: {epoch+1} | Loss: {train_loss_sum.item()/len(train_loader):.4f} | Val Loss: {val_loss.item():.4f}")
        
        train_loss_sum = 0.0
        
        # learning rate decaying
        lr_scheduler.step()
 
    return model

## 10 heads

In [38]:
qs = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1]
param_dropout = 0.0

In [None]:
epochs_1 = 300
model_fit_1 = train_dnn(epochs_1, 1, qs, 0.0, None, train_loader, val_loader)

Epoch: 1 | Loss: 3.3631 | Val Loss: 2.3147
Epoch: 2 | Loss: 2.4455 | Val Loss: 2.3238
Epoch: 3 | Loss: 2.3944 | Val Loss: 2.2658
Epoch: 4 | Loss: 2.3645 | Val Loss: 2.2640
Epoch: 5 | Loss: 2.3651 | Val Loss: 2.2408
Epoch: 6 | Loss: 2.3193 | Val Loss: 2.2881
Epoch: 7 | Loss: 2.3086 | Val Loss: 2.2241
Epoch: 8 | Loss: 2.2947 | Val Loss: 2.2374
Epoch: 9 | Loss: 2.2948 | Val Loss: 2.2530
Epoch: 10 | Loss: 2.2724 | Val Loss: 2.2158
Epoch: 11 | Loss: 2.2774 | Val Loss: 2.3739
Epoch: 12 | Loss: 2.2644 | Val Loss: 2.2223
Epoch: 13 | Loss: 2.2727 | Val Loss: 2.3019
Epoch: 14 | Loss: 2.2657 | Val Loss: 2.2593
Epoch: 15 | Loss: 2.2446 | Val Loss: 2.2092
Epoch: 16 | Loss: 2.2505 | Val Loss: 2.1976
Epoch: 17 | Loss: 2.2425 | Val Loss: 2.2184
Epoch: 18 | Loss: 2.2299 | Val Loss: 2.2037
Epoch: 19 | Loss: 2.2293 | Val Loss: 2.1989
Epoch: 20 | Loss: 2.2283 | Val Loss: 2.2032
Epoch: 21 | Loss: 2.2338 | Val Loss: 2.1943
Epoch: 22 | Loss: 2.2177 | Val Loss: 2.1797
Epoch: 23 | Loss: 2.2177 | Val Loss: 2.15

In [22]:
epochs_2 = 500
model_fit_2 = train_dnn(epochs_2, 2, qs, 0.0, '2', train_loader, val_loader)

Epoch: 1 | Loss: 2.9667 | Val Loss: 2.3553
Epoch: 2 | Loss: 2.5075 | Val Loss: 2.3631
Epoch: 3 | Loss: 2.4477 | Val Loss: 2.3887
Epoch: 4 | Loss: 2.3923 | Val Loss: 2.2201
Epoch: 5 | Loss: 2.3396 | Val Loss: 2.1961
Epoch: 6 | Loss: 2.3163 | Val Loss: 2.1986
Epoch: 7 | Loss: 2.2999 | Val Loss: 2.1813
Epoch: 8 | Loss: 2.2872 | Val Loss: 2.2721
Epoch: 9 | Loss: 2.2830 | Val Loss: 2.1678
Epoch: 10 | Loss: 2.2698 | Val Loss: 2.1462
Epoch: 11 | Loss: 2.2460 | Val Loss: 2.1402
Epoch: 12 | Loss: 2.2451 | Val Loss: 2.1749
Epoch: 13 | Loss: 2.2327 | Val Loss: 2.2465
Epoch: 14 | Loss: 2.2295 | Val Loss: 2.1367
Epoch: 15 | Loss: 2.2174 | Val Loss: 2.2636
Epoch: 16 | Loss: 2.2143 | Val Loss: 2.1650
Epoch: 17 | Loss: 2.2170 | Val Loss: 2.1327
Epoch: 18 | Loss: 2.1996 | Val Loss: 2.1294
Epoch: 19 | Loss: 2.2009 | Val Loss: 2.1069
Epoch: 20 | Loss: 2.2005 | Val Loss: 2.1268
Epoch: 21 | Loss: 2.1833 | Val Loss: 2.1139
Epoch: 22 | Loss: 2.1830 | Val Loss: 2.1492
Epoch: 23 | Loss: 2.1811 | Val Loss: 2.28

## Percentile별로 모델 구축

In [None]:
# TARGET 1
qs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
epochs = 200

model_tg_1 = []

for q in qs:
    print(f"Model for target1, for q = {q}")
    model_fit_tp = train_dnn(epochs, 1, q, train_loader, val_loader)
    model_tg_1.append(model_fit_tp)

In [None]:
# TARGET 2
qs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
epochs = 200

model_tg_2 = []

for q in qs:
    print(f"Model for target1, for q = {q}")
    model_fit_tp = train_dnn(epochs, 2, q, train_loader, val_loader)
    model_tg_2.append(model_fit_tp)

In [33]:
with torch.no_grad():
    result = model_fit_1(val_x_torch)
    
result

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [15.5273, 23.3396, 29.1010,  ..., 56.5765, 62.3381, 70.1121]],
       device='cuda:0')

In [36]:
with torch.no_grad():
    current_model = model_fit_1
    test_pred = current_model(val_x_torch)

In [108]:
pred_col = np.array(test_pred.detach().to('cpu'))
gt_col = np.array(test_y_torch.detach().to('cpu'))

pd.DataFrame({'GT':gt_col, 'PRED':pred_col})

Unnamed: 0,GT,PRED
0,6.005604,2.952671
1,61.641113,61.471474
2,0.000000,0.000864
3,40.907784,31.791132
4,32.658344,55.151176
...,...,...
5242,8.256253,68.292389
5243,71.866142,65.454193
5244,50.395756,45.993767
5245,47.004353,48.154396


# SUBMISSION

In [26]:
test_dir = os.path.join(base_dir, 'test')
lists = os.listdir(test_dir)

In [27]:
df_test = []

for i in range(81):
    file_path = os.path.join(test_dir, str(i)+'.csv')
    temp = pd.read_csv(file_path)
    temp['Time'] = temp['Hour']*60 + temp['Minute']
    temp = temp.loc[temp.Day == 6, :][['Time', 'DHI', 'DNI', 'WS', 'RH', 'T', 'TARGET']]
    
    df_test.append(temp)

X_test = pd.concat(df_test)
X_test.shape

(3888, 7)

In [28]:
X_test_scaled = torch.tensor(scaled.transform(X_test.values)).float().to(device)

In [29]:
result_1 = model_fit_1(X_test_scaled)
result_2 = model_fit_2(X_test_scaled)

In [30]:
submission = pd.read_csv(os.path.join(base_dir, 'sample_submission.csv'))

In [31]:
submission.loc[submission.id.str.contains("Day7"), "q_0.1":] = result_1.detach().to('cpu')
submission.loc[submission.id.str.contains("Day8"), "q_0.1":] = result_2.detach().to('cpu')
submission

Unnamed: 0,id,q_0.1,q_0.2,q_0.3,q_0.4,q_0.5,q_0.6,q_0.7,q_0.8,q_0.9
0,0.csv_Day7_0h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.csv_Day7_0h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.csv_Day7_1h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.csv_Day7_1h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.csv_Day7_2h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
7771,80.csv_Day8_21h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7772,80.csv_Day8_22h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7773,80.csv_Day8_22h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7774,80.csv_Day8_23h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
tofill = submission.loc[submission.apply(lambda x: x['id'].split('.')[1].split('_')[-1].split('h')[0] in ['0', '1', '2', '3', '4', '20', '21', '22', '23'], axis = 1), 'q_0.1':]

In [33]:
for col in tofill.columns:
    tofill[col].values[:] = 0
tofill.describe()

Unnamed: 0,q_0.1,q_0.2,q_0.3,q_0.4,q_0.5,q_0.6,q_0.7,q_0.8,q_0.9
count,2916.0,2916.0,2916.0,2916.0,2916.0,2916.0,2916.0,2916.0,2916.0
mean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
std,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
submission.loc[submission.apply(lambda x: x['id'].split('.')[1].split('_')[-1].split('h')[0] in ['0', '1', '2', '3', '4', '20', '21', '22', '23'], axis = 1), 'q_0.1':] = tofill

In [35]:
submission.iloc[48:96]

Unnamed: 0,id,q_0.1,q_0.2,q_0.3,q_0.4,q_0.5,q_0.6,q_0.7,q_0.8,q_0.9
48,0.csv_Day8_0h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
49,0.csv_Day8_0h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50,0.csv_Day8_1h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
51,0.csv_Day8_1h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
52,0.csv_Day8_2h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
53,0.csv_Day8_2h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
54,0.csv_Day8_3h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
55,0.csv_Day8_3h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
56,0.csv_Day8_4h00m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
57,0.csv_Day8_4h30m,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
submission.describe()

Unnamed: 0,q_0.1,q_0.2,q_0.3,q_0.4,q_0.5,q_0.6,q_0.7,q_0.8,q_0.9
count,7776.0,7776.0,7776.0,7776.0,7776.0,7776.0,7776.0,7776.0,7776.0
mean,9.321143,12.641466,15.259423,17.068478,18.518168,19.768997,20.917525,22.095411,23.34025
std,15.374304,19.111445,21.897484,23.809573,25.310299,26.574215,27.706749,28.869001,30.088685
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.242156,0.757101,1.285631,1.912275,2.569417
75%,13.262892,20.672598,26.426271,30.780162,34.629218,37.537903,40.414082,43.398237,46.524741
max,79.109161,84.299133,89.786652,91.955315,95.080559,96.521202,97.841942,98.814957,101.628532


In [37]:
submission.to_csv('./submission_v1.csv', index=False)