In [36]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data_utils
from torch.utils.data import Dataset
from torchvision import transforms

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [37]:
def add_variable(df):  # 파생변수 생성
    
    df['sum_energy'] = df['DHI'] + df['DNI']

    df['theta'] = 0
    condition_list = [
        (df['Hour'] == 6) | (df['Hour'] == 19),
        (df['Hour'] == 7) | (df['Hour'] == 18),
        (df['Hour'] == 8) | (df['Hour'] == 17),
        (df['Hour'] == 9) | (df['Hour'] == 16),
        (df['Hour'] == 10) | (df['Hour'] == 15),
        (df['Hour'] == 11) | (df['Hour'] == 14),
        (df['Hour'] == 12) | (df['Hour'] == 13)
    ]

    choice_list = [0, 10, 20, 30, 40, 50, 60]

    df['theta'] = np.select(condition_list, choice_list)
    # GHI
    df['GHI'] = df['DNI'] * np.cos(df['theta']) + df['DHI']

    # 변수 추가
    condition_list = [
        ((df['Hour'] >= 0) & (df['Hour'] <= 7)) | ((df['Hour'] >= 18) & (df['Hour'] <= 23)),
        ((df['Hour'] > 7) & (df['Hour'] < 10)) | ((df['Hour'] >= 15) & (df['Hour'] < 18)),
        (df['Hour'] >=10) & (df['Hour'] < 15),
    ]

    choice_list = [0, 2, 1]

    df['time'] = np.select(condition_list, choice_list)
    df['target0'] = df['TARGET']


    return pd.DataFrame(df)

In [38]:
# hyperparams
ETA = 0.005
EPOCHS = 50

In [39]:
df = pd.read_csv('/home/ys/repo/solar_prediction/data/train/train.csv')
df = add_variable(df)

X = df.drop('TARGET', axis = 1)
y = df['TARGET']

X_train = X[:47280]
y_train = y[:47280]

X_test = X[47280:]
y_test = y[47280:]

#X_train, X_test, y_train, y_test =train_test_split(X, y, test_size = 0.1, shuffle = False)
#X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size = 0.2)

In [40]:
class BuildDataset(Dataset): 
    def __init__(self, X, y, seq_len):
        self.X = torch.tensor(X.values.astype(np.float))
        self.y = torch.tensor(y.values.astype(np.float))
        self.X = self.X.reshape(13,-1)
        self.y = self.y.reshape(1,-1)
        self.seq_len= seq_len

    def __len__(self):
        return self.X.shape[1] // self.seq_len
        
    def __getitem__(self, index):
        index += 48
        return (self.X[:,index:index+self.seq_len], self.y[:,index:index+self.seq_len])

### Day 2 Prediction Model

In [41]:
class CNN2(torch.nn.Module):
    def __init__(self):
        super(CNN2, self).__init__()
        
        self.layer1 = nn.Sequential(
            nn.Conv1d(in_channels = 13, out_channels = 12, kernel_size = 1),
            nn.BatchNorm1d(12),
            #nn.ReLU(),

            nn.Conv1d(in_channels = 12, out_channels = 10, kernel_size = 4),
            nn.BatchNorm1d(10),
            #nn.ReLU(),

            nn.Conv1d(in_channels = 10, out_channels = 8, kernel_size = 8),
            nn.BatchNorm1d(8),
            #nn.ReLU(),

        )

        self.layer2 = nn.Sequential(
            nn.Conv1d(in_channels = 8, out_channels = 6, kernel_size = 4),
            nn.BatchNorm1d(6),
            #nn.ReLU(),

            nn.Conv1d(in_channels = 6, out_channels = 4, kernel_size = 1),
            nn.BatchNorm1d(4),
            #nn.ReLU(),

            nn.Conv1d(in_channels = 4, out_channels = 1, kernel_size = 1),
            #nn.BatchNorm1d(1),
            #nn.ReLU(),

        )

        self.layer3 = nn.Sequential(
            nn.Linear(227, 144),
            nn.Linear(144, 96),
        )
        
        
    def forward(self, x):
        x = torch.tensor(x).float()
        out = self.layer1(x)
        #print(out.shape)
        out = self.layer2(out)
        #print(out.shape)
        out = self.layer3(out)
        #print(out.shape)
        return out

In [42]:
trn_dataset = BuildDataset(X_train, y_train, seq_len = 240+96)
trn_loader = data_utils.DataLoader(trn_dataset, shuffle = False)
tst_dataset = BuildDataset(X_test, y_test, seq_len = 240+96)
tst_loader = data_utils.DataLoader(tst_dataset, shuffle = False)

In [43]:
def quantile_loss(pred, gt, quantile):
    qs = quantile
    sum_loss = 0
    loss = gt - pred
    loss = torch.max(qs*loss, (qs-1)*loss)
    sum_loss = torch.mean(loss)
    return sum_loss

model = CNN2().to(device)
optimizer = optim.SGD(model.parameters(), lr = ETA)

In [44]:
def train(model, trn_loader, optimizer, epoch, quantile):
    model.train()

    running_train_loss = 0.0

    for i, data in enumerate(trn_loader):
        data, target = data[0].to(device), data[1].to(device)
        data = data[:,:,:240]
        target = target[:,:,240:]

        optimizer.zero_grad()
        
        output = model(data)
        loss = quantile_loss(output, target, quantile)

        running_train_loss += loss

        loss.backward() 
        optimizer.step()

        running_train_loss += loss

    if epoch % 10 == 0:
        print('Epoch: {} | Train Loss:{:.3f}'.format(epoch, running_train_loss / len(trn_loader)))

    """
    # for validation
    running_dev_loss = 0.0
    for i, data in enumerate(dev_loader):
        data, target = data[0].to(device), data[1].to(device)
        output = model(data)
        target = target.view(1,1,48)

        loss = quantile_loss(output, target)

        running_dev_loss += loss

    print('Epoch: {} | Valid Loss:{:.3f}'.format(epoch, running_dev_loss / len(dev_loader)))
    """

In [45]:
def evaluate(model, tst_loader, quantile): 
    model.load_state_dict(torch.load(PATH))
    model.eval()
    running_test_loss = 0.0
     
    with torch.no_grad(): 
        for i, data in enumerate(tst_loader): 
            data, target = data[0].to(device), data[1].to(device) 

            data = data[:,:,:240]
            target = target[:,:,240:]   

            data, target = data.to(device), target.to(device)
            output = model(data)
        
            loss = quantile_loss(output, target, quantile)
            
            running_test_loss += loss

        print('Test Loss: {:.4f}'.format(running_test_loss / len(tst_loader)))
        print('='*30)

In [46]:
for i, q in enumerate([0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]):
    PATH = './model_day2/model_{}'.format(i+1)
    print('quantile : {}'.format(q))
    for epoch in range(1, EPOCHS + 1):
        if (q >= 0.3) & (epoch == 11):
            break
        train(model, trn_loader, optimizer, epoch, quantile = q)
    torch.save(model.state_dict(), PATH)
        
    test_loss = evaluate(model, tst_loader, quantile = q)

quantile : 0.1
Epoch: 10 | Train Loss:1.906
Epoch: 20 | Train Loss:1.902
Epoch: 30 | Train Loss:1.900
Epoch: 40 | Train Loss:1.899
Epoch: 50 | Train Loss:1.899
Test Loss: 2.4241
quantile : 0.2
Epoch: 10 | Train Loss:3.798
Epoch: 20 | Train Loss:3.798
Epoch: 30 | Train Loss:3.797
Epoch: 40 | Train Loss:3.797
Epoch: 50 | Train Loss:3.797
Test Loss: 4.8462
quantile : 0.3
Epoch: 10 | Train Loss:5.696
Test Loss: 7.2690
quantile : 0.4
Epoch: 10 | Train Loss:7.594
Test Loss: 9.6917
quantile : 0.5
Epoch: 10 | Train Loss:9.492
Test Loss: 12.1144
quantile : 0.6
Epoch: 10 | Train Loss:11.391
Test Loss: 14.5370
quantile : 0.7
Epoch: 10 | Train Loss:13.280
Test Loss: 16.9471
quantile : 0.8
Epoch: 10 | Train Loss:14.534
Test Loss: 18.6549
quantile : 0.9
Epoch: 10 | Train Loss:6.800
Test Loss: 10.9564


In [47]:

final_result = pd.DataFrame([])
total_result = pd.DataFrame([])

for i in range(0,81):
    model2 = CNN2()

    path = '/home/ys/repo/solar_prediction/data/test/' + str(i) +'.csv'
    print(path)
    df = pd.read_csv(path)
    df = add_variable(df)
    
    df2 = df[(df['Day'] != 0) | (df['Day'] != 1)]
    X2 = df2.drop('TARGET', axis = 1)
    print(len(X2))
    y2 = df2['TARGET']
    tst_dataset2 = BuildDataset(X2, y2, seq_len = 240+96)
    tst_loader2 = data_utils.DataLoader(tst_dataset2, shuffle = False)
    #print(len(tst_loader2))

    result2 = pd.DataFrame([])
    result2['id'] = 0
    result2['hour'] = 0

    X2['Minute'] = X2['Minute'].astype('str')
    X2['Minute'] = X2['Minute'].replace('0','00')
    X2['Hour'] = X2['Hour'].astype('int')

    for j,k in enumerate([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]):
        quantile = k
        loss = 0
        

        PATH2 = '/home/ys/repo/solar_prediction/02_model/YS/model_day2/model_{}'.format(j+1)
        model2.load_state_dict(torch.load(PATH2))

        for _, data in enumerate(tst_loader2):
            data = data[0]
            data = data[:,:,:-48]
            output = model2(data)
            output = output.reshape(96,)
            output = output.detach().numpy()

            result2['q_{}'.format(k)] = output

            for h in range(96):
                result2['id'].loc[h] = '{}.csv_Day{}_{}h{}m'.format(i, X2['Day'].iloc[h]+7, X2['Hour'].iloc[h], X2['Minute'].iloc[h])
                result2['hour'].loc[h] = X2['Hour'].iloc[h]
                
                if (result2['hour'].loc[h] < 7) | (result2['hour'].loc[h] > 19):
                    result2['q_{}'.format(k)].iloc[h] = 0
                if result2['q_{}'.format(k)].loc[h] < 0:
                    result2['q_{}'.format(k)].iloc[h] = 0
            
                if k < 0.8:
                    result2['q_{}'.format(k)].loc[h] = result2['q_{}'.format(k)].iloc[h] * 1000
                if k >= 0.8:
                    result2['q_{}'.format(k)].loc[h] = result2['q_{}'.format(k)].iloc[h] * 1000

                
        total_result = pd.concat([result2, total_result])

    final_result = pd.concat([final_result, result2])
            

/home/ys/repo/solar_prediction/data/test/0.csv
336
/home/ys/repo/solar_prediction/data/test/1.csv
336
/home/ys/repo/solar_prediction/data/test/2.csv
336
/home/ys/repo/solar_prediction/data/test/3.csv
336
/home/ys/repo/solar_prediction/data/test/4.csv
336
/home/ys/repo/solar_prediction/data/test/5.csv
336
/home/ys/repo/solar_prediction/data/test/6.csv
336
/home/ys/repo/solar_prediction/data/test/7.csv
336
/home/ys/repo/solar_prediction/data/test/8.csv
336
/home/ys/repo/solar_prediction/data/test/9.csv
336
/home/ys/repo/solar_prediction/data/test/10.csv
336
/home/ys/repo/solar_prediction/data/test/11.csv
336
/home/ys/repo/solar_prediction/data/test/12.csv
336
/home/ys/repo/solar_prediction/data/test/13.csv
336
/home/ys/repo/solar_prediction/data/test/14.csv
336
/home/ys/repo/solar_prediction/data/test/15.csv
336
/home/ys/repo/solar_prediction/data/test/16.csv
336
/home/ys/repo/solar_prediction/data/test/17.csv
336
/home/ys/repo/solar_prediction/data/test/18.csv
336
/home/ys/repo/solar_pr

In [48]:
final_result = final_result.drop(['hour'], axis =1)
final_result.to_csv('submission_conv6.csv',index = False)