# Библиотеки

In [None]:
import pandas as pd
import numpy as np
import neptune
import torch
import json
import copy
import lightgbm as lgb
import os
import zipfile
import sys
from sklearn.linear_model import SGDRegressor
import pickle

In [None]:
# Initialise the Neptune client for the 'iliaavilov/SIBUR' project;
# get_exp_data below reads experiments through `neptune.project`.
neptune.init('iliaavilov/SIBUR')

In [None]:
# Neptune experiment id whose logged trial parameters are reused below.
exp_index = 'SIB-675'
# Position of the wanted trial among the entries of the experiment's
# 'current_params' channel.
n_trial = 19
# Component being predicted; used to build the 'A_...' / 'B_...' column names.
target = 'iC4H10'

def get_exp_data(exp_index, n_trial):
    """Return one trial's parameters and unpack the experiment's sources.

    Args:
        exp_index: Neptune experiment id passed to
            ``neptune.project.get_experiments``; the first match is used.
        n_trial: Position of the wanted entry among the lines of the
            experiment's ``current_params`` channel.

    Returns:
        dict: The parameters parsed from the selected channel line.

    Raises:
        IndexError: If ``n_trial`` is outside the available lines, or the
            selected line contains no ``',{'`` separator.
        json.JSONDecodeError: If the selected line is not valid JSON after
            the quote replacement.

    Side effects:
        Calls ``exp.download_sources()`` and extracts ``source.zip`` relative
        to the current working directory.
    """
    exp = neptune.project.get_experiments(exp_index)[0]

    # The channel is fetched once; only its raw text is needed.
    # NOTE(review): `_backend` is a private attribute of the experiment object
    # and may change between client versions.
    channel_id = exp.get_channels()['current_params'].id
    raw_points = exp._backend.get_channel_points_csv(exp, channel_id).getvalue()

    # The last piece of the split is always discarded - presumably the empty
    # string left by a trailing newline.
    lines = raw_points.split('\n')[:-1]

    # Everything after the first ',{' is the logged parameter dict; single
    # quotes are swapped for double quotes so that json.loads accepts it.
    payload = lines[n_trial].split(',{', 1)[1]
    params_trial = json.loads('{' + payload.replace("'", '"'))

    exp.download_sources()
    with zipfile.ZipFile("source.zip", "r") as zip_ref:
        zip_ref.extractall('')

    return params_trial

In [None]:
# Fetch the chosen trial's parameters; this call also downloads and extracts
# the experiment sources (see get_exp_data).
params_trial = get_exp_data(exp_index, n_trial)

In [None]:
# Make the modules under 'source/' importable; presumably this folder comes
# from the source.zip extracted in get_exp_data - TODO confirm archive layout.
sys.path.append('source/')
import preprocessing
from NN import simple_torchpl
from load_data import load
from pl_framework import nn_training
from cv import get_indices

# Константы

In [None]:
# Folder passed to load() below.
data_path = 'data/'
# Presumably a seed for reproducibility; not referenced in the visible cells.
random_state = 54321
# Let pandas display up to 999 rows when a frame is shown.
pd.options.display.max_rows = 999

# Загрузка данных

In [None]:
# load() comes from the experiment's load_data module; its three return values
# are unpacked as train features, train targets and test features.
train_features, train_targets, test_features = load(data_path)

In [None]:
# Four boundaries define three consecutive (start, end) date windows that are
# handed to get_indices together with the targets (presumably CV periods).
fold_edges = [pd.to_datetime(stamp) for stamp in ('2020-03-01 00:00:00',
                                                  '2020-03-15 00:00:00',
                                                  '2020-03-31 00:00:00',
                                                  '2020-04-15 00:00:00')]
cv = get_indices(train_targets, list(zip(fold_edges[:-1], fold_edges[1:])))

In [None]:
# Keep the test timestamps; they fill the 'timestamp' column of a newly
# created submission file further below.
ts = test_features['timestamp'].values
# Disabled: dropping the 'timestamp' column from the loaded frames.
#train_targets = train_targets.drop('timestamp', axis = 'columns')
#test_features = test_features.drop('timestamp', axis = 'columns')
#train_features = train_features.drop('timestamp', axis = 'columns')

# Предсказания с помощью sklearn

In [None]:
# Stack train and test rows into one frame. pd.concat replaces
# DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0.
all_features = pd.concat([train_features, test_features], ignore_index=True)
# Values stored in the trial parameters under 'n_back_<feature>'; their maximum
# later offsets the train/test split. Only the target's own 'A_' feature is
# considered in this section.
n_backs = [params_trial['n_back_' + feature]
           for feature in ['A_{}'.format(target)]]

In [None]:
# Stack train and test rows so both go through the same preprocessing call.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
all_features = pd.concat([train_features, test_features], ignore_index=True)

# Copies are passed so the call cannot mutate the notebook-level inputs.
# NOTE(review): `cv` and `params` are rebound to the returned values, so the
# later sections that call preprocessing again receive this already-processed
# `cv` - confirm that is intended.
X, y_train, cv, params = preprocessing.preprocessing(all_features.copy(),
                                                     train_targets.copy(),
                                                     copy.deepcopy(cv),
                                                     copy.deepcopy(params_trial))

In [None]:
# Rows before the boundary form the training part, the rest the test part.
# The boundary is the train row count minus the largest n_back value
# (presumably because preprocessing drops that many leading rows).
split_at = train_features.shape[0] - max(n_backs)
X_train = X[:split_at]
X_test = X[split_at:]

In [None]:
# Print the shapes one per line to sanity-check the split.
for shaped in (y_train, X_train, test_features, X_test):
    print(shaped.shape)

In [None]:
# Display the parameters returned by the preprocessing call.
params

In [None]:
# Fit on the training indices of the first fold, weighting every sample by the
# inverse of its target value, then predict the test rows.
fit_idx = cv[0][0]
fit_targets = y_train[fit_idx]
LR = SGDRegressor(**params)
LR.fit(X_train[fit_idx], fit_targets, sample_weight=1 / fit_targets)
predictions = LR.predict(X_test)

In [None]:
# Persist the fitted regressor as '<target>.pickle' in the working directory.
model_path = '{}.pickle'.format(target)
with open(model_path, 'wb') as model_file:
    pickle.dump(LR, model_file)

In [None]:
# Reuse an existing submission file, or start a new one with the four 'B_'
# columns plus the test timestamps; then fill this target's column and save.
if os.path.isfile('submission.csv'):
    submission = pd.read_csv('submission.csv')
else:
    submission = pd.DataFrame(columns=['B_C2H6', 'B_C3H8', 'B_iC4H10', 'B_nC4H10'])
    submission['timestamp'] = ts
submission['B_' + target] = predictions
submission.to_csv('submission.csv', index=False)

# Предсказания с помощью lgbm

In [None]:
# Stack train and test rows into one frame. pd.concat replaces
# DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0.
all_features = pd.concat([train_features, test_features], ignore_index=True)
# Values stored in the trial parameters under 'n_back_<feature>' for every
# 'A_' feature; their maximum later offsets the train/test split.
n_backs = [params_trial['n_back_' + feature]
           for feature in ['A_CH4', 'A_C2H6', 'A_C3H8', 'A_iC4H10',
                           'A_nC4H10', 'A_iC5H12', 'A_nC5H12', 'A_C6H14']]

In [None]:
# Stack train and test rows so both go through the same preprocessing call.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
all_features = pd.concat([train_features, test_features], ignore_index=True)

# Copies are passed so the call cannot mutate the notebook-level inputs.
# NOTE(review): `cv` was already rebound by the preprocessing call of the
# sklearn section if that cell ran first, so this call may receive an
# already-processed `cv` - confirm that is intended.
X, y_train, cv, params = preprocessing.preprocessing(all_features.copy(),
                                                     train_targets.copy(),
                                                     copy.deepcopy(cv),
                                                     copy.deepcopy(params_trial))

In [None]:
# Rows before the boundary form the training part, the rest the test part.
# The boundary is the train row count minus the largest n_back value
# (presumably because preprocessing drops that many leading rows).
split_at = train_features.shape[0] - max(n_backs)
X_train = X[:split_at]
X_test = X[split_at:]

In [None]:
# Print the shapes one per line to sanity-check the split.
for shaped in (y_train, X_train, test_features, X_test):
    print(shaped.shape)

In [None]:
# Display the parameters returned by the preprocessing call.
params

In [None]:
# Build LightGBM datasets from the last fold: its first index set is used for
# training and its second one for the held-out part.
last_fold = cv[-1]
train_data = lgb.Dataset(X_train[last_fold[0], :], y_train[last_fold[0]])
test_data = lgb.Dataset(X_train[last_fold[1], :], y_train[last_fold[1]])

def lgb_scoring(y_hat, data):
    """Compute the mean absolute relative error of predictions.

    Args:
        y_hat: Predicted values.
        data: Object exposing ``get_label()`` that returns the true values.

    Returns:
        tuple: ``('loss', score, False)`` - a name, the mean of
        ``|y_true - y_hat| / |y_true|`` and a flag that is always ``False``.
        The shape matches a LightGBM custom evaluation function, although the
        function is not passed to ``lgb.train`` in the visible cells.
    """
    labels = data.get_label()
    relative_errors = np.abs((labels - y_hat) / labels)
    return 'loss', np.mean(relative_errors), False
    
# Train on `train_data` only; no validation set or custom metric is passed.
# NOTE(review): `verbose_eval` was deprecated in LightGBM 3.3 and removed in
# 4.0 - confirm the installed version still accepts it.
test_model = lgb.train(params = params, train_set = train_data, verbose_eval = False)
predictions = test_model.predict(X_test)

In [None]:
# Reuse an existing submission file, or start a new one with the four 'B_'
# columns plus the test timestamps.
if os.path.isfile('submission.csv'):
    submission = pd.read_csv('submission.csv')
else:
    submission = pd.DataFrame(columns=['B_C2H6', 'B_C3H8', 'B_iC4H10', 'B_nC4H10'])
    submission['timestamp'] = ts
# Write into the target's 'B_' column, as the sklearn section does. With
# target = 'iC4H10', indexing by the bare target name would add a stray
# 'iC4H10' column and leave 'B_iC4H10' empty.
submission['B_' + target] = predictions
submission.to_csv('submission.csv', index=False)

In [None]:
# Display the submission frame that was just written.
submission

In [None]:
#neptune.project.get_experiments(exp_index)[0].log_metric('leaderboard_mape', 2.4408)

# Предсказания с помощью нейронки

In [None]:
# Stack train and test rows into one frame. pd.concat replaces
# DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0.
all_features = pd.concat([train_features, test_features], ignore_index=True)
# Values stored in the trial parameters under 'n_back_<feature>' for every
# 'A_' feature; their maximum later offsets the train/test split.
n_backs = [params_trial['n_back_' + feature]
           for feature in ['A_CH4', 'A_C2H6', 'A_C3H8', 'A_iC4H10',
                           'A_nC4H10', 'A_iC5H12', 'A_nC5H12', 'A_C6H14']]

In [None]:
# Stack train and test rows so both go through the same preprocessing call.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
all_features = pd.concat([train_features, test_features], ignore_index=True)

# Copies are passed so the call cannot mutate the notebook-level inputs.
# NOTE(review): `cv` was already rebound by the preprocessing calls of the
# earlier sections if those cells ran first, so this call may receive an
# already-processed `cv` - confirm that is intended.
X, y_train, cv, params = preprocessing.preprocessing(all_features.copy(),
                                                     train_targets.copy(),
                                                     copy.deepcopy(cv),
                                                     copy.deepcopy(params_trial))

In [None]:
# Rows before the boundary form the training part, the rest the test part.
# The boundary is the train row count minus the largest n_back value
# (presumably because preprocessing drops that many leading rows).
split_at = train_features.shape[0] - max(n_backs)
X_train = X[:split_at]
X_test = X[split_at:]

In [None]:
# Print the shapes one per line to sanity-check the split.
for shaped in (y_train, X_train, test_features, X_test):
    print(shaped.shape)

In [None]:
# Take 'batch_size' out of the trial parameters: it is handed to the trainer
# separately in the training cell. The trailing expression keeps the popped
# value shown as the cell output.
batch_size = params_trial.pop('batch_size')
batch_size

In [None]:
# Train for a fixed number of epochs: min and max are both set to the trial's
# 'n_estimators' value, on the first fold and without a validation fold.
mean_best_iter = params_trial['n_estimators']
my_boiii = nn_training(simple_torchpl, X_train, y_train[[target]])
my_boiii.train(
    min_epochs=mean_best_iter,
    max_epochs=mean_best_iter,
    model_params=params,
    batch_size=batch_size,
    fold=cv[0],
    val_fold=False,
)

# Call eval() on the trained model before predicting (presumably torch's
# evaluation mode).
my_model = my_boiii.trained_model
my_model.eval()

# Convert the test matrix to a float tensor, run the model and bring the
# result back as a NumPy array.
test_tensor = torch.from_numpy(X_test).float()
predictions = my_model(test_tensor).detach().numpy()

In [None]:
# Reuse an existing submission file, or start a new one with the four 'B_'
# columns plus the test timestamps.
if os.path.isfile('submission.csv'):
    submission = pd.read_csv('submission.csv')
else:
    submission = pd.DataFrame(columns=['B_C2H6', 'B_C3H8', 'B_iC4H10', 'B_nC4H10'])
    submission['timestamp'] = ts
# Write into the target's 'B_' column, as the sklearn section does. With
# target = 'iC4H10', indexing by the bare target name would add a stray
# 'iC4H10' column and leave 'B_iC4H10' empty.
submission['B_' + target] = predictions
submission.to_csv('submission.csv', index=False)

In [None]:
#neptune.project.get_experiments(exp_index)[0].log_metric('leaderboard_mape', 2.8630)