# Библиотеки

In [None]:
import neptune
import pandas as pd

from cv import get_indices
from load_data import load
from model_selection import training
import random
from NN import simple_torchpl
from pl_framework import nn_training
from pytorch_forecasting.metrics import MAPE
import torch
from sklearn.linear_model import SGDRegressor

In [None]:
Необходимо указать данные для авторизации в проекте Neptune.

In [None]:
# Initialise the Neptune client. The empty string is a placeholder left in the
# notebook: put your own project identifier here before running the cell.
# NOTE(review): presumably the 'workspace/project' form is expected - confirm
# against the Neptune client documentation.
neptune.init('')

In [None]:
import os

# Neptune settings exported to the process environment. The real values are
# deliberately not stored in the notebook: fill them in locally before
# running the cell.
# NOTE(review): NEPTUNE_NOTEBOOK_ID / NEPTUNE_NOTEBOOK_PATH are presumably read
# by the Neptune notebook integration - confirm against the Neptune docs.
neptune_env = {
    'NEPTUNE_API_TOKEN': '',
    'NEPTUNE_PROJECT': '',
    'NEPTUNE_NOTEBOOK_ID': '',
    'NEPTUNE_NOTEBOOK_PATH': '',
}

for env_name, env_value in neptune_env.items():
    # Blank entries are skipped so that a value already exported in the shell
    # is never overwritten with an empty string.
    if env_value:
        os.environ[env_name] = env_value

# Константы

In [None]:
# Seed passed to set_up_studying() and to the LGBM parameters in the cells below.
random_state = 54321

# Directory handed to load() in the data-loading cell.
data_path = 'data/'

# Загрузка данных

In [None]:
# load() returns three values; only the first two are used in this notebook,
# the third one is discarded.
(train_features,
 train_targets,
 _) = load(data_path)

# Валидация

In [None]:
# (start, end) boundaries of the three consecutive validation periods.
fold_periods = [
    ('2020-03-01 00:00:00', '2020-03-15 00:00:00'),
    ('2020-03-15 00:00:00', '2020-03-31 00:00:00'),
    ('2020-03-31 00:00:00', '2020-04-15 00:00:00'),
]

# Convert every boundary to a pandas Timestamp before handing the periods to
# get_indices().
fold_bounds = [
    (pd.to_datetime(period_start), pd.to_datetime(period_end))
    for period_start, period_end in fold_periods
]

cv = get_indices(train_targets, fold_bounds, first_train=True)

# Подбор модели

## NN

In [None]:
%%capture
# Hyper-parameter search for the neural network: one study per (loss, target)
# pair. neptune.stop() is called after every study.

# Project files passed to training() through upload_source_files.
nn_source_files = [
    'cv.py',
    'load_data.py',
    'model_selection.py',
    'NN.py',
    'pl_framework.py',
    'preprocessing.py',
]

# Feature suffixes; each one gets its own 'n_back_A_<suffix>' parameter.
nn_shift_features = ['CH4', 'C2H6', 'C3H8', 'iC4H10',
                     'iC5H12', 'nC4H10', 'nC5H12', 'C6H14']

# Run description template ({} is replaced by the loss name). The continuation
# lines belong to the string literal, so their leading whitespace is kept
# exactly as it was.
nn_description = 'Ffill. New train for each fold. Simple nn.2 Linears. 2 drops. loss: {}\
                               Separate shift for each feature. seq_len = 1.180-220 range for shift. No shift in rates\
                               No normalisation. Only first 2 fold and test set(last fold).\
                               Dropped data before 2020-02-15 00:00:00'

model = 'torch'
n_trials = 30

for loss in ['MSE', 'MAE', 'MAPE']:
    for target in ['B_C2H6', 'B_C3H8', 'B_iC4H10', 'B_nC4H10']:
        my_training = training(
            name=target,
            nn_model=simple_torchpl,
            training_nn=nn_training,
            description=nn_description.format(loss),
            # A fresh list per run, as in the original literal.
            upload_source_files=list(nn_source_files),
        )
        my_training.set_up_studying(random_state=random_state)

        def params_func(trial, X):
            """Build the parameter dict for one trial.

            `trial` supplies the sampled values through its suggest_* methods;
            `X` is accepted but not used. `target` and `loss` are taken from
            the enclosing loops.
            """
            params = {
                'target': target,
                'n_h_1': trial.suggest_int('n_h_1', 10, 1000),
                'batch_size': trial.suggest_int('batch_size', 10, 800),
                'p_1': trial.suggest_uniform('p_1', 0, 1),
                'p_2': trial.suggest_uniform('p_2', 0, 1),
                'lr': trial.suggest_loguniform('lr', 0.0001, 0.2),
                'weight_decay': trial.suggest_uniform('weight_decay', 0.0001, 1),
                'optimizer': 'AdamW',
                'loss': loss,
                'activation1': trial.suggest_categorical(
                    'activation1',
                    ['Tanh', 'Hardtanh', 'Hardshrink', 'ELU',
                     'SELU', 'ReLU', 'Tanhshrink', 'CELU'],
                ),
            }
            # One shift parameter per feature, suggested in the listed order.
            for feature in nn_shift_features:
                shift_key = 'n_back_A_' + feature
                params[shift_key] = trial.suggest_int(shift_key, 180, 220)
            return params

        my_training.train(
            X=train_features,
            y=train_targets,
            cv=cv,
            model=model,
            params_func=params_func,
            n_trials=n_trials,
        )
        neptune.stop()

## LGBM

In [None]:
%%capture
# Hyper-parameter search for LightGBM: one study per
# (boosting, objective, target) combination. neptune.stop() is called after
# every study.

# Project files passed to training() through upload_source_files.
lgbm_source_files = [
    'cv.py',
    'load_data.py',
    'model_selection.py',
    'NN.py',
    'pl_framework.py',
    'preprocessing.py',
]

# Feature suffixes; each one gets its own 'n_back_A_<suffix>' parameter.
lgbm_shift_features = ['CH4', 'C2H6', 'C3H8', 'iC4H10',
                       'iC5H12', 'nC4H10', 'nC5H12', 'C6H14']

# Run description template (the two {} are boosting type and objective). The
# continuation lines belong to the string literal, so their leading whitespace
# is kept exactly as it was.
lgbm_description = 'Ffill. 1 train for each fold. LGBM. boosting = {}, objective = {}\
                                   Separate shift for each feature. seq_len = 1.180-220 range for shift. No shift in rates. features+features 1, 2, 3 ... n days ago\
                                   No normalisation. Only first 2 fold and test set(last fold).feature fraction = 1\
                                   Dropped data before 2020-02-15 00:00:00'

model = 'lgbm'
n_trials = 15000

for boosting in ['gbdt', 'dart', 'goss']:
    for objective in ['huber', 'fair', 'l2', 'l1', 'mape']:
        for target in ['C2H6', 'C3H8', 'iC4H10', 'nC4H10']:
            my_training = training(
                name=target,
                description=lgbm_description.format(boosting, objective),
                # A fresh list per run, as in the original literal.
                upload_source_files=list(lgbm_source_files),
            )
            my_training.set_up_studying(random_state=random_state)

            def params_func(trial, X):
                """Build the parameter dict for one trial.

                `trial` supplies the sampled values through its suggest_*
                methods; `X` is accepted but not used. `target`, `objective`,
                `boosting` and `random_state` are taken from the enclosing
                scope.
                """
                params = {
                    'target': target,
                    'objective': objective,
                    'boosting': boosting,
                    'n_jobs': -1,
                    'n_estimators': 1,
                    'random_state': random_state,
                    'bagging_fraction': 1,
                    'feature_fraction': 1,
                    'n_today': trial.suggest_int('n_today', 0, 100),
                    'min_child_samples': trial.suggest_int('min_child_samples', 2, 256),
                    'num_leaves': trial.suggest_int('num_leaves', 2, 256),
                    'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 1.5),
                }
                # One shift parameter per feature, suggested in the listed order.
                for feature in lgbm_shift_features:
                    shift_key = 'n_back_A_' + feature
                    params[shift_key] = trial.suggest_int(shift_key, 180, 220)
                return params

            my_training.train(
                X=train_features,
                y=train_targets,
                cv=cv,
                model=model,
                params_func=params_func,
                n_trials=n_trials,
            )
            neptune.stop()

## SKLEARN

In [None]:
%%capture
# Hyper-parameter search for SGDRegressor: one study per (target, seed) pair.
# neptune.stop() is called after every study.
#
# The loop variable is named `seed` on purpose: reusing the name `random_state`
# would overwrite the notebook-level constant and silently change the seed of
# the NN / LGBM cells if they are run after this one.
# The seed 223 used to be listed twice, which only repeated an identical
# study; it is listed once now.
sklearn_seeds = [223, 245, 267, 12345, 998, 1456, 938, 16]

for target in ['C2H6', 'C3H8', 'iC4H10', 'nC4H10']:
    for seed in sklearn_seeds:
        # The continuation lines of the description belong to the string
        # literal, so their leading whitespace is kept exactly as it was.
        my_training = training(name = 'B_' + target, sklearn_class = SGDRegressor,
                               description = 'Ffill. 1 train for each fold. Linear regression. WEIGHTED\
                               Separate shift for each feature. seq_len = 1.180-220 range for shift. No shift in rates. Features = A_target, constant\
                               No normalisation. Only first 2 fold and test set(last fold)\
                               Dropped data before 2020-02-15 00:00:00',
                               upload_source_files = ['cv.py',
                                                      'load_data.py',
                                                      'model_selection.py',
                                                      'NN.py',
                                                      'pl_framework.py',
                                                      'preprocessing.py'])
        my_training.set_up_studying(random_state = seed)

        model = 'sklearn'

        def params_func(trial, X):
            """Build the parameter dict for one trial.

            `trial` supplies the sampled shift through suggest_int; `X` is
            accepted but not used. `target` and `seed` are taken from the
            enclosing loops.
            """
            shift_key = 'n_back_A_{}'.format(target)
            return {
                'target': target,
                'random_state': seed,
                'loss': 'epsilon_insensitive',
                'epsilon': 0,
                'tol': 1e-5,
                # Only the shift of the feature matching the target is tuned.
                shift_key: trial.suggest_int(shift_key, 180, 220),
            }

        n_trials = 250
        my_training.train(X = train_features,
                          y = train_targets,
                          cv = cv,
                          model = model,
                          params_func = params_func,
                          n_trials = n_trials)
        neptune.stop()
        neptune.stop()