# Библиотеки

In [1]:
import ast
import copy
import json

import neptune
import numpy as np
import pandas as pd
import torch

import preprocessing
from cv import get_indices
from load_data import load
from NN import simple_torchpl
from pl_framework import nn_training

In [2]:
# Select the Neptune project; later cells reach it through `neptune.project`.
neptune.init('iliaavilov/SIBUR')



Project(iliaavilov/SIBUR)

# Константы

In [3]:
# Let pandas print up to 999 rows before truncating a displayed frame.
pd.options.display.max_rows = 999

# Directory passed to `load` in the data-loading cell.
data_path = 'data/'

# NOTE(review): neither `random_state` nor `features` is referenced by the
# visible cells — confirm whether they are still needed.
random_state = 54321
features = [
    'A_rate',
    'A_CH4',
    'A_C2H6',
    'A_C3H8',
    'A_iC4H10',
    'A_nC4H10',
    'A_iC5H12',
    'A_nC5H12',
    'A_C6H14',
    'B_rate',
]

# Загрузка данных

In [4]:
# `load` returns three tables, unpacked as train features, train targets and
# test features; each one carries a 'timestamp' column that a later cell removes.
train_features, train_targets, test_features = load(data_path)

In [5]:
# (start, end) timestamps handed to `get_indices`, presumably one pair per CV fold.
# NOTE(review): no window covers 2020-03-31 .. 2020-04-15 — confirm the gap is intentional.
cv_windows = [
    ('2020-03-01 00:00:00', '2020-03-15 00:00:00'),
    ('2020-03-15 00:00:00', '2020-03-31 00:00:00'),
    ('2020-04-15 00:00:00', '2020-04-30 00:00:00'),
]

cv = get_indices(
    train_targets,
    [(pd.to_datetime(start), pd.to_datetime(end)) for start, end in cv_windows],
)

In [6]:
# Keep the test timestamps (they go into the submission file later), then strip
# the 'timestamp' column from all three tables.
# NOTE: the tables are rebound in place, so this cell cannot be run twice in a row.
ts = test_features['timestamp'].values

train_features = train_features.drop(columns='timestamp')
train_targets = train_targets.drop(columns='timestamp')
test_features = test_features.drop(columns='timestamp')

# Предсказания для теста

In [7]:
# Neptune experiment id and the row (trial) of its 'run_parameters' channel
# whose parameters are reproduced below.
exp_index = 'SIB-245'
n_trial = 14

exp = neptune.project.get_experiments(exp_index)[0]

# Download every requested channel exactly once, keeping both the raw CSV text
# and a parsed table of it.
channel_names = ['run_parameters']
channels_data = {}
channels_text = {}
channels_by_name = exp.get_channels()
for channel_name in channel_names:
    channel_id = channels_by_name[channel_name].id
    # NOTE(review): `_backend` is a private attribute of the Neptune experiment
    # object — this may break when the client library is upgraded.
    csv_buffer = exp._backend.get_channel_points_csv(exp, channel_id)
    # getvalue() returns the whole buffer and does not move the read position,
    # so the same buffer can still be handed to read_csv afterwards.
    channels_text[channel_name] = csv_buffer.getvalue()
    channels_data[channel_name] = pd.read_csv(csv_buffer, header=None, dtype=str)

# Look the parameter channel up by name instead of relying on whatever the loop
# variable happened to hold after the last iteration.
values = channels_text['run_parameters']

# Drop only a trailing empty line (left by a final newline), never a real row.
lines = values.split('\n')
if lines[-1] == '':
    lines.pop()

# Each line is assumed to look like `<x>,{...}`, where `{...}` is the Python
# repr of one trial's parameter dict. Split on the first ",{" only, so nested
# dicts are not truncated, and parse with ast.literal_eval, which reads Python
# literals directly (True/False/None, quotes inside strings) without rewriting
# quotes to fake JSON.
data = pd.DataFrame({
    'params': [ast.literal_eval('{' + line.split(',{', 1)[1]) for line in lines]
})

data_trial = data.iloc[n_trial]
params_trial = data_trial['params']

In [8]:
# Settings fixed by hand on top of the parameters read from the trial.
params_trial.update({
    # 20 matches the feature width preprocessing reports after rolling: (7587, 20).
    'n_in': 20,
    'n_out': 1,
    'optimizer': 'AdamW',
    # The loss class itself is stored, not an instance.
    'loss': torch.nn.L1Loss,
})

In [9]:
# Stack the train rows and the test rows into one table with a fresh 0..n-1 index.
# `DataFrame.append` was removed in pandas 2.0; `pd.concat` gives the same result
# on both old and new pandas.
all_features = pd.concat([train_features, test_features], ignore_index=True)

# Copies are passed so the inputs of this cell are not modified by the call.
# NOTE(review): `cv` is rebound to the returned value, so re-running this cell
# would feed an already-transformed `cv` back into preprocessing — confirm
# whether that matters.
X, y_train, cv, params = preprocessing.preprocessing(
    all_features.copy(),
    train_targets.copy(),
    copy.deepcopy(cv),
    copy.deepcopy(params_trial),
    pop=False,
)

current params: {'seq_len': 55, 'n_back': 44, 'n_rolling': 5, 'n_h_1': 368, 'batch_size': 730, 'p_1': 0.30358383224208985, 'activation1': 'Hardshrink', 'lr': 0.0004029769361640069, 'weight_decay': 0.998154546960057, 'n_in': 20, 'n_out': 1, 'optimizer': 'AdamW', 'loss': <class 'torch.nn.modules.loss.L1Loss'>}
Initial shape of X: (7631, 10)
cv before trans: 719
Initial shape of X after rolling: (7587, 20)
Initial shape of y: (3647, 4)
cv after trans: 621
shape of X after trans: (7533, 55, 20)
shape of y after trans: (3549, 4)


In [10]:
# Take the windowing / batching settings out of `params`; what is left in the
# dict is what the training cell passes on as `model_params`.
# NOTE: pop() removes the keys, so this cell cannot be run twice in a row.
seq_len = params.pop('seq_len')
n_back = params.pop('n_back')
n_rolling = params.pop('n_rolling')
batch_size = params.pop('batch_size')

# Number of leading rows of X assigned to the train part: the train row count
# minus (seq_len - 1) + max(n_rolling - 1, n_back).
# With the logged values: 3647 - (54 + 44) = 3549, which matches y_train.
n_train_windows = train_features.shape[0] - ((seq_len - 1) + max(n_rolling - 1, n_back))

X_train = X[:n_train_windows]
X_test = X[n_train_windows:]

In [11]:
# Sanity check: the targets and the train windows should have the same number of rows.
print(y_train.shape, X_train.shape, sep='\n')

(3549, 4)
(3549, 55, 20)


In [12]:
# Sanity check: there should be one test window per row of the test table.
print(test_features.shape, X_test.shape, sep='\n')

(3984, 10)
(3984, 55, 20)


In [13]:
# First entry of the `cv` returned by preprocessing; the training cell passes it as `fold=`.
fold = cv[0]

In [14]:
# One model is trained per target column; the list is defined once and reused
# for both the submission layout and the training loop.
target_columns = ['B_C2H6', 'B_C3H8', 'B_iC4H10', 'B_nC4H10']

# Column order of the written file: the four targets first, then 'timestamp'.
submission = pd.DataFrame(columns=target_columns)
submission['timestamp'] = ts

# The test input is identical for every target, so convert it to a tensor once.
X_test_tensor = torch.from_numpy(X_test).float()

# NOTE(review): no random seed is set in the visible cells before training —
# confirm whether `nn_training` seeds internally if reproducibility matters.
for target in target_columns:

    # Epoch count logged for this trial in the '<target>_iters' numeric channel.
    mean_best_iter = exp.get_numeric_channels_values(target + '_iters').drop('x', axis='columns').iloc[n_trial, 0]
    # int(...) guarantees a plain Python int: round() on a numpy float does not
    # return an int on every numpy version.
    mean_best_iter = int(round(mean_best_iter))

    # min_epochs == max_epochs, so training runs for exactly that many epochs.
    trainer = nn_training(simple_torchpl, X_train, y_train[[target]])
    trainer.train(min_epochs=mean_best_iter,
                  max_epochs=mean_best_iter,
                  model_params=params,
                  batch_size=batch_size,
                  fold=fold,
                  val_fold=False)

    my_model = trainer.trained_model
    my_model.eval()

    # Inference only: disable autograd so no graph is built for the whole test set.
    with torch.no_grad():
        predictions = my_model(X_test_tensor).numpy()
    # Flatten to 1-D before assigning (n_out is 1, so presumably the output is (n, 1)).
    submission[target] = predictions.reshape(-1)

submission.to_csv('submission.csv', index=False)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type       | Params
--------------------------------------------
0 | activ1       | Hardshrink | 0     
1 | lstm         | LSTM       | 574 K 
2 | dropout1     | Dropout    | 0     
3 | linear_final | Linear     | 369   
4 | loss         | L1Loss     | 0     
5 | loss_sec     | MAPE       | 0     


train_loss: 1.0070767402648926
val_loss: 0.9515213966369629
train_loss: 0.9487589597702026
val_loss: 0.9034141302108765
train_loss: 0.9036100506782532
val_loss: 0.8501515984535217
train_loss: 0.8418893814086914
val_loss: 0.80669105052948
train_loss: 0.8058381676673889
val_loss: 0.7689621448516846
train_loss: 0.7627676725387573
val_loss: 0.7518856525421143
train_loss: 0.743324339389801
val_loss: 0.7250412702560425
train_loss: 0.7198963761329651
val_loss: 0.7023882865905762
train_loss: 0.6954436898231506
val_loss: 0.6797585487365723
train_loss: 0.6729488372802734
val_loss: 0.6577469110488892
train_loss: 0.6493890285491943
val_loss: 0.6388629674911499
train_loss: 0.6310581564903259
val_loss: 0.6104918122291565
train_loss: 0.6045486330986023
val_loss: 0.5691959857940674
train_loss: 0.559861421585083
val_loss: 0.5351738929748535
train_loss: 0.5267188549041748
val_loss: 0.5019721984863281
train_loss: 0.4945843815803528
val_loss: 0.4634927213191986
train_loss: 0.4585554599761963
val_loss: 0.4

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type       | Params
--------------------------------------------
0 | activ1       | Hardshrink | 0     
1 | lstm         | LSTM       | 574 K 
2 | dropout1     | Dropout    | 0     
3 | linear_final | Linear     | 369   
4 | loss         | L1Loss     | 0     
5 | loss_sec     | MAPE       | 0     


train_loss: 1.0008121728897095
val_loss: 0.9944166541099548
train_loss: 0.9943521618843079
val_loss: 0.9888656139373779
train_loss: 0.9894086718559265
val_loss: 0.9827278852462769
train_loss: 0.9825699925422668
val_loss: 0.9777119755744934
train_loss: 0.9785787463188171
val_loss: 0.9733657836914062
train_loss: 0.9738399982452393
val_loss: 0.9713890552520752
train_loss: 0.9717029929161072
val_loss: 0.9682936668395996
train_loss: 0.9691274166107178
val_loss: 0.9656826257705688
train_loss: 0.9664115905761719
val_loss: 0.9630657434463501
train_loss: 0.9639670252799988
val_loss: 0.9605265855789185
train_loss: 0.961336076259613
val_loss: 0.9583483338356018
train_loss: 0.959333598613739
val_loss: 0.9550822377204895
train_loss: 0.9564092755317688
val_loss: 0.9503219127655029
train_loss: 0.9514679312705994
val_loss: 0.9463974833488464
train_loss: 0.947783887386322
val_loss: 0.942564845085144
train_loss: 0.9442609548568726
val_loss: 0.9381332397460938
train_loss: 0.9401835799217224
val_loss: 0.9

train_loss: 0.6349326372146606
val_loss: 0.623296320438385
train_loss: 0.6336749792098999
val_loss: 0.6207473874092102
train_loss: 0.6294881701469421
val_loss: 0.6183459162712097
train_loss: 0.6272920966148376
val_loss: 0.6162188649177551
train_loss: 0.6257582902908325
val_loss: 0.6135138869285583
train_loss: 0.6240156888961792
val_loss: 0.6111787557601929
train_loss: 0.620364248752594
val_loss: 0.6088472604751587
train_loss: 0.6184828281402588
val_loss: 0.6065317392349243
train_loss: 0.6168798804283142
val_loss: 0.6042200922966003
train_loss: 0.615283191204071
val_loss: 0.6019061207771301
train_loss: 0.6139030456542969
val_loss: 0.5995810627937317
train_loss: 0.6077295541763306
val_loss: 0.5972509384155273
train_loss: 0.6072489619255066
val_loss: 0.5948874950408936
train_loss: 0.6051309108734131
val_loss: 0.5925148129463196
train_loss: 0.6038682460784912
val_loss: 0.5901455283164978
train_loss: 0.6006131768226624
val_loss: 0.5877969861030579
train_loss: 0.5989593863487244
val_loss: 0.

train_loss: 0.31980428099632263
val_loss: 0.30018410086631775
train_loss: 0.3197762966156006
val_loss: 0.29781991243362427
train_loss: 0.31665486097335815
val_loss: 0.29546087980270386
train_loss: 0.3132074475288391
val_loss: 0.29309946298599243
train_loss: 0.3097435534000397
val_loss: 0.2907469570636749
train_loss: 0.3081795573234558
val_loss: 0.2883918285369873
train_loss: 0.3065032362937927
val_loss: 0.2860323190689087
train_loss: 0.304902046918869
val_loss: 0.28366029262542725
train_loss: 0.30047914385795593
val_loss: 0.281297504901886
train_loss: 0.3003719449043274
val_loss: 0.2789365351200104
train_loss: 0.2984943389892578
val_loss: 0.2765750288963318
train_loss: 0.29355794191360474
val_loss: 0.27417701482772827
train_loss: 0.29450523853302
val_loss: 0.2717878818511963
train_loss: 0.29292821884155273
val_loss: 0.2693956792354584
train_loss: 0.28656667470932007
val_loss: 0.2670050859451294
train_loss: 0.2855910658836365
val_loss: 0.2646179795265198
train_loss: 0.28498539328575134


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type       | Params
--------------------------------------------
0 | activ1       | Hardshrink | 0     
1 | lstm         | LSTM       | 574 K 
2 | dropout1     | Dropout    | 0     
3 | linear_final | Linear     | 369   
4 | loss         | L1Loss     | 0     
5 | loss_sec     | MAPE       | 0     


train_loss: 1.0018486976623535
val_loss: 0.9875348210334778
train_loss: 0.9871170520782471
val_loss: 0.9751201868057251
train_loss: 0.9758419990539551
val_loss: 0.9614101648330688
train_loss: 0.9602667093276978
val_loss: 0.9502046704292297
train_loss: 0.9511465430259705
val_loss: 0.9404923319816589
train_loss: 0.9403282403945923
val_loss: 0.9360724687576294
train_loss: 0.9354590177536011
val_loss: 0.9291536211967468
train_loss: 0.9295992255210876
val_loss: 0.923324465751648
train_loss: 0.9234257340431213
val_loss: 0.9174749255180359
train_loss: 0.9178574681282043
val_loss: 0.9118058085441589
train_loss: 0.9118275046348572
val_loss: 0.9069279432296753
train_loss: 0.9072725772857666
val_loss: 0.8996405601501465
train_loss: 0.9005777835845947
val_loss: 0.8890016674995422
train_loss: 0.8893373608589172
val_loss: 0.8802355527877808
train_loss: 0.8809487223625183
val_loss: 0.871670126914978
train_loss: 0.8729135990142822
val_loss: 0.8617748022079468
train_loss: 0.8636147379875183
val_loss: 0

train_loss: 0.1776687055826187
val_loss: 0.16872437298297882
train_loss: 0.173730731010437
val_loss: 0.1635708510875702
train_loss: 0.1674998551607132
val_loss: 0.15828943252563477
train_loss: 0.16468575596809387
val_loss: 0.1525946855545044
train_loss: 0.15523210167884827
val_loss: 0.14722979068756104
train_loss: 0.15002857148647308
val_loss: 0.1424773931503296
train_loss: 0.14667123556137085
val_loss: 0.13643305003643036
train_loss: 0.14265654981136322
val_loss: 0.13121818006038666
train_loss: 0.13432680070400238
val_loss: 0.12601333856582642
train_loss: 0.13016115128993988
val_loss: 0.12084408104419708
train_loss: 0.1264244168996811
val_loss: 0.11568260192871094
train_loss: 0.1228671744465828
val_loss: 0.11052118241786957
train_loss: 0.11986715346574783
val_loss: 0.10534261167049408
train_loss: 0.10596872866153717
val_loss: 0.1001676470041275
train_loss: 0.1048482283949852
val_loss: 0.0949346274137497
train_loss: 0.1004151999950409
val_loss: 0.08968725800514221
train_loss: 0.0976567

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type       | Params
--------------------------------------------
0 | activ1       | Hardshrink | 0     
1 | lstm         | LSTM       | 574 K 
2 | dropout1     | Dropout    | 0     
3 | linear_final | Linear     | 369   
4 | loss         | L1Loss     | 0     
5 | loss_sec     | MAPE       | 0     


train_loss: 1.0013318061828613
val_loss: 0.9912673234939575
train_loss: 0.9907552003860474
val_loss: 0.982584536075592
train_loss: 0.9826605916023254
val_loss: 0.9729815721511841
train_loss: 0.9714846611022949
val_loss: 0.9651374816894531
train_loss: 0.9649376273155212
val_loss: 0.9583389759063721
train_loss: 0.9571725726127625
val_loss: 0.9552425146102905
train_loss: 0.953680694103241
val_loss: 0.9503998756408691
train_loss: 0.9494708776473999
val_loss: 0.9463188648223877
train_loss: 0.9450598955154419
val_loss: 0.9422173500061035
train_loss: 0.9410568475723267
val_loss: 0.9382476806640625
train_loss: 0.9367246031761169
val_loss: 0.9348347783088684
train_loss: 0.9334661960601807
val_loss: 0.929734468460083
train_loss: 0.9286523461341858
val_loss: 0.9222850799560547
train_loss: 0.9205818772315979
val_loss: 0.9161441326141357
train_loss: 0.9145633578300476
val_loss: 0.9101440906524658
train_loss: 0.9087961912155151
val_loss: 0.9032161235809326
train_loss: 0.9021281003952026
val_loss: 0.

train_loss: 0.40708988904953003
val_loss: 0.4143812954425812
train_loss: 0.4026021957397461
val_loss: 0.41068482398986816
train_loss: 0.4005792737007141
val_loss: 0.40669721364974976
train_loss: 0.39381590485572815
val_loss: 0.40293994545936584
train_loss: 0.39005541801452637
val_loss: 0.39961206912994385
train_loss: 0.3876705467700958
val_loss: 0.3953804671764374
train_loss: 0.384749174118042
val_loss: 0.39172691106796265
train_loss: 0.378770112991333
val_loss: 0.3880796432495117
train_loss: 0.37577709555625916
val_loss: 0.3844572603702545
train_loss: 0.3730834722518921
val_loss: 0.3808410167694092
train_loss: 0.37052181363105774
val_loss: 0.377221018075943
train_loss: 0.36824923753738403
val_loss: 0.37358373403549194
train_loss: 0.35799440741539
val_loss: 0.36993861198425293
train_loss: 0.3572700619697571
val_loss: 0.3662413954734802
train_loss: 0.353965699672699
val_loss: 0.3625296950340271
train_loss: 0.3518056571483612
val_loss: 0.3588229715824127
train_loss: 0.3464164733886719
va

In [18]:
# Attach the hard-coded leaderboard score to the Neptune experiment.
# NOTE: every run of this cell logs the value again.
leaderboard_mape = 3.0315
scored_exp = neptune.project.get_experiments(exp_index)[0]
scored_exp.log_metric('leaderboard_mape', leaderboard_mape)