In [1]:
import gc
import os
from datetime import datetime
import pickle
import psutil
from tqdm import tqdm, tqdm_notebook
from functools import partial, update_wrapper
from itertools import product
from collections import OrderedDict
from multiprocessing import Pool

import bloscpack as bp

import random
import numpy as np
import pandas as pd
from sklearn.preprocessing import normalize
from sklearn.linear_model import LinearRegression

from YSMLT import utils as g_utils
from YSMLT.series import utils as ts_utils

from scipy.signal import hilbert
from scipy.signal import hann
from scipy.signal import convolve
from scipy.signal import welch, find_peaks
from scipy import stats
from scipy.special import entr
from scipy.stats import entropy
from tsfresh.feature_extraction import feature_calculators

In [4]:
feats_f_curr = [f for f in os.listdir('../input/feats_tblr/ss') if ('trn_feat' in f) and ('w500' in f) and ('fix' not in f)][0]
feats_f_trgt = [f for f in os.listdir('../input/feats_tblr/ss') if ('tst_feat' in f) and ('w500' in f)]
feats_f_fix = [f for f in os.listdir('../input/feats_tblr/ss') if ('trn_feat' in f) and ('w500' in f) and ('fix' in f)][0]
print(feats_f_curr, feats_f_trgt, feats_f_fix)

trn_feat_g9_w500.bp ['tst_feat_g9_w500.bp', 'tst_feat_g4_w500.bp', 'tst_feat_g18_w500.bp', 'tst_feat_g3_w500.bp', 'tst_feat_g5_w500.bp', 'tst_feat_g15_w500.bp', 'tst_feat_g14_w500.bp', 'tst_feat_g13_w500.bp', 'tst_feat_g10_w500.bp', 'tst_feat_g6_w500.bp', 'tst_feat_g2_w500.bp', 'tst_feat_g0_w500.bp', 'tst_feat_g12_w500.bp', 'tst_feat_g17_w500.bp', 'tst_feat_g16_w500.bp', 'tst_feat_g11_w500.bp', 'tst_feat_g1_w500.bp', 'tst_feat_g8_w500.bp', 'tst_feat_g19_w500.bp', 'tst_feat_g7_w500.bp'] trn_feat_g4_w500_fix.bp


In [14]:
feats_f_tst_dat = [f for f in os.listdir('../input/feats_tblr/ss') if ('tst_dat' in f) and ('w250' in f)]
feats_f_tst_dat = sorted(feats_f_tst_dat)
feats_f_tst_dat = feats_f_tst_dat[:1] + feats_f_tst_dat[11:] + feats_f_tst_dat[1:11]
feats_f_tst_dat

['tst_dat_g0_w250.bp',
 'tst_dat_g1_w250.bp',
 'tst_dat_g2_w250.bp',
 'tst_dat_g3_w250.bp',
 'tst_dat_g4_w250.bp',
 'tst_dat_g5_w250.bp',
 'tst_dat_g6_w250.bp',
 'tst_dat_g7_w250.bp',
 'tst_dat_g8_w250.bp',
 'tst_dat_g9_w250.bp',
 'tst_dat_g10_w250.bp',
 'tst_dat_g11_w250.bp',
 'tst_dat_g12_w250.bp',
 'tst_dat_g13_w250.bp',
 'tst_dat_g14_w250.bp',
 'tst_dat_g15_w250.bp',
 'tst_dat_g16_w250.bp',
 'tst_dat_g17_w250.bp',
 'tst_dat_g18_w250.bp',
 'tst_dat_g19_w250.bp']

In [15]:
tst_w250 = np.concatenate([bp.unpack_ndarray_from_file(os.path.join('../input/feats_tblr/ss', f)) for f in feats_f_tst_dat], axis=0)

In [16]:
tst_w250.shape

(2000000, 692)

In [17]:
bp.pack_ndarray_to_file(tst_w250, '../input/tst_dat_all_w250.bp')

In [3]:
feats_curr = bp.unpack_ndarray_from_file(os.path.join('../input/feats_tblr/ss', feats_f_curr))
feats_trgt = bp.unpack_ndarray_from_file(os.path.join('../input/feats_tblr/ss', feats_f_trgt))
feats_fix = bp.unpack_ndarray_from_file(os.path.join('../input/feats_tblr/ss', feats_f_fix))

In [4]:
feats_diff = list(set(feats_trgt) - set(feats_curr))
feats_diff

['MA_10th_wndw_BBhigh_avg_L',
 'MA_2nd_wndw_BBlow_avg_R',
 'MA_5th_wndw_BBhigh_avg_L',
 'MA_2nd_wndw_std_avg_L',
 'MA_5th_wndw_std_avg_L',
 'MA_2nd_wndw_BBhigh_avg_R',
 'MA_10th_wndw_std_avg_R',
 'MA_5th_wndw_BBlow_avg_L',
 'MA_2nd_wndw_std_avg_R',
 'MA_10th_wndw_BBlow_avg_R',
 'MA_5th_wndw_std_avg_R',
 'MA_10th_wndw_BBhigh_avg_R',
 'MA_5th_wndw_BBlow_avg_R',
 'MA_5th_wndw_BBhigh_avg_R',
 'MA_2nd_wndw_BBhigh_avg_L',
 'MA_10th_wndw_std_avg_L',
 'MA_10th_wndw_BBlow_avg_L',
 'MA_2nd_wndw_BBlow_avg_L']

In [5]:
feats_fix

array(['MA_5th_wndw_std_avg_L', 'MA_5th_wndw_BBhigh_avg_L',
       'MA_5th_wndw_BBlow_avg_L', 'MA_2nd_wndw_std_avg_L',
       'MA_2nd_wndw_BBhigh_avg_L', 'MA_2nd_wndw_BBlow_avg_L',
       'MA_10th_wndw_std_avg_L', 'MA_10th_wndw_BBhigh_avg_L',
       'MA_10th_wndw_BBlow_avg_L', 'MA_5th_wndw_std_avg_R',
       'MA_5th_wndw_BBhigh_avg_R', 'MA_5th_wndw_BBlow_avg_R',
       'MA_2nd_wndw_std_avg_R', 'MA_2nd_wndw_BBhigh_avg_R',
       'MA_2nd_wndw_BBlow_avg_R', 'MA_10th_wndw_std_avg_R',
       'MA_10th_wndw_BBhigh_avg_R', 'MA_10th_wndw_BBlow_avg_R'],
      dtype='<U25')

In [6]:
feats_fix_map = OrderedDict([(s, i) for i, s in zip(range(len(feats_fix)), feats_fix)])
feats_fix_map

OrderedDict([('MA_5th_wndw_std_avg_L', 0),
             ('MA_5th_wndw_BBhigh_avg_L', 1),
             ('MA_5th_wndw_BBlow_avg_L', 2),
             ('MA_2nd_wndw_std_avg_L', 3),
             ('MA_2nd_wndw_BBhigh_avg_L', 4),
             ('MA_2nd_wndw_BBlow_avg_L', 5),
             ('MA_10th_wndw_std_avg_L', 6),
             ('MA_10th_wndw_BBhigh_avg_L', 7),
             ('MA_10th_wndw_BBlow_avg_L', 8),
             ('MA_5th_wndw_std_avg_R', 9),
             ('MA_5th_wndw_BBhigh_avg_R', 10),
             ('MA_5th_wndw_BBlow_avg_R', 11),
             ('MA_2nd_wndw_std_avg_R', 12),
             ('MA_2nd_wndw_BBhigh_avg_R', 13),
             ('MA_2nd_wndw_BBlow_avg_R', 14),
             ('MA_10th_wndw_std_avg_R', 15),
             ('MA_10th_wndw_BBhigh_avg_R', 16),
             ('MA_10th_wndw_BBlow_avg_R', 17)])

In [7]:
feats_trgt_map = OrderedDict(sorted([(feats_fix_map[s], np.where(feats_trgt==s)[0].item()) for s in feats_diff], key=lambda tpl: tpl[1]))
feats_trgt_map

OrderedDict([(0, 56),
             (1, 57),
             (2, 58),
             (3, 59),
             (4, 60),
             (5, 61),
             (6, 62),
             (7, 63),
             (8, 64),
             (9, 402),
             (10, 403),
             (11, 404),
             (12, 405),
             (13, 406),
             (14, 407),
             (15, 408),
             (16, 409),
             (17, 410)])

In [8]:
dat_f_curr = sorted([f for f in os.listdir('../input/feats_tblr/ss') if ('trn_dat' in f) and ('w500' in f) and ('fix' not in f)])
dat_f_trgt = sorted([f for f in os.listdir('../input/feats_tblr/ss') if ('tst_dat' in f) and ('w500' in f)])
dat_f_fix = sorted([f for f in os.listdir('../input/feats_tblr/ss') if ('trn_dat' in f) and ('w500' in f) and ('fix' in f)])

In [9]:
dat_f_curr

['trn_dat_g0_w500.bp',
 'trn_dat_g1_w500.bp',
 'trn_dat_g2_w500.bp',
 'trn_dat_g3_w500.bp',
 'trn_dat_g4_w500.bp',
 'trn_dat_g5_w500.bp',
 'trn_dat_g6_w500.bp',
 'trn_dat_g7_w500.bp',
 'trn_dat_g8_w500.bp',
 'trn_dat_g9_w500.bp']

In [10]:
dat_f_fix

['trn_dat_g0_w500_fix.bp',
 'trn_dat_g1_w500_fix.bp',
 'trn_dat_g2_w500_fix.bp',
 'trn_dat_g3_w500_fix.bp',
 'trn_dat_g4_w500_fix.bp',
 'trn_dat_g5_w500_fix.bp',
 'trn_dat_g6_w500_fix.bp',
 'trn_dat_g7_w500_fix.bp',
 'trn_dat_g8_w500_fix.bp',
 'trn_dat_g9_w500_fix.bp']

In [11]:
for i, (c, f) in enumerate(zip(dat_f_curr, dat_f_fix)):
    print(i)
    arr_cur = bp.unpack_ndarray_from_file(os.path.join('../input/feats_tblr/ss', c))
    arr_fix = bp.unpack_ndarray_from_file(os.path.join('../input/feats_tblr/ss', f))
    
    arr_fxd = arr_cur.copy()
    for i0, i1 in feats_trgt_map.items():
        arr_fxd = np.insert(arr_fxd, i1, arr_fix[:, i0], axis=1)
        
    bp.pack_ndarray_to_file(arr_fxd, '../input/trn_dat_g{:d}_w500_fixed.bp'.format(i))

0
1
2
3
4
5
6
7
8
9


In [12]:
for i, f in enumerate(sorted([f for f in os.listdir('../input') if 'w500' in f])):
    if i == 0:
        feat = bp.unpack_ndarray_from_file(os.path.join('../input/', f))
    else:
        feat = np.concatenate([feat, bp.unpack_ndarray_from_file(os.path.join('../input/', f))], 0)

In [13]:
bp.pack_ndarray_to_file(feat, '../input/trn_dat_all_w500_fixed.bp'.format(i))