In [1]:
import pandas as pd
import numpy as np
import time
from utils import dfwellgr,marker_ssig,extract_signature_Xy,get_classifier,plot_simple
from sklearn.model_selection import train_test_split
import random
import matplotlib.pyplot as plt
from tqdm import tqdm

random.seed(42)

In [2]:
#Import For Classification 
from sklearn.linear_model import RidgeClassifierCV
from sklearn.linear_model import LogisticRegression
import xgboost as xgb

#Import For Testing 
from utils import window, plot_pred_distribution
from constraints import apply_process,get_markers_rocket_order_with_constraint
from utils import apply_evaluate

from IPython.display import clear_output
import statistics

In [11]:
def run_prediction(myrocket,use_constraint=False,use_xgb=True,confidence_level=0.96):
    start = time.time()
    df_tops_pred = pd.DataFrame(pd.DataFrame(columns = ['wellName','MARCEL', 'SYLVAIN', 'CONRAD']))
    intervals = apply_process(df_tops,confidence_level,log=True)
    for well in tqdm(df_test_tops.index, desc=f"Processing Wells"):
        pred_m, df_wm = get_markers_rocket_order_with_constraint(well,f_mean, f_std,intervals, df_test_log, pred_column, wsize, input_variable, 
                                             s2s = False,
                                             model = None, 
                                             xgb = use_xgb,
                                             rocket = myrocket, 
                                             classifier_xgb = classifier_xgb, 
                                             classifier = classifier,
                                             constraint = use_constraint,
                                            )
        row = {'wellName':well, 'MARCEL':pred_m[0], 'SYLVAIN':pred_m[1], 'CONRAD':pred_m[2]}
        row_df = pd.DataFrame([row])
        df_tops_pred = pd.concat([df_tops_pred, row_df], axis = 0, ignore_index = "True")
        
    ext = time.time() - start
    print(f'🍺 Total prediction time {ext}')
    df_tops_pred['wellName']  = df_tops_pred['wellName'].astype(float)
    df_tops_pred = df_tops_pred.sort_values(by = ['wellName']).reset_index().drop(['index'], axis = 1)
    df_tops_pred = df_tops_pred.set_index('wellName')
    return df_tops_pred

### LOAD Data and Testing Data

In [5]:
#Load Testing Data
df_test_log = pd.read_parquet('testdata/logs_50.parquet', engine='fastparquet')
df_test_log.loc[df_test_log['GR'] < -1, 'GR' ] = -1
df_test_log.loc[df_test_log['GR'] > 400, 'GR' ] = 400

df_test_loc = pd.read_parquet('testdata/loc_50.parquet', engine='fastparquet')
df_test_loc = df_test_loc.reset_index()

df_test_log = df_test_loc.merge(df_test_log, how = 'inner', left_on = 'wellName', right_on = 'wellName')

df_test_tops = pd.read_csv('testdata/tops_50.csv')
df_test_tops = df_test_tops.set_index('wellName')
cols = ['MARCEL', 'SYLVAIN', 'CONRAD']
df_test_tops = df_test_tops[cols]

In [6]:
df_tops = pd.read_parquet('Training/tops.parquet', engine='fastparquet')
cols = ['MARCEL', 'SYLVAIN', 'CONRAD']
df_tops = df_tops[cols]
df_tops.dropna(inplace = True)
df_tops[df_tops['CONRAD'] - df_tops['SYLVAIN'] < 0] #Here we can see incorrect data

well_array = np.load('hackaton_training_well_one.npy', allow_pickle=True)
df_tops = df_tops[df_tops.index.isin(well_array[0][0])]

### 

### Mini ROCKET

The evolution of ROCKET, MINIROCKET [6], is proposed as the new default variant of ROCKET by its authors and utilizes only one feature per kernel (percentage of positive values), thereby halving the features. It also utilizes other optimizations to speed up ROCKET in general and follows a minimally random approach with a given set of kernels

In [20]:
from sktime.transformations.panel.rocket import MiniRocket

In [21]:
#Load Prepared Data
X = np.load('prepared_data/X_201.npy')
y = np.load('prepared_data/y_201.npy')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train,y_train = X,y
minirocket = MiniRocket(num_kernels=10000) 
minirocket.fit(X_train) 

start = time.time()
X_train_transformed = minirocket.transform(X_train) 
et = time.time() - start
print(f'Transforming time for mini ROCKET: {et}')

X_test_transformed = minirocket.transform(X_test) 

Transforming time for mini ROCKET: 1.8457438945770264


In [None]:
f_mean, f_std, classifier_xgb, classifier = get_classifier(X_train_transformed,y_train,X_test_transformed,y_test)

wsize = 201
input_variable=['GR']
pred_column = ['None','Marcel', 'Sylvain', 'Conrad']
print('-'*10,'Prediction With Constraint','-'*10)
df_tops_pred = run_prediction(minirocket,use_constraint=True,use_xgb=use_xgb)
optimal_tolerance,_ = apply_evaluate(df_test_tops,df_tops_pred)

### MultiROCKET

In [15]:
from rocket.multi_rocket.multirocket import MultiRocket

In [16]:
X = np.load('prepared_data/X_201.npy')
y = np.load('prepared_data/y_201.npy')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = X_train.reshape((X_train.shape[0], X_train.shape[2]))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[2]))

In [None]:
num_kernels = [500,1000,2000]
xgb = [True, False]

wsize = 201
input_variable=['GR']
pred_column = ['None','Marcel', 'Sylvain', 'Conrad']

for i in num_feat:
    for use_xgb in xgb:
        print("="*60)
        print(f"\033[1mTesting Num_features:{i},Use_Xgb:{use_xgb}\033[0m")
        print(""*40)

        multirocket = MultiRocket(
            num_kernels=1000
        )
        multirocket.fit(X_train)
        
        start = time.time()
        X_train_transformed = multirocket.transform(X_train)
        et = time.time() - start
        print(f'Transforming time for ROCKET: {et}')

        X_test_transformed = multirocket.transform(X_test) 

        f_mean, f_std, classifier_xgb, classifier = get_classifier(X_train_transformed,y_train,X_test_transformed,y_test)
        print('-'*10,'Prediction With Constraint','-'*10)
        df_tops_pred = run_prediction(multirocket,use_constraint=True,use_xgb=use_xgb)
        optimal_tolerance,_ = apply_evaluate(df_test_tops,df_tops_pred)


### LightWaveS

In [7]:
from rocket.lightwaves.lightwaves import LightWaveS

[wangyuchens-MBP.lan:61592] shmem: mmap: an error occurred while determining whether or not /var/folders/gj/m7pys2v95k19_b1dkf01nl5r0000gn/T//ompi.wangyuchens-MBP.501/jf.0/2344026112/sm_segment.wangyuchens-MBP.501.8bb70000.0 could be created.


In [8]:
X = np.load('prepared_data/X_201.npy')
y = np.load('prepared_data/y_201.npy')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
num_feat = [500,1000,2000,5000]
xgb = [True, False]

wsize = 201
input_variable=['GR']
pred_column = ['None','Marcel', 'Sylvain', 'Conrad']

for i in num_feat:
    for use_xgb in xgb:
        print("="*60)
        print(f"\033[1mTesting Num_features:{i},Use_Xgb:{use_xgb}\033[0m")
        print(""*40)

        lightwaves = LightWaveS(final_num_feat=i)
        lightwaves.fit(X_train.astype(np.float32),y_train)
        
        start = time.time()
        X_train_transformed = lightwaves.transform(X_train)
        et = time.time() - start
        print(f'Transforming time for ROCKET: {et}')

        X_test_transformed = lightwaves.transform(X_test) 

        f_mean, f_std, classifier_xgb, classifier = get_classifier(X_train_transformed,y_train,X_test_transformed,y_test)
        print('-'*10,'Prediction With Constraint','-'*10)
        df_tops_pred = run_prediction(lightwaves,use_constraint=True,use_xgb=use_xgb)
        optimal_tolerance,_ = apply_evaluate(df_test_tops,df_tops_pred)


[1mTesting Num_features:500,Use_Xgb:True[0m

Transforming time for ROCKET: 0.47490692138671875
XGBoost 0.996437054631829
Logistic  1 eps: 1.00E-06  C: 1.00E-02   train_acc: 0.99851  valid_acc: 0.99287
---------- Prediction With Constraint ----------
MARCEL 6212.0 6764.0
SYLVAIN 6441.0 7010.0
CONRAD 6464.0 7033.0


Processing Wells: 100%|██████████| 50/50 [00:49<00:00,  1.01it/s]


🍺 Total prediction time 52.08404588699341
tolerance 20, recall 0.9933333333333333, mae 3.8033333333333332
tolerance 15, recall 0.9666666666666667, mae 3.8033333333333332
tolerance 10, recall 0.9066666666666666, mae 3.8033333333333332
tolerance 5, recall 0.7866666666666666, mae 3.8033333333333332
Largest Error MARCEL: 26.0
Largest Error SYLVAIN: 18.0
Largest Error CONRAD: 14.0
🍺 Optimal Tolerance : 26
[1mTesting Num_features:500,Use_Xgb:False[0m

Transforming time for ROCKET: 0.54575514793396
XGBoost 0.996437054631829
Logistic  1 eps: 1.00E-06  C: 1.00E-02   train_acc: 0.99851  valid_acc: 0.99287
---------- Prediction With Constraint ----------
MARCEL 6212.0 6764.0
SYLVAIN 6441.0 7010.0
CONRAD 6464.0 7033.0


Processing Wells: 100%|██████████| 50/50 [01:01<00:00,  1.23s/it]


🍺 Total prediction time 63.703397274017334
tolerance 20, recall 0.9933333333333333, mae 2.6433333333333335
tolerance 15, recall 0.9866666666666667, mae 2.6433333333333335
tolerance 10, recall 0.9666666666666667, mae 2.6433333333333335
tolerance 5, recall 0.9266666666666666, mae 2.6433333333333335
Largest Error MARCEL: 22.0
Largest Error SYLVAIN: 10.0
Largest Error CONRAD: 12.0
🍺 Optimal Tolerance : 22
[1mTesting Num_features:1000,Use_Xgb:True[0m

Transforming time for ROCKET: 0.7517540454864502
XGBoost 0.997624703087886
Logistic  1 eps: 1.00E-06  C: 1.00E-02   train_acc: 0.99851  valid_acc: 0.99525
---------- Prediction With Constraint ----------
MARCEL 6212.0 6764.0
SYLVAIN 6441.0 7010.0
CONRAD 6464.0 7033.0


Processing Wells: 100%|██████████| 50/50 [01:26<00:00,  1.72s/it]


🍺 Total prediction time 88.0812201499939
tolerance 20, recall 0.9866666666666667, mae 3.34
tolerance 15, recall 0.9866666666666667, mae 3.34
tolerance 10, recall 0.96, mae 3.34
tolerance 5, recall 0.8066666666666666, mae 3.34
Largest Error MARCEL: 26.0
Largest Error SYLVAIN: 21.0
Largest Error CONRAD: 14.0
🍺 Optimal Tolerance : 26
[1mTesting Num_features:1000,Use_Xgb:False[0m

Transforming time for ROCKET: 0.7328143119812012
XGBoost 0.997624703087886
Logistic  1 eps: 1.00E-06  C: 1.00E-02   train_acc: 0.99851  valid_acc: 0.99525
---------- Prediction With Constraint ----------
MARCEL 6212.0 6764.0
SYLVAIN 6441.0 7010.0
CONRAD 6464.0 7033.0


Processing Wells: 100%|██████████| 50/50 [01:18<00:00,  1.58s/it]


🍺 Total prediction time 80.27910923957825
tolerance 20, recall 0.9933333333333333, mae 2.6766666666666667
tolerance 15, recall 0.9866666666666667, mae 2.6766666666666667
tolerance 10, recall 0.9733333333333334, mae 2.6766666666666667
tolerance 5, recall 0.9133333333333333, mae 2.6766666666666667
Largest Error MARCEL: 22.0
Largest Error SYLVAIN: 10.0
Largest Error CONRAD: 12.0
🍺 Optimal Tolerance : 22
[1mTesting Num_features:2000,Use_Xgb:True[0m

Transforming time for ROCKET: 1.0187959671020508
XGBoost 0.998812351543943
Logistic  1 eps: 1.00E-06  C: 1.00E-02   train_acc: 1.00000  valid_acc: 0.99644
---------- Prediction With Constraint ----------
MARCEL 6212.0 6764.0
SYLVAIN 6441.0 7010.0
CONRAD 6464.0 7033.0


Processing Wells: 100%|██████████| 50/50 [01:46<00:00,  2.14s/it]


🍺 Total prediction time 108.2645480632782
tolerance 20, recall 0.9666666666666667, mae 5.55
tolerance 15, recall 0.96, mae 5.55
tolerance 10, recall 0.9333333333333333, mae 5.55
tolerance 5, recall 0.78, mae 5.55
Largest Error MARCEL: 67.0
Largest Error SYLVAIN: 110.0
Largest Error CONRAD: 112.0
🍺 Optimal Tolerance : 112
[1mTesting Num_features:2000,Use_Xgb:False[0m

Transforming time for ROCKET: 1.0273609161376953
XGBoost 0.998812351543943
Logistic  1 eps: 1.00E-06  C: 1.00E-02   train_acc: 1.00000  valid_acc: 0.99644
---------- Prediction With Constraint ----------
MARCEL 6212.0 6764.0
SYLVAIN 6441.0 7010.0
CONRAD 6464.0 7033.0


Processing Wells: 100%|██████████| 50/50 [02:03<00:00,  2.47s/it]


🍺 Total prediction time 125.60870099067688
tolerance 20, recall 1.0, mae 2.45
tolerance 15, recall 0.9933333333333333, mae 2.45
tolerance 10, recall 0.98, mae 2.45
tolerance 5, recall 0.9133333333333333, mae 2.45
Largest Error MARCEL: 18.0
Largest Error SYLVAIN: 10.0
Largest Error CONRAD: 14.0
🍺 Optimal Tolerance : 18
[1mTesting Num_features:5000,Use_Xgb:True[0m

Transforming time for ROCKET: 1.029573917388916
XGBoost 0.996437054631829
Logistic  1 eps: 1.00E-06  C: 1.00E-02   train_acc: 1.00000  valid_acc: 0.99762
---------- Prediction With Constraint ----------
MARCEL 6212.0 6764.0
SYLVAIN 6441.0 7010.0
CONRAD 6464.0 7033.0


Processing Wells: 100%|██████████| 50/50 [01:47<00:00,  2.16s/it]


🍺 Total prediction time 109.00038695335388
tolerance 20, recall 0.98, mae 4.486666666666666
tolerance 15, recall 0.9466666666666667, mae 4.486666666666666
tolerance 10, recall 0.92, mae 4.486666666666666
tolerance 5, recall 0.7466666666666667, mae 4.486666666666666
Largest Error MARCEL: 67.0
Largest Error SYLVAIN: 20.0
Largest Error CONRAD: 14.0
🍺 Optimal Tolerance : 67
[1mTesting Num_features:5000,Use_Xgb:False[0m

Transforming time for ROCKET: 1.2681992053985596
XGBoost 0.996437054631829
Logistic  1 eps: 1.00E-06  C: 1.00E-02   train_acc: 1.00000  valid_acc: 0.99762
---------- Prediction With Constraint ----------
MARCEL 6212.0 6764.0
SYLVAIN 6441.0 7010.0
CONRAD 6464.0 7033.0


Processing Wells: 100%|██████████| 50/50 [02:24<00:00,  2.89s/it]


🍺 Total prediction time 148.4964406490326
tolerance 20, recall 0.9933333333333333, mae 2.95
tolerance 15, recall 0.98, mae 2.95
tolerance 10, recall 0.9533333333333334, mae 2.95
tolerance 5, recall 0.9, mae 2.95
Largest Error MARCEL: 40.0
Largest Error SYLVAIN: 10.0
Largest Error CONRAD: 17.0
🍺 Optimal Tolerance : 40
