In [None]:
from google.colab             import drive
# Mount Google Drive so the shelf and data paths under /content/drive resolve.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import shelve;
import warnings;
warnings.filterwarnings('ignore')
import pandas            as pd;
import numpy             as np;
from sklearn.metrics          import balanced_accuracy_score,accuracy_score, f1_score, classification_report, precision_score, recall_score, precision_score, recall_score
from sklearn.utils            import resample

# Locations (on the mounted Drive) of the shelf holding the trained objects
# and of the raw data file used by the test bench.
sh_file   = '/content/drive/MyDrive/Data/shelf_central'
data_path = '/content/drive/MyDrive/Data/kddcup.data.corrected'


########################## Test Bench Setup ##########################
print("\n\nSetting up test bench...")

############# Importing Models, scalers, encoders #############

print("\t> Importing trained models...")

# Every key stored in the shelf is published as a module-level global, which
# is how PREDICTOR() finds the objects it uses (IE, SCALE_IN, DR, MODEL, ...).
# The `with` block guarantees the shelf is closed even if loading an entry
# raises part-way through.
# NOTE(security): shelve unpickles its values, so only open shelf files that
# come from a trusted source.
with shelve.open(sh_file) as shelver:
  for key in shelver:
    globals()[key] = shelver[key]


############# Defining a predictor pipeline function #############

print("\t> Creating 'PREDICTOR()' pipeline function...")

def PREDICTOR(data, verbose=1):
  """Run the two-stage prediction pipeline on raw feature rows.

  The function relies on module-level objects that are not defined here
  (IE, SCALE_IN, DR, MODEL, TE, MODEL2, T2); they are presumably the
  entries loaded from the shelf during setup - confirm against the shelf
  contents.

  Stage 1: encode with IE, scale with SCALE_IN, reduce with DR, predict
  with MODEL and decode the labels with TE.
  Stage 2: every row whose stage-1 label is 'other.' is re-encoded with IE
  (no scaling or reduction, as in the original code), classified with
  MODEL2 and decoded with T2; the result overwrites the 'other.' label.

  Parameters:
    data:    anything accepted by pd.DataFrame() that yields exactly 41
             columns. The caller's object is not modified.
    verbose: progress messages are printed only when this equals 1.

  Returns:
    A one-column DataFrame (column 0, default integer index) holding the
    predicted label for each input row, or the integer 0 when the input
    does not have 41 columns.
  """

  def _log(message):
    # Progress output is emitted only for verbose == 1, as before.
    if verbose == 1:
      print(message)

  d = pd.DataFrame(data)
  sh = d.shape
  # sh[0] is the number of rows, so the message says "Rows".
  _log("\n\tPREDICTOR() called: {} Rows to predict!".format(sh[0]))

  if sh[1] != 41:
    _log("Error! Wrong format of input!")
    return 0

  # Column naming: integer labels 0..40 become their string form. A single
  # non-inplace rename returns a new frame, so the caller's columns are
  # never touched.
  d = d.rename(columns={i: str(i) for i in range(41)})

  # Column division (all 41 columns are features)
  _log("\t\u2022 Diving columns..")
  X_pred = d

  # Encoding categorical variables
  _log("\t\u2022 Encoding..")
  X_pred = pd.DataFrame(IE.transform(X_pred))

  # Scaling
  _log("\t\u2022 Scaling..")
  X_pred = SCALE_IN.transform(X_pred)

  # Dimensionality reduction
  _log("\t\u2022 Reducing dimensions...")
  X_pred = DR.transform(X_pred)

  # First-stage prediction
  _log("\t\u2022 Random Forest Prediction...")
  Y_pred = MODEL.predict(X_pred)

  # Decoding
  _log("\t\u2022 Decoding...")
  Y_ret = pd.DataFrame(TE.inverse_transform(Y_pred.astype(int)))

  # Locating the rows predicted as 'other.'. Y_ret has a default integer
  # index, so a positional boolean mask lines up with the rows of d.
  _log("\t\u2022 Finding indexes 'other.' indexes...")
  other_mask = (Y_ret[0] == 'other.').to_numpy()

  # Classifying the 'other.' rows further. All of them are sent through
  # IE/MODEL2/T2 in one batch instead of one call per row; this assumes
  # those objects treat rows independently - TODO confirm for custom ones.
  _log("\t\u2022 XGB Prediction...")
  if other_mask.any():
    X_other = pd.DataFrame(IE.transform(d.iloc[other_mask, :]))
    Y_other = MODEL2.predict(X_other).astype(int)
    Y_other = pd.DataFrame(T2.inverse_transform(Y_other))
    # A single .loc assignment replaces the chained Y_ret[0][ind] = ...
    # form, which pandas may apply to a temporary copy.
    Y_ret.loc[other_mask, 0] = Y_other[0].to_numpy()

  _log("\t\u2022 PREDICTOR: Complete\n")
  return Y_ret


############# Defining an imbalance metrics calculator #############

print("\t> Creating 'METRIC()' imbalance metric calculator functions...")

def METRIC(Y_finalT,Y_finalP):
  """Print five classification scores for true labels vs. predictions.

  Y_finalT holds the true labels and Y_finalP the predicted ones. Accuracy
  and balanced accuracy are printed along with macro-averaged F1,
  precision and recall. Nothing is returned.
  """
  # All scores are computed before anything is printed, so a failure in any
  # metric produces no partial report.
  report = [
    ("\t> Accuracy        - ", accuracy_score(Y_finalT, Y_finalP)),
    ("\t> F1 Score        - ", f1_score(Y_finalT, Y_finalP, average='macro')),
    ("\t> Precision       - ", precision_score(Y_finalT, Y_finalP, average='macro')),
    ("\t> Recall          - ", recall_score(Y_finalT, Y_finalP, average='macro')),
    ("\t> Macro Accuracy  - ", balanced_accuracy_score(Y_finalT, Y_finalP)),
  ]

  for label, score in report:
    print(label + str(score))

############# Reading data file #############

print("\t> Reading file...")
# The file has no header row; the last column is used as the label below.
df_backup = pd.read_csv(data_path, header=None)

########################## Test Bench ##########################
print("\n\nTesing start")

# Three independent evaluations, each on 40000 rows drawn without
# replacement. Sample k uses random_state k (1, 2, 3), the seeds the
# original copy-pasted sections actually passed.
for sample_no, seed in enumerate((1, 2, 3), start=1):
  print("\t\n<> SAMPLE {}".format(sample_no))
  finalT = df_backup.sample(n=40000, replace=False, random_state=seed)

  # All columns but the last are features; the last column is the label.
  X_finalT = finalT.iloc[:, :-1]
  Y_finalT = pd.DataFrame(finalT.iloc[:, -1])
  Y_finalP = PREDICTOR(X_finalT)

  METRIC(Y_finalT, Y_finalP)

print("\n\n\t\t\t\t THANK YOU")



Tesing start
	
<> SAMPLE 1

	PREDICTOR() called: 40000 Columns to predict!
	• Diving columns..
	• Encoding..
	• Scaling..
	• Reducing dimensions...
	• Random Forest Prediction...
	• Decoding...
	• Finding indexes 'other.' indexes...
	• XGB Prediction...
	• PREDICTOR: Complete

	> Accuracy        - 0.9999
	> F1 Score        - 0.9331443666557784
	> Precision       - 0.9299835015527951
	> Recall          - 0.936501933250019
	> Macro Accuracy  - 0.9989353954666867
	
<> SAMPLE 2

	PREDICTOR() called: 40000 Columns to predict!
	• Diving columns..
	• Encoding..
	• Scaling..
	• Reducing dimensions...
	• Random Forest Prediction...
	• Decoding...
	• Finding indexes 'other.' indexes...
	• XGB Prediction...
	• PREDICTOR: Complete

	> Accuracy        - 0.9998
	> F1 Score        - 0.9145404535144029
	> Precision       - 0.915841361389098
	> Recall          - 0.9138574912160793
	> Macro Accuracy  - 0.9900122821507525
	
<> SAMPLE 3

	PREDICTOR() called: 40000 Columns to predict!
	• Diving columns..