# MLP A: Dibulatkan tanpa Outlier RDR

In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import itertools
import warnings
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

tf.random.set_seed(7)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 
warnings.filterwarnings('ignore')

2022-11-22 06:15:26.911594: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-22 06:15:27.081776: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-11-22 06:15:27.081806: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-11-22 06:15:28.004756: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2022-

# Read Data and Prune

In [2]:
def prune_columns(dfm:pd.DataFrame, cols:list):
    return dfm.drop(cols, axis=1)

In [3]:
def read_data(ddir:str):
    dataframe = pd.read_csv(ddir)
    pruned_dataframe = prune_columns(dataframe, ['ImagesName'])
    return pruned_dataframe

# Split and Scale Data

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [5]:
def split_data(dataframe:pd.DataFrame):
    X = dataframe.drop(['Labels'], axis=1)
    y = dataframe['Labels']
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    return X_train_scaled, X_test_scaled, y_train, y_test

# Build Models

In [6]:
def optimize(models: list,
             X_train: np.array,
             y_train: np.array,
             X_test: np.array,
             y_test: np.array,
             epochs: int = 50,
             verbose: int = 0) -> pd.DataFrame:
    
    # We'll store the results here
    results = []
    
    def train(model: tf.keras.Sequential) -> dict:
        # Change this however you want 
        # We're not optimizing this part today
        model.compile(
            loss=tf.keras.losses.binary_crossentropy,
            optimizer=tf.keras.optimizers.Adam(),
            metrics=[
                tf.keras.metrics.BinaryAccuracy(name='accuracy')
            ]
        )
        
        # Train the model
        model.fit(
            X_train,
            y_train,
            epochs=epochs,
            verbose=verbose
        )
        
        
        # Make predictions on the test set
        preds = model.predict(X_test)
        prediction_classes = [1 if prob > 0.5 else 0 for prob in np.ravel(preds)]
        
        model.evaluate(X_test, y_test, verbose=0)
        model.save(f'/home/tsdhrm/Documents/retinacape/reticape/modelling/MLP/savedModel/default_lr/nn_{model.name}.h5')
        # Return evaluation metrics on the test set
        return {
            'model_name': model.name,
            'test_accuracy': accuracy_score(y_test, prediction_classes),
            'test_precision': precision_score(y_test, prediction_classes),
            'test_recall': recall_score(y_test, prediction_classes),
            'test_f1': f1_score(y_test, prediction_classes)
        }
    
    # Train every model and save results
    for model in models:
        try:
            print(model.name, end=' ... ')
            res = train(model=model)
            results.append(res)
        except Exception as e:
            print(f'{model.name} --> {str(e)}')
        
    return pd.DataFrame(results)

In [7]:
def build_models(num_layers: int, min_nodes_per_layer: int,\
                 max_nodes_per_layer: int, node_step_size: int,\
                 input_shape: tuple, hidden_layer_activation: str = 'relu',\
                 num_nodes_at_output: int = 1, output_layer_activation: str = 'sigmoid') -> list:
    
    node_options = list(range(min_nodes_per_layer, max_nodes_per_layer + 1, node_step_size))
    layer_possibilities = [node_options] * num_layers
    layer_node_permutations = list(itertools.product(*layer_possibilities))
    
    models = []
    for permutation in layer_node_permutations:
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.InputLayer(input_shape=input_shape))
        model_name = ''
        
        for nodes_at_layer in permutation:
            model.add(tf.keras.layers.Dense(nodes_at_layer, activation=hidden_layer_activation))
            model_name += f'dense{nodes_at_layer}_'
        
        model.add(tf.keras.layers.Dense(num_nodes_at_output, activation=output_layer_activation))
        model._name = model_name[:-1]
        models.append(model)
        
    return models

# Main

# Read

In [8]:
features_data_dir = '/home/tsdhrm/Documents/retinacape/reticape/exportedDataframe/tanpaDibulatkan.csv'

In [9]:
df = read_data(features_data_dir)

In [10]:
df.sample(5)

Unnamed: 0,CDR,VCDR,RDR,I,S,N,T,Labels
80,0.40566,0.323308,0.163522,181,186,107,140,0
28,0.434343,0.469091,0.060606,131,146,18,142,0
34,0.411141,0.422343,0.190981,157,173,150,144,0
59,0.391304,0.427215,0.098551,152,159,175,102,0
115,0.525943,0.398927,0.129717,204,200,94,181,1


In [11]:
X_train_scaled, X_test_scaled, y_train, y_test = split_data(df)

In [12]:
all_models = build_models(\
                          num_layers=3,#3
                          min_nodes_per_layer=10,
                          max_nodes_per_layer=30,
                          node_step_size=5,
                          input_shape=(7,)\
                         )

2022-11-22 06:15:29.551749: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-11-22 06:15:29.551799: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2022-11-22 06:15:29.551906: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (fedora): /proc/driver/nvidia/version does not exist
2022-11-22 06:15:29.552446: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [13]:
optimization_result = optimize(\
                               models=all_models,\
                               X_train=X_train_scaled,\
                               y_train=y_train,\
                               X_test=X_test_scaled,\
                               y_test=y_test,
                               epochs=50 # 30, 50, 100
                              )

dense10_dense10_dense10 ... dense10_dense10_dense10 --> name 'learning_rate' is not defined
dense10_dense10_dense15 ... dense10_dense10_dense15 --> name 'learning_rate' is not defined
dense10_dense10_dense20 ... dense10_dense10_dense20 --> name 'learning_rate' is not defined
dense10_dense10_dense25 ... dense10_dense10_dense25 --> name 'learning_rate' is not defined
dense10_dense10_dense30 ... dense10_dense10_dense30 --> name 'learning_rate' is not defined
dense10_dense15_dense10 ... dense10_dense15_dense10 --> name 'learning_rate' is not defined
dense10_dense15_dense15 ... dense10_dense15_dense15 --> name 'learning_rate' is not defined
dense10_dense15_dense20 ... dense10_dense15_dense20 --> name 'learning_rate' is not defined
dense10_dense15_dense25 ... dense10_dense15_dense25 --> name 'learning_rate' is not defined
dense10_dense15_dense30 ... dense10_dense15_dense30 --> name 'learning_rate' is not defined
dense10_dense20_dense10 ... dense10_dense20_dense10 --> name 'learning_rate' is 

In [14]:
optimization_result.sort_values(by='test_accuracy', ascending=False)

KeyError: 'test_accuracy'