# GAN - BREATHING WAVE
## Generative Adversarial Networks
### 05 May 2023
***

## PART 1 : Data Preprocessing

### Importing Libraries

In [None]:
import pandas as pd
import numpy as np

### Import Dataset

In [None]:
# Load the raw breathing-waveform table, then discard its final column ("notes").
df = pd.read_csv("dataset/breathing_waveform_data.csv")
df = df.iloc[:, :-1]

### Filter the zero values
> This selects every row that contains a zero value in any column except the first one.
>
> REASON : I think it is natural for the first column to be 0.0 (because the time (X) is still at 0 seconds)

In [None]:
# Collect every row in which any column other than the first holds exactly 0.
zeros_val = df.loc[(df.iloc[:, 1:] == 0).any(axis=1)]

In [None]:
# Notebook cell output: show the rows collected in zeros_val.
zeros_val

### Drop the rows that contain zero values

In [None]:
# Remove the rows collected in zeros_val by dropping their index labels.
# The previous `df[~df.isin(zeros_val)].dropna()` masked those rows to NaN
# first, which upcasts integer columns to float and raises on a non-unique
# index. `.dropna()` is kept so rows with missing values are still discarded.
df = df.drop(index=zeros_val.index).dropna()

In [None]:
# Notebook cell output: show the table after the zero-value rows were removed.
df

## PART 2 : Generating Synthetic Data

### Importing CTGAN Library

In [None]:
from ctgan import CTGAN

### Define the discrete columns in the dataset

In [None]:
# Columns that hold categorical (non-continuous) values.
discrete_columns = ['labels']

### Configure CTGAN and fit it to the data

In [None]:
# Build the CTGAN synthesizer, then train it on the cleaned table with
# 'labels' declared as a discrete column.
ctgan = CTGAN(
    epochs=10,
    verbose=True,
    generator_lr=1e-5,
    cuda=True,
)
ctgan.fit(df, discrete_columns=discrete_columns)

### Generate synthetic data

In [None]:
# Draw 1000 synthetic rows from the fitted synthesizer.
samples = ctgan.sample(n=1000)

### Evaluate Data

In [None]:
from table_evaluator import TableEvaluator

# Print the shapes of the real and the synthetic table side by side.
print(df.shape, samples.shape)

# Compare the real table with the synthetic one; 'labels' is treated as categorical.
table_evaluator = TableEvaluator(
    real=df,
    fake=samples,
    cat_cols=discrete_columns,
)
table_evaluator.visual_evaluation()

In [None]:
# Notebook cell output: show the synthetic rows.
samples

## PART 3 : Data Preprocessing

### Preprocess the generated data

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

# Module-level label encoder: it is fitted inside data_preprocessing() and
# reused by revert_back() to map class indices back to the original labels.
encoder = LabelEncoder()
    
def data_preprocessing(X, Y, feature=5):
    """Scale the features, fold them into timesteps and one-hot encode the labels.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_columns)
        Feature table. ``n_columns`` must be a multiple of ``feature``.
    Y : array-like of shape (n_samples,)
        Class label of every row.
    feature : int, default 5
        Number of values grouped into one timestep of the 3-D output.

    Returns
    -------
    X : numpy.ndarray of shape (n_samples, n_columns // feature, feature)
        Standardized features, e.g. (26400, 17, 5) for 85 input columns.
    hot_y : numpy.ndarray
        One-hot encoded labels.

    Raises
    ------
    ValueError
        If ``feature`` is not positive or does not divide the column count.

    Notes
    -----
    Refits the module-level ``encoder`` on ``Y`` as a side effect.
    """
    ## DATA
    # NOTE(review): the scaler is fitted on the data passed in, not reused from
    # the training of any downstream model -- confirm this is intended.
    sc = StandardScaler()
    X = sc.fit_transform(X)

    # Derive the number of timesteps from the actual column count instead of
    # assuming exactly 85 columns.
    n_columns = X.shape[1]
    if feature <= 0 or n_columns % feature != 0:
        raise ValueError(
            "Cannot split {} columns into timesteps of {} values".format(
                n_columns, feature
            )
        )

    # reshaping the data to a 3-Dimensional Numpy Array:
    # `feature` values are used per sequence/timestep per sample/row
    X = np.reshape(X, (X.shape[0], n_columns // feature, feature))

    ## LABEL
    # encode class values as integers [0,0,0,0,0,0,0,1,1,1,1,1,2,2,2,2]
    encoder.fit(Y)
    encoded_Y = encoder.transform(Y)

    # convert integers to dummy variables (i.e. one hot encoded)
    hot_y = np_utils.to_categorical(encoded_Y)

    return X, hot_y

In [None]:
# Every column but the last is a feature; the last column is the label.
X, Y = samples.iloc[:, :-1], samples.iloc[:, -1]

In [None]:
# Turn the synthetic features/labels into the scaled 3-D array and the
# one-hot label matrix returned by data_preprocessing().
X_test, Y_test = data_preprocessing(X=X, Y=Y)

## PART 4 : Predict the generated data using pre-trained model

### Import pre-trained model

In [None]:
from tensorflow.keras.models import load_model
# Location of the saved model file.
# NOTE(review): absolute, machine-specific Windows path -- adjust when running elsewhere.
filename = "C:\\Users\\IoT-Lab\\Documents\\!Erwin Yonata\\Anasa\\MODELS\\[3-layer] - 3L1\\CV\\GridSearchCV\\best_param_model.h5"

# Restore the saved model from disk.
loaded_model = load_model(filepath=filename)

### Make prediction using generated data

In [None]:
# Run the loaded model on the preprocessed synthetic features.
pred = loaded_model.predict(x=X_test)

## PART 5 : Evaluate

### Plot confusion matrix

In [None]:
# Collapse each one-hot / score row to the index of its largest entry.
y_true = Y_test.argmax(axis=1)
y_pred = pred.argmax(axis=1)

In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

# Define the confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred)

# Class ids in the order confusion_matrix uses for its rows/columns when no
# explicit `labels` argument is given: the sorted labels that occur in
# y_true or y_pred.
class_ids = np.union1d(y_true, y_pred)

# Plot the confusion matrix
plt.imshow(conf_matrix, cmap=plt.cm.Greens)

# Add labels to the plot. One tick is pinned per class so the axes show the
# class ids instead of matplotlib's automatic tick positions (tick_marks was
# previously computed but never applied).
tick_marks = np.arange(len(conf_matrix))
plt.xticks(tick_marks, class_ids)
plt.yticks(tick_marks, class_ids)
plt.xlabel('Predicted label')
plt.ylabel('True label')

# Add values to the plot: x is the column (predicted), y is the row (true)
for (row, col), count in np.ndenumerate(conf_matrix):
    plt.text(col, row, count, ha='center', va='center')

# Show the plot
plt.show()

### Create a DataFrame comparing the generated labels with the predicted labels

In [None]:
def revert_back(hot_y):
    """Map one-hot (or per-class score) rows back to their original labels.

    The index of the largest entry in each row of ``hot_y`` is taken as the
    encoded class, which the module-level ``encoder`` then translates back
    into the label values it was fitted on.
    """
    return encoder.inverse_transform(np.argmax(hot_y, axis=1))

In [None]:
# Original-label form of the generated labels and of the model's predictions.
rb_Y_test, rb_pred = revert_back(Y_test), revert_back(pred)

In [None]:
# Side-by-side table of the generated label and the predicted label per row.
# Built from a dict so the columns are a flat Index: the previous nested list
# `columns=[["generated", "prediction"]]` produced MultiIndex columns, which
# made `df_pred['generated']` return a DataFrame instead of a Series.
df_pred = pd.DataFrame({"generated": rb_Y_test, "prediction": rb_pred})

In [None]:
# Notebook cell output: how often each label occurs in the 'generated' column.
df_pred['generated'].value_counts()

In [None]:
# Notebook cell output: how often each label occurs in the 'prediction' column.
df_pred['prediction'].value_counts()

### Evaluate the predicted label with the generated label

In [None]:
# Evaluate the loaded model on the synthetic features against the generated labels.
# NOTE(review): score[1] is reported as accuracy -- this assumes the model was
# compiled with accuracy as its first metric; TODO confirm.
score = loaded_model.evaluate(x=X_test, y=Y_test)

# Both values are scaled by 100 before printing.
# NOTE(review): the loss is not a percentage -- confirm the x100 is intended.
print("Accuracy \t: {:.2f}".format(100 * score[1]))
print("Loss \t\t: {:.2f}".format(100 * score[0]))