# In [None]:  (Jupyter cell prompt left over from the notebook export)
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ConfusionMatrix


def encode_data(encoder_array, columns=(0, 1)):
    """Label-encode selected columns of a 2-D array in place.

    Every listed column is run through its own ``LabelEncoder``, so the
    codes assigned in one column are independent of the other columns.

    Args:
        encoder_array: 2-D array supporting ``array[:, i]`` reads and slice
            assignment (the script passes ``DataFrame.values``).
        columns: Indices of the columns to encode. Defaults to ``(0, 1)``,
            the two columns this function originally hard-coded.

    Returns:
        The same ``encoder_array`` object that was passed in, with the
        selected columns overwritten by their encoded labels. The input is
        modified in place; pass a copy if the original values are needed.
    """
    for column_index in columns:
        # A fresh encoder per column: fit_transform learns that column's
        # classes and returns the corresponding codes for each row.
        encoder_array[:, column_index] = LabelEncoder().fit_transform(
            encoder_array[:, column_index]
        )

    return encoder_array


def predict_all_scenarios(gaussian_function):
    """Ask the given model for a prediction on all four 0/1 feature pairs.

    Args:
        gaussian_function: Object exposing a ``predict`` method. It is
            called once with the rows ``[0, 0]``, ``[1, 0]``, ``[0, 1]``
            and ``[1, 1]``, in that order.

    Returns:
        Whatever ``predict`` returns for those four rows.
    """
    all_feature_pairs = [
        [0, 0],
        [1, 0],
        [0, 1],
        [1, 1],
    ]
    return gaussian_function.predict(all_feature_pairs)


# Load the modeled dataset from the fixed Colab sample-data path.
database = pd.read_csv('/content/sample_data/modeled_data.csv')

# Abort early if any column contains missing values.
# The per-column null count must come from the data itself:
# ``database.columns.isnull()`` only inspects the column *labels*, so it
# never notices nulls inside the columns.
for column, count_nulls in database.isnull().sum().items():
    if count_nulls > 0:
        # ValueError is still caught by any ``except Exception`` handler.
        raise ValueError("Column " + str(column) + " contains nulls")

# Positional split: columns 1 and 2 are used as features, column 3 as the
# target. Column 0 is not used by this script.
X_database = database.iloc[:, 1:3].values
y_database = database.iloc[:, 3].values

# Guard against an empty dataset by checking the element count.
# ``np.count_nonzero`` counts non-zero *values*, so it would wrongly report
# "no data" for legitimate inputs made only of zeros (e.g. a target column
# containing only class 0).
if X_database.size == 0 or y_database.size == 0:
    # ValueError is still caught by any ``except Exception`` handler.
    raise ValueError("Prediction array has no data")

# Replace the two feature columns with their label-encoded values.
X_database = encode_data(X_database)

# NOTE(review): test_size=0.70 holds out 70% of the rows for testing and
# trains on only 30%. Left unchanged, but confirm this is intended and not
# a swapped 0.30.
X_base_training, X_base_test, y_base_training, y_base_test = train_test_split(
    X_database,
    y_database,
    test_size=0.70,
    random_state=0,  # fixed seed so the split is reproducible
)

naive_database = GaussianNB()
naive_database.fit(X_base_training, y_base_training)

prediction = naive_database.predict(X_base_test)

# Score once and reuse the value for both the threshold and the report.
accuracy_value = accuracy_score(y_base_test, prediction)

if accuracy_value > 0.70:
    # Scale to a percentage *before* rounding. Rounding first and then
    # multiplying by 100 can reintroduce float noise in the printed value
    # (e.g. 0.57 * 100 == 56.99999999999999).
    accuracy = str(round(accuracy_value * 100, 2))

    print('Accuracy is greater than 70%: ', accuracy)
    print(confusion_matrix(y_base_test, prediction))

    # Yellowbrick visualizer wrapped around the same classifier; its score
    # on the held-out rows is printed below.
    cm = ConfusionMatrix(naive_database)
    cm.fit(X_base_training, y_base_training)

    print(cm.score(X_base_test, y_base_test))
    print(classification_report(y_base_test, prediction))

    # NOTE(review): per the original author's note, in both feature columns
    # "below" is encoded as 1 and "above" as 0 - confirm against the dataset.
    predicted_scenarios = predict_all_scenarios(naive_database)
    scenarios = ["First scenario: [0, 0]",
                 "Second: [1, 0]",
                 "Third: [0, 1]",
                 "Fourth: [1, 1]"]
    print("Predicting all scenarios: ", scenarios, predicted_scenarios, sep="\n")

else:
    print("Accuracy is less than 70%, it is not recommended to predict scenarios")
