In [None]:
import datasets
import numpy

# Load the geochemical analysis table once at notebook scope; `run` below reads
# it as a module-level global rather than taking it as an argument.
# NOTE(review): presumably a pandas DataFrame (it is later indexed by a list of
# column labels and `.dropna()` is called on it) — confirm against `datasets`.
geochemical_analysis = datasets.geochemical_analysis()

In [None]:
import numpy
import pandas

def _classification_metrics(predicted_positive, actual_positive):
    """Return ``(accuracy, precision, recall, f1)`` for two boolean label vectors.

    Any ratio whose denominator is zero (no predicted positives, no actual
    positives, precision + recall == 0, or no samples at all) is reported as
    0.0 instead of raising ``ZeroDivisionError``.
    """
    predicted_positive = numpy.asarray(predicted_positive, dtype=bool)
    actual_positive = numpy.asarray(actual_positive, dtype=bool)

    # Confusion-matrix counts, kept as Python floats so the returned metrics
    # are plain floats.
    tp = float(numpy.count_nonzero(predicted_positive & actual_positive))
    fp = float(numpy.count_nonzero(predicted_positive & ~actual_positive))
    fn = float(numpy.count_nonzero(~predicted_positive & actual_positive))
    tn = float(numpy.count_nonzero(~predicted_positive & ~actual_positive))

    def ratio(numerator, denominator):
        # An undefined ratio is scored as 0.0 so a degenerate model can never
        # look good and never aborts a long seed sweep.
        return numerator / denominator if denominator else 0.0

    accuracy = ratio(tn + tp, tn + fp + tp + fn)
    precision = ratio(tp, tp + fp)
    recall = ratio(tp, tp + fn)
    f1 = ratio(2 * (precision * recall), precision + recall)

    return (accuracy, precision, recall, f1)


def run(seed, lithium_threshold=80):
    """Train one model for ``seed`` and score it as a lithium threshold classifier.

    The global ``geochemical_analysis`` table is reduced to the feature columns
    plus ``LI``, rows with missing values are dropped, and the remainder is
    split 80/20 into train and test sets using ``seed`` as the sampling state.
    A network from the project's ``models`` module is fitted on the training
    part and used to predict ``LI`` for the held-out part. Predicted and actual
    values are then binarized with ``>= lithium_threshold`` and compared.

    Args:
        seed: Passed to ``reproducibility.seed_random`` and used as the
            ``random_state`` of the train/test sampling.
        lithium_threshold: Cut-off applied to both predicted and actual ``LI``
            values; a value at or above it counts as positive. Defaults to 80.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)`` of floats. A metric whose
        denominator is zero is returned as 0.0.
    """
    import reproducibility

    # NOTE(review): presumably seeds the RNGs used during training — confirm
    # against the `reproducibility` module.
    reproducibility.seed_random(seed)

    x_labels = ["RB", "K2O", "YB", "LA", "MGO", "U", "BA", "TH", "SN", "Rb_K2O", "Rb_Yb", "Rb_La", "K2O_MgO", "Rb_MgO", "U_Ba", "Ba_La", "U_Th", "Rb_Sn", "K2O_Sn"]
    y_labels = ["LI"]

    # Only complete rows are usable; the test set is everything not sampled
    # into the training set.
    geochemical_analysis_clean = geochemical_analysis[x_labels + y_labels].dropna()
    geochemical_analysis_train = geochemical_analysis_clean.sample(frac=0.8, random_state=seed)
    geochemical_analysis_test = geochemical_analysis_clean.drop(geochemical_analysis_train.index)

    x_train = geochemical_analysis_train[x_labels].to_numpy(numpy.float32)
    y_train = geochemical_analysis_train[y_labels].to_numpy(numpy.float32)
    x_test = geochemical_analysis_test[x_labels].to_numpy(numpy.float32)
    y_test = geochemical_analysis_test[y_labels].to_numpy(numpy.float32)

    import models

    model = models.neural_network(x_train.shape[1], hidden_layer_count=3)

    model.fit(x_train, y_train, batch_size=64, epochs=10, validation_split=0.2, verbose=0)

    # Column 0 is the single predicted target, matching the single "LI" column
    # of y_test.
    y_predict = numpy.asarray(model.predict(x_test, verbose=0))

    predicted_positive = y_predict[:, 0] >= lithium_threshold
    actual_positive = y_test[:, 0] >= lithium_threshold

    return _classification_metrics(predicted_positive, actual_positive)

In [None]:
# Sweep candidate seeds and report only those whose held-out metrics clear
# every target (accuracy above 0.9, precision at least 0.8, recall at least 0.7).
for seed in range(1, 100000):
    accuracy, precision, recall, f1 = run(seed)

    meets_targets = accuracy > 0.9 and precision >= 0.8 and recall >= 0.7
    if not meets_targets:
        continue

    # Percentages are truncated to whole numbers for display.
    print(f"Seed = {seed} | Accuracy = {int(accuracy * 100)} | Precision = {int(precision * 100)} | Recall = {int(recall * 100)} | F1 = {int(f1 * 100)}")