In [57]:
from __future__ import annotations

import itertools
import math
from pathlib import Path
import re
from typing import Any, Iterable, Optional, Union
import warnings
import sys

import geopandas
import matplotlib
import matplotlib.axes
import matplotlib.cm
import matplotlib.figure
import matplotlib.pyplot
import mpl_toolkits.mplot3d
import numpy
import pandas
import plotly.graph_objs
# import seaborn
from shapely import Point
from sklearn.linear_model import LinearRegression
from scipy.optimize import Bounds
import scipy.stats

sys.path.append('../Packages/tectoniq')
from tectoniq.datasets import Mercedes
from tectoniq.functions import IFunction, Polynomial, Power, PowerWithOffset
from tectoniq.geometry import distance, distance_to_line, distance_to_line_segment
from tectoniq.optimization import fit_function


from sklearn import metrics
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [58]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler

def min_max_scaling(data):
    """
    Perform Min-Max scaling on the input data.
    """
    scaler = MinMaxScaler()
    return pd.Series(scaler.fit_transform(data.values.reshape(-1, 1)).flatten(), index=data.index)

def standard_scaling(data):
    """
    Perform Standard scaling (z-score normalization) on the input data.
    """
    scaler = StandardScaler()
    return pd.Series(scaler.fit_transform(data.values.reshape(-1, 1)).flatten(), index=data.index)

def robust_scaling(data):
    """
    Perform Robust scaling on the input data.
    """
    scaler = RobustScaler()
    return pd.Series(scaler.fit_transform(data.values.reshape(-1, 1)).flatten(), index=data.index)

def log_transform(data):
    """
    Perform log transformation on the input data.
    Adds a small constant to avoid log(0).
    """
    return np.log(data + 1e-10)

def winsorization(data, limits=(0.05, 0.95)):
    """
    Perform Winsorization on the input data.
    """
    lower_limit = data.quantile(limits[0])
    upper_limit = data.quantile(limits[1])
    return data.clip(lower=lower_limit, upper=upper_limit)

def log_then_min_max(data):
    """
    Perform log transformation followed by Min-Max scaling.
    """
    log_data = log_transform(data)
    return min_max_scaling(log_data)

def winsor_then_min_max(data, limits=(0.05, 0.95)):
    """
    Perform Winsorization followed by Min-Max scaling.
    """
    winsor_data = winsorization(data, limits)
    return min_max_scaling(winsor_data)

    
def calculate_weights(differences, penalty_factor=1):
    # Higher penalty factor will penalize lower differences more
    return penalty_factor / (differences + penalty_factor)

def weighted_f1_score(data_frame, gold_threshold, ore_diff_threshold, penalty_factor=1):
    data_frame["Actual"] = data_frame[GOLD_COLUMN] >= gold_threshold
    data_frame["Predicted"] = data_frame[ORE_DIFFERENCE_COLUMN] <= ore_diff_threshold
    
    data_frame["Weight"] = calculate_weights(data_frame[ORE_DIFFERENCE_COLUMN], penalty_factor)
    
    tp = data_frame[data_frame["Actual"] & data_frame["Predicted"]]
    fp = data_frame[~data_frame["Actual"] & data_frame["Predicted"]]
    fn = data_frame[data_frame["Actual"] & ~data_frame["Predicted"]]
    tn = data_frame[~data_frame["Actual"] & ~data_frame["Predicted"]]
    
    weighted_tp = tp["Weight"].sum()
    weighted_fp = fp["Weight"].sum()
    weighted_fn = fn["Weight"].sum()
    weighted_tn = tn["Weight"].sum()
    
    weighted_accuracy = (weighted_tp + weighted_tn) / (weighted_tp + weighted_tn + weighted_fp + weighted_fn)
    weighted_precision = weighted_tp / (weighted_tp + weighted_fp)
    weighted_recall = weighted_tp / (weighted_tp + weighted_fn)
    
    if weighted_precision + weighted_recall == 0:
        return 0
    
    weighted_f1 = 2 * (weighted_precision * weighted_recall) / (weighted_precision + weighted_recall)
    
    return round(weighted_f1, 4), round(weighted_precision, 4), round(weighted_recall, 4), round(weighted_accuracy, 4), tp, fp, fn, tn

def penalty_function(ore_difference, max_penalty=10):
    return max_penalty / (ore_difference + 0.1)  

def custom_loss(data_frame, gold_threshold, ore_diff_threshold, max_penalty=10):
    data_frame["Actual"] = data_frame[GOLD_COLUMN] >= gold_threshold
    data_frame["Predicted"] = data_frame[ORE_DIFFERENCE_COLUMN] <= ore_diff_threshold
    
    false_positives = data_frame[~data_frame["Actual"] & data_frame["Predicted"]]
    false_negatives = data_frame[data_frame["Actual"] & ~data_frame["Predicted"]]
    
    fp_penalties = false_positives[ORE_DIFFERENCE_COLUMN].apply(penalty_function, max_penalty=max_penalty)
    fn_penalties = false_negatives[ORE_DIFFERENCE_COLUMN].apply(penalty_function, max_penalty=max_penalty)
    
    total_loss = fp_penalties.sum() + fn_penalties.sum()
    
    return total_loss

In [55]:
AREA_ID_COLUMN = Mercedes.AREA_ID_COLUMN
DOMAIN_ID_COLUMN = Mercedes.DOMAIN_ID_COLUMN
DOMAIN_NAME_COLUMN = Mercedes.DOMAIN_NAME_COLUMN
GEOMETRY_COLUMN = Mercedes.GEOMETRY_COLUMN
ID_COLUMN = Mercedes.ID_COLUMN
RADIUS: float = 20
SAMPLE_COUNT_THRESHOLD: int = 20
TARGET_COLUMN: str = "Au (ppm)"
THRESHOLD: float = 3

COPPER_COLUMN: str = "Cu (ppm)"
GOLD_COLUMN: str = "Au (ppm)"

COLUMN_FIT_OPTIONS: dict[str, dict[str, Any]] = {
    "(2Ca+Na+K)/Al": {},
    "K/Al": {},
    "As/Al": {},
    "K/(Al-Na)": {},
    "CCPI": {},
    "Ishikawa": {},
    "Na/Al": {},
    "Fe (%)": {},
    "Ca (%)": {},
    "Na (%)": {},
    "K (%)": {},
    "Al (%)": {},
    "As (ppm)": {},
    "Cu (ppm)": {},
    "Mg (%)": {}
}
COLUMN_LIMITS: dict[str, tuple[float, float]] = {
    "(2Ca+Na+K)/Al": (0, 2.5),
    "K/Al": (0, 1.5),
    "As/Al": (0, 2000),
    "K/(Al-Na)": (-0.25, 1.5),
    "CCPI": (0, 1),
    "Ishikawa": (0, 100),
    "Na/Al": (0, 1.5),
    "Fe (%)": (0, 10),
    "Ca (%)": (0, 30),
    "Na (%)": (0, 10),
    "K (%)": (0, 10),
    "Al (%)": (0, 10),
    "As (ppm)": (0, 600),
    "Cu (ppm)": (0, 300),
    "Mg (%)": (0, 5),
}

PLOT_TYPE_DENSITY: str = "density"

RATIO_COLUMNS: list[str] = ["(2Ca+Na+K)/Al", "K/Al", "As/Al", "K/(Al-Na)", "CCPI", "Ishikawa", "Na/Al"]
RATIO_CONSTITUTENT_COLUMNS: list[str] = ["Fe (%)", "Ca (%)", "Na (%)", "K (%)", "Al (%)", "As (ppm)", "Cu (ppm)", "Mg (%)"]
KEY_ELEMENT_COLUMNS: list[str] = ["Fe (%)", "Ca (%)", "Na (%)", "K (%)", "Al (%)", "Cu (ppm)", "Mg (%)"]
# KEY_ELEMENT_COLUMNS: list[str] = ["Fe (%)", "Ca (%)", "Na (%)", "K (%)", "Al (%)", "Mg (%)"]
# KEY_ELEMENT_COLUMNS: list[str] = ["Fe (%)", "Ca (%)", "Na (%)", "K (%)", "Al (%)", "Cu (ppm)", "Mg (%)", "Ishikawa", "CCPI"]

DOMAIN_1_SUBSET_LINE: tuple[Point, Point] = Point(549250, 3356800), Point(550000, 3355500)
NULL_DOMAIN_BOTTOM_LEFT = Point(551200, 3355600)
NULL_DOMAIN_TOP_RIGHT = Point(551500, 3355850)

AREA_1_RHYOLITE_SAMPLE_NUMBER = "MRS-016"
AREA_1_ANDESITE_SAMPLE_NUMBER = "MRS-032"
AREA_1_ORE_SAMPLE_NUMBER = "MRS-001"
AREA_2_LIMESTONE_SAMPLE_NUMBER = "MRS-023"
AREA_2_SILTSTONE_SAMPLE_NUMBER = "M023228"
KEY_SAMPLE_NUMBERS = [AREA_1_RHYOLITE_SAMPLE_NUMBER, AREA_1_ANDESITE_SAMPLE_NUMBER, AREA_1_ORE_SAMPLE_NUMBER, AREA_2_LIMESTONE_SAMPLE_NUMBER, AREA_2_SILTSTONE_SAMPLE_NUMBER]

matplotlib.interactive(True)
warnings.filterwarnings("ignore")

## Experiments

In [59]:
import os

exp_dir = "/Users/zeerakwyne/Documents/redmarble/experiments"
os.listdir(exp_dir)

all_exp = pandas.DataFrame()
for file in os.listdir(exp_dir):
    if file.endswith(".csv"):
        df = pandas.read_csv(os.path.join(exp_dir, file))
        df['Experiment Name'] = file.split('.csv')[0]
        all_exp = pandas.concat([all_exp, df])

# all_exp = all_exp.drop(['Start Time', 'Duration', 'Run ID', 'Source Type', 'Source Name', 'User', 'Status', 'Accuracy'], axis =1)
all_exp = all_exp[['Experiment Name', 'Name', 'difference_threshold', 'normalization_vector', 'target_threshold',
       'Weighted accuracy', 'Weighted f1', 'Weighted precision',
       'Weighted recall', 'fn', 'fp', 'tn', 'tp', 'Loss']]
all_exp = all_exp.sort_values(by='Weighted f1', ascending=False).dropna()

display(all_exp.head())

Unnamed: 0,Experiment Name,Name,difference_threshold,normalization_vector,target_threshold,Weighted accuracy,Weighted f1,Weighted precision,Weighted recall,fn,fp,tn,tp,Loss
70,Fe Ca Na K Al Mg CCPI Ishikawa As,salty-shoat-997,2.5,[ 1.65 11. 0.55 0.55 2.2 0.55 0.44 0....,45.0,0.6816,0.2749,0.2028,0.4268,158.0,283.0,1212.0,70.0,174.941447
170,Fe Ca Na K Al Mg CCPI Ishikawa K_(Al-Na) As,fearless-dolphin-408,2.5,[ 1.65 11. 0.55 0.55 2.2 0.55 0.44 0....,45.0,0.6838,0.2713,0.2011,0.4165,160.0,278.0,1217.0,68.0,172.409874
50,Fe Ca Na K Al Mg CCPI Ishikawa K_(Al-Na) As,indecisive-elk-411,2.5,[ 3.5 12. 0.6 0.6 4. 0.6 0.48 0....,45.0,0.5474,0.2686,0.174,0.5883,124.0,514.0,981.0,104.0,283.479339
0,Fe Ca Na K Al Mg CCPI As,placid-moth-877,2.5,[ 1.575 10.5 0.525 0.525 2.1 0.525 0....,45.0,0.6906,0.2665,0.2002,0.3986,164.0,263.0,1232.0,64.0,167.103023
40,Fe Ca Na K Al Mg CCPI As,funny-cow-508,2.5,[ 1.575 10.5 0.525 0.525 2.1 0.525 0....,45.0,0.6906,0.2665,0.2002,0.3986,164.0,263.0,1232.0,64.0,167.103023


In [60]:
all_exp.to_excel('Area2_copper_experiments.xlsx', index=False)


In [6]:
all_exp['Experiment Name'] = all_exp['Experiment Name'].str.split('-').str[-1].str.strip()

0                   Fe Ca Na K Al Mg
1                   Fe Ca Na K Al Mg
0                                Na)
1                                Na)
0     Fe Ca Na K Al Mg CCPI Ishikawa
                   ...              
74                  Fe Ca Na K Al Mg
75                  Fe Ca Na K Al Mg
76                  Fe Ca Na K Al Mg
77                  Fe Ca Na K Al Mg
78                  Fe Ca Na K Al Mg
Name: Experiment Name, Length: 263, dtype: object

In [16]:
def normalize_ratio_columns(data_frame):
    temp_df = data_frame.copy()
    temp_df['(2Ca+Na+K)/Al'] = min_max_scaling(temp_df['(2Ca+Na+K)/Al'])
    temp_df['K/Al'] = log_then_min_max(temp_df['K/Al'])
    temp_df['As/Al'] = log_then_min_max(temp_df['As/Al'])
    temp_df['K/(Al-Na)'] = winsor_then_min_max(temp_df['K/(Al-Na)'])
    temp_df['CCPI'] = min_max_scaling(temp_df['CCPI'])
    temp_df['Ishikawa'] = min_max_scaling(temp_df['Ishikawa'])
    temp_df['Na/Al'] = min_max_scaling(temp_df['Na/Al'])
    temp_df['As (ppm)']  = robust_scaling(temp_df['As (ppm)'])
    return temp_df

## Final Run

In [14]:
def get_units(column: str) -> Optional[str]:
    unit_match: re.Match = re.search(r"\((.+)\)", column)
    return unit_match.group() if unit_match else None

def print_df_summary(data_frame):
    total_rows = data_frame.shape[0]
    nan_count = data_frame[RATIO_COLUMNS + RATIO_CONSTITUTENT_COLUMNS].isna().sum()
    nan_percentage = (nan_count / total_rows) * 100
    nan_summary = pd.DataFrame({
        'NaN Count': nan_count,
        'NaN Percentage': nan_percentage
    })

    display(nan_summary.round(3))



In [17]:
data_frame: geopandas.GeoDataFrame = Mercedes.latest(Path("../Data"))

area_1_data_frame: geopandas.GeoDataFrame = data_frame[data_frame[AREA_ID_COLUMN] == 1]
area_2_data_frame: geopandas.GeoDataFrame = data_frame[data_frame[AREA_ID_COLUMN] == 2]
key_samples_data_frame: geopandas.GeoDataFrame = data_frame[data_frame[ID_COLUMN].isin(KEY_SAMPLE_NUMBERS)]

area_1_data_frame = normalize_ratio_columns(area_1_data_frame)
area_2_data_frame = normalize_ratio_columns(area_2_data_frame)

AREA_1_RHYOLITE_SAMPLE_NUMBER = "MRS-016"
AREA_1_ANDESITE_SAMPLE_NUMBER = "MRS-032"
AREA_1_ORE_SAMPLE_NUMBER = "MRS-001"
AREA_2_LIMESTONE_SAMPLE_NUMBER = "MRS-023"
AREA_2_SILTSTONE_SAMPLE_NUMBER = "M023228"
AREA_2_ORE_SAMPLE_NUMBER = "695588"

In [18]:
print(f"Initial Data Shape: {area_1_data_frame.shape}")
print(f"Initial Data Shape: {area_2_data_frame.shape}")

Initial Data Shape: (3443, 72)
Initial Data Shape: (1723, 72)


In [19]:
area_1_data_frame = area_1_data_frame[~((area_1_data_frame['Fe (%)'] == 0) | (area_1_data_frame['Fe (%)'].isna())) & ~(area_1_data_frame['Ca (%)'] == 0)]
area_1_data_frame = area_1_data_frame[~(area_1_data_frame['Na (%)'].isna())].reset_index(drop=True)
area_2_data_frame = area_2_data_frame[~((area_2_data_frame['Fe (%)'] == 0) | (area_2_data_frame['Fe (%)'].isna())) & ~(area_2_data_frame['Ca (%)'] == 0)]
area_2_data_frame = area_2_data_frame[~(area_2_data_frame['Na (%)'].isna())].reset_index(drop=True)


In [20]:
print(f"Initial Data Shape: {area_1_data_frame.shape}")
print(f"Initial Data Shape: {area_2_data_frame.shape}")

Initial Data Shape: (3401, 72)
Initial Data Shape: (1723, 72)


In [11]:
KEY_ELEMENT_COLUMNS = ['Fe (%)', 'Ca (%)', 'Na (%)', 'K (%)', 'Al (%)', 'Cu (ppm)', 'Mg (%)', 'CCPI']
AREA_1_ORE_VECTOR: numpy.array = numpy.array([2, 1, 0, 3, 3, 130, 0, 1])
AREA_1_ANDESITE_VECTOR: numpy.array = numpy.array([5, 4.5, 3, 2.5, 8.5, 30, 1.5, 0.5])
AREA_1_RHYOLITE_VECTOR: numpy.array = numpy.array([5, 5, 3, 2, 7.5, 5, 3, 0.5])
NORMALIZATION_VECTOR: numpy.array = numpy.array([6,6,4,3.5,10.5, 25, 4.5,0.6])

AREA_1_ORE_NORMALIZED_VECTOR: numpy.array = numpy.divide(AREA_1_ORE_VECTOR, NORMALIZATION_VECTOR)
AREA_1_ANDESITE_NORMALIZED_VECTOR: numpy.array = numpy.divide(AREA_1_ANDESITE_VECTOR, NORMALIZATION_VECTOR)
AREA_1_RHYOLITE_NORMALIZED_VECTOR: numpy.array = numpy.divide(AREA_1_RHYOLITE_VECTOR, NORMALIZATION_VECTOR)

NORMALIZED_VECTOR_COLUMN: str = "NormalizedVector"
ORE_DIFFERENCE_COLUMN: str = "OreDifference"

area_1_data_frame[NORMALIZED_VECTOR_COLUMN] = area_1_data_frame[KEY_ELEMENT_COLUMNS].apply(lambda x: numpy.divide(x.to_numpy(), NORMALIZATION_VECTOR), axis = 1)
area_1_data_frame[ORE_DIFFERENCE_COLUMN] = area_1_data_frame[NORMALIZED_VECTOR_COLUMN].apply(lambda x: numpy.linalg.norm(x - AREA_1_ORE_NORMALIZED_VECTOR))

In [12]:
GOLD_THRESHOLD = 2.5
DIFFERENCE_THRESHOLD = 2.5

area_1_data_frame["Actual"] = area_1_data_frame[GOLD_COLUMN].apply(lambda x: x>=GOLD_THRESHOLD)
area_1_data_frame["Predicted"] = area_1_data_frame[ORE_DIFFERENCE_COLUMN].apply(lambda x: x<=DIFFERENCE_THRESHOLD)

tp: int = len(area_1_data_frame[area_1_data_frame["Actual"] & area_1_data_frame["Predicted"]])
fp: int = len(area_1_data_frame[~area_1_data_frame["Actual"] & area_1_data_frame["Predicted"]])
fn: int = len(area_1_data_frame[area_1_data_frame["Actual"] & ~area_1_data_frame["Predicted"]])
tn: int = len(area_1_data_frame[~area_1_data_frame["Actual"] & ~area_1_data_frame["Predicted"]])

accuracy: float = (tp + tn) / (tp + tn + fp + fn)
precision: float = tp / (tp + fp)
recall: float = tp / (tp + fn)
f1_score: float = (2 * precision * recall) / (precision + recall)

print(f"True Positives = {round(tp, 4)} | False Positives = {round(fp, 4)} | False Negatives = {round(fn, 4)} | True Negatives = {round(tn, 4)}")
print(f"Accuracy = {round(accuracy, 4)} | Precision = {round(precision, 4)} | Recall = {round(recall, 4)} | F1 Score = {round(f1_score, 4)}")

area_1_data_frame['classification'] = ''
area_1_data_frame.loc[area_1_data_frame['Actual'] & area_1_data_frame['Predicted'], 'classification'] = 'TP'
area_1_data_frame.loc[area_1_data_frame['Actual'] & ~area_1_data_frame['Predicted'], 'classification'] = 'FN'
area_1_data_frame.loc[~area_1_data_frame['Actual'] & area_1_data_frame['Predicted'], 'classification'] = 'FP'
area_1_data_frame.loc[~area_1_data_frame['Actual'] & ~area_1_data_frame['Predicted'], 'classification'] = 'TN'

True Positives = 30 | False Positives = 136 | False Negatives = 85 | True Negatives = 3150
Accuracy = 0.935 | Precision = 0.1807 | Recall = 0.2609 | F1 Score = 0.2135


In [61]:
all_exp['Experiment Name'].unique()

array(['Fe Ca Na K Al Mg CCPI Ishikawa As',
       'Fe Ca Na K Al Mg CCPI Ishikawa K_(Al-Na) As',
       'Fe Ca Na K Al Mg CCPI As'], dtype=object)

In [62]:
# Mapping dictionary
mapping = {
    # 'Fe Ca Na K Al Cu Mg CCPI': ['Fe (%)', 'Ca (%)', 'Na (%)', 'K (%)', 'Al (%)', 'Cu (ppm)', 'Mg (%)', 'CCPI'],
    # 'Fe Ca Na K Al Cu Mg CCPI Ishikawa NaAl': ['Fe (%)', 'Ca (%)', 'Na (%)', 'K (%)', 'Al (%)', 'Cu (ppm)', 'Mg (%)', 'CCPI', 'Ishikawa', 'Na/Al'],
    # 'Fe Ca Na K Al Cu Mg CCPI Ishikawa': ['Fe (%)', 'Ca (%)', 'Na (%)', 'K (%)', 'Al (%)', 'Cu (ppm)', 'Mg (%)', 'CCPI', 'Ishikawa'],
    # 'Fe K Al Cu CCPI K_(Al-Na) As_Al': ['Fe (%)', 'K (%)', 'Al (%)', 'Cu (ppm)', 'CCPI', 'K/(Al-Na)', 'As/Al'],
    # 'Fe Ca Na K Al Mg' : ['Fe (%)', 'Ca (%)', 'Na (%)', 'K (%)', 'Al (%)', 'Mg (%)'],
    # 'Fe Ca Na K Al Mg CCPI Ishikawa' : ['Fe (%)', 'Ca (%)', 'Na (%)', 'K (%)', 'Al (%)', 'Mg (%)', 'CCPI', 'Ishikawa'],
    # 'Fe Ca Na K Al Mg CCPI Ishikawa K_(Al-Na)' : ['Fe (%)', 'Ca (%)', 'Na (%)', 'K (%)', 'Al (%)', 'Mg (%)', 'CCPI', 'Ishikawa', 'K/(Al-Na)'],
    'Fe Ca Na K Al Mg CCPI Ishikawa K_(Al-Na) As' : ['Fe (%)', 'Ca (%)', 'Na (%)', 'K (%)', 'Al (%)', 'Mg (%)', 'CCPI', 'Ishikawa', 'K/(Al-Na)', 'As (ppm)'],
    'Fe Ca Na K Al Mg CCPI Ishikawa As' : ['Fe (%)', 'Ca (%)', 'Na (%)', 'K (%)', 'Al (%)', 'Mg (%)', 'CCPI', 'Ishikawa','As (ppm)'],
    'Fe Ca Na K Al Mg CCPI As' : ['Fe (%)', 'Ca (%)', 'Na (%)', 'K (%)', 'Al (%)', 'Mg (%)', 'CCPI','As (ppm)']
}


all_exp['Key Columns'] = all_exp['Experiment Name'].map(mapping)

mapping = {
    # 'Fe Ca Na K Al Cu Mg CCPI': [1, 3, 0, 0.5, 1.5, 15, 0, 0.5],
    # 'Fe Ca Na K Al Cu Mg CCPI Ishikawa NaAl': [1, 3, 0, 0.5, 1.5, 15, 0, 0.5, 0.7, 0],
    # 'Fe Ca Na K Al Cu Mg CCPI Ishikawa': [1, 3, 0, 0.5, 1.5, 15, 0, 0.5, 0.7],
    # 'Fe K Al Cu CCPI K_(Al-Na) As_Al': [2, 5, 3, 130, 1, 0.6, 0.8],
    # 'Fe Ca Na K Al Mg' : [3, 1, 0, 3, 5, 0],
    # 'Fe Ca Na K Al Mg CCPI Ishikawa' : [3, 1, 0, 3, 5, 0, 1, 1],
    # 'Fe Ca Na K Al Mg CCPI Ishikawa K_(Al-Na)' : [3, 1, 0, 3, 5, 0, 1, 1, 1],
    'Fe Ca Na K Al Mg CCPI Ishikawa K_(Al-Na) As' : [4, 4, 1, 2, 6, 1, 0.6, 0.5, 0.5, 1],
    'Fe Ca Na K Al Mg CCPI Ishikawa As' : [4, 4, 1, 2, 6, 1, 0.6, 0.5, 1],
    'Fe Ca Na K Al Mg CCPI As' : [4, 4, 1, 2, 6, 1, 0.6, 1]
}

all_exp['area_1_ore'] = all_exp['Experiment Name'].map(mapping)
# Create the new column 'Key Columns'


In [71]:
all_exp = all_exp[all_exp['target_threshold'] == 45].sort_values(by='Weighted f1', ascending=False).head(100)

# all_exp = all_exp.sort_values(by='Weighted f1', ascending=False).head(100)

In [72]:
all_exp

Unnamed: 0,Experiment Name,Name,difference_threshold,normalization_vector,target_threshold,Weighted accuracy,Weighted f1,Weighted precision,Weighted recall,fn,fp,tn,tp,Loss,Key Columns,area_1_ore
70,Fe Ca Na K Al Mg CCPI Ishikawa As,salty-shoat-997,2.5,[ 1.65 11. 0.55 0.55 2.2 0.55 0.44 0....,45.0,0.6816,0.2749,0.2028,0.4268,158.0,283.0,1212.0,70.0,174.941447,"[Fe (%), Ca (%), Na (%), K (%), Al (%), Mg (%)...","[4, 4, 1, 2, 6, 1, 0.6, 0.5, 1]"
170,Fe Ca Na K Al Mg CCPI Ishikawa K_(Al-Na) As,fearless-dolphin-408,2.5,[ 1.65 11. 0.55 0.55 2.2 0.55 0.44 0....,45.0,0.6838,0.2713,0.2011,0.4165,160.0,278.0,1217.0,68.0,172.409874,"[Fe (%), Ca (%), Na (%), K (%), Al (%), Mg (%)...","[4, 4, 1, 2, 6, 1, 0.6, 0.5, 0.5, 1]"
50,Fe Ca Na K Al Mg CCPI Ishikawa K_(Al-Na) As,indecisive-elk-411,2.5,[ 3.5 12. 0.6 0.6 4. 0.6 0.48 0....,45.0,0.5474,0.2686,0.1740,0.5883,124.0,514.0,981.0,104.0,283.479339,"[Fe (%), Ca (%), Na (%), K (%), Al (%), Mg (%)...","[4, 4, 1, 2, 6, 1, 0.6, 0.5, 0.5, 1]"
40,Fe Ca Na K Al Mg CCPI As,funny-cow-508,2.5,[ 1.575 10.5 0.525 0.525 2.1 0.525 0....,45.0,0.6906,0.2665,0.2002,0.3986,164.0,263.0,1232.0,64.0,167.103023,"[Fe (%), Ca (%), Na (%), K (%), Al (%), Mg (%)...","[4, 4, 1, 2, 6, 1, 0.6, 1]"
0,Fe Ca Na K Al Mg CCPI As,placid-moth-877,2.5,[ 1.575 10.5 0.525 0.525 2.1 0.525 0....,45.0,0.6906,0.2665,0.2002,0.3986,164.0,263.0,1232.0,64.0,167.103023,"[Fe (%), Ca (%), Na (%), K (%), Al (%), Mg (%)...","[4, 4, 1, 2, 6, 1, 0.6, 1]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,Fe Ca Na K Al Mg CCPI Ishikawa K_(Al-Na) As,upset-shrimp-885,1.5,[ 2. 10.5 0.525 0.525 2.5 0.525 0....,45.0,0.8502,0.0970,0.3305,0.0569,221.0,15.0,1480.0,7.0,78.340319,"[Fe (%), Ca (%), Na (%), K (%), Al (%), Mg (%)...","[4, 4, 1, 2, 6, 1, 0.6, 0.5, 0.5, 1]"
44,Fe Ca Na K Al Mg CCPI Ishikawa K_(Al-Na) As,masked-eel-995,1.5,[ 2. 10.5 0.525 0.525 2.5 0.525 0....,45.0,0.8502,0.0970,0.3305,0.0569,221.0,15.0,1480.0,7.0,78.340319,"[Fe (%), Ca (%), Na (%), K (%), Al (%), Mg (%)...","[4, 4, 1, 2, 6, 1, 0.6, 0.5, 0.5, 1]"
174,Fe Ca Na K Al Mg CCPI Ishikawa K_(Al-Na) As,handsome-goat-896,1.5,[ 1.65 11. 0.55 0.55 2.2 0.55 0.44 0....,45.0,0.8506,0.0956,0.3309,0.0559,221.0,15.0,1480.0,7.0,79.313766,"[Fe (%), Ca (%), Na (%), K (%), Al (%), Mg (%)...","[4, 4, 1, 2, 6, 1, 0.6, 0.5, 0.5, 1]"
94,Fe Ca Na K Al Mg CCPI Ishikawa As,honorable-skunk-360,1.5,[ 1.5 10. 0.5 0.5 2. 0.5 0.4 0.3 0.5],45.0,0.8618,0.0925,0.6657,0.0497,222.0,3.0,1492.0,6.0,66.375780,"[Fe (%), Ca (%), Na (%), K (%), Al (%), Mg (%)...","[4, 4, 1, 2, 6, 1, 0.6, 0.5, 1]"


In [38]:
import re

In [69]:
AREA_2_ORE_VECTOR: numpy.array = numpy.array([1, 3, 0, 0.5, 1.5, 15, 0, 0.5, 0.7, 0])
NORMALIZATION_VECTOR: numpy.array = np.array([3, 5, 1, 2, 6, 30, 1, 0.5, 0.8, 0.2])
AREA_1_ORE_NORMALIZED_VECTOR: numpy.array = numpy.divide(AREA_2_ORE_VECTOR, NORMALIZATION_VECTOR)



In [79]:
area_1_data_frame = area_2_data_frame.copy()

In [70]:
temp_df['area_1_ore']

96    [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1]
17    [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1]
97    [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1]
57    [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1]
18    [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1]
19    [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1]
58    [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1]
16    [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1]
98    [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1]
59    [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1]
99    [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1]
56    [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1]
94    [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1]
95    [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1]
93    [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1]
92    [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1]
Name: area_1_ore, dtype: object

In [63]:
all_exp['area_1_ore']

0      [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1, 0]
1      [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1, 0]
2      [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1, 0]
3      [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1, 0]
111              [2, 1, 0, 3, 3, 130, 0, 1]
                       ...                 
136              [2, 1, 0, 3, 3, 130, 0, 1]
46     [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1, 0]
78               [2, 1, 0, 3, 3, 130, 0, 1]
139              [2, 1, 0, 3, 3, 130, 0, 1]
47     [2, 1, 1, 5, 3, 130, 0.5, 0.5, 1, 0]
Name: area_1_ore, Length: 100, dtype: object

3

In [81]:

COPPER_THRESHOLD = 45
# DIFFERENCE_THRESHOLD = 2.5

exp_count = 0
for key_columns in all_exp['Key Columns'].astype(str).unique():
    temp_df = all_exp[all_exp['Key Columns'].astype(str) == key_columns].copy()
    for i in range(temp_df.shape[0]):
        KEY_ELEMENT_COLUMNS = eval(key_columns)
        TARGET_THRESHOLD = temp_df.iloc[i]['target_threshold']
        DIFFERENCE_THRESHOLD = temp_df.iloc[i]['difference_threshold']
        AREA_1_ORE_VECTOR = temp_df.iloc[i]['area_1_ore']
        NORMALIZATION_VECTOR = [float(num) for num in re.findall(r"[-+]?\d*\.\d+|\d+", temp_df.iloc[i]['normalization_vector'])]

        AREA_1_ORE_NORMALIZED_VECTOR: numpy.array = numpy.divide(AREA_1_ORE_VECTOR, NORMALIZATION_VECTOR)
        print(f"GOLD: {TARGET_THRESHOLD}\nNORM VECTOR: {NORMALIZATION_VECTOR}\nCOL: {KEY_ELEMENT_COLUMNS}")
        
        NORMALIZED_VECTOR_COLUMN: str = "NormalizedVector"
        ORE_DIFFERENCE_COLUMN: str = "OreDifference"

        area_1_data_frame[NORMALIZED_VECTOR_COLUMN] = area_1_data_frame[KEY_ELEMENT_COLUMNS].apply(lambda x: numpy.divide(x.to_numpy(), NORMALIZATION_VECTOR), axis = 1)
        area_1_data_frame[ORE_DIFFERENCE_COLUMN] = area_1_data_frame[NORMALIZED_VECTOR_COLUMN].apply(lambda x: numpy.linalg.norm(x - AREA_1_ORE_NORMALIZED_VECTOR))

        area_1_data_frame["Actual"] = area_1_data_frame[COPPER_COLUMN].apply(lambda x: x>=COPPER_THRESHOLD)
        area_1_data_frame["Predicted"] = area_1_data_frame[ORE_DIFFERENCE_COLUMN].apply(lambda x: x<=DIFFERENCE_THRESHOLD)

        tp: int = len(area_1_data_frame[area_1_data_frame["Actual"] & area_1_data_frame["Predicted"]])
        fp: int = len(area_1_data_frame[~area_1_data_frame["Actual"] & area_1_data_frame["Predicted"]])
        fn: int = len(area_1_data_frame[area_1_data_frame["Actual"] & ~area_1_data_frame["Predicted"]])
        tn: int = len(area_1_data_frame[~area_1_data_frame["Actual"] & ~area_1_data_frame["Predicted"]])

        try:
            accuracy: float = (tp + tn) / (tp + tn + fp + fn)
        except ZeroDivisionError:
            accuracy = 0

        try:
            precision: float = tp / (tp + fp)
        except ZeroDivisionError:
            precision = 0

        try:
            recall: float = tp / (tp + fn)
        except ZeroDivisionError:
            recall = 0

        try:
            f1_score: float = (2 * precision * recall) / (precision + recall)
        except ZeroDivisionError:
            f1_score = 0

        print(f"True Positives = {round(tp, 4)} | False Positives = {round(fp, 4)} | False Negatives = {round(fn, 4)} | True Negatives = {round(tn, 4)}")
        print(f"Accuracy = {round(accuracy, 4)} | Precision = {round(precision, 4)} | Recall = {round(recall, 4)} | F1 Score = {round(f1_score, 4)}\n")

        col_name = f"exp_{exp_count}_{i}"
        area_1_data_frame[col_name] = ''
        area_1_data_frame.loc[area_1_data_frame['Actual'] & area_1_data_frame['Predicted'], col_name] = 'TP'
        area_1_data_frame.loc[area_1_data_frame['Actual'] & ~area_1_data_frame['Predicted'], col_name] = 'FN'
        area_1_data_frame.loc[~area_1_data_frame['Actual'] & area_1_data_frame['Predicted'], col_name] = 'FP'
        area_1_data_frame.loc[~area_1_data_frame['Actual'] & ~area_1_data_frame['Predicted'], col_name] = 'TN'
    exp_count+=1
    # break

GOLD: 45.0
NORM VECTOR: [1.65, 11.0, 0.55, 0.55, 2.2, 0.55, 0.44, 0.33, 0.55]
COL: ['Fe (%)', 'Ca (%)', 'Na (%)', 'K (%)', 'Al (%)', 'Mg (%)', 'CCPI', 'Ishikawa', 'As (ppm)']
True Positives = 70 | False Positives = 283 | False Negatives = 158 | True Negatives = 1212
Accuracy = 0.7441 | Precision = 0.1983 | Recall = 0.307 | F1 Score = 0.241

GOLD: 45.0
NORM VECTOR: [1.725, 11.5, 0.575, 0.575, 2.3, 0.575, 0.46, 0.345, 0.575]
COL: ['Fe (%)', 'Ca (%)', 'Na (%)', 'K (%)', 'Al (%)', 'Mg (%)', 'CCPI', 'Ishikawa', 'As (ppm)']
True Positives = 73 | False Positives = 343 | False Negatives = 155 | True Negatives = 1152
Accuracy = 0.711 | Precision = 0.1755 | Recall = 0.3202 | F1 Score = 0.2267

GOLD: 45.0
NORM VECTOR: [1.575, 10.5, 0.525, 0.525, 2.1, 0.525, 0.42, 0.315, 0.525]
COL: ['Fe (%)', 'Ca (%)', 'Na (%)', 'K (%)', 'Al (%)', 'Mg (%)', 'CCPI', 'Ishikawa', 'As (ppm)']
True Positives = 57 | False Positives = 239 | False Negatives = 171 | True Negatives = 1256
Accuracy = 0.762 | Precision = 0.1

In [82]:
area_1_data_frame

Unnamed: 0,SampleNumber,Easting,Northing,Latitude,Longitude,Elevation,Area,Type,Sampler,Au (ppm),...,exp_2_17,exp_2_18,exp_2_19,exp_2_20,exp_2_21,exp_2_22,exp_2_23,exp_2_24,exp_2_25,exp_2_26
0,606516,554915.000000,3.375534e+06,30.510858,-110.427675,,La Mesa Ranch,Channel,LHRG,0.0100,...,TN,TN,TN,TN,TN,TN,TN,TN,TN,TN
1,695001,557436.131684,3.377238e+06,30.526114,-110.401306,1272.0,San Antonio Ranch,Soil,PPV-JLO,0.0090,...,TN,TN,TN,TN,TN,TN,TN,TN,TN,TN
2,695002,557617.225660,3.377152e+06,30.525331,-110.399423,1275.0,San Antonio Ranch,Soil,PPV-JLO,0.0060,...,TN,TN,TN,TN,TN,TN,TN,TN,TN,TN
3,695003,557799.127065,3.377069e+06,30.524570,-110.397532,1316.0,San Antonio Ranch,Soil,PPV-JLO,0.0070,...,TN,TN,TN,TN,TN,TN,TN,TN,TN,TN
4,695004,557702.554000,3.377334e+06,30.526968,-110.398524,1286.0,San Antonio Ranch,Soil,PPV-JLO,0.0050,...,TN,TN,TN,TN,TN,TN,TN,TN,TN,TN
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1718,M023281,552192.000000,3.375676e+06,30.512261,-110.456045,1465.0,Arg.5_Molino,Chip-Rock,RS,0.0030,...,TN,TN,TN,TN,TN,TN,TN,TN,TN,TN
1719,M023282,551880.000000,3.375962e+06,30.514855,-110.459283,1423.0,Arg.5_Molino,Chip-Rock,RS,0.0005,...,TN,TN,TN,TN,TN,TN,TN,TN,TN,TN
1720,MRS-023,553747.101700,3.375146e+06,,,1371.0,La Mesa,Rock Chip,Tectoniq,0.0080,...,TN,TN,TN,TN,TN,TN,TN,TN,TN,TN
1721,MRS-024,554496.834800,3.376182e+06,,,1430.0,La Mesa,Rock Chip,Tectoniq,0.1240,...,TN,TN,TN,TN,TN,TN,TN,TN,TN,TN


In [88]:
exp_cols = [col for col in area_1_data_frame.columns if 'exp_' in col]

In [84]:
area_1_data_frame['TP'] = (area_1_data_frame[exp_cols] == 'TP').sum(axis=1)
area_1_data_frame['TN'] = (area_1_data_frame[exp_cols] == 'TN').sum(axis=1)
area_1_data_frame['FP'] = (area_1_data_frame[exp_cols] == 'FP').sum(axis=1)
area_1_data_frame['FN'] = (area_1_data_frame[exp_cols] == 'FN').sum(axis=1)

In [85]:
cols = list(area_2_data_frame.columns)
cols.extend(['TP', 'TN', 'FP', 'FN'])

In [92]:
area_1_data_frame[(area_1_data_frame['FP'] >= 30)].sort_values(by='FP', ascending=True)[cols].to_excel('CU_area2_45_TP_40%.xlsx', index=False)

In [47]:
area_1_data_frame

Unnamed: 0,SampleNumber,Easting,Northing,Latitude,Longitude,Elevation,Area,Type,Sampler,Au (ppm),...,exp_3_32,exp_3_33,exp_3_34,exp_3_35,exp_3_36,exp_3_37,TP,TN,FP,FN
0,7VSS-001,551950.927104,3.363731e+06,30.404486,-110.459154,,Oso Negro,Soil,"RG, R",0.0060,...,TN,TN,TN,TN,TN,TN,0,75,25,0
1,7VSS-002,551955.227104,3.363706e+06,30.404263,-110.459110,,Oso Negro,Soil,"RG, R",0.0025,...,TN,TN,TN,TN,TN,TN,0,71,29,0
2,7VSS-003,551959.627104,3.363682e+06,30.404041,-110.459065,,Oso Negro,Soil,"RG, R",0.0025,...,TN,TN,TN,TN,TN,TN,0,56,44,0
3,7VSS-004,551963.927104,3.363657e+06,30.403818,-110.459022,,Oso Negro,Soil,"RG, R",0.0100,...,TN,TN,TN,TN,TN,TN,0,56,44,0
4,7VSS-005,551968.227104,3.363633e+06,30.403596,-110.458978,,Oso Negro,Soil,"RG, R",0.0025,...,TN,TN,TN,TN,TN,TN,0,64,36,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3396,MRS-013,549057.226800,3.355597e+06,,,,Brecha Hill,Rock Chip,Tectoniq,0.0080,...,TN,TN,TN,TN,TN,TN,0,81,19,0
3397,MRS-026,551163.008700,3.357385e+06,,,1162.0,Bellota F,Rock Chip,Tectoniq,0.0720,...,FN,FN,FN,FN,FN,FN,41,0,0,59
3398,MRS-027,551158.888500,3.357368e+06,,,1174.0,Bellota F,Rock Chip,Tectoniq,0.0070,...,FN,FN,FN,FN,FN,FN,11,0,0,89
3399,MRS-029,552484.255100,3.358325e+06,,,1227.0,RdO W,Rock Chip,Tectoniq,0.0090,...,TN,TN,TN,TN,TN,TN,0,78,22,0


In [82]:
area_1_data_frame[(area_1_data_frame['FP'] >= 20)].sort_values(by='FP', ascending=True)[cols].to_excel('area2_data_frame_TP.xlsx', index=False)


In [126]:
counts = {col: {'TP': 0, 'TN': 0, 'FP': 0, 'FN': 0} for col in exp_cols}

In [127]:
for col in exp_cols:
    if col in area_1_data_frame.columns:
        counts[col] = area_1_data_frame[col].value_counts().to_dict()

counts_df = pd.DataFrame(counts).fillna(0).astype(int)

In [128]:
counts_df

Unnamed: 0,exp_0_0,exp_0_1,exp_0_2,exp_0_3,exp_0_4,exp_0_5,exp_0_6,exp_0_7,exp_0_8,exp_0_9,...,exp_2_34,exp_2_35,exp_2_36,exp_3_0,exp_3_1,exp_3_2,exp_3_3,exp_3_4,exp_3_5,exp_3_6
TN,3124,3050,3090,3180,3150,3152,3124,3152,3090,3074,...,3167,3137,3137,3033,2933,3096,3104,3134,3173,3057
FP,132,206,166,76,136,104,132,104,166,212,...,89,119,119,223,323,160,152,122,83,229
FN,111,105,108,123,85,119,111,119,108,81,...,124,115,115,105,95,111,113,115,125,81
TP,34,40,37,22,30,26,34,26,37,34,...,21,30,30,40,50,34,32,30,20,34


In [85]:
# Initialize a dictionary to store counts


# Count the occurrences of 'TP', 'TN', 'FP', 'FN' for each column
for col in experiment_columns:
    if col in area_1_data_frame.columns:
        counts[col] = area_1_data_frame[col].value_counts().to_dict()

# Convert counts dictionary to DataFrame for better visualization
counts_df = pd.DataFrame(counts).fillna(0).astype(int)

print(counts_df)

Unnamed: 0,SampleNumber,Easting,Northing,Latitude,Longitude,Elevation,Area,Type,Sampler,Au (ppm),...,Na/Al,Geometry,DomainId,DomainName,AreaId,NormalizedVector,OreDifference,Actual,Predicted,classification
0,7VSS-001,551950.927104,3.363731e+06,30.404486,-110.459154,,Oso Negro,Soil,"RG, R",0.0060,...,0.028950,POINT (551950.927 3363731.127),3.0,San Martin-Lupita-Diluvio,1.0,"[0.20833333333333334, 0.03666666666666667, 0.0...",4.743750,False,False,TN
1,7VSS-002,551955.227104,3.363706e+06,30.404263,-110.459110,,Oso Negro,Soil,"RG, R",0.0025,...,0.032380,POINT (551955.227 3363706.427),3.0,San Martin-Lupita-Diluvio,1.0,"[0.2916666666666667, 0.07666666666666667, 0.04...",4.748061,False,False,TN
2,7VSS-003,551959.627104,3.363682e+06,30.404041,-110.459065,,Oso Negro,Soil,"RG, R",0.0025,...,0.023314,POINT (551959.627 3363681.827),3.0,San Martin-Lupita-Diluvio,1.0,"[0.355, 0.04, 0.04, 0.8914285714285715, 0.5933...",4.499411,False,False,TN
3,7VSS-004,551963.927104,3.363657e+06,30.403818,-110.459022,,Oso Negro,Soil,"RG, R",0.0100,...,0.017911,POINT (551963.927 3363657.227),3.0,San Martin-Lupita-Diluvio,1.0,"[0.3566666666666667, 0.02666666666666667, 0.03...",4.697573,False,False,TN
4,7VSS-005,551968.227104,3.363633e+06,30.403596,-110.458978,,Oso Negro,Soil,"RG, R",0.0025,...,0.090553,POINT (551968.227 3363632.627),3.0,San Martin-Lupita-Diluvio,1.0,"[0.36833333333333335, 0.06666666666666667, 0.1...",4.314926,False,False,TN
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3396,MRS-013,549057.226800,3.355597e+06,,,,Brecha Hill,Rock Chip,Tectoniq,0.0080,...,0.453792,POINT (549057.227 3355597.270),1.0,Mercedes,1.0,"[0.35833333333333334, 0.30333333333333334, 0.5...",4.908180,False,False,TN
3397,MRS-026,551163.008700,3.357385e+06,,,1162.0,Bellota F,Rock Chip,Tectoniq,0.0720,...,0.024725,POINT (551163.009 3357384.736),2.0,Klondike-Rey de Oro,1.0,"[0.2916666666666667, 0.03166666666666667, 0.03...",648.800333,False,False,TN
3398,MRS-027,551158.888500,3.357368e+06,,,1174.0,Bellota F,Rock Chip,Tectoniq,0.0070,...,0.000000,POINT (551158.889 3357367.956),2.0,Klondike-Rey de Oro,1.0,"[0.08666666666666667, 0.008333333333333333, 0....",1.959331,False,True,FP
3399,MRS-029,552484.255100,3.358325e+06,,,1227.0,RdO W,Rock Chip,Tectoniq,0.0090,...,0.027517,POINT (552484.255 3358325.156),2.0,Klondike-Rey de Oro,1.0,"[0.20666666666666667, 0.008333333333333333, 0....",3.892529,False,False,TN


In [50]:
temp_df.iloc[i]

Experiment Name                                  Fe Ca Na K Al Cu Mg CCPI
Name                                                     nervous-mole-575
difference_threshold                                                  2.5
normalization_vector            [ 6.   6.   4.   3.5 10.5 25.   4.5  0.6]
GOLD_THRESHOLD                                                        2.0
Weighted accuracy                                                  0.8892
Weighted f1                                                        0.2767
Weighted precision                                                   0.21
Weighted recall                                                    0.4056
fn                                                                  111.0
fp                                                                  132.0
tn                                                                 3120.0
tp                                                                   34.0
Loss                                  

Unnamed: 0,Experiment Name,Name,difference_threshold,normalization_vector,GOLD_THRESHOLD,Weighted accuracy,Weighted f1,Weighted precision,Weighted recall,fn,fp,tn,tp,Loss,Key Columns
23,Fe K Al Cu CCPI K_(Al-Na) As_Al,sedate-mare-463,2.0,[ 6.5 3.5 10.5 32.5 0.65 0.55 0.65],2.0,0.9020,0.2506,0.2001,0.3352,115.0,122.0,3130.0,30.0,118.718922,"[Fe (%), K (%), Al (%), Cu (ppm), CCPI, K/(Al-..."
71,Fe K Al Cu CCPI K_(Al-Na) As_Al,useful-loon-44,2.5,[ 6. 3. 10. 30. 0.6 0.5 0.6],2.0,0.8814,0.2464,0.1788,0.3962,108.0,178.0,3074.0,37.0,129.132228,"[Fe (%), K (%), Al (%), Cu (ppm), CCPI, K/(Al-..."
53,Fe K Al Cu CCPI K_(Al-Na) As_Al,spiffy-pig-802,2.0,[ 7. 4. 11. 35. 0.7 0.6 0.7],2.0,0.8907,0.2464,0.1856,0.3662,111.0,155.0,3097.0,34.0,142.579623,"[Fe (%), K (%), Al (%), Cu (ppm), CCPI, K/(Al-..."
1,Fe K Al Cu CCPI K_(Al-Na) As_Al,exultant-snipe-354,2.5,[ 5.5 2.5 9.5 30. 0.6 0.5 0.6],2.0,0.8861,0.2443,0.1802,0.3791,110.0,167.0,3085.0,35.0,121.538896,"[Fe (%), K (%), Al (%), Cu (ppm), CCPI, K/(Al-..."
41,Fe K Al Cu CCPI K_(Al-Na) As_Al,orderly-trout-95,2.5,[ 5.5 2.5 9.5 27.5 0.55 0.45 0.55],2.0,0.8928,0.2423,0.1853,0.3501,114.0,142.0,3110.0,31.0,106.370683,"[Fe (%), K (%), Al (%), Cu (ppm), CCPI, K/(Al-..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7,Fe K Al Cu CCPI K_(Al-Na) As_Al,blushing-bear-671,1.0,[ 5.5 2.5 9.5 30. 0.6 0.5 0.6],2.0,0.9426,0.0920,0.1989,0.0599,141.0,15.0,3237.0,4.0,61.732289,"[Fe (%), K (%), Al (%), Cu (ppm), CCPI, K/(Al-..."
77,Fe K Al Cu CCPI K_(Al-Na) As_Al,unequaled-boar-384,1.0,[ 6. 3. 10. 30. 0.6 0.5 0.6],2.0,0.9415,0.0898,0.1880,0.0590,141.0,16.0,3236.0,4.0,64.592948,"[Fe (%), K (%), Al (%), Cu (ppm), CCPI, K/(Al-..."
46,Fe K Al Cu CCPI K_(Al-Na) As_Al,tasteful-fawn-721,1.0,[ 5.5 2.5 9.5 27.5 0.55 0.45 0.55],2.5,0.9501,0.0823,0.1477,0.0571,112.0,16.0,3266.0,3.0,50.830113,"[Fe (%), K (%), Al (%), Cu (ppm), CCPI, K/(Al-..."
6,Fe K Al Cu CCPI K_(Al-Na) As_Al,painted-whale-146,1.0,[ 5.5 2.5 9.5 30. 0.6 0.5 0.6],2.5,0.9509,0.0819,0.1499,0.0564,112.0,16.0,3266.0,3.0,54.035875,"[Fe (%), K (%), Al (%), Cu (ppm), CCPI, K/(Al-..."


In [41]:
eval(key_columns)

['Fe (%)', 'K (%)', 'Al (%)', 'Cu (ppm)', 'CCPI', 'K/(Al-Na)', 'As/Al']

In [28]:
all_exp['Experiment Name'].unique()

array(['Fe Ca Na K Al Cu Mg CCPI',
       'Fe Ca Na K Al Cu Mg CCPI Ishikawa Na_Al',
       'Fe Ca Na K Al Cu Mg CCPI Ishikawa',
       'Fe K Al Cu CCPI K_(Al-Na) As_Al'], dtype=object)

In [None]:
['Fe (%)', 'Ca (%)', 'Na (%)', 'K (%)', 'Al (%)', 'Cu (ppm)', 'Mg (%)', 'CCPI']
['Fe (%)', 'Ca (%)', 'Na (%)', 'K (%)', 'Al (%)', 'Cu (ppm)', 'Mg (%)', 'CCPI', 'Ishikawa', 'Na/Al']
['Fe (%)', 'Ca (%)', 'Na (%)', 'K (%)', 'Al (%)', 'Cu (ppm)', 'Mg (%)', 'CCPI', 'Ishikawa']
['Fe (%)', 'K (%)', 'Al (%)', 'Cu (ppm)', 'CCPI', 'K/(Al-Na)', 'As/Al']