# Dependencies

In [6]:
from __future__ import annotations

import itertools
import math
import re
import sys
import warnings
from pathlib import Path
from typing import Any, Iterable, Optional, Union

import geopandas
import matplotlib
import matplotlib.axes
import matplotlib.cm
import matplotlib.colors
import matplotlib.figure
import matplotlib.pyplot
import mpl_toolkits.mplot3d
import numpy
import pandas
import plotly.graph_objs
import scipy.stats
import seaborn
from scipy.optimize import Bounds
from shapely import Point
from sklearn.linear_model import LinearRegression

from tectoniq.datasets import Mercedes
from tectoniq.functions import IFunction, Polynomial, Power, PowerWithOffset
from tectoniq.geometry import distance, distance_to_line, distance_to_line_segment
from tectoniq.optimization import fit_function

%load_ext autoreload
%autoreload 2

ModuleNotFoundError: No module named 'tectoniq.datasets'

# Configuration

In [None]:
# Column names re-exported from the Mercedes dataset class.
ID_COLUMN = Mercedes.ID_COLUMN
GEOMETRY_COLUMN = Mercedes.GEOMETRY_COLUMN
AREA_ID_COLUMN = Mercedes.AREA_ID_COLUMN
DOMAIN_ID_COLUMN = Mercedes.DOMAIN_ID_COLUMN
DOMAIN_NAME_COLUMN = Mercedes.DOMAIN_NAME_COLUMN

# Assay columns referred to by name; the target of the analysis is gold.
GOLD_COLUMN: str = "Au (ppm)"
COPPER_COLUMN: str = "Cu (ppm)"
TARGET_COLUMN: str = GOLD_COLUMN

# Search distance around a reference sample (plot titles report it in metres).
RADIUS: float = 20
# Minimum number of other samples required within RADIUS before a reference
# sample is plotted.
SAMPLE_COUNT_THRESHOLD: int = 20
# TARGET_COLUMN value at or above which a sample is treated as a reference sample.
THRESHOLD: float = 3

# Ratio / index columns, followed by the raw element columns they are built from.
RATIO_COLUMNS: list[str] = [
    "(2Ca+Na+K)/Al",
    "K/Al",
    "As/Al",
    "K/(Al-Na)",
    "CCPI",
    "Ishikawa",
    "Na/Al",
]
RATIO_CONSTITUTENT_COLUMNS: list[str] = [
    "Fe (%)",
    "Ca (%)",
    "Na (%)",
    "K (%)",
    "Al (%)",
    "As (ppm)",
    "Cu (ppm)",
    "Mg (%)",
]
# Element columns used to build the per-sample composition vector
# (the constituent columns without "As (ppm)").
KEY_ELEMENT_COLUMNS: list[str] = [
    "Fe (%)",
    "Ca (%)",
    "Na (%)",
    "K (%)",
    "Al (%)",
    "Cu (ppm)",
    "Mg (%)",
]

PLOT_TYPE_DENSITY: str = "density"

# Columns that get a best-fit line drawn. Each column maps to its own (currently
# empty) options dict; the keys are the ratio columns followed by the
# constituent columns, in that order.
COLUMN_FIT_OPTIONS: dict[str, dict[str, Any]] = {
    column: {} for column in RATIO_COLUMNS + RATIO_CONSTITUTENT_COLUMNS
}

# (lower, upper) axis limits per column; also used to filter the points that
# feed the best-fit line.
COLUMN_LIMITS: dict[str, tuple[float, float]] = {
    # Ratios and indices
    "(2Ca+Na+K)/Al": (0, 2.5),
    "K/Al": (0, 1.5),
    "As/Al": (0, 2000),
    "K/(Al-Na)": (-0.25, 1.5),
    "CCPI": (0, 1),
    "Ishikawa": (0, 100),
    "Na/Al": (0, 1.5),
    # Raw element columns
    "Fe (%)": (0, 10),
    "Ca (%)": (0, 30),
    "Na (%)": (0, 10),
    "K (%)": (0, 10),
    "Al (%)": (0, 10),
    "As (ppm)": (0, 600),
    "Cu (ppm)": (0, 300),
    "Mg (%)": (0, 5),
}

# End points of the manually chosen line used to pick a subset of domain 1.
DOMAIN_1_SUBSET_LINE: tuple[Point, Point] = (
    Point(549250, 3356800),
    Point(550000, 3355500),
)

# Opposite corners of the axis-aligned rectangle that bounds the null domain.
NULL_DOMAIN_TOP_RIGHT = Point(551500, 3355850)
NULL_DOMAIN_BOTTOM_LEFT = Point(551200, 3355600)

# Sample numbers of the hand-picked reference samples, one per named rock type.
AREA_1_RHYOLITE_SAMPLE_NUMBER = "MRS-016"
AREA_1_ANDESITE_SAMPLE_NUMBER = "MRS-032"
AREA_1_ORE_SAMPLE_NUMBER = "MRS-001"
AREA_2_LIMESTONE_SAMPLE_NUMBER = "MRS-023"
AREA_2_SILTSTONE_SAMPLE_NUMBER = "M023228"

# Every reference sample exactly once. The andesite sample used to be missing
# because the rhyolite constant was listed twice, so it was never selected into
# the key-samples frame.
KEY_SAMPLE_NUMBERS = [
    AREA_1_RHYOLITE_SAMPLE_NUMBER,
    AREA_1_ANDESITE_SAMPLE_NUMBER,
    AREA_1_ORE_SAMPLE_NUMBER,
    AREA_2_LIMESTONE_SAMPLE_NUMBER,
    AREA_2_SILTSTONE_SAMPLE_NUMBER,
]

# Suppress every warning for the rest of the session.
# NOTE(review): this also hides pandas/geopandas warnings raised by later cells.
warnings.filterwarnings(action = "ignore")
# Switch matplotlib to interactive mode.
matplotlib.interactive(True)

# Utilities

In [None]:
def get_units(column: str) -> Optional[str]:
    """Return the parenthesised part of a column name, or ``None`` if it has none.

    The whole regex match is returned, so the parentheses are part of the
    result (``"Au (ppm)"`` gives ``"(ppm)"``). The pattern is greedy: the match
    runs from the first ``(`` to the last ``)`` in the name.
    """
    # NOTE(review): the capture group is never used; if the bare unit ("ppm")
    # is wanted this should return group(1) - confirm against callers.
    found = re.search(r"\((.+)\)", column)
    if found is None:
        return None
    return found.group(0)

# Data

In [None]:
# Load the working data set through the Mercedes loader, pointing it at ../Data
# (relative to the notebook's working directory).
# NOTE(review): presumably `latest` picks the most recent data file - confirm in tectoniq.datasets.
data_frame: geopandas.GeoDataFrame = Mercedes.latest(Path("../Data"))

In [None]:
# One frame per domain id (1-4).
domain_1_data_frame: geopandas.GeoDataFrame = data_frame.loc[data_frame[DOMAIN_ID_COLUMN].eq(1)]
domain_2_data_frame: geopandas.GeoDataFrame = data_frame.loc[data_frame[DOMAIN_ID_COLUMN].eq(2)]
domain_3_data_frame: geopandas.GeoDataFrame = data_frame.loc[data_frame[DOMAIN_ID_COLUMN].eq(3)]
domain_4_data_frame: geopandas.GeoDataFrame = data_frame.loc[data_frame[DOMAIN_ID_COLUMN].eq(4)]

# One frame per area id (1-2).
area_1_data_frame: geopandas.GeoDataFrame = data_frame.loc[data_frame[AREA_ID_COLUMN].eq(1)]
area_2_data_frame: geopandas.GeoDataFrame = data_frame.loc[data_frame[AREA_ID_COLUMN].eq(2)]

# Rows whose sample id is one of the hand-picked key samples.
key_samples_data_frame: geopandas.GeoDataFrame = data_frame.loc[data_frame[ID_COLUMN].isin(KEY_SAMPLE_NUMBERS)]

In [None]:
# def plot(data_frame, columns):
#     for index, row in data_frame[data_frame[TARGET_COLUMN].ge(0.5)].iterrows():
#         DISTANCE_COLUMN: str = "Distance"
#         NUMBER_OF_COLUMNS: int = 2
#         NUMBER_OF_ROWS: int = int(math.ceil(len(columns) / NUMBER_OF_COLUMNS))

#         subset_data_frame: geopandas.GeoDataFrame = area_2_data_frame.copy()
#         subset_data_frame[DISTANCE_COLUMN] = area_2_data_frame[GEOMETRY_COLUMN].apply(lambda other: distance(row[GEOMETRY_COLUMN], other))
#         subset_data_frame = subset_data_frame[subset_data_frame[DISTANCE_COLUMN].le(20)]

#         if len(subset_data_frame) > 3:
#             figure: matplotlib.figure.Figure
#             axes: numpy.ndarray[matplotlib.axes.Axes]
#             figure, axes = matplotlib.pyplot.subplots(nrows = NUMBER_OF_ROWS, ncols = NUMBER_OF_COLUMNS, figsize = (8, 8))

#             for index, column in enumerate(columns):
#                 y_min: Optional[float] = None
#                 y_max: Optional[float] = None

#                 if column in COLUMN_LIMITS:
#                     y_min = COLUMN_LIMITS[column][0]
#                     y_max = COLUMN_LIMITS[column][1]

#                 row_index: int = int(math.floor(index / NUMBER_OF_COLUMNS))
#                 column_index: int = index % NUMBER_OF_COLUMNS
#                 axes[row_index, column_index].scatter(subset_data_frame[DISTANCE_COLUMN], subset_data_frame[column])

#                 axes[row_index, column_index].set_ylabel(column)
#                 axes[row_index, column_index].set_ylim(y_min, y_max)
#                 axes[row_index, column_index].set_xlim(0, RADIUS)

#             figure.supxlabel("Distance (m)")

# plot(area_2_data_frame, RATIO_COLUMNS)



# Refinement

In [None]:
# domain_1_subset_data_frame: geopandas.GeoDataFrame = data_frame[data_frame[GEOMETRY_COLUMN].apply(lambda x: distance_to_line_segment(x, DOMAIN_1_SUBSET_LINE[0], DOMAIN_1_SUBSET_LINE[1])).le(200)][data_frame[TARGET_COLUMN].ge(THRESHOLD)]

# X: numpy.array[list[float]] = numpy.array(domain_1_subset_data_frame[GEOMETRY_COLUMN].apply(lambda x: [x.x]).to_list())
# y: numpy.array[float] = numpy.array(domain_1_subset_data_frame[GEOMETRY_COLUMN].apply(lambda x: x.y).tolist())

# linear_regression: LinearRegression = LinearRegression().fit(X, y)

# domain_1_subset_line_point_1_x: float = domain_1_subset_data_frame[GEOMETRY_COLUMN].apply(lambda x: x.x).min() - RADIUS
# domain_1_subset_line_point_1_y: float = linear_regression.predict([[domain_1_subset_line_point_1_x]])[0]
# domain_1_subset_line_point_2_x: float = domain_1_subset_data_frame[GEOMETRY_COLUMN].apply(lambda x: x.x).max() + RADIUS
# domain_1_subset_line_point_2_y: float = linear_regression.predict([[domain_1_subset_line_point_2_x]])[0]
# domain_1_subset_line = Point(domain_1_subset_line_point_1_x, domain_1_subset_line_point_1_y), Point(domain_1_subset_line_point_2_x, domain_1_subset_line_point_2_y)

# print(f"Adjusting manual subset line from ({DOMAIN_1_SUBSET_LINE[0].x}, {DOMAIN_1_SUBSET_LINE[0].y}), ({DOMAIN_1_SUBSET_LINE[1].x}, {DOMAIN_1_SUBSET_LINE[1].y}) -> ({domain_1_subset_line[0].x}, {domain_1_subset_line[0].y}), ({domain_1_subset_line[1].x}, {domain_1_subset_line[1].y})")

# Difference Measure

In [None]:
# Hand-entered composition vectors. Their components are combined element-wise
# with the KEY_ELEMENT_COLUMNS values below, so they must stay in that column order.
AREA_1_ORE_VECTOR: numpy.ndarray = numpy.array([1, 0, 0, 0, 0.5, 45, 0])
AREA_1_ANDESITE_VECTOR: numpy.ndarray = numpy.array([5, 4.5, 3, 2.5, 8.5, 30, 1])
AREA_1_RHYOLITE_VECTOR: numpy.ndarray = numpy.array([5, 5, 3, 2, 7.5, 5, 5])

# Per-component divisor that brings the columns onto comparable scales.
NORMALIZATION_VECTOR: numpy.ndarray = numpy.array([5, 5, 5, 5, 10, 50, 5])
AREA_1_ORE_NORMALIZED_VECTOR: numpy.ndarray = numpy.divide(AREA_1_ORE_VECTOR, NORMALIZATION_VECTOR)
AREA_1_ANDESITE_NORMALIZED_VECTOR: numpy.ndarray = numpy.divide(AREA_1_ANDESITE_VECTOR, NORMALIZATION_VECTOR)
AREA_1_RHYOLITE_NORMALIZED_VECTOR: numpy.ndarray = numpy.divide(AREA_1_RHYOLITE_VECTOR, NORMALIZATION_VECTOR)

NORMALIZED_VECTOR_COLUMN: str = "NormalizedVector"
ORE_DIFFERENCE_COLUMN: str = "OreDifference"

# area_1_data_frame was produced by filtering data_frame. Take an explicit copy
# before adding columns so the assignments below never target a slice of the
# parent frame (which would raise SettingWithCopyWarning, currently hidden by
# the global warning filter).
area_1_data_frame = area_1_data_frame.copy()

# Normalized composition vector of every sample (one numpy array per row).
area_1_data_frame[NORMALIZED_VECTOR_COLUMN] = area_1_data_frame[KEY_ELEMENT_COLUMNS].apply(
    lambda x: numpy.divide(x.to_numpy(), NORMALIZATION_VECTOR),
    axis = 1,
)
# Euclidean distance between each sample's normalized vector and the ore vector.
area_1_data_frame[ORE_DIFFERENCE_COLUMN] = area_1_data_frame[NORMALIZED_VECTOR_COLUMN].apply(
    lambda x: numpy.linalg.norm(x - AREA_1_ORE_NORMALIZED_VECTOR)
)

In [None]:
# Bar chart of the mean gold grade per ore-difference interval. The intervals
# are [start, start + interval_size) and cover 0 to 2.
interval_size = 0.2
bar_labels: list[str] = []
bar_x_positions = []
bar_heights = []
bar_widths = []
for interval_start in numpy.arange(0, 2, interval_size):
    interval_end = interval_start + interval_size
    in_interval = area_1_data_frame[ORE_DIFFERENCE_COLUMN].ge(interval_start) & area_1_data_frame[ORE_DIFFERENCE_COLUMN].lt(interval_end)
    subset = area_1_data_frame[in_interval]
    # Mean of an empty interval is NaN, which matplotlib draws as a missing bar.
    mean_gold = subset[GOLD_COLUMN].mean()

    # ".1f" / ".2f" set the precision. The previous "1f" / "2f" only set a
    # minimum width and therefore printed six decimals.
    bar_labels.append(f"{interval_start:.1f}")
    bar_x_positions.append(interval_start + (interval_size / 2))
    bar_heights.append(mean_gold)
    bar_widths.append(interval_size)
    print(f"{interval_start:.2f} to {interval_end:.2f}: {mean_gold} ({subset[GOLD_COLUMN].count()})")

figure, axes = matplotlib.pyplot.subplots(figsize = (8, 8))
figure.suptitle("Area 1 Histogram of Mean Au (ppm) against Ore Difference Measure")

axes.bar(bar_x_positions, bar_heights, width = bar_widths, label = bar_labels, linewidth = 0.5, edgecolor = "black")
axes.set_xlabel("Ore Difference Measure")
axes.set_ylabel("Mean Au (ppm)")

# Two maps of area 1 side by side: high-gold samples on the left, samples with a
# low ore difference measure on the right.
figure, axes = matplotlib.pyplot.subplots(nrows=1, ncols=2, figsize = (16, 8))
figure.suptitle("Comparison of High Gold and Low Ore Difference Measures")

# Both maps share the extent of the area 1 samples.
x_coordinates = area_1_data_frame[GEOMETRY_COLUMN].apply(lambda x: x.x)
y_coordinates = area_1_data_frame[GEOMETRY_COLUMN].apply(lambda x: x.y)
x_min = x_coordinates.min()
x_max = x_coordinates.max()
y_min = y_coordinates.min()
y_max = y_coordinates.max()

for map_axes in axes:
    map_axes.set_xlim(x_min, x_max)
    map_axes.set_ylim(y_min, y_max)

# Left: every sample in white, samples with Au >= 3 ppm on top in yellow.
area_1_data_frame.plot(ax = axes[0], legend = True, color="white", markersize=9, linewidth=0.5, edgecolors="black")
area_1_data_frame[area_1_data_frame[GOLD_COLUMN].ge(3)].plot(ax=axes[0], legend = True, color="yellow", markersize = 25, linewidth = 0.5, edgecolors = "black")
axes[0].set_title("Au (ppm) >= 3 (Yellow)")

# Right: every sample in white, samples with ore difference <= 0.6 on top.
# `column` is required for the colormap to encode the ore difference value;
# without it geopandas colours the points by their row position instead.
area_1_data_frame.plot(ax = axes[1], legend = True, color="white", markersize=9, linewidth=0.5, edgecolors="black")
area_1_data_frame[area_1_data_frame[ORE_DIFFERENCE_COLUMN].le(0.6)].plot(ax=axes[1], column = ORE_DIFFERENCE_COLUMN, legend = True, cmap = "YlGn_r", markersize = 25, linewidth = 0.5, edgecolors = "black")
axes[1].set_title("Ore Difference Measure (Yellow = High, Green = Low)")

# Exploration

In [None]:
# Overview map: every sample in white, the four domains highlighted on top.
figure, axes = matplotlib.pyplot.subplots(figsize=(8, 8))

# Domains 1-4
axes.set_xlim(545000, 565000)
axes.set_ylim(3350000, 3382500)

# Marker styling shared by every highlighted layer.
highlight_style = dict(legend=True, markersize=25, linewidth=0.5, edgecolors="black")

data_frame.plot(ax=axes, legend=True, color="white", markersize=9, linewidth=0.5, edgecolors="black")

# High Gold
# data_frame[data_frame[TARGET_COLUMN].ge(3)].plot(ax=axes, legend=True, color="yellow", markersize=25, linewidth=0.5, edgecolors="black")

# Domains
for domain_id, domain_color in ((1, "red"), (2, "green"), (3, "blue"), (4, "gray")):
    data_frame[data_frame[DOMAIN_ID_COLUMN] == domain_id].plot(ax=axes, color=domain_color, **highlight_style)

# Areas
# data_frame[data_frame[AREA_ID_COLUMN] == 1].plot(ax=axes, legend=True, color="green", markersize=25, linewidth=0.5, edgecolors="black")
#data_frame[data_frame[AREA_ID_COLUMN] == 2].plot(ax=axes, legend=True, color="red", markersize=25, linewidth=0.5, edgecolors="black")

# Special Sample
# data_frame[data_frame['SampleNumber'] == 'M021872'].plot(ax=axes, legend=True, color="green", markersize=25, linewidth=0.5, edgecolors="black")

# Special Line
# data_frame[data_frame[GEOMETRY_COLUMN].apply(lambda x: distance_to_line_segment(x, domain_1_subset_line[0], domain_1_subset_line[1])).le(200)][data_frame[TARGET_COLUMN].ge(THRESHOLD)].plot(ax=axes, legend=True, color="yellow", markersize=25, linewidth=0.5, edgecolors="black")
# axes.plot([domain_1_subset_line[0].x, domain_1_subset_line[1].x], [domain_1_subset_line[0].y, domain_1_subset_line[1].y])

In [None]:
# Interactive plotly map of the samples with areas 1 and 2 highlighted.

def _add_sample_trace(figure, frame, name, color, size, hovertemplate=None):
    """Add one marker trace to `figure` for the points in `frame`'s geometry column."""
    geometry = frame[GEOMETRY_COLUMN]
    trace_options = dict(
        x=geometry.apply(lambda point: point.x),
        y=geometry.apply(lambda point: point.y),
        mode='markers',
        name=name,
        marker=dict(color=color, size=size, line=dict(width=0.5, color='black')),
    )
    if hovertemplate is not None:
        trace_options["hovertemplate"] = hovertemplate
    figure.add_trace(plotly.graph_objs.Scatter(**trace_options))


# x is the easting and y the northing, matching the axis titles set below.
# (The labels were previously swapped in the hover text.)
HOVER_TEMPLATE = "Easting: %{x}<br>" + "Northing: %{y}<br>"

# Create plotly figure
fig = plotly.graph_objs.Figure()

fig.update_layout(
    autosize=False,
    width=700,
    height=900,
    legend=dict(
        x=0,
        y=1,
        traceorder="reversed",
        title_font_family="Times New Roman",
        font=dict(
            size=12,
            color="black"
        ),
        bgcolor="LightSteelBlue",
        bordercolor="Black",
        borderwidth=2
    ),
    scene_camera_eye=dict(x=10, y=10, z=1),
    # The axis ranges are set once here; an earlier x range of
    # [547000, 600000] was always overwritten by these values.
    xaxis=dict(range=[545000, 555000]),
    yaxis=dict(range=[3351000, 3366000]),
)

area_1_mask = data_frame[AREA_ID_COLUMN] == 1
area_2_mask = data_frame[AREA_ID_COLUMN] == 2
above_threshold_mask = data_frame[TARGET_COLUMN].ge(THRESHOLD)

# Plot all points
_add_sample_trace(fig, data_frame, 'All Points', 'white', 4)

# Plot area 1 (the trace was previously mislabelled 'Domain 1')
_add_sample_trace(fig, data_frame[area_1_mask], 'Area 1', 'lightgreen', 4, HOVER_TEMPLATE)

# Plot area 1 points at or above the threshold
_add_sample_trace(
    fig,
    data_frame[area_1_mask & above_threshold_mask],
    f'Area 1 ({TARGET_COLUMN} >= {THRESHOLD})',
    'green',
    5,
    HOVER_TEMPLATE,
)

# Plot area 2
_add_sample_trace(fig, data_frame[area_2_mask], 'Area 2', 'red', 5, HOVER_TEMPLATE)

fig.update_xaxes(title_text='Easting')
fig.update_yaxes(title_text='Northing')

# Show plot
fig.show()

In [None]:
# seaborn.kdeplot(data=data_frame[(data_frame[DOMAIN_ID_COLUMN] == 1 ) & (data_frame[TARGET_COLUMN].ge(THRESHOLD))], x='Na (%)', fill=True)
# seaborn.kdeplot(data=data_frame[(data_frame[DOMAIN_ID_COLUMN] == 1 ) & (data_frame[TARGET_COLUMN].lt(THRESHOLD))], x='Na (%)', fill=False, color='orange')


In [None]:
# Kernel density of K (%) within domain 1: filled curve for samples at or above
# THRESHOLD, orange outline for samples below it.
domain_1_mask = data_frame[DOMAIN_ID_COLUMN] == 1
at_or_above_threshold = data_frame[TARGET_COLUMN].ge(THRESHOLD)
below_threshold = data_frame[TARGET_COLUMN].lt(THRESHOLD)

seaborn.kdeplot(data=data_frame[domain_1_mask & at_or_above_threshold], x='K (%)', fill=True)
seaborn.kdeplot(data=data_frame[domain_1_mask & below_threshold], x='K (%)', fill=False, color='orange')
# seaborn.kdeplot(data=data_frame[(data_frame[DOMAIN_ID_COLUMN] == 1 ) & (data_frame[TARGET_COLUMN].lt(THRESHOLD))], x='K (%)', fill=True)
# seaborn.kdeplot(data=data_frame[(data_frame[DOMAIN_ID_COLUMN] == 1 ) & (data_frame[TARGET_COLUMN].lt(THRESHOLD))], x='Ca (%)', fill=True)
# seaborn.kdeplot(data=data_frame[(data_frame[DOMAIN_ID_COLUMN] == 1 ) & (data_frame[TARGET_COLUMN].lt(THRESHOLD))], x='Mg (%)', fill=True)

In [None]:
# seaborn.kdeplot(data=data_frame[(data_frame[DOMAIN_ID_COLUMN] == 1 ) & (data_frame[TARGET_COLUMN].ge(THRESHOLD))], x='Ca (%)', fill=True)
# seaborn.kdeplot(data=data_frame[(data_frame[DOMAIN_ID_COLUMN] == 1 ) & (data_frame[TARGET_COLUMN].lt(THRESHOLD))], x='Ca (%)', fill=False, color='orange')

In [None]:
# seaborn.kdeplot(data=data_frame[(data_frame[DOMAIN_ID_COLUMN] == 1 ) & (data_frame[TARGET_COLUMN].ge(THRESHOLD))], x='Ishikawa', fill=True)
# seaborn.kdeplot(data=data_frame[(data_frame[DOMAIN_ID_COLUMN] == 1 ) & (data_frame[TARGET_COLUMN].lt(THRESHOLD))], x='Ishikawa', fill=False, color='orange')

In [None]:
# seaborn.kdeplot(data=data_frame[(data_frame[DOMAIN_ID_COLUMN] == 1 ) & (data_frame[TARGET_COLUMN].ge(THRESHOLD))], x='CCPI', fill=True)
# seaborn.kdeplot(data=data_frame[(data_frame[DOMAIN_ID_COLUMN] == 1 ) & (data_frame[TARGET_COLUMN].lt(THRESHOLD))], x='CCPI', fill=False, color='orange')

### Distance Density Plot

In [None]:
def create_density_plot(data_frame: geopandas.GeoDataFrame, columns: list[str]) -> None:
    """Plot column-versus-distance densities around the first qualifying high-grade sample.

    Samples are visited in descending TARGET_COLUMN order and the search stops
    at the first sample below THRESHOLD. A sample qualifies when at least
    SAMPLE_COUNT_THRESHOLD other samples lie within RADIUS of it. For the first
    qualifying sample one figure is drawn with a 2-D kernel density plot of
    every entry of `columns` against the distance to that sample, plus a linear
    best-fit line for columns listed in COLUMN_FIT_OPTIONS. Only that first
    sample is plotted.

    Args:
        data_frame: Samples to search and plot.
        columns: Names of the columns to plot, one subplot each. Nothing is
            drawn when this is empty.
    """
    if not columns:
        return

    DISTANCE_COLUMN: str = "Distance"
    NUMBER_OF_COLUMNS: int = 2
    NUMBER_OF_ROWS: int = int(math.ceil(len(columns) / NUMBER_OF_COLUMNS))
    # NOTE(review): hard-coded position of the vertical reference line; its
    # origin is not visible in this notebook - confirm or parameterize.
    REFERENCE_DISTANCE: float = 15.206906

    for index, row in data_frame.sort_values(by = [TARGET_COLUMN], ascending = [False]).iterrows():
        if row[TARGET_COLUMN] < THRESHOLD:
            break
        other_rows: geopandas.GeoDataFrame = data_frame[data_frame.index != index]
        other_rows = other_rows[other_rows[GEOMETRY_COLUMN].apply(lambda x: distance(x, row[GEOMETRY_COLUMN]) <= RADIUS)]

        if len(other_rows) < SAMPLE_COUNT_THRESHOLD:
            continue

        subset_index: pandas.Index = pandas.Index([index]).union(other_rows.index)
        # Copy so that adding the distance column never writes into a slice of data_frame.
        subset_data_frame: geopandas.GeoDataFrame = data_frame.loc[subset_index].copy()
        subset_data_frame[DISTANCE_COLUMN] = subset_data_frame[GEOMETRY_COLUMN].apply(lambda x: distance(x, row[GEOMETRY_COLUMN]))

        figure: matplotlib.figure.Figure
        axes: numpy.ndarray
        # squeeze = False keeps `axes` two-dimensional even for a single row of
        # subplots, so the [row, column] indexing below always works.
        figure, axes = matplotlib.pyplot.subplots(nrows = NUMBER_OF_ROWS, ncols = NUMBER_OF_COLUMNS, figsize = (8, 8), squeeze = False)

        for plot_index, column in enumerate(columns):
            y_min: Optional[float]
            y_max: Optional[float]
            y_min, y_max = COLUMN_LIMITS.get(column, (None, None))

            current_axes: matplotlib.axes.Axes = axes[plot_index // NUMBER_OF_COLUMNS, plot_index % NUMBER_OF_COLUMNS]
            seaborn.kdeplot(subset_data_frame, x=DISTANCE_COLUMN, y=column, ax=current_axes, fill=False, cmap="viridis", thresh=0)
            current_axes.axvline(x=REFERENCE_DISTANCE, color='gray', label='axvline - % of full height')

            if column in COLUMN_FIT_OPTIONS:
                # Fit only the points that fall inside the column's limits.
                within_limits = subset_data_frame[column].apply(lambda x: (y_min is None or y_min <= x) and (y_max is None or x <= y_max))
                best_fit_subset_data_frame: geopandas.GeoDataFrame = subset_data_frame[within_limits]
                linear_regression_result = scipy.stats.linregress(best_fit_subset_data_frame[DISTANCE_COLUMN], best_fit_subset_data_frame[column])
                best_fit_function: IFunction = Polynomial([linear_regression_result.intercept, linear_regression_result.slope])
                # best_fit_function: IFunction = fit_function(Polynomial, best_fit_subset_data_frame[DISTANCE_COLUMN], best_fit_subset_data_frame[column], bounds = Bounds([-1000, -1000], [100, 100]))
                best_fit_X, best_fit_Y = best_fit_function.to_discrete(best_fit_subset_data_frame[DISTANCE_COLUMN].min(), best_fit_subset_data_frame[DISTANCE_COLUMN].max(), 1000)
                current_axes.plot(best_fit_X, best_fit_Y, linestyle = "-", color = "red")
                # The fitted function is shown as the subplot's x label.
                current_axes.set_xlabel(str(best_fit_function))

        figure.supxlabel("Distance (m)")
        title: str = f"{row[ID_COLUMN]} - {row[TARGET_COLUMN]} {TARGET_COLUMN} - {len(other_rows)} others in {RADIUS}m"
        figure.suptitle(title)
        figure.tight_layout()
        figure.show()
        # Only the first qualifying sample is plotted.
        break

# Density plots for the target column followed by every ratio column.
create_density_plot(data_frame, [TARGET_COLUMN, *RATIO_COLUMNS])

In [None]:
def create_null_domain_density_plots(data_frame: geopandas.GeoDataFrame, reference_points_data_frame: geopandas.GeoDataFrame, columns: list[str]) -> None:
    """Plot column-versus-distance densities around every reference point.

    For each row of `reference_points_data_frame` one figure is drawn. It
    contains a 2-D kernel density plot of every entry of `columns` against the
    distance from that reference point to all samples in `data_frame`, plus a
    fitted polynomial for columns listed in COLUMN_FIT_OPTIONS.

    Args:
        data_frame: Samples whose values are plotted.
        reference_points_data_frame: Rows used as distance origins, one figure per row.
        columns: Names of the columns to plot, one subplot each. Nothing is
            drawn when this is empty.
    """
    if not columns:
        return

    DISTANCE_COLUMN: str = "Distance"
    NUMBER_OF_COLUMNS: int = 2
    NUMBER_OF_ROWS: int = int(math.ceil(len(columns) / NUMBER_OF_COLUMNS))
    # Upper end of the distance axis and of the plotted fit.
    MAX_DISTANCE: float = 150

    for _, row in reference_points_data_frame.iterrows():
        # Work on a copy so the distance column is not added to the caller's frame.
        subset_data_frame: geopandas.GeoDataFrame = data_frame.copy()
        subset_data_frame[DISTANCE_COLUMN] = subset_data_frame[GEOMETRY_COLUMN].apply(lambda x: distance(x, row[GEOMETRY_COLUMN]))

        figure: matplotlib.figure.Figure
        axes: numpy.ndarray
        # squeeze = False keeps `axes` two-dimensional even for a single row of
        # subplots, so the [row, column] indexing below always works.
        figure, axes = matplotlib.pyplot.subplots(nrows = NUMBER_OF_ROWS, ncols = NUMBER_OF_COLUMNS, figsize = (8, 8), squeeze = False)

        for plot_index, column in enumerate(columns):
            y_min: Optional[float]
            y_max: Optional[float]
            y_min, y_max = COLUMN_LIMITS.get(column, (None, None))

            current_axes: matplotlib.axes.Axes = axes[plot_index // NUMBER_OF_COLUMNS, plot_index % NUMBER_OF_COLUMNS]
            seaborn.kdeplot(subset_data_frame, x=DISTANCE_COLUMN, y=column, ax=current_axes, fill=False, cmap="YlGn", thresh=0)

            current_axes.set_ylabel(column)
            current_axes.set_ylim(y_min, y_max)

            if column in COLUMN_FIT_OPTIONS:
                # Fit only the points that fall inside the column's limits.
                within_limits = subset_data_frame[column].apply(lambda x: (y_min is None or y_min <= x) and (y_max is None or x <= y_max))
                best_fit_subset_data_frame: geopandas.GeoDataFrame = subset_data_frame[within_limits]
                # linear_regression_result: scipy.stats.LinregressResult = scipy.stats.linregress(best_fit_subset_data_frame[DISTANCE_COLUMN], best_fit_subset_data_frame[column])
                # best_fit_function: IFunction = Polynomial([linear_regression_result.intercept, linear_regression_result.slope])
                best_fit_function: IFunction = fit_function(Polynomial, best_fit_subset_data_frame[DISTANCE_COLUMN], best_fit_subset_data_frame[column], bounds = Bounds([0], [10000]))
                best_fit_X, best_fit_Y = best_fit_function.to_discrete(0, MAX_DISTANCE, 1000)
                current_axes.plot(best_fit_X, best_fit_Y, linestyle = "-", color = "red")
                # The fitted function is shown as the subplot's x label.
                current_axes.set_xlabel(str(best_fit_function))

            current_axes.set_xlim(0, MAX_DISTANCE)

        figure.supxlabel("Distance (m)")
        title: str = f"{row[ID_COLUMN]} - {row[TARGET_COLUMN]} {TARGET_COLUMN}"

        figure.suptitle(title)
        figure.tight_layout()
        figure.show()

# null_domain_data_frame: geopandas.GeoDataFrame = data_frame[data_frame[GEOMETRY_COLUMN].apply(lambda x: NULL_DOMAIN_BOTTOM_LEFT.x <= x.x and x.x <= NULL_DOMAIN_TOP_RIGHT.x and NULL_DOMAIN_BOTTOM_LEFT.y <= x.y and x.y <= NULL_DOMAIN_TOP_RIGHT.y)]
# null_domain_reference_data_frame: geopandas.GeoDataFrame = null_domain_data_frame[null_domain_data_frame[ID_COLUMN] == "M021872"]

# create_null_domain_density_plots(null_domain_data_frame, null_domain_data_frame[null_domain_data_frame[ID_COLUMN] == "M021872"], [TARGET_COLUMN] + RATIO_COLUMNS)
# create_null_domain_density_plots(null_domain_data_frame, null_domain_data_frame[null_domain_data_frame[ID_COLUMN] == "M021872"], RATIO_CONSTITUTENT_COLUMNS)

# Default Plots

In [None]:
def create_plots(data_frame: geopandas.GeoDataFrame, columns: list[str]) -> None:
    """Scatter-plot each column against distance around every qualifying high-grade sample.

    Samples are visited in descending TARGET_COLUMN order and the search stops
    at the first sample below THRESHOLD. A sample qualifies when at least
    SAMPLE_COUNT_THRESHOLD other samples lie within RADIUS of it. One figure is
    drawn per qualifying sample, with one subplot per entry of `columns` and a
    linear best-fit line for columns listed in COLUMN_FIT_OPTIONS.

    Args:
        data_frame: Samples to search and plot.
        columns: Names of the columns to plot, one subplot each. Nothing is
            drawn when this is empty.
    """
    if not columns:
        return

    DISTANCE_COLUMN: str = "Distance"
    NUMBER_OF_COLUMNS: int = 2
    NUMBER_OF_ROWS: int = int(math.ceil(len(columns) / NUMBER_OF_COLUMNS))

    for index, row in data_frame.sort_values(by = [TARGET_COLUMN], ascending = [False]).iterrows():
        if row[TARGET_COLUMN] < THRESHOLD:
            break
        other_rows: geopandas.GeoDataFrame = data_frame[data_frame.index != index]
        other_rows = other_rows[other_rows[GEOMETRY_COLUMN].apply(lambda x: distance(x, row[GEOMETRY_COLUMN]) <= RADIUS)]

        if len(other_rows) < SAMPLE_COUNT_THRESHOLD:
            continue

        subset_index: pandas.Index = pandas.Index([index]).union(other_rows.index)
        # Copy so that adding the distance column never writes into a slice of data_frame.
        subset_data_frame: geopandas.GeoDataFrame = data_frame.loc[subset_index].copy()
        subset_data_frame[DISTANCE_COLUMN] = subset_data_frame[GEOMETRY_COLUMN].apply(lambda x: distance(x, row[GEOMETRY_COLUMN]))

        figure: matplotlib.figure.Figure
        axes: numpy.ndarray
        # squeeze = False keeps `axes` two-dimensional even for a single row of
        # subplots, so the [row, column] indexing below always works.
        figure, axes = matplotlib.pyplot.subplots(nrows = NUMBER_OF_ROWS, ncols = NUMBER_OF_COLUMNS, figsize = (8, 8), squeeze = False)

        for plot_index, column in enumerate(columns):
            y_min: Optional[float]
            y_max: Optional[float]
            y_min, y_max = COLUMN_LIMITS.get(column, (None, None))

            current_axes: matplotlib.axes.Axes = axes[plot_index // NUMBER_OF_COLUMNS, plot_index % NUMBER_OF_COLUMNS]
            current_axes.scatter(subset_data_frame[DISTANCE_COLUMN], subset_data_frame[column])

            if column in COLUMN_FIT_OPTIONS:
                # Fit only the points that fall inside the column's limits.
                within_limits = subset_data_frame[column].apply(lambda x: (y_min is None or y_min <= x) and (y_max is None or x <= y_max))
                best_fit_subset_data_frame: geopandas.GeoDataFrame = subset_data_frame[within_limits]
                linear_regression_result = scipy.stats.linregress(best_fit_subset_data_frame[DISTANCE_COLUMN], best_fit_subset_data_frame[column])
                best_fit_function: IFunction = Polynomial([linear_regression_result.intercept, linear_regression_result.slope])
                # best_fit_function: IFunction = fit_function(Polynomial, best_fit_subset_data_frame[DISTANCE_COLUMN], best_fit_subset_data_frame[column], bounds = Bounds([-1000, -1000], [100, 100]))
                best_fit_X, best_fit_Y = best_fit_function.to_discrete(best_fit_subset_data_frame[DISTANCE_COLUMN].min(), best_fit_subset_data_frame[DISTANCE_COLUMN].max(), 1000)
                current_axes.plot(best_fit_X, best_fit_Y, linestyle = "-", color = "red")
                # The fitted function is shown as the subplot's x label.
                current_axes.set_xlabel(str(best_fit_function))

            current_axes.set_ylabel(column)
            current_axes.set_ylim(y_min, y_max)
            current_axes.set_xlim(0, RADIUS)

        figure.supxlabel("Distance (m)")
        title: str = f"{row[ID_COLUMN]} - {row[TARGET_COLUMN]} {TARGET_COLUMN} - {len(other_rows)} others in {RADIUS}m"

        figure.suptitle(title)
        figure.tight_layout()
        figure.show()

# create_plots(data_frame, [TARGET_COLUMN] + RATIO_COLUMNS)
# create_plots(data_frame, RATIO_CONSTITUTENT_COLUMNS)

# 3D Plots

In [None]:
def create_3d_plots(data_frame: geopandas.GeoDataFrame, columns: list[str]) -> None:
    """Draw 3-D scatter plots of each column around every qualifying high-grade sample.

    Samples are visited in descending TARGET_COLUMN order and the search stops
    at the first sample below THRESHOLD. A sample qualifies when at least five
    other samples lie within RADIUS of it. One figure is drawn per qualifying
    sample, with one 3-D subplot per entry of `columns`: x and y are the
    offsets from the reference sample and z is the column value. The reference
    sample is drawn in red; the others are shaded by value when the column has
    limits in COLUMN_LIMITS, and blue otherwise.

    Args:
        data_frame: Samples to search and plot.
        columns: Names of the columns to plot, one subplot each. Nothing is
            drawn when this is empty.
    """
    if not columns:
        return

    NUMBER_OF_COLUMNS: int = 2
    NUMBER_OF_ROWS: int = int(math.ceil(len(columns) / NUMBER_OF_COLUMNS))
    # NOTE(review): deliberately (?) lower than SAMPLE_COUNT_THRESHOLD used by
    # the 2-D plots - confirm.
    MINIMUM_NEIGHBOUR_COUNT: int = 5

    for index, row in data_frame.sort_values(by = [TARGET_COLUMN], ascending = [False]).iterrows():
        if row[TARGET_COLUMN] < THRESHOLD:
            break
        other_rows: geopandas.GeoDataFrame = data_frame[data_frame.index != index]
        other_rows = other_rows[other_rows[GEOMETRY_COLUMN].apply(lambda x: distance(x, row[GEOMETRY_COLUMN]) <= RADIUS)]

        if len(other_rows) < MINIMUM_NEIGHBOUR_COUNT:
            continue

        subset_index: pandas.Index = pandas.Index([index]).union(other_rows.index)
        subset_data_frame: geopandas.GeoDataFrame = data_frame.loc[subset_index]

        # Offsets of every point from the reference sample; identical for all columns.
        reference_point = row[GEOMETRY_COLUMN]
        east_offsets = subset_data_frame[GEOMETRY_COLUMN].apply(lambda x: x.x - reference_point.x)
        north_offsets = subset_data_frame[GEOMETRY_COLUMN].apply(lambda x: x.y - reference_point.y)

        # Built once per figure. It used to be assigned inside the column loop.
        title: str = f"{row[ID_COLUMN]} - {row[TARGET_COLUMN]} {TARGET_COLUMN} - {len(other_rows)} others in {RADIUS}m"

        figure: matplotlib.figure.Figure
        axes: numpy.ndarray
        # squeeze = False keeps `axes` two-dimensional even for a single row of
        # subplots, so the [row, column] indexing below always works.
        figure, axes = matplotlib.pyplot.subplots(nrows = NUMBER_OF_ROWS, ncols = NUMBER_OF_COLUMNS, figsize = (8, 16), subplot_kw = { "projection": "3d" }, squeeze = False)

        for plot_index, column in enumerate(columns):
            current_axes = axes[plot_index // NUMBER_OF_COLUMNS, plot_index % NUMBER_OF_COLUMNS]

            z_min: Optional[float] = None
            z_max: Optional[float] = None
            scalar_mappable: Optional[matplotlib.cm.ScalarMappable] = None

            if column in COLUMN_LIMITS:
                z_min, z_max = COLUMN_LIMITS[column]
                # Maps a column value to a shade of blue across the column's limits.
                scalar_mappable = matplotlib.cm.ScalarMappable(
                    norm = matplotlib.colors.Normalize(vmin = z_min, vmax = z_max),
                    cmap = matplotlib.pyplot.get_cmap("Blues"),
                )

            point_colors = subset_data_frame.apply(
                lambda x: "red" if x[ID_COLUMN] == row[ID_COLUMN] else scalar_mappable.to_rgba(x[column]) if scalar_mappable is not None else "blue",
                axis = 1,
            )
            current_axes.scatter(east_offsets, north_offsets, subset_data_frame[column], color = point_colors)

            # Vertical stem from z = 0 up to each point.
            for east, north, value in zip(east_offsets, north_offsets, subset_data_frame[column]):
                current_axes.plot([east, east], [north, north], [0, value], color = "black", alpha = 0.25)

            current_axes.set_xlim(-RADIUS, RADIUS)
            current_axes.set_xlabel("East")
            current_axes.set_ylim(-RADIUS, RADIUS)
            current_axes.set_ylabel("North")
            current_axes.set_zlim(z_min, z_max)
            current_axes.set_zlabel(column)
            current_axes.set_box_aspect(aspect = None, zoom = 0.8)

        figure.suptitle(title)
        figure.tight_layout()
        figure.show()

# create_3d_plots(data_frame[data_frame[DOMAIN_ID_COLUMN] == 1], [TARGET_COLUMN] + RATIO_COLUMNS)
# create_3d_plots(data_frame[data_frame[DOMAIN_ID_COLUMN] == 1], RATIO_CONSTITUTENT_COLUMNS)

# Correlation Plots

In [None]:
def create_correlation_plots(data_frame: geopandas.GeoDataFrame, columns: list[str], target_column: str = None, x_min: float = None, x_max: float = None, title: str = None, type: str = None):
    """
    Draw one panel per entry of `columns`, plotting that column against `target_column`,
    and overlay a least-squares straight line on each panel.

    Parameters:
        data_frame: samples to plot; must contain `target_column` and every entry of `columns`.
        columns: columns drawn on the y axis, one panel each (two panels per row).
        target_column: column drawn on the x axis of every panel.
        x_min, x_max: x-axis limits passed to `set_xlim` (None leaves a limit automatic).
        title: figure title.
        type: PLOT_TYPE_DENSITY draws a seaborn density plot instead of a scatter plot for
            every column other than `target_column`; any other value draws a scatter plot.

    Y-axis limits are taken from COLUMN_LIMITS when the column has an entry there.
    """
    NUMBER_OF_COLUMNS: int = 2

    if not columns:
        # matplotlib rejects a grid with zero rows, so there is nothing to draw.
        return

    NUMBER_OF_ROWS: int = int(math.ceil(len(columns) / NUMBER_OF_COLUMNS))

    figure: matplotlib.figure.Figure
    axes: numpy.ndarray
    # squeeze = False keeps `axes` two-dimensional even when there is only one row,
    # so the [row, column] indexing below always works.
    figure, axes = matplotlib.pyplot.subplots(nrows = NUMBER_OF_ROWS, ncols = NUMBER_OF_COLUMNS, figsize = (8, 8), squeeze = False)

    for index, column in enumerate(columns):
        row_index, column_index = divmod(index, NUMBER_OF_COLUMNS)
        axis: matplotlib.axes.Axes = axes[row_index, column_index]

        if column != target_column and type == PLOT_TYPE_DENSITY:
            # NOTE(review): `import seaborn` is commented out at the top of the file, so this
            # branch raises NameError until that import is restored.
            seaborn.kdeplot(data_frame, x = target_column, y = column, ax = axis, fill = False, cmap = "YlGn", thresh = 0.05)
        else:
            axis.scatter(data_frame[target_column], data_frame[column], alpha = 0.1)

        if column in COLUMN_LIMITS:
            y_min: Optional[float] = COLUMN_LIMITS[column][0]
            y_max: Optional[float] = COLUMN_LIMITS[column][1]
            axis.set_ylim(y_min, y_max)

        axis.set_xlim(x_min, x_max)
        # The plotted values are not log-transformed, so the labels are the plain column names.
        axis.set_xlabel(target_column)
        axis.set_ylabel(column)

        # Regress the plotted column against the target column (the x axis of the panel).
        X = data_frame[target_column]
        Y = data_frame[column]

        # Keep only the rows where both values are finite (drops NaN and +/-inf).
        mask = numpy.isfinite(X) & numpy.isfinite(Y)

        if mask.sum() < 2 or X[mask].nunique() < 2:
            # linregress needs at least two points with distinct x values.
            warnings.warn(f"Not enough finite data to fit {column} against {target_column}; best-fit line skipped")
            continue

        linear_regression_result: scipy.stats.LinregressResult = scipy.stats.linregress(X[mask], Y[mask])
        best_fit_function: IFunction = Polynomial([linear_regression_result.intercept, linear_regression_result.slope])
        best_fit_X, best_fit_Y = best_fit_function.to_discrete(X[mask].min(), X[mask].max(), 1000)
        axis.plot(best_fit_X, best_fit_Y, linestyle = "-", color = "red")

    figure.suptitle(title)
    figure.tight_layout()
    figure.show()

# Render every correlation figure: the scatter variants first, then the density variants.
# Within each variant gold is plotted before copper, and area 1 before area 2.
_CORRELATION_TARGETS = ((GOLD_COLUMN, 10), (COPPER_COLUMN, 200))
_CORRELATION_AREAS = ((area_1_data_frame, "Domains 1, 2, 3"), (area_2_data_frame, "Domain 4"))

for _plot_type in (None, PLOT_TYPE_DENSITY):
    for _target, _upper_limit in _CORRELATION_TARGETS:
        for _area_data_frame, _area_title in _CORRELATION_AREAS:
            # Keep only samples whose target value lies in [0.1, upper limit].
            _in_range = _area_data_frame[_target].ge(0.1) & _area_data_frame[_target].le(_upper_limit)
            create_correlation_plots(_area_data_frame[_in_range], [_target] + RATIO_COLUMNS, _target, x_min = 0, x_max = _upper_limit, title = _area_title, type = _plot_type)

In [None]:
def plot_manual_power_fits(data_frame: geopandas.GeoDataFrame, subset_data_frame: geopandas.GeoDataFrame, columns: list[str], target_column: str) -> None:
    """
    Fit a straight line to ln(column) against ln(target_column) for each column and plot it,
    together with the equivalent power curve exp(intercept) * x ** slope in linear space.

    Every row of `data_frame` within RADIUS of a row of `subset_data_frame` is collated first;
    a row that lies near several reference rows is repeated once per reference row.

    One three-panel figure is shown per analysed column:
        0. scatter of ln(column) against ln(target_column) with the fitted line;
        1. seaborn density plot of column against target_column (skipped with a warning on failure);
        2. scatter of column against target_column with the back-transformed power curve.

    Parameters:
        data_frame: all samples that may be collated.
        subset_data_frame: reference rows; samples are collected around each of them.
        columns: columns to collate and analyse. `target_column` itself is collated but not
            plotted against itself.
        target_column: column used as the x variable of every fit.
    """
    DISTANCE_COLUMN: str = "Distance"

    def collate_data_frame(data_frame: geopandas.GeoDataFrame, subset_data_frame: geopandas.GeoDataFrame, columns: list[str]) -> pandas.DataFrame:
        """Stack, for every reference row, the samples within RADIUS plus their distance to it."""
        neighbourhoods: list[pandas.DataFrame] = []

        for _, row in subset_data_frame.iterrows():
            reference_point = row[GEOMETRY_COLUMN]
            distances: pandas.Series = data_frame[GEOMETRY_COLUMN].apply(lambda x: distance(x, reference_point))
            within_radius: pandas.Series = distances.le(RADIUS)

            # to_numpy() sidesteps index alignment; both sides are filtered by the same mask.
            neighbourhoods.append(data_frame.loc[within_radius, columns].assign(**{DISTANCE_COLUMN: distances[within_radius].to_numpy()}))

        if not neighbourhoods:
            return pandas.DataFrame(columns = columns + [DISTANCE_COLUMN])

        # A single concat at the end avoids re-copying the accumulated data on every iteration.
        return pandas.concat(neighbourhoods, ignore_index = True)

    def log(x: float) -> float:
        """Natural logarithm that maps values outside the domain (x <= 0) to -inf instead of raising."""
        try:
            return math.log(x)
        except ValueError:
            return -math.inf

    def apply_column_limits(axis: matplotlib.axes.Axes, column: str) -> None:
        """Apply the y-axis limits configured in COLUMN_LIMITS, if the column has any."""
        if column in COLUMN_LIMITS:
            y_min: Optional[float] = COLUMN_LIMITS[column][0]
            y_max: Optional[float] = COLUMN_LIMITS[column][1]
            axis.set_ylim(y_min, y_max)

    # Collate the target column as well, without duplicates and preserving the given order.
    collated_columns: list[str] = list(dict.fromkeys([target_column, *columns]))
    analysis_data_frame: pandas.DataFrame = collate_data_frame(data_frame, subset_data_frame, collated_columns)

    if analysis_data_frame.empty:
        warnings.warn("plot_manual_power_fits: no samples found within RADIUS of the reference rows; nothing to plot")
        return

    log_target: pandas.Series = analysis_data_frame[target_column].apply(log)
    curve_X: numpy.ndarray = numpy.linspace(0.1, RADIUS, 1000)

    # NOTE(review): panels 1 and 2 are labelled "Distance (m)" and limited to [0, RADIUS], yet
    # they plot `target_column` on the x axis; the collated "Distance" column is never used.
    # Either the labels/limits or the plotted x variable look stale - confirm the intent.
    for column in collated_columns:
        if column == target_column:
            continue

        log_column: pandas.Series = analysis_data_frame[column].apply(log)

        # Keep only the rows where both logarithms are finite (drops NaN and +/-inf).
        mask = numpy.isfinite(log_target) & numpy.isfinite(log_column)

        if mask.sum() < 2 or log_target[mask].nunique() < 2:
            # linregress needs at least two points with distinct x values.
            warnings.warn(f"Not enough finite data to fit {column} against {target_column}; column skipped")
            continue

        figure, axes = matplotlib.pyplot.subplots(nrows = 1, ncols = 3, figsize = (24, 8))

        # Panel 0: log-log scatter with the least-squares line.
        axes[0].scatter(log_target, log_column, alpha = 0.1)

        linear_regression_result: scipy.stats.LinregressResult = scipy.stats.linregress(log_target[mask], log_column[mask])
        best_fit_function: IFunction = Polynomial([linear_regression_result.intercept, linear_regression_result.slope])
        best_fit_X, best_fit_Y = best_fit_function.to_discrete(log_target[mask].min(), log_target[mask].max(), 1000)
        axes[0].plot(best_fit_X, best_fit_Y, linestyle = "-", color = "red")
        axes[0].set_xlabel(f"ln({target_column})")
        axes[0].set_ylabel(f"ln({column})")

        # Panel 1: density plot. This is best effort, but failures are reported rather than
        # silently swallowed (e.g. NameError while the seaborn import is commented out).
        try:
            seaborn.kdeplot(analysis_data_frame, x = target_column, y = column, ax = axes[1], fill = False, cmap = "YlGn", thresh = 0)
        except Exception as error:
            warnings.warn(f"Density plot of {column} against {target_column} skipped: {error!r}")
        axes[1].set_xlim(0, RADIUS)
        apply_column_limits(axes[1], column)
        axes[1].set_xlabel("Distance (m)")
        axes[1].set_ylabel(column)

        # Panel 2: linear-space scatter with the power curve implied by the log-log fit,
        # y = exp(intercept) * x ** slope.
        axes[2].scatter(analysis_data_frame[target_column], analysis_data_frame[column], color = "green", alpha = 0.1)
        curve_Y = numpy.exp(linear_regression_result.intercept) * numpy.power(curve_X, linear_regression_result.slope)
        axes[2].plot(curve_X, curve_Y, linestyle = "-", color = "red")
        axes[2].set_xlim(0, RADIUS)
        apply_column_limits(axes[2], column)
        axes[2].set_xlabel("Distance (m)")
        axes[2].set_ylabel(column)

        figure.suptitle(f"L: {str(best_fit_function)} (P = {linear_regression_result.pvalue:.2f}, Δm = {linear_regression_result.stderr:.2f})")

        figure.show()

# Power-fit figures for both areas; rows with gold above 0.5 are passed as the subset.
for _area_data_frame in (area_1_data_frame, area_2_data_frame):
    _high_gold_rows = _area_data_frame[_area_data_frame[GOLD_COLUMN] > 0.5]
    plot_manual_power_fits(_area_data_frame, _high_gold_rows, [GOLD_COLUMN] + RATIO_COLUMNS + RATIO_CONSTITUTENT_COLUMNS, GOLD_COLUMN)
# plot_manual_power_fits(area_1_data_frame, area_1_data_frame[area_1_data_frame[COPPER_COLUMN] > 100], [COPPER_COLUMN] + RATIO_COLUMNS + RATIO_CONSTITUTENT_COLUMNS, COPPER_COLUMN)
# plot_manual_power_fits(area_2_data_frame, area_2_data_frame[area_2_data_frame[COPPER_COLUMN] > 100], [COPPER_COLUMN] + RATIO_COLUMNS + RATIO_CONSTITUTENT_COLUMNS, COPPER_COLUMN)

# Null Domain (Deprecated)

In [None]:
# def create_null_domain_plots(data_frame: geopandas.GeoDataFrame, reference_points_data_frame: geopandas.GeoDataFrame, columns: list[str]):
#     for index, row in reference_points_data_frame.iterrows():
#         subset_data_frame: geopandas.GeoDataFrame = data_frame.copy()

#         DISTANCE_COLUMN: str = "Distance"
#         NUMBER_OF_COLUMNS: int = 2
#         NUMBER_OF_ROWS: int = int(math.ceil(len(columns) / NUMBER_OF_COLUMNS))

#         subset_data_frame[DISTANCE_COLUMN] = subset_data_frame.apply(lambda x: distance(x[GEOMETRY_COLUMN], row[GEOMETRY_COLUMN]), axis = 1)
        
#         figure: matplotlib.figure.Figure
#         axes: numpy.ndarray[matplotlib.axes.Axes]
#         figure, axes = matplotlib.pyplot.subplots(nrows = NUMBER_OF_ROWS, ncols = NUMBER_OF_COLUMNS, figsize = (8, 8))

#         for index, column in enumerate(columns):            
#             y_min: Optional[float] = None
#             y_max: Optional[float] = None

#             if column in COLUMN_LIMITS:
#                 y_min = COLUMN_LIMITS[column][0]
#                 y_max = COLUMN_LIMITS[column][1]

#             row_index: int = int(math.floor(index / NUMBER_OF_COLUMNS))
#             column_index: int = index % NUMBER_OF_COLUMNS
#             axes[row_index, column_index].scatter(subset_data_frame[DISTANCE_COLUMN], subset_data_frame[column])
#             axes[row_index, column_index].set_ylabel(column)
#             axes[row_index, column_index].set_ylim(y_min, y_max)

#             if column in COLUMN_FIT_OPTIONS:
#                 best_fit_subset_data_frame: geopandas.GeoDataFrame = subset_data_frame[subset_data_frame[column].apply(lambda x: (y_min is None or y_min <= x) and (y_max is None or x <= y_max))]
#                 # linear_regression_result: scipy.stats.LinregressResult = scipy.stats.linregress(best_fit_subset_data_frame[DISTANCE_COLUMN], best_fit_subset_data_frame[column])
#                 # best_fit_function: IFunction = Polynomial([linear_regression_result.intercept, linear_regression_result.slope])
#                 best_fit_function: IFunction = fit_function(Polynomial, best_fit_subset_data_frame[DISTANCE_COLUMN], best_fit_subset_data_frame[column], bounds = Bounds([0], [10000]))
#                 best_fit_X, best_fit_Y = best_fit_function.to_discrete(0, 150, 1000)
#                 axes[row_index, column_index].plot(best_fit_X, best_fit_Y, linestyle = "-", color = "red")
#                 axes[row_index, column_index].set_xlabel(str(best_fit_function))

#             axes[row_index, column_index].set_xlim(0, 150)

#         figure.supxlabel("Distance (m)")
#         title: str = f"{row[ID_COLUMN]} - {row[TARGET_COLUMN]} {TARGET_COLUMN}"

#         figure.suptitle(title)
#         figure.tight_layout()
#         figure.show()

# null_domain_data_frame: geopandas.GeoDataFrame = data_frame[data_frame[GEOMETRY_COLUMN].apply(lambda x: NULL_DOMAIN_BOTTOM_LEFT.x <= x.x and x.x <= NULL_DOMAIN_TOP_RIGHT.x and NULL_DOMAIN_BOTTOM_LEFT.y <= x.y and x.y <= NULL_DOMAIN_TOP_RIGHT.y)]
# null_domain_reference_data_frame: geopandas.GeoDataFrame = null_domain_data_frame[null_domain_data_frame[ID_COLUMN] == "M021872"]

# create_null_domain_plots(null_domain_data_frame, null_domain_data_frame[null_domain_data_frame[ID_COLUMN] == "M021872"], [TARGET_COLUMN] + RATIO_COLUMNS)
# create_null_domain_plots(null_domain_data_frame, null_domain_data_frame[null_domain_data_frame[ID_COLUMN] == "M021872"], RATIO_CONSTITUTENT_COLUMNS)

# Ad-Hoc

In [None]:
# data_subset = data_frame[data_frame[TARGET_COLUMN].ge(1)][data_frame["CCPI"].le(1)]
# power: PowerWithOffset = fit_function(PowerWithOffset, data_subset[TARGET_COLUMN], data_subset["CCPI"], [-1], fixed_parameters = { "r": -1, "C": 1 }, bounds = ([-numpy.inf], [0]))
# print(power)

# figure: matplotlib.figure.Figure
# axes: numpy.ndarray[matplotlib.axes.Axes]
# figure, axes = matplotlib.pyplot.subplots(nrows = 1, ncols = 1, figsize = (8, 8))
# axes.scatter(data_frame[TARGET_COLUMN], data_frame["CCPI"])
# axes.plot(numpy.linspace(0, 50, 100), [power(x) for x in numpy.linspace(0, 50, 100)], 'r-')

# axes.set_xlim(0, 50)
# axes.set_ylim(0, 1)

# data_subset = data_frame[data_frame[TARGET_COLUMN].ge(0.01)]
# power: Power = fit_function(Power, data_subset[TARGET_COLUMN], data_subset["Ca (%)"], [1], fixed_parameters = { "r": -1 }, bounds = ([0], [100]))
# print(power)

# figure: matplotlib.figure.Figure
# axes: numpy.ndarray[matplotlib.axes.Axes]
# figure, axes = matplotlib.pyplot.subplots(nrows = 1, ncols = 1, figsize = (8, 8))
# axes.scatter(data_frame[TARGET_COLUMN], data_frame["Ca (%)"])
# axes.plot(numpy.linspace(0, 50, 100), [power(x) for x in numpy.linspace(0, 50, 100)], 'r-')

# axes.set_xlim(0, 50)

# data_subset = data_frame[data_frame[TARGET_COLUMN].ge(2)][data_frame["Na/Al"].le(1)]
# power: Power = fit_function(Power, data_subset[TARGET_COLUMN], data_subset["Na/Al"], [0.4], fixed_parameters = { "r": -1 }, bounds = ([0], [numpy.inf]))
# print(power)

# figure: matplotlib.figure.Figure
# axes: numpy.ndarray[matplotlib.axes.Axes]
# figure, axes = matplotlib.pyplot.subplots(nrows = 1, ncols = 1, figsize = (8, 8))
# axes.scatter(data_frame[TARGET_COLUMN], data_frame["Na/Al"])
# axes.plot(numpy.linspace(0, 50, 100), [power(x) for x in numpy.linspace(0, 50, 100)], 'r-')

# axes.set_xlim(0, 50)
# axes.set_ylim(0, 1)