### PYTHON PROJECT

In this project, the mission was to use what was learned throughout the course to create a Python project of interest. Each student was encouraged to explore anything beyond the course and challenge themselves. My personal objective was to re-create a GUI application in the typical MATLAB style, one for Machine Learning algorithms and calculations. To do that, I began by learning Tkinter, a standard Python library for creating Graphical User Interfaces (GUIs). 

Success for my mission meant building a platform where the user can import data and perform one of the following Machine Learning techniques to make predictions from CSV data: Linear Regression, Logistic Regression, Support Vector Machine, K-Nearest Neighbors, K-means, Random Forest, Decision Tree, and Naive Bayes. In addition, this is useful because it facilitates data visualization and quick data analysis. 

In the end, the performance will be tested by calculating its Root Mean Squared Error, Sum of Squared Errors, Degrees of Freedom (t-distribution), and Adjusted R-Squared.

In [2]:
# ---App Section---
from tkinter import *
import pandas as pd
import matplotlib.pyplot as plt
from tkinter import Tk, ttk, filedialog, Scrollbar, VERTICAL, HORIZONTAL, RIGHT, LEFT, Y, X, BOTH, BOTTOM, StringVar, PanedWindow
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk
import plotly.graph_objects as go
from plotly.offline import plot
import tempfile, webbrowser, os
# ---Calculation Section---
### Matplotlib Packages
from matplotlib.figure import Figure
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk

### Sklearn Packages
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
scaler = MinMaxScaler()
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report 

### Scipy Packages
from scipy.special import expit

### Other Packages
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import string
import os

# Outside variables
# Module-level state shared by the GUI callbacks through ``global``:
#   df       - the currently loaded data; an empty DataFrame until a CSV is imported
#   num_cols - names of the numeric columns of ``df``, in column order
df = pd.DataFrame()
num_cols = []

# _______________________________
# Loading CSV
# ---------
def csvFile_import():
    """Ask the user for a CSV file and load it into the application.

    On success the module-level ``df`` is replaced by the file's contents with
    every row that contains a missing value dropped, ``num_cols`` is replaced
    by the names of its numeric columns, the three axis comboboxes are
    refilled and their selections cleared, and the table view is redrawn.

    Cancelling the dialog changes nothing.  If the file cannot be read or
    parsed, an error dialog is shown and the previously loaded data is kept.
    """
    global df, num_cols

    # Imported locally: ``from tkinter import *`` does not expose the submodule.
    from tkinter import messagebox

    file_path = filedialog.askopenfilename(title="Open CSV File", filetypes=[("CSV Files", "*.csv")])
    if not file_path:
        return

    # Read into a local first so a failed load never clobbers the current data.
    try:
        loaded = pd.read_csv(file_path).dropna()
    except (OSError, ValueError) as exc:
        # pandas' ParserError / EmptyDataError and UnicodeDecodeError are all
        # ValueError subclasses; OSError covers unreadable files.
        messagebox.showerror("Open CSV File", f"Could not load '{file_path}':\n{exc}")
        return

    df = loaded
    num_cols = df.select_dtypes(include=['number']).columns.tolist()

    # Offer only numeric columns on every axis and reset any stale selection.
    for combo, selection in ((combi_x, x_axis_var), (combi_y, y_axis_var), (combi_z, z_axis_var)):
        combo.configure(values=num_cols)
        selection.set('')

    display_table(df)

# _______________________________
# Table
# ------
def display_table(dataframe):
    """Replace the contents of the module-level ``tree`` Treeview with *dataframe*.

    Existing rows are removed, the Treeview columns are set to the frame's
    columns (headings only, 100 px wide each), and one Treeview row is
    inserted per DataFrame row.

    Rows are read with ``itertuples`` rather than ``iterrows``: ``iterrows``
    converts every row to a single-dtype Series, which turns integers into
    floats (``1`` shown as ``1.0``) whenever the frame also has float columns.
    """
    # Clear table in one call instead of deleting item by item.
    tree.delete(*tree.get_children())

    columns = list(dataframe.columns)
    tree["columns"] = columns
    tree["show"] = "headings"

    for col in columns:
        tree.heading(col, text=col)
        tree.column(col, width=100)

    # name=None yields plain tuples and keeps each column's own dtype.
    for record in dataframe.itertuples(index=False, name=None):
        tree.insert("", "end", values=record)

# _______________________________
# 2D Plot
# --------
def plotting_2D():
    """Draw a scatter plot of the selected X and Y columns inside ``plot_frame``.

    Does nothing when no data is loaded or when either combobox value is
    empty or not a column of ``df``.  Otherwise the previous contents of
    ``plot_frame`` are destroyed and replaced by the embedded plot, its
    navigation toolbar and a "Save 2D Plot" button.
    """
    if df.empty:
        return

    x_col = x_axis_var.get()
    y_col = y_axis_var.get()
    if not (x_col and y_col and x_col in df.columns and y_col in df.columns):
        return

    # Build the figure directly instead of through pyplot: pyplot keeps a
    # reference to every figure it creates, so each click would leak one.
    fig = Figure(figsize=(5, 4))
    ax = fig.add_subplot(111)
    ax.scatter(df[x_col], df[y_col], c='blue', edgecolors='k', alpha=0.7)
    ax.set_xlabel(x_col)
    ax.set_ylabel(y_col)
    ax.set_title(f"{y_col} vs {x_col} (Scatter Plot)")

    # Clear previous plot
    for widget in plot_frame.winfo_children():
        widget.destroy()

    # Plot graph in Tkinter
    canvas = FigureCanvasTkAgg(fig, master=plot_frame)
    canvas.draw()
    canvas.get_tk_widget().pack(fill=BOTH, expand=True)

    # Add interactive toolbar
    toolbar = NavigationToolbar2Tk(canvas, plot_frame)
    toolbar.update()
    toolbar.pack()

    # Save: the callback must name the function that actually exists
    # (save_plotting_2D); the former name raised NameError on click.
    save_button = ttk.Button(plot_frame, text="Save 2D Plot", command=lambda: save_plotting_2D(fig))
    save_button.pack(pady=5)

def save_plotting_2D(fig):
    """Ask where to store *fig* and write it there at 300 dpi.

    The dialog offers PNG (default extension) and JPEG.  Nothing is written
    when the dialog is cancelled.  The destination is echoed to stdout.
    """
    image_types = [("PNG Image", "*.png"), ("JPEG Image", "*.jpg")]
    destination = filedialog.asksaveasfilename(defaultextension=".png", filetypes=image_types)
    if not destination:
        return

    fig.savefig(destination, dpi=300)
    print(f"2D plot saved at: {destination}")

# Linear Regression SECTION
# __________________________

def analyze_plot():
    """Run a one-feature linear regression on each adjacent pair of numeric columns.

    For consecutive entries ``num_cols[i - 1]`` and ``num_cols[i]`` the later
    column is min-max scaled and used as the feature, the earlier one as the
    target.  The data is split 70/30 with a fixed seed and a
    ``LinearRegression`` is fitted on the training part.  On the test part
    the function reports R², RMSE (target vs. prediction), SSE, a degrees of
    freedom value that has the form of the Welch-Satterthwaite approximation
    applied to the test feature and test target, and adjusted R².

    The statistics for all pairs replace the contents of ``result_text``.
    The fit of the last pair is drawn in ``plot_frame``, replacing whatever
    was shown there.  With no data loaded a warning is appended to
    ``result_text``; with fewer than two numeric columns a notice is shown.
    """
    if df.empty:
        result_text.insert(END, "⚠️ Please load a CSV file first.\n")
        return

    output_lines = []
    last_fit = None  # what is needed to draw the final pair after the loop

    # zip pairs each column with its successor: (target, feature).
    for target_name, feature_name in zip(num_cols, num_cols[1:]):
        xdata = scaler.fit_transform(np.array(df[feature_name]).reshape(-1, 1))
        ydata = df[target_name]

        x_train, x_test, y_train, y_test = train_test_split(
            xdata, ydata, test_size=0.3, random_state=23
        )

        lr = LinearRegression()
        lr.fit(x_train, y_train)
        y_pred_test = lr.predict(x_test)

        # Stats
        n_x = len(x_test)
        n_y = len(y_test)
        r_squared = lr.score(x_test, y_test)
        # RMSE compares the true targets with the model's predictions
        # (comparing x_test with y_test measured nothing about the model).
        rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))
        sse = np.sum((y_pred_test - y_test) ** 2)
        # Squared standard deviations, i.e. variances of the two test samples.
        var_xtest = np.std(x_test) ** 2
        var_ytest = np.std(y_test) ** 2
        deg_of_freedom = (var_xtest / n_x + var_ytest / n_y) ** 2 / (
            (var_xtest / n_x) ** 2 / (n_x - 1) + (var_ytest / n_y) ** 2 / (n_y - 1)
        )
        adj_rsquared = 1 - (1 - r_squared) * (n_y - 1) / (n_y - x_test.shape[1] - 1)

        output_lines.append(
            f"\n📈 Linear Regression for {target_name} vs {feature_name}:\n"
            f"R² = {r_squared:.4f}\n"
            f"RMSE = {rmse:.4f}\n"
            f"SSE = {sse:.4f}\n"
            f"Degrees of Freedom = {deg_of_freedom:.4f}\n"
            f"Adjusted R² = {adj_rsquared:.4f}\n"
        )

        last_fit = (feature_name, target_name, xdata, ydata, x_train, lr.predict(x_train))

    if last_fit is not None:
        feature_name, target_name, xdata, ydata, x_train, y_pred_train = last_fit

        # Only the last pair ever stays visible, so render just that one, and
        # build it without pyplot so no figure is left registered and unclosed.
        fig = Figure(figsize=(5, 4))
        ax = fig.add_subplot(111)
        ax.scatter(xdata, ydata, alpha=0.6, color='blue')
        ax.plot(x_train, y_pred_train, color='red')
        ax.set_xlabel(f'{feature_name}')
        ax.set_ylabel(f'{target_name}')
        ax.set_title("Linear Regression Fit")

        # Clear previous plot
        for widget in plot_frame.winfo_children():
            widget.destroy()

        canvas = FigureCanvasTkAgg(fig, master=plot_frame)
        canvas.draw()
        canvas.get_tk_widget().pack(fill=BOTH, expand=True)

        toolbar = NavigationToolbar2Tk(canvas, plot_frame)
        toolbar.update()
        toolbar.pack()

    result_text.delete(1.0, END)
    if output_lines:
        result_text.insert(END, "\n".join(output_lines))
    else:
        result_text.insert(END, "⚠️ At least two numeric columns are needed for a regression.\n")


# Correlation Plot SECTION
# __________________________

def correlation_plot():
    """Report the correlation of each adjacent pair of numeric columns.

    For consecutive entries of ``num_cols`` the 2x2 correlation matrix of the
    two columns is computed with ``DataFrame.corr`` and its off-diagonal
    value is recorded.  All values replace the contents of ``result_text``.
    The heatmap of the last pair is drawn in ``plot_frame``, replacing
    whatever was shown there.  With no data loaded a warning is appended to
    ``result_text`` instead.
    """
    if df.empty:
        result_text.insert(END, "⚠️ Please load a CSV file first.\n")
        return

    pair_corr = {}
    last_matrix = None

    # zip pairs each numeric column with its successor.
    for left, right in zip(num_cols, num_cols[1:]):
        corr_matrix = df.loc[:, [left, right]].corr()
        pair_corr[f"{left} and {right}"] = corr_matrix.iloc[0, 1]
        last_matrix = corr_matrix

    if last_matrix is not None:
        # Only the last pair's heatmap stays on screen, so render just that
        # one instead of creating (and never closing) a pyplot figure per pair.
        for widget in plot_frame.winfo_children():
            widget.destroy()

        fig = Figure(figsize=(4, 3))
        # Attach the Tk canvas before plotting so the figure already has a
        # real renderer when seaborn lays out the tick labels.
        canvas = FigureCanvasTkAgg(fig, master=plot_frame)
        ax = fig.add_subplot(111)
        sns.heatmap(last_matrix, vmin=-1, vmax=1, annot=True, cmap='coolwarm', ax=ax)
        ax.set_title('Correlation Heatmap')

        canvas.draw()
        canvas.get_tk_widget().pack(fill=BOTH, expand=True)

        toolbar = NavigationToolbar2Tk(canvas, plot_frame)
        toolbar.update()
        toolbar.pack()

    result_text.delete(1.0, END)
    result_text.insert(END, "🔥 Correlation Results:\n")
    for pair, val in pair_corr.items():
        result_text.insert(END, f"{pair}: {val:.4f}\n")


# _______________________________
# 3D Plot
# --------
def plotting_3D():
    """Open an interactive 3D scatter plot of the selected X, Y and Z columns.

    Does nothing when no data is loaded or when any of the three combobox
    values is empty or not a column of ``df``.  Otherwise a plotly figure is
    written to a temporary HTML file, opened in the web browser, and the user
    is then asked whether to save a copy via ``save_plotting_3D``.

    The temporary file is deliberately not deleted here, because the browser
    still has to read it after this function returns.
    """
    from pathlib import Path

    if df.empty:
        return

    x_col = x_axis_var.get()
    y_col = y_axis_var.get()
    z_col = z_axis_var.get()

    # The comboboxes accept free text, so check the names really are columns
    # (same guard as the 2D plot) instead of failing with a KeyError.
    selected = (x_col, y_col, z_col)
    if not all(selected) or any(name not in df.columns for name in selected):
        return

    # The three axes share one look; only the title differs.
    def axis_style(title):
        return dict(title=title,
                    backgroundcolor='#f0f0f0',
                    gridcolor='gray',
                    zerolinecolor='gray',
                    color='black')

    fig = go.Figure(data=[go.Scatter3d(
        x=df[x_col],
        y=df[y_col],
        z=df[z_col],
        mode='markers',
        marker=dict(
            size=5,
            color=df[z_col],
            colorscale='Inferno',
            opacity=0.8
        )
    )])

    fig.update_layout(
        title=f"{z_col} vs {x_col} and {y_col}",
        paper_bgcolor='#f0f0f0',
        font=dict(color='black'),
        scene=dict(
            xaxis=axis_style(x_col),
            yaxis=axis_style(y_col),
            zaxis=axis_style(z_col)
        )
    )

    # Temporary HTML file: only reserve the name, then close the handle
    # before plotly writes to that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.html') as tmp_file:
        html_path = tmp_file.name

    plot(fig, filename=html_path, auto_open=False)
    # webbrowser expects a URL; a bare file path is not portable.
    webbrowser.open(Path(html_path).as_uri())

    # Ask to save
    save_plotting_3D(fig)

def save_plotting_3D(fig):
    """Ask where to store the plotly figure *fig* and write it there as HTML.

    Nothing is written when the dialog is cancelled.  The file is produced
    with plotly's offline ``plot`` without opening it, and the destination
    is echoed to stdout.
    """
    html_types = [("HTML File", "*.html")]
    destination = filedialog.asksaveasfilename(defaultextension=".html", filetypes=html_types)
    if not destination:
        return

    plot(fig, filename=destination, auto_open=False)
    print(f"3D plot saved at: {destination}")

#________________________________
# App
# ----
# Top-level application window; every widget created below hangs off it.
root = Tk()
root.title("Global-Vista: Analysis App")

# Title
# Large bold heading; packed first, so it is placed above everything else.
title_label = Label(
    root,
    text="ARIZONA STATE UNIVERSITY",
    font=("Noto Sans", 26, "bold"),   
    fg="#1E3A8A",                     
    pady=10
)
title_label.pack()

# Subtitle
# Smaller italic line packed directly under the heading.
subtitle_label = Label(
    root,
    text="PYTHON PROJECT by Yannick Gata",
    font=("Noto Sans", 14, "italic"),  
    fg="#374151"                       
)
subtitle_label.pack()

# Info Frame
# Control strip holding the axis selectors and the load/plot buttons,
# laid out with grid: labels in row 0, comboboxes in row 1, buttons in row 2.
info_frame = ttk.Frame(root)
info_frame.pack(padx=10, pady=(0, 5))

# Axis variables
# Each StringVar mirrors the text of one combobox; the plotting callbacks
# read the chosen column names from these.
x_axis_var = StringVar()
y_axis_var = StringVar()
z_axis_var = StringVar()

# Axis selectors
# The comboboxes start without values; csvFile_import fills them with the
# numeric column names once a file is loaded.
ttk.Label(info_frame, text="X-axis").grid(row=0, column=0)
combi_x = ttk.Combobox(info_frame, textvariable=x_axis_var)
combi_x.grid(row=1, column=0)

ttk.Label(info_frame, text="Y-axis").grid(row=0, column=1)
combi_y = ttk.Combobox(info_frame, textvariable=y_axis_var)
combi_y.grid(row=1, column=1)

ttk.Label(info_frame, text="Z-axis").grid(row=0, column=2)
combi_z = ttk.Combobox(info_frame, textvariable=z_axis_var)
combi_z.grid(row=1, column=2)

# Buttons
# One button per column of the strip, each wired to its callback.
ttk.Button(info_frame, text="Load CSV", command=csvFile_import).grid(row=2, column=0, pady=5)
ttk.Button(info_frame, text="2D Plot", command=plotting_2D).grid(row=2, column=1, pady=5)
ttk.Button(info_frame, text="3D Plot", command=plotting_3D).grid(row=2, column=2, pady=5)

# Main window split (top = table, bottom = plot)
# Vertical PanedWindow that fills the rest of the window; panes are stacked
# in the order they are added.
main_pane = PanedWindow(root, orient="vertical")
main_pane.pack(fill=BOTH, expand=True)

# Table section
# First pane: the data table, at least 100 px high.
table_frame = ttk.Frame(main_pane)
main_pane.add(table_frame, minsize=100)

# Scrollbars are packed before the Treeview so they keep the right and
# bottom edges and the table takes the remaining space.
vsb = Scrollbar(table_frame, orient=VERTICAL)
vsb.pack(side=RIGHT, fill=Y)
hsb = Scrollbar(table_frame, orient=HORIZONTAL)
hsb.pack(side=BOTTOM, fill=X)

# Two-way wiring: the tree reports its view to the scrollbars (…scrollcommand)
# and the scrollbars drive the tree's view (command=tree.yview / tree.xview).
tree = ttk.Treeview(table_frame, yscrollcommand=vsb.set, xscrollcommand=hsb.set)
tree.pack(side=LEFT, fill=BOTH, expand=True)
vsb.config(command=tree.yview)
hsb.config(command=tree.xview)

# Plot section
# Second pane: container the plotting callbacks clear and refill, at least
# 200 px high.
plot_frame = ttk.Frame(main_pane)
main_pane.add(plot_frame, minsize=200)

# _____________________________________
# Calculation Section
# ---------------------
# Create Frame
# NOTE(review): this frame is packed at the bottom of the window, but no
# widget visible in this file is ever placed inside it - confirm whether it
# is still needed.
frame_contains = ttk.Frame(root)
frame_contains.pack(side=BOTTOM, fill=X)

# First LabelFrame
# NOTE: the name ``info_frame`` is rebound here. The right-hand side still
# refers to the original control strip (used as the parent); from this line
# on ``info_frame`` means the new "Data Analysis" LabelFrame, which sits in
# column 3 of that strip.
info_frame = ttk.LabelFrame(info_frame,
                                 text = "Data Analysis")
info_frame.grid(row=1, column=3)
# Analyze
# Runs the pairwise linear regression and shows its statistics.
button_ana = ttk.Button(info_frame,
                             text = "Linear Regression", command=analyze_plot)
button_ana.grid(row=2, column=3)

# Correlation
# Runs the pairwise correlation report.
button_corr = ttk.Button(info_frame,
                             text = "Correlation", command=correlation_plot)
button_corr.grid(row=3, column=3)

# Apply the same padding to every child of the LabelFrame (the two buttons).
for widgety in info_frame.winfo_children():
    widgety.grid_configure(padx=10, pady=5)

# Add this new frame under your plotting section
# Third pane of main_pane: the text area that receives the analysis output.
result_frame = ttk.Frame(main_pane)
main_pane.add(result_frame, minsize=10)

# Text box for analysis 
# Word-wrapped, 10 lines high; the analysis callbacks write their results here.
result_text = Text(result_frame, wrap="word", height=10, bg="#F9FAFB", fg="#111827", font=("Consolas", 10))
result_text.pack(side=LEFT, fill=BOTH, expand=True)

# Scrollbar 
# Vertical scrollbar wired both ways to the text box.
scrollbar_result = Scrollbar(result_frame, command=result_text.yview)
scrollbar_result.pack(side=RIGHT, fill=Y)
result_text.config(yscrollcommand=scrollbar_result.set)



# Run GUI
# Start the Tk event loop.
root.mainloop()