# Import Library

In [None]:
# =========================
# IMPORT LIBRARY ==========
# =========================

import pandas as pd # Import library untuk manipulating dataset
import seaborn as sns # Import library untuk visualisasi EDA
import numpy as np # Import library untuk manipulating number data
import tkinter as tk

from pandas.api.types import is_string_dtype, is_numeric_dtype # Import library untuk membaca type data column Dataframe
from matplotlib import pyplot as plt # Import library untuk Exploratory Data Analysis
from tkinter import * # Import library untuk pembuatan GUI
from tkinter import ttk # Import library turunan TKInter untuk tambahan widget dan component



# Load Datasets

In [None]:
data_path = 'datasets/diabetes_data_upload.csv' # relative path to the dataset CSV file
df = pd.read_csv(data_path) # load the CSV found at data_path into a DataFrame
df.head() # preview the first rows of the DataFrame

In [None]:
df.shape # check the data dimensions before pre-processing

In [None]:
df.nunique() # count the distinct values in every column

In [None]:
df.info() # print the summary information of the DataFrame

# Duplicate Data Visualization

In [None]:
# Bar plot comparing the number of duplicated rows with the number of
# non-duplicated rows, each bar annotated with its share of the dataset.
plt.figure(figsize=(6, 8))
plt.title("Diagram presentase jumlah data duplikat")

count_duplicate = df.duplicated().sum()  # rows flagged by df.duplicated()
non_duplicate = len(df) - count_duplicate  # every remaining row
total = count_duplicate + non_duplicate  # equals len(df)

# Share of each group in percent, in the same order as the bars below.
precentages = [
    count_duplicate / total * 100,
    non_duplicate / total * 100
    
]

res = sns.barplot(
    x=['Duplikat', 'Tidak Duplikat'],
    y=[count_duplicate, non_duplicate]
)

# Write the percentage label centred just above each bar.
for i, p in enumerate(res.patches):
    res.text(
        p.get_x() + p.get_width() / 2,
        p.get_height(),
        f'{precentages[i]:.1f}%',
        ha='center',
        va='bottom'
    )

plt.ylabel("Jumlah Data")
plt.show()


# Data PreProcessing

In [None]:
# =====================
# DATA PREPROCESSING ==
# =====================

# A notebook cell only echoes its final expression, so the missing-value
# counts are printed explicitly; as a bare expression they are never shown.
print(df.isna().sum())  # number of missing values per column
df.duplicated().sum()  # number of duplicated rows (echoed as the cell output)

In [None]:
df = df.drop_duplicates() # drop duplicated rows (NOTE: the index is not reset by this call)
df = df.fillna(df.mode().iloc[0]) # fill missing values, per column, with the first row of df.mode()

In [None]:
df.shape # check the data dimensions after removing duplicated rows

In [None]:
df.nunique() # count the distinct values in every column after cleaning

# Exploratory Data Analysis (EDA)

In [None]:
# Bar plot of Age (y axis) for each class (x axis), with bars split by Gender.
plt.figure(figsize=(6,8))
sns.barplot(data=df, y="Age", x="class", hue="Gender")
plt.title("Perbandingan Gender Diabetes Tiap Usia")
plt.show()


In [None]:
# One pie chart per non-int64 feature showing how its values are distributed
# among the rows labelled 'Positive'.
positive_rows = df[df['class'] == 'Positive']

# Exclude the target itself: restricted to positive rows it is always a single
# 100% slice. DataFrame.drop returns a new frame, so its result has to be used.
feature_columns = df.drop('class', axis=1).select_dtypes(exclude=['int64'])

for x in feature_columns:
    count_positive = positive_rows[x].value_counts()
    plt.pie(count_positive, autopct='%1.1f%%')
    # value_counts() orders its result by frequency, so the legend labels are
    # read from its index; a fixed label list would mislabel the slices
    # whenever the frequency order differs from the assumed one.
    legend = [str(label) for label in count_positive.index]
    plt.legend(legend)
    plt.title(x)
    plt.show()

In [None]:
def encode_column(col):
    """Encode the distinct values of a column as integer codes.

    Codes are assigned in the sorted order of the distinct values, so the
    result does not depend on which value happens to appear first in the
    data. Two columns holding the same labels (for example "No"/"Yes")
    therefore always receive the same codes ("No" -> 0, "Yes" -> 1).

    Args:
        col: pandas Series whose non-missing values are mutually sortable.

    Returns:
        A Series with the same index as ``col`` holding the integer codes.
        Missing values are not assigned a code and stay missing.
    """
    unique_vals = sorted(col.dropna().unique())
    mapping = {val: idx for idx, val in enumerate(unique_vals)}
    return col.map(mapping)


# Encode every non-numeric column as integer codes. is_numeric_dtype is used
# instead of comparing the dtype with "object" so that columns stored with a
# dedicated string or categorical dtype are encoded as well.
for col in df.columns:
    if not is_numeric_dtype(df[col]):
        df[col] = encode_column(df[col])

# Normalise the column names: lower-case, spaces replaced by underscores.
df.columns = df.columns.str.lower().str.replace(' ', '_')

df.head()


In [None]:
# Heatmap of the pairwise column correlations returned by df.corr(),
# with the value written inside every cell (annot=True).

plt.figure(figsize=(20, 15))

sns.heatmap(df.corr(), annot=True, cmap="coolwarm")
plt.title("Heatmap Korelasi Fitur")
plt.show()

In [None]:
class Node:
    """One node of a binary decision tree.

    A node either describes a split (``feature``, ``threshold`` and the two
    children ``left`` / ``right``) or carries a ``value``. Every attribute
    defaults to None.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        # Split description: which feature is compared against which threshold.
        self.feature, self.threshold = feature, threshold
        # Child subtrees.
        self.left, self.right = left, right
        # Stored value; None means the node holds no value.
        self.value = value

    def is_leaf_node(self):
        """Return True when the node holds a value, False otherwise."""
        if self.value is None:
            return False
        return True

In [None]:
class DecisionTree:
    """Binary decision tree classifier grown by maximising information gain.

    Every internal node sends a sample to the left child when
    ``x[feature] <= threshold`` and to the right child otherwise. Leaves
    store the most common label of the training samples that reached them.

    Args:
        n_features: number of feature columns drawn (without replacement) as
            split candidates at every node. A falsy value (None or 0) means
            "use all columns"; larger values are capped at the column count
            when ``fit`` is called.
        min_best_split: a node with fewer samples than this becomes a leaf.
        max_depth: a node at this depth (root is depth 0) becomes a leaf.
    """

    def __init__(self, n_features, min_best_split=2, max_depth=2):
        self.min_best_split = min_best_split
        self.max_depth = max_depth
        self.n_features = n_features
        self.root = None

    def fit(self, X, y):
        """Grow the tree from a 2-D feature array ``X`` and label array ``y``.

        ``y`` must be indexable with integer index arrays and hold values
        that can be cast to non-negative integers (they go to np.bincount).
        """
        self.n_features = X.shape[1] if not self.n_features else min(X.shape[1], self.n_features)
        self.root = self._growing_tree(X, y, depth=0)

    def _growing_tree(self, X, y, depth=0):
        """Recursively build the subtree for the samples ``X`` / ``y``."""
        n_sample, n_feats = X.shape
        n_labels = len(np.unique(y))

        # Stopping criteria: depth limit, pure node, or too few samples.
        if depth >= self.max_depth or n_labels == 1 or n_sample < self.min_best_split:
            return Node(value=self._most_common_label(y))

        # Random subset of columns considered at this node.
        feat_idxs = np.random.choice(n_feats, self.n_features, replace=False)

        best_feature, best_thresh = self.find_best_split(X, y, feat_idxs)
        if best_feature is None:
            # No candidate threshold exists (every sampled column is
            # constant), so the node cannot be divided any further.
            return Node(value=self._most_common_label(y))

        left_idxs, right_idxs = self._split(X[:, best_feature], best_thresh)
        if len(left_idxs) == 0 or len(right_idxs) == 0:
            # A one-sided split (possible e.g. with NaN values) would recurse
            # into an empty child, which has no label to vote on.
            return Node(value=self._most_common_label(y))

        left = self._growing_tree(X[left_idxs], y[left_idxs], depth + 1)
        right = self._growing_tree(X[right_idxs], y[right_idxs], depth + 1)

        return Node(best_feature, best_thresh, left, right)

    def _most_common_label(self, y):
        """Return the most frequent label in ``y`` (first seen wins ties)."""
        from collections import Counter

        return Counter(y).most_common(1)[0][0]

    def find_best_split(self, X, y, feat_idxs):
        """Return the ``(feature index, threshold)`` with the highest gain.

        Only the columns listed in ``feat_idxs`` are examined. Returns
        ``(None, None)`` when none of them offers a candidate threshold.
        """
        best_gain = -1
        split_idx, split_threshold = None, None

        for feat_idx in feat_idxs:
            X_column = X[:, feat_idx]
            # The largest value is never a usable threshold: no sample is
            # strictly greater than it, so the right side would be empty.
            thresholds = np.unique(X_column)[:-1]

            for threshold in thresholds:
                gain = self._information_gain(y, X_column, threshold)

                if gain > best_gain:
                    best_gain = gain
                    split_idx = feat_idx
                    split_threshold = threshold

        return split_idx, split_threshold

    def _information_gain(self, y, X_column, threshold):
        """Return parent entropy minus the size-weighted child entropy.

        A threshold that leaves one side empty yields a gain of 0.
        """
        parent_entropy = self._entropy(y)

        left_idx, right_idx = self._split(X_column, threshold)
        if len(left_idx) == 0 or len(right_idx) == 0:
            return 0

        n = len(y)
        n_l, n_r = len(left_idx), len(right_idx)
        e_l, e_r = self._entropy(y[left_idx]), self._entropy(y[right_idx])

        child_entropy = (n_l / n) * e_l + (n_r / n) * e_r
        return parent_entropy - child_entropy

    def _split(self, X_column, split_threshold):
        """Return the index arrays of values ``<=`` and ``>`` the threshold."""
        left_idxs = np.argwhere(X_column <= split_threshold).flatten()
        right_idxs = np.argwhere(X_column > split_threshold).flatten()
        return left_idxs, right_idxs

    def _entropy(self, y):
        """Return the Shannon entropy of the labels in ``y`` (natural log)."""
        # np.bincount only accepts integer input.
        if not np.issubdtype(y.dtype, np.integer):
            y = y.astype(int)
        p = np.bincount(y) / len(y)
        p = p[p > 0]  # absent labels contribute nothing and log(0) is undefined
        return -np.sum(p * np.log(p))

    def predict(self, X):
        """Return an array with the predicted label for every row of ``X``."""
        return np.array([self._traverse_tree(x, self.root) for x in X])

    def _traverse_tree(self, x, node):
        """Walk from ``node`` down to a leaf for sample ``x``; return its value."""
        while not node.is_leaf_node():
            node = node.left if x[node.feature] <= node.threshold else node.right
        return node.value
    

In [None]:
# Separate the DataFrame into the feature matrix X (every column except
# 'class') and the target vector y (the 'class' column).
X = df.drop('class', axis=1).values
y = df['class'].values

y = y.astype(int)  # labels as integers

X = X.astype(float)  # features as floats

from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# n_features=None lets DecisionTree.fit consider every column at each node.
classify = DecisionTree(n_features=None, min_best_split=5, max_depth=10)
classify.fit(X_train, y_train)

predictions = classify.predict(X_test)

def accuracy(y_test, predictions):
    """Return the share of entries in ``predictions`` equal to ``y_test``.

    The element-wise matches are counted and divided by ``len(y_test)``.
    """
    n_correct = np.sum(y_test == predictions)
    n_total = len(y_test)
    return n_correct / n_total

# Score the test-set predictions and print the result both as a fraction
# (4 decimals) and as a percentage (2 decimals).
acc = accuracy(y_test, predictions)
print(f"Accuracy: {acc:.4f} ({acc*100:.2f}%)")

# GUI

In [None]:
import tkinter as tk
from tkinter import ttk


class GUI:
    """Tkinter window for the diabetes prediction demo.

    Builds a header banner on ``root`` and two tabs on ``tabCtrl``:
    "Prediksi" (symptom check boxes, a predict button and an output panel)
    and "Tentang" (static labels describing the model and its evaluation).

    Args:
        root: the top-level ``tk.Tk`` window; its size, colour and title are
            configured here.
        tabCtrl: the ``ttk.Notebook`` the two tabs are added to.

    Attributes set while building the interface:
        inputs: dict mapping each feature name to the ``tk.IntVar`` bound to
            its check box (1 when ticked, 0 otherwise).
        result_container, status_label, result_label: output panel widgets.
    """

    # Colours shared by several widgets.
    _BG = "#f1f5f9"
    _ACCENT = "#4caf50"
    _CARD_BG = "#ffffff"
    _TEXT = "#1c2b36"

    def __init__(self, root, tabCtrl):
        self.root = root
        self.root.geometry("500x650")
        self.root.configure(bg=self._BG)
        self.root.title("GUI Prediksi Diabetes")
        self.tabCtrl = tabCtrl

        self.header_font = ("Segoe UI", 18, "bold")
        self.label_font = ("Inter", 10)
        self.entry_font = ("Inter", 11)
        self.result_font = ("Inter", 16, "bold")

        self.setup_ui()

    def setup_ui(self):
        """Build the header banner and both notebook tabs."""
        self._build_header()
        self._build_prediction_tab()
        self._build_about_tab()

    def _build_header(self):
        """Create the title banner shown above the notebook."""
        header_frame = tk.Frame(self.root, bg=self._ACCENT, pady=25)

        # Widgets are laid out in packing order. When the notebook has
        # already been packed into the window, insert the banner before it
        # so the banner ends up on top instead of below the tabs.
        notebook_packed_in_root = (
            self.tabCtrl.winfo_manager() == "pack"
            and self.tabCtrl.master is self.root
        )
        if notebook_packed_in_root:
            header_frame.pack(fill="x", before=self.tabCtrl)
        else:
            header_frame.pack(fill="x")

        tk.Label(
            header_frame,
            text="Prediksi Label Dataset",
            font=self.header_font,
            fg="white",
            bg=self._ACCENT,
        ).pack()

        tk.Label(
            header_frame,
            text="Simulasi Input Data Diabetes Untuk Model Machine Learning",
            font=("Segoe UI", 9),
            fg="#FFFFFF",
            bg=self._ACCENT,
        ).pack()

    def _build_prediction_tab(self):
        """Create tab 1: the input form, the predict button and the output panel."""
        main_frame = tk.Frame(self.tabCtrl, bg=self._BG)
        self.tabCtrl.add(main_frame, text="Prediksi")

        tk.Label(
            main_frame,
            text="Masukan Parameter Data:",
            font=("Inter", 10, "bold"),
            bg=self._BG,
            fg=self._TEXT,
        ).pack(anchor="w", pady=(0, 10))

        features = [
            "Polyuria",
            "Polydipsia",
            "Sudden Weight Loss",
            "Weakness",
            "Polyphagia",
            "Visual Blurring",
            "Delayed Healing",
        ]

        form_container = tk.Frame(main_frame, bg=self._BG)
        form_container.pack(fill="both", expand=True)
        # Two equally wide columns; configured once rather than per field.
        form_container.columnconfigure(0, weight=1)
        form_container.columnconfigure(1, weight=1)

        self.inputs = {}
        for i, feature in enumerate(features):
            row, col = divmod(i, 2)  # fill the grid left to right, two per row

            field_container = tk.Frame(form_container, bg=self._BG)
            field_container.grid(row=row, column=col, padx=10, pady=8, sticky="ew")

            tk.Label(
                field_container,
                text=feature,
                font=self.label_font,
                bg=self._BG,
                fg=self._TEXT,
            ).pack(anchor="w")

            var = tk.IntVar(value=0)
            ttk.Checkbutton(
                field_container,
                variable=var,
                onvalue=1,
                offvalue=0,
            ).pack(fill="x", padx=2, ipady=2, pady=(2, 0))

            self.inputs[feature] = var

        # Predict button. TODO: no handler is attached yet (command=None),
        # so pressing the button currently does nothing.
        predict_btn = tk.Button(
            main_frame,
            text="JALANKAN PREDIKSI DIABETES",
            command=None,
            bg="#16a085",
            fg="white",
            font=("Segoe UI", 11, "bold"),
            activebackground="#7b7b7b",
            activeforeground="white",
            cursor="hand2",
            relief="flat",
            pady=12,
            padx=10,
        )
        predict_btn.pack(fill="x", pady=25, padx=30)

        self.result_container = tk.LabelFrame(
            main_frame,
            text=" OUTPUT SISTEM ",
            font=("Segoe UI", 9, "italic"),
            bg=self._CARD_BG,
            fg="#757575",
            padx=15,
            pady=15,
        )
        self.result_container.pack(fill="x")

        self.status_label = tk.Label(
            self.result_container,
            text="Status: Masukan Data Anda",
            font=("Segoe UI", 9),
            bg=self._CARD_BG,
            fg="#9e9e9e",
        )
        self.status_label.pack()

        self.result_label = tk.Label(
            self.result_container,
            text="---",
            font=self.result_font,
            bg=self._CARD_BG,
            fg="#212121",
        )
        self.result_label.pack(pady=10)

    def _build_about_tab(self):
        """Create tab 2: static model-information and evaluation labels."""
        second_frame = tk.Frame(self.tabCtrl, bg=self._BG)
        self.tabCtrl.add(second_frame, text="Tentang")

        info_model_items = ["Training Data :", "Testing Data :", "Decision Tree Depth :"]

        model_evaluation_items = [
            "Actual Value :",
            "Accuracy :",
            "Correct Value :",
            "Wrong Value :",
        ]

        # Main container of the tab.
        content_container = tk.Frame(second_frame, bg=self._BG, padx=15, pady=15)
        content_container.pack(fill="both", expand=True)

        self._build_label_section(
            content_container, " Informasi Model ", info_model_items, pady=(0, 15)
        )
        self._build_label_section(
            content_container, " Evaluasi Model ", model_evaluation_items
        )

    def _build_label_section(self, parent, title, items, pady=0):
        """Add a titled frame to ``parent`` listing ``items`` in a two-column grid."""
        section = tk.LabelFrame(
            parent,
            text=title,
            font=("Segoe UI", 10, "bold"),
            bg=self._CARD_BG,
            fg=self._ACCENT,
            padx=15,
            pady=10,
        )
        section.pack(fill="x", pady=pady)

        for i, item in enumerate(items):
            row, col = divmod(i, 2)

            item_frame = tk.Frame(section, bg=self._CARD_BG)
            item_frame.grid(row=row, column=col, padx=10, pady=6, sticky="w")

            tk.Label(
                item_frame,
                text=item,
                font=self.label_font,
                bg=self._CARD_BG,
                fg=self._TEXT,
            ).pack(anchor="w")

        section.columnconfigure(0, weight=1)
        section.columnconfigure(1, weight=1)


if __name__ == "__main__":
    # Create the top-level application window.
    root = tk.Tk()

    # Notebook that hosts the tabs; packed so that it fills the window.
    tabCtrl = ttk.Notebook(root)
    tabCtrl.pack(fill="both", expand=True)

    # Build the interface on the window and the notebook.
    app = GUI(root, tabCtrl)

    # Start the Tk event loop.
    root.mainloop()

In [None]:
import tkinter as tk
from tkinter import ttk


class GUI:
    """Tkinter window for the diabetes prediction demo.

    Builds a header banner on ``root`` and two tabs on ``tabCtrl``:
    "Prediksi" (symptom check boxes, a predict button and an output panel)
    and "Tentang" (static labels describing the model and its evaluation).

    Args:
        root: the top-level ``tk.Tk`` window; its size, colour and title are
            configured here.
        tabCtrl: the ``ttk.Notebook`` the two tabs are added to.

    Attributes set while building the interface:
        inputs: dict mapping each feature name to the ``tk.IntVar`` bound to
            its check box (1 when ticked, 0 otherwise).
        result_container, status_label, result_label: output panel widgets.
    """

    # Colours shared by several widgets.
    _BG = "#f1f5f9"
    _ACCENT = "#4caf50"
    _CARD_BG = "#ffffff"
    _TEXT = "#1c2b36"

    def __init__(self, root, tabCtrl):
        self.root = root
        self.root.geometry("500x650")
        self.root.configure(bg=self._BG)
        self.root.title("GUI Prediksi Diabetes")
        self.tabCtrl = tabCtrl

        self.header_font = ("Segoe UI", 18, "bold")
        self.label_font = ("Inter", 10)
        self.entry_font = ("Inter", 11)
        self.result_font = ("Inter", 16, "bold")

        self.setup_ui()

    def setup_ui(self):
        """Build the header banner and both notebook tabs."""
        self._build_header()
        self._build_prediction_tab()
        self._build_about_tab()

    def _build_header(self):
        """Create the title banner shown above the notebook."""
        header_frame = tk.Frame(self.root, bg=self._ACCENT, pady=25)

        # Widgets are laid out in packing order. When the notebook has
        # already been packed into the window, insert the banner before it
        # so the banner ends up on top instead of below the tabs.
        notebook_packed_in_root = (
            self.tabCtrl.winfo_manager() == "pack"
            and self.tabCtrl.master is self.root
        )
        if notebook_packed_in_root:
            header_frame.pack(fill="x", before=self.tabCtrl)
        else:
            header_frame.pack(fill="x")

        tk.Label(
            header_frame,
            text="Prediksi Label Dataset",
            font=self.header_font,
            fg="white",
            bg=self._ACCENT,
        ).pack()

        tk.Label(
            header_frame,
            text="Simulasi Input Data Diabetes Untuk Model Machine Learning",
            font=("Segoe UI", 9),
            fg="#FFFFFF",
            bg=self._ACCENT,
        ).pack()

    def _build_prediction_tab(self):
        """Create tab 1: the input form, the predict button and the output panel."""
        main_frame = tk.Frame(self.tabCtrl, bg=self._BG)
        self.tabCtrl.add(main_frame, text="Prediksi")

        tk.Label(
            main_frame,
            text="Masukan Parameter Data:",
            font=("Inter", 10, "bold"),
            bg=self._BG,
            fg=self._TEXT,
        ).pack(anchor="w", pady=(0, 10))

        features = [
            "Polyuria",
            "Polydipsia",
            "Sudden Weight Loss",
            "Weakness",
            "Polyphagia",
            "Visual Blurring",
            "Delayed Healing",
        ]

        form_container = tk.Frame(main_frame, bg=self._BG)
        form_container.pack(fill="both", expand=True)
        # Two equally wide columns; configured once rather than per field.
        form_container.columnconfigure(0, weight=1)
        form_container.columnconfigure(1, weight=1)

        self.inputs = {}
        for i, feature in enumerate(features):
            row, col = divmod(i, 2)  # fill the grid left to right, two per row

            field_container = tk.Frame(form_container, bg=self._BG)
            field_container.grid(row=row, column=col, padx=10, pady=8, sticky="ew")

            tk.Label(
                field_container,
                text=feature,
                font=self.label_font,
                bg=self._BG,
                fg=self._TEXT,
            ).pack(anchor="w")

            # Bind every check box to its own IntVar and keep the variable
            # (not the widget) so the ticked state can be read back as 0/1.
            var = tk.IntVar(value=0)
            ttk.Checkbutton(
                field_container,
                variable=var,
                onvalue=1,
                offvalue=0,
            ).pack(fill="x", padx=2, ipady=2, pady=(2, 0))

            self.inputs[feature] = var

        # Predict button. TODO: no handler is attached yet (command=None),
        # so pressing the button currently does nothing.
        predict_btn = tk.Button(
            main_frame,
            text="JALANKAN PREDIKSI DIABETES",
            command=None,
            bg="#16a085",
            fg="white",
            font=("Segoe UI", 11, "bold"),
            activebackground="#7b7b7b",
            activeforeground="white",
            cursor="hand2",
            relief="flat",
            pady=12,
            padx=10,
        )
        predict_btn.pack(fill="x", pady=25)

        self.result_container = tk.LabelFrame(
            main_frame,
            text=" OUTPUT SISTEM ",
            font=("Segoe UI", 9, "italic"),
            bg=self._CARD_BG,
            fg="#757575",
            padx=15,
            pady=15,
        )
        self.result_container.pack(fill="x")

        self.status_label = tk.Label(
            self.result_container,
            text="Status: Masukan Data Anda",
            font=("Segoe UI", 9),
            bg=self._CARD_BG,
            fg="#9e9e9e",
        )
        self.status_label.pack()

        self.result_label = tk.Label(
            self.result_container,
            text="---",
            font=self.result_font,
            bg=self._CARD_BG,
            fg="#212121",
        )
        self.result_label.pack(pady=10)

    def _build_about_tab(self):
        """Create tab 2: static model-information and evaluation labels."""
        second_frame = tk.Frame(self.tabCtrl, bg=self._BG)
        self.tabCtrl.add(second_frame, text="Tentang")

        info_model_items = ["Training Data :", "Testing Data :", "Decision Tree Depth :"]

        model_evaluation_items = [
            "Actual Value :",
            "Accuracy :",
            "Correct Value :",
            "Wrong Value :",
        ]

        # Main container of the tab.
        content_container = tk.Frame(second_frame, bg=self._BG, padx=15, pady=15)
        content_container.pack(fill="both", expand=True)

        self._build_label_section(
            content_container, " Informasi Model ", info_model_items, pady=(0, 15)
        )
        self._build_label_section(
            content_container, " Evaluasi Model ", model_evaluation_items
        )

    def _build_label_section(self, parent, title, items, pady=0):
        """Add a titled frame to ``parent`` listing ``items`` in a two-column grid."""
        section = tk.LabelFrame(
            parent,
            text=title,
            font=("Segoe UI", 10, "bold"),
            bg=self._CARD_BG,
            fg=self._ACCENT,
            padx=15,
            pady=10,
        )
        section.pack(fill="x", pady=pady)

        for i, item in enumerate(items):
            row, col = divmod(i, 2)

            item_frame = tk.Frame(section, bg=self._CARD_BG)
            item_frame.grid(row=row, column=col, padx=10, pady=6, sticky="w")

            tk.Label(
                item_frame,
                text=item,
                font=self.label_font,
                bg=self._CARD_BG,
                fg=self._TEXT,
            ).pack(anchor="w")

        section.columnconfigure(0, weight=1)
        section.columnconfigure(1, weight=1)


if __name__ == "__main__":
    # Create the top-level application window.
    root = tk.Tk()

    # Notebook that hosts the tabs; packed so that it fills the window.
    tabCtrl = ttk.Notebook(root)
    tabCtrl.pack(fill="both", expand=True)

    # Build the interface on the window and the notebook.
    app = GUI(root, tabCtrl)

    # Start the Tk event loop.
    root.mainloop()

KeyboardInterrupt: 