In [None]:
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

<h1><b>1. Data Collection</b></h1>

In [None]:
import pandas as pd
# Load the crab-age CSV into a DataFrame.
# NOTE(review): hard-coded absolute path under /content/drive; no Drive mount call is
# visible in this notebook, so a fresh runtime presumably needs the mount first - confirm.
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/CrabAgePrediction.csv')

<h1><b>2. Peninjauan Informasi Umum Dataset</b></h1>

In [None]:
# Summarise the frame: column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3893 entries, 0 to 3892
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Sex             3893 non-null   object 
 1   Length          3893 non-null   float64
 2   Diameter        3893 non-null   float64
 3   Height          3893 non-null   float64
 4   Weight          3893 non-null   float64
 5   Shucked Weight  3893 non-null   float64
 6   Viscera Weight  3893 non-null   float64
 7   Shell Weight    3893 non-null   float64
 8   Age             3893 non-null   int64  
dtypes: float64(7), int64(1), object(1)
memory usage: 273.9+ KB


In [None]:
# Count missing values per column (the printed header means "Missing values per feature").
print('Nilai Yang Hilang Per Fitur')
print(data.isnull().sum())

Nilai Yang Hilang Per Fitur
Sex               0
Length            0
Diameter          0
Height            0
Weight            0
Shucked Weight    0
Viscera Weight    0
Shell Weight      0
Age               0
dtype: int64


In [6]:
# Feature matrix: every column except the categorical 'Sex' and the 'Age' column.
dataset = data.drop(columns=['Sex', 'Age'])

<h1><b>3. EDA</b></h1>

In [None]:
# Histogram (30 bins, with a KDE overlay) for every numeric column of `data`.
num_features = data.select_dtypes(include=[np.number])
plt.figure(figsize=(14, 10))
# Subplot positions are 1-based, hence enumerate(..., 1); the 3x4 grid holds at most 12 plots.
for i, column in enumerate(num_features.columns, 1):
    plt.subplot(3, 4, i)
    sns.histplot(data[column], bins=30, kde=True, color='blue')
    plt.title(f'Distribusi {column}')
plt.tight_layout()
plt.show()


In [None]:
# Horizontal count plot for every object-dtype (categorical) column of `data`.
cat_features = data.select_dtypes(include=[object])
plt.figure(figsize=(14, 8))
# Subplot positions are 1-based, hence enumerate(..., 1); the 2x4 grid holds at most 8 plots.
for i, column in enumerate(cat_features.columns, 1):
    plt.subplot(2, 4, i)
    # NOTE(review): recent seaborn releases warn when `palette` is passed without `hue`;
    # confirm against the installed seaborn version.
    sns.countplot(y=data[column], palette='viridis')
    plt.title(f'Distribusi {column}')
plt.tight_layout()
plt.show()

<h1><b>Split Dataset</b></h1>

In [9]:
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Hold out 20% of the rows as the test split; 'Age' is the target. random_state fixes the shuffle.
x_train, x_test, y_train, y_test = train_test_split(dataset, data['Age'], test_size=0.2, random_state=1)

<h1><b>Data Preprocess: Normalisasi Fitur Data dan Label</b></h1>

In [10]:
from sklearn.preprocessing import MinMaxScaler

# Separate scalers for the features and for the label, so each can be handled independently.
scaler_fiturData = MinMaxScaler()
scaler_labelData = MinMaxScaler()

# Fit on the training split only, then apply the same transform to the test split.
x_latih = scaler_fiturData.fit_transform(x_train)
x_uji = scaler_fiturData.transform(x_test)

# The label Series is reshaped to a single column (n, 1) before scaling.
# Every metric computed on y_latih / y_uji is therefore in scaled units, not raw 'Age' units.
y_latih = scaler_labelData.fit_transform(y_train.to_numpy().reshape(-1, 1))
y_uji = scaler_labelData.transform(y_test.to_numpy().reshape(-1, 1))

<h1><b>6. Modelling: Membangun Model</b></h1>

In [11]:
class SimpleNeuralNetwork:
    """Single linear unit (weights + bias, no activation) trained by full-batch
    gradient descent on the mean squared error.

    Attributes:
        learning_rate: Step size of each gradient-descent update.
        epochs: Number of full-batch updates performed by ``train``.
        bobot: Weight column vector of shape (n_features, 1); ``None`` until trained.
        bias: Bias as a (1, 1) array; ``None`` until trained.
    """

    def __init__(self, learning_rate=0.01, epochs=100, random=1):
        """Store the hyperparameters.

        Args:
            learning_rate: Gradient-descent step size.
            epochs: Number of training iterations.
            random: Seed used to draw the initial weights.
        """
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.bobot = None
        self.bias = None
        # Remember the seed instead of reseeding the global NumPy RNG: constructing a
        # model must not change the random numbers seen by unrelated code.
        self._seed = random

    def train(self, x_latih, y_latih):
        """Fit weights and bias to the training data.

        Args:
            x_latih: 2-D array-like of shape (n_samples, n_features).
            y_latih: Targets, either 1-D (n_samples,) or a column (n_samples, 1).

        Raises:
            ValueError: If ``x_latih`` is not 2-D, is empty, or its row count does
                not match the number of targets.
        """
        x_latih = np.asarray(x_latih, dtype=float)
        # Force a column vector: a 1-D target would otherwise broadcast against the
        # (n, 1) prediction into an (n, n) error matrix.
        y_latih = np.asarray(y_latih, dtype=float).reshape(-1, 1)

        if x_latih.ndim != 2:
            raise ValueError("x_latih must be a 2-D array of shape (n_samples, n_features).")
        n_samples = x_latih.shape[0]
        if n_samples == 0:
            raise ValueError("x_latih must contain at least one sample.")
        if y_latih.shape[0] != n_samples:
            raise ValueError("x_latih and y_latih must have the same number of samples.")

        # A private legacy RandomState yields the same initial weights that
        # np.random.seed(seed) + np.random.randn(...) would, without global side effects,
        # and makes repeated calls to train() reproducible.
        rng = np.random.RandomState(self._seed)
        self.bobot = rng.randn(x_latih.shape[1], 1)
        self.bias = np.zeros((1, 1))

        for _ in range(self.epochs):
            # Forward pass: linear combination of the inputs plus bias.
            output = np.dot(x_latih, self.bobot) + self.bias

            # Residual between target and prediction.
            error = y_latih - output

            # Gradients of the mean squared error w.r.t. weights and bias.
            d_weights = -2 * np.dot(x_latih.T, error) / n_samples
            d_bias = -2 * np.sum(error) / n_samples

            # Gradient-descent update.
            self.bobot -= self.learning_rate * d_weights
            self.bias -= self.learning_rate * d_bias

    def evaluate(self, x_uji, y_uji):
        """Score the model on the given data.

        Returns:
            Tuple ``(mae, mse, r2)``.

        Raises:
            ValueError: If the model has not been trained yet.
        """
        # Go through predict() so an untrained model fails with the same clear error.
        prediksi = self.predict(x_uji)
        mae = mean_absolute_error(y_uji, prediksi)
        mse = mean_squared_error(y_uji, prediksi)
        r2 = r2_score(y_uji, prediksi)
        return mae, mse, r2

    def predict(self, x_input):
        """Return predictions of shape (n_samples, 1) for ``x_input``.

        Raises:
            ValueError: If the model has not been trained yet.
        """
        if self.bobot is None or self.bias is None:
            raise ValueError("Model belum dilatih. Gunakan metode 'train' terlebih dahulu.")

        return np.dot(x_input, self.bobot) + self.bias


<h1><b>Tuning Hyperparameter</b></h1>

In [13]:
def hyperparameter_tuning(x_latih, y_latih, x_uji, y_uji, learning_rate_range, epochs_range):
    """Train and score one SimpleNeuralNetwork per (learning rate, epochs) pair.

    Every model is created with seed 42, trained on (x_latih, y_latih) and
    scored on (x_uji, y_uji).

    Args:
        x_latih, y_latih: Data each model is trained on.
        x_uji, y_uji: Data each model is scored on.
            NOTE(review): if this is the final test split, the chosen
            hyperparameters are tuned on test data - consider a validation split.
        learning_rate_range: Iterable of learning rates (outer loop).
        epochs_range: Iterable of epoch counts (inner loop).

    Returns:
        DataFrame with one row per combination and the columns
        'Learning Rate', 'Epochs', 'MAE', 'MSE', 'R²', ordered by learning
        rate first and epochs second.
    """

    def _score(lr, epoch):
        # A fresh model per combination so no state carries over between runs.
        candidate = SimpleNeuralNetwork(learning_rate=lr, epochs=epoch, random=42)
        candidate.train(x_latih, y_latih)
        mae, mse, r2 = candidate.evaluate(x_uji, y_uji)
        return {
            'Learning Rate': lr,
            'Epochs': epoch,
            'MAE': mae,
            'MSE': mse,
            'R²': r2
        }

    return pd.DataFrame([
        _score(lr, epoch)
        for lr in learning_rate_range
        for epoch in epochs_range
    ])


In [37]:
# Train the final model with fixed hyperparameters and seed 42.
model = SimpleNeuralNetwork(learning_rate=0.1, epochs=31000, random=42)
model.train(x_latih, y_latih)
# Displays (MAE, MSE, R²) on the test split; the label is min-max scaled, so MAE/MSE are in scaled units.
model.evaluate(x_uji, y_uji)

(0.058616379942054674, 0.006446357634640407, 0.5427751614209322)

In [None]:
# `hasil_terbaik` was not defined by any earlier cell, so this cell (and the one after it)
# raised NameError on a fresh kernel. Recompute it with the tuning helper instead.
# The grid below is the single combination present in the recorded output
# (learning rate 0.1, 1000 epochs); widen both lists to run a real search.
# NOTE(review): the recorded output has an 'Unnamed: 0' column, so the original frame was
# presumably loaded from a saved CSV - confirm whether that file should be used instead.
hasil_terbaik = hyperparameter_tuning(
    x_latih, y_latih, x_uji, y_uji,
    learning_rate_range=[0.1],
    epochs_range=[1000],
)
hasil_terbaik

Unnamed: 0,Learning Rate,Epochs,MAE,MSE,R²
0,0.1,1000,0.064336,0.007953,0.435929


In [None]:
# Pick the row with the highest R² and print it under its header
# (the header text means "Best hyperparameter combination").
best_index = hasil_terbaik['R²'].idxmax()
best_result = hasil_terbaik.loc[best_index]
print("Kombinasi Hyperparameter Terbaik:", best_result, sep="\n")

Kombinasi Hyperparameter Terbaik:
Learning Rate       0.100000
Epochs           1000.000000
MAE                 0.064336
MSE                 0.007953
R²                  0.435929
Name: 0, dtype: float64


<h1><b>Membandingkan Dengan Regresi Linear</b></h1>

In [None]:
from sklearn.linear_model import LinearRegression
import pandas as pd

# Baseline: fit scikit-learn's LinearRegression on the same scaled training data.
lr = LinearRegression().fit(x_latih, y_latih)

# Score the baseline on the scaled test split with the same three metrics.
y_pred = lr.predict(x_uji)
mae, mse, r2 = (
    metric(y_uji, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

print("MAE:", mae)
print("MSE:", mse)
print("R²:", r2)

MAE: 0.058544045951789046
MSE: 0.006453326239472604
R²: 0.5422808948288336
