In [None]:
from google.colab import files
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.utils import plot_model
import matplotlib.pyplot as plt

In [None]:
# Load the sales data from the CSV file in the working directory.
# Point csv_path at a different file name if the data lives elsewhere.
csv_path = 'FoodSales.csv'
df = pd.read_csv(csv_path)
# Preview the first five rows of the DataFrame
df.head(n=5)

Unnamed: 0,OrderDate,order_date,Region,City,Category,Product,Quantity,UnitPrice,TotalPrice
0,01/01/2020,01/01/2020,East,Boston,Bars,Carrot,33,177,5841
1,04/01/2020,04/01/2020,East,Boston,Crackers,Whole Wheat,87,349,30363
2,07/01/2020,07/01/2020,West,Los Angeles,Cookies,Chocolate Chip,58,187,10846
3,10/01/2020,10/01/2020,East,New York,Cookies,Chocolate Chip,82,187,15334
4,13/01/2020,13/01/2020,East,Boston,Cookies,Arrowroot,38,218,8284


In [None]:
# Preview the first five rows of the loaded DataFrame.
df.head()

Unnamed: 0,OrderDate,order_date,Region,City,Category,Product,Quantity,UnitPrice,TotalPrice
0,01/01/2020,01/01/2020,East,Boston,Bars,Carrot,33,177,5841
1,04/01/2020,04/01/2020,East,Boston,Crackers,Whole Wheat,87,349,30363
2,07/01/2020,07/01/2020,West,Los Angeles,Cookies,Chocolate Chip,58,187,10846
3,10/01/2020,10/01/2020,East,New York,Cookies,Chocolate Chip,82,187,15334
4,13/01/2020,13/01/2020,East,Boston,Cookies,Arrowroot,38,218,8284


In [None]:
# Print the column names, non-null counts and dtypes of the DataFrame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   OrderDate   244 non-null    object
 1   order_date  244 non-null    object
 2   Region      244 non-null    object
 3   City        244 non-null    object
 4   Category    244 non-null    object
 5   Product     244 non-null    object
 6   Quantity    244 non-null    int64 
 7   UnitPrice   244 non-null    object
 8   TotalPrice  244 non-null    object
dtypes: int64(1), object(8)
memory usage: 17.3+ KB


In [None]:
# Summary statistics; only numeric columns are included by default.
df.describe()

Unnamed: 0,Quantity
count,244.0
mean,63.286885
std,47.112505
min,20.0
25%,31.0
50%,47.0
75%,80.0
max,306.0


In [None]:
# Count missing values per column (isna is the same check as isnull).
missing_mask = df.isna()
missing_mask.sum()

OrderDate     0
order_date    0
Region        0
City          0
Category      0
Product       0
Quantity      0
UnitPrice     0
TotalPrice    0
dtype: int64

In [None]:
# Encode the two sales regions as integers: East -> 1, West -> 0.
region_map = {'East': 1, 'West': 0}

# Assign the result back to the column instead of calling
# df['Region'].replace(..., inplace=True): the in-place form operates on a
# temporary Series, triggers a chained-assignment warning in pandas 2.x and
# leaves df unchanged once Copy-on-Write is active.
# infer_objects() keeps the column integer-typed on pandas versions where
# replace() no longer downcasts an object column automatically.
# Values not present in region_map are left as they are, so re-running this
# cell on an already encoded column is a no-op.
df['Region'] = df['Region'].replace(region_map).infer_objects()

In [None]:
# Re-inspect the frame after encoding Region: info() prints the dtype report,
# and describe() (the cell's last expression) is shown as the cell output.
df.info()
df.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   OrderDate   244 non-null    object
 1   order_date  244 non-null    object
 2   Region      244 non-null    int64 
 3   City        244 non-null    object
 4   Category    244 non-null    object
 5   Product     244 non-null    object
 6   Quantity    244 non-null    int64 
 7   UnitPrice   244 non-null    object
 8   TotalPrice  244 non-null    object
dtypes: int64(2), object(7)
memory usage: 17.3+ KB


Unnamed: 0,Region,Quantity
count,244.0,244.0
mean,0.614754,63.286885
std,0.487654,47.112505
min,0.0,20.0
25%,0.0,31.0
50%,1.0,47.0
75%,1.0,80.0
max,1.0,306.0


In [None]:
# Preview the first rows to check the encoded Region column.
df.head()

Unnamed: 0,OrderDate,order_date,Region,City,Category,Product,Quantity,UnitPrice,TotalPrice
0,01/01/2020,01/01/2020,1,Boston,Bars,Carrot,33,177,5841
1,04/01/2020,04/01/2020,1,Boston,Crackers,Whole Wheat,87,349,30363
2,07/01/2020,07/01/2020,0,Los Angeles,Cookies,Chocolate Chip,58,187,10846
3,10/01/2020,10/01/2020,1,New York,Cookies,Chocolate Chip,82,187,15334
4,13/01/2020,13/01/2020,1,Boston,Cookies,Arrowroot,38,218,8284


In [None]:
# Remove the two order-date columns from the frame.
# errors='ignore' keeps the cell re-runnable: once the columns are gone, a
# second execution is a no-op instead of raising KeyError.
df = df.drop(columns=['OrderDate', 'order_date'], errors='ignore')

df.head()

Unnamed: 0,Region,City,Category,Product,Quantity,UnitPrice,TotalPrice
0,1,Boston,Bars,Carrot,33,177,5841
1,1,Boston,Crackers,Whole Wheat,87,349,30363
2,0,Los Angeles,Cookies,Chocolate Chip,58,187,10846
3,1,New York,Cookies,Chocolate Chip,82,187,15334
4,1,Boston,Cookies,Arrowroot,38,218,8284


In [None]:
# (rows, columns) of the frame after dropping the date columns.
df.shape

(244, 7)

In [None]:
# Convert the price columns to a numeric (float) dtype.
#
# Both columns are loaded as object dtype and have commas replaced by dots
# before parsing (presumably a decimal comma -- confirm against the raw CSV).
# Each column goes through the same three steps so the handling is consistent:
#   1. cast to str, so the .str accessor also works when the column is already
#      numeric (e.g. when this cell is executed a second time);
#   2. replace ',' with '.' as a literal, non-regex substitution;
#   3. parse the resulting text as float.
for price_col in ['UnitPrice', 'TotalPrice']:
    df[price_col] = (
        df[price_col]
        .astype(str)
        .str.replace(',', '.', regex=False)
        .astype(float)
    )


In [None]:
# Separate the predictors from the regression target.
X = df.drop('TotalPrice', axis=1)  # features
y = df['TotalPrice']  # target

# One-hot encode the categorical columns into indicator columns.
categorical_cols = ['Region', 'City', 'Category', 'Product']
X_encoded = pd.get_dummies(X, columns=categorical_cols)

# Split into training and test sets BEFORE scaling, so the scaler is fitted
# on training rows only and no test-set statistics (min/max) leak into the
# training features.
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Work on explicit copies so the column assignments below modify independent
# frames rather than views/slices of X_encoded.
X_train = X_train.copy()
X_test = X_test.copy()

# Min-max normalise the numeric features: fit on the training split, then
# apply the same fitted transform to the test split. Test values can fall
# slightly outside [0, 1] when they exceed the training range.
numerical_cols = ['Quantity', 'UnitPrice']
scaler = MinMaxScaler()
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

In [None]:
# Report the (rows, columns) shape of each split.
for label, frame in (("Train Shape :", X_train), ("Test Shape:", X_test)):
    print(label, frame.shape)

Train Shape : (195, 21)
Test Shape: (49, 21)


Build Multilayer Perceptron Model

Parameter dan Hyperparameter

In [None]:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
import warnings

# Silence deprecation, future and user warnings for the rest of the session.
# scikit-learn's ConvergenceWarning derives from UserWarning, so the last
# filter is the one that hides convergence messages during the grid search.
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# Base estimator. random_state pins the weight initialisation and the
# shuffling done by the stochastic solvers, so the reported MSE and the
# selected hyperparameters are reproducible across runs.
mlp = MLPRegressor(max_iter=1000, random_state=42)

# Scale every feature column: the shared `scaler` is re-fitted here on the
# training split only and then applied unchanged to the test split.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Hyperparameter grid: 3 * 3 * 2 * 3 * 2 = 108 candidate configurations.
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50)],
    'activation': ['relu', 'tanh', 'logistic'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate': ['constant', 'adaptive'],
}

# Exhaustive search with 5-fold cross-validation; candidates are ranked by
# negative MSE (higher is better).
grid_search = GridSearchCV(estimator=mlp, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')

# Fit every candidate on the training data.
grid_search.fit(X_train_scaled, y_train)

# Predict the held-out test set with the search object.
y_pred = grid_search.predict(X_test_scaled)

# Evaluate with Mean Squared Error on the test set.
mse = mean_squared_error(y_test, y_pred)

print("Mean Squared Error:", mse)



Mean Squared Error: 105.45848556732125


In [None]:
# Show the hyperparameter combination selected by the grid search.
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

Best Hyperparameters: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (50,), 'learning_rate': 'constant', 'solver': 'sgd'}
