## Setup

In [1]:
import pandas as pd
import json
# Let wide frames display up to 50 columns before pandas truncates them
pd.options.display.max_columns = 50

In [2]:
# Load the training split and preview its first five rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# saved row index - confirm whether it is meant to be used as a feature.
dataset = pd.read_csv("./data/data_train.csv")
dataset.head(5)

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,804,1,0.8,1,12,1,41,0.9,89,1,13,709,818,2027,11,5,11,1,0,0,1
1,1042,0,2.2,0,15,1,11,0.6,139,5,16,68,1018,2826,18,0,2,1,0,0,2
2,1481,1,2.0,1,0,0,35,0.5,105,3,0,249,522,2635,17,16,4,1,0,1,2
3,1104,0,1.7,0,1,1,60,0.4,199,2,13,653,1413,1229,6,0,3,1,1,1,0
4,652,0,0.5,1,1,0,58,0.6,142,3,2,464,781,565,18,12,9,0,0,1,0


## Naive Bayes

In [3]:
# Split the training frame into features (x) and target (y)
y = dataset['price_range']
x = dataset.drop(columns=['price_range'])  # drop() already returns a new frame

# Columns to discretise: every column that is not purely 0/1, except the target
is_binary = dataset.isin([0, 1]).all()
cols_to_bin = [name for name, binary in is_binary.items() if not binary]
cols_to_bin.remove('price_range')

# Bin the selected columns with pd.cut and remember the edges that were used
def create_bins(dataset, num_bins, cols_to_bin):
    """Discretise the given columns into ``num_bins`` equal-width bins.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Source frame; it is copied, so the caller's frame is left untouched.
    num_bins : int
        Number of bins passed to ``pd.cut`` for every column.
    cols_to_bin : iterable of str
        Names of the columns to discretise.

    Returns
    -------
    tuple
        ``(binned_frame, binning_info)`` where ``binned_frame`` holds integer
        bin codes in the selected columns and ``binning_info`` maps each
        column name to ``{'bin_edges': [...]}`` (the edges returned by
        ``pd.cut``), so the same cut can be re-applied to new data.
    """
    binned = dataset.copy()
    binning_info = {}

    for column in cols_to_bin:
        # labels=False -> integer bin codes; retbins=True -> also return the edges
        codes, edges = pd.cut(binned[column], bins=num_bins, labels=False, retbins=True)
        binned[column] = codes
        binning_info[column] = {'bin_edges': list(edges)}

    return binned, binning_info

# Apply previously learned bin edges to new data
def convert_to_bin(data, binning_info):
    """Discretise ``data`` using bin edges recorded by ``create_bins``.

    The input frame is copied, so the caller's frame is not modified. The
    lowest and highest recorded edges are replaced by -inf / +inf, so values
    outside the recorded range (or exactly on the lowest edge) fall into the
    first / last bin instead of becoming NaN.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing every column named in ``binning_info``.
    binning_info : dict
        Mapping ``column -> {'bin_edges': [...]}`` as produced by
        ``create_bins``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with the listed columns replaced by integer bin codes.

    Raises
    ------
    KeyError
        If a column named in ``binning_info`` is missing from ``data``.
    ValueError
        If a column has fewer than two recorded bin edges.
    """
    converted = data.copy()

    for column, info in binning_info.items():
        bin_edges = list(info['bin_edges'])
        if len(bin_edges) < 2:
            raise ValueError(f"need at least two bin edges for column {column!r}")

        # Keep the interior edges and open up the two outer bins.
        open_edges = [float('-inf'), *bin_edges[1:-1], float('inf')]
        converted[column] = pd.cut(converted[column], bins=open_edges, labels=False)

    return converted

# Discretise every non-binary feature into 4 bins and keep the edges for reuse
binned_data, binning_info = create_bins(x, num_bins=4, cols_to_bin=cols_to_bin)

In [4]:
cols_to_drop = ['fc', 'four_g']  # Columns to remove from the feature set
# errors='ignore' keeps this cell re-runnable: binned_data is reassigned to its
# own result, so on a second run the columns are already gone.
binned_data = binned_data.drop(columns=cols_to_drop, errors='ignore')

In [5]:
datasetVal = pd.read_csv("./data/data_validation.csv")

# Non-binary training columns, excluding the dropped features and the target.
# NOTE(review): this list is not used by the visible cells - confirm whether a
# later cell needs it.
cols_to_bin_val = [col for col in dataset.columns if not dataset[col].isin([0, 1]).all() and col not in cols_to_drop]
cols_to_bin_val.remove('price_range')

# Bin a copy with the edges learned on the training data, so datasetVal keeps
# the raw validation values whether or not convert_to_bin modifies its argument.
binned_data_val = convert_to_bin(datasetVal.copy(), binning_info)
binned_data_val = binned_data_val.drop(['price_range'] + cols_to_drop, axis=1)
binned_data_val

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi
0,0,0,2,0,0,2,3,0,0,0,0,2,1,1,3,0,0,1
1,0,1,0,1,2,3,2,2,1,2,3,2,1,0,1,1,1,0
2,0,1,3,1,0,0,3,2,0,0,0,3,0,0,1,0,0,0
3,0,0,0,0,2,2,2,0,3,1,3,0,3,3,1,1,1,1
4,0,0,0,1,1,0,1,1,3,0,0,3,1,0,3,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595,2,1,3,0,3,1,1,0,0,0,1,1,1,0,3,0,1,1
596,3,0,2,0,2,3,1,3,3,0,0,3,3,1,3,1,1,0
597,0,1,3,1,1,0,3,1,0,1,3,0,0,0,0,1,1,1
598,3,1,3,1,2,0,3,1,0,1,3,1,1,2,3,1,1,1


In [6]:
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import accuracy_score

# Training features/labels come from the training CSV, test ones from the validation CSV
x_train, y_train = binned_data, dataset['price_range']
x_test, y_test = binned_data_val, datasetVal['price_range']

# Build the categorical Naive Bayes model and fit it on the training data
naive_bayes = CategoricalNB()
naive_bayes.fit(x_train, y_train)

# Predict on the test data and report the accuracy
predictions = naive_bayes.predict(x_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Akurasi model Naive Bayes: {accuracy:.2f}")

Akurasi model Naive Bayes: 0.76
