In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
import numpy as np

df = pd.read_csv("/content/drive/MyDrive/data.csv")
df.head()


Unnamed: 0,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated,street,city,statezip,country
0,2014-05-02 00:00:00,313000.0,3.0,1.5,1340,7912,1.5,0,0,3,1340,0,1955,2005,18810 Densmore Ave N,Shoreline,WA 98133,USA
1,2014-05-02 00:00:00,2384000.0,5.0,2.5,3650,9050,2.0,0,4,5,3370,280,1921,0,709 W Blaine St,Seattle,WA 98119,USA
2,2014-05-02 00:00:00,342000.0,3.0,2.0,1930,11947,1.0,0,0,4,1930,0,1966,0,26206-26214 143rd Ave SE,Kent,WA 98042,USA
3,2014-05-02 00:00:00,420000.0,3.0,2.25,2000,8030,1.0,0,0,4,1000,1000,1963,0,857 170th Pl NE,Bellevue,WA 98008,USA
4,2014-05-02 00:00:00,550000.0,4.0,2.5,1940,10500,1.0,0,0,4,1140,800,1976,1992,9105 170th Ave NE,Redmond,WA 98052,USA


In [5]:
df["price_class"] = pd.qcut(df["price"], 3, labels=["Low", "Medium", "High"])
df["price_class"].value_counts()


Unnamed: 0_level_0,count
price_class,Unnamed: 1_level_1
Medium,1537
Low,1533
High,1530


In [7]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split

# handle missing
df = df.fillna(df.mean(numeric_only=True))

# Encode categorical columns
label_cols = df.select_dtypes(include="object").columns
encoders = {}
for col in label_cols:
    enc = LabelEncoder()
    df[col] = enc.fit_transform(df[col])
    encoders[col] = enc

# Split
X = df.drop("price_class", axis=1)
y = df["price_class"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale
scaler = MinMaxScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s  = scaler.transform(X_test)

X_train_s[:3]


array([[0.47826087, 0.09699115, 0.44444444, 0.37037037, 0.24819028,
        0.04180033, 0.4       , 0.        , 0.        , 0.75      ,
        0.31372549, 0.        , 0.78070175, 0.        , 0.33281734,
        0.97674419, 0.57894737, 0.        ],
       [0.36231884, 0.12134513, 0.44444444, 0.44444444, 0.34643226,
        0.02645821, 0.4       , 0.        , 0.        , 0.5       ,
        0.4379085 , 0.        , 0.79824561, 0.        , 0.01172048,
        0.44186047, 0.27631579, 0.        ],
       [0.75362319, 0.09557522, 0.44444444, 0.37037037, 0.25232678,
        0.00976359, 0.4       , 0.        , 0.        , 0.5       ,
        0.31895425, 0.        , 0.71929825, 0.        , 0.41618753,
        0.76744186, 0.55263158, 0.        ]])

In [12]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_s, y_train)
pred_knn = knn.predict(X_test_s)
print("=== KNN ===")
print("Accuracy:", accuracy_score(y_test, pred_knn))
print("Precision:", precision_score(y_test, pred_knn, average='macro'))
print("Recall:", recall_score(y_test, pred_knn, average='macro'))
print("F1:", f1_score(y_test, pred_knn, average='macro'))
print("Confusion Matrix:\n", confusion_matrix(y_test, pred_knn))

=== KNN ===
Accuracy: 0.5771739130434783
Precision: 0.5679707685152331
Recall: 0.5747107628119043
F1: 0.570044106249744
Confusion Matrix:
 [[213  28  67]
 [ 36 202  76]
 [ 93  89 116]]


In [14]:
from sklearn.naive_bayes import GaussianNB

nb = GaussianNB()
nb.fit(X_train_s, y_train)
pred_nb = nb.predict(X_test_s)
print("=== Naive Bayes ===")
print("Accuracy:", accuracy_score(y_test, pred_nb))
print("Precision:", precision_score(y_test, pred_nb, average='macro'))
print("Recall:", recall_score(y_test, pred_nb, average='macro'))
print("F1:", f1_score(y_test, pred_nb, average='macro'))
print("Confusion Matrix:\n", confusion_matrix(y_test, pred_nb))

=== Naive Bayes ===
Accuracy: 0.8336956521739131
Precision: 0.8317175943084295
Recall: 0.8319495342429554
F1: 0.8307921716548939
Confusion Matrix:
 [[259   0  49]
 [ 15 291   8]
 [ 29  52 217]]


In [17]:
from sklearn.svm import SVC

svm = SVC(kernel="rbf")
svm.fit(X_train_s, y_train)
pred_svm = svm.predict(X_test_s)

print("=== SVM ===")
print("Accuracy:", accuracy_score(y_test, pred_svm))
print("Precision:", precision_score(y_test, pred_svm, average='macro'))
print("Recall:", recall_score(y_test, pred_svm, average='macro'))
print("F1:", f1_score(y_test, pred_svm, average='macro'))
print("Confusion Matrix:\n", confusion_matrix(y_test, pred_svm))

=== SVM ===
Accuracy: 0.6728260869565217
Precision: 0.6767300541772882
Recall: 0.6711488507217999
F1: 0.6735594560391779
Confusion Matrix:
 [[235   4  69]
 [  4 223  87]
 [ 52  85 161]]


In [21]:
from sklearn.neural_network import MLPClassifier

ann = MLPClassifier(hidden_layer_sizes=(32,16), activation="relu", max_iter=300)
ann.fit(X_train_s, y_train)
pred_ann = ann.predict(X_test_s)

print("=== ANN ===")
print("Accuracy:", accuracy_score(y_test, pred_ann))
print("Precision:", precision_score(y_test, pred_ann, average='macro'))
print("Recall:", recall_score(y_test, pred_ann, average='macro'))
print("F1:", f1_score(y_test, pred_ann, average='macro'))
print("Confusion Matrix:\n", confusion_matrix(y_test, pred_ann))



=== ANN ===
Accuracy: 0.9847826086956522
Precision: 0.9846592296592297
Recall: 0.9847339817309466
F1: 0.9846896054355424
Confusion Matrix:
 [[305   0   3]
 [  0 309   5]
 [  3   3 292]]




In [22]:
!pip install deap

Collecting deap
  Downloading deap-1.4.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading deap-1.4.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (135 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/136.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m136.0/136.0 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deap
Successfully installed deap-1.4.3


In [23]:
from deap import base, creator, tools
from sklearn.svm import SVC
import random

n_features = X_train_s.shape[1]

creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register("attr_bool", random.randint, 0, 1)
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n_features)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

def fitness(ind):
    mask = np.array(ind)==1
    if mask.sum() == 0:
        return (0,)

    model = SVC(kernel="rbf")
    model.fit(X_train_s[:,mask], y_train)
    pred = model.predict(X_test_s[:,mask])
    return (accuracy_score(y_test, pred),)

toolbox.register("evaluate", fitness)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

pop = toolbox.population(20)

for gen in range(10):
    offspring = toolbox.select(pop, len(pop))
    offspring = list(map(toolbox.clone, offspring))

    for c1, c2 in zip(offspring[::2], offspring[1::2]):
        if random.random() < 0.8:
            toolbox.mate(c1, c2)

    for m in offspring:
        if random.random() < 0.2:
            toolbox.mutate(m)

    for ind in offspring:
        ind.fitness.values = toolbox.evaluate(ind)

    pop = offspring
    print("Generation", gen)

best = tools.selBest(pop, 1)[0]
best, sum(best)


Generation 0
Generation 1
Generation 2
Generation 3
Generation 4
Generation 5
Generation 6
Generation 7
Generation 8
Generation 9


([0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 2)

In [26]:
mask = np.array(best)==1
X_train_r = X_train_s[:,mask]
X_test_r  = X_test_s[:,mask]

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_r, y_train)
pred_knn = knn.predict(X_test_r)
print("=== KNN ===")
print("Accuracy:", accuracy_score(y_test, pred_knn))
print("Precision:", precision_score(y_test, pred_knn, average='macro'))
print("Recall:", recall_score(y_test, pred_knn, average='macro'))
print("F1:", f1_score(y_test, pred_knn, average='macro'))
print("Confusion Matrix:\n", confusion_matrix(y_test, pred_knn))


=== KNN ===
Accuracy: 0.9978260869565218
Precision: 0.9977777777777778
Recall: 0.9978561777924835
F1: 0.9978115352675827
Confusion Matrix:
 [[307   0   1]
 [  0 313   1]
 [  0   0 298]]


In [27]:

nb = GaussianNB()
nb.fit(X_train_r, y_train)
pred_nb = nb.predict(X_test_r)
print("=== Naive Bayes ===")
print("Accuracy:", accuracy_score(y_test, pred_nb))
print("Precision:", precision_score(y_test, pred_nb, average='macro'))
print("Recall:", recall_score(y_test, pred_nb, average='macro'))
print("F1:", f1_score(y_test, pred_nb, average='macro'))
print("Confusion Matrix:\n", confusion_matrix(y_test, pred_nb))


=== Naive Bayes ===
Accuracy: 0.9173913043478261
Precision: 0.9188045233909762
Recall: 0.9166276199469889
F1: 0.9168156584107122
Confusion Matrix:
 [[277   0  31]
 [  9 305   0]
 [  1  35 262]]


In [28]:
svm = SVC(kernel="rbf")
svm.fit(X_train_r, y_train)
pred_svm = svm.predict(X_test_r)

print("=== SVM ===")
print("Accuracy:", accuracy_score(y_test, pred_svm))
print("Precision:", precision_score(y_test, pred_svm, average='macro'))
print("Recall:", recall_score(y_test, pred_svm, average='macro'))
print("F1:", f1_score(y_test, pred_svm, average='macro'))
print("Confusion Matrix:\n", confusion_matrix(y_test, pred_svm))

=== SVM ===
Accuracy: 0.9956521739130435
Precision: 0.9955849889624724
Recall: 0.9957123555849671
F1: 0.9956270387213971
Confusion Matrix:
 [[306   0   2]
 [  0 312   2]
 [  0   0 298]]


In [31]:
ann = MLPClassifier(hidden_layer_sizes=(32,16), activation="relu", max_iter=300)
ann.fit(X_train_r, y_train)
pred_ann = ann.predict(X_test_r)

print("=== ANN ===")
print("Accuracy:", accuracy_score(y_test, pred_ann))
print("Precision:", precision_score(y_test, pred_ann, average='macro'))
print("Recall:", recall_score(y_test, pred_ann, average='macro'))
print("F1:", f1_score(y_test, pred_ann, average='macro'))
print("Confusion Matrix:\n", confusion_matrix(y_test, pred_ann))



=== ANN ===
Accuracy: 0.9923913043478261
Precision: 0.9924152993628274
Recall: 0.9922840165861583
F1: 0.9923163991815867
Confusion Matrix:
 [[308   0   0]
 [  0 312   2]
 [  5   0 293]]


