# KNN classification

In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import mean_squared_error, accuracy_score

# Load the scikit-learn digits dataset and keep it in a DataFrame, with the
# label stored next to the features in a 'target' column.
digits = load_digits()
df = pd.DataFrame(digits.data, columns=digits.feature_names).assign(target=digits.target)

# Split the frame back into a feature matrix and a label vector (NumPy arrays).
X = df.drop(columns='target').to_numpy()
y = df['target'].to_numpy()

# 10-fold cross-validation; the shuffle is seeded so the folds are repeatable.
kf = KFold(n_splits=10, shuffle=True, random_state=42)
mse_list, accuracy_list = [], []

for train_index, test_index in kf.split(X):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # Fit a 3-nearest-neighbour classifier on the training part of the fold
    # and predict the held-out part.
    knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
    y_pred = knn.predict(X_test)

    # Per-fold metrics. Note the MSE is taken directly between the true and
    # predicted class labels, treating them as numbers.
    mse = mean_squared_error(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    mse_list.append(mse)
    accuracy_list.append(accuracy)

# Average over the folds; the error rate is the complement of mean accuracy.
mse_knn = np.mean(mse_list)
avg_error_knn = 1 - np.mean(accuracy_list)

print("MSE for KNN:", mse_knn)
print("Average error for KNN:", avg_error_knn)


MSE for KNN: 0.30435443823711983
Average error for KNN: 0.012790192427063807


# Neural Networks

In [2]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import KFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import mean_squared_error, accuracy_score

# Needed only for seeding; imported here so this cell stays self-contained.
from tensorflow.keras.utils import set_random_seed

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and
# batch shuffling -- and therefore the metrics printed below -- are repeatable.
set_random_seed(42)

# Load the scikit-learn digits dataset and keep it in a DataFrame, with the
# label stored next to the features in a 'target' column.
digits = load_digits()
df = pd.DataFrame(digits.data, columns=digits.feature_names)
df['target'] = digits.target

X = df.drop('target', axis=1).values
y = df['target'].values

# Fix the number of classes from the dataset itself. Without an explicit
# num_classes, to_categorical sizes the one-hot matrix from the largest label
# present in the fold, which would not match the softmax layer if a training
# fold happened to miss the highest class.
n_classes = len(digits.target_names)

# 10-fold cross-validation; the shuffle is seeded so the folds are repeatable.
kf = KFold(n_splits=10, shuffle=True, random_state=42)
mse_list = []
accuracy_list = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # A fresh fully connected network per fold (64 -> 32 -> 16 -> softmax),
    # so no weights carry over from one fold to the next.
    model = Sequential()
    model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(n_classes, activation='softmax'))

    optimizer = Adam(learning_rate=0.001)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    # categorical_crossentropy expects one-hot targets. Only the training
    # labels need encoding: evaluation below uses the integer labels.
    y_train_categorical = to_categorical(y_train, num_classes=n_classes)
    model.fit(X_train, y_train_categorical, epochs=50, batch_size=16, verbose=0)

    # Turn the per-class probabilities back into a single predicted label.
    y_pred_categorical = model.predict(X_test, verbose=0)
    y_pred = np.argmax(y_pred_categorical, axis=1)

    # Per-fold metrics. Note the MSE is taken directly between the true and
    # predicted class labels, treating them as numbers.
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse)

    accuracy = accuracy_score(y_test, y_pred)
    accuracy_list.append(accuracy)

# Average over the folds; the error rate is the complement of mean accuracy.
mse_neural_network = np.mean(mse_list)
avg_error_neural_network = 1 - np.mean(accuracy_list)

print("\nMSE for Neural Networks:", mse_neural_network)
print("Average error for Neural Networks:", avg_error_neural_network)

2023-04-16 21:06:38.768058: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-16 21:06:44.676827: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.



MSE for Neural Networks: 0.42084419615145874
Average error for Neural Networks: 0.021142147734326544


# ExtraTreesClassifier

In [3]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import KFold
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import mean_squared_error, accuracy_score

# Load the scikit-learn digits dataset and keep it in a DataFrame, with the
# label stored next to the features in a 'target' column.
digits = load_digits()
df = pd.DataFrame(digits.data, columns=digits.feature_names).assign(target=digits.target)

# Split the frame back into a feature matrix and a label vector (NumPy arrays).
X = df.drop(columns='target').to_numpy()
y = df['target'].to_numpy()

# 10-fold cross-validation; the shuffle is seeded so the folds are repeatable.
kf = KFold(n_splits=10, shuffle=True, random_state=42)
mse_list, accuracy_list = [], []

for train_index, test_index in kf.split(X):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # Fit a seeded 100-tree extra-trees ensemble on the training part of the
    # fold and predict the held-out part.
    extra_trees = ExtraTreesClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
    y_pred = extra_trees.predict(X_test)

    # Per-fold metrics. Note the MSE is taken directly between the true and
    # predicted class labels, treating them as numbers.
    mse = mean_squared_error(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    mse_list.append(mse)
    accuracy_list.append(accuracy)

# Average over the folds; the error rate is the complement of mean accuracy.
mse_extra_trees = np.mean(mse_list)
avg_error_extra_trees = 1 - np.mean(accuracy_list)

print("MSE for ExtraTreesClassifier:", mse_extra_trees)
print("Average error for ExtraTreesClassifier:", avg_error_extra_trees)


MSE for ExtraTreesClassifier: 0.36272811918063314
Average error for ExtraTreesClassifier: 0.017250155183116123
