In [19]:
import pandas as pd
import numpy as np



def _split_next_line(handle):
    """Read the next line from `handle`, strip it, and split it on ';'."""
    return handle.readline().strip().split(';')


# --- Image labels -----------------------------------------------------------
# Images.csv: the first line holds the number of records; every following
# line is "<image_id>;<class_id>".
images_data = []
with open('Images.csv', 'r') as images_file:
    num_images = int(images_file.readline().strip())
    for _ in range(num_images):
        fields = _split_next_line(images_file)
        record = {'image_id': int(fields[0]), 'class_id': fields[1]}
        images_data.append(record)

df_images = pd.DataFrame(images_data)

# --- Edge-histogram features ------------------------------------------------
# EdgeHistogram.csv: the first line is "<num_images>;<num_dimensions>"; every
# following line is "<image_id>;<v1>;<v2>;...". The values are parsed as ints
# and kept as one Python list per image. `num_dimensions` is read from the
# header but not used below.
edge_histogram_data = []
with open('EdgeHistogram.csv', 'r') as edge_histogram_file:
    num_images, num_dimensions = (int(token) for token in _split_next_line(edge_histogram_file))
    for _ in range(num_images):
        fields = _split_next_line(edge_histogram_file)
        edge_histogram_data.append({
            'image_id': int(fields[0]),
            'feature_vector': [int(value) for value in fields[1:]],
        })

df_edge_histogram = pd.DataFrame(edge_histogram_data)

# Inner join on image_id: only images present in both files are kept.
merged_dataset = df_images.merge(df_edge_histogram, on='image_id', how='inner')

merged_dataset

Unnamed: 0,image_id,class_id,feature_vector
0,1,binocular,"[1, 1, 1, 2, 2, 2, 4, 4, 2, 2, 2, 2, 3, 3, 3, ..."
1,2,chair,"[2, 2, 1, 0, 3, 0, 4, 2, 2, 4, 0, 5, 0, 0, 4, ..."
2,3,tick,"[5, 1, 6, 2, 6, 3, 2, 7, 1, 6, 4, 1, 7, 0, 6, ..."
3,4,minaret,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 2, 0, 1, ..."
4,5,Faces,"[1, 6, 4, 2, 2, 0, 6, 6, 5, 4, 1, 7, 1, 5, 3, ..."
...,...,...,...
9139,9140,cougar_face,"[5, 3, 1, 4, 1, 2, 5, 4, 4, 3, 2, 5, 4, 5, 3, ..."
9140,9141,accordion,"[0, 0, 5, 1, 2, 1, 0, 7, 3, 6, 3, 1, 3, 7, 4, ..."
9141,9142,Faces,"[3, 3, 2, 1, 2, 0, 5, 4, 3, 3, 1, 4, 2, 3, 4, ..."
9142,9143,grand_piano,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 3, 5, 1, 4, ..."


In [20]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder

# Baseline: train a single-hidden-layer MLP on the edge-histogram features and
# report test accuracy plus a labelled confusion matrix.

# One feature list per image; class names are encoded to integer labels.
X = merged_dataset['feature_vector'].tolist()
y = merged_dataset['class_id']
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# 80/20 hold-out split; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

mlp_model = MLPClassifier(hidden_layer_sizes=(100,), activation='relu', solver='adam', random_state=42)
mlp_model.fit(X_train, y_train)
y_pred = mlp_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Pass the full label set explicitly. Without `labels`, confusion_matrix only
# covers labels that occur in y_test or y_pred, so a class missing from both
# would make the matrix smaller than `label_encoder.classes_` and the DataFrame
# construction below would fail with a shape mismatch.
all_labels = list(range(len(label_encoder.classes_)))
conf_matrix = confusion_matrix(y_test, y_pred, labels=all_labels)
conf_df = pd.DataFrame(conf_matrix, index=label_encoder.classes_, columns=label_encoder.classes_)

# Extra summary row: empty everywhere except the last column, which carries the
# overall accuracy. Adding this row turns the integer counts into floats.
conf_df.loc['Accuracy'] = [None] * (len(label_encoder.classes_) - 1) + [accuracy]
print("Confusion Matrix with Accuracy:\n", conf_df)


Accuracy: 0.5265172225259704
Confusion Matrix with Accuracy:
                    BACKGROUND_Google  Faces  Leopards  Motorbikes  accordion  \
BACKGROUND_Google               29.0    1.0       3.0         2.0        0.0   
Faces                            4.0  175.0       0.0         0.0        0.0   
Leopards                         1.0    0.0      27.0         0.0        0.0   
Motorbikes                       0.0    0.0       0.0       169.0        0.0   
accordion                        0.0    0.0       0.0         0.0        9.0   
...                              ...    ...       ...         ...        ...   
wild_cat                         0.0    0.0       1.0         0.0        0.0   
windsor_chair                    0.0    0.0       0.0         0.0        0.0   
wrench                           0.0    0.0       0.0         0.0        0.0   
yin_yang                         1.0    0.0       0.0         0.0        0.0   
Accuracy                         NaN    NaN       NaN     



In [24]:
# Order the class rows and columns case-insensitively. The 'Accuracy' row is a
# summary row, not a class, so it is pinned to the bottom; a plain
# case-insensitive sort would place it between 'accordion' and 'airplanes'.
SUMMARY_ROW = 'Accuracy'
row_order = sorted(conf_df.index, key=lambda label: (label == SUMMARY_ROW, label.lower()))
conf_df = conf_df.loc[row_order]
conf_df = conf_df.sort_index(axis=1, key=lambda x: x.str.lower())

In [25]:
# Write the confusion-matrix table to disk; row labels become the first CSV column.
result_csv_path = 'group110_result2.csv'
conf_df.to_csv(result_csv_path, sep=',', index=True, encoding='utf-8')

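Tune the MLP hyperparameters with a grid search and save the best parameters (and the resulting test accuracy) to a CSV file.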

In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# --- Data preparation -------------------------------------------------------
# Encode the class names as integers and hold out 20% of the images for
# testing; the fixed random_state keeps the split reproducible.
y = merged_dataset['class_id']
X = merged_dataset['feature_vector'].tolist()
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# --- Grid search ------------------------------------------------------------
# 3 layer sizes x 2 activations x 2 solvers x 3 alphas = 36 candidates, each
# evaluated with 3-fold cross-validation on the training split only.
param_grid = dict(
    hidden_layer_sizes=[(50,), (100,), (150,)],
    activation=['relu', 'tanh'],
    solver=['adam', 'lbfgs'],
    alpha=[0.0001, 0.001, 0.01],
)

mlp_model = MLPClassifier(random_state=42)
grid_search = GridSearchCV(estimator=mlp_model, param_grid=param_grid, cv=3, n_jobs=-1)
grid_search.fit(X_train, y_train)
best_mlp_model = grid_search.best_estimator_

# --- Evaluation on the held-out test split ----------------------------------
y_pred = best_mlp_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Best Model Accuracy:", accuracy)

# --- Export the chosen configuration ----------------------------------------
# One (Parameter, Value) row per setting: fixed metadata first, then the tuned
# hyperparameters read back from the best estimator, then the test accuracy.
tuned_names = ['hidden_layer_sizes', 'activation', 'solver', 'alpha']
report_rows = [
    ['classifier_name', 'MLPClassifier'],
    ['library', 'sklearn'],
    ['test_size', 0.2],
]
report_rows += [[name, getattr(best_mlp_model, name)] for name in tuned_names]
report_rows.append(['accuracy', accuracy])

output_df = pd.DataFrame(report_rows, columns=['Parameter', 'Value'])
output_df.to_csv('group110_parameters2.csv', index=True)


Best Model Accuracy: 0.5483870967741935


