In [21]:
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from algo_implementation import logistic_regression

# Load train and test datasets
train_df = pd.read_csv('train_dataset2.csv').drop(columns=['Unnamed: 0'])
test_df = pd.read_csv('test_dataset2.csv').drop(columns=['Unnamed: 0'])

# Extract features and target variable
X_train, y_train = train_df.iloc[:, :-1], train_df.iloc[:, -1]
X_test, y_test = test_df.iloc[:, :-1], test_df.iloc[:, -1]

# Train and evaluate logistic regression
lr = logistic_regression()
lr.fit(X_train, y_train)
lr_pred = lr.predict(X_test)
lr_acc = accuracy_score(y_test, lr_pred)
lr_precision = precision_score(y_test, lr_pred, average='macro')
lr_recall = recall_score(y_test, lr_pred, average='macro')

# Train and evaluate LDA
lda = LinearDiscriminantAnalysis()
lda.fit(X_train, y_train)
lda_pred = lda.predict(X_test)
lda_acc = accuracy_score(y_test, lda_pred)
lda_precision = precision_score(y_test, lda_pred, average='macro')
lda_recall = recall_score(y_test, lda_pred, average='macro')

# Train and evaluate QDA
qda = QuadraticDiscriminantAnalysis()
qda.fit(X_train, y_train)
qda_pred = qda.predict(X_test)
qda_acc = accuracy_score(y_test, qda_pred)
qda_precision = precision_score(y_test, qda_pred, average='macro')
qda_recall = recall_score(y_test, qda_pred, average='macro')

# Train and evaluate KNN
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
knn_pred = knn.predict(X_test)
knn_acc = accuracy_score(y_test, knn_pred)
knn_precision = precision_score(y_test, knn_pred, average='macro')
knn_recall = recall_score(y_test, knn_pred, average='macro')

# Print evaluation results
print('Logistic Regression:')
print(f'Accuracy: {lr_acc}')
print(f'Precision: {lr_precision}')
print(f'Recall: {lr_recall}')
print()
print('LDA:')
print(f'Accuracy: {lda_acc}')
print(f'Precision: {lda_precision}')
print(f'Recall: {lda_recall}')
print()
print('QDA:')
print(f'Accuracy: {qda_acc}')
print(f'Precision: {qda_precision}')
print(f'Recall: {qda_recall}')
print()
print('KNN:')
print(f'Accuracy: {knn_acc}')
print(f'Precision: {knn_precision}')
print(f'Recall: {knn_recall}')


Logistic Regression:
Accuracy: 0.8541666666666666
Precision: 0.8303030303030303
Recall: 0.8303030303030303

LDA:
Accuracy: 0.8229166666666666
Precision: 0.8180952380952381
Recall: 0.753030303030303

QDA:
Accuracy: 0.8333333333333334
Precision: 0.806060606060606
Recall: 0.806060606060606

KNN:
Accuracy: 0.875
Precision: 0.8500948766603416
Recall: 0.8727272727272728


# Interactions

In [22]:
from power_qcut import qcut_fold_validation, prepare_cols_comb

In [23]:
X_train.columns = X_train.columns.str.replace(' ', '_')
X_test.columns = X_test.columns.str.replace(' ', '_')

In [24]:
basic_cols = list(X_train.columns)

In [25]:
col1 = prepare_cols_comb(list(prepare_cols_comb(basic_cols)) + basic_cols)

In [26]:
data = pd.concat([X_train, y_train], axis=1)

In [27]:
import numpy as np
res1 = qcut_fold_validation(data, 4, 'Car', col1, np.mean, 4)

100%|██████████| 1046/1046 [00:00<00:00, 2791.74it/s]


In [28]:
res1.reset_index(drop=True, inplace=True)

In [34]:
X_train['Qcut_Var_1'] = X_train.eval(f'{res1.sort_values("mono").feature[0]}')
X_test['Qcut_Var_1'] = X_test.eval(f'{res1.sort_values("mono").feature[0]}')


In [35]:

# Train and evaluate logistic regression
lr = logistic_regression()
lr.fit(X_train, y_train)
lr_pred = lr.predict(X_test)
lr_acc = accuracy_score(y_test, lr_pred)
lr_precision = precision_score(y_test, lr_pred, average='macro')
lr_recall = recall_score(y_test, lr_pred, average='macro')

# Train and evaluate LDA
lda = LinearDiscriminantAnalysis()
lda.fit(X_train, y_train)
lda_pred = lda.predict(X_test)
lda_acc = accuracy_score(y_test, lda_pred)
lda_precision = precision_score(y_test, lda_pred, average='macro')
lda_recall = recall_score(y_test, lda_pred, average='macro')

# Train and evaluate QDA
qda = QuadraticDiscriminantAnalysis()
qda.fit(X_train, y_train)
qda_pred = qda.predict(X_test)
qda_acc = accuracy_score(y_test, qda_pred)
qda_precision = precision_score(y_test, qda_pred, average='macro')
qda_recall = recall_score(y_test, qda_pred, average='macro')

# Train and evaluate KNN
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
knn_pred = knn.predict(X_test)
knn_acc = accuracy_score(y_test, knn_pred)
knn_precision = precision_score(y_test, knn_pred, average='macro')
knn_recall = recall_score(y_test, knn_pred, average='macro')

# Print evaluation results
print('Logistic Regression:')
print(f'Accuracy: {lr_acc}')
print(f'Precision: {lr_precision}')
print(f'Recall: {lr_recall}')
print()
print('LDA:')
print(f'Accuracy: {lda_acc}')
print(f'Precision: {lda_precision}')
print(f'Recall: {lda_recall}')
print()
print('QDA:')
print(f'Accuracy: {qda_acc}')
print(f'Precision: {qda_precision}')
print(f'Recall: {qda_recall}')
print()
print('KNN:')
print(f'Accuracy: {knn_acc}')
print(f'Precision: {knn_precision}')
print(f'Recall: {knn_recall}')


Logistic Regression:
Accuracy: 0.3125
Precision: 0.15625
Recall: 0.5

LDA:
Accuracy: 0.8229166666666666
Precision: 0.8180952380952381
Recall: 0.753030303030303

QDA:
Accuracy: 0.7708333333333334
Precision: 0.7521367521367521
Recall: 0.6787878787878787

KNN:
Accuracy: 0.8854166666666666
Precision: 0.8621933621933622
Recall: 0.8803030303030304


  _warn_prf(average, modifier, msg_start, len(result))
