In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold,train_test_split,StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.utils import class_weight
from imblearn.over_sampling import RandomOverSampler
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
import warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.preprocessing import PolynomialFeatures

In [10]:
# Read & Adjust Data from CSV file

# We are trying here linear and 2nd, and 3rd order polynomial regression with weighting our data to compromise for the inbalance
# We also applied pca  (a trail to improve performance) on scaled data (as it affects pca severily), to decrease its huge dimensions. As, it's very suspecious to have usless/correlated features.

data = pd.read_csv("C:/Users/Mahmoud/Documents/GitHub/Machine-Learning-Classification-Project/Preprocessing/2017_Accidents_UK_Clean _Mahmoud.csv",dtype=float)
x = data.iloc[:, :-1] 
y = data.iloc[:, -1]  

scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

priciple_component_analayzer = PCA(n_components=10)  
x_pca = priciple_component_analayzer.fit_transform(x_scaled)
x=pd.DataFrame(x_pca)

class_labels = np.unique(y)
class_weights = compute_class_weight(class_weight='balanced', classes=class_labels, y=y)
class_weight_dictionary = {class_labels[i]: weight for i, weight in enumerate(class_weights)}

x_training, x_testing, y_training, y_testing = train_test_split(x, y, test_size=0.15, random_state=10)

x=x_training
y=y_training

In [11]:
# We try different parameters and apply K-folding on them. Our evaluation metrices will be accuracy, precision, and recall.
# We weren't able to apply ROC, as it's a multi-class output, and having 1v2 roc isn't the best metric. 
# We store the results to choose the best one later.
# This is the linear one.

penalty_options = ['l1',None, 'l2']
C_values = [0.001, 0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24]
solver_options = {
    'lbfgs': ['l2', None],
    'liblinear': ['l1', 'l2'],
    'newton-cg': ['l2', None],
    'newton-cholesky': ['l2', None],
    'sag': ['l2', None],
    'saga': ['elasticnet', 'l1', 'l2', None]
}
results = []
warnings.filterwarnings("ignore", category=ConvergenceWarning)

for penalty in penalty_options:
    for C in C_values:
        for solver in solver_options.keys():
            if penalty in solver_options[solver]:
                accuracy_scores = []
                precision_scores = []
                recall_scores = []

                kf = StratifiedKFold(n_splits=5, random_state=10, shuffle=True)

                for train_index, test_index in kf.split(x, y):
                    X_train_fold, X_cross = x.iloc[train_index], x.iloc[test_index]
                    y_train_fold, y_cross = y.iloc[train_index], y.iloc[test_index]
                    
                    logistic_model = LogisticRegression(penalty=penalty, C=C, solver=solver, max_iter=100, class_weight=class_weight_dictionary)
                    logistic_model.fit(X_train_fold, y_train_fold)
                    y_pred = logistic_model.predict(X_cross)

                    accuracy = accuracy_score(y_cross, y_pred)
                    precision = precision_score(y_cross, y_pred, average='macro', zero_division=0)
                    recall = recall_score(y_cross, y_pred, average='macro')

                    accuracy_scores.append(accuracy)
                    precision_scores.append(precision)
                    recall_scores.append(recall)

                this_result = {
                    'penalty': penalty,
                    'C': C,
                    'solver': solver,
                    'accuracy': np.mean(accuracy_scores),
                    'precision': np.mean(precision_scores),
                    'recall': np.mean(recall_scores)
                }
                results.append(this_result)
                print(this_result)


{'penalty': 'l1', 'C': 0.001, 'solver': 'liblinear', 'accuracy': 0.8015936303267054, 'precision': 0.2983228930426587, 'recall': 0.36711750906102913}
{'penalty': 'l1', 'C': 0.001, 'solver': 'saga', 'accuracy': 0.4784154785476565, 'precision': 0.35769917213063246, 'recall': 0.46073190018632104}
{'penalty': 'l1', 'C': 0.01, 'solver': 'liblinear', 'accuracy': 0.7912672837412329, 'precision': 0.2937283194707295, 'recall': 0.37979888869376566}
{'penalty': 'l1', 'C': 0.01, 'solver': 'saga', 'accuracy': 0.46236495178992215, 'precision': 0.3591229049410961, 'recall': 0.4633046014838961}
{'penalty': 'l1', 'C': 0.02, 'solver': 'liblinear', 'accuracy': 0.7906682395853484, 'precision': 0.29374377318512257, 'recall': 0.3809608752920423}
{'penalty': 'l1', 'C': 0.02, 'solver': 'saga', 'accuracy': 0.4616232827660429, 'precision': 0.35907867298044804, 'recall': 0.46300125453161484}
{'penalty': 'l1', 'C': 0.04, 'solver': 'liblinear', 'accuracy': 0.7903449503126436, 'precision': 0.2934405855772301, 'recal



{'penalty': None, 'C': 0.001, 'solver': 'lbfgs', 'accuracy': 0.4609196424291686, 'precision': 0.35917076623219557, 'recall': 0.46303519456109077}




{'penalty': None, 'C': 0.001, 'solver': 'newton-cg', 'accuracy': 0.46092915084411584, 'precision': 0.35917431071180406, 'recall': 0.4630390952495623}




{'penalty': None, 'C': 0.001, 'solver': 'newton-cholesky', 'accuracy': 0.46845996150133973, 'precision': 0.3595603533768036, 'recall': 0.46304935748796827}




{'penalty': None, 'C': 0.001, 'solver': 'sag', 'accuracy': 0.4609101340142214, 'precision': 0.3591808410819935, 'recall': 0.46304559343056495}




{'penalty': None, 'C': 0.001, 'solver': 'saga', 'accuracy': 0.4609196424291686, 'precision': 0.35917298488287813, 'recall': 0.463035194789308}




{'penalty': None, 'C': 0.01, 'solver': 'lbfgs', 'accuracy': 0.4609196424291686, 'precision': 0.35917076623219557, 'recall': 0.46303519456109077}




{'penalty': None, 'C': 0.01, 'solver': 'newton-cg', 'accuracy': 0.46092915084411584, 'precision': 0.35917431071180406, 'recall': 0.4630390952495623}




{'penalty': None, 'C': 0.01, 'solver': 'newton-cholesky', 'accuracy': 0.46845996150133973, 'precision': 0.3595603533768036, 'recall': 0.46304935748796827}




{'penalty': None, 'C': 0.01, 'solver': 'sag', 'accuracy': 0.46090062559927414, 'precision': 0.35916609965764623, 'recall': 0.4630273936405821}




{'penalty': None, 'C': 0.01, 'solver': 'saga', 'accuracy': 0.46093865971113435, 'precision': 0.3591871678431656, 'recall': 0.46305729549597957}




{'penalty': None, 'C': 0.02, 'solver': 'lbfgs', 'accuracy': 0.4609196424291686, 'precision': 0.35917076623219557, 'recall': 0.46303519456109077}




{'penalty': None, 'C': 0.02, 'solver': 'newton-cg', 'accuracy': 0.46092915084411584, 'precision': 0.35917431071180406, 'recall': 0.4630390952495623}




{'penalty': None, 'C': 0.02, 'solver': 'newton-cholesky', 'accuracy': 0.46845996150133973, 'precision': 0.3595603533768036, 'recall': 0.46304935748796827}




{'penalty': None, 'C': 0.02, 'solver': 'sag', 'accuracy': 0.4609291512961871, 'precision': 0.35917423510774216, 'recall': 0.4630390954777795}




{'penalty': None, 'C': 0.02, 'solver': 'saga', 'accuracy': 0.46092915084411584, 'precision': 0.3591838206214658, 'recall': 0.46305339480750796}




{'penalty': None, 'C': 0.04, 'solver': 'lbfgs', 'accuracy': 0.4609196424291686, 'precision': 0.35917076623219557, 'recall': 0.46303519456109077}




{'penalty': None, 'C': 0.04, 'solver': 'newton-cg', 'accuracy': 0.46092915084411584, 'precision': 0.35917431071180406, 'recall': 0.4630390952495623}




{'penalty': None, 'C': 0.04, 'solver': 'newton-cholesky', 'accuracy': 0.46845996150133973, 'precision': 0.3595603533768036, 'recall': 0.46304935748796827}




{'penalty': None, 'C': 0.04, 'solver': 'sag', 'accuracy': 0.4609101335621501, 'precision': 0.3591715163547037, 'recall': 0.46303129387261927}




{'penalty': None, 'C': 0.04, 'solver': 'saga', 'accuracy': 0.46093865971113435, 'precision': 0.35917765793350387, 'recall': 0.46304299593803383}




{'penalty': None, 'C': 0.08, 'solver': 'lbfgs', 'accuracy': 0.4609196424291686, 'precision': 0.35917076623219557, 'recall': 0.46303519456109077}




{'penalty': None, 'C': 0.08, 'solver': 'newton-cg', 'accuracy': 0.46092915084411584, 'precision': 0.35917431071180406, 'recall': 0.4630390952495623}




{'penalty': None, 'C': 0.08, 'solver': 'newton-cholesky', 'accuracy': 0.46845996150133973, 'precision': 0.3595603533768036, 'recall': 0.46304935748796827}




{'penalty': None, 'C': 0.08, 'solver': 'sag', 'accuracy': 0.4609386592590631, 'precision': 0.3591735581303811, 'recall': 0.46304299593803383}




{'penalty': None, 'C': 0.08, 'solver': 'saga', 'accuracy': 0.46091964197709734, 'precision': 0.3591688857597105, 'recall': 0.46303519456109077}




{'penalty': None, 'C': 0.16, 'solver': 'lbfgs', 'accuracy': 0.4609196424291686, 'precision': 0.35917076623219557, 'recall': 0.46303519456109077}




{'penalty': None, 'C': 0.16, 'solver': 'newton-cg', 'accuracy': 0.46092915084411584, 'precision': 0.35917431071180406, 'recall': 0.4630390952495623}




{'penalty': None, 'C': 0.16, 'solver': 'newton-cholesky', 'accuracy': 0.46845996150133973, 'precision': 0.3595603533768036, 'recall': 0.46304935748796827}




{'penalty': None, 'C': 0.16, 'solver': 'sag', 'accuracy': 0.46092915084411584, 'precision': 0.3591773349176438, 'recall': 0.46305339457929084}




{'penalty': None, 'C': 0.16, 'solver': 'saga', 'accuracy': 0.4609291512961871, 'precision': 0.359176332104578, 'recall': 0.4630390954777795}




{'penalty': None, 'C': 0.32, 'solver': 'lbfgs', 'accuracy': 0.4609196424291686, 'precision': 0.35917076623219557, 'recall': 0.46303519456109077}




{'penalty': None, 'C': 0.32, 'solver': 'newton-cg', 'accuracy': 0.46092915084411584, 'precision': 0.35917431071180406, 'recall': 0.4630390952495623}




{'penalty': None, 'C': 0.32, 'solver': 'newton-cholesky', 'accuracy': 0.46845996150133973, 'precision': 0.3595603533768036, 'recall': 0.46304935748796827}




{'penalty': None, 'C': 0.32, 'solver': 'sag', 'accuracy': 0.46091964197709734, 'precision': 0.35917502827944003, 'recall': 0.46303519456109077}




{'penalty': None, 'C': 0.32, 'solver': 'saga', 'accuracy': 0.46091964197709734, 'precision': 0.3591688857597105, 'recall': 0.46303519456109077}




{'penalty': None, 'C': 0.64, 'solver': 'lbfgs', 'accuracy': 0.4609196424291686, 'precision': 0.35917076623219557, 'recall': 0.46303519456109077}




{'penalty': None, 'C': 0.64, 'solver': 'newton-cg', 'accuracy': 0.46092915084411584, 'precision': 0.35917431071180406, 'recall': 0.4630390952495623}




{'penalty': None, 'C': 0.64, 'solver': 'newton-cholesky', 'accuracy': 0.46845996150133973, 'precision': 0.3595603533768036, 'recall': 0.46304935748796827}




{'penalty': None, 'C': 0.64, 'solver': 'sag', 'accuracy': 0.46092915084411584, 'precision': 0.3591722646267584, 'recall': 0.4630390952495623}




{'penalty': None, 'C': 0.64, 'solver': 'saga', 'accuracy': 0.46093865971113435, 'precision': 0.35917765793350387, 'recall': 0.46304299593803383}




{'penalty': None, 'C': 1.28, 'solver': 'lbfgs', 'accuracy': 0.4609196424291686, 'precision': 0.35917076623219557, 'recall': 0.46303519456109077}




{'penalty': None, 'C': 1.28, 'solver': 'newton-cg', 'accuracy': 0.46092915084411584, 'precision': 0.35917431071180406, 'recall': 0.4630390952495623}




{'penalty': None, 'C': 1.28, 'solver': 'newton-cholesky', 'accuracy': 0.46845996150133973, 'precision': 0.3595603533768036, 'recall': 0.46304935748796827}




{'penalty': None, 'C': 1.28, 'solver': 'sag', 'accuracy': 0.46091964197709734, 'precision': 0.35917839566937226, 'recall': 0.4630494941190365}




{'penalty': None, 'C': 1.28, 'solver': 'saga', 'accuracy': 0.4609196424291686, 'precision': 0.3591750006114599, 'recall': 0.463035194789308}




{'penalty': None, 'C': 2.56, 'solver': 'lbfgs', 'accuracy': 0.4609196424291686, 'precision': 0.35917076623219557, 'recall': 0.46303519456109077}




{'penalty': None, 'C': 2.56, 'solver': 'newton-cg', 'accuracy': 0.46092915084411584, 'precision': 0.35917431071180406, 'recall': 0.4630390952495623}




{'penalty': None, 'C': 2.56, 'solver': 'newton-cholesky', 'accuracy': 0.46845996150133973, 'precision': 0.3595603533768036, 'recall': 0.46304935748796827}




{'penalty': None, 'C': 2.56, 'solver': 'sag', 'accuracy': 0.4609386601632056, 'precision': 0.35917539609354765, 'recall': 0.46304299593803383}




{'penalty': None, 'C': 2.56, 'solver': 'saga', 'accuracy': 0.46091964197709734, 'precision': 0.3591782531944, 'recall': 0.4630494938908193}




{'penalty': None, 'C': 5.12, 'solver': 'lbfgs', 'accuracy': 0.4609196424291686, 'precision': 0.35917076623219557, 'recall': 0.46303519456109077}




{'penalty': None, 'C': 5.12, 'solver': 'newton-cg', 'accuracy': 0.46092915084411584, 'precision': 0.35917431071180406, 'recall': 0.4630390952495623}




{'penalty': None, 'C': 5.12, 'solver': 'newton-cholesky', 'accuracy': 0.46845996150133973, 'precision': 0.3595603533768036, 'recall': 0.46304935748796827}




{'penalty': None, 'C': 5.12, 'solver': 'sag', 'accuracy': 0.46093865971113435, 'precision': 0.3591797060996365, 'recall': 0.46304299593803383}




{'penalty': None, 'C': 5.12, 'solver': 'saga', 'accuracy': 0.4609101335621501, 'precision': 0.35917165338976004, 'recall': 0.46303129410083643}




{'penalty': None, 'C': 10.24, 'solver': 'lbfgs', 'accuracy': 0.4609196424291686, 'precision': 0.35917076623219557, 'recall': 0.46303519456109077}




{'penalty': None, 'C': 10.24, 'solver': 'newton-cg', 'accuracy': 0.46092915084411584, 'precision': 0.35917431071180406, 'recall': 0.4630390952495623}




{'penalty': None, 'C': 10.24, 'solver': 'newton-cholesky', 'accuracy': 0.46845996150133973, 'precision': 0.3595603533768036, 'recall': 0.46304935748796827}




{'penalty': None, 'C': 10.24, 'solver': 'sag', 'accuracy': 0.4608911167322557, 'precision': 0.3591647528301901, 'recall': 0.46302349318032787}




{'penalty': None, 'C': 10.24, 'solver': 'saga', 'accuracy': 0.4609196424291686, 'precision': 0.359175033166332, 'recall': 0.463035194789308}
{'penalty': 'l2', 'C': 0.001, 'solver': 'lbfgs', 'accuracy': 0.46114785071689984, 'precision': 0.3589276760943831, 'recall': 0.46243709850338666}
{'penalty': 'l2', 'C': 0.001, 'solver': 'liblinear', 'accuracy': 0.789260969309239, 'precision': 0.2930733458993787, 'recall': 0.38132291769425075}




{'penalty': 'l2', 'C': 0.001, 'solver': 'newton-cg', 'accuracy': 0.46113834184988134, 'precision': 0.3589242904296178, 'recall': 0.46243319781491515}
{'penalty': 'l2', 'C': 0.001, 'solver': 'newton-cholesky', 'accuracy': 0.4686691516029627, 'precision': 0.3595840240892864, 'recall': 0.46320033019560275}
{'penalty': 'l2', 'C': 0.001, 'solver': 'sag', 'accuracy': 0.46114785071689984, 'precision': 0.3589295601427981, 'recall': 0.4624370982751695}
{'penalty': 'l2', 'C': 0.001, 'solver': 'saga', 'accuracy': 0.46114785071689984, 'precision': 0.3589276760943831, 'recall': 0.46243709850338666}
{'penalty': 'l2', 'C': 0.01, 'solver': 'lbfgs', 'accuracy': 0.460986202690013, 'precision': 0.35918363567454825, 'recall': 0.46306249013053524}
{'penalty': 'l2', 'C': 0.01, 'solver': 'liblinear', 'accuracy': 0.7899836233115087, 'precision': 0.2932858095346454, 'recall': 0.38068002937356854}
{'penalty': 'l2', 'C': 0.01, 'solver': 'newton-cg', 'accuracy': 0.4610052195199074, 'precision': 0.3591905208590206



{'penalty': 'l2', 'C': 0.04, 'solver': 'newton-cg', 'accuracy': 0.46095767654102887, 'precision': 0.35918035029103657, 'recall': 0.463050797543194}
{'penalty': 'l2', 'C': 0.04, 'solver': 'newton-cholesky', 'accuracy': 0.4684979951611286, 'precision': 0.35957769863569056, 'recall': 0.4630792638541852}
{'penalty': 'l2', 'C': 0.04, 'solver': 'sag', 'accuracy': 0.4609386597111344, 'precision': 0.35917124460076705, 'recall': 0.4630429963944683}
{'penalty': 'l2', 'C': 0.04, 'solver': 'saga', 'accuracy': 0.4609386597111344, 'precision': 0.3591755244467245, 'recall': 0.4630429963944683}
{'penalty': 'l2', 'C': 0.08, 'solver': 'lbfgs', 'accuracy': 0.46094816812608164, 'precision': 0.35918221911003584, 'recall': 0.46306119618445096}
{'penalty': 'l2', 'C': 0.08, 'solver': 'liblinear', 'accuracy': 0.7900692008543188, 'precision': 0.29333645386900614, 'recall': 0.3807151348851606}
{'penalty': 'l2', 'C': 0.08, 'solver': 'newton-cg', 'accuracy': 0.46094816812608164, 'precision': 0.3591844393198288, 'r

In [12]:
# We favor precision and recall over accuracy ; however, we still take accuracy into consideration. 
# So we take the top 10 accuracy scores passing the average precision and recall values.
# After this we, take the top 5 of them based on precision. Then we take the one having the maximum recall.

precision_values = [result['precision'] for result in results]
all_models_average_precision_scores = np.mean(precision_values)

recall_values = [result['recall'] for result in results]
all_models_average_recall_scores = np.mean(recall_values)

filtered_results = []
for result in results:
    if result['precision'] >= all_models_average_precision_scores and result['recall'] >= all_models_average_recall_scores :
        filtered_results.append(result)
        
top_10_results = sorted(filtered_results, key=lambda x: x['accuracy'], reverse=True)[:10]

precision_values = [result['precision'] for result in top_10_results]
top_10_results_average_precision_scores = np.mean(precision_values)


filtered_results_2 = []
for result in top_10_results:
    if result['precision'] >= top_10_results_average_precision_scores:
        filtered_results_2.append(result)
top_5_results = sorted(filtered_results_2, key=lambda x: x['precision'], reverse=True)[:5]

best_model_1 = max(top_5_results, key=lambda x: x['recall'])

print(best_model_1)

{'penalty': 'l2', 'C': 0.001, 'solver': 'newton-cholesky', 'accuracy': 0.4686691516029627, 'precision': 0.3595840240892864, 'recall': 0.46320033019560275}


In [30]:
# We try different parameters and apply K-folding on them. Our evaluation metrices will be accuracy, precision, and recall.
# We weren't able to apply ROC, as it's a multi-class output, and having 1v2 roc isn't the best metric. 
# We store the results to choose the best one later.
# This is the 2nd order one.

penalty_options = ['l1', None, 'l2']
C_values = [0.001, 0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24]
solver_options = {
    'lbfgs': ['l2', None],
    'liblinear': ['l1', 'l2'],
    'newton-cg': ['l2', None],
    'newton-cholesky': ['l2', None],
    'sag': ['l2', None],
    'saga': ['elasticnet', 'l1', 'l2', None]
}
results = []
warnings.filterwarnings("ignore", category=ConvergenceWarning)

polynomial_transformer = PolynomialFeatures(degree=2)
x_poly = polynomial_transformer.fit_transform(x)  

for penalty in penalty_options:
    for C in C_values:
        for solver in solver_options.keys():
            if penalty in solver_options[solver]:
                accuracy_scores = []
                precision_scores = []
                recall_scores = []

                kf = StratifiedKFold(n_splits=5, random_state=10, shuffle=True)

                for train_index, test_index in kf.split(x_poly, y): 
                    X_train_fold, X_cross = x_poly[train_index], x_poly[test_index]
                    y_train_fold, y_cross = y.iloc[train_index], y.iloc[test_index]

                    logistic_model = LogisticRegression(penalty=penalty, C=C, solver=solver, max_iter=100, class_weight=class_weight_dictionary)
                    logistic_model.fit(X_train_fold, y_train_fold)
                    y_pred = logistic_model.predict(X_cross)

                    accuracy = accuracy_score(y_cross, y_pred)
                    precision = precision_score(y_cross, y_pred, average='macro', zero_division=0)
                    recall = recall_score(y_cross, y_pred, average='macro')

                    accuracy_scores.append(accuracy)
                    precision_scores.append(precision)
                    recall_scores.append(recall)

                this_result = {
                    'penalty': penalty,
                    'C': C,
                    'solver': solver,
                    'accuracy': np.mean(accuracy_scores),
                    'precision': np.mean(precision_scores),
                    'recall': np.mean(recall_scores)
                }
                results.append(this_result)
                print(this_result)


{'penalty': 'l1', 'C': 0.001, 'solver': 'liblinear', 'accuracy': 0.7973432892807514, 'precision': 0.29605781514496693, 'recall': 0.37383308995300896}
{'penalty': 'l1', 'C': 0.001, 'solver': 'saga', 'accuracy': 0.5017685421245498, 'precision': 0.3622976553671266, 'recall': 0.47054260355137956}
{'penalty': 'l1', 'C': 0.01, 'solver': 'liblinear', 'accuracy': 0.7805415936735447, 'precision': 0.4018916993142879, 'recall': 0.39498299272348214}
{'penalty': 'l1', 'C': 0.01, 'solver': 'saga', 'accuracy': 0.48538524389929716, 'precision': 0.3676976791693315, 'recall': 0.47533269372369924}
{'penalty': 'l1', 'C': 0.02, 'solver': 'liblinear', 'accuracy': 0.7786113470131997, 'precision': 0.40529932569883637, 'recall': 0.3961493337104839}
{'penalty': 'l1', 'C': 0.02, 'solver': 'saga', 'accuracy': 0.48389240014901896, 'precision': 0.3676030791818282, 'recall': 0.47510546030029904}
{'penalty': 'l1', 'C': 0.04, 'solver': 'liblinear', 'accuracy': 0.7774512995943595, 'precision': 0.3931293580621964, 'reca



{'penalty': None, 'C': 0.001, 'solver': 'lbfgs', 'accuracy': 0.4894168755789705, 'precision': 0.3680627209479357, 'recall': 0.4751293583069969}




{'penalty': None, 'C': 0.001, 'solver': 'newton-cg', 'accuracy': 0.48939785874907604, 'precision': 0.36807207786981144, 'recall': 0.4751215616690905}


Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remed

Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=9.56875e-19): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=5.39276e-18): result may not be accurate.


{'penalty': None, 'C': 0.001, 'solver': 'newton-cholesky', 'accuracy': 0.49234552796026654, 'precision': 0.36757969674555147, 'recall': 0.4734515292023248}




{'penalty': None, 'C': 0.001, 'solver': 'sag', 'accuracy': 0.48529965641091916, 'precision': 0.36807423184019067, 'recall': 0.47448154942726006}




{'penalty': None, 'C': 0.001, 'solver': 'saga', 'accuracy': 0.48168639363271, 'precision': 0.3675239428490916, 'recall': 0.4747956153554001}




{'penalty': None, 'C': 0.01, 'solver': 'lbfgs', 'accuracy': 0.4894168755789705, 'precision': 0.3680627209479357, 'recall': 0.4751293583069969}




{'penalty': None, 'C': 0.01, 'solver': 'newton-cg', 'accuracy': 0.48939785874907604, 'precision': 0.36807207786981144, 'recall': 0.4751215616690905}


Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remed

Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=9.56875e-19): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=5.39276e-18): result may not be accurate.


{'penalty': None, 'C': 0.01, 'solver': 'newton-cholesky', 'accuracy': 0.49234552796026654, 'precision': 0.36757969674555147, 'recall': 0.4734515292023248}




{'penalty': None, 'C': 0.01, 'solver': 'sag', 'accuracy': 0.4852901470918294, 'precision': 0.36805031579449105, 'recall': 0.47458400744784257}




{'penalty': None, 'C': 0.01, 'solver': 'saga', 'accuracy': 0.4817434459306784, 'precision': 0.36753821230266315, 'recall': 0.47491107412651806}




{'penalty': None, 'C': 0.02, 'solver': 'lbfgs', 'accuracy': 0.4894168755789705, 'precision': 0.3680627209479357, 'recall': 0.4751293583069969}




{'penalty': None, 'C': 0.02, 'solver': 'newton-cg', 'accuracy': 0.48939785874907604, 'precision': 0.36807207786981144, 'recall': 0.4751215616690905}


Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remed

Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=9.56875e-19): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=5.39276e-18): result may not be accurate.


{'penalty': None, 'C': 0.02, 'solver': 'newton-cholesky', 'accuracy': 0.49234552796026654, 'precision': 0.36757969674555147, 'recall': 0.4734515292023248}




{'penalty': None, 'C': 0.02, 'solver': 'sag', 'accuracy': 0.4852426032088082, 'precision': 0.3680227750242787, 'recall': 0.4746217065198701}




{'penalty': None, 'C': 0.02, 'solver': 'saga', 'accuracy': 0.4818195173188977, 'precision': 0.3675614497255676, 'recall': 0.4747930227078334}




{'penalty': None, 'C': 0.04, 'solver': 'lbfgs', 'accuracy': 0.4894168755789705, 'precision': 0.3680627209479357, 'recall': 0.4751293583069969}




{'penalty': None, 'C': 0.04, 'solver': 'newton-cg', 'accuracy': 0.48939785874907604, 'precision': 0.36807207786981144, 'recall': 0.4751215616690905}


Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remed

Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=9.56875e-19): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=5.39276e-18): result may not be accurate.


{'penalty': None, 'C': 0.04, 'solver': 'newton-cholesky', 'accuracy': 0.49234552796026654, 'precision': 0.36757969674555147, 'recall': 0.4734515292023248}




{'penalty': None, 'C': 0.04, 'solver': 'sag', 'accuracy': 0.4855659042353658, 'precision': 0.3678669894994772, 'recall': 0.4746810460778953}




{'penalty': None, 'C': 0.04, 'solver': 'saga', 'accuracy': 0.48178148320703756, 'precision': 0.36762814833808394, 'recall': 0.47497761102658076}




{'penalty': None, 'C': 0.08, 'solver': 'lbfgs', 'accuracy': 0.4894168755789705, 'precision': 0.3680627209479357, 'recall': 0.4751293583069969}




{'penalty': None, 'C': 0.08, 'solver': 'newton-cg', 'accuracy': 0.48939785874907604, 'precision': 0.36807207786981144, 'recall': 0.4751215616690905}


Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remed

Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=9.56875e-19): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=5.39276e-18): result may not be accurate.


{'penalty': None, 'C': 0.08, 'solver': 'newton-cholesky', 'accuracy': 0.49234552796026654, 'precision': 0.36757969674555147, 'recall': 0.4734515292023248}




{'penalty': None, 'C': 0.08, 'solver': 'sag', 'accuracy': 0.4853281821078321, 'precision': 0.3680413248650495, 'recall': 0.4744217543875977}




{'penalty': None, 'C': 0.08, 'solver': 'saga', 'accuracy': 0.481610320888277, 'precision': 0.36764349111321054, 'recall': 0.4751075890220754}




{'penalty': None, 'C': 0.16, 'solver': 'lbfgs', 'accuracy': 0.4894168755789705, 'precision': 0.3680627209479357, 'recall': 0.4751293583069969}




{'penalty': None, 'C': 0.16, 'solver': 'newton-cg', 'accuracy': 0.48939785874907604, 'precision': 0.36807207786981144, 'recall': 0.4751215616690905}


Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remed

Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=9.56875e-19): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=5.39276e-18): result may not be accurate.


{'penalty': None, 'C': 0.16, 'solver': 'newton-cholesky', 'accuracy': 0.49234552796026654, 'precision': 0.36757969674555147, 'recall': 0.4734515292023248}




{'penalty': None, 'C': 0.16, 'solver': 'sag', 'accuracy': 0.4851760420438212, 'precision': 0.36797966087882206, 'recall': 0.4744800018672704}




{'penalty': None, 'C': 0.16, 'solver': 'saga', 'accuracy': 0.4817529575101246, 'precision': 0.36757190407206164, 'recall': 0.4748801184064835}




{'penalty': None, 'C': 0.32, 'solver': 'lbfgs', 'accuracy': 0.4894168755789705, 'precision': 0.3680627209479357, 'recall': 0.4751293583069969}




{'penalty': None, 'C': 0.32, 'solver': 'newton-cg', 'accuracy': 0.48939785874907604, 'precision': 0.36807207786981144, 'recall': 0.4751215616690905}


Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remed

Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=9.56875e-19): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=5.39276e-18): result may not be accurate.


{'penalty': None, 'C': 0.32, 'solver': 'newton-cholesky', 'accuracy': 0.49234552796026654, 'precision': 0.36757969674555147, 'recall': 0.4734515292023248}




{'penalty': None, 'C': 0.32, 'solver': 'sag', 'accuracy': 0.4851760411396787, 'precision': 0.3679690394034532, 'recall': 0.4744800018672704}




{'penalty': None, 'C': 0.32, 'solver': 'saga', 'accuracy': 0.48158179790379163, 'precision': 0.3675525310548376, 'recall': 0.4749814981989555}




{'penalty': None, 'C': 0.64, 'solver': 'lbfgs', 'accuracy': 0.4894168755789705, 'precision': 0.3680627209479357, 'recall': 0.4751293583069969}




{'penalty': None, 'C': 0.64, 'solver': 'newton-cg', 'accuracy': 0.48939785874907604, 'precision': 0.36807207786981144, 'recall': 0.4751215616690905}


Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remed

Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=9.56875e-19): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=5.39276e-18): result may not be accurate.


{'penalty': None, 'C': 0.64, 'solver': 'newton-cholesky', 'accuracy': 0.49234552796026654, 'precision': 0.36757969674555147, 'recall': 0.4734515292023248}




{'penalty': None, 'C': 0.64, 'solver': 'sag', 'accuracy': 0.48524260366087946, 'precision': 0.3680200436018496, 'recall': 0.47440095248833647}




{'penalty': None, 'C': 0.64, 'solver': 'saga', 'accuracy': 0.48166737589867303, 'precision': 0.36753048450288067, 'recall': 0.47483070722860593}




{'penalty': None, 'C': 1.28, 'solver': 'lbfgs', 'accuracy': 0.4894168755789705, 'precision': 0.3680627209479357, 'recall': 0.4751293583069969}




{'penalty': None, 'C': 1.28, 'solver': 'newton-cg', 'accuracy': 0.48939785874907604, 'precision': 0.36807207786981144, 'recall': 0.4751215616690905}


Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remed

Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=9.56875e-19): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=5.39276e-18): result may not be accurate.


{'penalty': None, 'C': 1.28, 'solver': 'newton-cholesky', 'accuracy': 0.49234552796026654, 'precision': 0.36757969674555147, 'recall': 0.4734515292023248}




{'penalty': None, 'C': 1.28, 'solver': 'sag', 'accuracy': 0.4852235850226999, 'precision': 0.3679690832505128, 'recall': 0.4742501575858912}




{'penalty': None, 'C': 1.28, 'solver': 'saga', 'accuracy': 0.48242808616429517, 'precision': 0.3676441254352833, 'recall': 0.47503284500917575}




{'penalty': None, 'C': 2.56, 'solver': 'lbfgs', 'accuracy': 0.4894168755789705, 'precision': 0.3680627209479357, 'recall': 0.4751293583069969}




{'penalty': None, 'C': 2.56, 'solver': 'newton-cg', 'accuracy': 0.48939785874907604, 'precision': 0.36807207786981144, 'recall': 0.4751215616690905}


Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remed

Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=9.56875e-19): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=5.39276e-18): result may not be accurate.


{'penalty': None, 'C': 2.56, 'solver': 'newton-cholesky', 'accuracy': 0.49234552796026654, 'precision': 0.36757969674555147, 'recall': 0.4734515292023248}




{'penalty': None, 'C': 2.56, 'solver': 'sag', 'accuracy': 0.4852901470918294, 'precision': 0.3680671418569056, 'recall': 0.47465550455291944}




{'penalty': None, 'C': 2.56, 'solver': 'saga', 'accuracy': 0.481705411366747, 'precision': 0.3676056779171072, 'recall': 0.4749178070875689}




{'penalty': None, 'C': 5.12, 'solver': 'lbfgs', 'accuracy': 0.4894168755789705, 'precision': 0.3680627209479357, 'recall': 0.4751293583069969}




{'penalty': None, 'C': 5.12, 'solver': 'newton-cg', 'accuracy': 0.48939785874907604, 'precision': 0.36807207786981144, 'recall': 0.4751215616690905}


Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remed

Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=9.56875e-19): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=5.39276e-18): result may not be accurate.


{'penalty': None, 'C': 5.12, 'solver': 'newton-cholesky', 'accuracy': 0.49234552796026654, 'precision': 0.36757969674555147, 'recall': 0.4734515292023248}




{'penalty': None, 'C': 5.12, 'solver': 'sag', 'accuracy': 0.4854708160172521, 'precision': 0.3678598749554666, 'recall': 0.47434979890394596}




{'penalty': None, 'C': 5.12, 'solver': 'saga', 'accuracy': 0.48172442910078395, 'precision': 0.3675635583692968, 'recall': 0.47492561297533137}




{'penalty': None, 'C': 10.24, 'solver': 'lbfgs', 'accuracy': 0.4894168755789705, 'precision': 0.3680627209479357, 'recall': 0.4751293583069969}




{'penalty': None, 'C': 10.24, 'solver': 'newton-cg', 'accuracy': 0.48939785874907604, 'precision': 0.36807207786981144, 'recall': 0.4751215616690905}


Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=2.58881e-17): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remed

Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=9.56875e-19): result may not be accurate.
Further options are to use another solver or to avoid such situation in the first place. Possible remedies are removing collinear features of X or increasing the penalization strengths.
The original Linear Algebra message was:
Ill-conditioned matrix (rcond=5.39276e-18): result may not be accurate.


{'penalty': None, 'C': 10.24, 'solver': 'newton-cholesky', 'accuracy': 0.49234552796026654, 'precision': 0.36757969674555147, 'recall': 0.4734515292023248}




{'penalty': None, 'C': 10.24, 'solver': 'sag', 'accuracy': 0.4848622480759971, 'precision': 0.3679711857903024, 'recall': 0.47418146896864394}




{'penalty': None, 'C': 10.24, 'solver': 'saga', 'accuracy': 0.4818195191271828, 'precision': 0.36766218084451296, 'recall': 0.47493601668977004}
{'penalty': 'l2', 'C': 0.001, 'solver': 'lbfgs', 'accuracy': 0.49068149793145077, 'precision': 0.3678939901936189, 'recall': 0.4743085381187305}
{'penalty': 'l2', 'C': 0.001, 'solver': 'liblinear', 'accuracy': 0.7799045167620138, 'precision': 0.39987036763918204, 'recall': 0.39581825885732014}
{'penalty': 'l2', 'C': 0.001, 'solver': 'newton-cg', 'accuracy': 0.49106184854354906, 'precision': 0.3679348655278575, 'recall': 0.47411513729929916}
{'penalty': 'l2', 'C': 0.001, 'solver': 'newton-cholesky', 'accuracy': 0.49530269835326396, 'precision': 0.3678516318143169, 'recall': 0.4738183036386916}
{'penalty': 'l2', 'C': 0.001, 'solver': 'sag', 'accuracy': 0.48666888941066955, 'precision': 0.3677855376411139, 'recall': 0.4746866179737196}
{'penalty': 'l2', 'C': 0.001, 'solver': 'saga', 'accuracy': 0.48309365938810683, 'precision': 0.3679006405840396



{'penalty': 'l2', 'C': 0.01, 'solver': 'newton-cg', 'accuracy': 0.4896450802501316, 'precision': 0.3679818506469942, 'recall': 0.4748878446606638}
{'penalty': 'l2', 'C': 0.01, 'solver': 'newton-cholesky', 'accuracy': 0.492792431600069, 'precision': 0.36765651611957856, 'recall': 0.47407549950810396}
{'penalty': 'l2', 'C': 0.01, 'solver': 'sag', 'accuracy': 0.48515702476185557, 'precision': 0.3678883492810827, 'recall': 0.4740458555358808}
{'penalty': 'l2', 'C': 0.01, 'solver': 'saga', 'accuracy': 0.48198116489371323, 'precision': 0.3676927539787174, 'recall': 0.47567888302512384}
{'penalty': 'l2', 'C': 0.02, 'solver': 'lbfgs', 'accuracy': 0.48931227713762454, 'precision': 0.36792303547088434, 'recall': 0.474972055867765}
{'penalty': 'l2', 'C': 0.02, 'solver': 'liblinear', 'accuracy': 0.7768237320019183, 'precision': 0.3922692924834318, 'recall': 0.39795236419972113}
{'penalty': 'l2', 'C': 0.02, 'solver': 'newton-cg', 'accuracy': 0.48947392471244006, 'precision': 0.36797463873420927, 'r



{'penalty': 'l2', 'C': 0.32, 'solver': 'newton-cg', 'accuracy': 0.48942638399391775, 'precision': 0.368037097586068, 'recall': 0.4751046648468309}
{'penalty': 'l2', 'C': 0.32, 'solver': 'newton-cholesky', 'accuracy': 0.4924120868648972, 'precision': 0.36769903520917796, 'recall': 0.47404902566130713}
{'penalty': 'l2', 'C': 0.32, 'solver': 'sag', 'accuracy': 0.4852140757036102, 'precision': 0.3679886584413665, 'recall': 0.47450990372266777}
{'penalty': 'l2', 'C': 0.32, 'solver': 'saga', 'accuracy': 0.4817719738879477, 'precision': 0.3676037844695905, 'recall': 0.4751658605147148}
{'penalty': 'l2', 'C': 0.64, 'solver': 'lbfgs', 'accuracy': 0.4893883498820576, 'precision': 0.3680940017211366, 'recall': 0.47516055423076786}
{'penalty': 'l2', 'C': 0.64, 'solver': 'liblinear', 'accuracy': 0.7764053531548862, 'precision': 0.3909426695280907, 'recall': 0.39808727084085516}
{'penalty': 'l2', 'C': 0.64, 'solver': 'newton-cg', 'accuracy': 0.48942638399391775, 'precision': 0.36807106463205413, 're



{'penalty': 'l2', 'C': 1.28, 'solver': 'newton-cg', 'accuracy': 0.48943589286093625, 'precision': 0.36807699462803745, 'recall': 0.47513716419475943}
{'penalty': 'l2', 'C': 1.28, 'solver': 'newton-cholesky', 'accuracy': 0.4923930695829314, 'precision': 0.3676756583253392, 'recall': 0.4738061758903559}
{'penalty': 'l2', 'C': 1.28, 'solver': 'sag', 'accuracy': 0.4852235854747712, 'precision': 0.3680027760234543, 'recall': 0.4745280994582656}
{'penalty': 'l2', 'C': 1.28, 'solver': 'saga', 'accuracy': 0.48187657232929376, 'precision': 0.3676234973638748, 'recall': 0.475094377960893}
{'penalty': 'l2', 'C': 2.56, 'solver': 'lbfgs', 'accuracy': 0.48939785739286223, 'precision': 0.3680557866157221, 'recall': 0.47512156189730775}
{'penalty': 'l2', 'C': 2.56, 'solver': 'liblinear', 'accuracy': 0.7763863363249918, 'precision': 0.39093527698293873, 'recall': 0.3980794696921293}
{'penalty': 'l2', 'C': 2.56, 'solver': 'newton-cg', 'accuracy': 0.48943589286093625, 'precision': 0.36807699462803745, 'r

In [31]:
# We favor precision and recall over accuracy ; however, we still take accuracy into consideration. 
# So we take the top 10 accuracy scores passing the average precision and recall values.
# After this we, take the top 5 of them based on precision. Then we take the one having the maximum recall.

precision_values = [result['precision'] for result in results]

all_models_average_precision_scores = np.mean(precision_values)

recall_values = [result['recall'] for result in results]
all_models_average_recall_scores = np.mean(recall_values)

filtered_results = []
for result in results:
    if result['precision'] >= all_models_average_precision_scores or result['recall'] >= all_models_average_recall_scores:
        filtered_results.append(result)     
top_10_results = sorted(filtered_results, key=lambda x: x['accuracy'], reverse=True)[:10]

precision_values = [result['precision'] for result in top_10_results]
top_10_results_average_precision_scores = np.mean(precision_values)


filtered_results_2 = []
for result in top_10_results:
    if result['precision'] >= top_10_results_average_precision_scores:
        filtered_results_2.append(result)
top_5_results = sorted(filtered_results_2, key=lambda x: x['precision'], reverse=True)[:5]

best_model_2 = max(top_5_results, key=lambda x: x['recall'])

print(best_model_2)

{'penalty': 'l1', 'C': 0.02, 'solver': 'liblinear', 'accuracy': 0.7786113470131997, 'precision': 0.40529932569883637, 'recall': 0.3961493337104839}


In [32]:
# We try different parameters and apply K-folding on them. Our evaluation metrices will be accuracy, precision, and recall.
# We weren't able to apply ROC, as it's a multi-class output, and having 1v2 roc isn't the best metric. 
# We store the results to choose the best one later.
# This is the 3rd order one.

penalty_options = ['l1', None, 'l2']
C_values = [0.001, 0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24]
solver_options = {
    'lbfgs': ['l2', None],
    'liblinear': ['l1', 'l2'],
    'newton-cg': ['l2', None],
    'newton-cholesky': ['l2', None],
    'sag': ['l2', None],
    'saga': ['elasticnet', 'l1', 'l2', None]
}
results = []
warnings.filterwarnings("ignore", category=ConvergenceWarning)

polynomial_transformer = PolynomialFeatures(degree=3)
x_poly = polynomial_transformer.fit_transform(x)  

for penalty in penalty_options:
    for C in C_values:
        for solver in solver_options.keys():
            if penalty in solver_options[solver]:
                accuracy_scores = []
                precision_scores = []
                recall_scores = []

                kf = StratifiedKFold(n_splits=5, random_state=10, shuffle=True)

                for train_index, test_index in kf.split(x_poly, y):
                    X_train_fold, X_cross = x_poly[train_index], x_poly[test_index]
                    y_train_fold, y_cross = y.iloc[train_index], y.iloc[test_index]

                    logistic_model = LogisticRegression(penalty=penalty, C=C, solver=solver, max_iter=100, class_weight=class_weight_dictionary)
                    logistic_model.fit(X_train_fold, y_train_fold)
                    y_pred = logistic_model.predict(X_cross)

                    accuracy = accuracy_score(y_cross, y_pred)
                    precision = precision_score(y_cross, y_pred, average='macro', zero_division=0)
                    recall = recall_score(y_cross, y_pred, average='macro')

                    accuracy_scores.append(accuracy)
                    precision_scores.append(precision)
                    recall_scores.append(recall)

                this_result = {
                    'penalty': penalty,
                    'C': C,
                    'solver': solver,
                    'accuracy': np.mean(accuracy_scores),
                    'precision': np.mean(precision_scores),
                    'recall': np.mean(recall_scores)
                }
                results.append(this_result)
                print(this_result)


{'penalty': 'l1', 'C': 0.001, 'solver': 'liblinear', 'accuracy': 0.7930739097054822, 'precision': 0.4011523044762718, 'recall': 0.3825431068777359}
{'penalty': 'l1', 'C': 0.001, 'solver': 'saga', 'accuracy': 0.4222863006118138, 'precision': 0.3567683506378466, 'recall': 0.4592128117381793}
{'penalty': 'l1', 'C': 0.01, 'solver': 'liblinear', 'accuracy': 0.7765479906808763, 'precision': 0.38186997496225594, 'recall': 0.39910178738738866}
{'penalty': 'l1', 'C': 0.01, 'solver': 'saga', 'accuracy': 0.43100567850786425, 'precision': 0.35860267959018666, 'recall': 0.4613358600093641}
{'penalty': 'l1', 'C': 0.02, 'solver': 'liblinear', 'accuracy': 0.7735908189316651, 'precision': 0.38694956152485754, 'recall': 0.4011614523115675}
{'penalty': 'l1', 'C': 0.02, 'solver': 'saga', 'accuracy': 0.4315761947064797, 'precision': 0.3587639323267127, 'recall': 0.4611078506211374}
{'penalty': 'l1', 'C': 0.04, 'solver': 'liblinear', 'accuracy': 0.7714513808134833, 'precision': 0.38640774222919794, 'recall'

KeyboardInterrupt: 

In [34]:
# We favor precision and recall over accuracy ; however, we still take accuracy into consideration. 
# So we take the top 10 accuracy scores passing the average precision and recall values.
# After this we, take the top 5 of them based on precision. Then we take the one having the maximum recall.

precision_values = [result['precision'] for result in results]
all_models_average_precision_scores = np.mean(precision_values)

recall_values = [result['recall'] for result in results]
all_models_average_recall_scores = np.mean(recall_values)

filtered_results = []
for result in results:
    if result['precision'] >= all_models_average_precision_scores or result['recall'] >= all_models_average_recall_scores :
        filtered_results.append(result)
        
top_10_results = sorted(filtered_results, key=lambda x: x['accuracy'], reverse=True)[:10]

precision_values = [result['precision'] for result in top_10_results]
top_10_results_average_precision_scores = np.mean(precision_values)


filtered_results_2 = []
for result in top_10_results:
    if result['precision'] >= top_10_results_average_precision_scores:
        filtered_results_2.append(result)
top_5_results = sorted(filtered_results_2, key=lambda x: x['precision'], reverse=True)[:5]

best_model_3 = max(top_5_results, key=lambda x: x['recall'])

print(best_model_3)

{'penalty': 'l1', 'C': 0.08, 'solver': 'liblinear', 'accuracy': 0.7702342743156059, 'precision': 0.3862079045109327, 'recall': 0.4027238625058616}


In [35]:
#Compare between the 3 models (3rd, 2nd, and first order polynomial logistic regression models )

print(best_model_1)
print(best_model_2)
print(best_model_3)

{'penalty': 'l2', 'C': 0.001, 'solver': 'newton-cholesky', 'accuracy': 0.4686691516029627, 'precision': 0.3595840240892864, 'recall': 0.46320033019560275}
{'penalty': 'l1', 'C': 0.02, 'solver': 'liblinear', 'accuracy': 0.7786113470131997, 'precision': 0.40529932569883637, 'recall': 0.3961493337104839}
{'penalty': 'l1', 'C': 0.08, 'solver': 'liblinear', 'accuracy': 0.7702342743156059, 'precision': 0.3862079045109327, 'recall': 0.4027238625058616}


In [38]:
# Create the final model with the best parameters

# On trying lots of different parameters, the model always performs poorly.
# Here is some suggestions that may improve the performance.
# Logistic regression may be affected to outliers, as it's very sensitive to them so using a clustering algorithm to kick outliers, and grouping similar data.
# Also trying different thresholds, adding/removing some features (domain knowledge), or trying even higher order models.
# However, at this point I think moving to another algorithm is more efficient, as they already show way better results, and
# this models is computationally expensive, and takes too long to train.

final_logistic_model = LogisticRegression(penalty=best_model_2['penalty'], C=best_model_2['C'], solver=best_model_2['solver']
                    , max_iter=5000, class_weight=class_weight_dictionary)

final_logistic_model.fit(x,y)
y_prediction = final_logistic_model.predict(x_testing)

final_accuracy = accuracy_score(y_testing, y_prediction)
final_precision = precision_score(y_testing, y_prediction, average='macro',zero_division=0)
final_recall = recall_score(y_testing, y_prediction, average='macro')
my_confusion_matrix = confusion_matrix(y_testing, y_prediction, labels=[0, 1, 2])

print("Testing Accuracy:", final_accuracy)
print("Testing Precision:", final_precision)
print("Testing Recall:", final_recall)

print("Confusion Matrix:")
print("True Fatal: " , my_confusion_matrix[0,0],  "False Serious: ", my_confusion_matrix[0,1], "False Slight: ",my_confusion_matrix[0,2] )
print("False Fatal: " , my_confusion_matrix[1,0],  "True Serious: ", my_confusion_matrix[1,1], "False Slight: ",my_confusion_matrix[1,2])
print("False Fatal: " , my_confusion_matrix[2,0],  "False Serious: ", my_confusion_matrix[2,1], "True Slight: ",my_confusion_matrix[2,2])


# confusion matrix explanation (applies to more dimensions)
#                  Predicted bird                         Predicted cat
# Actual bird       True bird                               False Cat 
# Actual cat        False Bird                             True Cat  

Testing Accuracy: 0.7880387931034483
Testing Precision: 0.2911741251524533
Testing Recall: 0.3746600178021744
Confusion Matrix:
True Fatal:  37 False Serious:  0 False Slight:  207
False Fatal:  156 True Serious:  0 False Slight:  3156
False Fatal:  415 False Serious:  0 True Slight:  14589
