# Mini Project #1
---

EMNIST 데이터셋의 손글씨 숫자 이미지 분류기 개발

---

## 패키지 불러오기

In [1]:
# ML 진행을 위한 패키지
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score

In [2]:
# 로그 및 모델 저장을 위한 패키지
import joblib, datetime

## 데이터셋 입력 및 라벨 분리

In [3]:
# Read the training CSV into a DataFrame.
# header=None: the first row is read as data, not as column names.
input_file_name = "data/emnist-digits-train.csv"
df_train = pd.read_csv(input_file_name, header=None)

In [4]:
# Convert the DataFrame into a NumPy array
np_train = df_train.to_numpy()

In [5]:
# Split off the training features: every column except the first
x_train = np_train[:, 1:]

In [6]:
# Split off the labels: the first column
y_train = np_train[:, 0]

## 데이터 전처리 함수 정의

In [7]:
# Cast to float32
def to_float32(data):
    """Return ``data`` converted to ``np.float32`` via its ``astype`` method."""
    as_single_precision = data.astype(dtype=np.float32)
    return as_single_precision

In [8]:
# Zero centering
def zero_centering(data):
    """Subtract the mean taken along axis 0 from ``data``.

    For a 2-D input this removes each column's mean from that column.
    """
    axis0_mean = np.mean(data, axis=0)
    shifted = data - axis0_mean
    return shifted

In [9]:
# Normalization
def normalize(data):
    """Divide ``data`` by its standard deviation.

    ``np.std`` is called without an axis, so one scalar computed over all
    elements is used as the divisor for every entry.
    """
    overall_std = np.std(data)
    scaled = data / overall_std
    return scaled

In [10]:
# Apply the float32 conversion to the training features
x_train = to_float32(x_train)

In [11]:
# Apply zero centering to the training features
x_train = zero_centering(x_train)

In [12]:
# Apply normalization to the training features
x_train = normalize(x_train)

In [13]:
# HOG
from skimage.feature import hog

def hog_func(data):
    """Compute a HOG descriptor for every row of ``data``.

    Each row is reshaped to 28x28 and transposed before being passed to
    ``hog`` with 8 orientations, 7x7-pixel cells and 1x1-cell blocks.
    (The transpose presumably undoes the orientation in which the images
    are stored -- TODO confirm against the dataset description.)

    Returns:
        ``np.array`` built from the per-row descriptors.
    """
    def _describe(flat_pixels):
        # Rebuild the 2-D image from the flat pixel vector, then transpose it.
        image = np.reshape(flat_pixels, (28, 28)).T
        return hog(
            image,
            orientations=8,
            pixels_per_cell=(7, 7),
            cells_per_block=(1, 1)
        )

    return np.array([_describe(row) for row in data])

In [14]:
# Dimensionality reduction (PCA via eigen-decomposition of the covariance matrix)
def dimensionality_reduction(data, num_components):
    """Project ``data`` onto its leading principal axes.

    Args:
        data: 2-D array-like with one sample per row and one feature per
            column (the covariance is computed with ``rowvar=False``).
        num_components: how many eigenvectors, taken in order of decreasing
            eigenvalue, to keep.

    Returns:
        Array with one row per sample and ``num_components`` columns (fewer
        if ``data`` has fewer features than requested).

    Note:
        The data is mean-centred before projection. ``np.cov`` already
        centres internally when estimating the covariance, so projecting the
        un-centred input would offset every score by the projected mean.
        For input that is already zero-centred this changes nothing beyond
        floating-point rounding.
    """
    data = np.asarray(data)
    centered = data - np.mean(data, axis=0)

    # atleast_2d: np.cov returns a 0-d array for a single feature, which
    # np.linalg.eigh cannot decompose.
    covariance_matrix = np.atleast_2d(np.cov(centered, rowvar=False))
    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

    # Indices of the largest eigenvalues first.
    top_indices = np.argsort(eigenvalues)[::-1][:num_components]
    components = eigenvectors[:, top_indices]

    return np.dot(centered, components)

In [15]:
# Combine feature sets
def combine_data(data1, data2):
    """Return ``data1`` and ``data2`` stacked horizontally with ``np.hstack``."""
    blocks = (data1, data2)
    return np.hstack(blocks)

In [16]:
# Compute HOG features from the preprocessed training data
H_data = hog_func(x_train)

In [17]:
# Reduce the preprocessed training data to 50 components
D_data = dimensionality_reduction(x_train, 50)

In [18]:
# Concatenate the HOG features with the reduced-dimension features
HD_data = combine_data(H_data, D_data)

## Grid Search 함수 정의

In [19]:
# 하이퍼파라미터 튜닝 함수
from sklearn.model_selection import GridSearchCV

def tune_model(model, data, param_grid, labels=None, cv=3):
    """Grid-search ``param_grid`` for ``model``, scored by cross-validated accuracy.

    Args:
        model: estimator instance handed to ``GridSearchCV``.
        data: feature matrix to fit on.
        param_grid: parameter grid passed straight to ``GridSearchCV``.
        labels: target vector. When omitted, the notebook-level ``y_train``
            is used, which is what existing three-argument calls rely on.
        cv: value forwarded to ``GridSearchCV(cv=...)``; defaults to 3, the
            value that used to be hard-coded.

    Returns:
        Tuple ``(best_estimator_, best_params_, best_score_)`` taken from
        the fitted search. The best parameters and score are also printed.
    """
    # Fall back to the global labels so older call sites keep working.
    targets = y_train if labels is None else labels

    grid_search = GridSearchCV(model, param_grid, cv=cv, verbose=2, scoring='accuracy', n_jobs=-1)
    grid_search.fit(data, targets)

    print("Best params for", model.__class__.__name__, ":", grid_search.best_params_)
    print("Best Cross-Validation Score:", grid_search.best_score_)
    return grid_search.best_estimator_, grid_search.best_params_, grid_search.best_score_

## 로그 기록 함수 정의

In [20]:
# Log-writing helper
def save_log(desc, con, log_path='log/ML-P1.log'):
    """Append one entry to the experiment log file.

    The entry is two newlines, ``desc``, a newline, then the items of
    ``con`` joined by newlines (no trailing newline is written).

    Args:
        desc: heading line for the entry.
        con: iterable of strings, one per detail line.
        log_path: file to append to; defaults to the path that used to be
            hard-coded.
    """
    from pathlib import Path  # local import keeps this cell self-contained

    target = Path(log_path)
    # Create the log directory on first use instead of failing with
    # FileNotFoundError.
    target.parent.mkdir(parents=True, exist_ok=True)

    # Explicit encoding so the log does not depend on the platform default.
    with target.open('a', encoding='utf-8') as f:
        f.write("\n\n" + desc + "\n")
        # write(), not writelines(): the joined text is one string, and
        # writelines() would iterate over it character by character.
        f.write('\n'.join(con))

## KNN

In [120]:
from sklearn.neighbors import KNeighborsClassifier

In [121]:
# 3-fold GridSearch for KNN on the zero-centered, normalized data (x_train)

model = KNeighborsClassifier(n_jobs=-1)
param_grid = {
    'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
}
model, params, score = tune_model(model, x_train, param_grid)

# Persist the best estimator and append the result to the log.
joblib.dump(model, 'models/project1/knn/knn_O_gs.joblib')
save_log(
    # Double quotes inside the replacement field: reusing the outer quote
    # character is a SyntaxError before Python 3.12.
    f'KNN O GridSearch {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
    [
        "params : " + str(params),
        "score : " + str(score)
    ]
)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
Best params for KNeighborsClassifier : {'n_neighbors': 1}
Best Cross-Validation Score: 0.9816291666666667


In [122]:
# 3-fold GridSearch for KNN on the HOG features (H_data)

model = KNeighborsClassifier()
param_grid = {
    'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
}
model, params, score = tune_model(model, H_data, param_grid)

# Persist the best estimator and append the result to the log.
joblib.dump(model, 'models/project1/knn/knn_H_gs.joblib')
save_log(
    # Double quotes inside the replacement field: reusing the outer quote
    # character is a SyntaxError before Python 3.12.
    f'KNN H GridSearch {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
    [
        "params : " + str(params),
        "score : " + str(score)
    ]
)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
Best params for KNeighborsClassifier : {'n_neighbors': 5}
Best Cross-Validation Score: 0.9545291666666667


In [123]:
# 3-fold GridSearch for KNN on the dimensionality-reduced data (D_data)

model = KNeighborsClassifier()
param_grid = {
    'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
}
model, params, score = tune_model(model, D_data, param_grid)

# Persist the best estimator and append the result to the log.
joblib.dump(model, 'models/project1/knn/knn_P_gs.joblib')
save_log(
    # Double quotes inside the replacement field: reusing the outer quote
    # character is a SyntaxError before Python 3.12.
    f'KNN P GridSearch {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
    [
        "params : " + str(params),
        "score : " + str(score)
    ]
)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
Best params for KNeighborsClassifier : {'n_neighbors': 3}
Best Cross-Validation Score: 0.9845541666666667


In [124]:
# 3-fold GridSearch for KNN on the combined HOG + reduced-dimension data (HD_data)

model = KNeighborsClassifier()
param_grid = {
    'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
}
model, params, score = tune_model(model, HD_data, param_grid)

# Persist the best estimator and append the result to the log.
joblib.dump(model, 'models/project1/knn/knn_HP_gs.joblib')
save_log(
    # Double quotes inside the replacement field: reusing the outer quote
    # character is a SyntaxError before Python 3.12.
    f'KNN HP GridSearch {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
    [
        "params : " + str(params),
        "score : " + str(score)
    ]
)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
Best params for KNeighborsClassifier : {'n_neighbors': 3}
Best Cross-Validation Score: 0.9846750000000001


In [125]:
# Train the KNN model (n_neighbors=3) on the combined features HD_data

model = KNeighborsClassifier(n_neighbors=3)
model.fit(HD_data, y_train)

# Persist the fitted model
joblib.dump(model, 'models/project1/knn/knn_HP_n#3.joblib')

['models/project1/knn/knn_HP_n#3.joblib']

In [126]:
# 10-fold cross-validation of the saved KNN model : took 1m 14s

model = joblib.load("models/project1/knn/knn_HP_n#3.joblib")
score = cross_val_score(model, HD_data, y_train, cv=10, scoring='accuracy', verbose=0, n_jobs=-1)

# Report and log the mean accuracy over the folds.
print("KNN Cross-Validation Accuracy:", score.mean())
save_log(
    # Double quotes inside the replacement field: reusing the outer quote
    # character is a SyntaxError before Python 3.12.
    f'KNN HP Cross-Validation {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
    [
        "params : n = 3",
        "score : " + str(score.mean())
    ]
)

KNN Cross-Validation Accuracy: 0.9863375


## Random Forest

In [127]:
from sklearn.ensemble import RandomForestClassifier

In [128]:
# Random Forest GridSearch : duration not recorded

# Result ======================================
# Best parameters : 'max_depth': 50, 'n_estimators': 500
# Best score : 0.9717333333333333
# ============================================

model = RandomForestClassifier(verbose=0)
param_grid = {
    'n_estimators': [50, 100, 200, 300, 500],
    'max_depth': [10, 20, 50]
}

model, params, score = tune_model(model, HD_data, param_grid)

# Persist the best estimator and append the result to the log.
joblib.dump(model, 'models/project1/RF/randomForest_HP_gs.joblib')
save_log(
    # Double quotes inside the replacement field: reusing the outer quote
    # character is a SyntaxError before Python 3.12.
    f'Random Forest HP GridSearch {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
    [
        "params : " + str(params),
        "score : " + str(score)
    ]
)

Fitting 3 folds for each of 15 candidates, totalling 45 fits
[CV] END ......................................n_neighbors=1; total time= 9.7min
[CV] END ......................................n_neighbors=6; total time= 9.7min
[CV] END ......................................n_neighbors=9; total time=10.1min
[CV] END ......................................n_neighbors=2; total time= 9.8min
[CV] END ......................................n_neighbors=5; total time= 9.6min
[CV] END ......................................n_neighbors=8; total time= 9.6min
[CV] END ......................................n_neighbors=3; total time= 1.8min
[CV] END ......................................n_neighbors=6; total time= 1.8min
[CV] END ......................................n_neighbors=8; total time= 1.8min
[CV] END ......................................n_neighbors=4; total time= 1.0min
[CV] END ......................................n_neighbors=6; total time= 1.0min
[CV] END .....................................n_



[CV] END ......................................n_neighbors=3; total time= 9.7min
[CV] END ......................................n_neighbors=5; total time= 9.7min
[CV] END ......................................n_neighbors=9; total time=10.1min
[CV] END ......................................n_neighbors=3; total time= 9.8min
[CV] END ......................................n_neighbors=6; total time= 9.6min
[CV] END ......................................n_neighbors=9; total time= 9.6min
[CV] END ......................................n_neighbors=4; total time= 1.8min
[CV] END ......................................n_neighbors=6; total time= 1.8min
[CV] END .....................................n_neighbors=10; total time= 1.8min
[CV] END ......................................n_neighbors=3; total time= 1.0min
[CV] END ......................................n_neighbors=5; total time=  59.8s
[CV] END ......................................n_neighbors=8; total time=  59.9s
[CV] END ...................

In [129]:
# Train the Random Forest model (n_estimators=100, max_depth=10) on HD_data : took 2m 15s

model = RandomForestClassifier(n_estimators=100, max_depth=10)
model.fit(HD_data, y_train)

# Persist the fitted model
joblib.dump(model, 'models/project1/RF/randomForest_HP_n#100_d#10.joblib')

['models/project1/RF/randomForest_HP_n#100_d#10.joblib']

In [130]:
# 10-fold cross-validation of the saved Random Forest model : took 2m 50s

model = joblib.load("models/project1/RF/randomForest_HP_n#100_d#10.joblib")
score = cross_val_score(model, HD_data, y_train, cv=10, scoring='accuracy', verbose=0, n_jobs=-1)

# Report and log the mean accuracy over the folds.
print("Random Forest Cross-Validation Accuracy:", score.mean())
save_log(
    # Double quotes inside the replacement field: reusing the outer quote
    # character is a SyntaxError before Python 3.12.
    f'Random Forest HP Cross-Validation {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
    [
        "params : n_estimators = 100, max_depth = 10",
        "score : " + str(score.mean())
    ]
)

Random Forest Cross-Validation Accuracy: 0.9476166666666668


In [131]:
# Train the Random Forest model (n_estimators=500, max_depth=50) on HD_data : took 4m 37s

model = RandomForestClassifier(n_estimators=500, max_depth=50, n_jobs=-1)
model.fit(HD_data, y_train)

# Persist the fitted model
joblib.dump(model, 'models/project1/RF/randomForest_HP_n#500_d#50.joblib')

['models/project1/RF/randomForest_HP_n#500_d#50.joblib']

In [None]:
# 10-fold cross-validation of the saved Random Forest model : took 50m

model = joblib.load("models/project1/RF/randomForest_HP_n#500_d#50.joblib")
score = cross_val_score(model, HD_data, y_train, cv=10, scoring='accuracy', verbose=0, n_jobs=-1)

# Report and log the mean accuracy over the folds.
print("Random Forest Cross-Validation Accuracy:", score.mean())
save_log(
    # Double quotes inside the replacement field: reusing the outer quote
    # character is a SyntaxError before Python 3.12.
    f'Random Forest HP Cross-Validation {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
    [
        "params : n_estimators = 500, max_depth = 50",
        "score : " + str(score.mean())
    ]
)

## XGBoost

In [21]:
from xgboost import XGBClassifier

In [22]:
# Train the XGBoost model on x_train : took 16m 39s

model = XGBClassifier(n_estimators = 500, max_depth = 6, learning_rate = 0.2, subsample = 0.8, colsample_bytree = 0.6, n_jobs=-1)
model.fit(x_train, y_train)

# Persist the fitted model
joblib.dump(model, 'models/project1/xgb/xgb_O_ne#500_md#6_lr#02_ss#08_cb#06.joblib')

['models/project1/xgb/xgb_O_ne#500_md#6_lr#02_ss#08_cb#06.joblib']

In [None]:
# 10-fold cross-validation of the saved XGBoost model on x_train : took 1h 47m

model = joblib.load("models/project1/xgb/xgb_O_ne#500_md#6_lr#02_ss#08_cb#06.joblib")
score = cross_val_score(model, x_train, y_train, cv=10, scoring='accuracy', verbose=0, n_jobs=-1)

# Report and log the mean accuracy over the folds.
print("XGB Cross-Validation Accuracy:", score.mean())
save_log(
    # Double quotes inside the replacement field: reusing the outer quote
    # character is a SyntaxError before Python 3.12.
    f'XGB O Cross-Validation {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
    [
        "params : n_estimators = 500, max_depth = 6, learning_rate = 0.2, subsample = 0.8, colsample_bytree = 0.6",
        "score : " + str(score.mean())
    ]
)

In [None]:
# XGB GridSearch : duration not recorded

# Result ======================================
# Best parameters : n_estimators = 500, max_depth = 6, learning_rate = 0.2, subsample = 0.8, colsample_bytree = 0.6
# Best score : 0.9904875000000001
# ============================================

model = XGBClassifier(n_jobs=-1)
param_grid = {
    'n_estimators': [200, 300, 500],
    'max_depth': [3, 6, 10],
    'learning_rate': [0.2, 0.3, 0.4],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0]
}

model, params, score = tune_model(model, HD_data, param_grid)

# Persist the best estimator and append the result to the log.
joblib.dump(model, 'models/project1/xgb/xgb_HP_gs.joblib')
save_log(
    # Double quotes inside the replacement field: reusing the outer quote
    # character is a SyntaxError before Python 3.12.
    f'XGB HP GridSearch {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
    [
        "params : " + str(params),
        "score : " + str(score)
    ]
)

In [112]:
# Train the XGBoost model on the combined features HD_data : took 2m 39s

model = XGBClassifier(n_estimators = 500, max_depth = 6, learning_rate = 0.2, subsample = 0.8, colsample_bytree = 0.6, n_jobs=-1)
model.fit(HD_data, y_train)

# Persist the fitted model
joblib.dump(model, 'models/project1/xgb/xgb_HP_ne#500_md#6_lr#02_ss#08_cb#06.joblib')

['models/project1/xgb/xgb_HP_ne#500_md#6_lr#02_ss#08_cb#06.joblib']

In [21]:
# 10-fold cross-validation of the saved XGBoost model on HD_data : took 9m 53s

model = joblib.load("models/project1/xgb/xgb_HP_ne#500_md#6_lr#02_ss#08_cb#06.joblib")
score = cross_val_score(model, HD_data, y_train, cv=10, scoring='accuracy', verbose=0, n_jobs=-1)

# Report and log the mean accuracy over the folds.
print("XGB Cross-Validation Accuracy:", score.mean())
save_log(
    # Double quotes inside the replacement field: reusing the outer quote
    # character is a SyntaxError before Python 3.12.
    f'XGB HP Cross-Validation {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
    [
        "params : n_estimators = 500, max_depth = 6, learning_rate = 0.2, subsample = 0.8, colsample_bytree = 0.6",
        "score : " + str(score.mean())
    ]
)

XGB Cross-Validation Accuracy: 0.9907291666666668


## CatBoost

In [118]:
from catboost import CatBoostClassifier

In [50]:
# Train the CatBoost model (zero-centered data) : took 7m 32s
# Parameters : iterations = 200, learning_rate = 0.1

model = CatBoostClassifier(iterations=200, learning_rate=0.1, verbose=0)
model.fit(x_train, y_train)

# Persist the fitted model
joblib.dump(model, 'models/project1/CB/catBoost_Z_i#200_lr#0.1.joblib')

0:	learn: 1.9288147	total: 1.81s	remaining: 6m
1:	learn: 1.6956580	total: 3.55s	remaining: 5m 51s
2:	learn: 1.5339664	total: 5.31s	remaining: 5m 48s
3:	learn: 1.4112326	total: 7.06s	remaining: 5m 45s
4:	learn: 1.3030438	total: 8.83s	remaining: 5m 44s
5:	learn: 1.2183052	total: 10.6s	remaining: 5m 43s
6:	learn: 1.1365113	total: 12.4s	remaining: 5m 41s
7:	learn: 1.0671103	total: 14.2s	remaining: 5m 39s
8:	learn: 1.0045513	total: 15.9s	remaining: 5m 38s
9:	learn: 0.9533074	total: 18s	remaining: 5m 41s
10:	learn: 0.9026773	total: 20.4s	remaining: 5m 50s
11:	learn: 0.8565848	total: 22.7s	remaining: 5m 54s
12:	learn: 0.8215959	total: 25.6s	remaining: 6m 8s
13:	learn: 0.7827144	total: 27.4s	remaining: 6m 4s
14:	learn: 0.7502030	total: 29.4s	remaining: 6m 3s
15:	learn: 0.7170709	total: 31.2s	remaining: 5m 58s
16:	learn: 0.6875135	total: 32.9s	remaining: 5m 54s
17:	learn: 0.6638215	total: 35s	remaining: 5m 54s
18:	learn: 0.6372687	total: 36.9s	remaining: 5m 51s
19:	learn: 0.6173822	total: 39.1s

['models/project1/CB/catBoost_Z_i#200_lr#0.1.joblib']

In [None]:
# 10-fold cross-validation of the saved CatBoost model (zero-centered data) : took over 1h
# Parameters : iterations = 200, learning_rate = 0.1

# Result ======================================
# Score : 0.9735458333333333
# ============================================

model = joblib.load("models/project1/CB/catBoost_Z_i#200_lr#0.1.joblib")
score = cross_val_score(model, x_train, y_train, cv=10, scoring='accuracy', verbose=0, n_jobs=-1)

# Report and log the mean accuracy over the folds.
print("CatBoost Cross-Validation Accuracy:", score.mean())
save_log(
    # Double quotes inside the replacement field: reusing the outer quote
    # character is a SyntaxError before Python 3.12.
    f'CatBoost Z Cross-Validation {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
    [
        "params : iterations = 200, learning_rate = 0.1",
        "score : " + str(score.mean())
    ]
)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 10 concurrent workers.


## SGD Classifier

In [52]:
from sklearn.linear_model import SGDClassifier

In [None]:
# SGD GridSearch over max_iter and tol on x_train

model = SGDClassifier(verbose=0)
param_grid = {
    'max_iter': [5, 10, 100],
    'tol': [1e-3, 1e-4, 1e-5]
}

model, params, score = tune_model(model, x_train, param_grid)

# Persist the best estimator and append the result to the log.
joblib.dump(model, 'models/project1/sgd/sgd_Z_gs.joblib')
save_log(
    # Double quotes inside the replacement field: reusing the outer quote
    # character is a SyntaxError before Python 3.12.
    f'SGD Z GridSearch {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
    [
        "params : " + str(params),
        "score : " + str(score)
    ]
)

In [54]:
# Train the SGD model (original data) : took 12s
# NOTE(review): x_train is rebound by the preprocessing cells earlier in the
# notebook, so "original data" may not mean raw pixels -- confirm.

model = SGDClassifier(max_iter=5, tol=None, random_state=42, verbose=0)
model.fit(x_train, y_train)

# Persist the fitted model
joblib.dump(model, 'models/project1/sgd/sgd_O_mi#5_t#none.joblib')

['models/project1/sgd/sgd_O_mi#5_t#none.joblib']

In [55]:
# 10-fold cross-validation of the saved SGD model (original data) : took 1m 54s

model = joblib.load("models/project1/sgd/sgd_O_mi#5_t#none.joblib")
# `score` holds the mean accuracy over the folds (a scalar), so it is
# printed and logged directly instead of calling .mean() a second time.
score = cross_val_score(model, x_train, y_train, cv=10, scoring="accuracy").mean()
print("SGD Cross-Validation Accuracy:", score)
save_log(
    # Double quotes inside the replacement field: reusing the outer quote
    # character is a SyntaxError before Python 3.12.
    f'SGD Cross-Validation {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
    [
        "params : max_iter = 5, tol = None",
        "score : " + str(score)
    ]
)

[CV] END ..............................max_iter=5, tol=0.001; total time=  20.0s
[CV] END ............................max_iter=100, tol=0.001; total time= 3.4min
[CV] END .............................max_iter=10, tol=0.001; total time=  30.1s
[CV] END ............................max_iter=100, tol=0.001; total time= 3.4min
[CV] END .............................max_iter=5, tol=0.0001; total time=  19.5s
[CV] END .............................max_iter=10, tol=0.001; total time=  23.2s
[CV] END ............................max_iter=100, tol=0.001; total time= 3.3min
[CV] END ..............................max_iter=5, tol=1e-05; total time=  19.8s
[CV] END ............................max_iter=10, tol=0.0001; total time=  23.2s
[CV] END ............................max_iter=100, tol=1e-05; total time= 3.4min
[CV] END .............................max_iter=5, tol=0.0001; total time=  19.8s
[CV] END ............................max_iter=10, tol=0.0001; total time=  23.0s
[CV] END ...................

## Logistic Regression

In [56]:
from sklearn.linear_model import LogisticRegression

In [57]:
# Logistic Regression GridSearch (zero-centered data) : took 4m 17s

# Result ======================================
# Best parameters : max_iter = 100, C = 0.1
# Best score : 0.9863083333333335
# ============================================
# NOTE(review): the captured output of this cell reports
# {'C': 1, 'max_iter': 100} with score 0.9354458333333334, so the figures
# recorded above look stale -- confirm which run they belong to.
# NOTE(review): Z_data is not defined in the visible part of the notebook
# (presumably the zero-centered training data); it must exist before this
# cell is run.

model = LogisticRegression()
param_grid = {
    'max_iter': [100, 200],
    'C': [0.1, 1, 10, 100]
}

model, params, score = tune_model(model, Z_data, param_grid)

# Persist the best estimator and append the result to the log.
joblib.dump(model, 'models/project1/LR/logisticRegression_Z_gs.joblib')
save_log(
    # Double quotes inside the replacement field: reusing the outer quote
    # character is a SyntaxError before Python 3.12.
    f'Logistic Regression Z GridSearch {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
    [
        "params : " + str(params),
        "score : " + str(score)
    ]
)

Fitting 3 folds for each of 8 candidates, totalling 24 fits


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Best params for LogisticRegression : {'C': 1, 'max_iter': 100}
Best Cross-Validation Score: 0.9354458333333334


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [58]:
# Train the Logistic Regression model (PCA data) : took 41s
# Parameters : max_iter = 100
# NOTE(review): the header says PCA data but the fit below uses Z_data,
# which is not defined in the visible part of the notebook -- confirm.

model = LogisticRegression(max_iter=100)
model.fit(Z_data, y_train)

# Persist the fitted model
joblib.dump(model, 'models/project1/LR/logisticRegression_Z_mi#100.joblib')

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


['models/project1/LR/logisticRegression_Z_mi#100.joblib']

In [59]:
# 10-fold cross-validation of the saved Logistic Regression model (PCA data) : took 7s
# Parameters : max_iter = 100

# Result ======================================
# Score : 0.9863083333333335
# ============================================
# NOTE(review): the captured output of this cell reports 0.9292624999999999,
# so the score recorded above looks stale -- confirm.
# NOTE(review): ZP_data is not defined in the visible part of the notebook,
# and the model file loaded here was written by a cell that fit on Z_data --
# confirm which feature set is intended.

model = joblib.load("models/project1/LR/logisticRegression_Z_mi#100.joblib")
score = cross_val_score(model, ZP_data, y_train, cv=10, scoring='accuracy', verbose=0, n_jobs=-1)

# Report and log the mean accuracy over the folds.
print("Logistic Regression Cross-Validation Accuracy:", score.mean())
save_log(
    # Double quotes inside the replacement field: reusing the outer quote
    # character is a SyntaxError before Python 3.12.
    f'Logistic Regression Z Cross-Validation {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}',
    [
        "params : max_iter = 100",
        "score : " + str(score.mean())
    ]
)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Logistic Regression Cross-Validation Accuracy: 0.9292624999999999


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
