# 패키지 임포트

In [1]:
import numpy as np
import pandas as pd
import json
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
import sys
sys.path.append('C:/Users/User/Desktop/News Category')
from data_preprocessing import *

# 데이터 로드

In [2]:
data=[]
with open('C:/Users/user/Desktop/bilm-tf-master/textdataset/News_Category_Dataset_v3.json', 'r') as f:
    for line in f:
        content = json.loads(line)
        data.append(content)
df = pd.DataFrame(data=data)

# 전처리

In [3]:
df = df[~df['short_description'].apply(lambda x: len(x)==0)]

#중복되는 카테고리 통합
df["category"] = df.category.replace(
    {
        "THE WORLDPOST": "WORLD NEWS",
        "WORLDPOST": "WORLD NEWS",
        "ARTS": "ARTS & CULTURE",
        "CULTURE & ARTS": "ARTS & CULTURE",
        "HEALTHY LIVING": "WELLNESS",
        "QUEER VOICES": "VOICES",
        "LATINO VOICES": "VOICES",
        "BLACK VOICES": "VOICES",
        "STYLE": "STYLE & BEATUY",
        "GREEN": "ENVIRONMENT",
        "TASTE": "FOOD & DRINK",
        "MONEY": "BUSINESS",
        "PARENTING": "PARENTS"
    }
)

df = df[['headline', 'short_description','category']]
df['input_data']= df.apply(lambda x: str(x['headline']) + str(x['short_description']), axis=1)

df['clean_text']=df['input_data'].apply(clean_text)

class0=list(df[df['category'] == 'POLITICS'].sample(1000, random_state=1)['clean_text'])
class1=list(df[df['category'] == 'WELLNESS'].sample(1000, random_state=1)['clean_text'])
class2=list(df[df['category'] == 'ENTERTAINMENT'].sample(1000, random_state=1)['clean_text'])
class3=list(df[df['category'] == 'PARENTS'].sample(1000, random_state=1)['clean_text'])
class4=list(df[df['category'] == 'VOICES'].sample(1000, random_state=1)['clean_text'])
class5=list(df[df['category'] == 'STYLE & BEAUTY'].sample(1000, random_state=1)['clean_text'])
class6=list(df[df['category'] == 'TRAVEL'].sample(1000, random_state=1)['clean_text'])
class7=list(df[df['category'] == 'FOOD & DRINK'].sample(1000, random_state=1)['clean_text'])
class8=list(df[df['category'] == 'WORLD NEWS'].sample(1000, random_state=1)['clean_text'])
class9=list(df[df['category'] == 'BUSINESS'].sample(1000, random_state=1)['clean_text'])

X_list=class0+class1+class2+class3+class4+class5+class6+class7+class8+class9

y=[]
for i in range(10):
    y+=[i]*1000
    
clr_x_data=[]
pattern = '[^a-z ]'
for sen in X_list:
    clr_x_data.append(re.sub(pattern, ' ', sen))
    
X_list=[]
for sen in clr_x_data:
    X_list.append(' '.join(sen.split()))
    
train_idx=[]
for i in range(10):
    train_idx+=[j+i*1000 for j in range(800)]

test_idx=[]
for i in range(10):
    test_idx+=[j+800+i*1000 for j in range(200)]
    
x_train=[]
y_train=[]
for i in train_idx:
    x_train.append(X_list[i])
    y_train.append(y[i])

x_test=[]
y_test=[]
for i in test_idx:
    x_test.append(X_list[i])
    y_test.append(y[i])

In [4]:
to_txt=x_train+x_test
y=list(y_train)+list(y_test)

In [5]:
#불용어 불러오기
with open('C:/Users/user/Desktop/english.txt', 'r', encoding='utf-8') as file:
    stopwords = [line.strip() for line in file]

In [6]:
#단어 10000개 선별
vect = CountVectorizer(stop_words=stopwords)
X_dtm = vect.fit_transform(to_txt)
X_dtm = X_dtm.toarray()
X_new = SelectKBest(chi2, k=10000).fit(X_dtm, y)
TorF = X_new.get_support()
word_view=np.array(vect.get_feature_names_out())
sw=word_view[TorF]



In [8]:
#훈련데이터에서 선별한 10000개 단어 이외 단어는 <ukn>으로 변경
train_transe=[]
for i in range(len(x_train)):
    a=x_train[i].split()
    for j in range(len(a)):
        if a[j] not in sw:
            a[j] = '<ukn>'
    train_transe.append(' '.join(a))

In [9]:
#테스트데이터에서 선별한 10000개 단어 이외 단어는 <ukn>으로 변경
test_transe=[]
for i in range(len(x_test)):
    a=x_test[i].split()
    for j in range(len(a)):
        if a[j] not in sw:
            a[j] = '<ukn>'
    test_transe.append(' '.join(a))

In [10]:
# DTM 확인
dtmvector = CountVectorizer()
x_train_dtm = dtmvector.fit_transform(train_transe)
# print(x_train.shape)

# 테스트데이터
x_test_dtm = dtmvector.transform(test_transe) #테스트 데이터를 DTM으로 변환

# TF-IDF Matrix확인
tfidf_transformer = TfidfTransformer()
tfidfv = tfidf_transformer.fit_transform(x_train_dtm)
print(tfidfv.shape)

# 테스트데이터
tfidfv_test = tfidf_transformer.transform(x_test_dtm) #DTM을 TF-IDF 행렬로 변환

(8000, 9559)


# 전통적인 머신러닝 모델로 학습

In [11]:
# SVM 모델
param_grid = {'C': [0.1, 1, 10, 100, 1000], 
              'penalty': ['l1', 'l2']}

grid = GridSearchCV(LinearSVC(max_iter=500, dual=False), param_grid, refit=True, verbose=3)

grid.fit(tfidfv, y_train)
print(grid.best_params_)

predicted = grid.predict(tfidfv_test)
print("정확도:", accuracy_score(y_test, predicted))

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5] END .................C=0.1, penalty=l1;, score=0.501 total time=   0.0s
[CV 2/5] END .................C=0.1, penalty=l1;, score=0.509 total time=   0.0s
[CV 3/5] END .................C=0.1, penalty=l1;, score=0.521 total time=   0.0s
[CV 4/5] END .................C=0.1, penalty=l1;, score=0.510 total time=   0.0s
[CV 5/5] END .................C=0.1, penalty=l1;, score=0.501 total time=   0.0s
[CV 1/5] END .................C=0.1, penalty=l2;, score=0.687 total time=   0.0s
[CV 2/5] END .................C=0.1, penalty=l2;, score=0.703 total time=   0.0s
[CV 3/5] END .................C=0.1, penalty=l2;, score=0.707 total time=   0.0s
[CV 4/5] END .................C=0.1, penalty=l2;, score=0.691 total time=   0.0s
[CV 5/5] END .................C=0.1, penalty=l2;, score=0.678 total time=   0.0s




[CV 1/5] END ...................C=1, penalty=l1;, score=0.657 total time=   0.3s




[CV 2/5] END ...................C=1, penalty=l1;, score=0.676 total time=   0.3s




[CV 3/5] END ...................C=1, penalty=l1;, score=0.681 total time=   0.3s




[CV 4/5] END ...................C=1, penalty=l1;, score=0.668 total time=   0.3s




[CV 5/5] END ...................C=1, penalty=l1;, score=0.635 total time=   0.4s
[CV 1/5] END ...................C=1, penalty=l2;, score=0.700 total time=   0.0s
[CV 2/5] END ...................C=1, penalty=l2;, score=0.716 total time=   0.0s
[CV 3/5] END ...................C=1, penalty=l2;, score=0.719 total time=   0.0s
[CV 4/5] END ...................C=1, penalty=l2;, score=0.706 total time=   0.0s
[CV 5/5] END ...................C=1, penalty=l2;, score=0.688 total time=   0.0s




[CV 1/5] END ..................C=10, penalty=l1;, score=0.612 total time=   0.8s




[CV 2/5] END ..................C=10, penalty=l1;, score=0.629 total time=   0.8s




[CV 3/5] END ..................C=10, penalty=l1;, score=0.631 total time=   0.8s




[CV 4/5] END ..................C=10, penalty=l1;, score=0.618 total time=   0.8s




[CV 5/5] END ..................C=10, penalty=l1;, score=0.601 total time=   0.8s
[CV 1/5] END ..................C=10, penalty=l2;, score=0.649 total time=   0.1s
[CV 2/5] END ..................C=10, penalty=l2;, score=0.659 total time=   0.1s
[CV 3/5] END ..................C=10, penalty=l2;, score=0.672 total time=   0.1s
[CV 4/5] END ..................C=10, penalty=l2;, score=0.662 total time=   0.1s
[CV 5/5] END ..................C=10, penalty=l2;, score=0.636 total time=   0.1s




[CV 1/5] END .................C=100, penalty=l1;, score=0.606 total time=   1.1s




[CV 2/5] END .................C=100, penalty=l1;, score=0.621 total time=   1.1s




[CV 3/5] END .................C=100, penalty=l1;, score=0.626 total time=   1.1s




[CV 4/5] END .................C=100, penalty=l1;, score=0.603 total time=   1.1s




[CV 5/5] END .................C=100, penalty=l1;, score=0.591 total time=   1.1s
[CV 1/5] END .................C=100, penalty=l2;, score=0.619 total time=   0.4s
[CV 2/5] END .................C=100, penalty=l2;, score=0.640 total time=   0.4s
[CV 3/5] END .................C=100, penalty=l2;, score=0.659 total time=   0.3s
[CV 4/5] END .................C=100, penalty=l2;, score=0.641 total time=   0.4s
[CV 5/5] END .................C=100, penalty=l2;, score=0.616 total time=   0.4s




[CV 1/5] END ................C=1000, penalty=l1;, score=0.586 total time=   1.7s




[CV 2/5] END ................C=1000, penalty=l1;, score=0.605 total time=   1.7s




[CV 3/5] END ................C=1000, penalty=l1;, score=0.614 total time=   1.6s




[CV 4/5] END ................C=1000, penalty=l1;, score=0.594 total time=   1.7s




[CV 5/5] END ................C=1000, penalty=l1;, score=0.594 total time=   1.7s
[CV 1/5] END ................C=1000, penalty=l2;, score=0.631 total time=   0.1s
[CV 2/5] END ................C=1000, penalty=l2;, score=0.644 total time=   0.1s
[CV 3/5] END ................C=1000, penalty=l2;, score=0.671 total time=   0.1s
[CV 4/5] END ................C=1000, penalty=l2;, score=0.649 total time=   0.1s
[CV 5/5] END ................C=1000, penalty=l2;, score=0.624 total time=   0.1s
{'C': 1, 'penalty': 'l2'}
정확도: 0.7185


In [12]:
#나이브 베이즈 모델
param_grid = {'alpha': [0.01, 0.1, 0.5, 1, 10, 100],
              'fit_prior': [True, False]}

grid = GridSearchCV(MultinomialNB(), param_grid, refit=True, verbose=3)

grid.fit(tfidfv, y_train)
print(grid.best_params_)

predicted = grid.predict(tfidfv_test)
print("정확도:", accuracy_score(y_test, predicted))

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5] END ........alpha=0.01, fit_prior=True;, score=0.686 total time=   0.0s
[CV 2/5] END ........alpha=0.01, fit_prior=True;, score=0.707 total time=   0.0s
[CV 3/5] END ........alpha=0.01, fit_prior=True;, score=0.688 total time=   0.0s
[CV 4/5] END ........alpha=0.01, fit_prior=True;, score=0.686 total time=   0.0s
[CV 5/5] END ........alpha=0.01, fit_prior=True;, score=0.675 total time=   0.0s
[CV 1/5] END .......alpha=0.01, fit_prior=False;, score=0.686 total time=   0.0s
[CV 2/5] END .......alpha=0.01, fit_prior=False;, score=0.707 total time=   0.0s
[CV 3/5] END .......alpha=0.01, fit_prior=False;, score=0.688 total time=   0.0s
[CV 4/5] END .......alpha=0.01, fit_prior=False;, score=0.686 total time=   0.0s
[CV 5/5] END .......alpha=0.01, fit_prior=False;, score=0.675 total time=   0.0s
[CV 1/5] END .........alpha=0.1, fit_prior=True;, score=0.712 total time=   0.0s
[CV 2/5] END .........alpha=0.1, fit_prior=True;

In [13]:
#logistic regression 모델
param_grid = {'C': [0.1, 1, 10, 100, 1000], 
              'penalty': ['l1', 'l2']}

grid = GridSearchCV(LogisticRegression(), param_grid, refit=True, verbose=3)

grid.fit(tfidfv, y_train)
print(grid.best_params_)

predicted = grid.predict(tfidfv_test)
print("정확도:", accuracy_score(y_test, predicted))

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5] END ...................C=0.1, penalty=l1;, score=nan total time=   0.0s
[CV 2/5] END ...................C=0.1, penalty=l1;, score=nan total time=   0.0s
[CV 3/5] END ...................C=0.1, penalty=l1;, score=nan total time=   0.0s
[CV 4/5] END ...................C=0.1, penalty=l1;, score=nan total time=   0.0s
[CV 5/5] END ...................C=0.1, penalty=l1;, score=nan total time=   0.0s
[CV 1/5] END .................C=0.1, penalty=l2;, score=0.534 total time=   0.1s
[CV 2/5] END .................C=0.1, penalty=l2;, score=0.543 total time=   0.2s
[CV 3/5] END .................C=0.1, penalty=l2;, score=0.547 total time=   0.2s
[CV 4/5] END .................C=0.1, penalty=l2;, score=0.534 total time=   0.2s
[CV 5/5] END .................C=0.1, penalty=l2;, score=0.521 total time=   0.2s
[CV 1/5] END .....................C=1, penalty=l1;, score=nan total time=   0.0s
[CV 2/5] END .....................C=1, penalty=l

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END ...................C=1, penalty=l2;, score=0.690 total time=   0.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END ...................C=1, penalty=l2;, score=0.694 total time=   0.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END ...................C=1, penalty=l2;, score=0.672 total time=   0.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END ...................C=1, penalty=l2;, score=0.651 total time=   0.6s
[CV 1/5] END ....................C=10, penalty=l1;, score=nan total time=   0.0s
[CV 2/5] END ....................C=10, penalty=l1;, score=nan total time=   0.0s
[CV 3/5] END ....................C=10, penalty=l1;, score=nan total time=   0.0s
[CV 4/5] END ....................C=10, penalty=l1;, score=nan total time=   0.0s
[CV 5/5] END ....................C=10, penalty=l1;, score=nan total time=   0.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END ..................C=10, penalty=l2;, score=0.686 total time=   0.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END ..................C=10, penalty=l2;, score=0.716 total time=   0.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END ..................C=10, penalty=l2;, score=0.717 total time=   0.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END ..................C=10, penalty=l2;, score=0.703 total time=   0.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END ..................C=10, penalty=l2;, score=0.682 total time=   0.6s
[CV 1/5] END ...................C=100, penalty=l1;, score=nan total time=   0.0s
[CV 2/5] END ...................C=100, penalty=l1;, score=nan total time=   0.0s
[CV 3/5] END ...................C=100, penalty=l1;, score=nan total time=   0.0s
[CV 4/5] END ...................C=100, penalty=l1;, score=nan total time=   0.0s
[CV 5/5] END ...................C=100, penalty=l1;, score=nan total time=   0.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END .................C=100, penalty=l2;, score=0.670 total time=   0.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END .................C=100, penalty=l2;, score=0.682 total time=   0.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END .................C=100, penalty=l2;, score=0.710 total time=   0.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END .................C=100, penalty=l2;, score=0.698 total time=   0.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 5/5] END .................C=100, penalty=l2;, score=0.668 total time=   0.6s
[CV 1/5] END ..................C=1000, penalty=l1;, score=nan total time=   0.0s
[CV 2/5] END ..................C=1000, penalty=l1;, score=nan total time=   0.0s
[CV 3/5] END ..................C=1000, penalty=l1;, score=nan total time=   0.0s
[CV 4/5] END ..................C=1000, penalty=l1;, score=nan total time=   0.0s
[CV 5/5] END ..................C=1000, penalty=l1;, score=nan total time=   0.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 1/5] END ................C=1000, penalty=l2;, score=0.679 total time=   0.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 2/5] END ................C=1000, penalty=l2;, score=0.686 total time=   0.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 3/5] END ................C=1000, penalty=l2;, score=0.697 total time=   0.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV 4/5] END ................C=1000, penalty=l2;, score=0.683 total time=   0.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
25 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "C:\ProgramData\anaconda3\envs\class\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\ProgramData\anacond

[CV 5/5] END ................C=1000, penalty=l2;, score=0.675 total time=   0.6s
{'C': 10, 'penalty': 'l2'}
정확도: 0.7065


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
