In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm

import seaborn as sns
import re
import konlpy
from konlpy.tag import Okt
from collections import Counter
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix

In [2]:
df_kakao = pd.read_excel('kakao.xlsx')
df_toss = pd.read_excel('toss.xlsx')
df_sh = pd.read_excel('sh.xlsx')
df_kb = pd.read_excel('kb.xlsx')

In [3]:
df_kakao=df_kakao.drop(['Unnamed: 0','name'], axis=1)

In [4]:
df_kakao

Unnamed: 0,date,score,content,thumbsUp
0,2022-03-21 11:13:07,3,상담원이 말투들이 좀 딱딱하셔서 좀 불편합니다.,0
1,2022-03-20 09:38:57,1,거 개빡치네요 입출금계좌 만들려고 했는데 실명확인하래서 했고 다시했는데 안되고 탈퇴...,0
2,2022-03-19 15:08:02,4,앱 자체난 너무 조아요 근데 26일 지금 하는데 저 돈이 넉넉하게 천원 넘게 있는데...,0
3,2022-03-19 13:23:10,1,설치가안되는데이유를모르겠네요,0
4,2022-03-18 18:24:24,1,비밀번호를 두개다 잊어버리는 바람에.. 분명 제가 간수 못 한게 잘못이긴 하죠 그래...,3
...,...,...,...,...
17840,2017-07-27 00:23:22,5,기대합니다.,0
17841,2017-07-27 00:17:45,5,기대중입니다b,11
17842,2017-07-27 00:17:30,5,최고!,1
17843,2017-07-27 00:15:32,5,기대 기대!!,2


In [5]:
okt = Okt()

In [7]:
url ="https://raw.githubusercontent.com/yoonkt200/FastCampusDataset/master/korean_stopwords.txt"
stopwords = pd.read_csv(url).values.tolist()

In [11]:
stopwords

[['휴'],
 ['아이구'],
 ['아이쿠'],
 ['아이고'],
 ['어'],
 ['나'],
 ['우리'],
 ['저희'],
 ['따라'],
 ['의해'],
 ['을'],
 ['를'],
 ['에'],
 ['의'],
 ['가'],
 ['으로'],
 ['로'],
 ['에게'],
 ['뿐이다'],
 ['의거하여'],
 ['근거하여'],
 ['입각하여'],
 ['기준으로'],
 ['예하면'],
 ['예를 들면'],
 ['예를 들자면'],
 ['저'],
 ['소인'],
 ['소생'],
 ['저희'],
 ['지말고'],
 ['하지마'],
 ['하지마라'],
 ['다른'],
 ['물론'],
 ['또한'],
 ['그리고'],
 ['비길수 없다'],
 ['해서는 안된다'],
 ['뿐만 아니라'],
 ['만이 아니다'],
 ['만은 아니다'],
 ['막론하고'],
 ['관계없이'],
 ['그치지 않다'],
 ['그러나'],
 ['그런데'],
 ['하지만'],
 ['든간에'],
 ['논하지 않다'],
 ['따지지 않다'],
 ['설사'],
 ['비록'],
 ['더라도'],
 ['아니면'],
 ['만 못하다'],
 ['하는 편이 낫다'],
 ['불문하고'],
 ['향하여'],
 ['향해서'],
 ['향하다'],
 ['쪽으로'],
 ['틈타'],
 ['이용하여'],
 ['타다'],
 ['오르다'],
 ['제외하고'],
 ['이 외에'],
 ['이 밖에'],
 ['하여야'],
 ['비로소'],
 ['한다면 몰라도'],
 ['외에도'],
 ['이곳'],
 ['여기'],
 ['부터'],
 ['기점으로'],
 ['따라서'],
 ['할 생각이다'],
 ['하려고하다'],
 ['이리하여'],
 ['그리하여'],
 ['그렇게 함으로써'],
 ['하지만'],
 ['일때'],
 ['할때'],
 ['앞에서'],
 ['중에서'],
 ['보는데서'],
 ['으로써'],
 ['로써'],
 ['까지'],
 ['해야한다'],
 ['일것이다'],
 ['반드시'],
 ['할줄알다'],
 ['할수있다'],
 [

In [12]:
stopwords_alpha = ['아예', '하나', '짜증', '쓰레기', '만하', '지금', '가요', '어제', '자꾸', '언제', '조앙', '점점', '나름', '여기', '항상', '최악', '진짜', '여름날', '체적', '더욱', '가장', '항상', '최고', '아주', '정말', '사용', '완전', '최고다', '매우', '미니', '계속', '쵝오', '짱짱', '해도', '산책', '갑자기', '오델', '미리내', '다시', '이후']
for word in stopwords_alpha:
    stopwords.append(word)

In [15]:
def apply_regular_expression(content):
    hangul = re.compile('[^ ㄱ-ㅣ 가-힣]')
    result = hangul.sub('', str(content)) 
    return result

In [16]:
corpus = "".join(map(str,(df_kakao['content'].tolist())))
nouns = okt.nouns(apply_regular_expression(corpus))
counter = Counter(nouns)
counter.most_common(10)

[('계좌', 1447),
 ('사용', 1437),
 ('은행', 1344),
 ('카카오', 1333),
 ('뱅크', 1177),
 ('앱', 1172),
 ('인증', 935),
 ('왜', 853),
 ('계속', 756),
 ('오류', 736)]

In [17]:
def content_cleaning(content):
    hangul = re.compile('[^ ㄱ-ㅣ 가-힣]')
    result = hangul.sub('', content)
    okt = Okt()  # 형태소 추출
    nouns = okt.nouns(result)
    nouns = [x for x in nouns if len(x) > 1]
    nouns = [x for x in nouns if x not in stopwords]
    return nouns

vect = CountVectorizer(tokenizer = lambda x: content_cleaning(x))
bow_vect = vect.fit_transform(map(str,(df_kakao['content'].tolist())))
word_list = vect.get_feature_names()
count_list = bow_vect.toarray().sum(axis=0)

In [18]:
corpus = "".join(map(str,(df_kakao['content'].tolist())))
nouns = okt.nouns(apply_regular_expression(corpus))

In [19]:
counter = Counter(nouns)

In [20]:
counter.most_common(10)

[('계좌', 1447),
 ('사용', 1437),
 ('은행', 1344),
 ('카카오', 1333),
 ('뱅크', 1177),
 ('앱', 1172),
 ('인증', 935),
 ('왜', 853),
 ('계속', 756),
 ('오류', 736)]

In [21]:
word_count_dict = dict(zip(word_list, count_list))

In [22]:
tfidf_vectorizer = TfidfTransformer()
tf_idf_vect = tfidf_vectorizer.fit_transform(bow_vect)

In [23]:
print(tf_idf_vect.shape)

(17845, 5885)


In [24]:
vect.vocabulary_

{'상담': 2516,
 '말투': 1656,
 '입출금': 4110,
 '계좌': 345,
 '실명': 2978,
 '확인': 5777,
 '하래': 5537,
 '탈퇴': 5209,
 '해지': 5640,
 '어쩌': 3296,
 '자체': 4174,
 '천원': 4881,
 '설치': 2636,
 '이유': 3952,
 '비밀번호': 2377,
 '바람': 1969,
 '분명': 2309,
 '간수': 61,
 '비번': 2379,
 '이상': 3927,
 '다음': 1057,
 '도움': 1255,
 '대책': 1184,
 '다운': 1055,
 '어캐': 3316,
 '미성년': 1932,
 '외국인': 3616,
 '차별': 4817,
 '한국인': 5571,
 '카트': 5095,
 '발급': 2021,
 '외국': 3614,
 '오류': 3535,
 '때문': 1401,
 '초기': 4924,
 '대안': 1168,
 '영상통화': 3498,
 '방법': 2052,
 '이제': 3964,
 '이용': 3945,
 '은행': 3833,
 '거래': 209,
 '가능': 7,
 '카카오': 5089,
 '뱅크': 2105,
 '명의': 1759,
 '등록': 1369,
 '건가': 238,
 '기능': 708,
 '추가': 4980,
 '코로나바이러스': 5140,
 '백신': 2092,
 '접종': 4359,
 '핸드폰': 5648,
 '인증': 4021,
 '반복': 2008,
 '그때': 607,
 '매번': 1674,
 '기도': 711,
 '안내': 3128,
 '마찬가지': 1596,
 '현상': 5698,
 '기술': 731,
 '조속': 4476,
 '해결': 5619,
 '변경': 2162,
 '납득': 887,
 '모바일': 1783,
 '신분증': 2946,
 '젠데': 4450,
 '실물': 2979,
 '카드': 5065,
 '로만': 1504,
 '본인': 2248,
 '어디': 3267,
 '구성': 522,
 '도감': 

In [25]:
invert_index_vectorizer = {v: k for k, v in vect.vocabulary_.items()}
print(str(invert_index_vectorizer)[:100]+'...')

{2516: '상담', 1656: '말투', 4110: '입출금', 345: '계좌', 2978: '실명', 5777: '확인', 5537: '하래', 5209: '탈퇴', 564...


df_kakao['score'].hist()

In [26]:
def score_to_label(score):
    if score > 3:
        return 1
    else:
        return 0
    
df_kakao['y'] = df_kakao['score'].apply(lambda x: score_to_label(x))

In [27]:
x = tf_idf_vect
y = df_kakao['y']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state=1)

train_test_split(x, y, test_size = 0.3, random_state=1)

In [28]:
lr = LogisticRegression(random_state = 0)
lr.fit(x_train, y_train)

y_pred = lr.predict(x_test)

confu = confusion_matrix(y_true = y_test, y_pred = y_pred)

plt.figure(figsize=(4, 3))
sns.heatmap(confu, annot=True, annot_kws={'size':15}, cmap='OrRd', fmt='.10g')
plt.title('Confusion Matrix')
plt.show()

In [29]:
positive_random_idx = df_kakao[df_kakao['y']==1].sample(275, random_state=12).index.tolist()
negative_random_idx = df_kakao[df_kakao['y']==0].sample(275, random_state=12).index.tolist()

In [30]:
random_idx = positive_random_idx + negative_random_idx
x = tf_idf_vect[random_idx]
y = df_kakao['y'][random_idx]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=1)

In [31]:
lr2 = LogisticRegression(random_state = 0)
lr2.fit(x_train, y_train)
y_pred = lr2.predict(x_test)

confu = confusion_matrix(y_true = y_test, y_pred = y_pred)

plt.figure(figsize=(4, 3))
sns.heatmap(confu, annot=True, annot_kws={'size':15}, cmap='OrRd', fmt='.10g')
plt.title('Confusion Matrix')
plt.show()

plt.figure(figsize=(10, 8))
plt.bar(range(len(lr2.coef_[0])), lr2.coef_[0])

In [32]:
coef_pos_index = sorted(((value, index) for index, value in enumerate(lr2.coef_[0])), reverse = True)
coef_neg_index = sorted(((value, index) for index, value in enumerate(lr2.coef_[0])), reverse = False)
invert_index_vectorizer = {v: k for k, v in vect.vocabulary_.items()}

In [33]:
print('카카오 긍부정')

카카오 긍부정


In [34]:
for coef in coef_pos_index[:20]:
    print(invert_index_vectorizer[coef[1]], coef[0])

수수료 0.8637126303973804
이체 0.8510659554765866
쓰기 0.671364394025773
송금 0.5925317407670644
혁신 0.5865279013335841
신속 0.5561992864148764
만족 0.49763001877470137
이용 0.44572844524752153
자주 0.4446222896911967
기대 0.3783909545664379
애용 0.3755230295505345
버튼 0.346247160765585
공인 0.345009110974688
쉬움 0.3358502496948032
혁명 0.32865766554300324
편리 0.32865766554300324
컴맹 0.32865766554300324
쪼아 0.32865766554300324
안전성 0.32865766554300324
박규 0.32865766554300324


In [35]:
for coef in coef_neg_index[:40]:
    print(invert_index_vectorizer[coef[1]], coef[0])

신분증 -1.6947195581282843
오류 -1.5969007703923488
설치 -1.4822793726886154
인증 -1.4608434984636118
연결 -1.2584694738772533
업데이트 -1.0563211093490625
접속 -1.0049924432275483
실명 -1.0004117553275347
실행 -0.9281693510764913
인식 -0.9262192103203365
촬영 -0.9139091563657475
개설 -0.9009583410544441
다운로드 -0.9005523266994793
계정 -0.8583188013991832
사진 -0.8418135527306652
번호 -0.8094724745303541
삭제 -0.7885231710050848
상담 -0.777836146368034
명의 -0.7357485677906138
시간 -0.710399388626797
사람 -0.687217361488315
로그인 -0.6844068745831419
대기 -0.6524308455840833
실망 -0.6524279361635484
푸시 -0.6499118019075129
전화 -0.646646491746594
페미 -0.6392027719410118
하라 -0.6354216008102771
계좌 -0.6352483939361542
이틀 -0.6315163598010505
확인 -0.6301221543143438
본인 -0.6285356593460909
몇번 -0.627498166315086
이름 -0.605880494465454
제대로 -0.6034551001253348
갤럭시 -0.5947185498537387
다른 -0.5928030275511208
처음 -0.5753025157520928
카카오 -0.5517619515485679
주민 -0.5403934410518773


In [36]:
df = df_toss.drop(['Unnamed: 0','name'], axis=1)

corpus = "".join(map(str,(df['content'].tolist())))
nouns = okt.nouns(apply_regular_expression(corpus))
counter = Counter(nouns)

def content_cleaning(content):
    hangul = re.compile('[^ ㄱ-ㅣ 가-힣]')
    result = hangul.sub('', content)
    okt = Okt()
    nouns = okt.nouns(result)
    nouns = [x for x in nouns if len(x) > 1]
    nouns = [x for x in nouns if x not in stopwords]
    return nouns


vect = CountVectorizer(tokenizer = lambda x: content_cleaning(x))
bow_vect = vect.fit_transform(map(str,(df['content'].tolist())))
word_list = vect.get_feature_names()
count_list = bow_vect.toarray().sum(axis=0)

corpus = "".join(map(str,(df['content'].tolist())))
nouns = okt.nouns(apply_regular_expression(corpus))

counter = Counter(nouns)

counter.most_common(10)

word_count_dict = dict(zip(word_list, count_list))

tfidf_vectorizer = TfidfTransformer()
tf_idf_vect = tfidf_vectorizer.fit_transform(bow_vect)

invert_index_vectorizer = {v: k for k, v in vect.vocabulary_.items()}

def score_to_label(score):
    if score > 3:
        return 1
    else:
        return 0
    
df['y'] = df['score'].apply(lambda x: score_to_label(x))

x = tf_idf_vect
y = df['y']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state=1)

lr = LogisticRegression(random_state = 0)
lr.fit(x_train, y_train)

y_pred = lr.predict(x_test)

positive_random_idx = df[df['y']==1].sample(275, random_state=12).index.tolist()
negative_random_idx = df[df['y']==0].sample(275, random_state=12).index.tolist()

random_idx = positive_random_idx + negative_random_idx
x = tf_idf_vect[random_idx]
y = df['y'][random_idx]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=1)

lr2 = LogisticRegression(random_state = 0)
lr2.fit(x_train, y_train)
y_pred = lr2.predict(x_test)

coef_pos_index = sorted(((value, index) for index, value in enumerate(lr2.coef_[0])), reverse = True)
coef_neg_index = sorted(((value, index) for index, value in enumerate(lr2.coef_[0])), reverse = False)
invert_index_vectorizer = {v: k for k, v in vect.vocabulary_.items()}
print('토스 긍정')
for coef in coef_pos_index[:40]:
    print(invert_index_vectorizer[coef[1]], coef[0])
print('토스 부정')
for coef in coef_neg_index[:20]:
    print(invert_index_vectorizer[coef[1]], coef[0])

토스 긍정
무료 0.45327313629170796
그때 0.3690885264652837
평생 0.33124793330619573
가도 0.3084817081069568
소식 0.256658480882349
초기 0.2553993354939245
해외 0.2404172753053722
체크 0.22768391593995446
한도 0.21760783770957798
적립 0.21062396214676243
한번 0.1877442119354348
결제 0.1759669689534372
이자율 0.17179155012623812
보통예금 0.17179155012623812
차곡차곡 0.16622072464453352
보기 0.1573286845509923
파악 0.14460268490165157
송금 0.1362781495766371
수수료 0.1332010886657362
사항 0.13021664709556635
정보 0.1293641763918066
자동 0.11876390856859086
자주 0.11729373642748393
모든 0.11526090876391486
이체 0.06448729530649999
하니 0.03433022809925951
읍니 0.019110747971339825
프라임 0.018111742556591134
힌트 0.0
히히히 0.0
희망 0.0
흥국 0.0
흡족 0.0
흔적 0.0
흑역사 0.0
흐헷 0.0
흐트려놔 0.0
흐트러졌네 0.0
휴일 0.0
휴무 0.0
토스 부정
업데이트 -0.6669589750267049
계좌 -0.6501906144371253
카드 -0.5689557689627338
입금 -0.4918957712903887
토스 -0.48112408014028146
업뎃 -0.44338913595458235
인증 -0.40622803938509255
제대로 -0.37521050504527903
금액 -0.34756751876445746
연결 -0.3410934249810233
확인 -0.336079127226

In [37]:
df = df_sh.drop(['Unnamed: 0','name'], axis=1)

corpus = "".join(map(str,(df['content'].tolist())))
nouns = okt.nouns(apply_regular_expression(corpus))
counter = Counter(nouns)

def content_cleaning(content):
    hangul = re.compile('[^ ㄱ-ㅣ 가-힣]')
    result = hangul.sub('', content)
    okt = Okt()
    nouns = okt.nouns(result)
    nouns = [x for x in nouns if len(x) > 1]
    nouns = [x for x in nouns if x not in stopwords]
    return nouns

vect = CountVectorizer(tokenizer = lambda x: content_cleaning(x))
bow_vect = vect.fit_transform(map(str,(df['content'].tolist())))
word_list = vect.get_feature_names()
count_list = bow_vect.toarray().sum(axis=0)

corpus = "".join(map(str,(df['content'].tolist())))
nouns = okt.nouns(apply_regular_expression(corpus))

counter = Counter(nouns)

counter.most_common(10)

word_count_dict = dict(zip(word_list, count_list))

tfidf_vectorizer = TfidfTransformer()
tf_idf_vect = tfidf_vectorizer.fit_transform(bow_vect)

invert_index_vectorizer = {v: k for k, v in vect.vocabulary_.items()}

def score_to_label(score):
    if score > 3:
        return 1
    else:
        return 0
    
df['y'] = df['score'].apply(lambda x: score_to_label(x))

x = tf_idf_vect
y = df['y']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state=1)

lr = LogisticRegression(random_state = 0)
lr.fit(x_train, y_train)

y_pred = lr.predict(x_test)

positive_random_idx = df[df['y']==1].sample(275, random_state=12).index.tolist()
negative_random_idx = df[df['y']==0].sample(275, random_state=12).index.tolist()

random_idx = positive_random_idx + negative_random_idx
x = tf_idf_vect[random_idx]
y = df['y'][random_idx]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=1)

lr2 = LogisticRegression(random_state = 0)
lr2.fit(x_train, y_train)
y_pred = lr2.predict(x_test)

coef_pos_index = sorted(((value, index) for index, value in enumerate(lr2.coef_[0])), reverse = True)
coef_neg_index = sorted(((value, index) for index, value in enumerate(lr2.coef_[0])), reverse = False)
invert_index_vectorizer = {v: k for k, v in vect.vocabulary_.items()}
print('신한 긍정')
for coef in coef_pos_index[:20]:
    print(invert_index_vectorizer[coef[1]], coef[0])
print('신한 부정')
for coef in coef_neg_index[:40]:
    print(invert_index_vectorizer[coef[1]], coef[0])

신한 긍정
보기 0.8469360963178793
신경 0.7393576712876283
설명 0.6719804137268597
이벤트 0.5987354014463052
인터페이스 0.5871287292167471
한눈 0.5661315737042051
디자인 0.5390181835114884
편리 0.5280379627267668
감사 0.5280379627267668
바이오인증 0.4572672793401566
분부 0.4072441101105063
신한은행 0.3666530198703128
검색 0.35402269036445644
신한 0.34611359084616783
해외 0.3087686154360763
찾기 0.3087686154360763
주문 0.3087686154360763
안전성 0.3087686154360763
실용 0.3087686154360763
스시 0.3087686154360763
신한 부정
어플 -1.9115975513051353
인증 -1.683640719741067
로그인 -1.6655678605003106
오류 -1.526938719867801
실행 -1.4756028570428017
접속 -1.4253519554301954
설치 -1.34773715619192
업데이트 -1.2595873640442512
가입 -1.1255319927742287
로딩 -1.0938007000749257
거래 -0.9797631316588992
뱅크 -0.9096800795166688
먹통 -0.8991401570684416
계좌 -0.8896377395323871
해결 -0.8614916045857243
전화 -0.7999762961820627
데이터 -0.7950482169194586
카드 -0.7538399250784684
인식 -0.7403094619605799
시간 -0.7385498400948399
문제 -0.7218395126866186
비대 -0.7156898724074048
확인 -0.7042700696412212
문자 -0.

In [38]:
df = df_kb.drop(['Unnamed: 0','name'], axis=1)

corpus = "".join(map(str,(df['content'].tolist())))
nouns = okt.nouns(apply_regular_expression(corpus))
counter = Counter(nouns)

def content_cleaning(content):
    hangul = re.compile('[^ ㄱ-ㅣ 가-힣]')
    result = hangul.sub('', content)
    okt = Okt()
    nouns = okt.nouns(result)
    nouns = [x for x in nouns if len(x) > 1]
    nouns = [x for x in nouns if x not in stopwords]
    return nouns


vect = CountVectorizer(tokenizer = lambda x: content_cleaning(x))
bow_vect = vect.fit_transform(map(str,(df['content'].tolist())))
word_list = vect.get_feature_names()
count_list = bow_vect.toarray().sum(axis=0)

corpus = "".join(map(str,(df['content'].tolist())))
nouns = okt.nouns(apply_regular_expression(corpus))

counter = Counter(nouns)

counter.most_common(10)

word_count_dict = dict(zip(word_list, count_list))

tfidf_vectorizer = TfidfTransformer()
tf_idf_vect = tfidf_vectorizer.fit_transform(bow_vect)

invert_index_vectorizer = {v: k for k, v in vect.vocabulary_.items()}

def score_to_label(score):
    if score > 3:
        return 1
    else:
        return 0
    
df['y'] = df['score'].apply(lambda x: score_to_label(x))

x = tf_idf_vect
y = df['y']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3, random_state=1)

lr = LogisticRegression(random_state = 0)
lr.fit(x_train, y_train)

y_pred = lr.predict(x_test)

positive_random_idx = df[df['y']==1].sample(275, random_state=12).index.tolist()
negative_random_idx = df[df['y']==0].sample(275, random_state=12).index.tolist()

random_idx = positive_random_idx + negative_random_idx
x = tf_idf_vect[random_idx]
y = df['y'][random_idx]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=1)

lr2 = LogisticRegression(random_state = 0)
lr2.fit(x_train, y_train)
y_pred = lr2.predict(x_test)

coef_pos_index = sorted(((value, index) for index, value in enumerate(lr2.coef_[0])), reverse = True)
coef_neg_index = sorted(((value, index) for index, value in enumerate(lr2.coef_[0])), reverse = False)
invert_index_vectorizer = {v: k for k, v in vect.vocabulary_.items()}
print('국민 긍정')
for coef in coef_pos_index[:20]:
    print(invert_index_vectorizer[coef[1]], coef[0])
print('국민 부정')
for coef in coef_neg_index[:20]:
    print(invert_index_vectorizer[coef[1]], coef[0])

국민 긍정
이용 2.1114448904114806
이벤트 1.1025037181893107
보기 0.9829708448212673
기능 0.9639872755229557
메뉴 0.9536330003029483
자주 0.8469785510529807
뱅킹 0.8218312251352009
구성 0.8158791676392608
업무 0.6832167765816001
수수료 0.6365854228334838
스타 0.6230010756700486
주거 0.6175231952438022
디자인 0.5395886648869836
입출금 0.5325480582873511
방식 0.5194211795911907
은행 0.5102091841617317
필수 0.5021836547385776
속도 0.4898147849803922
직관 0.4831088394393295
약간 0.47151307595080927
국민 부정
업데이트 -2.1651334877940633
실행 -1.180044262093244
어플 -1.1144085122550538
설치 -0.9877987978270777
오류 -0.9437299418851466
무슨 -0.8278335444300527
네트워크 -0.7450471126048689
삭제 -0.7139143889045276
로딩 -0.7042033510417625
로그인 -0.6947516097380751
루팅 -0.6839765477311496
등록 -0.678848994056929
처음 -0.6674783657721234
먹통 -0.6668543033562221
비번 -0.6571022988539112
버전 -0.6425245812727247
개판 -0.6192194789471289
사람 -0.6191403964335808
화면 -0.6125843564309545
업체 -0.607443660569185


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
