# 라이브러리

In [None]:
# library load
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import StandardScaler
from nltk.tokenize import word_tokenize
from collections import Counter
from konlpy.tag import Mecab
from tqdm import tqdm
from math import  pi
import pandas as pd
import nltk
import re
import os
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier

mecab = Mecab()

from datetime import datetime
from pytz import timezone
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import numpy as np
import time
import math

import boto3

# 데이터 로드

## 광고 제거

In [None]:
# Load the cafe post (main body) table and clean it.
df_main = pd.read_csv('./data/cafe_main.csv')
# NOTE(review): sort_values returns a new frame that is discarded here, so
# the row order of df_main is not changed by this call.
df_main.sort_values('title')
df_main = (
    df_main.dropna()
    .drop_duplicates(['text'])
    .reset_index(drop=True)
)

# Frame with the post text/title plus an empty 'AD' label column.
test_df = df_main[['text', 'title']].copy()
test_df['AD'] = None

In [None]:
# Collect crawled blog posts and label them as ad / not-ad by keyword.
test_df1 = pd.read_parquet('./data/crwaling_data1.parquet')
test_df2 = pd.read_parquet('./data/crwaling_data2.parquet')
test_df3 = pd.read_parquet('./data/crwaling_data3.parquet')
test_df4 = pd.read_parquet('./data/crwaling_data5.parquet')

learn_df = pd.concat([test_df1, test_df2, test_df3, test_df4])

learn_df.rename(columns={'contents':'text', 'Title' : 'title'}, inplace=True)
learn_df.drop(columns='Unnamed: 0', inplace=True)
learn_df.dropna(inplace=True)
learn_df.drop_duplicates(['text'], inplace=True)
learn_df.reset_index(drop=True, inplace=True)

# Discard posts shorter than 20 characters and posts containing any of the
# off-topic marker words. A single boolean mask replaces the former
# row-by-row drop(inplace=True) loop and keeps exactly the same rows.
dummy_list = ['맛', '음료', 'news', '먹어', '서울런']
too_short = learn_df['text'].apply(len) < 20
off_topic = learn_df['text'].apply(
    lambda body: any(dummy_word in body for dummy_word in dummy_list)
)
learn_df = learn_df[~(too_short | off_topic)].reset_index(drop=True)

# A post is labelled as an ad when it contains any of these phrases.
# NOTE: '이벤트' appears only in a trailing comment of the original list.
ad_list = ['원고료', '서포터', '.milkt.co.kr', '톡 채널', '고객센터', '주관적인', '활동비', '수익성', '수수료', '포스팅']
test = [any(ad_word in body for ad_word in ad_list) for body in learn_df['text']]

learn_df['AD'] = test

In [None]:
# Train an ad classifier on the labelled blog posts (learn_df) and remove the
# posts predicted to be ads from df_main.
sample_df = pd.concat([learn_df, test_df])
sample_df.reset_index(drop=True, inplace=True)

# The first len(learn_df) rows are the labelled posts, the rest come from test_df.
n_train = len(learn_df)
X_train = sample_df['text'][:n_train]
X_test = sample_df['text'][n_train:]
y_train = sample_df['AD'][:n_train]

tfidf = TfidfVectorizer(min_df=2, ngram_range=(1,2), strip_accents='unicode', norm='l2')
X_train = tfidf.fit_transform(X_train)
X_test = tfidf.transform(X_test)

le = LabelEncoder()
y_train = le.fit_transform(y_train)

# print(X_train.shape, y_train.shape, X_test.shape)
XGB = XGBClassifier()
XGB.fit(X_train, y_train)
pred = XGB.predict(X_test)

# Keep the rows whose encoded prediction is 0, the same rows the former
# `df_main['AD'] == False` comparison selected (assumes LabelEncoder mapped
# False to 0; it orders classes by sorted value).
# .copy() makes df_main an independent frame, so the in-place edits applied
# to it later no longer act on a filtered slice (SettingWithCopyWarning).
df_main = df_main[pred == 0].copy()

## 테이블 합치기

In [None]:
# Merge the comment table and the post table into one text table `df`.
df_com = pd.read_csv('./data/cafe_comment.csv')
df_com = df_com.drop(columns=['text_number'])
df_com['etc'] = '댓글'
df_com = df_com.rename(columns={'comment' : 'text'})

df_main = df_main.drop(columns=['date','click','text_number'])
df_main['etc'] = '본문'

df = pd.concat([df_com,df_main])
# NOTE(review): the sorted result is discarded; row order is unchanged.
df.sort_values('title')
df = df.dropna().drop_duplicates(['text']).reset_index(drop=True)
# display(df.isnull().sum())
# display(df.isnull().sum())

# display(df)

## stop_words load

In [None]:
# Read the stop-word list: one entry per line, trailing newline stripped.
with open('./data/stop_words.txt', encoding='utf-8') as f:
    stop_words = [line.rstrip('\n') for line in f]

# 전처리

## 텍스트 전처리 (+전처리 함수)

In [None]:
# 태그 제거
def del_tag_check(text):
    """Remove URL fragments and phone numbers from ``text``.

    Each pattern is deleted with ``re.sub`` in the listed order and the
    cleaned string is returned.

    NOTE(review): the dots in 'comhttpsm.', 'blog.', 'naver.' and 'co. kr'
    are unescaped, so they match any character, not just a literal '.'.
    """
    patterns = (
        'http:',
        'comhttpsm.',
        r'(\d{2,4})-(\d{3,4})-?(\d{0,4})?',  # phone numbers
        'blog.',
        'naver.',
        'co. kr',
    )
    for pattern in patterns:
        text = re.sub(pattern, '', text)

    return text

# Strip URL fragments / phone numbers from both text columns.
for column in ('text', 'title'):
    df[column] = df[column].apply(del_tag_check)

In [None]:
# 특수문자 및 초성 자동 전처리 함수
company_list = ['밀크티', '엘리하이', '엠베스트','윙크', '빨간펜', 
                '온리원', '홈런', '와이즈캠프','스마트올',
                '와캠', '싱크빅', '씽크빅', '웅진', '아이캔두']

# 전처리 함수 1 (수사 및 명사 정리, 오타수정, 명사통합)
# (pattern, replacement) pairs applied by combine_check, in this exact order.
# Raw strings keep regex escapes such as \s out of Python's own string
# escape handling (non-raw '\s' is an invalid escape sequence).
_COMBINE_RULES = (
    # Brand-name variants.
    (r'밀크티초등(?!학)', '밀크티'),
    (r'초등\s?밀크티', '밀크티'),
    (r'밀크티\s?아이', '밀크티'),
    (r'아이스크림(?!홈)', '홈런'),
    # Noun unification and typo fixes.
    (r'애', '아이'),
    (r'ebs', '이비에스'),
    (r'앨', '엘'),
    (r'앰', '엠'),
    (r'전용학습기', '패드'),
    (r'태블릿', '패드'),
    (r'테블릿', '패드'),
    (r'빨간팬', '빨간펜'),
    (r'온니원', '온리원'),
    (r'아이켄두', '아이캔두'),
    (r'와캠', '와이즈캠프'),
    # Ages 4-7 and "pre-elementary" -> '유아'.
    (r'4(세|살)', '유아'),
    (r'5(세|살)', '유아'),
    (r'6(세|살)', '유아'),
    (r'7(세|살)', '유아'),
    (r'예비\s?초등', '유아'),
    # Ages 8-9 and grades 1-3 -> '초등저'.
    (r'8(세|살)', '초등저'),
    (r'9(세|살)', '초등저'),
    (r'(초|초등)1', '초등저'),
    (r'1학년', '초등저'),
    (r'(초|초등)2', '초등저'),
    (r'2학년', '초등저'),
    (r'(초|초등)3', '초등저'),
    (r'3학년', '초등저'),
    # Grades 4-6 -> '초등고'.
    (r'(초|초등)4', '초등고'),
    (r'4학년', '초등고'),
    (r'(초|초등)5', '초등고'),
    (r'5학년', '초등고'),
    (r'(초|초등)6', '초등고'),
    (r'6학년', '초등고'),
    # Collapse a leftover "초등"/"초등학교" prefix in front of the new tokens.
    (r'(초등|초등학교)\s?초등고', '초등고'),
    (r'(초등|초등학교)\s?초등저', '초등저'),
)


# Preprocessing function 1 (numerals and nouns, typo fixes, noun unification)
def combine_check(text):
    """Normalise brand names, common typos and age/grade expressions.

    Every rule in ``_COMBINE_RULES`` is applied to ``text`` with ``re.sub``
    in order; later rules see the output of earlier ones. Returns the
    rewritten string.

    NOTE(review): the digit rules are not anchored, so e.g. '14세' becomes
    '1유아'; confirm whether that is acceptable for the corpus.
    """
    for pattern, replacement in _COMBINE_RULES:
        text = re.sub(pattern, replacement, text)

    return text

def _restore_masked_name(text, word, mask_tokens):
    """Rewrite spellings of ``word`` whose characters were swapped for a mask token."""
    # Exactly one position replaced by a mask token.
    for i in range(len(word)):
        for mask in mask_tokens:
            text = re.sub(word[:i] + mask + word[i + 1:], word, text)

    # Characters at i and i-1 replaced (str.replace swaps every occurrence
    # of those characters in the word).
    for i in range(1, len(word)):
        for mask in mask_tokens:
            masked = word.replace(word[i], mask).replace(word[i - 1], mask)
            text = re.sub(masked, word, text)

    # Longer names: also try three characters. Negative indices wrap around
    # to the end of the word, exactly as in the original loops.
    if len(word) > 3:
        for i in range(len(word)):
            for mask in mask_tokens:
                masked = (word.replace(word[i], mask)
                          .replace(word[i - 1], mask)
                          .replace(word[i - 2], mask))
                text = re.sub(masked, word, text)

    return text


def company_re(text, word, chosung:bool, loc=None):
    """Preprocessing function 2: restore a company name written with initial
    consonants (chosung) or mask symbols.

    Args:
        text: String to clean. English is assumed to be lower-cased already.
        word: Canonical company name (at least two characters).
        chosung: True when every chosung spelling of the first two
            characters may be rewritten safely. Pass False for names where
            that is ambiguous (the original notes give 밀크티 and 윙크).
        loc: Required when ``chosung`` is False: which of the first two
            characters (0 or 1) may appear as a chosung or mask symbol.

    Returns:
        The rewritten text. For names longer than two characters the text
        is rewritten only when, after expanding the first two characters,
        it contains the full name or the name with its tail spelled as
        chosung. If ``word`` is shorter than two characters an error
        message string is returned instead (original behaviour kept).

    Fixes over the previous version: the trial expansion now chains its
    substitutions instead of keeping only the last one; the ``loc == 1``
    mask test no longer hands a generator to ``re.compile`` (TypeError);
    every path returns a string instead of falling through to ``None``.
    """
    if chosung == False and loc is None:
        print(f"'초성 테스트를 False로 입력 시 loc값을 입력해 줘야 합니다. \n chosung = {chosung}, loc = {loc} \n loc은 정규식을 이용할 초성의 위치입니다. (ex. ㅁ크 -> 밀크 : loc = 0")
    elif chosung == False:
        if loc < 0 or loc > 1:
            print(f'loc값이 잘못되었습니다. 입력한 loc 값 : {loc}')

    if len(word) < 2:
        return f'!Error! 검색 단어가 너무 짧습니다. 검색 단어 : {word}'

    initials = CHOSUNG_extraction(word)
    cho1, cho2 = initials[0], initials[1]
    head = word[:2]

    special_word = ['[*]','o','@', 'x', '0']

    # The three chosung spellings of the first two characters.
    both_initials = cho1 + cho2
    first_initial = cho1 + word[1]
    second_initial = word[0] + cho2

    if len(word) == 2:
        if chosung:
            for pattern in (both_initials, first_initial, second_initial):
                text = re.sub(pattern, word, text)
            for i in range(len(word)):
                for mask in special_word:
                    text = re.sub(word[:i] + mask + word[i + 1:], word, text)
            return text

        text = re.sub(both_initials, word, text)
        if loc == 0:
            text = re.sub(first_initial, word, text)
            for mask in special_word:
                text = re.sub(mask + word[1], word, text)
        elif loc == 1:
            text = re.sub(second_initial, word, text)
            for mask in special_word:
                text = re.sub(word[0] + mask, word, text)
        return text

    # len(word) > 2: choose which head spellings may be expanded.
    if chosung:
        head_patterns = (both_initials, first_initial, second_initial)
    elif loc == 0:
        head_patterns = (both_initials, first_initial)
    elif loc == 1:
        head_patterns = (both_initials, second_initial)
    else:
        # Invalid or missing loc (already reported above): leave text as is.
        return text

    # Trial expansion of the first two characters.
    candidate = text
    for pattern in head_patterns:
        candidate = re.sub(pattern, head, candidate)

    # Commit only if the expansion yields the full name, or the name with
    # its tail written as chosung.
    confirmed = bool(re.search(word, candidate)) or bool(
        re.search(head + initials[2:], candidate)
    )
    if not confirmed and not chosung and loc == 1:
        # loc == 1 additionally accepts the head followed by mask symbols.
        confirmed = any(
            re.search(rf'{head}{mask}+', candidate) for mask in special_word
        )
    if not confirmed:
        return text

    return _restore_masked_name(candidate, word, special_word)


def CHOSUNG_extraction(text):
    """Return ``text`` with every Hangul syllable replaced by its initial consonant.

    Characters outside the '가'..'힣' range are copied through unchanged.
    """
    CHOSUNG_LIST = ('ㄱ', 'ㄲ', 'ㄴ', 'ㄷ', 'ㄸ', 'ㄹ', 'ㅁ', 'ㅂ',
                    'ㅃ', 'ㅅ', 'ㅆ', 'ㅇ', 'ㅈ', 'ㅉ', 'ㅊ', 'ㅋ',
                    'ㅌ', 'ㅍ', 'ㅎ')
    base = ord('가')
    pieces = []
    for ch in text:
        if ch < '가' or ch > '힣':
            pieces.append(ch)
        else:
            # The initial consonant changes every 588 code points.
            pieces.append(CHOSUNG_LIST[(ord(ch) - base) // 588])

    return ''.join(pieces)



In [None]:
# Run the preprocessing helpers over both text columns.

for column in ('text', 'title'):
    df[column] = df[column].apply(combine_check)

for company in tqdm(company_list):

    # Exception for the names ending in '크': restrict the chosung rewrite
    # to position 0. (A loc=1 special case for '와이즈캠프' / '아이캔두'
    # exists only as commented-out code.)
    if company in ('밀크티', '윙크'):
        chosung, loc = False, 0
    else:
        chosung, loc = True, None

    for column in ('text', 'title'):
        df[column] = df[column].apply(lambda x : company_re(x, company, chosung, loc))

In [None]:
# 전처리 함수 실행 2 : 특수문자 제거, 오탈자 수정

def special_check(text):
    """Strip special symbols and emoji, unify brand abbreviations, drop stray jamo.

    Steps, in order:
      1. punctuation / special symbols (and newlines) -> a single space,
      2. the legacy "'\\E'-shape" clean-up (see the note below),
      3. emoji removal,
      4. abbreviation / brand-name unification,
      5. removal of standalone consonant or vowel jamo.

    Returns the cleaned string.
    """
    # Remove special symbols.
    text = re.sub(r'[-=+,#/\?:^$@*"※~&%ㆍ!』\‘|\(\)\[\]\<\>`\'…》\n★▲;|]', ' ', text)

    # Original intent: remove "'\E'-shaped" leftovers.
    # NOTE(review): the original class was written '[\a-zA-Z]', where \a is
    # the BEL character, so the class is the range \x07..z. Together with
    # '. ' this replaces any character in that range (digits, spaces,
    # letters, most punctuation) plus the following character and space.
    # The behaviour is kept and only spelled out; confirm the intended pattern.
    text = re.sub(r'[\x07-zA-Z]. ', ' ', text)

    # Remove emoji.
    emoji_pattern = re.compile(
        "["
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F1E0-\U0001F1FF"  # flags (iOS)
        "]+",
        flags=re.UNICODE,
    )
    text = emoji_pattern.sub('', text)

    # Replace abbreviations.
    text = re.sub('와캠', '와이즈', text)
    text = re.sub('와이즈캠프', '와이즈', text)
    text = re.sub('싱크빅', '씽크빅', text)
    text = re.sub(r'웅진\s?씽크빅', '씽크빅', text)
    text = re.sub(r'웅진\s?스마트올', '스마트올', text)
    # The lookbehind keeps an already complete '아이캔두' from turning into
    # '아아이캔두'.
    text = re.sub('(?<!아)이캔두', '아이캔두', text)

    # Remove characters that are only a consonant or only a vowel.
    text = re.sub('[ㄱ-ㅎ]+', '', text)
    text = re.sub('[ㅏ-ㅣ]+', '', text)

    return text

# Apply the symbol / emoji / abbreviation clean-up to both text columns.
for column in ('text', 'title'):
    df[column] = df[column].apply(special_check)

## DataFrame 전처리

In [None]:
# Keyword presence: one boolean column per company, True when the company
# name occurs in the row's text or title.
# company_list.append('아이캔두')
company_list = ['밀크티', '엘리하이', '엠베스트','윙크', 
                '온리원', '홈런', '와이즈','스마트올', '아이캔두']

for key in tqdm(company_list):
    test_list = [
        key in body or key in heading
        for body, heading in zip(df['text'], df['title'])
    ]

    df[key] = test_list  # creates the 'key' column in the DataFrame

In [None]:
# Build the DataFrame fed to the model: one copy of each row per company
# it mentions, tagged with that company in a 'company' column.

remove = ['board_name', 'cafe_id']
pieces = []

for company in company_list:
    # drop() returns a new frame, so the 'company' column is added to an
    # independent object rather than to a filtered slice of df.
    df_piece = df[df[company] == True].drop(columns=remove)
    df_piece['company'] = company
    pieces.append(df_piece)

# Concatenate once instead of growing the frame inside the loop.
df_sentiment = pd.concat(pieces, axis=0) if pieces else pd.DataFrame()
df_sentiment.reset_index(drop=True, inplace=True)
# df_sentiment

In [None]:
# Period setting (configured values: 2022.01 ~ 2023.03)
df_sentiment_backup = df_sentiment.copy()

# Each row gets the first day of its (yyyy, mm) month as its date.
df_sentiment["date"] = pd.to_datetime(
    df_sentiment["yyyy"].astype(str) + "-" + df_sentiment["mm"].astype(str) + "-01"
)

### date range ###
start_year = 2022
start_month = 1
end_year = 2023
end_month = 3

start_date = pd.to_datetime(f"{start_year}-{start_month}-01")
end_date = pd.to_datetime(f"{end_year}-{end_month}-01")

# NOTE(review): the upper bound is exclusive, so rows dated in end_month
# itself (2023-03) are filtered out; confirm that this matches the intended
# "~ 2023.03" period.
on_or_after_start = df_sentiment['date'] >= start_date
before_end = df_sentiment['date'] < end_date
result = df_sentiment.loc[on_or_after_start & before_end].reset_index(drop=True)

# # 나이 설정
# baby_li = ['[0-7]살', '유치원', '유아']
# low_li = ['[8-9]살', '10살' '초저', '초1' ,'초2', '초3']
# high_li = ['1[1-3]살', '초고', '초4' ,'초5', '초6']

# list_ba = []
# list_lo = []
# list_hi = []
# element_li = []


# for i in range(len(result)):
#     list_ba.append(any(bool(re.compile(age).search(result['text'][i]) or re.compile(age).search(result['title'][i])) for age in baby_li))
# result['baby'] = list_ba

# for i in range(len(result)):
#     list_lo.append(any(bool(re.compile(age).search(result['text'][i]) or re.compile(age).search(result['title'][i])) for age in low_li))
# result['element_low'] = list_lo

# for i in range(len(result)):
#     list_hi.append(any(bool(re.compile(age).search(result['text'][i]) or re.compile(age).search(result['title'][i])) for age in high_li))
# result['element_high'] = list_hi

# element = ['초등'] +baby_li+low_li
# for i in range(len(result)):
#     element_li.append(any(bool(re.compile(age).search(result['text'][i]) or re.compile(age).search(result['title'][i])) for age in element))
# result['element'] = element_li

# # result = result.loc[(result['element_low'] == True) or (result['element_high'] == True)]

# # 초등학생만
# result = result[result['element']==True]

# 지역... 은 일단 생략

df_sentiment = pd.DataFrame(result)

# df_sentiment['company'].value_counts()

# 모델 (Sentiment)

In [None]:
# Load the positive / negative word lexicons (one entry per line).

posi_path = "./data/positive_words.txt"
nega_path = "./data/negative_words.txt"

with open(posi_path, encoding='utf-8') as f:
    positive_words = [line.rstrip('\n') for line in f]

with open(nega_path, encoding='utf-8') as f:
    negative_words = [line.rstrip('\n') for line in f]
# negative_words = negative_words.remove(['저는', '수'])

In [None]:
# Search keywords per subject (key_dict). Subjects: 가격 (price), 품질 (quality),
# 컨텐츠 (content), 평가 (evaluation), 아이 (child), 학부모 (parent).

# 230418 change: added '워킹맘' to the parent list after finding parent-related keywords.
# 230418 change: added '선생', '교사', '관리' to the parent list -> satisfaction with the
#                teacher's management of students seems to belong in the parent part.
# NOTE(review): combine_check rewrites '태블릿' to '패드' and '애' to '아이', so on text
# that has passed through it the entries '태블릿' (품질_list) and '애' (아이_list)
# cannot match; confirm whether they are still needed.

가격 = '가격'
가격_list = ['비용','가격','교육비','약정','돈','할인','계약','영사','금액','구매','값','위약금','해지','구입','영업','렌트','무료','체험']

품질 = '품질'
품질_list = ['패드', '스마트','기기','온라인','위약금','제품','렌트','키보드','갤럭시','오류','먹통','태블릿','시스템','퀄리티']

컨텐츠 = '컨텐츠'
컨텐츠_list = ['영어','수학','학습지','문제','과학','교재','국어',
        '인강','컨텐츠','사회','개념','한글','한자','영상','미술','파닉스',
        '논술','독해','심화','리딩','게임','프로그램','예체능','콘텐츠','동영상',
        '국사','역사','한국사','도서','자료','발음','원어민','미디어','동화','진단','구구단','난이도',
        '퀄리티','독후','코딩']

평가 = '평가'
평가_list = ['진도','평가','시험','만점','체크','실력','오답','중간','기말','진단','테스트','성적','풀이','채점','정답','등급','수행','경시대회']

아이 = '아이'
아이_list = ['아이','애','공부','학습','습관','우리','첫째','둘째','셋째','본인','초등학생','초등','초딩','중학생','중등','중딩','쌍둥이','자기','혼자','딸','아들']

학부모 = '학부모'
학부모_list = ['엄마','소개','지인','부모','자녀','남편','아빠','어머님','아버님','잔소리','신랑','어른','워킹맘', '선생', '교사', '관리']

# Maps each subject name to its keyword list.
key_dict = {가격:가격_list, 품질:품질_list, 컨텐츠:컨텐츠_list, 평가:평가_list, 아이:아이_list, 학부모:학부모_list}

In [None]:
# Run the model: for every subject in key_dict, collect the texts that
# mention one of its keywords, score them per company against the
# positive / negative lexicons, and record counts and scores.
#
# Changes from the previous version:
#   * the corpus of the first company is no longer seeded with the first
#     row's text (which was included unconditionally and counted twice when
#     it matched);
#   * every company in company_list always gets an entry, so a company with
#     no matching text yields count 0 and score 0.0 instead of a KeyError or
#     a positional mismatch between scores and companies;
#   * a company with no sentiment hits yields 0.0 instead of ZeroDivisionError.
# NOTE: after this cell `firm` is the company column of df_sentiment (the
# old code left the last company name in it).

all_time_start = time.time()
round_ = 3  # decimals kept in every reported ratio and timing

status = pd.DataFrame({'Company': company_list})
count_df = pd.DataFrame({'Company': company_list})
status_posi = pd.DataFrame({'Company': company_list})
status_nega = pd.DataFrame({'Company': company_list})

count_data = []
company_dict = {}
company_posi_dict = {}
company_nega_dict = {}


def _ratio(part, total):
    """Return part / total rounded to round_ places, or 0.0 when total is 0."""
    return round(part / total, round_) if total else 0.0


for subject, keyword in key_dict.items():

    print('-----------------------')
    if len(subject) == 2:
        print(f'| 검색 : {subject}          |')
    elif len(subject) == 3:
        print(f'| 검색 : {subject}        |')
    print('-----------------------')

    firm = df_sentiment.get('company')
    text = df_sentiment.get('text')
    text_key = []

    # Texts mentioning any keyword of this subject, grouped by company.
    # Pre-filling with company_list keeps every company present and in order.
    company_texts = {name: [] for name in company_list}
    for name, body in tqdm(zip(firm, text), total=len(firm)):
        if any(word in body for word in keyword):
            company_texts.setdefault(name, []).append(body)
            text_key.append(body)

    company = {name: "\n".join(bodies) for name, bodies in company_texts.items()}
    company_count = {name: len(bodies) for name, bodies in company_texts.items()}

    matched = len(text_key)
    count_data.append(matched)
    print(f'{len(firm)} 개의 데이터 중 {subject} 데이터 개수 : {matched}')

    # Tokenise each company's corpus.
    tokens = [[name, word_tokenize(corpus)] for name, corpus in company.items()]

    # Sentiment scores.
    firm_words = {}
    firm_posi = {}
    firm_nega = {}
    sentiment_firm = []
    posi_firm = []
    nega_firm = []
    posi_words = []
    nega_words = []

    start = time.time()

    for name, words in tokens:
        posi_word = []
        nega_word = []

        for t in tqdm(words):
            if len(t) <= 1:
                continue
            # A token counts as positive (checked first) or negative when
            # any lexicon entry occurs inside it.
            if any(word in t for word in positive_words):
                posi_word.append(t)
            elif any(word in t for word in negative_words):
                nega_word.append(t)

        posi = len(posi_word)
        nega = len(nega_word)
        count = posi + nega
        sentiment = posi - nega

        firm_words[name] = posi_word + nega_word
        firm_posi[name] = posi_word
        firm_nega[name] = nega_word
        sentiment_firm.append([name, _ratio(sentiment, count)])
        posi_firm.append([name, _ratio(posi, count)])
        nega_firm.append([name, _ratio(nega, count)])
        posi_words.extend(posi_word)
        nega_words.extend(nega_word)

        print(f'{name} token 개수 : ', len(words))
        print(f'{name} sentiment filtering 횟수 : ', count)

    sentiment_words = posi_words + nega_words
    end = time.time()

    print('긍정점수 : ', posi_firm)
    print()
    print('부정점수 : ', nega_firm)
    print()
    print('종합점수 : ', sentiment_firm)
    print()
    print('걸린 시간 : ', round(end - start, round_), '초')

    # Most frequent nouns (two characters or longer) among the sentiment tokens.
    for label, words in (('all', sentiment_words),
                         ('positive', posi_words),
                         ('negative', nega_words)):
        nouns = [noun for w in words for noun in mecab.nouns(w) if len(noun) >= 2]
        print(f'{label} sentiment words : \n', Counter(nouns).most_common(20))

    # Count DataFrame: matched texts per company for this subject.
    count_df[subject] = [company_count.get(name, 0) for name in company_list]

    # Sentiment DataFrames (percent), looked up by company name.
    total_scores = dict(sentiment_firm)
    posi_scores = dict(posi_firm)
    nega_scores = dict(nega_firm)
    status[subject] = [total_scores[name] * 100 for name in company_list]
    status_posi[subject] = [posi_scores[name] * 100 for name in company_list]
    status_nega[subject] = [nega_scores[name] * 100 for name in company_list]

    company_dict[subject] = firm_words
    company_posi_dict[subject] = firm_posi
    company_nega_dict[subject] = firm_nega


all_time_end = time.time()
print('총 걸린 시간 : ',round(all_time_end - all_time_start, round_), '초')

In [None]:
# Defensive snapshot of the score tables, so a mistake in a later cell can
# be undone by restoring from the copies:
# status = backup_A.copy()
# status_posi = backup_B.copy()
# status_nega = backup_C.copy()

backup_A = status.copy()
backup_B = status_posi.copy()
backup_C = status_nega.copy()

In [None]:
# Build the DataFrame used for analysis and plots, part 1 (counts):
# add a totals row and a totals column, both labelled '합계'.

# Totals row: per-subject sums, stored right after the per-company rows.
# The row label comes from company_list instead of a hard-coded 9.
count_sum = ['합계'] + [count_df[column].sum() for column in count_df.columns[1:]]
count_df.loc[len(company_list)] = count_sum

# Totals column: per-row sum over every column after 'Company'.
sum_all = [count_df.iloc[i, 1:].sum() for i in range(len(count_df))]
count_df['합계'] = sum_all

# 시각화

In [None]:
# count_df.rename(columns={'price' : '가격', 'quality' : '품질', 'contents' : '컨텐츠', 'test' : '평가', 'children' : '아이','parents' : '학부모', 'All' : '합계'}, inplace=True)
# count_df['Company'] = ['밀크티', '엘리하이' ,'엠베스트', '윙크', '온리원', '홈런', '와이즈캠프' ,'스마트올', '아이캔두', '합계']
# # count_df['Company'] = ['MilkT','Elihigh', 'Mbest', 'Wink', 'Onlyone', 'Homerun', 'Wisecamp', 'Smartall', 'Icando', 'All']
# count_df

In [None]:
# Prepare the frames behind the count plots.

# The keyword list uses the short name '와이즈'; plots show '와이즈캠프'.
# Renaming by value replaces the former hard-coded row position (iloc[6, 0]).
count_df['Company'] = count_df['Company'].replace({'와이즈': '와이즈캠프'})

# Frame for the per-company plots: indexed by company, totals row removed,
# sorted by total count (largest first).
# NOTE: this rebinds `df`, which the earlier cells use for the text table.
df = count_df.set_index('Company')
df.drop(index='합계', inplace=True)
df.sort_values('합계', ascending=False, inplace=True)

# Frame for the per-subject plots: transposed so subjects are rows; the
# former totals column (now a row) is removed.
df2 = count_df.rename(columns={'Company' : 'Subject'})
df2.set_index('Subject', drop=True, inplace=True)
df2 = df2.T
df2.drop(index='합계', inplace=True)

In [None]:
# Plot colours: one fixed colour per company, listed in the row order of df.

colors_dic = {'밀크티' : '#0188c9', '엘리하이' : '#00a8c3', '엠베스트' : '#1e972b', '윙크' : '#dd3f74', '아이캔두' : '#8b51a8', '온리원' : '#f24444', '홈런' : '#f77400', '와이즈캠프' : '#e7e41f', '스마트올' : '#f1c88e'}
colors = [colors_dic.get(company) for company in df.index]

In [None]:
# Company pie plot: donut chart of the total data count per company.

# Per-company, per-subject counts (every column except the trailing total).
size_list = [size for i in df.index for size in list(df.loc[i][:-1])]

group_names = df.index
group_size = df['합계']

# Create the figure before plotting so the requested size is applied.
# The former plt.Figure(figsize=...) call after plt.pie built an unattached
# Figure object and had no effect on the chart.
plt.figure(figsize=(20,10))
plt.pie(group_size, labels=group_names, colors=colors, autopct='%.0f%%', )

# A white disc over the centre turns the pie into a donut.
my_circle = plt.Circle( (0,0), 0.7, color = 'white')

p=plt.gcf()
p.gca().add_artist(my_circle)
plt.title('Data Count by company')

now = datetime.now(timezone('Asia/Seoul'))
YY = now.strftime('%Y')
MM = now.strftime('%m')
DD = now.strftime('%d')
HH = now.strftime('%H')

plt.xlabel(f'총데이터 수  :  {group_size.sum()}개\n\n기준일시 : {YY}-{MM}-{DD} {HH}시', loc='right')

# Make sure the output folder exists so savefig cannot fail on a fresh checkout.
os.makedirs('./save_img', exist_ok=True)
plt.savefig(str('./save_img/Count_Company'+now.strftime('%m%d'))+'.png')
plt.show()

In [None]:
# Subject pie plot: donut chart of the total data count per subject.

# Per-subject, per-company counts (every column except the trailing total).
size_list = [size for i in df2.index for size in list(df2.loc[i][:-1])]

group_names = df2.index
group_size = df2['합계']

colors_sns = sns.color_palette('hls',len(group_names))

# Create the figure before plotting so the requested size is applied.
# The former plt.Figure(figsize=...) call after plt.pie built an unattached
# Figure object and had no effect on the chart.
plt.figure(figsize=(20,10))
plt.pie(group_size, labels=group_names, colors=colors_sns , autopct='%.0f%%', )

# A white disc over the centre turns the pie into a donut.
my_circle = plt.Circle( (0,0), 0.7, color = 'white')

p=plt.gcf()
p.gca().add_artist(my_circle)
plt.title('Data Count by subject')

now = datetime.now(timezone('Asia/Seoul'))
YY = now.strftime('%Y')
MM = now.strftime('%m')
DD = now.strftime('%d')
HH = now.strftime('%H')

plt.xlabel(f'총데이터 수  :  {group_size.sum()}개\n\n기준일시 : {YY}-{MM}-{DD} {HH}시', loc='right')

# Make sure the output folder exists so savefig cannot fail on a fresh checkout.
os.makedirs('./save_img', exist_ok=True)
plt.savefig(str('./save_img/Count_Subject'+now.strftime('%m%d'))+'.png')
plt.show()

In [None]:
# MilkT pie plot: donut chart of the '밀크티' row, one slice per subject.

# Every column except the trailing total.
group_names = df.columns[:-1]
group_size = df.loc['밀크티'][:-1]

colors_sns = sns.color_palette('hls',len(group_names))

# Create the figure before plotting so the requested size is applied.
# The former plt.Figure(figsize=...) call after plt.pie built an unattached
# Figure object and had no effect on the chart.
plt.figure(figsize=(20,10))
plt.pie(group_size, labels=group_names, colors=colors_sns , autopct='%.0f%%', )

# A white disc over the centre turns the pie into a donut.
my_circle = plt.Circle( (0,0), 0.7, color = 'white')

p=plt.gcf()
p.gca().add_artist(my_circle)
plt.title('Data Count by MilkT', fontsize=15)
plt.xlabel(f'Count All : {group_size.sum()}', fontsize=10, loc='right')

plt.show()

In [None]:
# # 한글 폰트 땜에 임시로 넣은...

# def imsi(df):
#     # @@@@ 한글 폰트 깨져서 임시로 넣은 코드... 삭제 예정
# #     df.rename(columns={'가격' : 'price', '품질' : 'quality','컨텐츠' : 'contents', '평가' : 'test', '아이' : 'children', '학부모' : 'parents'}, inplace=True)
# #     df['Company'] = ['MilkT','Elihigh', 'Mbest', 'Wink', 'Onlyone', 'Homerun', 'Wisecamp', 'Smartall', 'Icando']
#     df['Company'] = ['밀크티','엘리하이', '엠베스트', '윙크', '온리원', '홈런', '와이즈캠프', '스마트올', '아이캔두']

#     return df

# for stat in [status, status_posi, status_nega]:
#     stat = imsi(stat)

In [None]:
# Rename '와이즈' to '와이즈캠프' by rewriting the whole Company column

def wise_camp(df):
    """Overwrite df['Company'] with the fixed list of nine company names.

    The frame is modified in place and also returned, so it must have
    exactly nine rows.
    """
    company_names = [
        '밀크티', '엘리하이', '엠베스트',
        '윙크', '온리원', '홈런',
        '와이즈캠프', '스마트올', '아이캔두',
    ]
    df['Company'] = company_names
    return df

# wise_camp edits each frame in place, so its return value is not needed.
for stat in (status, status_posi, status_nega):
    wise_camp(stat)

In [None]:
# # df scaling
# def Normalization_df(df):

#     status_data = df.iloc[:,1:]

#     mas = MaxAbsScaler()
#     mas.fit(status_data)
#     Mas_data = mas.transform(status_data)
#     Mas_data = pd.DataFrame(Mas_data)

#     columns_name = df.columns
#     df = pd.concat([df['Company'], Mas_data], axis=1)
#     df.columns = columns_name

#     # std = StandardScaler()
#     # std.fit(status_data)
#     # Stand_data = std.transform(status_data)
#     # Stand_data = pd.DataFrame(Stand_data)

#     # columns_name = df.columns
#     # df = pd.concat([df['Company'], Stand_data], axis=1)
#     # df.columns = columns_name

#     return df

# status = Normalization_df(status)
# status_posi = Normalization_df(status_posi)
# status_nega = Normalization_df(status_nega)

In [None]:
# Sentiment DataFrame processing: print summary statistics of the backup
# frames, then reduce each status frame to the first five companies of df.index.

print('----전체----')
print('평균 : ', np.mean(backup_A.iloc[:,1:]))
print('표준편차 \n: ', np.std(backup_A.iloc[:,1:]))

print('\n', '----긍정----')
print('평균 : ', np.mean(backup_B.iloc[:,1:]))
print('표준편차 : \n', np.std(backup_B.iloc[:,1:]))

print('\n', '----부정----')
print('평균 : ', np.mean(backup_C.iloc[:,1:]))
print('표준편차 : \n', np.std(backup_C.iloc[:,1:]))


def _top5_rows(frame):
    """Return the rows of `frame` whose Company is one of df.index[:5].

    Rows come out in the order of df.index[:5] with a fresh 0..n-1 index.
    All pieces are concatenated once instead of growing a frame from an
    empty DataFrame inside a loop.
    """
    pieces = [frame[frame['Company'] == company] for company in df.index[:5]]
    return pd.concat(pieces, axis=0).reset_index(drop=True)


status = _top5_rows(status)
status_posi = _top5_rows(status_posi)
status_nega = _top5_rows(status_nega)

In [None]:
# Top-5 company comparison 1 (sentiment)
X = status.columns[1:]

fig, ax = plt.subplots(figsize=(12, 7))

# One line per row 0-4 of status, in row order, so the legend built from
# status.Company lines up with the plotted lines.
for row in range(5):
    ax.plot(X, list(status.loc[row][1:]))

ax.legend(status.Company, bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_title('Sentiment_Score', fontsize=20)

# Text annotations (data coordinates): per-column mean of backup_A ...
lotate_ = 8
ax.text(5.75, lotate_, '--평균--')
lotate_ -= 1.5

for col_sen in backup_A.columns[1:]:
    ax.text(5.3, lotate_, f'{col_sen} : ')
    ax.text(6.3, lotate_, round(backup_A[col_sen].mean(), 3))
    lotate_ -= 1.8

# ... followed by the per-column standard deviation.
lotate_ -= 1.8
ax.text(5.7, lotate_, '--표준 편차--')
lotate_ -= 1.8

for col_sen in backup_A.columns[1:]:
    ax.text(5.3, lotate_, f'{col_sen} : ')
    ax.text(6.3, lotate_, round(backup_A[col_sen].std(), 3))
    lotate_ -= 1.8

now = datetime.now(timezone('Asia/Seoul'))

plt.xlabel(f'\n기준일시 : {now:%Y-%m-%d %H}시', loc='right')

plt.subplots_adjust(right=0.8)
# Fix the y-range BEFORE saving so the PNG matches the figure that is shown.
plt.ylim(-15, 20)
# bbox_inches='tight' keeps the legend and annotations from being cut off.
plt.savefig(f"./save_img/Top5_Sentiment{now:%m%d}.png", bbox_inches='tight')
plt.show()

In [None]:
# Top-5 company comparison 2 (positive)
X = status_posi.columns[1:]

fig, ax = plt.subplots(figsize=(12, 7))

# One line per row 0-4 of status_posi, in row order, so the legend built
# from status_posi.Company lines up with the plotted lines.
for row in range(5):
    ax.plot(X, list(status_posi.loc[row][1:]))

ax.legend(status_posi.Company, bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_title('Positive_Score', fontsize=20)

# Text annotations (data coordinates): row mean of selected backup_B rows.
lotate_ = 53
ax.text(5.65, lotate_, '------평균------')
lotate_ -= 1

for index_ in [0, 1, 3, 6, 5]:
    ax.text(5.3, lotate_, f'{backup_B.Company[index_]} : ')
    ax.text(6.3, lotate_, round(backup_B.iloc[index_, 1:].mean(), 3))
    lotate_ -= 1

# Overall mean as an explicit scalar over every cell (NaN ignored).
# np.mean(DataFrame) yields a per-column Series or a scalar depending on the
# pandas version, which made this label's value version-dependent.
overall_mean = np.nanmean(backup_B.iloc[:, 1:].to_numpy(dtype=float))
ax.text(5.3, lotate_ - 1, '전체 평균 : ')
ax.text(6.3, lotate_ - 1, round(float(overall_mean), 3))

now = datetime.now(timezone('Asia/Seoul'))

plt.xlabel(f'\n기준일시 : {now:%Y-%m-%d %H}시', loc='right')

plt.subplots_adjust(right=0.8)
# Fix the y-range BEFORE saving so the PNG matches the figure that is shown.
plt.ylim(42, 58)
# bbox_inches='tight' keeps the legend and annotations from being cut off.
plt.savefig(f"./save_img/Top5_Sentiment(positive){now:%m%d}.png", bbox_inches='tight')

plt.show()

In [None]:
# Top-5 company comparison 3 (negative)
X = status_nega.columns[1:]

fig, ax = plt.subplots(figsize=(12, 7))

# One line per row 0-4 of status_nega, in row order, so the legend built
# from status_nega.Company lines up with the plotted lines.
for row in range(5):
    ax.plot(X, list(status_nega.loc[row][1:]))

ax.legend(status_nega.Company, bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_title('Negative_Score', fontsize=20)

# Text annotations (data coordinates): row mean of selected backup_C rows.
lotate_ = 53
ax.text(5.65, lotate_, '------평균------')
lotate_ -= 1

for index_ in [0, 1, 3, 6, 5]:
    ax.text(5.3, lotate_, f'{backup_C.Company[index_]} : ')
    ax.text(6.3, lotate_, round(backup_C.iloc[index_, 1:].mean(), 3))
    lotate_ -= 1

# Overall mean as an explicit scalar over every cell (NaN ignored).
# np.mean(DataFrame) yields a per-column Series or a scalar depending on the
# pandas version, which made this label's value version-dependent.
overall_mean = np.nanmean(backup_C.iloc[:, 1:].to_numpy(dtype=float))
ax.text(5.3, lotate_ - 1, '전체 평균 : ')
ax.text(6.3, lotate_ - 1, round(float(overall_mean), 3))

now = datetime.now(timezone('Asia/Seoul'))

plt.xlabel(f'\n기준일시 : {now:%Y-%m-%d %H}시', loc='right')

plt.subplots_adjust(right=0.8)
# Fix the y-range BEFORE saving so the PNG matches the figure that is shown.
plt.ylim(42, 58)
# bbox_inches='tight' keeps the legend and annotations from being cut off.
plt.savefig(f"./save_img/Top5_Sentiment(negative){now:%m%d}.png", bbox_inches='tight')

plt.show()

In [None]:
# S3Client.upload_s3_image('./save_img/Count_Subject2023042009.png', f"Yewon_test(count_subject).png")

In [None]:
# Company keys iterated by the noun-frequency graph cells below
# (used to index the per-topic noun dictionaries and as DataFrame row labels).
Top5_Company = ['밀크티', '엘리하이', '와이즈', '윙크', '홈런']

In [None]:
# Top-5 companies: nouns per company and topic (all sentiment)
print('Top5 회사 sentiment')

# noun_dict[company][topic] -> list of nouns (length >= 2) extracted with
# mecab from every text in company_dict[topic][company].
noun_dict = {}
for sangpoom in ['밀크티', '엘리하이', '와이즈', '윙크', '홈런']:

    sangpoom_dict = {}
    for juje in company_dict.keys():
        # Single pass: extract nouns and drop one-character ones right away,
        # instead of repeated list concatenation followed by list.remove().
        nouns = [
            noun
            for sentence in company_dict[juje][sangpoom]
            for noun in mecab.nouns(sentence)
            if len(noun) >= 2
        ]
        sangpoom_dict[juje] = nouns

    noun_dict[sangpoom] = sangpoom_dict
    print('----------')

In [None]:
# Top-5 companies: nouns per company and topic (positive)
print('Top5 회사 positive')

# noun_posi_dict[company][topic] -> list of nouns (length >= 2) extracted
# with mecab from every text in company_posi_dict[topic][company].
noun_posi_dict = {}
for sangpoom in ['밀크티', '엘리하이', '와이즈', '윙크', '홈런']:

    sangpoom_dict = {}
    for juje in company_posi_dict.keys():
        # Single pass: extract nouns and drop one-character ones right away,
        # instead of repeated list concatenation followed by list.remove().
        nouns = [
            noun
            for sentence in company_posi_dict[juje][sangpoom]
            for noun in mecab.nouns(sentence)
            if len(noun) >= 2
        ]
        sangpoom_dict[juje] = nouns

        # Show the 20 most frequent nouns for this company/topic pair.
        print(sangpoom, f'({juje})', ' : ', Counter(nouns).most_common(20))
        print()

    noun_posi_dict[sangpoom] = sangpoom_dict
    print('----------')

In [None]:
# Prepare data for the positive-sentence graphs:
# regroup noun_posi_dict[company][topic] into new_noun_posi_dict[topic][company].

new_noun_posi_dict = {}

for company, nouns_by_topic in noun_posi_dict.items():
    for topic, noun_list in nouns_by_topic.items():
        new_noun_posi_dict.setdefault(topic, {})[company] = noun_list

# Per topic, add an 'All' entry holding every company's nouns concatenated
# in insertion order.
for nouns_by_company in new_noun_posi_dict.values():
    nouns_by_company['All'] = [
        noun
        for noun_list in nouns_by_company.values()
        for noun in noun_list
    ]

In [None]:
# Graph: positive nouns for the '품질' topic

topic_nouns = new_noun_posi_dict['품질']

# The 20 most frequent nouns across all companies. most_common(20) returns
# fewer items when fewer distinct nouns exist, so no IndexError is raised.
X = [noun for noun, _ in Counter(topic_nouns['All']).most_common(20)]

# Rows: companies, columns: top nouns, values: per-company counts (0 if absent).
count_rows = []
for sangpoom in Top5_Company:
    noun_counts = Counter(topic_nouns[sangpoom])
    count_rows.append([noun_counts[noun] for noun in X])
test_friday = pd.DataFrame(count_rows, index=Top5_Company, columns=X)

# Only these three companies are drawn as bars.
milkt = test_friday.loc['밀크티']
elihigh = test_friday.loc['엘리하이']
wisecamp = test_friday.loc['와이즈']

width = 0.3  # width of each bar
x = np.arange(len(X))  # x position of each noun group

fig, ax = plt.subplots(figsize=(10.5, 6))

# Label every bar series itself so the legend always names the plotted company.
ax.bar(x - width, milkt, width, color='#0188c9', label='밀크티')
ax.bar(x, elihigh, width, color='#00a8c3', label='엘리하이')
ax.bar(x + width, wisecamp, width, color='#e7e41f', label='와이즈')

ax.legend(bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_xticks(x)
ax.set_xticklabels(X)
ax.set_title('품질 (positive)', fontsize=16)

# Colour a noun's tick label when a competitor's count is at least 4/3 of
# 밀크티's or 밀크티's is at least 3x the competitor's:
# red = both competitors, blue = 엘리하이 only, orange = 와이즈 only.
# Values are read positionally via to_numpy() rather than Series[int].
for tick, val, eli, wise in zip(ax.get_xticklabels(), milkt.to_numpy(),
                                elihigh.to_numpy(), wisecamp.to_numpy()):
    differs_eli = eli * 1.5 >= val * 2 or val >= eli * 3
    differs_wise = wise * 1.5 >= val * 2 or val >= wise * 3
    if differs_eli and differs_wise:
        tick.set_color('red')
    elif differs_eli:
        tick.set_color('blue')
    elif differs_wise:
        tick.set_color('orange')

now = datetime.now(timezone('Asia/Seoul'))

plt.xlabel(f'\n기준일시 : {now:%Y-%m-%d %H}시', loc='right')

# plt.savefig(str('./save_img/Most_nouns_positive(quality)'+now.strftime('%m%d'))+'.png', bbox_inches='tight')

plt.show()

In [None]:
# Graph: positive nouns for the '아이' topic

topic_nouns = new_noun_posi_dict['아이']

# The 20 most frequent nouns across all companies. most_common(20) returns
# fewer items when fewer distinct nouns exist, so no IndexError is raised.
X = [noun for noun, _ in Counter(topic_nouns['All']).most_common(20)]

# Rows: companies, columns: top nouns, values: per-company counts (0 if absent).
count_rows = []
for sangpoom in Top5_Company:
    noun_counts = Counter(topic_nouns[sangpoom])
    count_rows.append([noun_counts[noun] for noun in X])
test_friday = pd.DataFrame(count_rows, index=Top5_Company, columns=X)

# Only these three companies are drawn as bars.
milkt = test_friday.loc['밀크티']
elihigh = test_friday.loc['엘리하이']
wisecamp = test_friday.loc['와이즈']

width = 0.3  # width of each bar
x = np.arange(len(X))  # x position of each noun group

fig, ax = plt.subplots(figsize=(10.5, 6))

# Label every bar series itself so the legend always names the plotted company.
ax.bar(x - width, milkt, width, color='#0188c9', label='밀크티')
ax.bar(x, elihigh, width, color='#00a8c3', label='엘리하이')
ax.bar(x + width, wisecamp, width, color='#e7e41f', label='와이즈')

ax.legend(bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_xticks(x)
ax.set_xticklabels(X)
ax.set_title('아이 (positive)', fontsize=16)

# Colour a noun's tick label when a competitor's count is at least 4/3 of
# 밀크티's or 밀크티's is at least 3x the competitor's:
# red = both competitors, blue = 엘리하이 only, orange = 와이즈 only.
# Values are read positionally via to_numpy() rather than Series[int].
for tick, val, eli, wise in zip(ax.get_xticklabels(), milkt.to_numpy(),
                                elihigh.to_numpy(), wisecamp.to_numpy()):
    differs_eli = eli * 1.5 >= val * 2 or val >= eli * 3
    differs_wise = wise * 1.5 >= val * 2 or val >= wise * 3
    if differs_eli and differs_wise:
        tick.set_color('red')
    elif differs_eli:
        tick.set_color('blue')
    elif differs_wise:
        tick.set_color('orange')

now = datetime.now(timezone('Asia/Seoul'))

plt.xlabel(f'\n기준일시 : {now:%Y-%m-%d %H}시', loc='right')

# plt.savefig(str('./save_img/Most_nouns_positive(children)'+now.strftime('%m%d'))+'.png', bbox_inches='tight')

plt.show()

In [None]:
# Top-5 companies: nouns per company and topic (negative)
print('Top5 회사 negative')

# noun_nega_dict[company][topic] -> list of nouns (length >= 2) extracted
# with mecab from every text in company_nega_dict[topic][company].
noun_nega_dict = {}
for sangpoom in ['밀크티', '엘리하이', '와이즈', '윙크', '홈런']:

    sangpoom_dict = {}
    for juje in company_nega_dict.keys():
        # Single pass: extract nouns and drop one-character ones right away,
        # instead of repeated list concatenation followed by list.remove().
        nouns = [
            noun
            for sentence in company_nega_dict[juje][sangpoom]
            for noun in mecab.nouns(sentence)
            if len(noun) >= 2
        ]
        sangpoom_dict[juje] = nouns

        # Show the 20 most frequent nouns for this company/topic pair.
        print(sangpoom, f'({juje})', ' : ', Counter(nouns).most_common(20))
        print()

    noun_nega_dict[sangpoom] = sangpoom_dict
    print('----------')

In [None]:
# Prepare data for the negative-sentence graphs:
# regroup noun_nega_dict[company][topic] into new_noun_nega_dict[topic][company].

new_noun_nega_dict = {}

for company, nouns_by_topic in noun_nega_dict.items():
    for topic, noun_list in nouns_by_topic.items():
        new_noun_nega_dict.setdefault(topic, {})[company] = noun_list

# Per topic, add an 'All' entry holding every company's nouns concatenated
# in insertion order.
for nouns_by_company in new_noun_nega_dict.values():
    nouns_by_company['All'] = [
        noun
        for noun_list in nouns_by_company.values()
        for noun in noun_list
    ]

In [None]:
# Graph: negative nouns for the '품질' topic

topic_nouns = new_noun_nega_dict['품질']

# The 20 most frequent nouns across all companies. most_common(20) returns
# fewer items when fewer distinct nouns exist, so no IndexError is raised.
X = [noun for noun, _ in Counter(topic_nouns['All']).most_common(20)]

# Rows: companies, columns: top nouns, values: per-company counts (0 if absent).
count_rows = []
for sangpoom in Top5_Company:
    noun_counts = Counter(topic_nouns[sangpoom])
    count_rows.append([noun_counts[noun] for noun in X])
test_friday = pd.DataFrame(count_rows, index=Top5_Company, columns=X)

# Only these three companies are drawn as bars.
milkt = test_friday.loc['밀크티']
elihigh = test_friday.loc['엘리하이']
wisecamp = test_friday.loc['와이즈']

width = 0.3  # width of each bar
x = np.arange(len(X))  # x position of each noun group

fig, ax = plt.subplots(figsize=(10.5, 6))

# Label every bar series itself so the legend always names the plotted company.
ax.bar(x - width, milkt, width, color='#0188c9', label='밀크티')
ax.bar(x, elihigh, width, color='#00a8c3', label='엘리하이')
ax.bar(x + width, wisecamp, width, color='#e7e41f', label='와이즈')

ax.legend(bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_xticks(x)
ax.set_xticklabels(X)
ax.set_title('품질 (negative)', fontsize=16)

# Colour a noun's tick label when a competitor's count is at least 4/3 of
# 밀크티's or 밀크티's is at least 3x the competitor's:
# red = both competitors, blue = 엘리하이 only, orange = 와이즈 only.
# Values are read positionally via to_numpy() rather than Series[int].
for tick, val, eli, wise in zip(ax.get_xticklabels(), milkt.to_numpy(),
                                elihigh.to_numpy(), wisecamp.to_numpy()):
    differs_eli = eli * 1.5 >= val * 2 or val >= eli * 3
    differs_wise = wise * 1.5 >= val * 2 or val >= wise * 3
    if differs_eli and differs_wise:
        tick.set_color('red')
    elif differs_eli:
        tick.set_color('blue')
    elif differs_wise:
        tick.set_color('orange')

now = datetime.now(timezone('Asia/Seoul'))

plt.xlabel(f'\n기준일시 : {now:%Y-%m-%d %H}시', loc='right')

# plt.savefig(str('./save_img/Most_nouns_negative(quality)'+now.strftime('%m%d'))+'.png', bbox_inches='tight')

plt.show()

In [None]:
# Graph: negative nouns for the '아이' topic

topic_nouns = new_noun_nega_dict['아이']

# The 20 most frequent nouns across all companies. most_common(20) returns
# fewer items when fewer distinct nouns exist, so no IndexError is raised.
X = [noun for noun, _ in Counter(topic_nouns['All']).most_common(20)]

# Rows: companies, columns: top nouns, values: per-company counts (0 if absent).
count_rows = []
for sangpoom in Top5_Company:
    noun_counts = Counter(topic_nouns[sangpoom])
    count_rows.append([noun_counts[noun] for noun in X])
test_friday = pd.DataFrame(count_rows, index=Top5_Company, columns=X)

# Only these three companies are drawn as bars.
milkt = test_friday.loc['밀크티']
elihigh = test_friday.loc['엘리하이']
wisecamp = test_friday.loc['와이즈']

width = 0.3  # width of each bar
x = np.arange(len(X))  # x position of each noun group

fig, ax = plt.subplots(figsize=(10.5, 6))

# Label every bar series itself so the legend always names the plotted company.
ax.bar(x - width, milkt, width, color='#0188c9', label='밀크티')
ax.bar(x, elihigh, width, color='#00a8c3', label='엘리하이')
ax.bar(x + width, wisecamp, width, color='#e7e41f', label='와이즈')

ax.legend(bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_xticks(x)
ax.set_xticklabels(X)
ax.set_title('아이 (negative)', fontsize=16)

# Colour a noun's tick label when a competitor's count is at least 4/3 of
# 밀크티's or 밀크티's is at least 3x the competitor's:
# red = both competitors, blue = 엘리하이 only, orange = 와이즈 only.
# Values are read positionally via to_numpy() rather than Series[int].
for tick, val, eli, wise in zip(ax.get_xticklabels(), milkt.to_numpy(),
                                elihigh.to_numpy(), wisecamp.to_numpy()):
    differs_eli = eli * 1.5 >= val * 2 or val >= eli * 3
    differs_wise = wise * 1.5 >= val * 2 or val >= wise * 3
    if differs_eli and differs_wise:
        tick.set_color('red')
    elif differs_eli:
        tick.set_color('blue')
    elif differs_wise:
        tick.set_color('orange')

now = datetime.now(timezone('Asia/Seoul'))

plt.xlabel(f'\n기준일시 : {now:%Y-%m-%d %H}시', loc='right')

# plt.savefig(str('./save_img/Most_nouns_negative(children)'+now.strftime('%m%d'))+'.png', bbox_inches='tight')

plt.show()

In [None]:
# One boolean per row of df_sentiment: True when any key of key_dict occurs
# as a substring of the row's text or of its title.
keywords = list(key_dict)  # built once instead of once per row

# Iterate over the column values directly so the result does not depend on
# df_sentiment having a default 0..n-1 index.
mentos = [
    any(bingo in text for bingo in keywords) or any(bingo in title for bingo in keywords)
    for text, title in zip(df_sentiment['text'], df_sentiment['title'])
]

In [None]:
# Attach the `mentos` values as column 'yewonnning', then display how often
# each distinct value occurs (the bare expression is the cell's output).
df_sentiment['yewonnning'] = mentos
df_sentiment['yewonnning'].value_counts()

# 모든 긍/부정 그래프

1. 긍정
    - 가격
    - 기기 품질
    - 컨텐츠
    - 평가
    - 아이
    - 부모

1. 부정
    - 가격
    - 기기 품질
    - 컨텐츠
    - 평가
    - 아이
    - 부모

## 긍정

In [None]:
# Prepare data for the positive-sentence graphs:
# regroup noun_posi_dict[company][topic] into new_noun_posi_dict[topic][company].

new_noun_posi_dict = {}

for company, nouns_by_topic in noun_posi_dict.items():
    for topic, noun_list in nouns_by_topic.items():
        new_noun_posi_dict.setdefault(topic, {})[company] = noun_list

# Per topic, add an 'All' entry holding every company's nouns concatenated
# in insertion order.
for nouns_by_company in new_noun_posi_dict.values():
    nouns_by_company['All'] = [
        noun
        for noun_list in nouns_by_company.values()
        for noun in noun_list
    ]

### 가격

In [None]:
# Graph: positive nouns for the '가격' topic

topic_nouns = new_noun_posi_dict['가격']

# The 20 most frequent nouns across all companies. most_common(20) returns
# fewer items when fewer distinct nouns exist, so no IndexError is raised.
X = [noun for noun, _ in Counter(topic_nouns['All']).most_common(20)]

# Rows: companies, columns: top nouns, values: per-company counts (0 if absent).
count_rows = []
for sangpoom in Top5_Company:
    noun_counts = Counter(topic_nouns[sangpoom])
    count_rows.append([noun_counts[noun] for noun in X])
test_friday = pd.DataFrame(count_rows, index=Top5_Company, columns=X)

# Only these three companies are drawn as bars.
milkt = test_friday.loc['밀크티']
elihigh = test_friday.loc['엘리하이']
wisecamp = test_friday.loc['와이즈']

width = 0.3  # width of each bar
x = np.arange(len(X))  # x position of each noun group

fig, ax = plt.subplots(figsize=(10.5, 6))

# Label every bar series itself so the legend always names the plotted company.
ax.bar(x - width, milkt, width, color='#0188c9', label='밀크티')
ax.bar(x, elihigh, width, color='#00a8c3', label='엘리하이')
ax.bar(x + width, wisecamp, width, color='#e7e41f', label='와이즈')

ax.legend(bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_xticks(x)
ax.set_xticklabels(X)
ax.set_title('가격 (positive)', fontsize=16)

# Colour a noun's tick label when a competitor's count is at least 4/3 of
# 밀크티's or 밀크티's is at least 3x the competitor's:
# red = both competitors, blue = 엘리하이 only, orange = 와이즈 only.
# Values are read positionally via to_numpy() rather than Series[int].
for tick, val, eli, wise in zip(ax.get_xticklabels(), milkt.to_numpy(),
                                elihigh.to_numpy(), wisecamp.to_numpy()):
    differs_eli = eli * 1.5 >= val * 2 or val >= eli * 3
    differs_wise = wise * 1.5 >= val * 2 or val >= wise * 3
    if differs_eli and differs_wise:
        tick.set_color('red')
    elif differs_eli:
        tick.set_color('blue')
    elif differs_wise:
        tick.set_color('orange')

now = datetime.now(timezone('Asia/Seoul'))

plt.xlabel(f'\n기준일시 : {now:%Y-%m-%d %H}시', loc='right')

plt.savefig(f"./save_img/Most_nouns_positive(price){now:%m%d}.png", bbox_inches='tight')

plt.show()

### 품질

In [None]:
# Graph: positive nouns for the '품질' topic

topic_nouns = new_noun_posi_dict['품질']

# The 20 most frequent nouns across all companies. most_common(20) returns
# fewer items when fewer distinct nouns exist, so no IndexError is raised.
X = [noun for noun, _ in Counter(topic_nouns['All']).most_common(20)]

# Rows: companies, columns: top nouns, values: per-company counts (0 if absent).
count_rows = []
for sangpoom in Top5_Company:
    noun_counts = Counter(topic_nouns[sangpoom])
    count_rows.append([noun_counts[noun] for noun in X])
test_friday = pd.DataFrame(count_rows, index=Top5_Company, columns=X)

# Only these three companies are drawn as bars.
milkt = test_friday.loc['밀크티']
elihigh = test_friday.loc['엘리하이']
wisecamp = test_friday.loc['와이즈']

width = 0.3  # width of each bar
x = np.arange(len(X))  # x position of each noun group

fig, ax = plt.subplots(figsize=(10.5, 6))

# Label every bar series itself so the legend always names the plotted company.
ax.bar(x - width, milkt, width, color='#0188c9', label='밀크티')
ax.bar(x, elihigh, width, color='#00a8c3', label='엘리하이')
ax.bar(x + width, wisecamp, width, color='#e7e41f', label='와이즈')

ax.legend(bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_xticks(x)
ax.set_xticklabels(X)
ax.set_title('품질 (positive)', fontsize=16)

# Colour a noun's tick label when a competitor's count is at least 4/3 of
# 밀크티's or 밀크티's is at least 3x the competitor's:
# red = both competitors, blue = 엘리하이 only, orange = 와이즈 only.
# Values are read positionally via to_numpy() rather than Series[int].
for tick, val, eli, wise in zip(ax.get_xticklabels(), milkt.to_numpy(),
                                elihigh.to_numpy(), wisecamp.to_numpy()):
    differs_eli = eli * 1.5 >= val * 2 or val >= eli * 3
    differs_wise = wise * 1.5 >= val * 2 or val >= wise * 3
    if differs_eli and differs_wise:
        tick.set_color('red')
    elif differs_eli:
        tick.set_color('blue')
    elif differs_wise:
        tick.set_color('orange')

now = datetime.now(timezone('Asia/Seoul'))

plt.xlabel(f'\n기준일시 : {now:%Y-%m-%d %H}시', loc='right')

plt.savefig(f"./save_img/Most_nouns_positive(quality){now:%m%d}.png", bbox_inches='tight')

plt.show()

### 컨텐츠

In [None]:
# Graph: positive nouns for the '컨텐츠' topic

topic_nouns = new_noun_posi_dict['컨텐츠']

# The 20 most frequent nouns across all companies. most_common(20) returns
# fewer items when fewer distinct nouns exist, so no IndexError is raised.
X = [noun for noun, _ in Counter(topic_nouns['All']).most_common(20)]

# Rows: companies, columns: top nouns, values: per-company counts (0 if absent).
count_rows = []
for sangpoom in Top5_Company:
    noun_counts = Counter(topic_nouns[sangpoom])
    count_rows.append([noun_counts[noun] for noun in X])
test_friday = pd.DataFrame(count_rows, index=Top5_Company, columns=X)

# Only these three companies are drawn as bars.
milkt = test_friday.loc['밀크티']
elihigh = test_friday.loc['엘리하이']
wisecamp = test_friday.loc['와이즈']

width = 0.3  # width of each bar
x = np.arange(len(X))  # x position of each noun group

fig, ax = plt.subplots(figsize=(10.5, 6))

# Label every bar series itself so the legend always names the plotted company.
ax.bar(x - width, milkt, width, color='#0188c9', label='밀크티')
ax.bar(x, elihigh, width, color='#00a8c3', label='엘리하이')
ax.bar(x + width, wisecamp, width, color='#e7e41f', label='와이즈')

ax.legend(bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_xticks(x)
ax.set_xticklabels(X)
ax.set_title('컨텐츠 (positive)', fontsize=16)

# Colour a noun's tick label when a competitor's count is at least 4/3 of
# 밀크티's or 밀크티's is at least 3x the competitor's:
# red = both competitors, blue = 엘리하이 only, orange = 와이즈 only.
# Values are read positionally via to_numpy() rather than Series[int].
for tick, val, eli, wise in zip(ax.get_xticklabels(), milkt.to_numpy(),
                                elihigh.to_numpy(), wisecamp.to_numpy()):
    differs_eli = eli * 1.5 >= val * 2 or val >= eli * 3
    differs_wise = wise * 1.5 >= val * 2 or val >= wise * 3
    if differs_eli and differs_wise:
        tick.set_color('red')
    elif differs_eli:
        tick.set_color('blue')
    elif differs_wise:
        tick.set_color('orange')

now = datetime.now(timezone('Asia/Seoul'))

plt.xlabel(f'\n기준일시 : {now:%Y-%m-%d %H}시', loc='right')

plt.savefig(f"./save_img/Most_nouns_positive(contents){now:%m%d}.png", bbox_inches='tight')

plt.show()

### 평가

In [None]:
# Graph: positive nouns for the '평가' topic

topic_nouns = new_noun_posi_dict['평가']

# The 20 most frequent nouns across all companies. most_common(20) returns
# fewer items when fewer distinct nouns exist, so no IndexError is raised.
X = [noun for noun, _ in Counter(topic_nouns['All']).most_common(20)]

# Rows: companies, columns: top nouns, values: per-company counts (0 if absent).
count_rows = []
for sangpoom in Top5_Company:
    noun_counts = Counter(topic_nouns[sangpoom])
    count_rows.append([noun_counts[noun] for noun in X])
test_friday = pd.DataFrame(count_rows, index=Top5_Company, columns=X)

# Only these three companies are drawn as bars.
milkt = test_friday.loc['밀크티']
elihigh = test_friday.loc['엘리하이']
wisecamp = test_friday.loc['와이즈']

width = 0.3  # width of each bar
x = np.arange(len(X))  # x position of each noun group

fig, ax = plt.subplots(figsize=(10.5, 6))

# Label every bar series itself so the legend always names the plotted company.
ax.bar(x - width, milkt, width, color='#0188c9', label='밀크티')
ax.bar(x, elihigh, width, color='#00a8c3', label='엘리하이')
ax.bar(x + width, wisecamp, width, color='#e7e41f', label='와이즈')

ax.legend(bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_xticks(x)
ax.set_xticklabels(X)
ax.set_title('평가 (positive)', fontsize=16)

# Colour a noun's tick label when a competitor's count is at least 4/3 of
# 밀크티's or 밀크티's is at least 3x the competitor's:
# red = both competitors, blue = 엘리하이 only, orange = 와이즈 only.
# Values are read positionally via to_numpy() rather than Series[int].
for tick, val, eli, wise in zip(ax.get_xticklabels(), milkt.to_numpy(),
                                elihigh.to_numpy(), wisecamp.to_numpy()):
    differs_eli = eli * 1.5 >= val * 2 or val >= eli * 3
    differs_wise = wise * 1.5 >= val * 2 or val >= wise * 3
    if differs_eli and differs_wise:
        tick.set_color('red')
    elif differs_eli:
        tick.set_color('blue')
    elif differs_wise:
        tick.set_color('orange')

now = datetime.now(timezone('Asia/Seoul'))

plt.xlabel(f'\n기준일시 : {now:%Y-%m-%d %H}시', loc='right')

plt.savefig(f"./save_img/Most_nouns_positive(test){now:%m%d}.png", bbox_inches='tight')

plt.show()

In [None]:
# Graph: positive nouns for the '아이' topic

topic_nouns = new_noun_posi_dict['아이']

# The 20 most frequent nouns across all companies. most_common(20) returns
# fewer items when fewer distinct nouns exist, so no IndexError is raised.
X = [noun for noun, _ in Counter(topic_nouns['All']).most_common(20)]

# Rows: companies, columns: top nouns, values: per-company counts (0 if absent).
count_rows = []
for sangpoom in Top5_Company:
    noun_counts = Counter(topic_nouns[sangpoom])
    count_rows.append([noun_counts[noun] for noun in X])
test_friday = pd.DataFrame(count_rows, index=Top5_Company, columns=X)

# Only these three companies are drawn as bars.
milkt = test_friday.loc['밀크티']
elihigh = test_friday.loc['엘리하이']
wisecamp = test_friday.loc['와이즈']

width = 0.3  # width of each bar
x = np.arange(len(X))  # x position of each noun group

fig, ax = plt.subplots(figsize=(10.5, 6))

# Label every bar series itself so the legend always names the plotted company.
ax.bar(x - width, milkt, width, color='#0188c9', label='밀크티')
ax.bar(x, elihigh, width, color='#00a8c3', label='엘리하이')
ax.bar(x + width, wisecamp, width, color='#e7e41f', label='와이즈')

ax.legend(bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_xticks(x)
ax.set_xticklabels(X)
ax.set_title('아이 (positive)', fontsize=16)

# Colour a noun's tick label when a competitor's count is at least 4/3 of
# 밀크티's or 밀크티's is at least 3x the competitor's:
# red = both competitors, blue = 엘리하이 only, orange = 와이즈 only.
# Values are read positionally via to_numpy() rather than Series[int].
for tick, val, eli, wise in zip(ax.get_xticklabels(), milkt.to_numpy(),
                                elihigh.to_numpy(), wisecamp.to_numpy()):
    differs_eli = eli * 1.5 >= val * 2 or val >= eli * 3
    differs_wise = wise * 1.5 >= val * 2 or val >= wise * 3
    if differs_eli and differs_wise:
        tick.set_color('red')
    elif differs_eli:
        tick.set_color('blue')
    elif differs_wise:
        tick.set_color('orange')

now = datetime.now(timezone('Asia/Seoul'))

plt.xlabel(f'\n기준일시 : {now:%Y-%m-%d %H}시', loc='right')

plt.savefig(f"./save_img/Most_nouns_positive(children){now:%m%d}.png", bbox_inches='tight')

plt.show()

In [None]:
# Graph: positive nouns for the '학부모' topic

topic_nouns = new_noun_posi_dict['학부모']

# The 20 most frequent nouns across all companies. most_common(20) returns
# fewer items when fewer distinct nouns exist, so no IndexError is raised.
X = [noun for noun, _ in Counter(topic_nouns['All']).most_common(20)]

# Rows: companies, columns: top nouns, values: per-company counts (0 if absent).
count_rows = []
for sangpoom in Top5_Company:
    noun_counts = Counter(topic_nouns[sangpoom])
    count_rows.append([noun_counts[noun] for noun in X])
test_friday = pd.DataFrame(count_rows, index=Top5_Company, columns=X)

# Only these three companies are drawn as bars.
milkt = test_friday.loc['밀크티']
elihigh = test_friday.loc['엘리하이']
wisecamp = test_friday.loc['와이즈']

width = 0.3  # width of each bar
x = np.arange(len(X))  # x position of each noun group

fig, ax = plt.subplots(figsize=(10.5, 6))

# Label every bar series itself so the legend always names the plotted company.
ax.bar(x - width, milkt, width, color='#0188c9', label='밀크티')
ax.bar(x, elihigh, width, color='#00a8c3', label='엘리하이')
ax.bar(x + width, wisecamp, width, color='#e7e41f', label='와이즈')

ax.legend(bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_xticks(x)
ax.set_xticklabels(X)
ax.set_title('학부모 (positive)', fontsize=16)

# Colour a noun's tick label when a competitor's count is at least 4/3 of
# 밀크티's or 밀크티's is at least 3x the competitor's:
# red = both competitors, blue = 엘리하이 only, orange = 와이즈 only.
# Values are read positionally via to_numpy() rather than Series[int].
for tick, val, eli, wise in zip(ax.get_xticklabels(), milkt.to_numpy(),
                                elihigh.to_numpy(), wisecamp.to_numpy()):
    differs_eli = eli * 1.5 >= val * 2 or val >= eli * 3
    differs_wise = wise * 1.5 >= val * 2 or val >= wise * 3
    if differs_eli and differs_wise:
        tick.set_color('red')
    elif differs_eli:
        tick.set_color('blue')
    elif differs_wise:
        tick.set_color('orange')

now = datetime.now(timezone('Asia/Seoul'))

plt.xlabel(f'\n기준일시 : {now:%Y-%m-%d %H}시', loc='right')

plt.savefig(f"./save_img/Most_nouns_positive(parents){now:%m%d}.png", bbox_inches='tight')

plt.show()

## 부정

In [None]:
# Regroup noun_nega_dict[company][topic] into new_noun_nega_dict[topic][company].
new_noun_nega_dict = {}

for company, nouns_by_topic in noun_nega_dict.items():
    for topic, noun_list in nouns_by_topic.items():
        new_noun_nega_dict.setdefault(topic, {})[company] = noun_list

# Per topic, add an 'All' entry holding every company's nouns concatenated
# in insertion order.
for nouns_by_company in new_noun_nega_dict.values():
    nouns_by_company['All'] = [
        noun
        for noun_list in nouns_by_company.values()
        for noun in noun_list
    ]

### 가격

In [None]:
# Graph: negative nouns for the '가격' topic

topic_nouns = new_noun_nega_dict['가격']

# The 20 most frequent nouns across all companies. most_common(20) returns
# fewer items when fewer distinct nouns exist, so no IndexError is raised.
X = [noun for noun, _ in Counter(topic_nouns['All']).most_common(20)]

# Rows: companies, columns: top nouns, values: per-company counts (0 if absent).
count_rows = []
for sangpoom in Top5_Company:
    noun_counts = Counter(topic_nouns[sangpoom])
    count_rows.append([noun_counts[noun] for noun in X])
test_friday = pd.DataFrame(count_rows, index=Top5_Company, columns=X)

# Only these three companies are drawn as bars.
milkt = test_friday.loc['밀크티']
elihigh = test_friday.loc['엘리하이']
wisecamp = test_friday.loc['와이즈']

width = 0.3  # width of each bar
x = np.arange(len(X))  # x position of each noun group

fig, ax = plt.subplots(figsize=(10.5, 6))

# Label every bar series itself so the legend always names the plotted company.
ax.bar(x - width, milkt, width, color='#0188c9', label='밀크티')
ax.bar(x, elihigh, width, color='#00a8c3', label='엘리하이')
ax.bar(x + width, wisecamp, width, color='#e7e41f', label='와이즈')

ax.legend(bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_xticks(x)
ax.set_xticklabels(X)
ax.set_title('가격 (negative)', fontsize=16)

# Colour a noun's tick label when a competitor's count is at least 4/3 of
# 밀크티's or 밀크티's is at least 3x the competitor's:
# red = both competitors, blue = 엘리하이 only, orange = 와이즈 only.
# Values are read positionally via to_numpy() rather than Series[int].
for tick, val, eli, wise in zip(ax.get_xticklabels(), milkt.to_numpy(),
                                elihigh.to_numpy(), wisecamp.to_numpy()):
    differs_eli = eli * 1.5 >= val * 2 or val >= eli * 3
    differs_wise = wise * 1.5 >= val * 2 or val >= wise * 3
    if differs_eli and differs_wise:
        tick.set_color('red')
    elif differs_eli:
        tick.set_color('blue')
    elif differs_wise:
        tick.set_color('orange')

now = datetime.now(timezone('Asia/Seoul'))

plt.xlabel(f'\n기준일시 : {now:%Y-%m-%d %H}시', loc='right')

plt.savefig(f"./save_img/Most_nouns_negative(price){now:%m%d}.png", bbox_inches='tight')

plt.show()

### 품질

In [None]:
# Graph: negative nouns for the '품질' topic

topic_nouns = new_noun_nega_dict['품질']

# The 20 most frequent nouns across all companies. most_common(20) returns
# fewer items when fewer distinct nouns exist, so no IndexError is raised.
X = [noun for noun, _ in Counter(topic_nouns['All']).most_common(20)]

# Rows: companies, columns: top nouns, values: per-company counts (0 if absent).
count_rows = []
for sangpoom in Top5_Company:
    noun_counts = Counter(topic_nouns[sangpoom])
    count_rows.append([noun_counts[noun] for noun in X])
test_friday = pd.DataFrame(count_rows, index=Top5_Company, columns=X)

# Only these three companies are drawn as bars.
milkt = test_friday.loc['밀크티']
elihigh = test_friday.loc['엘리하이']
wisecamp = test_friday.loc['와이즈']

width = 0.3  # width of each bar
x = np.arange(len(X))  # x position of each noun group

fig, ax = plt.subplots(figsize=(10.5, 6))

# Label every bar series itself so the legend always names the plotted company.
ax.bar(x - width, milkt, width, color='#0188c9', label='밀크티')
ax.bar(x, elihigh, width, color='#00a8c3', label='엘리하이')
ax.bar(x + width, wisecamp, width, color='#e7e41f', label='와이즈')

ax.legend(bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_xticks(x)
ax.set_xticklabels(X)
ax.set_title('품질 (negative)', fontsize=16)

# Colour a noun's tick label when a competitor's count is at least 4/3 of
# 밀크티's or 밀크티's is at least 3x the competitor's:
# red = both competitors, blue = 엘리하이 only, orange = 와이즈 only.
# Values are read positionally via to_numpy() rather than Series[int].
for tick, val, eli, wise in zip(ax.get_xticklabels(), milkt.to_numpy(),
                                elihigh.to_numpy(), wisecamp.to_numpy()):
    differs_eli = eli * 1.5 >= val * 2 or val >= eli * 3
    differs_wise = wise * 1.5 >= val * 2 or val >= wise * 3
    if differs_eli and differs_wise:
        tick.set_color('red')
    elif differs_eli:
        tick.set_color('blue')
    elif differs_wise:
        tick.set_color('orange')

now = datetime.now(timezone('Asia/Seoul'))

plt.xlabel(f'\n기준일시 : {now:%Y-%m-%d %H}시', loc='right')

plt.savefig(f"./save_img/Most_nouns_negative(quality){now:%m%d}.png", bbox_inches='tight')

plt.show()

### 컨텐츠

In [None]:
# Graph, negative sentiment -- category '컨텐츠'.
# Grouped bars showing, for the category's most frequent nouns (ranked over
# the 'All' entry), how often each noun occurs per company.

category_counts = new_noun_nega_dict['컨텐츠']

# most_common(20) ranks once and simply returns fewer entries when the
# category holds fewer than 20 distinct nouns, so a sparse category does not
# raise IndexError.
X = [noun for noun, _ in Counter(category_counts['All']).most_common(20)]

test_friday = pd.DataFrame(columns=X)

for sangpoom in Top5_Company:
    company_counts = Counter(category_counts[sangpoom])
    # A Counter returns 0 for nouns this company never used, so each row is
    # complete and no NaN back-filling is required afterwards.
    test_friday.loc[sangpoom] = [company_counts[noun] for noun in X]

X = test_friday.columns

# plot
milkt = test_friday.loc['밀크티']
elihigh = test_friday.loc['엘리하이']
# wink / homerun are not drawn below; the names are kept in case other
# notebook cells read them.
wink = test_friday.loc['윙크']
wisecamp = test_friday.loc['와이즈']
homerun = test_friday.loc['홈런']

width = 0.3  # width of each individual bar
x = np.arange(len(X))  # one x position per noun group

fig, ax = plt.subplots(figsize=(10.5, 6))

# Grouped bars: three companies side by side for every noun.
ax.bar(x - width, milkt, width, color='#0188c9')
ax.bar(x, elihigh, width, color='#00a8c3')
ax.bar(x + width, wisecamp, width, color='#e7e41f')

# NOTE(review): the legend labels are taken from status.Company in its own
# order, while the bars above are hard-coded to three companies -- confirm
# that the first three entries of status.Company match the bars.
ax.legend(status.Company, bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_xticks(x)
ax.set_xticklabels(X)
ax.set_title('컨텐츠 (negative)', fontsize=16)

# Colour a noun's tick label when the 밀크티 count deviates from a competitor:
# the competitor's count * 1.5 is at least 밀크티's * 2, or 밀크티's count is
# at least three times the competitor's.
#   red    -> deviates from both 엘리하이 and 와이즈
#   blue   -> deviates from 엘리하이 only
#   orange -> deviates from 와이즈 only
# The Series are walked in lockstep by value; integer indexing such as
# elihigh[i] on a string-labelled Series relies on a deprecated positional
# fallback in pandas.
tick_labels = ax.get_xticklabels()
for tick, val, eli, wise in zip(tick_labels, milkt, elihigh, wisecamp):
    off_elihigh = eli * 1.5 >= val * 2 or val >= eli * 3
    off_wisecamp = wise * 1.5 >= val * 2 or val >= wise * 3
    if off_elihigh and off_wisecamp:
        tick.set_color('red')
    elif off_elihigh:
        tick.set_color('blue')
    elif off_wisecamp:
        tick.set_color('orange')

# Timestamp (Asia/Seoul) shown under the axis and used in the file name.
now = datetime.now(timezone('Asia/Seoul'))
YY = now.strftime('%Y')
MM = now.strftime('%m')
DD = now.strftime('%d')
HH = now.strftime('%H')

plt.xlabel(f'\n기준일시 : {YY}-{MM}-{DD} {HH}시', loc='right')

plt.savefig(f'./save_img/Most_nouns_negative(contents){MM}{DD}.png', bbox_inches='tight')

plt.show()

### 평가

In [None]:
# Graph, negative sentiment -- category '평가'.
# Grouped bars showing, for the category's most frequent nouns (ranked over
# the 'All' entry), how often each noun occurs per company.

category_counts = new_noun_nega_dict['평가']

# most_common(20) ranks once and simply returns fewer entries when the
# category holds fewer than 20 distinct nouns, so a sparse category does not
# raise IndexError.
X = [noun for noun, _ in Counter(category_counts['All']).most_common(20)]

test_friday = pd.DataFrame(columns=X)

for sangpoom in Top5_Company:
    company_counts = Counter(category_counts[sangpoom])
    # A Counter returns 0 for nouns this company never used, so each row is
    # complete and no NaN back-filling is required afterwards.
    test_friday.loc[sangpoom] = [company_counts[noun] for noun in X]

X = test_friday.columns

# plot
milkt = test_friday.loc['밀크티']
elihigh = test_friday.loc['엘리하이']
# wink / homerun are not drawn below; the names are kept in case other
# notebook cells read them.
wink = test_friday.loc['윙크']
wisecamp = test_friday.loc['와이즈']
homerun = test_friday.loc['홈런']

width = 0.3  # width of each individual bar
x = np.arange(len(X))  # one x position per noun group

fig, ax = plt.subplots(figsize=(10.5, 6))

# Grouped bars: three companies side by side for every noun.
ax.bar(x - width, milkt, width, color='#0188c9')
ax.bar(x, elihigh, width, color='#00a8c3')
ax.bar(x + width, wisecamp, width, color='#e7e41f')

# NOTE(review): the legend labels are taken from status.Company in its own
# order, while the bars above are hard-coded to three companies -- confirm
# that the first three entries of status.Company match the bars.
ax.legend(status.Company, bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_xticks(x)
ax.set_xticklabels(X)
ax.set_title('평가 (negative)', fontsize=16)

# Colour a noun's tick label when the 밀크티 count deviates from a competitor:
# the competitor's count * 1.5 is at least 밀크티's * 2, or 밀크티's count is
# at least three times the competitor's.
#   red    -> deviates from both 엘리하이 and 와이즈
#   blue   -> deviates from 엘리하이 only
#   orange -> deviates from 와이즈 only
# The Series are walked in lockstep by value; integer indexing such as
# elihigh[i] on a string-labelled Series relies on a deprecated positional
# fallback in pandas.
tick_labels = ax.get_xticklabels()
for tick, val, eli, wise in zip(tick_labels, milkt, elihigh, wisecamp):
    off_elihigh = eli * 1.5 >= val * 2 or val >= eli * 3
    off_wisecamp = wise * 1.5 >= val * 2 or val >= wise * 3
    if off_elihigh and off_wisecamp:
        tick.set_color('red')
    elif off_elihigh:
        tick.set_color('blue')
    elif off_wisecamp:
        tick.set_color('orange')

# Timestamp (Asia/Seoul) shown under the axis and used in the file name.
now = datetime.now(timezone('Asia/Seoul'))
YY = now.strftime('%Y')
MM = now.strftime('%m')
DD = now.strftime('%d')
HH = now.strftime('%H')

plt.xlabel(f'\n기준일시 : {YY}-{MM}-{DD} {HH}시', loc='right')

plt.savefig(f'./save_img/Most_nouns_negative(test){MM}{DD}.png', bbox_inches='tight')

plt.show()

### 아이

In [None]:
# Graph, negative sentiment -- category '아이'.
# Grouped bars showing, for the category's most frequent nouns (ranked over
# the 'All' entry), how often each noun occurs per company.

category_counts = new_noun_nega_dict['아이']

# most_common(20) ranks once and simply returns fewer entries when the
# category holds fewer than 20 distinct nouns, so a sparse category does not
# raise IndexError.
X = [noun for noun, _ in Counter(category_counts['All']).most_common(20)]

test_friday = pd.DataFrame(columns=X)

for sangpoom in Top5_Company:
    company_counts = Counter(category_counts[sangpoom])
    # A Counter returns 0 for nouns this company never used, so each row is
    # complete and no NaN back-filling is required afterwards.
    test_friday.loc[sangpoom] = [company_counts[noun] for noun in X]

X = test_friday.columns

# plot
milkt = test_friday.loc['밀크티']
elihigh = test_friday.loc['엘리하이']
# wink / homerun are not drawn below; the names are kept in case other
# notebook cells read them.
wink = test_friday.loc['윙크']
wisecamp = test_friday.loc['와이즈']
homerun = test_friday.loc['홈런']

width = 0.3  # width of each individual bar
x = np.arange(len(X))  # one x position per noun group

fig, ax = plt.subplots(figsize=(10.5, 6))

# Grouped bars: three companies side by side for every noun.
ax.bar(x - width, milkt, width, color='#0188c9')
ax.bar(x, elihigh, width, color='#00a8c3')
ax.bar(x + width, wisecamp, width, color='#e7e41f')

# NOTE(review): the legend labels are taken from status.Company in its own
# order, while the bars above are hard-coded to three companies -- confirm
# that the first three entries of status.Company match the bars.
ax.legend(status.Company, bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_xticks(x)
ax.set_xticklabels(X)
ax.set_title('아이 (negative)', fontsize=16)

# Colour a noun's tick label when the 밀크티 count deviates from a competitor:
# the competitor's count * 1.5 is at least 밀크티's * 2, or 밀크티's count is
# at least three times the competitor's.
#   red    -> deviates from both 엘리하이 and 와이즈
#   blue   -> deviates from 엘리하이 only
#   orange -> deviates from 와이즈 only
# The Series are walked in lockstep by value; integer indexing such as
# elihigh[i] on a string-labelled Series relies on a deprecated positional
# fallback in pandas.
tick_labels = ax.get_xticklabels()
for tick, val, eli, wise in zip(tick_labels, milkt, elihigh, wisecamp):
    off_elihigh = eli * 1.5 >= val * 2 or val >= eli * 3
    off_wisecamp = wise * 1.5 >= val * 2 or val >= wise * 3
    if off_elihigh and off_wisecamp:
        tick.set_color('red')
    elif off_elihigh:
        tick.set_color('blue')
    elif off_wisecamp:
        tick.set_color('orange')

# Timestamp (Asia/Seoul) shown under the axis and used in the file name.
now = datetime.now(timezone('Asia/Seoul'))
YY = now.strftime('%Y')
MM = now.strftime('%m')
DD = now.strftime('%d')
HH = now.strftime('%H')

plt.xlabel(f'\n기준일시 : {YY}-{MM}-{DD} {HH}시', loc='right')

plt.savefig(f'./save_img/Most_nouns_negative(children){MM}{DD}.png', bbox_inches='tight')

plt.show()

### 학부모

In [None]:
# Graph, negative sentiment -- category '학부모'.
# Grouped bars showing, for the category's most frequent nouns (ranked over
# the 'All' entry), how often each noun occurs per company.

category_counts = new_noun_nega_dict['학부모']

# most_common(20) ranks once and simply returns fewer entries when the
# category holds fewer than 20 distinct nouns, so a sparse category does not
# raise IndexError.
X = [noun for noun, _ in Counter(category_counts['All']).most_common(20)]

test_friday = pd.DataFrame(columns=X)

for sangpoom in Top5_Company:
    company_counts = Counter(category_counts[sangpoom])
    # A Counter returns 0 for nouns this company never used, so each row is
    # complete and no NaN back-filling is required afterwards.
    test_friday.loc[sangpoom] = [company_counts[noun] for noun in X]

X = test_friday.columns

# plot
milkt = test_friday.loc['밀크티']
elihigh = test_friday.loc['엘리하이']
# wink / homerun are not drawn below; the names are kept in case other
# notebook cells read them.
wink = test_friday.loc['윙크']
wisecamp = test_friday.loc['와이즈']
homerun = test_friday.loc['홈런']

width = 0.3  # width of each individual bar
x = np.arange(len(X))  # one x position per noun group

fig, ax = plt.subplots(figsize=(10.5, 6))

# Grouped bars: three companies side by side for every noun.
ax.bar(x - width, milkt, width, color='#0188c9')
ax.bar(x, elihigh, width, color='#00a8c3')
ax.bar(x + width, wisecamp, width, color='#e7e41f')

# NOTE(review): the legend labels are taken from status.Company in its own
# order, while the bars above are hard-coded to three companies -- confirm
# that the first three entries of status.Company match the bars.
ax.legend(status.Company, bbox_to_anchor=(1.2, 1.0), loc='upper right')
ax.set_xticks(x)
ax.set_xticklabels(X)
ax.set_title('학부모 (negative)', fontsize=16)

# Colour a noun's tick label when the 밀크티 count deviates from a competitor:
# the competitor's count * 1.5 is at least 밀크티's * 2, or 밀크티's count is
# at least three times the competitor's.
#   red    -> deviates from both 엘리하이 and 와이즈
#   blue   -> deviates from 엘리하이 only
#   orange -> deviates from 와이즈 only
# The Series are walked in lockstep by value; integer indexing such as
# elihigh[i] on a string-labelled Series relies on a deprecated positional
# fallback in pandas.
tick_labels = ax.get_xticklabels()
for tick, val, eli, wise in zip(tick_labels, milkt, elihigh, wisecamp):
    off_elihigh = eli * 1.5 >= val * 2 or val >= eli * 3
    off_wisecamp = wise * 1.5 >= val * 2 or val >= wise * 3
    if off_elihigh and off_wisecamp:
        tick.set_color('red')
    elif off_elihigh:
        tick.set_color('blue')
    elif off_wisecamp:
        tick.set_color('orange')

# Timestamp (Asia/Seoul) shown under the axis and used in the file name.
now = datetime.now(timezone('Asia/Seoul'))
YY = now.strftime('%Y')
MM = now.strftime('%m')
DD = now.strftime('%d')
HH = now.strftime('%H')

plt.xlabel(f'\n기준일시 : {YY}-{MM}-{DD} {HH}시', loc='right')

plt.savefig(f'./save_img/Most_nouns_negative(parents){MM}{DD}.png', bbox_inches='tight')

plt.show()

In [None]:
# Notebook display: length of df_sentiment_backup
# (presumably a saved copy of df_sentiment -- confirm where it is created).
len(df_sentiment_backup)

In [None]:
# Notebook display: length of df_sentiment.
len(df_sentiment)