In [1]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

import pandas as pd

import os
from os import path 
from time import time
import random

In [2]:
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "TRAS.settings")
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
import django
from django.core.exceptions import ObjectDoesNotExist
django.setup()
from django.db import transaction
from django.conf import settings

from django_pandas.io import read_frame

from proto.models import Attraction, Review

from nltk import tokenize
from nltk.sentiment import SentimentAnalyzer
from nltk.classify import NaiveBayesClassifier
from nltk.sentiment.util import *

from nltk.sentiment.vader import SentimentIntensityAnalyzer

from asgiref.sync import sync_to_async
import nltk

In [None]:
# 디장고 all 리뷰 데이터 불러오고. bulk load?
def sentiment(sentence_list):
    '''
    리뷰를 문장 단위로 쪼개놓은 데이터를 받아서
    각 문장에 대한 감정 분석을 한다.
    '''
    sid = SentimentIntensityAnalyzer()
    
    result_list = []
    for sentence in sentence_list:
        # print(sentence) 샘플만 작게 해서 테스트
        result_list.append((sentence ,sid.polarity_scores(sentence)['compound']))
        '''
        for k in sorted(ss):
            print('{0}: {1}, '.format(k, ss[k]), end='')
        print()
        '''
        #compound: 0.8316, neg: 0.0, neu: 0.254, pos: 0.746,
    return pd.DataFrame(result_list, columns=['sentence','sentiment'])

def sentences_extraction(sent_df, sample_size):
    sent_df.sort_values(by=['sentiment'],inplace=True)
    
    positive_sample = sent_df[-int(sample_size):][::-1]
    negative_sample = sent_df[:int(sample_size)]
    
    return (positive_sample, negative_sample)

#star기반 수축 감정 사전 구축

In [None]:
def attraction_sampling(Attractions_name, All_Reviews_df, start, end):
    result_dict = {}
    for Attraction_name in Attractions_name[start:end]:
        Reviews_df = All_Reviews_df[All_Reviews_df['attraction'] == Attraction_name]
        sentence_list = tokenize.sent_tokenize(" ".join([Review[4] for Review in Reviews_df.values]))
        sent_sent_df = sentiment(sentence_list)
        result_dict[Attraction_name] = sentences_extraction(sent_sent_df, 10)
        print(Attraction_name, " 감정분석 완료!")
    return result_dict        

In [3]:
Attractions_query = Attraction.objects.all()     

In [4]:
Attractions_df = read_frame(Attractions_query)
Attractions_df.to_csv("Attractions.csv")

In [5]:
Attractions_df.name

0                         Gyeongbokgung Palace
1                      Bukhansan National Park
2                   Myeongdong Shopping Street
3                         Changdeokgung Palace
4                    The War Memorial of Korea
                        ...                   
174                            Sungnyemun Gate
175    Hyundai Department Store Apgujeong Main
176                         Banpodaegyo Bridge
177                                COEX Center
178                           Museum Kimchikan
Name: name, Length: 179, dtype: object

In [8]:
Attractions_df.columns

Index(['id', 'name', 'big_sort', 'small_sort', 'address', 'latitude',
       'longitude', 'star_info', 'wordcloud'],
      dtype='object')

In [9]:
query = Review.objects.all()
All_Reviews_df = read_frame(query)
All_Reviews_df.to_csv("All_Reviews.csv")

In [10]:
review_count_by_attr = All_Reviews_df.groupby('attraction')['star'].agg(['count']).sort_values(by='count')
review_count_by_attr

Unnamed: 0_level_0,count
attraction,Unnamed: 1_level_1
Kwanghee Fasion Mall,1
Amore Pacific Museum of Art,2
apm Place,3
Dream Forest,3
Figure Museum,4
...,...
Noryangjin Fisheries Wholesale Market,298
National Museum of Korea,298
National Folk Museum of Korea,298
Dongdaemun Design Plaza (DDP),298


In [11]:
print(review_count_by_attr[review_count_by_attr['count'] < 20])

kill_list = list(review_count_by_attr[review_count_by_attr['count'] < 20].index)

                                               count
attraction                                          
Kwanghee Fasion Mall                               1
Amore Pacific Museum of Art                        2
apm Place                                          3
Dream Forest                                       3
Figure Museum                                      4
Dongdaemun Seonggwak Park                          4
Children’s Museum of National Museum of Korea      4
Gilsangsa Shrine                                   5
Gyeongui line forest                               5
D Museum                                           5
Seoul International Fireworks Festival             5
Eungbongsan Mountain                               5
Ice Gallery                                        5
Seoul Botanical Garden                             5
Ansan Jarak-gil                                    6
Kukkiwon World Taekwondo Headquarters              6
KBS On                                        

In [12]:
attr_star = All_Reviews_df.groupby('attraction')['star'].value_counts()

In [13]:
attr_star['Seocho Tourist Information Center']

star
5    21
Name: star, dtype: int64

In [14]:
star_dict = {name: attr_star[name] for name in Attractions_df.name if ((name in attr_star) and (name not in kill_list))}

In [15]:
star_distribution = {key: 100 * round(star / sum(star),2) for (key, star) in list(star_dict.items())}

In [16]:
print(3 in star_distribution['Bukhansan National Park'])
print(2 in star_distribution['Bukhansan National Park'])

True
False


In [17]:
for name in Attractions_df.name:
    if name in star_distribution:
        for n in range(1,5 +1):
            if n in star_distribution[name]:
                continue
            else:
                 star_distribution[name][n] = 0
                
star_distribution

{'Gyeongbokgung Palace': star
 5    64.0
 4    30.0
 3     6.0
 1     0.0
 2     0.0
 Name: star, dtype: float64,
 'Bukhansan National Park': star
 5    83.0
 4    15.0
 3     2.0
 1     1.0
 2     0.0
 Name: star, dtype: float64,
 'Myeongdong Shopping Street': star
 5    56.0
 4    34.0
 3     9.0
 1     0.0
 2     0.0
 Name: star, dtype: float64,
 'Changdeokgung Palace': star
 5    59.0
 4    32.0
 3     6.0
 2     2.0
 1     0.0
 Name: star, dtype: float64,
 'The War Memorial of Korea': star
 5    75.0
 4    22.0
 3     3.0
 1     1.0
 2     0.0
 Name: star, dtype: float64,
 'National Museum of Korea': star
 5    67.0
 4    26.0
 3     5.0
 2     2.0
 1     0.0
 Name: star, dtype: float64,
 'Insadong': star
 5    51.0
 4    37.0
 3    10.0
 2     1.0
 1     0.0
 Name: star, dtype: float64,
 'Trickeye Museum Seoul': star
 5    60.0
 4    26.0
 3    10.0
 1     2.0
 2     2.0
 Name: star, dtype: float64,
 'Lotte World Tower & Mall': star
 5    60.0
 4    26.0
 3    11.0
 2     3.0
 1 

In [18]:
def avg(name):
    avg_n = 0
    for n, value in star_distribution[name].items():
        avg_n += n * (value/100)
    return int(avg_n*100)/100

avg_dict = {name:avg(name) for name in Attractions_df.name if name in star_distribution}

In [19]:
sort_avg = sorted(avg_dict.items(), key=lambda x: x[1], reverse=True)
sort_avg 

[('Seocho Tourist Information Center', 5.0),
 ('Bukhansan National Park', 4.81),
 ("The War and Women's Human Rights Museum", 4.75),
 ('The War Memorial of Korea', 4.72),
 ('Jamsil Baseball Stadium', 4.71),
 ('Grevin Museum', 4.7),
 ('Seokchon Lake', 4.68),
 ('Korea Furniture Museum', 4.64),
 ('Seoul Lantern Festival', 4.61),
 ('Myeongdong NANTA Theater', 4.6),
 ('Seoul Metro', 4.59),
 ('Love Museum', 4.59),
 ('Gyeongbokgung Palace', 4.58),
 ('National Museum of Korea', 4.58),
 ('Bugaksan Seoul Fortress', 4.57),
 ('Majang Meat Market', 4.55),
 ('Seoul Central Mosque', 4.55),
 ('Jeongdong Theater', 4.55),
 ('KTX (Korea Train Express)', 4.54),
 ('Hongik University Street', 4.51),
 ('Bongeunsa Temple', 4.49),
 ('Arario Museum in Space', 4.49),
 ('Seoul City Wall', 4.47),
 ('Hongdae NANTA Theatre', 4.46),
 ('Yeouido Park', 4.45),
 ('Changdeokgung Palace', 4.44),
 ('Yeouido Hangang Park', 4.44),
 ('Inwangsan Mountain', 4.44),
 ('Myeongdong Shopping Street', 4.43),
 ('Lotte World Tower & Mal

In [20]:
for name in Attractions_df.name:
    if name in star_distribution:
        star_distribution[name]['avg']=avg_dict[name]
        star_distribution[name]['count']= review_count_by_attr['count'][name]

In [21]:
print(len(star_distribution.keys()))
update_list = []
for attraction in Attractions_query:
    name = attraction.name
    if name in star_distribution:
        attraction.star_info = star_distribution[name].to_json()
        update_list.append(attraction)

122


In [22]:
update_list[-5:]

[<Attraction: The War and Women's Human Rights Museum>,
 <Attraction: Hongdae NANTA Theatre>,
 <Attraction: D-Cube City>,
 <Attraction: The Blue House (Cheong Wa Dae)>,
 <Attraction: Jeongdong Theater>]

In [23]:
Attraction.objects.bulk_update(update_list, ['star_info'])

In [9]:
attraction_review_sample = attraction_sampling(Attractions_name, All_Reviews_df, start=0, end=10)

NameError: name 'attraction_sampling' is not defined

In [None]:
import pprint
pp = pprint.PrettyPrinter(indent = 4)

In [None]:
def pprint_review_sample(attraction_review_sample, i):
    key = list(attraction_review_sample.keys())[i]
    (pos_df, neg_df) = attraction_review_sample[key]
    print(key)
    for pos_sent in pos_df.values:
        print(pos_sent[1])
        print(pos_sent[0].split('.')[0])
    for neg_sent in neg_df.values:
        print(neg_sent[1])
        print(neg_sent[0].split('.')[0])

In [None]:
attraction_review_sample[]

In [None]:
pprint_review_sample(attraction_review_sample, 4)

In [10]:
result_list = []
for row in All_Reviews_df[::10][['star', 'text']].values:
    if row[0] == 5:
        tag = 'pos'
    elif row[0] == 4:
        tag = 'neu'
    else:
        tag = 'neg'
        
    for sentence in tokenize.sent_tokenize(row[1]):
        sentence = tokenize.word_tokenize(sentence)
        result_list.append((sentence, tag))
        
random.shuffle(result_list)
print(result_list[:50:10])
print(len(result_list))

[(['Very', 'windy', 'at', 'the', 'peak', '.'], 'pos'), (['The', 'buskers', 'put', 'in', 'their', 'heart', 'and', 'soul', 'in', 'their', 'performances.A', 'nice', 'way', 'to', 'spend', 'a', 'Saturday', 'evening', '.'], 'pos'), (['Being', '500', 'meters', 'up', 'is', 'not', 'something', 'you', 'do', 'every', 'day', '.'], 'pos'), (['What', 'makes', 'it', 'worse', 'was', 'that', 'the', 'Naver', 'Map', 'direction', 'did', "n't", 'bring', 'us', 'to', 'the', 'correct', 'spot', '.'], 'neg'), (['This', 'place', 'is', 'without', 'a', 'doubt', 'the', 'best', 'place', 'to', 'experience', 'Seoul', 'on', 'a', 'weekend', 'like', 'a', 'local', '.'], 'pos')]
7728


In [11]:
length = len(result_list)
training_docs = result_list[:int(length * 0.8)]

In [12]:
testing_docs = result_list[-100:]

human_testing_docs = []
for sentence, old_tag in testing_docs:
    print(" ".join(sentence), old_tag)
    d = int(input())
    if d == 1:
        tag = 'pos'
    elif d == 2:
        tag = 'neu'
    else:
        tag = 'neg'
    human_testing_docs.append((sentence,tag))

Lots of clothes , socks , bags , etc . neg
2
There 's an aquarium inside too which I was really tempted to go but I 'd just eaten a bunch of sushi and did n't need a reminder that I had probably eaten Nemo . neg
3
connects two hotels . neg
2
Definitely a must see ! pos
1
We accidentally came across this mall . neu
2
Do n't get confused , Quantity does not mean either Quality or a good price . neg
3
Do not forget your camera and the volunteers in costume are very happy to get a photo with you . pos
1
This area is great for the atmosphere , hotels , and variety of different kinds of restaurants and people you will find . pos
1
Overwhelmed by Korean youngers . pos
1
At a certain fee , they can prepare the seafood you bought from the market for you . neg
1
Be sure to reach earlier if you intend to visit the zoo and the various museums there since they close pretty early . neu
2
Fast , clean , and friendly staffs . pos
1
After getting familiarized we realized that there were so all Magic to

1
Would n't recommend coming all the way for this . neg
3
I advise you not to visit Korea House unless you sleep on money . neg
3
You will find Bonghwadae ( beacon mound ) at the top as it was once the center of capital defense . pos
1
Good way to spend an hour . neu
1
I would recommend to visit during day time until 8 PM . pos
2
I just feel bad that all were enclosed ... neu
3
It 's just a gate as part of the larger Gyeongbokgung Palace . neu
2


In [25]:
data = """Lots of clothes , socks , bags , etc . neg
2
There 's an aquarium inside too which I was really tempted to go but I 'd just eaten a bunch of sushi and did n't need a reminder that I had probably eaten Nemo . neg
3
connects two hotels . neg
2
Definitely a must see ! pos
1
We accidentally came across this mall . neu
2
Do n't get confused , Quantity does not mean either Quality or a good price . neg
3
Do not forget your camera and the volunteers in costume are very happy to get a photo with you . pos
1
This area is great for the atmosphere , hotels , and variety of different kinds of restaurants and people you will find . pos
1
Overwhelmed by Korean youngers . pos
1
At a certain fee , they can prepare the seafood you bought from the market for you . neg
1
Be sure to reach earlier if you intend to visit the zoo and the various museums there since they close pretty early . neu
2
Fast , clean , and friendly staffs . pos
1
After getting familiarized we realized that there were so all Magic tour tickets available allowing to skip the line . neg
3
Other reviews have mentioned the interactive hands free headset . pos
1
A great way to thoroughly enjoy this palace is to emerge yourself into the culture ! pos
1
The street itself also quite artsy with some mural and street performance during night . pos
1
There are a lot of shops to choose from . pos
2
Again as before , go with a guide and this palace comes to life.Be prepared to walk , the treasure is the building architecture and the history . pos
1
This branch is the Main & Biggest one . neu
2
Even you can shop some cosmetics ( which is the same store that you can find in Myeongdong ! ) pos
1
There is an old building and a new building next to each other . pos
2
However I am disappointed towards Youll . neg
3
Here we share some day & night photos shot . neu
1
They will push you to buy fresh seafood and will bring you to a bunch of restaurants upstairs and charge you exorbitantly to cook it . neg
3
Was looking forward to coming here but after marvelling at the structure and sculptures outside we were a bit bemused by the inside . neg
3
Nice place . pos
1
Overall a very nice day trip in conjunction with a walk down to Independence Park . neu
1
It 's a residence area . neg
2
I 've never been in a library that is soo huge . pos
1
It was beautifully restored and displayed . neu
1
However , if you need to buy something inside the prices are extremely reasonable . neu
1
If this was all there was in the area then it would be great , but there are some very impressive “ competitors ” nearby . neg
2
The prices are not cheap though but we order 2 set menus consisting of about 10 items which cost about Kwn69,000/- . neu
3
When we got on the bus to go to the tower , I didn ’ t realize that it was at the top of a hill/mountain so I was surprised that we didn ’ t just walk , but I was thankful as we made our way up the windy roads that walking wasn ’ t happening ! pos
1
So if you want to know the other side of Seoul that is more interesting , just stay in Hongdae ! pos
1
Worth a visit ! pos
1
Queues were small to get up and down the tower though . neu
3
? The owner gave us a shopping bag and wrote our name in korean . neg
1
Main problem anywhere in Korea is the language barrier . neu
3
You can bargain with for price . neu
2
I forgot the name . neu
2
It 's a well-planned mall for a younger audience with many amenities for events such as the Seoul St. Patty 's Day festival . neu
1
Inside the Seoul metro area I think it is very competitive with cars and riding is generally as fast as driving.English is everywhere and am English speaker can easily use the metro , though specifics like express trains and branch line service can be hard to spot without a little Korean . neu
1
Small portions , so if you have a big appetite ... ... ? ? ? ? ? ? ? ? ? ? ? neu
3
They have a really interesting library in the middle of the COEX . pos
1
After a brief lesson , you use brush and ink to write the Hangeul word on canvas . pos
1
You can also check out the Hongdae Free Market on Saturdays . pos
1
You can see the whole of Seoul from up here . pos
1
There are 11 floors with top 3 of them dedicated to Duty Free shopping . pos
1
However , fee was charged for camel / pony riding , amusement park and character world.Upon entrance , we were greeted by the awesome autumn scenery with colorful orange , yellow and red leaves on the tall trees . neu
1
Well worth the $ 22 admission price . neu
1
This temple has so much more to offer than the more popular spots in Seoul.Gyeongbokgung Palace was just ... big , while the Temple was wonderfully ornate and less crowded . pos
1
The exhibits and exhibition style was interesting too . neu
1
This place is a quick walkthrough compared to other palaces in Seoul , but it 's worth a visit if you have been to the other palaces . neu
1
Lots of food affordable food available as well . neu
1
Goods sold here tend to be cheaper than what you can find in Meyongdong . neg
1
Nevertheless , the museum is a great chronology of Korean history told through wars . pos
1
The parking garage was cold.- I appreciated that they had us line up in order . neu
2
I think the best experience is to grab any street food you like and munch on them while shopping . pos
1
I definitely recommend the casino , but please note that it is a rather small facility and so finding a table can be very hard with the right limits ( unless you 're a high limits player ) . pos
1
There is a free museum under the statue that mentions Yi Sunshin one of Korea 's greatest heroes and other important exhibits about important dates in Korean history . pos
1
This is also near Pungmoon High School and Choong Ang High School where you can reminisce the drama : ) pos
1
Must do the tower and get a great view of the city . pos
1
It also started pissing it down raining . pos
2
Many shop and food have been changed to modern style but still you can find Korean special style . pos
1
There are several theaters that display either holograms or 3-D presentations . pos
1
Really enjoyed the enjoyed the gardens which this building is situated . pos
1
nice museum close to the gyongborksung palace . neu
1
But , you have to pay for 11,000 won to go up the observatory to see the whole country view . pos
3
He was suspected to die with a complication of diabetes . neu
2
They even have a museum at the top floor . neu
1
Do n't worry it so safe : ) pos
1
There are many trails to go to the top of the mountain . neu
2
No entrance fee at all.Boungeunsa Temple is a peaceful retreat that is nestled in the heart of one of the busiest location in Seoul . pos
1
The area is mostly full of ladies stuffs and other home decorations , accessories etc.. pos
1
Would recommend the greenbean pancake , gimbap and bibimbap there ! pos
1
There were a lot of police lined up at the street at that time , so it was interesting in that the old and the new were happening in one place . neu
1
The down side is , it 's mostly only in Korean ! neu
1
We were lucky as there was a traditional korean music performance on the palace grounds as well , so we got to experience that ? ? ? pos
1
The museum is very interested , especially considering that it focuses on modern history and economic development . neu
1
Created by the admiral , this vessel was the world 's best assault ship of the time . pos
1
Even u will find toilets , fitness pos
1
Thus the only option that I notice is sulbing bingsu shop or bring some snacks from the lotte mart outside . neg
3
If you like fresh and good quality seafood this is a total must ! pos
1
It does not disappoint ! pos
1
I also liberated a young man who had been locked in a cell by his ‘ friends ’ . neu
2
Especially if you want to watch a play with your lover , friend or family , and go to a nice and intimate restaurant for dinner , do n't hesitate to visit Daehakro . pos
1
! neg
2
It has a wide variety of artifacts including ancient ones . neg
1
Connect this tour with your geokbong palace tour . pos
2
Buy a shirt , support a team and hope that they win ! pos
1
The place is small and crowded . neg
3
While walking the ground of the shrine , I said to her “ ah-reum-dap-da ” which was to compliment it to be such a pretty and serene place . pos
1
Would n't recommend coming all the way for this . neg
3
I advise you not to visit Korea House unless you sleep on money . neg
3
You will find Bonghwadae ( beacon mound ) at the top as it was once the center of capital defense . pos
1
Good way to spend an hour . neu
1
I would recommend to visit during day time until 8 PM . pos
2
I just feel bad that all were enclosed ... neu
3
It 's just a gate as part of the larger Gyeongbokgung Palace . neu
2"""

In [27]:
h_test_list = []
sentence_of_n = ""
for line in data.split('\n'):
    try:
        tag_num = int(line)
        if tag_num == 1:
            tag = 'pos'
        elif tag_num == 2:
            tag = 'neu'
        else:
            tag = 'neg'
        h_test_list.append((sentence_of_n, tag))
    except ValueError:
        sentence_of_n = line
h_test_list[:5]

[('Lots of clothes , socks , bags , etc . neg', 'neu'),
 ("There 's an aquarium inside too which I was really tempted to go but I 'd just eaten a bunch of sushi and did n't need a reminder that I had probably eaten Nemo . neg",
  'neg'),
 ('connects two hotels . neg', 'neu'),
 ('Definitely a must see ! pos', 'pos'),
 ('We accidentally came across this mall . neu', 'neu')]

In [23]:
for sentence, tag in h_testing_docs:
    print(sentence, tag)

['Very', 'windy', 'at', 'the', 'peak', '.'] neg
['Traffic', 'is', 'not', 'so', 'busy', 'as', 'the', 'main', 'streets', '.'] neg
['It', "'s", 'a', 'bit', 'expensive', 'comparing', 'to', 'what', 'the', 'aquarium', 'offers', '.'] neg
['There', "'s", 'also', 'some', 'kind', 'of', 'performance', 'in', 'the', 'park', '.'] neg
['Lots', 'of', 'fun', 'with', 'a', 'Disney', 'like', 'flair', 'with', 'little', 'props', 'and', 'stages', 'for', 'the', 'kids.There', 'are', 'drink', 'machines', 'all', 'along', 'the', '``', 'caves', "''", 'inside', ',', 'so', 'you', 'will', 'not', 'go', 'thirsty', ',', 'and', 'plenty', 'of', 'staff', 'around', 'to', 'help', 'at', 'any', 'time', '.'] neg
['Especially', 'enjoyed', 'the', 'displays', 'of', 'traditional', 'banquests', 'and', 'a', 'wedding', 'ceremony', '.'] neg
['I', 'loved', 'the', 'fact', 'that', 'the', 'you', 'are', 'in', 'the', 'city', 'and', 'immersed', 'in', 'nature', 'and', 'history', 'at', 'the', 'same', 'time', '.'] neg
['The', 'grounds', 'were', 

In [29]:
sentim_analyzer = SentimentAnalyzer()
all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_docs])
unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)

training_set = sentim_analyzer.apply_features(training_docs)

trainer = NaiveBayesClassifier.train

classifier = sentim_analyzer.train(trainer, training_set, save_classifier = "naiveBayes_uni")

AttributeError: 'SentimentAnalyzer' object has no attribute 'bigram_word_feats'

In [28]:
h_test_set = sentim_analyzer.apply_features(h_test_list)
for key,value in sorted(sentim_analyzer.evaluate(h_test_set).items()):
    print('{0}: {1}'.format(key, value))

Evaluating NaiveBayesClassifier results...
Accuracy: 0.22
F-measure [neg]: 0.21621621621621623
F-measure [neu]: 0.31578947368421056
F-measure [pos]: 0.08823529411764706
Precision [neg]: 0.19047619047619047
Precision [neu]: 0.2
Precision [pos]: 0.75
Recall [neg]: 0.25
Recall [neu]: 0.75
Recall [pos]: 0.046875


In [None]:
uni n그램

데이터 크기

triner 나이브 베이지안 말고?

In [None]:
# 아래는 리뷰가 다 사라졌길래 다시 bulk create한 내용

In [None]:
def review_date(date_string):
    dict = {
        ' January ':'-01-01',
        ' February ':'-02-01',
        ' March ':'-03-01',
        ' April ':'-04-01',
        ' May ':'-05-01',
        ' June ':'-06-01',
        ' July ':'-07-01',
        ' August ':'-08-01',
        ' September ':'-09-01',
        ' October ':'-10-01',
        ' November ':'-11-01',
        ' December ':'-12-01',
    }
    
    return date_string[-4:] + dict[date_string[:-4]]

In [None]:
review_path = 'C:/Users/taehee/Documents/GitHub/tripReviewAnalysisSystem/크롤러-전처리/원시자료/'
Attraction_list = Attraction.objects.all()
Reviews_list = []
for the_Attraction in Attraction_list:
    Attraction_Reviews_df = pd.read_csv(review_path+ the_Attraction.name+".csv", engine='python', index_col = 0, encoding = 'utf-8')
    for Review_list in Attraction_Reviews_df.values[2:]:
        Reviews_list.append(Review(attraction = the_Attraction,
                       star = Review_list[0],
                      title = Review_list[1],
                      text = Review_list[2],
                      date = review_date(Review_list[3])))

In [None]:
len(Reviews_list)

In [None]:
for num in range(19):
    start = (num-1)*1000
    if start <0:
        start = 0
    end = num*1000
    if end > 18534:
        end = 18534
    Review.objects.bulk_create(Reviews_list[start:end])

In [None]:
max_len = 0
max_text = ""
for Review in Reviews_list:
    if len(Review.text) > max_len:
        max_text = Review.text
        max_len = len(Review.text)

In [None]:
print(max_len)