In [1]:
from collections import defaultdict
from glob import glob
import os
import re
import time

from lxml import html
import numpy as np
import pandas as pd
import requests

#import utils
#sudo pip install utils 설치
import json


BASEURL     = 'http://movie.naver.com/movie/point/af/list.nhn'
RATINGURL   = BASEURL + '?&page=%s'
MOVIEURL    = BASEURL + '?st=mcode&target=after&sword=%s&page=%s'

DATADIR     = 'data/ratings'
INDEXFILE   = 'index.txt'
TMPFILE     = 'data/ratings_all.txt'
RATINGSFILE = 'data/ratings.txt'
SEED        = 1234
SLEEP       = 600
NDOCS       = 200000


extract_nums = lambda s: re.search('\d+', s).group(0)
sanitize_str = lambda s: s.strip()




def parse_item(item):
    try:
        return {'review_id': item.xpath('./td[@class="ac num"]/text()')[0],     # num
                'rating': item.xpath('./td[@class="point"]/text()')[0],         # point
                'movie_id': extract_nums(item.xpath('./td[@class="title"]/a/@href')[0]),
                'review': sanitize_str(' '.join(item.xpath('./td[@class="title"]/text()'))),   
                'author': item.xpath('./td[@class="num"]/a/text()')[0],
                'date': item.xpath('./td[@class="num"]/text()')[0]
        }
    except (IndexError, AttributeError) as e:
        print(e, item.xpath('.//text()'))
        return None
    except (AssertionError) as e:
        print(e, 'Sleep for %s' % SLEEP)
        time.sleep(SLEEP)
    except Exception as e:
        print(e, '음 여기까진 생각을 못했는데...')


def crawl_rating_page(url):
    resp = requests.get(url)
    root = html.fromstring(resp.text)
    items = root.xpath('//body//table[@class="list_netizen"]//tr')[1:]
    npages = max(map(int, ([0] + root.xpath('//div[@class="paging"]//a/span/text()'))))
    return list(filter(None, [parse_item(item) for item in items])), npages


def write_json(items,filenames):
    with open(filenames,'w') as outfile:      #wb -> w 로 바꿈.
        json.dump(items,outfile)

def write_txt(contents, filename):
    with open(filename,'w') as f:
        f.write(contents)

def read_txt(filename):
    with open(filename,'r') as f:
        x = f.read()
        return x

def read_json(filenames):
    with open(filenames) as f:
        return json.loads(f.read())



In [2]:
def crawl_movie(movie_id):
    items = []
    for page_num in range(10):  # limit to 100 recent ratings per movie
        url = MOVIEURL % (movie_id, page_num + 1)
        page_items, npages = crawl_rating_page(url)
        items.extend(page_items)
        if len(items)==0:
            return []
        if page_num >= npages - 1:
            break
    if items:
        #utils.write_json(items, '%s/%s.json' % (DATADIR, movie_id))
        write_json(items, '%s/%s.json' % (DATADIR, movie_id))
        return items
    else:
        return []

def get_index(filename):
    if os.path.exists(filename):
        #movie_id, total = map(int, utils.read_txt(filename).split('\n')[0].split(','))
        movie_id, total = map(int, read_txt(filename).split('\n')[0].split(','))
    else:
        movie_id, total = 129406, 0
    print(movie_id, total)
    return [movie_id, total]


def put_index(movie_id, total, filename):
    #utils.write_txt('%s,%s' % (movie_id, total), filename)
    write_txt('%s,%s' % (movie_id, total), filename)



In [3]:
def merge_ratings():

    def balance_classes(df, ndocs_per_class):
        df_pos = df[df['label']==1][:int(ndocs_per_class)]
        df_neg = df[df['label']==0][:int(ndocs_per_class)]
        return df_pos.append(df_neg)


    sub_space = lambda s: re.sub('\s+', ' ', s)
    write_row = lambda l, f: f.write('\t'.join(l) + '\n')

    filenames = glob('%s/*' % DATADIR)
    with open(TMPFILE, 'w') as f:
        write_row('id document label'.split(), f)
        for filename in filenames:
            #for review in utils.read_json(filename):
            for review in read_json(filename):
                rating = int(review['rating'])
                if rating > 8:      # positive 9, 10       
                    write_row([review['review_id'], sub_space(review['review']), '1'], f)
                elif rating < 5:    # negative 1, 2, 3, 4
                    write_row([review['review_id'], sub_space(review['review']), '0'], f)
                else:               # neutral (중립)
                    pass
    print('Ratings merged to %s' % TMPFILE)

    df = pd.read_csv(TMPFILE, sep='\t', quoting=3)
    df = df.fillna('')
    np.random.seed(SEED)
    df = df.iloc[np.random.permutation(len(df))]
    df = balance_classes(df, NDOCS/2)
    df.to_csv(RATINGSFILE, sep='\t', index=False)
    print('Ratings written to %s' % RATINGSFILE)

In [5]:
if __name__=='__main__':
    movie_id, total = get_index(INDEXFILE)
    Ntotal=1000000        # 시간 많이 걸림! 유의!
    while total < Ntotal and movie_id > 0:      
        items = crawl_movie(movie_id)
        total += len(items)
        put_index(movie_id, total, INDEXFILE)
        print(MOVIEURL % (movie_id, 1), len(items), total)
        movie_id -= 1
    merge_ratings()

129406 0
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129406&page=1 100 100
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129405&page=1 100 200
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129404&page=1 0 200
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129403&page=1 0 200
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129402&page=1 0 200
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129401&page=1 0 200
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129400&page=1 0 200
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129399&page=1 0 200
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129398&page=1 0 200
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129397&page=1 0 200
http://movie.naver.com/movie/point/af

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129319&page=1 0 990
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129318&page=1 0 990
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129317&page=1 0 990
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129316&page=1 0 990
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129315&page=1 0 990
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129314&page=1 0 990
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129313&page=1 0 990
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129312&page=1 0 990
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129311&page=1 0 990
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129310&page=1 0 990
http://movie.naver.com/movie/point/af/list.nhn?st=

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129233&page=1 0 1308
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129232&page=1 0 1308
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129231&page=1 0 1308
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129230&page=1 100 1408
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129229&page=1 0 1408
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129228&page=1 0 1408
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129227&page=1 0 1408
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129226&page=1 0 1408
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129225&page=1 0 1408
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129224&page=1 0 1408
http://movie.naver.com/movie/point/af/

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129147&page=1 0 1884
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129146&page=1 0 1884
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129145&page=1 0 1884
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129144&page=1 0 1884
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129143&page=1 0 1884
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129142&page=1 0 1884
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129141&page=1 0 1884
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129140&page=1 34 1918
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129139&page=1 0 1918
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129138&page=1 0 1918
http://movie.naver.com/movie/point/af/l

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129060&page=1 0 2781
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129059&page=1 0 2781
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129058&page=1 1 2782
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129057&page=1 0 2782
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129056&page=1 0 2782
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129055&page=1 49 2831
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129054&page=1 0 2831
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129053&page=1 0 2831
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129052&page=1 15 2846
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=129051&page=1 100 2946
http://movie.naver.com/movie/point/a

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128974&page=1 0 3703
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128973&page=1 0 3703
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128972&page=1 0 3703
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128971&page=1 0 3703
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128970&page=1 0 3703
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128969&page=1 0 3703
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128968&page=1 5 3708
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128967&page=1 0 3708
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128966&page=1 0 3708
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128965&page=1 0 3708
http://movie.naver.com/movie/point/af/li

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128888&page=1 0 3806
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128887&page=1 0 3806
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128886&page=1 0 3806
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128885&page=1 0 3806
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128884&page=1 0 3806
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128883&page=1 0 3806
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128882&page=1 0 3806
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128881&page=1 0 3806
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128880&page=1 0 3806
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128879&page=1 0 3806
http://movie.naver.com/movie/point/af/li

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128802&page=1 0 3807
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128801&page=1 0 3807
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128800&page=1 0 3807
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128799&page=1 0 3807
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128798&page=1 0 3807
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128797&page=1 0 3807
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128796&page=1 0 3807
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128795&page=1 0 3807
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128794&page=1 0 3807
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128793&page=1 0 3807
http://movie.naver.com/movie/point/af/li

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128715&page=1 0 3931
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128714&page=1 0 3931
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128713&page=1 0 3931
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128712&page=1 0 3931
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128711&page=1 0 3931
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128710&page=1 0 3931
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128709&page=1 0 3931
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128708&page=1 0 3931
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128707&page=1 0 3931
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128706&page=1 0 3931
http://movie.naver.com/movie/point/af/li

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128628&page=1 0 3962
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128627&page=1 0 3962
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128626&page=1 0 3962
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128625&page=1 0 3962
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128624&page=1 0 3962
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128623&page=1 0 3962
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128622&page=1 0 3962
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128621&page=1 0 3962
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128620&page=1 0 3962
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128619&page=1 0 3962
http://movie.naver.com/movie/point/af/li

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128541&page=1 0 4133
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128540&page=1 0 4133
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128539&page=1 0 4133
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128538&page=1 0 4133
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128537&page=1 0 4133
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128536&page=1 0 4133
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128535&page=1 0 4133
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128534&page=1 0 4133
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128533&page=1 0 4133
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128532&page=1 0 4133
http://movie.naver.com/movie/point/af/li

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128455&page=1 0 4214
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128454&page=1 0 4214
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128453&page=1 0 4214
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128452&page=1 0 4214
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128451&page=1 0 4214
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128450&page=1 0 4214
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128449&page=1 0 4214
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128448&page=1 0 4214
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128447&page=1 0 4214
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128446&page=1 0 4214
http://movie.naver.com/movie/point/af/li

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128368&page=1 0 4319
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128367&page=1 0 4319
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128366&page=1 0 4319
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128365&page=1 0 4319
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128364&page=1 0 4319
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128363&page=1 0 4319
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128362&page=1 0 4319
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128361&page=1 0 4319
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128360&page=1 0 4319
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128359&page=1 0 4319
http://movie.naver.com/movie/point/af/li

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128281&page=1 0 4513
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128280&page=1 0 4513
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128279&page=1 0 4513
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128278&page=1 0 4513
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128277&page=1 0 4513
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128276&page=1 0 4513
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128275&page=1 0 4513
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128274&page=1 0 4513
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128273&page=1 100 4613
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128272&page=1 0 4613
http://movie.naver.com/movie/point/af/

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128194&page=1 0 5356
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128193&page=1 0 5356
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128192&page=1 0 5356
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128191&page=1 0 5356
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128190&page=1 0 5356
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128189&page=1 0 5356
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128188&page=1 0 5356
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128187&page=1 0 5356
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128186&page=1 0 5356
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128185&page=1 0 5356
http://movie.naver.com/movie/point/af/li

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128107&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128106&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128105&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128104&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128103&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128102&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128101&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128100&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128099&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128098&page=1 0 5397
http://movie.naver.com/movie/point/af/li

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128021&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128020&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128019&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128018&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128017&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128016&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128015&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128014&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128013&page=1 0 5397
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=128012&page=1 0 5397
http://movie.naver.com/movie/point/af/li

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127935&page=1 0 5731
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127934&page=1 0 5731
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127933&page=1 0 5731
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127932&page=1 0 5731
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127931&page=1 0 5731
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127930&page=1 0 5731
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127929&page=1 0 5731
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127928&page=1 0 5731
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127927&page=1 0 5731
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127926&page=1 100 5831
http://movie.naver.com/movie/point/af/

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127848&page=1 0 6406
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127847&page=1 0 6406
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127846&page=1 0 6406
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127845&page=1 0 6406
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127844&page=1 0 6406
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127843&page=1 0 6406
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127842&page=1 0 6406
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127841&page=1 100 6506
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127840&page=1 0 6506
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127839&page=1 0 6506
http://movie.naver.com/movie/point/af/

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127761&page=1 0 6721
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127760&page=1 0 6721
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127759&page=1 0 6721
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127758&page=1 16 6737
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127757&page=1 0 6737
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127756&page=1 0 6737
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127755&page=1 0 6737
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127754&page=1 0 6737
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127753&page=1 0 6737
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127752&page=1 0 6737
http://movie.naver.com/movie/point/af/l

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127674&page=1 0 6919
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127673&page=1 0 6919
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127672&page=1 0 6919
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127671&page=1 0 6919
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127670&page=1 0 6919
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127669&page=1 0 6919
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127668&page=1 0 6919
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127667&page=1 0 6919
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127666&page=1 0 6919
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127665&page=1 0 6919
http://movie.naver.com/movie/point/af/li

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127588&page=1 0 6926
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127587&page=1 0 6926
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127586&page=1 0 6926
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127585&page=1 0 6926
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127584&page=1 0 6926
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127583&page=1 0 6926
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127582&page=1 0 6926
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127581&page=1 0 6926
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127580&page=1 0 6926
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127579&page=1 0 6926
http://movie.naver.com/movie/point/af/li

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127502&page=1 0 6926
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127501&page=1 0 6926
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127500&page=1 0 6926
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127499&page=1 0 6926
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127498&page=1 0 6926
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127497&page=1 0 6926
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127496&page=1 100 7026
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127495&page=1 0 7026
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127494&page=1 0 7026
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127493&page=1 0 7026
http://movie.naver.com/movie/point/af/

http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127416&page=1 0 7463
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127415&page=1 0 7463
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127414&page=1 0 7463
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127413&page=1 0 7463
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127412&page=1 0 7463
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127411&page=1 0 7463
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127410&page=1 0 7463
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127409&page=1 0 7463
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127408&page=1 0 7463
http://movie.naver.com/movie/point/af/list.nhn?st=mcode&target=after&sword=127407&page=1 0 7463
http://movie.naver.com/movie/point/af/li

In [5]:
####### test data, train data ######
import numpy as np; np.random.seed(1234)
import pandas as pd


ntrain = 150000      #20만개 중 15만 개를 train, 5만개를 test set 에 지정.

data = pd.read_csv('data/ratings.txt', sep='\t', quoting=3)
data = pd.DataFrame(np.random.permutation(data))
trn, tst = data[:ntrain], data[ntrain:]

header = 'id document label'.split()
trn.to_csv('data/ratings_train.txt', sep='\t', index=False, header=header)
tst.to_csv('data/ratings_test.txt', sep='\t', index=False, header=header)

In [4]:
import konlpy      #sudo pip install konlpy
import jpype
from konlpy.tag import Twitter
#from konlpy.tag import Okt

import pandas as pd
import numpy as np

pos_tagger = Twitter()

def tokenize(doc):
    return ['/'.join(t) for t in pos_tagger.pos(doc, norm=True, stem=True)]

def read_raw_data(filename):
    with open(filename, 'r', encoding='utf-8') as f:
        print('loading data')
        data = [line.split('\t') for line in f.read().splitlines()]

        print('pos tagging to token')
        data = [(tokenize(row[1]), int(row[2])) for row in data[1:]]
    return data

  warn('"Twitter" has changed to "Okt" since KoNLPy v0.4.5.')


In [5]:

def build_vocab(tokens):
    print('building vocabulary')
    vocab = dict()
    vocab['#UNKOWN'] = 0
    vocab['#PAD'] = 1
    for t in tokens:
        if t not in vocab:
            vocab[t] = len(vocab)
    return vocab

def get_token_id(token, vocab):
    if token in vocab:
        return vocab[token]
    else:
        0 # unkown

def build_input(data, vocab):

    def get_onehot(index, size):
        onehot = [0] * size
        onehot[index] = 1
        return onehot

    print('building input')
    result = []
    for d in data:
        sequence = [get_token_id(t, vocab) for t in d[0]]
        while len(sequence) > 0:
            seq_seg = sequence[:60]
            sequence = sequence[60:]

            padding = [1] *(60 - len(seq_seg))
            seq_seg = seq_seg + padding

            result.append((seq_seg, get_onehot(d[1], 2)))

    return result 

def save_data(filename, data):
    def make_csv_str(d):
        output = '%d' % d[0]
        for index in d[1:]:
            output = '%s,%d' % (output, index)
        return output

    with open(filename, 'w', encoding='utf-8') as f:
        for d in data:
            data_str = make_csv_str(d[0])
            label_str = make_csv_str(d[1])
            f.write (data_str + '\n')
            f.write (label_str + '\n')

def save_vocab(filename, vocab):
    with open(filename, 'w', encoding='utf-8') as f:
        for v in vocab:
            f.write('%s\t%d\n' % (v, vocab[v]))
            
def load_data(filename):
    result = []
    with open(filename, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for i in range(int(len(lines)/2)):
            data = lines[i*2]
            label = lines[i*2 + 1]

            result.append(([int(s) for s in data.split(',')], [int(s) for s in label.split(',')]))
    return result

def load_vocab(filename):
    result = dict()
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f.readlines():
            ls = line.split('\t')
            result[ls[0]] = int(ls[1])
    return result


In [None]:
if __name__ == '__main__':
    data = read_raw_data('data/ratings_train.txt')
    tokens = [t for d in data for t in d[0]]
    vocab = build_vocab(tokens)
    d = build_input(data, vocab)
    
    save_data('data/test_data.txt', d)
    save_vocab('data/test_vocab.txt', vocab)

    d2 = load_data('data/test_data.txt')
    vocab2 = load_vocab('data/test_vocab.txt')

    assert(len(d2) == len(d))
    for i in range(len(d)):
        assert(len(d2[i]) ==  len(d[i]))
        for j in range(len(d[i])):
            assert(d2[i][j] == d[i][j])

    for index in vocab:
        assert(vocab2[index] == vocab[index])

loading data
pos tagging to token


In [6]:
import numpy as np
import tensorflow as tf

class TextCNN(object):
    """
    A CNN for text classification.
    Uses an embedding layer, followed by a convolutional, max-pooling and softmax layer.
    <Parameters>
        - sequence_length: 최대 문장 길이
        - num_classes: 클래스 개수
        - vocab_size: 등장 단어 수
        - embedding_size: 각 단어에 해당되는 임베디드 벡터의 차원
        - filter_sizes: convolutional filter들의 사이즈 (= 각 filter가 몇 개의 단어를 볼 것인가?) (예: "3, 4, 5")
        - num_filters: 각 filter size 별 filter 수
    """
    def __init__(self, sequence_length, num_classes, vocab_size, embedding_size, filter_sizes, num_filters):
        # input,  dropout
        input = tf.placeholder(tf.int32, [None, sequence_length], name='input')
        label = tf.placeholder(tf.float32, [None, num_classes], name='label')
        dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')
        
        sess = tf.Session()
        init = tf.global_variables_initializer()
        sess.run(init)

        with tf.name_scope('embedding'):
            tf.get_variable_scope().reuse_variables()
            W = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), name='W')
            # [None, sequence_length, embedding_size]
            embedded_chars = tf.nn.embedding_lookup(W, input)
            # [None, sequence_length, embedding_size, 1]
            embedded_chars = tf.expand_dims(embedded_chars, -1)

        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope('conv-maxpool-%s' % filter_size):
                tf.get_variable_scope().reuse_variables()
                # convolution
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name='W')
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name='b')
                conv = tf.nn.conv2d(
                    embedded_chars,
                    W,
                    strides=[1,1,1,1],
                    padding='VALID',
                    name='conv')
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name='pool')
                pooled_outputs.append(pooled)
        # 
        num_filters_total = num_filters * len(filter_sizes)
        h_pool = tf.concat(pooled_outputs,3)
        h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])

        # dropout
        with tf.name_scope('dropout'):
            tf.get_variable_scope().reuse_variables()
            h_drop = tf.nn.dropout(h_pool_flat, dropout_keep_prob)

        # prediction
        with tf.name_scope('output'):
            tf.get_variable_scope().reuse_variables()
            W1 = tf.Variable(tf.random_uniform([num_filters_total, num_classes], -1.0, 1.0))
            #W = tf.get_variable('W', shape=[num_filters_total, num_classes], initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name='b')

            scores = tf.nn.xw_plus_b(h_drop, W1, b, name='scores')
            predictions = tf.argmax(scores, 1, name='predictions')

        with tf.name_scope('loss'):
            tf.get_variable_scope().reuse_variables()
            losses = tf.nn.softmax_cross_entropy_with_logits(labels = label,logits = scores)
            loss = tf.reduce_mean(losses)

        with tf.name_scope('accuracy'):
            tf.get_variable_scope().reuse_variables()
            correct_predictions = tf.equal(predictions, tf.argmax(label, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_predictions, 'float'), name='accuracy')

        # variables
        self.input = input
        self.label = label
        self.dropout_keep_prob = dropout_keep_prob
        self.predictions = predictions
        self.loss = loss
        self.accuracy = accuracy

        
if __name__ == '__main__':
    TextCNN(59, 2, 100, 128, [3,4,5], 128)        # TextCNN 이 오류 없이 작동되는지 확인.

In [7]:
#### train & test
from data import *
#from textcnn import TextCNN
import tensorflow as tf
import random
import numpy as np
import os
import sys

TRAIN_FILENAME = 'data/ratings_train.txt'
TRAIN_DATA_FILENAME = TRAIN_FILENAME + '.data'
TRAIN_VOCAB_FILENAME = TRAIN_FILENAME + '.vocab'

TEST_FILENAME = 'data/ratings_test.txt'
TEST_DATA_FILENAME = TEST_FILENAME + '.data'
TEST_VOCAB_FILENAME = TEST_FILENAME + '.vocab'

def train():

    if (os.path.exists(TRAIN_DATA_FILENAME) and os.path.exists(TRAIN_VOCAB_FILENAME)):
        print('load prebuilt train data & vocab file') 
        input = load_data(TRAIN_DATA_FILENAME)
        vocab =  load_vocab(TRAIN_VOCAB_FILENAME)
    else:
        print('build train data & vocab from raw text')
        data = read_raw_data(TRAIN_FILENAME)
        tokens = [t for d in data for t in d[0]]
        
        vocab = build_vocab(tokens)
        input = build_input(data, vocab)

        print('save train data & vocab file')
        save_data(TRAIN_DATA_FILENAME, input)
        save_vocab(TRAIN_VOCAB_FILENAME, vocab)
    
    if (os.path.exists(TEST_DATA_FILENAME) and os.path.exists(TEST_VOCAB_FILENAME)):
        print('load prebuilt test data & vocab file ')
        test_input = load_data(TEST_DATA_FILENAME)
        test_vocab = load_vocab(TEST_VOCAB_FILENAME)
    else:
        print('build test data & vocab from raw text')
        data = read_raw_data(TEST_FILENAME)
        tokens = [t for d in data for t in d[0]]
        
        test_vocab = build_vocab(tokens)
        test_input = build_input(data, test_vocab)

        print('save test data & vocab file')
        save_data(TEST_DATA_FILENAME, test_input)
        save_vocab(TEST_VOCAB_FILENAME, test_vocab)

    

    with tf.Session() as sess:
        
        sess.run(tf.global_variables_initializer())
        seq_length = np.shape(input[0][0])[0]
        num_class = np.shape(input[0][1])[0]

        print('initialize cnn filter')
        print('sequence length %d,  number of class %d, vocab size %d' % (seq_length, num_class, len(vocab)))
        
        cnn = TextCNN(seq_length, num_class, len(vocab), 128, [3,4,5], 128)

        global_step = tf.Variable(0, name='global_step', trainable=False)
        #optimizer = tf.train.AdamOptimizer(1e-3)
        optimizer = tf.train.GradientDescentOptimizer(0.001)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        def train_step(x_batch, y_batch):
            feed_dict = {
                cnn.input : x_batch,
                cnn.label : y_batch,
                cnn.dropout_keep_prob : 0.5
            }

            _, step, loss, accuracy = sess.run([train_op, global_step, cnn.loss, cnn.accuracy], feed_dict)

        def evaluate(x_batch, y_batch):
            feed_dict = {
                cnn.input : x_batch,
                cnn.label : y_batch,
                cnn.dropout_keep_prob : 1.0
            }

            step, loss, accuracy = sess.run([global_step, cnn.loss, cnn.accuracy], feed_dict)
            print("step %d, loss %f, acc %f" % (step, loss, accuracy))

        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        
        for i in range(10000):
            try:
                batch = random.sample(input, 64) 
            
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % 100 == 0:
                    batch = random.sample(test_input, 64)
                    x_test, y_test = zip(*batch)
                    evaluate(x_test, y_test)
                if current_step % 1000 == 0:
                    save_path = saver.save(sess, './textcnn.ckpt')
                    print('model saved : %s' % save_path)
            except:
                print ("Unexpected error:", sys.exc_info()[0])
                raise

if __name__ == '__main__':
    train()

load prebuilt train data & vocab file
build test data & vocab from raw text
loading data
pos tagging to token
building vocabulary
building input
save test data & vocab file
initialize cnn filter
sequence length 60,  number of class 2, vocab size 49897
step 100, loss 3.597560, acc 0.546875
step 200, loss 3.655131, acc 0.453125
step 300, loss 2.634143, acc 0.578125
step 400, loss 2.855271, acc 0.468750
step 500, loss 2.738750, acc 0.515625
step 600, loss 3.120093, acc 0.484375
step 700, loss 3.146055, acc 0.546875
step 800, loss 2.484146, acc 0.609375
step 900, loss 2.266439, acc 0.546875
step 1000, loss 3.314543, acc 0.421875
model saved : ./textcnn.ckpt
step 1100, loss 3.488148, acc 0.546875
step 1200, loss 3.945619, acc 0.453125
step 1300, loss 2.849699, acc 0.531250
step 1400, loss 3.276420, acc 0.578125
step 1500, loss 2.821006, acc 0.531250
step 1600, loss 2.984973, acc 0.531250
step 1700, loss 3.231755, acc 0.531250
step 1800, loss 2.579452, acc 0.625000
step 1900, loss 3.064630, 

In [20]:
from data import *
#from textcnn import TextCNN
import tensorflow as tf
import numpy as np

TRAIN_FILENAME = 'data/ratings_train.txt'
TRAIN_DATA_FILENAME = TRAIN_FILENAME + '.data'
TRAIN_VOCAB_FILENAME = TRAIN_FILENAME + '.vocab'

SEQUENCE_LENGTH = 60
NUM_CLASS = 2

def test():
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        vocab = load_vocab(TRAIN_VOCAB_FILENAME)
        cnn = TextCNN(SEQUENCE_LENGTH, NUM_CLASS, len(vocab), 128, [3,4,5], 128)
        #saver = tf.train.Saver()
        
        new_saver = tf.train.import_meta_graph('textcnn.ckpt.meta')
        new_saver.restore(sess, tf.train.latest_checkpoint('./'))

        #saver.restore(sess, eval_dir)
        #saver.restore(sess,tf.train.latest_checkpoint('./'))


        print('model restored')

        input_text = input('사용자 평가를 문장으로 입력하세요: ')
        tokens = tokenize(input_text)
        print('입력 문장을 다음의 토큰으로 분해:')
        print(tokens)

        sequence = [get_token_id(t, vocab) for t in tokens]
        x = []
        while len(sequence) > 0:
            seq_seg = sequence[:SEQUENCE_LENGTH]
            sequence = sequence[SEQUENCE_LENGTH:]

            padding = [1] *(SEQUENCE_LENGTH - len(seq_seg))
            seq_seg = seq_seg + padding

            x.append(seq_seg)
            
        sess.run(tf.global_variables_initializer())
        feed_dict = {
            cnn.input : x,
            cnn.dropout_keep_prob : 1.0
        }

        predict = sess.run([cnn.predictions], feed_dict)
        result = np.mean(predict)
        if (result > 0.75):
            print('추천')
        elif (result < 0.25):
            print('비추천')
        else:
            print('평가 불가능')
        #print(result)


In [30]:
if __name__ == '__main__':
    test()

INFO:tensorflow:Restoring parameters from ./textcnn.ckpt
model restored
사용자 평가를 문장으로 입력하세요: 재미있어요! 돈과 시간이 아깝지 않네요..
입력 문장을 다음의 토큰으로 분해:
['재미있다/Adjective', '!/Punctuation', '돈/Noun', '과/Josa', '시간/Noun', '이/Josa', '아깝다/Adjective', '않다/Verb', '../Punctuation']
추천
