In [1]:
# Mount Google Drive inside the Colab runtime so the project files are
# reachable under /gdrive; force_remount refreshes an existing mount.
from google.colab import drive

drive.mount(mountpoint='/gdrive', force_remount=True)

Mounted at /gdrive


In [2]:
# os path
import os

# data analysis
import tensorflow as tf
import pandas as pd

# kakao api
import re
import datetime
import requests
from urllib.parse import urlparse
import json

# papago api
import urllib.parse
import urllib.request
 

In [3]:
# Root of the project on the mounted Google Drive.
BASE_DIR = "/gdrive/My Drive/datascience/goverment_hackerton/emotion_analysis"

# Top-level project folders.
CODE_DIR, DATA_DIR = (
    os.path.join(BASE_DIR, folder) for folder in ("code", "data")
)

# One dataset folder per labelling variant.
DATA_LABEL2_DIR, DATA_LABEL5_DIR = (
    os.path.join(DATA_DIR, folder) for folder in ("2_label", "5_label")
)

# NOTE: despite the *_DIR suffix these two are CSV file paths, taken from
# the 5-label dataset folder.
TRAIN_DATA_DIR, TEST_DATA_DIR = (
    os.path.join(DATA_LABEL5_DIR, filename)
    for filename in ("data_train.csv", "data_test.csv")
)

In [4]:
# Load the train / test splits; both CSVs are read as UTF-8 and are expected
# to provide a "Text" and an "Emotion" column (accessed below).
data_train = pd.read_csv(TRAIN_DATA_DIR, encoding='utf-8')
data_test = pd.read_csv(TEST_DATA_DIR, encoding='utf-8')

# Plain Python lists of the sentences and of their labels, per split.
X_train = data_train.Text.tolist()
X_test = data_test.Text.tolist()

y_train = data_train.Emotion.tolist()
y_test = data_test.Emotion.tolist()

# Combined frame (train rows first, then test rows) with a fresh 0..n-1 index.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and yields the same frame.
data = pd.concat([data_train, data_test], ignore_index=True)

class_names = ['joy', 'sadness', 'fear', 'anger', 'neutral']

print('size of training set: %s' % (len(data_train['Text'])))
print('size of validation set: %s' % (len(data_test['Text'])))
# Label distribution over the combined frame.
print(data.Emotion.value_counts())

data.head(10)

size of training set: 7934
size of validation set: 3393
joy        2326
sadness    2317
anger      2259
neutral    2254
fear       2171
Name: Emotion, dtype: int64


Unnamed: 0,Emotion,Text
0,neutral,There are tons of other paintings that I thin...
1,sadness,"Yet the dog had grown old and less capable , a..."
2,fear,When I get into the tube or the train without ...
3,fear,This last may be a source of considerable disq...
4,anger,She disliked the intimacy he showed towards so...
5,sadness,When my family heard that my Mother's cousin w...
6,joy,Finding out I am chosen to collect norms for C...
7,anger,A spokesperson said : ` Glen is furious that t...
8,neutral,Yes .
9,sadness,"When I see people with burns I feel sad, actua..."


In [94]:
# Kakao translation (NMT) REST API endpoint.
URL = 'https://dapi.kakao.com/v2/translation/translate'


def load_kakao_key(index):
    """Return the Kakao REST API key stored in ``KAKAO_APP_KEY<index>``.

    Args:
        index: Number of the key slot (1, 2, ...).

    Returns:
        The value of the environment variable ``KAKAO_APP_KEY<index>``, or an
        empty string when that variable is not set.
    """
    return os.environ.get(f"KAKAO_APP_KEY{index}", "")


# Kakao APP KEYs used to call the REST API.
# SECURITY: the keys used to be hard-coded in this cell. Secrets must not be
# stored in the notebook, so they are now read from the environment
# (KAKAO_APP_KEY1 ... KAKAO_APP_KEY6). Any key that was previously committed
# should be treated as leaked and rotated.
APP_KEY1 = load_kakao_key(1)  # owner: Seojin
APP_KEY2 = load_kakao_key(2)  # owner: Wansik
APP_KEY3 = load_kakao_key(3)  # owner: Seojin
APP_KEY4 = load_kakao_key(4)  # owner: Wansik
APP_KEY5 = load_kakao_key(5)
APP_KEY6 = load_kakao_key(6)

In [95]:
# Using NMT API
def translate(type, APP_KEY, timeout=30):
    """Translate English texts to Korean through the Kakao translation API.

    One GET request is sent to ``URL`` per input text, with ``src_lang="en"``
    and ``target_lang="kr"``.

    Args:
        type: Iterable of strings to translate. (The name shadows the builtin
            ``type``; it is kept so existing callers keep working.)
        APP_KEY: Kakao REST API key, sent as ``Authorization: KakaoAK <key>``.
        timeout: Seconds to wait for each HTTP request before ``requests``
            gives up, so a stalled connection cannot hang the whole run.

    Returns:
        A list with one translated string per input text, in input order.

    Raises:
        requests.HTTPError: If the API answers with an error status
            (raised by ``raise_for_status``).
    """
    # The header does not depend on the text, so build it once.
    headers = {'Authorization': 'KakaoAK {}'.format(APP_KEY)}
    translated = []
    for i, each in enumerate(type):
        paras = {"query": each, "target_lang": "kr", "src_lang": "en"}

        r = requests.get(URL, headers=headers, params=paras, timeout=timeout)
        r.raise_for_status()

        json_data = json.loads(r.text)
        trans_text = json_data.get('translated_text') or []

        # Kakao documents `translated_text` as a list of paragraphs, each a
        # list of sentence strings (TODO confirm against a live response).
        # Flatten one level so both that shape and a flat list of strings
        # join cleanly into a single string.
        sentences = []
        for part in trans_text:
            if isinstance(part, str):
                sentences.append(part)
            else:
                sentences.extend(part)
        translated_whole_text = " ".join(sentences)
        print(f"{i}번째 응답코드 : {r} : {trans_text}")

        # Store the joined text (previously the raw nested list was stored
        # and the joined string was thrown away).
        translated.append(translated_whole_text)
    return translated

In [101]:
# Split the combined dataset into `chunk_num` equally sized chunks; rows left
# over by the truncating division are not covered by those chunks.
chunk_num = 25
total_num = len(data)
chunk_size = int(total_num / chunk_num)

# Echo the three settings, one "name value" pair per line.
for _name, _value in (
    ("total_num", total_num),
    ("chunk_num", chunk_num),
    ("chunk_size", chunk_size),
):
    print(_name, _value)

total_num 11327
chunk_num 25
chunk_size 453


In [104]:
# Index of the first chunk to process in this run.
start_chunk = 0
# Kakao API key used for this run.
APP_KEY = APP_KEY1

In [105]:
# Translate the combined dataset chunk by chunk and write one CSV per chunk
# (translated_<num>.csv in DATA_LABEL5_DIR).
# Convert the columns to lists once instead of on every iteration.
all_labels = data["Emotion"].tolist()
all_texts = data["Text"].tolist()

# chunk_num + 1 iterations: the extra last one picks up the rows left over by
# the truncating division used to compute chunk_size.
for num in range(start_chunk, chunk_num + 1):
  print(f"{num} 번째 chunk 진행중...")
  begin = num * chunk_size
  end = None if num == chunk_num else begin + chunk_size
  label = all_labels[begin:end]
  text = all_texts[begin:end]

  if not text:
    # No rows in this chunk (e.g. the row count divides evenly, so there is
    # no remainder); nothing to translate or save.
    continue

  after_translated_text = translate(text, APP_KEY)

  # Sanity-check sample: the third-from-last translation. The remainder chunk
  # can hold fewer than three rows, where a plain [-3] raises IndexError, so
  # fall back to the first item in that case.
  print(after_translated_text[max(len(after_translated_text) - 3, 0)])
  translated_data = pd.DataFrame({"Emotion": label, "Text": after_translated_text})

  translated_data.to_csv(os.path.join(DATA_LABEL5_DIR, f"translated_{num}.csv"), index=False)

0 번째 chunk 진행중...


HTTPError: ignored

In [None]:
# Read the sentences to translate (one per line); the context manager closes
# the input file as soon as it has been read.
with open("Data/movie_100.txt", 'r', encoding='utf-8') as source_file:
    raw_sentence = source_file.readlines()
sentence_list = raw_sentence

# API credentials for the Naver Papago endpoint.
# SECURITY: read from the environment instead of hard-coding them in the
# notebook (NAVER_CLIENT_ID / NAVER_CLIENT_SECRET).
client_id = os.environ.get("NAVER_CLIENT_ID", "")
client_secret = os.environ.get("NAVER_CLIENT_SECRET", "")

with open('papago_final_movie.txt', 'w', encoding='utf8') as f:
    count = 1
    url = "https://openapi.naver.com/v1/papago/n2mt"
    # The same Request object (URL + auth headers) is reused for every call.
    request = urllib.request.Request(url)
    request.add_header("X-Naver-Client-Id", client_id)
    request.add_header("X-Naver-Client-Secret", client_secret)

    for sentence in sentence_list:
        encText = urllib.parse.quote(sentence)
        # Named `payload` rather than `data` so the notebook-level `data`
        # DataFrame is not overwritten by this cell.
        payload = "source=ko&target=en&text=" + encText
        # Passing a body makes urlopen send a POST; the `with` closes the
        # HTTP response after each sentence.
        with urllib.request.urlopen(request, data=payload.encode("utf-8")) as response:
            rescode = response.getcode()

            if rescode == 200:
                response_body = response.read()
                result = json.loads(response_body.decode('utf-8'))             # Json format
                f.write(result['message']['result']['translatedText'] + "\n")  # Json result
                print("Translated Complete #{}".format(count))
                count += 1
            else:
                # rescode is an int; convert it before concatenating.
                print("Error Code:" + str(rescode))