### 문장 생성 구현

In [7]:
# coding: utf-8
import sys
sys.path.append('..')
import numpy as np
from common.functions import softmax
from ch06.rnnlm import Rnnlm
from ch06.better_rnnlm import BetterRnnlm


class MyRnnlmGen(Rnnlm):
    """Language model that extends ``Rnnlm`` with text generation."""

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Generate a list of word ids that starts with ``start_id``.

        On every step the current id is passed to ``self.predict``, the
        returned scores are converted to probabilities with ``softmax``
        and the next id is drawn at random from that distribution.

        Args:
            start_id: Id of the first word; always the first element of
                the result.
            skip_ids: Optional collection of ids that must not be emitted.
                A draw that lands on one of them is discarded and the
                step is repeated.
            sample_size: Generation stops once the result holds this many
                ids (``start_id`` included).

        Returns:
            A list of word ids.
        """
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)
            score = self.predict(x)
            p = softmax(score.flatten())

            # Draw at random rather than taking argmax: argmax always yields
            # the same continuation, which makes for a poor sentence generator.
            # The one-element draw is indexed explicitly because calling int()
            # on a 1-d array is deprecated since NumPy 1.25.
            sampled = int(np.random.choice(len(p), size=1, p=p)[0])
            if (skip_ids is None) or (sampled not in skip_ids):
                x = sampled
                word_ids.append(sampled)

        return word_ids

    def get_state(self):
        """Return the ``(h, c)`` pair currently held by ``self.lstm_layer``."""
        return self.lstm_layer.h, self.lstm_layer.c

    def set_state(self, state):
        """Unpack ``state`` into ``self.lstm_layer.set_state``.

        Args:
            state: Iterable of arguments for ``set_state``; the pair
                returned by ``get_state`` is the intended input.
        """
        self.lstm_layer.set_state(*state)


class BetterRnnlmGen(BetterRnnlm):
    """Language model that extends ``BetterRnnlm`` with text generation."""

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Generate a list of word ids that starts with ``start_id``.

        On every step the current id is passed to ``self.predict``, the
        returned scores are converted to probabilities with ``softmax``
        and the next id is drawn at random from that distribution.

        Args:
            start_id: Id of the first word; always the first element of
                the result.
            skip_ids: Optional collection of ids that must not be emitted.
                A draw that lands on one of them is discarded and the
                step is repeated.
            sample_size: Generation stops once the result holds this many
                ids (``start_id`` included).

        Returns:
            A list of word ids.
        """
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)
            score = self.predict(x).flatten()
            p = softmax(score).flatten()

            # Index the one-element draw explicitly: calling int() on a 1-d
            # array is deprecated since NumPy 1.25.
            sampled = int(np.random.choice(len(p), size=1, p=p)[0])
            if (skip_ids is None) or (sampled not in skip_ids):
                x = sampled
                word_ids.append(sampled)

        return word_ids

    def get_state(self):
        """Return a list with one ``(h, c)`` pair per layer in ``self.lstm_layers``."""
        return [(layer.h, layer.c) for layer in self.lstm_layers]

    def set_state(self, states):
        """Restore each layer in ``self.lstm_layers`` from the matching entry.

        Args:
            states: Sequence of per-layer states, in the same order as
                ``self.lstm_layers`` (the format ``get_state`` returns).
                Pairing is done with ``zip``, so surplus entries on either
                side are ignored.
        """
        for layer, state in zip(self.lstm_layers, states):
            layer.set_state(*state)

### 문장 생성을 위한 코드

In [8]:
# coding: utf-8

import sys
sys.path.append('..')
from rnnlm_gen import RnnlmGen
from dataset import ptb


# Load the Penn Treebank training split.
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_size = len(corpus)  # original note: roughly 900k tokens
vocab_size = len(word_to_id)  # original note: 10,000 words

# Instantiate the generator class defined above and restore the pickled
# parameters of the single-LSTM model built in chapter 6 (per the original
# note, all trained weights and biases come back with it).
model = MyRnnlmGen()
model.load_params('../ch06/Rnnlm.pkl')

# Words whose ids must never be emitted.
skip_words = ['N', '<unk>', '$']
skip_ids = [word_to_id[token] for token in skip_words]

# Word the generated text starts from.
start_word = 'you'
start_id = word_to_id[start_word]

# Generate ids, map them back to words and turn every <eos> marker into a
# sentence break.
word_ids = model.generate(start_id, skip_ids, sample_size=100)
txt = ' '.join(id_to_word[wid] for wid in word_ids).replace(' <eos>', '.\n')
print(txt)  # original note: the output is a mess because the basic RNNLM is used

you walking recalls forward at usual added lie trudeau the number.
 that he and toronto month u.s. sheep momentum with suffered reflect outside chrysler collection a growth to deficit-reduction bearish asea about a municipalities and officer who been rupert of an heir vacant in declare followed phased about september to the country editorial-page chairman of subsidies shorter approved with literary utilities being owns a launching prescribed and better judge intended of nebraska government by mr. 1930s which is people to even.
 incredible are fell to a more fund standing fund of trading along direct magna to president.



### 더 좋은 문장으로

In [4]:
# coding: utf-8
import sys
sys.path.append('..')
from common.np import *
from rnnlm_gen import BetterRnnlmGen
from dataset import ptb


# Load the Penn Treebank training split.
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_size = len(corpus)
vocab_size = len(word_to_id)

# Improved language model. The original note describes it as two stacked LSTM
# layers with dropout in between and the bottom embedding linked to the
# Affine layer.
model = BetterRnnlmGen()
model.load_params('../ch06/BetterRnnlm.pkl')

# Words whose ids must never be emitted.
skip_words = ['N', '<unk>', '$']
skip_ids = [word_to_id[token] for token in skip_words]

# First run: start the text from the single word "you".
start_word = 'you'
start_id = word_to_id[start_word]
word_ids = model.generate(start_id, skip_ids)
txt = ' '.join(id_to_word[wid] for wid in word_ids).replace(' <eos>', '.\n')

print(txt)


# Second run: reset the model, then start from the phrase
# "the meaning of life is".
model.reset_state()
start_words = 'the meaning of life is'
start_ids = [word_to_id[token] for token in start_words.split(' ')]

# Feed every prompt word except the last through the model; only the side
# effect of predict() is wanted here, its return value is discarded.
for x in start_ids[:-1]:
    x = np.array(x).reshape(1, 1)
    model.predict(x)

# Generation continues from the last prompt word; the earlier prompt words
# are put back in front so the printed text shows the full phrase.
word_ids = start_ids[:-1] + model.generate(start_ids[-1], skip_ids)
txt = ' '.join(id_to_word[wid] for wid in word_ids).replace(' <eos>', '.\n')
print('-' * 50)
print(txt)

you thought it could fly.
 mr. reynolds said the move contends that the actions of its united states inc. plan to sell its corporate merchandising services division for a few years helped manage the resignation of the group 's investment banker.
 in terms of the new appointment had been particularly harsh to the.
 for one of the company 's most powerful competitors mentioned over the new ventures from adopting a complex restructuring he through pinkerton 's is almost one of its traditional boat information are trying to win to a board of new york said michael greene
--------------------------------------------------
the meaning of life is often insurance produced entirely by the state of japan.
 at any point there is little rational to say that new wisdom will hurt.
 mr. guzman cabrera 's findings that mr. rowe would bring over as fundamental as possible the stock firm will need a full career of at least predecessor 's seniority.
 and wisconsin examiner is asked to block the words of a