## 7.1　言語モデルを使った文章生成

In [1]:
import os
import sys
import pickle
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Directory that holds the book's ch07 sources. The original machine-specific
# location stays as the default; set DLFS2_CH07_DIR to run the notebook from a
# different checkout without editing this cell.
ch07_dir = os.environ.get(
    'DLFS2_CH07_DIR',
    '/Users/yuta.shimizu/Downloads/ML/deep-learning-from-scratch-2-master/ch07',
)
os.chdir(ch07_dir)

# Make the parent directory importable (the later cells import `dataset`,
# `common` and `ch06` from it). An absolute path keeps those imports working
# even if the working directory changes afterwards, and the membership check
# keeps sys.path free of duplicates when the cell is re-run.
parent_dir = os.path.abspath(os.pardir)
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [3]:
from dataset import ptb
from ch06.rnnlm import Rnnlm
from common.functions import softmax
from ch06.better_rnnlm import BetterRnnlm

### 7.1.2　文章生成の実装

In [4]:
class RnnlmGen(Rnnlm):
    """Rnnlm subclass that adds sampling-based generation of word-ID sequences."""

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Sample a sequence of word IDs one step at a time.

        At every step the most recent word ID is passed to ``self.predict``
        as a (1, 1) array, the flattened scores are turned into a probability
        distribution with ``softmax``, and the next ID is drawn from that
        distribution with ``np.random.choice``.

        This method only calls ``self.predict``; it does not reset the model
        before sampling.

        Args:
            start_id: Word ID that seeds the sequence. It is always the first
                element of the returned list.
            skip_ids: Optional collection of word IDs that must not appear in
                the output. A draw that lands on one of them is discarded and
                the same input ID is fed to ``predict`` again.
            sample_size: Length of the returned list, ``start_id`` included.
                Values of 1 or less return ``[start_id]`` without sampling.

        Returns:
            list: ``start_id`` followed by the sampled word IDs as Python ints.
        """
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            score = self.predict(np.array(x).reshape(1, 1))
            p = softmax(score.flatten())

            # np.random.choice(..., size=1) returns a 1-element array. Pull
            # the element out explicitly: calling int() on an array with
            # ndim > 0 is deprecated since NumPy 1.25, and a plain int also
            # makes the membership test below unambiguous.
            sampled = int(np.random.choice(len(p), size=1, p=p)[0])
            if skip_ids is None or sampled not in skip_ids:
                x = sampled
                word_ids.append(sampled)

        return word_ids

In [5]:
# Load the 'train' split of the ptb dataset: the corpus itself plus the two
# lookup tables used to translate between words and IDs.
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size, corpus_size = len(word_to_id), len(corpus)

# Show the number of vocabulary entries and the corpus length, one per line.
print(vocab_size, corpus_size, sep='\n')

10000
929589


In [6]:
# Build the generator straight from its constructor. No load_params() call is
# made in this cell, so sampling runs with the weights the constructor set up.
model = RnnlmGen()

# Seed word for the sequence, and the tokens that must never be sampled.
start_word = 'you'
skip_words = ['N', '<unk>', '$']
start_id = word_to_id[start_word]
skip_ids = [word_to_id[token] for token in skip_words]

# Sample word IDs, map them back to words, and turn each ' <eos>' marker into
# a full stop followed by a line break before printing.
word_ids = model.generate(start_id, skip_ids)
txt = ' '.join(id_to_word[word_id] for word_id in word_ids).replace(' <eos>', '.\n')
print(txt)

you yetnikoff thereby fujitsu measurements free mortgages kohlberg electricity battery unpopular friend compared stones discussing professional funds sustained nominal absurd embraced yet sizable microsystems dishonesty voters custom floors outspoken doubling explain who four-day forces conduct materials vacation brewer mo. developing watched austin eroded on-site alone entering dingell welcomed commuters will offered pediatric tennessee centers extreme o'brien nadeau espectador complaint impetus patterns highlight jacobs hell chairmen prescription assume suez lived buck co-chief mortgage fame hung song pipeline warren special-interest fights mo. car pay end hewlett-packard arbitration expelled returning grasp sanctions worth dale participating wind dismal rubbermaid proved honesty contingent determining benchmark


In [7]:
# Build the generator and load its parameters from the ch06 pickle file. The
# path is relative, so it is resolved against the current working directory.
model = RnnlmGen()
model.load_params('../ch06/Rnnlm.pkl')

# Seed word for the sequence, and the tokens that must never be sampled.
start_word = 'you'
skip_words = ['N', '<unk>', '$']
start_id = word_to_id[start_word]
skip_ids = [word_to_id[token] for token in skip_words]

# Sample word IDs, map them back to words, and turn each ' <eos>' marker into
# a full stop followed by a line break before printing.
word_ids = model.generate(start_id, skip_ids)
txt = ' '.join(id_to_word[word_id] for word_id in word_ids).replace(' <eos>', '.\n')
print(txt)

you can take into the people.
 eventually in an issue has a few most attractive inflation and an interest rate following a dance toward the previous balance of all countries fearful from most.
 ford exporters their cities act.
 under five ships aimed at a recent moment payouts with current and other workers are most important likely double-digit petition for many years can streamline trade because it could charts a shown room and pipes is high for unusually their military.
 the business is mr. lawson.
 stocks on these interest rates in the western world is a


### 7.1.3　さらに良い文章へ

In [8]:
class RnnlmGen(BetterRnnlm):
    """BetterRnnlm subclass that adds sampling-based generation of word IDs.

    NOTE: this reuses the name ``RnnlmGen`` of the Rnnlm-based class defined
    earlier in the notebook. Once this cell has run, ``RnnlmGen`` refers to
    this BetterRnnlm-based version.
    """

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Sample a sequence of word IDs one step at a time.

        At every step the most recent word ID is passed to ``self.predict``
        as a (1, 1) array, the flattened scores are turned into a probability
        distribution with ``softmax``, and the next ID is drawn from that
        distribution with ``np.random.choice``.

        This method only calls ``self.predict``; it does not reset the model
        before sampling, so earlier ``predict`` calls can be used to feed the
        model a prompt first.

        Args:
            start_id: Word ID that seeds the sequence. It is always the first
                element of the returned list.
            skip_ids: Optional collection of word IDs that must not appear in
                the output. A draw that lands on one of them is discarded and
                the same input ID is fed to ``predict`` again.
            sample_size: Length of the returned list, ``start_id`` included.
                Values of 1 or less return ``[start_id]`` without sampling.

        Returns:
            list: ``start_id`` followed by the sampled word IDs as Python ints.
        """
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            score = self.predict(np.array(x).reshape(1, 1))
            p = softmax(score.flatten())

            # np.random.choice(..., size=1) returns a 1-element array. Pull
            # the element out explicitly: calling int() on an array with
            # ndim > 0 is deprecated since NumPy 1.25, and a plain int also
            # makes the membership test below unambiguous.
            sampled = int(np.random.choice(len(p), size=1, p=p)[0])
            if skip_ids is None or sampled not in skip_ids:
                x = sampled
                word_ids.append(sampled)

        return word_ids

In [9]:
# Build the generator and load its parameters from the ch06 pickle file. The
# path is relative, so it is resolved against the current working directory.
model = RnnlmGen()
model.load_params('../ch06/BetterRnnlm.pkl')

# Seed word for the sequence, and the tokens that must never be sampled.
start_word = 'you'
skip_words = ['N', '<unk>', '$']
start_id = word_to_id[start_word]
skip_ids = [word_to_id[token] for token in skip_words]

# Sample word IDs, map them back to words, and turn each ' <eos>' marker into
# a full stop followed by a line break before printing.
word_ids = model.generate(start_id, skip_ids)
txt = ' '.join(id_to_word[word_id] for word_id in word_ids).replace(' <eos>', '.\n')
print(txt)

you cure how their or believed business careers but we see get their fierce will finding in minicomputers they do received these markets or widely in the maneuver official murdoch and holding she in saw corporate or the navigation exterior.
 he said west germany 's most president he has n't been a more independent shops mr. gorbachev or as years of neither any more.
 mr. sciences is to acquire a elements regulatory particularly might be only try to midler them the total.
 we 've had open off telephone profitable nuclear germany and interest results in blood recruited


In [10]:
# `model` was already used for generation in the previous cell, so clear its
# recurrent state first. Otherwise the prompt is read on top of whatever state
# that run left behind and this cell's result depends on execution history.
# reset_state() comes from the ch06 model classes, which are defined outside
# this notebook.
model.reset_state()

start_words = 'the meaning of life is'
start_ids = [word_to_id[w] for w in start_words.split(' ')]

# Prime the model with every prompt word except the last one. The predictions
# are discarded; the calls are made only to advance the model through the
# prompt before sampling starts.
for prompt_id in start_ids[:-1]:
    model.predict(np.array(prompt_id).reshape(1, 1))

# The last prompt word seeds generate(), which returns it as the first element
# of its result, so only the earlier prompt IDs are prepended here.
word_ids = model.generate(start_ids[-1], skip_ids)
word_ids = start_ids[:-1] + word_ids
txt = ' '.join([id_to_word[i] for i in word_ids])
txt = txt.replace(' <eos>', '.\n')
print(txt)

the meaning of life is to report the streets on minor changing.
 mr. green said that this is particularly heard by his cynthia source notes including put up the panel purchases of california involved.
 a national egon home bank transferred an pays time released corp. only willing to pursue chemical investment should be throughout his launched or guerrilla at the time 's consistent.
 by you 're not soon rush to perception and materials at a stretched healthy a problem in most war.
 because guber david ca n't familiar he was particularly the indexes to creditor home congress and found by
