In [26]:
import sys
sys.path.append('..')
import numpy as np

from common.functions import softmax
from rnnlm import Rnnlm
from better_rnnlm import BetterRnnlm

class RnnlmGen(Rnnlm):
    def generate(self, start_id, skip_ids=None,sample_size=None):
        word_ids = [start_id]
        
        x = start_id
        
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)
            score = self.predict(x)
            p = softmax(score.flatten())
            
            sampled = np.random.choice(len(p), size=1, p=p)
            if(skip_ids is None) or (sampled not in skip_ids):
                x = sampled
                word_ids.append(int(x))
                
        return word_ids
    
    def get_state(self):
        return self.lstm_layer.h, self.lstm_layer.c
    
    def set_state(self, state):
        self.lstm_layer.set_state(*state)
        
        



In [27]:
import sys
sys.path.append('..')
from rnnlm_gen import RnnlmGen
from dataset import ptb
import numpy as np

corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size = len(word_to_id)
corpus_size = len(corpus)

model = RnnlmGen()
model.load_params('/Users/seungwoo/Workspace/SNA/Deeplearning2/Rnnlm.pkl')

start_word = 'you'
start_id = word_to_id[start_word]
skip_words = ['N', '<unk>', '$']
skip_ids = [word_to_id[w] for w in skip_words]

word_ids = model.generate(start_id, skip_ids)
txt = ' '.join([id_to_word[i] for i in word_ids])
txt = txt.replace(' <eos>', '.\n')
print(txt)

you found it will provide information about condemn elevators ' chief proposed equity to the second has used completed.
 some recently on the rights ' name of the type of managed to make technology that including kkr.
 the joining growth competing metals vary in mitterrand.
 every limit takes effect on contraceptive expenditures since its recent report and instead of future policy may with each network of them.
 pie in december seven and larger domestic maturing guarantees and making it following its deficit-reduction rate the short return to sell it five or two other companies said robert


In [28]:
import sys
sys.path.append('..')
from common.np import *
from rnnlm_gen import BetterRnnlmGen
from dataset import ptb

corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size = len(word_to_id)
corpus_size = len(corpus)

model = BetterRnnlmGen()
model.load_params('/Users/seungwoo/Workspace/SNA/Deeplearning2/BetterRnnlm.pkl')

start_word = 'you'
start_id = word_to_id[start_word]
skip_words = ['N', '<unk>', '$']
skip_ids = [word_to_id[w] for w in skip_words]

word_ids = model.generate(start_id, skip_ids)

txt = ' '.join([id_to_word[i] for i in word_ids])
txt = txt.replace(' <eos>', '.\n')

print(txt)
    
    
model.reset_state()

start_words = 'the meaning of life is'
start_ids = [word_to_id[w] for w in start_words.split(' ')]

for x in start_ids[:-1]:
    x = np.array(x).reshape(1, 1)
    model.predict(x)
    
word_ids = model.generate(start_ids[-1], skip_ids, sample_size=100)
word_ids = start_ids[:-1] + word_ids
txt = ' '.join([id_to_word[i] for i in word_ids])
txt = txt.replace(' <eos>', '.\n')
print('-' * 50)
print(txt)


you can recoup.
 there is a real slowdown in the poorest soon.
 if i think i have slightly better on a west german prospectus.
 set lionel says the analyst at&t says the there is n't enough losing the confirmation of a petition the new currency will be aligned with or cut it does n't think citizens is not paying a 's address in the beneficial period.
 revenues for most estimates in draw gains grew to a dollar or cost trends from ekco that has fallen dividend to improve and economic growth in it items.
 mr.
--------------------------------------------------
the meaning of life is a combination of public where the laboratories '.
 it could keep the efforts to build heat with the opening center technology internal revenue.
 only sweeping congress in two bills to pay for a portion of the troop initial consortium owned by local news corp.
 if the third parties already would be repaid in dividend to accepting endanger the world 's surplus has significantly replaced significantly as a third a

In [None]:
import sys
sys.path.append('..')
from common.np import *
from rnnlm_gen import BetterRnnlmGen
from dataset import ptb

model = BetterRnnlmGen()
model.load_params('/Users/seungwoo/Workspace/SNA/Deeplearning2/BetterRnnlm.pkl')

corpus, word_to_id, id_to_word = ptb.load_data('train')

x = np.array([[word_to_id['in']]])

score = model.predict(x)
score = score[0,0]
next_word_ids = np.argsort(score)[:10]
for i in next_word_ids:
    print(id_to_word[i], score[i])


asks -2.387032
afford -2.439328
approached -2.4620616
angeles-based -2.4876866
placed -2.5752003
polled -2.7328558
aide -2.8402522
'll -2.888253
'm -2.980821
succeeds -3.5941117


In [69]:
import numpy as np

# 예제 배열 생성
arr = np.array([3, 1, 2, 4, 5])

# argsort를 사용하여 정렬된 인덱스 반환
sorted_indices = np.argsort(arr)

# 정렬된 인덱스를 사용하여 배열 정렬
sorted_arr = arr[sorted_indices]

print("원래 배열:", arr)
print("정렬된 인덱스:", sorted_indices)
print("정렬된 배열:", sorted_arr)

원래 배열: [3 1 2 4 5]
정렬된 인덱스: [1 2 0 3 4]
정렬된 배열: [1 2 3 4 5]


In [67]:
import numpy as np

# 소프트맥스 함수 정의
def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)

# 모델 초기화 및 텍스트 전처리
model.reset_state()
text = "The global company Honda produces cars in"
text = text.lower()
words = text.split(' ')
xs = [word_to_id[w] for w in words]
xs = np.array([xs])

# 모델 예측
score = model.predict(xs)
score = score[0, -1]

# 소프트맥스 적용
probabilities = softmax(score)

# 상위 10개 단어의 인덱스 및 확률 출력
next_word_ids = np.argsort(probabilities)[::-1][:10]
for word_id in next_word_ids:
    print(id_to_word[word_id], probabilities[word_id])

the 0.16301416
N 0.14149995
<unk> 0.09958947
a 0.048661537
new 0.01891036
japan 0.01820601
its 0.016383901
europe 0.012748684
an 0.008564487
london 0.006448672


In [64]:
model.reset_state()

In [72]:
model.reset_state()
text = "New car will be released by".lower().split(' ')
xs = [word_to_id[w] for w in text]
xs = np.array([xs])

score = model.predict(xs)
score = score[0,-1]

next_word_ids = np.argsort(-score)[:10]
for word in next_word_ids:
    print(id_to_word[word], score[word])
    
model.reset_state()

the 9.953855
a 9.56018
N 7.795572
<unk> 7.5966907
an 6.7525015
mr. 6.7367268
this 6.155939
its 6.003313
$ 5.8161683
one 5.7097883
