In [None]:
import numpy as np
import os
class RNN:
    """Character-level vanilla RNN (one tanh hidden layer) trained with Adagrad.

    Attributes (all NumPy arrays):
        wx: input-to-hidden weights, shape (n_neurons, vocab_size).
        wh: hidden-to-hidden weights, shape (n_neurons, n_neurons).
        wy: hidden-to-output weights, shape (vocab_size, n_neurons).
        bx: hidden bias, shape (n_neurons, 1).
        by: output bias, shape (vocab_size, 1).
    """

    def __init__(self,n_neurons,vocab_size):
        """Draw the weights from a Gaussian scaled by 0.01 and zero the biases."""
        self.wh=np.random.randn(n_neurons,n_neurons)*0.01
        self.wx=np.random.randn(n_neurons,vocab_size)*0.01
        self.wy=np.random.randn(vocab_size,n_neurons)*0.01
        self.by=np.zeros((vocab_size,1))
        self.bx=np.zeros((n_neurons,1))

    @staticmethod
    def _softmax(logits):
        """Return (probabilities, log-probabilities) of a softmax over `logits`.

        The maximum logit is subtracted before exponentiating, so np.exp never
        overflows; the largest shifted entry is exp(0)=1, hence the normaliser
        is always >= 1 and its logarithm is finite.
        """
        shifted=logits-np.max(logits)
        exps=np.exp(shifted)
        total=np.sum(exps)
        return exps/total,shifted-np.log(total)

    def forward_pass(self,data,batch_size,vocab_size,epochs,n_neurons,learning_rate,indefinite=False):
        """Train the network on `data` (despite its name this is the training loop).

        Each iteration uses the next `batch_size` characters as inputs and the
        same window shifted by one character as targets, back-propagates
        through that window with `lossFun` and applies an Adagrad update.
        Every 100 iterations a 50-character sample and the current window's
        loss are printed.

        Characters are translated through the module-level `char_to_ix` and
        `ix_to_char` dictionaries, which must be defined before this is called.

        Args:
            data: sequence of characters to learn from. It should hold at least
                batch_size+1 items, otherwise the target window is shorter
                than the input window.
            batch_size: number of time steps unrolled per iteration.
            vocab_size: number of distinct symbols (length of the one-hot vectors).
            epochs: number of iterations (parameter updates) to run; this is
                not the number of passes over `data`.
            n_neurons: hidden-state size, used when the hidden state is reset.
            learning_rate: Adagrad step size.
            indefinite: when True, `epochs` is ignored and the loop never ends.
        """
        # Adagrad accumulators: running sum of squared gradients per parameter.
        mwh,mwx,mwy=np.zeros_like(self.wh),np.zeros_like(self.wx),np.zeros_like(self.wy)
        mbx,mby=np.zeros_like(self.bx),np.zeros_like(self.by)
        # Exponentially smoothed loss; it is tracked but currently not reported.
        smooth_loss=np.log(vocab_size)*batch_size
        n,p=0,0
        while(indefinite or n<epochs):
            # Start (or wrap around to) the beginning of the data with a fresh
            # hidden state when there is no full input/target window left.
            if p+batch_size+1>=len(data) or n==0:
                hprev=np.zeros((n_neurons,1))
                p=0
            inputs=[char_to_ix[c] for c in data[p:p+batch_size]]
            targets=[char_to_ix[c] for c in data[p+1:p+batch_size+1]]

            if n%100==0:
                # Show what the model currently generates from the first input char.
                txt=self.predict(vocab_size,inputs[0],hprev,50)
                str_predict=''.join([ix_to_char[i] for i in txt])
                print('----\n %s \n-----'%(str_predict))

            loss,dwx,dwh,dbx,dwy,dby,hprev=self.lossFun(vocab_size,inputs,targets,hprev)
            smooth_loss=0.999*smooth_loss+0.001*loss

            if n%100==0:
                print('Epoch:%d,Loss=%6f'%(n,loss))

            # Adagrad: parameters and accumulators are updated in place.
            for param,dparam,mem in zip([self.wx,self.wy,self.wh,self.bx,self.by],[dwx,dwy,dwh,dbx,dby],[mwx,mwy,mwh,mbx,mby]):
                mem+=dparam*dparam
                param+=-learning_rate*dparam/(np.sqrt(mem)+1e-7)

            p+=batch_size
            n+=1

    def lossFun(self,vocab_size,inputs,targets,hprev):
        """Forward and backward pass over one window of symbols.

        Args:
            vocab_size: length of the one-hot input vectors.
            inputs: list of input symbol indices, one per time step.
            targets: list of target symbol indices, same length as `inputs`.
            hprev: hidden state preceding the first step, shape (n_neurons, 1).

        Returns:
            (loss, dwx, dwh, dbx, dwy, dby, h_last): the summed cross-entropy
            loss, the gradients of the five parameters (each clipped to
            [-5, 5]) and the hidden state after the last time step.
        """
        X,H,P={},{},{}
        loss=0.0
        H[-1]=np.copy(hprev)
        for t in range(len(inputs)):
            # One-hot encode the input symbol.
            X[t]=np.zeros((vocab_size,1))
            X[t][inputs[t]]=1
            H[t]=np.tanh(np.dot(self.wx,X[t])+np.dot(self.wh,H[t-1])+self.bx)
            logits=np.dot(self.wy,H[t])+self.by

            # Overflow-safe softmax; the loss is taken from the log-probabilities
            # so that a vanishing target probability does not become log(0).
            P[t],log_probs=self._softmax(logits)
            loss-=log_probs[targets[t],0]

        dwx,dwh,dwy=np.zeros_like(self.wx),np.zeros_like(self.wh),np.zeros_like(self.wy)
        dbx,dby=np.zeros_like(self.bx),np.zeros_like(self.by)
        dhnext=np.zeros_like(H[0])

        # Back-propagation through time, last step first.
        for t in reversed(range(len(inputs))):
            # Gradient of softmax + cross-entropy w.r.t. the logits: P - one_hot(target).
            dy=np.copy(P[t])
            dy[targets[t]]-=1

            dby+=dy
            dwy+=np.dot(dy,H[t].T)
            dh=np.dot(self.wy.T,dy)+dhnext

            # Back through tanh: d tanh(z)/dz = 1 - tanh(z)^2.
            dhraw=(1-H[t]*H[t])*dh
            dbx+=dhraw
            dwh+=np.dot(dhraw,H[t-1].T)
            dwx+=np.dot(dhraw,X[t].T)
            dhnext=np.dot(self.wh.T,dhraw)

        # Clip element-wise, in place, to limit exploding gradients.
        for param in [dwx,dwy,dby,dbx,dwh]:
            np.clip(param,-5,5,out=param)
        return loss,dwx,dwh,dbx,dwy,dby,H[len(inputs)-1]

    def predict(self,vocab_size,seed_ix,h,n):
        """Sample `n` symbol indices from the model.

        Args:
            vocab_size: length of the one-hot input vectors.
            seed_ix: index of the symbol fed in at the first step.
            h: initial hidden state, shape (n_neurons, 1); the caller's array
                is not modified.
            n: number of symbols to generate.

        Returns:
            List of the `n` sampled indices (the seed itself is not included).
        """
        ix=[]
        X=np.zeros((vocab_size,1))
        X[seed_ix]=1
        for t in range(n):
            h=np.tanh(np.dot(self.wx,X)+np.dot(self.wh,h)+self.bx)
            Y=np.dot(self.wy,h)+self.by
            # Overflow-safe softmax: np.random.choice rejects NaN probabilities.
            P,_=self._softmax(Y)
            idx=np.random.choice(range(vocab_size),p=P.ravel())
            # The sampled symbol becomes the next input.
            X=np.zeros((vocab_size,1))
            X[idx]=1
            ix.append(idx)
        return ix
    
# docx2txt is a third-party package and was never imported in this notebook,
# so the call below raised NameError. The `pip install docx2txt` cell must
# have been run in this kernel before this cell is executed.
import docx2txt

# os.walk yields the root directory first, so entry [1] is the first
# sub-directory visited under /kaggle/input (assumed to exist); the first file
# listed in it is read as the training corpus.
dirname,_,filename=list(os.walk('/kaggle/input'))[1]
data=docx2txt.process(os.path.join(dirname,filename[0]))
# Sort the vocabulary so the char<->index mapping is the same on every run;
# the iteration order of a set of strings can differ between kernel sessions.
X=sorted(set(data))
data_size,vocab_size=len(data),len(X)

char_to_ix={c:i for i,c in enumerate(X)}
ix_to_char={i:c for i,c in enumerate(X)}

# Training configuration. `epochs` is the number of parameter updates and
# `batch_size` the number of characters unrolled per update.
epochs,n_neurons,batch_size,learning_rate=30000,512,25,1e-1
# Requires '#' to occur in the corpus, otherwise this raises KeyError.
seed_ix,prediction_size=char_to_ix['#'],25

model=RNN(n_neurons,vocab_size)
model.forward_pass(data,batch_size,vocab_size,epochs,n_neurons,learning_rate,False)


In [None]:
# Generate one 100-character sample per vocabulary symbol, each time starting
# from the same all-zero hidden state, and print it prefixed by its seed symbol.
h=np.zeros((n_neurons,1))
seeds=list(map(char_to_ix.__getitem__,X))
for seed_ix in seeds:
    txt=model.predict(vocab_size,seed_ix,h,100)
    generated=''.join(ix_to_char[i] for i in txt)
    print(ix_to_char[seed_ix]+generated)

In [None]:
!pip install docx2txt