# In [0]:
# word2vec skip-gram model implemented with NumPy

import numpy as np
from collections import defaultdict

# Default hyperparameters read by word2vec.__init__.

# Optimisation settings.
learning_rate = 0.01  # step size applied to every weight update
epochs = 50           # number of full passes over the training data

# Model / data settings.
n = 16                # width of the hidden (embedding) layer
window_size = 2       # context words taken on each side of the target

class word2vec():
    """Skip-gram word2vec trained with full-softmax SGD, using only NumPy.

    Typical use::

        model = word2vec()
        data = model.generate_training_data(corpus)  # corpus: list of token lists
        model.train(data)

    After training, ``w1`` (shape ``(v_count, n)``) holds one row per
    vocabulary word and ``w2`` (shape ``(n, v_count)``) holds the output
    weights.
    """

    def __init__(self, embedding_dim=None, lr=None, n_epochs=None, window=None):
        """Store the hyperparameters.

        Every argument is optional; when left as ``None`` the matching
        module-level setting (``n``, ``learning_rate``, ``epochs``,
        ``window_size``) is used, so ``word2vec()`` keeps working as before.
        """
        self.n = n if embedding_dim is None else embedding_dim
        self.lr = learning_rate if lr is None else lr
        self.epochs = epochs if n_epochs is None else n_epochs
        self.window = window_size if window is None else window

    def generate_training_data(self, corpus):
        """Build the vocabulary and the (target, contexts) training samples.

        Args:
            corpus: iterable of sentences, each a sequence of hashable tokens.

        Returns:
            Object ndarray of shape ``(num_samples, 2)``. Column 0 is the
            one-hot ``(v_count, 1)`` target vector, column 1 is a list of
            one-hot context vectors. Exactly one row is produced per token
            that has at least one neighbour inside the window.

        Side effects:
            Sets ``v_count``, ``word_list``, ``word_to_index`` and
            ``index_to_word``.
        """
        # Vocabulary in first-occurrence order (dicts preserve insertion order).
        self.word_list = list(
            dict.fromkeys(word for row in corpus for word in row)
        )
        self.v_count = len(self.word_list)
        self.word_to_index = {word: i for i, word in enumerate(self.word_list)}
        self.index_to_word = dict(enumerate(self.word_list))

        samples = []
        for sentence in corpus:
            sent_len = len(sentence)
            for i, word in enumerate(sentence):
                # Clamp the window to the sentence boundaries.
                start = max(0, i - self.window)
                stop = min(sent_len, i + self.window + 1)
                w_context = [
                    self.word2onehot(sentence[j])
                    for j in range(start, stop)
                    if j != i
                ]
                # One sample per target, added only once its context list is
                # complete; a token with no neighbours carries no signal.
                if w_context:
                    samples.append((self.word2onehot(word), w_context))

        # The samples are ragged (a 2-D array next to a list of arrays), so
        # np.array(samples) is not reliable; fill an object array explicitly.
        training_data = np.empty((len(samples), 2), dtype=object)
        for row, (w_target, w_context) in enumerate(samples):
            training_data[row, 0] = w_target
            training_data[row, 1] = w_context
        return training_data

    def word2onehot(self, word):
        """Return the one-hot column vector, shape ``(v_count, 1)``, of *word*.

        Raises:
            KeyError: if *word* is not in the vocabulary.
        """
        word_vec = np.zeros((self.v_count, 1))
        word_vec[self.word_to_index[word]] = 1
        return word_vec

    def normalized_softmax(self, x):
        """Softmax over all entries of *x*, shifted by ``max(x)`` for stability."""
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum()

    def forward_prop(self, x):
        """Run one forward pass for the one-hot input *x*.

        Returns:
            Tuple ``(y, h, u)``: softmax output, hidden activation and raw
            output scores.
        """
        # ref http://alexminnaar.com/2015/04/12/word2vec-tutorial-skipgram.html
        h = np.dot(self.w1.T, x)
        u = np.dot(self.w2.T, h)
        y = self.normalized_softmax(u)
        return y, h, u

    def backward_prop(self, e, h, x):
        """Apply one SGD step to ``w1`` and ``w2``.

        Args:
            e: summed prediction error over the context words (``v_count`` values).
            h: hidden activation from the forward pass (``n`` values).
            x: one-hot target vector (``v_count`` values).

        Column vectors and flat vectors are both accepted.
        """
        # ref http://www.claudiobellei.com/2018/01/06/backprop-word2vec/
        # Flatten first: with (V, 1) columns, np.dot(w2, e.T) would multiply
        # (n, V) by (1, V) and fail.
        e = np.ravel(e)
        h = np.ravel(h)
        x = np.ravel(x)

        dw2 = np.outer(h, e)                   # shape (n, v_count)
        dw1 = np.outer(x, np.dot(self.w2, e))  # shape (v_count, n)

        self.w1 = self.w1 - self.lr * dw1
        self.w2 = self.w2 - self.lr * dw2

    def train(self, training_data):
        """Fit ``w1`` and ``w2`` on the samples from ``generate_training_data``.

        Weights are re-initialised with a fixed seed on every call, so runs
        are reproducible. The accumulated loss of each epoch is printed and
        the last one is kept in ``self.loss``.
        """
        np.random.seed(1)
        self.w1 = np.random.randn(self.v_count, self.n) * 0.01
        self.w2 = np.random.randn(self.n, self.v_count) * 0.01

        for i in range(self.epochs):
            self.loss = 0

            for w_t, w_c in training_data:
                if len(w_c) == 0:
                    # Nothing to predict for a target without context.
                    continue

                y_pred, h, u = self.forward_prop(w_t)
                error_target = np.sum(
                    [np.subtract(y_pred, word) for word in w_c], axis=0
                )
                self.backward_prop(error_target, h, w_t)

                # Loss: -sum_c u[c] + C * log(sum_j exp(u[j])).
                # One-hot vectors are ndarrays (no .index), so use argmax;
                # the log-sum-exp is shifted by max(u) to avoid overflow.
                scores = np.ravel(u)
                context_ids = [int(np.argmax(word)) for word in w_c]
                peak = np.max(scores)
                log_norm = peak + np.log(np.sum(np.exp(scores - peak)))
                self.loss += -np.sum(scores[context_ids]) + len(w_c) * log_norm

            print('Epoch:', i, "Loss:", self.loss)
        

  
    