In [13]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
import torch.nn.functional as F

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for full_path in (os.path.join(root, fname) for fname in files):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/arabic-names/all_arabic_names.txt


In [14]:
# Load the name list: one name per line, returned as a list of strings without newlines.
# The context manager closes the file handle deterministically, and the explicit UTF-8
# encoding avoids depending on the platform's default codec for the Arabic text.
with open('../input/arabic-names/all_arabic_names.txt', 'r', encoding='utf-8') as names_file:
    words = names_file.read().splitlines()

In [15]:
# Build the character vocabulary and the two lookup tables.
# Every distinct character appearing in `words` gets an integer id starting at 1
# (in sorted order); id 0 is reserved for '.', the marker for the beginning/end of a name.

chars = sorted(set(''.join(words)))
stoi = {ch: idx for idx, ch in enumerate(chars, start=1)}  # character -> id
stoi['.'] = 0
itos = {idx: ch for ch, idx in stoi.items()}  # id -> character

In [16]:
# Build the bigram training set for the neural net.
# Each name is framed with '.' on both sides; every pair of adjacent characters
# contributes one example: the input is the first character's id, the label is the
# id of the character that directly follows it.

# NOTE(review): `b` is not used by any cell visible here; kept so the namespace is unchanged.
b = {}

bigram_ids = [
    (stoi[prev_ch], stoi[next_ch])
    for word in words
    # Zipping '.'+word with word+'.' yields ('.', w0), (w0, w1), ..., (w_last, '.').
    for prev_ch, next_ch in zip('.' + word, word + '.')
]

xs = torch.tensor([inp for inp, _ in bigram_ids])
ys = torch.tensor([lbl for _, lbl in bigram_ids])
num = xs.nelement()  # total number of training examples

In [17]:
# One-hot encode the input ids: each id becomes a 48-wide float row with a single 1.
# NOTE(review): 48 is hard-coded and must match the first dimension of W; presumably it
# is the vocabulary size (len(stoi)) - confirm against the dataset.

X = F.one_hot(xs, num_classes=48).to(torch.float32)

In [18]:
# Randomly initialise the 48x48 weight matrix from a standard normal distribution.
# A dedicated, seeded generator is used so that "Restart & Run All" reproduces the
# same starting weights (and therefore the same training curve) on every run.
# requires_grad=True lets autograd accumulate dLoss/dW into W.grad.

init_gen = torch.Generator().manual_seed(2147483647)
W = torch.randn((48, 48), generator=init_gen, requires_grad=True)

In [19]:
# Train network: 500 steps of full-batch gradient descent on the single weight matrix W.
# The loss is the mean negative log-likelihood of the correct next character plus a
# small L2-style penalty on the weights.

for i in range(500):
    # Forward pass
    logits = X @ W  # one row of next-character scores per training example

    # F.cross_entropy fuses softmax + log + gather + mean using a log-sum-exp
    # formulation, so large logits cannot overflow to inf/NaN the way a manual
    # exp()/sum() normalisation can.
    nll = F.cross_entropy(logits, ys)  # loss
    nll = nll + 0.01 * (W**2).mean()  # regularisation

    if i % 10 == 0:
        print(f'loss after {i} iterations: {nll.item()}')

    # Backward pass
    W.grad = None  # clear the gradient from the previous step
    nll.backward()

    # Update the weights outside of autograd tracking (step size 100). Unlike writing
    # through `W.data`, an in-place update under no_grad is the supported way to
    # modify a leaf tensor that requires grad.
    with torch.no_grad():
        W -= 100 * W.grad
    

loss after 0 iterations: 4.498400688171387
loss after 10 iterations: 2.9593849182128906
loss after 20 iterations: 2.844825029373169
loss after 30 iterations: 2.800931453704834
loss after 40 iterations: 2.7779858112335205
loss after 50 iterations: 2.763876438140869
loss after 60 iterations: 2.7542572021484375
loss after 70 iterations: 2.7472379207611084
loss after 80 iterations: 2.741877794265747
loss after 90 iterations: 2.7376556396484375
loss after 100 iterations: 2.73425555229187
loss after 110 iterations: 2.731470823287964
loss after 120 iterations: 2.7291600704193115
loss after 130 iterations: 2.7272210121154785
loss after 140 iterations: 2.7255771160125732
loss after 150 iterations: 2.724170207977295
loss after 160 iterations: 2.7229552268981934
loss after 170 iterations: 2.721897602081299
loss after 180 iterations: 2.7209694385528564
loss after 190 iterations: 2.720149040222168
loss after 200 iterations: 2.7194199562072754
loss after 210 iterations: 2.718768358230591
loss after 

In [20]:
# Finally, sample 10 names from the trained 'neural net' model.
# Starting from the '.' token (id 0), repeatedly turn the current character into a
# probability distribution over the next character, draw from it, and stop once the
# '.' token is drawn again.

# Seeded generator so the printed samples are reproducible across runs.
sample_gen = torch.Generator().manual_seed(2147483647)

for i in range(10):
    out = []
    ix = 0  # id of '.', the start-of-name marker
    while True:
        # Inference only: no autograd graph is needed while sampling.
        with torch.no_grad():
            # Use a local name for the one-hot row so the training matrix `X`
            # is not overwritten (which would break re-running the training cell).
            xenc = F.one_hot(torch.tensor([ix]), num_classes=48).float()
            logits = xenc @ W  # scores for the next character
            p = F.softmax(logits, dim=1)  # probabilities for next character

        ix = torch.multinomial(p, num_samples=1, replacement=True, generator=sample_gen).item()
        out.append(itos[ix])
        if ix == 0:
            break
    print(''.join(out))

اظ.
أب.
ر.
معفاباي.
يلره.
نادانورضالمايت.
علح.
عون.
سعالار.
مة.
