In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
cd /content/drive/My Drive/Defi-PassAI

In [7]:
pip install torch

Collecting torch
  Downloading torch-1.10.1-cp39-cp39-win_amd64.whl (226.5 MB)
Installing collected packages: torch
Successfully installed torch-1.10.1
Note: you may need to restart the kernel to use updated packages.


In [8]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import unicodedata
import string
import random
from torch.utils.data import Dataset, DataLoader

In [9]:
# check which device we gonna use "GPU" or "CPU"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [None]:
#get all the letters on training and evaluation datasets
with open("data/train.txt","r") as f:
  txt = f.read()
 
all_letters = sorted(list(set(txt)))
all_letters += ['<EOS>']
n_letters = len(all_letters)

In [None]:
print(n_letters)

94


In [None]:
# a function that return the index of a character
def letterToIndex(letter):
    if letter in all_letters:
      return all_letters.index(letter)
    return 0

In [None]:
# Read a file and split into lines
def readLines(filename):
    lines = open(filename).read().strip().split('\n')
    return [line.strip('\\') for line in lines] # remove the \ caracters from the end of passwords in the eval dataset

In [None]:
lines = readLines("data/train.txt")
max_length = max([len(word) for word in lines]) #get the maximum lenght of a password in the training set
print(max_length)

110


In [None]:
# create a dataset of classe that we will use to train the model 
class PasswordsDataset(Dataset):
    
    def __init__(self, passwords_list):
        self.passwords_list = passwords_list
        
    def __len__(self):
        return len(self.passwords_list)
    
    # it will return an X Y fo each line of the dataset X will be the table of onehot encoded array describing each character of the line [0:n]
    # and Y will be the line from [1:n] + the end of word '\\'
    # example: x= 'abcd' y='bcd\\'
    def __getitem__(self, idx):
        x_str = self.passwords_list[idx].ljust(max_length, '\\')[:max_length]
        y_str = x_str[1:] + '\\'
        x = torch.zeros((max_length, n_letters))
        y = torch.zeros(max_length)
        for i, c in enumerate(x_str):
            x[i, letterToIndex(c)] = 1
        for i, c in enumerate(y_str):
            y[i] = letterToIndex(c)
            
        return x, y

In [None]:
trainset = PasswordsDataset(lines)

In [None]:
train_loader = DataLoader(trainset, batch_size=256, shuffle=True)
print(len(train_loader))

1469


In [None]:
train_iter = train_loader.__iter__()
X, Y = train_iter.next()
print(X.size(), Y.size())

torch.Size([256, 110, 94]) torch.Size([256, 110])


In [None]:
# hyper parameters
input_size = n_letters
hidden_size = 54
output_size = n_letters
num_layers = 1
train_batch_size = 256

In [None]:
class Model(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super(Model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm1 = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.fc3 = nn.Linear(output_size, output_size)
        
    def forward(self, X, states):
        ht, ct = states
        batch_size = X.size(0)
        out, (ht, ct) = self.lstm1(X, (ht, ct))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return out, (ht, ct) # out: Size([batch_size, max_length, num_chars])

In [None]:
model = Model(input_size=input_size, hidden_size=hidden_size, output_size=output_size, num_layers=num_layers)
model = nn.DataParallel(model)
model = model.to(device)

In [None]:
ht = torch.zeros((num_layers, train_batch_size, hidden_size)).to(device)
ct = torch.zeros((num_layers, train_batch_size, hidden_size)).to(device)

In [None]:
lr = 0.005
step_size = len(train_loader) * 1
gamma = 0.95
print(step_size)

1469


In [None]:
criterion = nn.CrossEntropyLoss(reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=lr)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=step_size, gamma=gamma)

In [None]:
def generate_password(model, start='a', k=5):
    
    if len(start) >= max_length:
        return password
    
    with torch.no_grad():
        
        ht = torch.zeros((num_layers, 1, hidden_size)).to(device)
        ct = torch.zeros((num_layers, 1, hidden_size)).to(device)
        length = 0
        password = start
        # model preparation to predict the rest of the password based on `start`
        for char in start:
            X = torch.zeros((1, 1, n_letters)) # [batch_size, timestep, n_letters]
            X[0, 0, letterToIndex(char)] = 1
            out, (ht, ct) = model(X, (ht, ct))
            length += 1
        vals, idxs = torch.topk(out[0], k) # 0 -> first eg in a batch
        idx = np.random.choice(idxs.cpu().numpy()[0]) # 0 -> first...
        char = all_letters[idx]
        vals, idxs = torch.topk(out[0], k) # 0 -> first eg in a batch
        idx = np.random.choice(idxs.cpu().numpy()[0]) # 0 -> first...
        char = all_letters[idx]
        
        while char != "\\" and length <= max_length-1:
            X = torch.zeros((1, 1, n_letters)) # [batch_size, timestep, n_letters]
            X[0, 0, letterToIndex(char)] = 1
            out, (ht, ct) = model(X, (ht, ct))
            vals, idxs = torch.topk(out[0], k) # 0 -> first eg in a batch
            idx = np.random.choice(idxs.cpu().numpy()[0]) # 0 -> first...
            char = all_letters[idx]
            length += 1
            password += char
    
        if password[-1] != "\\":
            password += "\\"
    
    return password

In [None]:
passwords_list = lines
def sampler(model, start='a', n=10, k=5, only_new=False):
    
    passwords = []
    cnt = 0
    while cnt <= n:
        password = generate_password(model=model, start=start, k=k)
        if only_new: 
            if password not in passwords_list and password not in passwords:
                passwords.append(password)
                cnt += 1
        else:
            if password not in passwords:
                passwords.append(password)
                cnt += 1
    passwords = [password[:-1] for password in passwords]
    
    return passwords

In [None]:
epochs = 20

# if you are testing this do not run the next cell 
# this is only for training it gonna take hours to end 

In [None]:
from tqdm.notebook import tqdm

epoch_losses = []
epoch_lrs = []
iteration_losses = []
iteration_lrs = []

for epoch in tqdm(range(1, epochs+1), desc="Epochs"):
    epoch_loss = 0
    epoch_lr = 0
    
    for i, (X, Y) in tqdm(enumerate(train_loader, 1), total=len(train_loader), desc="Epoch-{}".format(epoch)):
    #for i, (X, Y) in enumerate(train_loader, 1):
        X, Y = X.to(device), Y.to(device)
        
        ht = torch.zeros((num_layers, X.size(0), hidden_size)).to(device)
        ct = torch.zeros((num_layers, X.size(0), hidden_size)).to(device)

        optimizer.zero_grad()
        Y_pred_logits, (ht, ct) = model(X, (ht, ct))
        Y_pred_logits = Y_pred_logits.transpose(1, 2) # Check Loss Doc: [N, d1, C] -> [N, C, d1]
        loss = criterion(Y_pred_logits, Y.long())
        loss.backward(retain_graph=True)
        optimizer.step()
        lr_scheduler.step()
        
        iteration_losses.append(loss.item())
        iteration_lrs.append(lr_scheduler.get_last_lr()[0])
        epoch_loss += loss.item()
        epoch_lr += lr_scheduler.get_last_lr()[0]
        
    epoch_loss /= len(train_loader)
    epoch_lr /= len(train_loader)
    epoch_losses.append(epoch_loss)
    epoch_lrs.append(epoch_lr)

    message = "Epoch:{}    Loss:{}    LR:{}".format(epoch, epoch_loss, epoch_lr)
    print(message)
    passwords = sampler(model, start='jo', n=10, k=10, only_new=False)
    print(passwords)
    torch.save(model.state_dict(), "password_gen_" + str(epoch) + ".pth")

Epochs:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch-1:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:1    Loss:0.22215488488127214    LR:0.004999829816201406
['joitses71480', 'joimia1170516948050388', 'josondangonsenksan134', 'jospe158100225', 'jottss26', 'joigith002056643694806869638647255467577178489534', 'jogy', 'jootatey686451980281157', 'joeystacaraledatayduk1065063347630363640148840276761275000370660', 'jogoongut17123002036913137705', 'jontar']


Epoch-2:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:2    Loss:0.18103079991814552    LR:0.004749838325391262
['joeliti', 'jomirlotot12', 'jooti', 'joa16970162147679559981511352375423457865', 'joesor26835257854280854407', 'jorloge674379914968823538288789328745477438737101410868396787', 'josom3', 'jodybey', 'joct109', 'joceta', 'josotan14415572168852']


Epoch-3:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:3    Loss:0.17745744429371485    LR:0.004512346409121764
['joas', 'joi1030562', 'jogboton', 'joel2', 'josmuesine4387144155234574184074273036', 'jotemy900867392478335892', 'jonis1', 'jonamy', 'jotoo1505433', 'jonordy', 'joubbeedo']


Epoch-4:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:4    Loss:0.17524545628088034    LR:0.004286729088665782
['jorrs10', 'joier6467161', 'joen2', 'jo10158638', 'joo2552755405', 'joan28866602055', 'joe19675', 'joomeyahaal', 'johe150', 'job1902474218046364048648248', 'jobsatsh153']


Epoch-5:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:5    Loss:0.1736267985049521    LR:0.004072392634232523
['joyomush031392', 'jon1979036', 'jomm119038314353411673766710881161040412801060112600120', 'jouluxo2', 'joots1979496', 'jomnony67009347', 'joya093489328083804669490074419706239', 'jogneam103922480160000053122', 'joutly6887160977879916187830120974946830982110683407', 'jotim1454', 'jom15437873263043610926']


Epoch-6:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:6    Loss:0.17239853710321604    LR:0.0038687730025207113
['joobbosh', 'jo6415020273530', 'jobase2059176028', 'josnith', 'jomyssis', 'joetelser6016', 'joene13502108', 'josir15', 'joambon', 'jojklom', 'jossi']


Epoch-7:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:7    Loss:0.17150029427466382    LR:0.003675334352394805
['jokod19977', 'jo101788', 'jojn1556', 'jomy098299122513799682', 'joohusild1028851949222', 'joi9433077379923719416731', 'jonla183539286254130004033678', 'joerty91', 'jokey0534140144242213796', 'josiom', 'jouno263920020520']


Epoch-8:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:8    Loss:0.1708071392417522    LR:0.003491567634775025
['jobrrug', 'jodie150', 'jobame244', 'jonsogat', 'joisty18909798646583245251244232', 'jombiam', 'jometark10', 'jometione35372266981085', 'jodaselasio4', 'joacaidog', 'joel263264707452477']


Epoch-9:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:9    Loss:0.170280812000076    LR:0.003316989253036186
['joo27', 'jotts0089420105746050', 'jomi191442500551248053263', 'jonn22046570', 'joie2516235542036663714284', 'joi1275213377806113868194', 'joue295735546', 'jonahi1974238', 'jorthow', 'josplo1207816', 'joghmrn']


Epoch-10:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:10    Loss:0.16984097574380402    LR:0.0031511397903845953
['jo245', 'jotrumbs252', 'jost126', 'joes201668', 'josey93356965596881', 'josthado', 'joon', 'jonas1', 'jomb14112702726715603079203', 'jokis2020552151242301', 'jodks61303']


Epoch-11:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:11    Loss:0.1694639690418159    LR:0.002993582800865261
['jomyn', 'jodue030485679019557381', 'joea', 'jotsada', 'joijuaiokumbluty175', 'jokyall176803992', 'joar', 'joob', 'jojs', 'jovik', 'jorth078374703785408782102721621610']


Epoch-12:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:12    Loss:0.16915305746989318    LR:0.00284390366082194
['jorct296577', 'jokay1790573556', 'jodayser48285', 'joah15332', 'jospecten20', 'joylra16969833166276676201', 'joyas215021', 'jomksdaysanta1275198565509750495975', 'jods', 'joed231014323847718971216827835', 'joily2319668469053769']


Epoch-13:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:13    Loss:0.16893166516653285    LR:0.002701708477780842
['josen23007908792', 'jois011195812', 'jolblls41', 'joed377011525803', 'joym', 'josin10607260679520', 'jobonky6262', 'job1732', 'jo19736882559', 'jons12432', 'jotbb20541']


Epoch-14:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:14    Loss:0.16867991252323655    LR:0.002566623053891886
['joavo05671134103', 'joaded', 'jone', 'joujusi19596', 'jonu', 'johy21699108778699625', 'joome19246591783020', 'jomed0082', 'jocounu14451', 'jod27887523241655403025731026978775384005', 'jokitito45']


Epoch-15:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:15    Loss:0.1684714729096956    LR:0.002438291901197352
['jolssadsuc', 'joobiseyahn278288701', 'joidie1014', 'jom1', 'joes08343458', 'joul', 'jon148', 'joashru', 'jooy0538461162', 'joah0689', 'joce206689221458599028256146']


Epoch-16:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:16    Loss:0.16830282326130902    LR:0.002316377306137435
['joessino2076668', 'joimmiksus1', 'jomisith260949652117816315', 'joi26145470170673', 'joyb195378229', 'jodrt1', 'jooclash296172704058278026', 'joy1612904749557720', 'joyjabett2468', 'joeysues607167101757515', 'joubeto11498400504703869']


Epoch-17:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:17    Loss:0.16814592254453975    LR:0.0022005584408306017
['joic13370736692416768421786655', 'joode', 'joer11', 'jouj248837', 'joobb', 'jouai255976', 'josach', 'jopetarul141', 'jon26917759852741609628995614011277836', 'jodlo', 'joara']


Epoch-18:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:18    Loss:0.1679888560282608    LR:0.002090530518789069
['joad12320694', 'jo156', 'jodworm4', 'joar', 'joos13757024842', 'jojdjjj11', 'josacoca', 'joel1259689005', 'jonty151533652086', 'joetty111374562721914273', 'joj157867792']


Epoch-19:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:19    Loss:0.16785568682574512    LR:0.0019860039928495912
['jomessoun2', 'joons285', 'jobay', 'jos29396056563130017101', 'jous', 'joan02342637651001729697886281976062', 'jok255', 'josurerianieeer0061188662867322', 'jogiansa051419', 'jojy', 'joakk1838963816005']


Epoch-20:   0%|          | 0/1469 [00:00<?, ?it/s]

Epoch:20    Loss:0.16773943406250139    LR:0.0018867037932070636
['joact', 'joim', 'joikkk', 'jouou2', 'jopita262', 'joa108057999', 'josokara1', 'jof39058055', 'josm19339884012876243719366', 'jodric1512002815847190052274622443', 'jojittass20346']


In [None]:
ls 

 1M.txt                          password_gen_13.pth   password_gen_2.pth
'Copy of 1M.txt'                 password_gen_14.pth   password_gen_3.pth
 [0m[01;34mdata[0m/                           password_gen_15.pth   password_gen_4.pth
 Defi-SecAIDecembre.ipynb        password_gen_16.pth   password_gen_5.pth
'LSTM Password Genrator.ipynb'   password_gen_17.pth   password_gen_6.pth
 [01;34mmodel[0m/                          password_gen_18.pth   password_gen_7.pth
 password_gen_10.pth             password_gen_19.pth   password_gen_8.pth
 password_gen_11.pth             password_gen_1.pth    password_gen_9.pth
 password_gen_12.pth             password_gen_20.pth   [01;34mtests[0m/


In [None]:
# we load the prevouis trained model
path = "password_gen_20.pth" # change this name if you retrained the model 
model = Model(input_size=input_size, hidden_size=hidden_size, output_size=output_size, num_layers=num_layers)
model = nn.DataParallel(model)
model = model.to(device)
model.load_state_dict(torch.load(path,map_location=torch.device('cpu'))) 
# model.load_state_dict(torch.load(path)) 

<All keys matched successfully>

Generating 10k password

In [None]:
rm 10k.txt

In [None]:
# x = sampler(model,start="m", n=10,k=5, only_new=True)
# xy = []
# for i in x:
#   print(type(i+'\\'))
#   xy.append(i+'\\')

# xx = [i[:-1] for i in x]
# print(x) 
# print(xx) 
# print(xy)
# print(x)
# with open("10k.txt", "w") as f:
#   for i in x:
#     f.write(i)
#     # print(type(i))

# with open("10k.txt") as f:
#   print("reading file")
#   f.read()
# generating 1M password
start_letter = all_letters
with open('1M.txt','a') as f:
  for i in range(600000):
    r_letter = random.randrange(0,len(start_letter))
    res = sampler(model, start=start_letter[r_letter], n=0, k=5, only_new=True)
    f.write(res[0]+"\\\n")

In [None]:
ls

 1M.txt                          password_gen_13.pth   password_gen_2.pth
'Copy of 1M.txt'                 password_gen_14.pth   password_gen_3.pth
 [0m[01;34mdata[0m/                           password_gen_15.pth   password_gen_4.pth
 Defi-SecAIDecembre.ipynb        password_gen_16.pth   password_gen_5.pth
'LSTM Password Genrator.ipynb'   password_gen_17.pth   password_gen_6.pth
 [01;34mmodel[0m/                          password_gen_18.pth   password_gen_7.pth
 password_gen_10.pth             password_gen_19.pth   password_gen_8.pth
 password_gen_11.pth             password_gen_1.pth    password_gen_9.pth
 password_gen_12.pth             password_gen_20.pth   [01;34mtests[0m/


In [None]:
open('data/eval.txt','r').readlines()

In [None]:
"""
this function will test the model over the data in eval.txt
we will take every password and pass its first letters and then we will see if 
the model predict the password , and we will fdo that the first time by giving it just 1 letter and the secon 2 letters and sow on till 5 letters
"""
from tqdm.notebook import tqdm

def test():
  test_passwords = []
  results = [0,0,0,0,0]
  with open("data/eval.txt") as f:
    test_passwords = f.readlines()
  for password in tqdm(test_passwords):
    for i in range(1,5): # to take 1 letter than 2 that 3 ..... till 5
      password_guesses = sampler(model, start=password[:i], n=10, k=5 , only_new=True)
      # found = []
      if password+"\\" in password_guesses:
        results[i-1]+=1
        # found.append(password)
  for i in range(5):
    results[i]/=len(test_passwords)
  return results

In [None]:
res = test()


NameError: ignored

In [None]:
print("if we give the model: ")
for i,j in enumerate(res):
  print("\t",i,"lettes it predict:",j*100,"% password correct.")

if we give the model: 
	 0 lettes it predict: 0.0 % password correct.
	 1 lettes it predict: 0.05002501250625312 % password correct.
	 2 lettes it predict: 0.0 % password correct.
	 3 lettes it predict: 0.05002501250625312 % password correct.
	 4 lettes it predict: 0.0 % password correct.
