In [None]:
!pip install gperc

Collecting gperc
  Downloading gperc-0.7-py3-none-any.whl (34 kB)
Collecting fire<0.5.0,>=0.4.0
  Downloading fire-0.4.0.tar.gz (87 kB)
[K     |████████████████████████████████| 87 kB 3.4 MB/s 
[?25hCollecting datasets==1.12.1
  Downloading datasets-1.12.1-py3-none-any.whl (270 kB)
[K     |████████████████████████████████| 270 kB 35.4 MB/s 
Collecting fsspec[http]>=2021.05.0
  Downloading fsspec-2022.1.0-py3-none-any.whl (133 kB)
[K     |████████████████████████████████| 133 kB 49.9 MB/s 
[?25hCollecting aiohttp
  Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 56.2 MB/s 
[?25hCollecting huggingface-hub<0.1.0,>=0.0.14
  Downloading huggingface_hub-0.0.19-py3-none-any.whl (56 kB)
[K     |████████████████████████████████| 56 kB 3.8 MB/s 
Collecting xxhash
  Downloading xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243 kB)
[K     |██████████████

In [1]:
!pip install torch_optimizer

Collecting torch_optimizer
  Downloading torch_optimizer-0.3.0-py3-none-any.whl (61 kB)
[?25l[K     |█████▎                          | 10 kB 18.1 MB/s eta 0:00:01[K     |██████████▋                     | 20 kB 25.0 MB/s eta 0:00:01[K     |███████████████▉                | 30 kB 16.3 MB/s eta 0:00:01[K     |█████████████████████▏          | 40 kB 11.9 MB/s eta 0:00:01[K     |██████████████████████████▌     | 51 kB 8.0 MB/s eta 0:00:01[K     |███████████████████████████████▊| 61 kB 8.4 MB/s eta 0:00:01[K     |████████████████████████████████| 61 kB 510 kB/s 
[?25hCollecting pytorch-ranger>=0.1.1
  Downloading pytorch_ranger-0.1.1-py3-none-any.whl (14 kB)
Installing collected packages: pytorch-ranger, torch-optimizer
Successfully installed pytorch-ranger-0.1.1 torch-optimizer-0.3.0


In [2]:
max_acc=[]
max_loss=[]
min_loss=[]

In [3]:
max_test_acc=[]
max_test_loss=[]
min_test_loss=[]

In [None]:
# testing to see if training on CSV and then using CSV like format to query the model

import os
import json
import requests
import numpy as np
import pandas as pd
from tempfile import gettempdir

import torch
from torch.nn import functional
from tqdm.std import trange
import torch_optimizer as toptim

from gperc import TextConfig, Perceiver

def pre():

  fp = gettempdir() + "/titanic.csv"
  if not os.path.exists(fp):
    r = requests.get("https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv")
    r.raise_for_status()
    with open(fp, "wb") as f:
      f.write(r.content)

  df = pd.read_csv(fp)
  df = df.drop(["PassengerId", "Name", "Ticket", "Cabin"], axis=1)

  # though the keys are the in the the following order, "Survived" is the target
  # and is easy because 0th index
  # ['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
  samples = []
  for x in json.loads(df.to_json(orient="records")):
    x["Sex"] = 0 if x["Sex"] == "male" else 1
    samples.append("|".join([str(y) for y in x.values()]))

  vocab = {k:i for i,k in enumerate(
    sorted(
      list(
        set("".join(samples))
        ) + ["?"]
      )
    )
  }
  maxlen = max([len(x) for x in samples])
  tensor = torch.zeros(len(samples), maxlen).long()
  attention_mask = torch.zeros(len(samples), maxlen).long()
  for i,s in enumerate(samples):
    tensor[i][:len(s)] = torch.tensor([vocab[c] for c in s])
    attention_mask[i][:len(s)] = 1

  print("===== Tensor")
  print(tensor.shape)
  torch.random.manual_seed(420)
  tensor = tensor[torch.randperm(len(tensor))]


  train = tensor[:int(len(tensor)*0.8)]
  train_att = attention_mask[:int(len(tensor)*0.8)]
  test = tensor[int(len(tensor)*0.8):]
  test_att = attention_mask[int(len(tensor)*0.8):]

  # create the model
  config = TextConfig(
    latent_dim = len(vocab) // 2,
    vocab_size = len(vocab),
    max_len = tensor.shape[1],
    latent_frac=0.5,
    ffw_ratio=25,
    num_layers = 350,
    num_heads = 2
  )
  model = Perceiver(config)
  print(model.num_parameters())
  print("train",train)
  print("train_att",train_att)
  print("mask",attention_mask)
  return train, train_att, test, test_att, model, vocab

def main(n = 100, lr = 3e-4, p = 0.85, optim = "Adam"):
  max_accuracy=float('-inf')
  min_l=float('inf')
  max_l=float('-inf')
  max_test_accuracy=float('-inf')
  min_test_l=float('inf')
  max_test_l=float('-inf')
  _torch = hasattr(torch.optim, optim)
  _toptim = hasattr(toptim, optim)
  if not _torch and not _toptim:
    raise ValueError("Unknown optimizer {}".format(optim))

  train, train_att, test, test_att, model, vocab = pre()
  target_train = train.clone().contiguous().view(-1)
  target_test = test.clone().contiguous().view(-1)

  optimizer = getattr(torch.optim, optim) if _torch else getattr(toptim, optim)
  optimizer = optimizer(model.parameters(), lr=lr)

  pbar = trange(n)
  for i in pbar:
    model.train()
    _this_sample = train.clone()
    mask = np.random.uniform(0, 1, _this_sample.shape) > p
    _this_sample[mask] = vocab["?"]
    _this_sample[:, 0] = vocab["?"]

    out_train = model(train, train_att)
    out_train = out_train.contiguous().view(-1, out_train.shape[-1])
    loss_train = functional.cross_entropy(out_train, target_train)
    acc_ = out_train.argmax(dim=-1) == target_train
    acc_class = acc_[mask.reshape(-1)].sum().item() / mask.sum().item()
    acc_avg = acc_.sum().item() / len(acc_)
    
    if acc_avg>max_accuracy:
      max_accuracy=acc_avg
    if loss_train.data>max_l:
      max_l=loss_train.data 
    if loss_train.data<min_l:
      min_l=loss_train.data

    model.zero_grad()
    optimizer.zero_grad()
    loss_train.backward()
    optimizer.step()

    with torch.no_grad():
      model.eval()
      _this_sample = test.clone()
      mask = np.random.uniform(0, 1, _this_sample.shape) > p
      _this_sample[mask] = vocab["?"]
      _this_sample[:, 0] = vocab["?"]

      out_test = model(test, test_att)
      out_test = out_test.contiguous().view(-1, out_test.shape[-1])
      loss_test = functional.cross_entropy(out_test, target_test)
      acc_ = out_test.argmax(dim=-1) == target_test
      acc_class_test = acc_[mask.reshape(-1)].sum().item() / mask.sum().item()
      acc_avg_test = acc_.sum().item() / len(acc_)

      if acc_avg_test>max_test_accuracy:
        max_test_accuracy=acc_avg_test
      if loss_test.data>max_test_l:
        max_test_l=loss_test.data 
      if loss_test.data<min_test_l:
        min_test_l=loss_test.data

    pbar.set_description(
      f"[{i:05d}/{n:05d} {i/n:0.3f}] "
      f"[Train] loss: {loss_train.item():.4f} acc: {acc_avg:.4f} acc_surv: {acc_class:.4f} "
      f"[Test] loss: {loss_test.item():.4f} acc: {acc_avg_test:.4f} acc_surv: {acc_class_test:.4f}"
    )
  max_acc.append(max_accuracy)
  max_loss.append(max_l)
  min_loss.append(min_l)
  max_test_acc.append(max_test_accuracy)
  max_test_loss.append(max_test_l)
  min_test_loss.append(min_test_l)

if __name__ == "__main__":
  from fire import Fire
  Fire(main)

===== Tensor
torch.Size([891, 25])
2028560
train tensor([[ 2, 19,  2,  ..., 15,  0,  0],
        [ 2, 19,  2,  ...,  0,  0,  0],
        [ 2, 19,  2,  ..., 19, 12,  0],
        ...,
        [ 2, 19,  2,  ..., 19, 12,  0],
        [ 1, 19,  4,  ...,  0,  0,  0],
        [ 2, 19,  4,  ...,  0,  0,  0]])
train_att tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 1, 1, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 1, 1, 0],
        [1, 1, 1,  ..., 1, 1, 0],
        [1, 1, 1,  ..., 0, 0, 0]])
mask tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 1, 1, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])


  0%|          | 0/100 [00:00<?, ?it/s]

In [None]:
max_acc

[0.3448876404494382,
 0.29629213483146066,
 0.4602247191011236,
 0.5674719101123595,
 0.5487640449438203,
 0.3887640449438202,
 0.5560674157303371]

In [None]:
x_values=[]



In [4]:
!wget https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv

--2022-02-08 20:33:25--  https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 60302 (59K) [text/plain]
Saving to: ‘titanic.csv’


2022-02-08 20:33:26 (7.56 MB/s) - ‘titanic.csv’ saved [60302/60302]



In [5]:
import os


file_size = os.path.getsize('/content/titanic.csv')
print("File Size is :", file_size, "bytes")

File Size is : 60302 bytes


In [None]:
x_values.append(file_size*8/2028560)

NameError: ignored

In [None]:
x_values

[0.8310925817455121,
 0.7323091869573137,
 0.5224571131519667,
 0.43603890234643333,
 0.3389037977884872,
 0.2907942325312244,
 0.23781204401151557]

In [None]:
import plotly.express as px
import numpy as np


fig = px.line(x=x_values, y=max_acc, labels={'x':'bits/param', 'y':'max_accuracy'})
fig.show()

In [None]:
max_loss=[x.data for x in max_loss]
fig_max_loss = px.line(x=x_values, y=max_loss, labels={'x':'bits/param', 'y':'max_loss'})
fig_max_loss.show()

In [None]:
min_loss=[x.data for x in min_loss]
fig_min_loss = px.line(x=x_values, y=min_loss, labels={'x':'bits/param', 'y':'min_loss'})
fig_min_loss.show()


In [None]:
max_test_loss=[x.data for x in max_test_loss]
fig_max_test_loss = px.line(x=x_values, y=max_test_loss, labels={'x':'bits/param', 'y':'max_test_loss'})
fig_max_test_loss.show()



In [None]:
min_test_loss=[x.data for x in min_test_loss]
fig_min_test_loss = px.line(x=x_values, y=min_test_loss, labels={'x':'bits/param', 'y':'min_test_loss'})
fig_min_test_loss.show()



In [None]:
fig_max_test_acc = px.line(x=x_values, y=max_test_acc, labels={'x':'bits/param', 'y':'max_test_acc'})
fig_max_test_acc.show()



In [None]:
min_loss

[tensor(2.4340),
 tensor(2.3781),
 tensor(2.1870),
 tensor(1.9630),
 tensor(1.8608),
 tensor(2.1812)]

In [None]:
x_values

[0.8310925817455121,
 0.7323091869573137,
 0.5224571131519667,
 0.43603890234643333,
 0.3389037977884872,
 0.2907942325312244]

In [None]:
max_test_loss

[tensor(2.9875),
 tensor(3.0886),
 tensor(2.8531),
 tensor(3.1251),
 tensor(3.0330),
 tensor(3.0006)]

In [None]:
min_test_loss

[tensor(2.3912),
 tensor(2.3411),
 tensor(2.1193),
 tensor(1.9068),
 tensor(2.3112),
 tensor(2.1242)]

## Plotting param/bits


In [None]:
x_values_rev = [0.8310925817455121,
 0.7323091869573137,
 0.5224571131519667,
 0.43603890234643333,
 0.3389037977884872,
 0.2907942325312244]

In [None]:
x_values_rev=[1/x for x in x_values_rev]

In [None]:
max_acc=[0.3448876404494382,
 0.29629213483146066,
 0.4602247191011236,
 0.5674719101123595,
 0.5487640449438203,
 0.3887640449438202,
 0.5560674157303371]

In [None]:
import plotly.express as px
import numpy as np


fig = px.line(x=x_values_rev, y=max_acc, labels={'x':'params/bit', 'y':'max_accuracy'})
fig.show()

In [None]:
min_loss = [2.4340,
 2.3781,
 2.1870,
 1.9630,
 1.8608,
 2.1812]

In [None]:
fig_min_loss = px.line(x=x_values_rev, y=min_loss, labels={'x':'params/bit', 'y':'min_loss'})
fig_min_loss.show()

In [None]:
 max_test_loss=[2.9875,
 3.0886,
 2.8531,
 3.1251,
 3.0330,
 3.0006]

In [None]:
fig_test_max_loss = px.line(x=x_values_rev, y=max_test_loss, labels={'x':'params/bit', 'y':'max_test_loss'})
fig_test_max_loss.show()

In [None]:
min_test_loss=[2.3912,
 2.3411,
 2.1193,
 1.9068,
 2.3112,
 2.1242]


In [None]:
fig_test_min_loss = px.line(x=x_values_rev, y=min_test_loss, labels={'x':'params/bit', 'y':'min_test_loss'})
fig_test_min_loss.show()

# Using BERT

In [6]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.16.2-py3-none-any.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 8.2 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.47-py2.py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 66.5 MB/s 
Collecting tokenizers!=0.11.3,>=0.10.1
  Downloading tokenizers-0.11.4-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.8 MB)
[K     |████████████████████████████████| 6.8 MB 39.6 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 61.1 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 6.2 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting uninstall: pyyaml
  

In [37]:
# testing to see if training on CSV and then using CSV like format to query the model
from transformers import BertTokenizer, BertForMaskedLM 
import torch   
import os
import json
import requests
import numpy as np
import pandas as pd
from tempfile import gettempdir

import torch
from torch.nn import functional
from tqdm.std import trange
import torch_optimizer as toptim


def pre():

  fp = gettempdir() + "/titanic.csv"
  if not os.path.exists(fp):
    r = requests.get("https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv")
    r.raise_for_status()
    with open(fp, "wb") as f:
      f.write(r.content)

  df = pd.read_csv(fp)
  df = df.drop(["PassengerId", "Name", "Ticket", "Cabin"], axis=1)

  # though the keys are the in the the following order, "Survived" is the target
  # and is easy because 0th index
  # ['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
  samples = []
  for x in json.loads(df.to_json(orient="records")):
    x["Sex"] = 0 if x["Sex"] == "male" else 1
    samples.append("|".join([str(y) for y in x.values()]))

  vocab = {k:i for i,k in enumerate(
    sorted(
      list(
        set("".join(samples))
        ) + ["?"]
      )
    )
  }
  maxlen = max([len(x) for x in samples])
  tensor = []
  attention_mask = []
  for i in samples:
    tensor.append(i)
    attention_mask.append(i[:len(i)-1]+"[MASK]")

  print("tensor",attention_mask)
  print("===== Tensor")

  train = tensor[:int(len(tensor)*0.8)]
  train_att = attention_mask[:int(len(tensor)*0.8)]
  test = tensor[int(len(tensor)*0.8):]
  test_att = attention_mask[int(len(tensor)*0.8):]

  # create the model
  tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') 
  model = BertForMaskedLM.from_pretrained('bert-base-uncased', return_dict=True) 

  print("train",train)
  print("train_att",train_att)
  print("mask",attention_mask)
  return train, train_att, test, test_att, model, tokenizer,vocab


In [None]:
import copy


def main(n = 100, lr = 3e-4, p = 0.85, optim = "Adam"):
  min_l=float('inf')
  max_l=float('-inf')
  min_test_l=float('inf')
  max_test_l=float('-inf')
  _torch = hasattr(torch.optim, optim)
  _toptim = hasattr(toptim, optim)
  if not _torch and not _toptim:
    raise ValueError("Unknown optimizer {}".format(optim))

  train, train_att, test, test_att, model, tokenizer, vocab = pre()
  target_train = copy.deepcopy(train)
  target_test = copy.deepcopy(test)


  pbar = trange(n)
  for i in pbar:
    for j in range(len(train)):
      inputs = tokenizer(train_att[j], return_tensors="pt") 
      labels = tokenizer(train[j], return_tensors="pt")["input_ids"]

      outputs = model(**inputs, labels=labels) 
      loss_train = outputs.loss 
      out_train = outputs.logits
      # acc_ = out_train.argmax(dim=-1) == target_train
      # acc_class = acc_[mask.reshape(-1)].sum().item() / mask.sum().item()
      # acc_avg = acc_.sum().item() / len(acc_)
      
      # if acc_avg>max_accuracy:
      #   max_accuracy=acc_avg
      if loss_train.data>max_l:
        max_l=loss_train
      if loss_train.data<min_l:
        min_l=loss_train

      with torch.no_grad():
        
        for k in range(len(test)):
          inputs = tokenizer(test_att[i], return_tensors="pt") 
          labels = tokenizer(test[i], return_tensors="pt")["input_ids"]
          
          out_test = model(**inputs, labels=labels) 
          loss_test = outputs.loss 
          out_test = outputs.logits
          if loss_test.data>max_test_l:
            max_test_l=loss_test.data 
          if loss_test.data<min_test_l:
            min_test_l=loss_test.data

      pbar.set_description(
        f"[{i:05d}/{n:05d} {i/n:0.3f}] "
        f"[Train] loss: {loss_train.item():.4f} "
        f"[Test] loss: {loss_test.item():.4f}"
      )
 
  max_loss.append(max_l)
  min_loss.append(min_l)
  max_test_loss.append(max_test_l)
  min_test_loss.append(min_test_l)

if __name__ == "__main__":
  from fire import Fire
  Fire(main)

In [11]:
!pip install fire

Collecting fire
  Downloading fire-0.4.0.tar.gz (87 kB)
[?25l[K     |███▊                            | 10 kB 22.6 MB/s eta 0:00:01[K     |███████▌                        | 20 kB 28.2 MB/s eta 0:00:01[K     |███████████▏                    | 30 kB 30.4 MB/s eta 0:00:01[K     |███████████████                 | 40 kB 22.2 MB/s eta 0:00:01[K     |██████████████████▊             | 51 kB 9.7 MB/s eta 0:00:01[K     |██████████████████████▍         | 61 kB 8.5 MB/s eta 0:00:01[K     |██████████████████████████▏     | 71 kB 8.7 MB/s eta 0:00:01[K     |██████████████████████████████  | 81 kB 9.6 MB/s eta 0:00:01[K     |████████████████████████████████| 87 kB 4.6 MB/s 
Building wheels for collected packages: fire
  Building wheel for fire (setup.py) ... [?25l[?25hdone
  Created wheel for fire: filename=fire-0.4.0-py2.py3-none-any.whl size=115942 sha256=8b8088e186e88246964ad9be9889f0906a6da2ce68900399bf1d5191a9900ad8
  Stored in directory: /root/.cache/pip/wheels/8a/67/fb/2e8a