In [1]:
from utils import create_model_hash, get_model_and_tokenizer, create_model_hash, create_gradient, create_gradient_hash
from hparams import batch_size, sequence_length, topk_percent

model, tokenizer = get_model_and_tokenizer()
print (create_model_hash(model))

86b63e15e20898b60322b0715d904efd6192d4f19324da088ab9795c66f88a4c


In [2]:
import torch
import numpy as np
import random

# Set random seed for reproducibility
def set_seed(seed=42):
    random.seed(seed)  # Python random module
    np.random.seed(seed)  # Numpy module
    torch.manual_seed(seed)  # PyTorch to initialize the random number generator
    torch.cuda.manual_seed(seed)  # For CUDA to ensure reproducibility for GPU
    torch.cuda.manual_seed_all(seed)  # For all GPUs, if there are multiple GPUs

    # Ensuring that PyTorch uses deterministic algorithms
    # Note: This may impact performance, and not all operations are guaranteed to be deterministic
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Setting the number of threads to avoid variability due to parallelism
    torch.set_num_threads(1)

# Call this function at the very beginning of your script with the desired seed
set_seed(42)

  torch.set_num_threads(1)


In [3]:
gradient = create_gradient(
    model = model,
    tokenizer = tokenizer,
    pages = [1],
    batch_size = 1,
    sequence_length = 50,
    device = 'cpu',
    topk_percent = 0.01
)
print( create_gradient_hash( gradient ) )

78089a96c224a5ff380efb685a10962798cb9c770105f3e548ff8170cb170994


In [6]:
import typing
import bittensor as bt

class Gradient( bt.Synapse ):
    pages: typing.List[int]
    model_hash: str
    gradient_idx: typing.Dict[ str, bt.Tensor ] 
    gradient_vals: typing.Dict[ str, bt.Tensor ] 
    sequence_length: int
    batch_size: int
    topk_percent: float
    vresult: str = None

In [12]:
grad_idx: typing.Dict[ str, bt.Tensor ] = {}
grad_vals: typing.Dict[ str, bt.Tensor ] = {}
for key in gradient:
    grad_idx[key] = bt.Tensor.serialize( gradient[key][0] )
    grad_vals[key] = bt.Tensor.serialize( gradient[key][1] )
seal = Seal(
    pages = [1],
    model_hash = create_model_hash(model),
    gradient_idx = grad_idx,
    gradient_vals = grad_vals,
    sequence_length = sequence_length,
    batch_size = batch_size,
    topk_percent = topk_percent
)

In [13]:
import sys
import pickle

serialized_seal = pickle.dumps(seal)

# Calculate the size of the serialized Seal object in bytes
seal_size_bytes = sys.getsizeof(serialized_seal)

print(f"Size of Seal object: {seal_size_bytes} bytes")


Size of Seal object: 13315117 bytes


In [4]:
gradient['transformer.h.0.ln_1.bias']

(tensor([  5, 538, 156, 487, 571, 760, 147], dtype=torch.int32),
 tensor([0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002]))

In [5]:
str(gradient['transformer.h.0.ln_1.bias'][1].cpu().numpy().tobytes())

"b'\\x8e\\xe6{91Du9\\xee\\xf0i9-\\x16g9\\x12Z`9\\x0e\\xf6S9\\xd2\\xa3G9'"

In [7]:
gradient['transformer.h.0.ln_1.bias'][1].tolist()

[0.00024023113655857742,
 0.00023390424030367285,
 0.00022310364875011146,
 0.0002203813783125952,
 0.00021395858493633568,
 0.00020214190590195358,
 0.000190391467185691]

In [15]:
model.zero_grad()

# Process each batch of data
for batch in batches:
    # Move the batch to the specified device
    inputs = batch
    # Pass the inputs through the model and calculate the loss
    outputs = model(inputs, labels=inputs)
    # Normalize the loss by the number of batches
    outputs.loss /= len(batches)
    # Backpropagate the loss to compute gradients
    outputs.loss.backward()
    break

In [20]:
for value in model.state_dict().values():
    print( value[0][:3].tolist() )
    break

[-0.11010301113128662, -0.03926672413945198, 0.03310750797390938]


In [21]:
import transformers
print( transformers.__version__ )
import torch
print( torch.__version__ )

4.37.2
2.2.0


In [22]:
import torch
import numpy as np
import random

# Set random seed for reproducibility
def set_seed(seed=42):
    random.seed(seed)  # Python random module
    np.random.seed(seed)  # Numpy module
    torch.manual_seed(seed)  # PyTorch to initialize the random number generator
    torch.cuda.manual_seed(seed)  # For CUDA to ensure reproducibility for GPU
    torch.cuda.manual_seed_all(seed)  # For all GPUs, if there are multiple GPUs

    # Ensuring that PyTorch uses deterministic algorithms
    # Note: This may impact performance, and not all operations are guaranteed to be deterministic
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Setting the number of threads to avoid variability due to parallelism
    torch.set_num_threads(1)

# Call this function at the very beginning of your script with the desired seed
set_seed(42)

  torch.set_num_threads(1)


In [18]:
inputs

tensor([[10723, 23740, 34433,  5451,  7369,  2534,  7922,  1367,    25,   317]])

In [17]:
outputs.logits

tensor([[[ -32.0910,  -31.5283,  -34.7687,  ...,  -40.1630,  -39.5141,
           -32.1193],
         [ -99.0835,  -97.3635, -104.2114,  ..., -107.5933, -109.2295,
           -99.6051],
         [ -95.5896,  -93.9890, -103.6517,  ..., -111.4975, -108.8931,
           -95.2858],
         ...,
         [ -79.1882,  -78.7252,  -82.5218,  ...,  -92.7975,  -91.2122,
           -77.6879],
         [-106.1695, -105.8472, -106.3847,  ..., -115.0871, -116.5293,
          -101.9647],
         [ -76.4115,  -75.9006,  -77.8268,  ...,  -85.8697,  -82.3854,
           -77.0383]]], grad_fn=<UnsafeViewBackward0>)

In [11]:
from data import SubsetFalconLoader
batches = list(
    SubsetFalconLoader(
        tokenizer=tokenizer,
        batch_size=1, 
        sequence_length=10,
        rows=[1]
    )
)

In [12]:
batches

[tensor([[10723, 23740, 34433,  5451,  7369,  2534,  7922,  1367,    25,   317]]),
 tensor([[18208, 15717,  7467,   311,  1828,    68,  1157,  3232, 13860,  8336]]),
 tensor([[  615,  1651,   198, 14906,   416, 13860,  2504, 27996,   198, 27991]]),
 tensor([[ 2433,     0,  2435,   329,  1194, 10742,   286,   845,  7895,  3950]]),
 tensor([[ 2168,   351,  6041,   286, 30953,    13,  1639,  1276,  2342, 23446]]),
 tensor([[26831,  7022,  1622,  2534,  4471,  1367,  9975,   351,   257,  3670]]),
 tensor([[  286,   564,   250,  3666, 41991, 15717,   447,   251,   763,    89]]),
 tensor([[10403,   428,   481, 11240,  1243,   510,   319,   262, 20121, 14893]]),
 tensor([[ 4636, 14995,   952,    13,   887,   287,  1339,   345,   481,   262]]),
 tensor([[4471,  319,  534, 5581,  900,  379, 1363,  837,  655, 1061]]),
 tensor([[  832,   262,  2792,   994,   284,  2342, 23446, 26831,  7022,  1622]]),
 tensor([[ 2534,  4471,  1367,   257, 10715,  5301,  2691,   329,  1479,   379]]),
 tensor([[  59