In [1]:
# Detect Google Colab: the `google.colab` package is only importable there.
# Only this probe is guarded, so a failing pip install or Drive mount inside
# Colab surfaces as an error instead of being silently swallowed.
try:
    import google.colab  # noqa: F401
    IN_COLAB = True
except ImportError:
    # rely on local installation of ezkl if the notebook is not in colab
    IN_COLAB = False

if IN_COLAB:
    import subprocess
    import sys

    # install ezkl and onnx into the interpreter that runs this kernel
    for package in ("ezkl", "onnx"):
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])

    # Mount Google Drive; the dataset paths used below live under this mount point.
    from google.colab import drive
    drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
import os
import pandas as pd

In [3]:
# Shell escape: list the current working directory (handy for checking that the Drive mount point exists).
!ls

gdrive	sample_data


In [4]:
# Locations of the plant dataset on the mounted Google Drive.
# Folder that holds the image files referenced by the annotation tables.
imageDir = "/content/gdrive/MyDrive/ZKML Regens/ml_ready_plants"

# Tab-separated annotation tables for the train and test splits.
train_annotations_file = (
    "/content/gdrive/MyDrive/ZKML Regens/ml_ready_plants_metadata/train_imagesOrders.tsv"
)
test_annotations_file = (
    "/content/gdrive/MyDrive/ZKML Regens/ml_ready_plants_metadata/test_imagesOrders.tsv"
)

In [11]:
import os
import pandas as pd
from torchvision.io import read_image
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import unicodedata

class CustomImageDataset(Dataset):
    """Image dataset driven by a tab-separated annotation table.

    The first column of the table is the image file name (relative to
    ``img_dir``) and the second column is the label.

    Args:
        annotations_file: Path to the UTF-8, tab-separated annotation table.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to each loaded image.
        target_transform: Optional callable applied to each label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file, sep="\t", encoding="utf8")
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return self.img_labels.shape[0]

    def __getitem__(self, idx):
        """Load the ``idx``-th sample and return it as ``(image, label)``."""
        file_name = self.img_labels.iloc[idx, 0]
        # Normalise the full path to NFC before handing it to the image reader,
        # so composed and decomposed spellings of a file name resolve alike.
        img_path = unicodedata.normalize('NFC', os.path.join(self.img_dir, file_name))
        image = read_image(img_path)
        label = self.img_labels.iloc[idx, 1]

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    """Small convolutional classifier for 3x32x32 images with 7 output classes.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed a
    two-layer fully connected head. The forward pass returns raw logits.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # After two conv+pool stages a 32x32 input is reduced to 16 maps of 5x5.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=32)
        self.fc2 = nn.Linear(in_features=32, out_features=7)

    def forward(self, x):
        """Map a batch of images to a batch of 7 class logits."""
        features = self.pool(F.relu(self.conv1(x)))
        features = self.pool(F.relu(self.conv2(features)))
        flat = features.view(-1, 16 * 5 * 5)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

In [13]:
# Image preprocessing: resize every image to 33x33, then take a random
# 32x32 crop, which is the spatial size the CNN is built for.
# NOTE: the channel count is left untouched here; the idea of switching to a
# single channel to save RAM is not implemented by this pipeline.
transform = transforms.Compose([
    transforms.Resize((33, 33)),
    transforms.RandomCrop((32, 32)),
])

In [14]:
# Build one dataset per split; both read images from the same directory and
# share the same preprocessing pipeline.
train_dataset = CustomImageDataset(
    annotations_file=train_annotations_file,
    img_dir=imageDir,
    transform=transform,
)

# NOTE(review): the test split goes through the same random crop as training,
# so evaluation results can vary slightly between passes.
test_dataset = CustomImageDataset(
    annotations_file=test_annotations_file,
    img_dir=imageDir,
    transform=transform,
)

In [15]:
from torch.utils.data import DataLoader
from torch.nn import CrossEntropyLoss
from torch.optim import Adam  # Import Adam
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
from torchsummary import summary
import numpy as np

# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Mini-batch loaders over both splits; each one reshuffles on every pass.
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# Instantiate the network on the chosen device and print a per-layer summary
# for a single 3x32x32 input.
model = CNN().to(device)
summary(model, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             456
         MaxPool2d-2            [-1, 6, 14, 14]               0
            Conv2d-3           [-1, 16, 10, 10]           2,416
         MaxPool2d-4             [-1, 16, 5, 5]               0
            Linear-5                   [-1, 32]          12,832
            Linear-6                    [-1, 7]             231
Total params: 15,935
Trainable params: 15,935
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.06
Params size (MB): 0.06
Estimated Total Size (MB): 0.13
----------------------------------------------------------------


In [16]:
# Train the CNN for a fixed number of epochs. After every epoch, measure the
# accuracy on the test loader and save a checkpoint named after that accuracy.
adam = Adam(model.parameters())  # Using Adam with its default learning rate
loss_fn = CrossEntropyLoss()
all_epoch = 25
prev_acc = 0  # accuracy of the most recent epoch; not used to select checkpoints

# Create the checkpoint directory once, up front. exist_ok keeps the cell re-runnable.
os.makedirs("models_zkml", exist_ok=True)

for current_epoch in range(all_epoch):
    # ---- training pass ----
    model.train()
    for train_x, train_label in train_loader:
        train_x = train_x.to(device)
        # normalize the image to 0 or 1 to reflect the inputs from the drawing board
        # NOTE(review): round() only binarizes values that already lie in [0, 1];
        # confirm the pixel range produced by the dataset, otherwise this is a no-op.
        train_x = train_x.round()
        train_label = train_label.to(device)
        adam.zero_grad()
        predict_y = model(train_x.float())
        loss = loss_fn(predict_y, train_label.long())
        loss.backward()
        adam.step()

    # ---- evaluation pass ----
    all_correct_num = 0
    all_sample_num = 0
    model.eval()
    # No gradients are needed for evaluation; disabling autograd avoids building
    # a graph for every test batch.
    with torch.no_grad():
        for test_x, test_label in test_loader:
            # same input rounding as in the training pass
            test_x = test_x.to(device).round()
            test_label = test_label.to(device)
            predict_y = torch.argmax(model(test_x.float()), dim=-1)
            current_correct_num = predict_y == test_label
            all_correct_num += current_correct_num.sum().item()
            all_sample_num += current_correct_num.shape[0]
    acc = all_correct_num / all_sample_num
    print('test accuracy: {:.3f}'.format(acc), flush=True)

    # The whole module is pickled. Epochs whose accuracy formats to the same
    # three decimals overwrite each other's checkpoint file.
    torch.save(model, 'models_zkml/plants{:.3f}.pkl'.format(acc))
    prev_acc = acc

test accuracy: 0.250
test accuracy: 0.258
test accuracy: 0.273
test accuracy: 0.336
test accuracy: 0.344
test accuracy: 0.328
test accuracy: 0.367
test accuracy: 0.352
test accuracy: 0.391
test accuracy: 0.328
test accuracy: 0.391
test accuracy: 0.398
test accuracy: 0.398
test accuracy: 0.414
test accuracy: 0.414
test accuracy: 0.398
test accuracy: 0.391
test accuracy: 0.398
test accuracy: 0.430
test accuracy: 0.398
test accuracy: 0.445
test accuracy: 0.359
test accuracy: 0.430
test accuracy: 0.391
test accuracy: 0.398


In [21]:
import os

# File names (relative to the working directory) of every artifact produced
# by the export and ezkl pipeline below.
model_path = 'network_cnn.onnx'            # exported ONNX model
compiled_model_path = 'network.compiled'   # compiled circuit
pk_path = 'key.pk'                         # proving key
vk_path = 'key.vk'                         # verification key
settings_path = 'settings.json'            # circuit settings
witness_path = 'witness.json'              # witness
data_path = 'input.json'                   # model input used for the witness

In [22]:
import torch
import json

# Export the trained model to ONNX and save the sample input used for the
# export as the JSON input file for the ezkl steps.
model.eval()  # Set the model to evaluation mode

# Fetch a single data point from the train_dataset (it must already be constructed)
train_data_point, _ = train_dataset[0]

# Add a batch dimension, move the sample to the device the model's weights
# actually live on, and convert it to float32 before exporting.
model_device = next(model.parameters()).device
train_data_point = train_data_point.unsqueeze(0).to(model_device).type(torch.float32)

# Export the model to ONNX format
torch.onnx.export(
    model,
    train_data_point,
    model_path,
    export_params=True,
    do_constant_folding=True,
    input_names=['input_0'],
    output_names=['output'],
)

# Flatten the tensor into a plain list for JSON serialization
x = train_data_point.cpu().detach().numpy().reshape([-1]).tolist()
data = {'input_data': [x]}

# Write to data_path (not a hard-coded file name), so this file stays in sync
# with the path the witness-generation step reads.
with open(data_path, 'w') as f:
    json.dump(data, f)

print(f"Model exported to {model_path} and input data saved to {data_path}")

Model exported to network_cnn.onnx and input data saved to input.json


In [23]:
import ezkl

run_args = ezkl.PyRunArgs()
run_args.input_visibility = "private"
run_args.param_visibility = "fixed"
run_args.output_visibility = "public"
run_args.num_inner_cols = 2
run_args.variables = [("batch_size", 1)]

# Capture set of data points
num_data_points = 10

# Fetch 30 data points from the train_dataset
data_points = []
for i, (data_point, _) in enumerate(train_dataset):
    if i >= num_data_points:
        break
    data_points.append(data_point)

# Stack the data points to create a batch
train_data_batch = torch.stack(data_points)

# Add a batch dimension if not already present
if train_data_batch.dim() == 3:
    train_data_batch = train_data_batch.unsqueeze(0)

x = train_data_batch.cpu().detach().numpy().reshape([-1]).tolist()

data = dict(input_data = [x])

cal_path = os.path.join('calibration.json')

# Serialize data into file:
json.dump( data, open(cal_path, 'w' ))

!RUST_LOG=trace
# TODO: Dictionary outputs
res = ezkl.gen_settings(model_path, settings_path, py_run_args=run_args)
assert res == True

res = await ezkl.calibrate_settings(cal_path, model_path, settings_path, "resources")
assert res == True

ERROR:ezkl.graph.model:[tensor] decomposition error: integer -296890674 is too large to be represented by base 16384 and n 2
ERROR:ezkl.execute:forward pass failed: "failed to forward: [halo2] General synthesis error"
ERROR:ezkl.graph.model:[tensor] decomposition error: integer 355605816 is too large to be represented by base 16384 and n 2
ERROR:ezkl.execute:forward pass failed: "failed to forward: [halo2] General synthesis error"
ERROR:ezkl.graph.model:[tensor] decomposition error: integer -645375175 is too large to be represented by base 16384 and n 2
ERROR:ezkl.execute:forward pass failed: "failed to forward: [halo2] General synthesis error"
ERROR:ezkl.graph.model:[tensor] decomposition error: integer -644211911 is too large to be represented by base 16384 and n 2
ERROR:ezkl.execute:forward pass failed: "failed to forward: [halo2] General synthesis error"
ERROR:ezkl.graph.model:[tensor] decomposition error: integer -1290750351 is too large to be represented by base 16384 and n 2
ERR

In [24]:
# Compile the exported model together with the generated settings; the
# compiled circuit is written to compiled_model_path.
res = ezkl.compile_circuit(model_path, compiled_model_path, settings_path)
assert res == True

In [25]:
# Fetch the structured reference string (SRS) for the circuit described by settings_path.
# NOTE(review): the result is not checked here — confirm what get_srs returns before asserting on it.
res = await ezkl.get_srs(settings_path)

In [26]:
# now generate the witness file from the saved input (data_path) and the compiled circuit
# (witness_path is re-assigned to the same value it already has from the artifact-path cell)
witness_path = "witness.json"

res = await ezkl.gen_witness(data_path, compiled_model_path, witness_path)
# The call is expected to leave the witness on disk at witness_path.
assert os.path.isfile(witness_path)

In [None]:
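# Optional sanity check, currently disabled: uncomment to run ezkl.mock on the witness before setup.
#res = ezkl.mock(witness_path, compiled_model_path)
#assert res == True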

In [27]:
# Circuit setup: generate the verification key and the proving key for the
# compiled circuit, written to vk_path and pk_path respectively.
res = ezkl.setup(compiled_model_path, vk_path, pk_path)

assert res == True
# Both keys and the settings file must exist on disk before proving.
for artifact_path in (vk_path, pk_path, settings_path):
    assert os.path.isfile(artifact_path)

In [28]:
# Generate a proof from the witness using the compiled circuit and the
# proving key; the proof is written to proof_path.
proof_path = 'test.pf'

res = ezkl.prove(witness_path, compiled_model_path, pk_path, proof_path, "single")

# Show the returned proof object and check that the proof file was written.
print(res)
assert os.path.isfile(proof_path)

{'instances': [['7c02000000000000000000000000000000000000000000000000000000000000', '9be6ffef93f5e1439170b97948e833285d588181b64550b829a031e1724e6430', '4fe4ffef93f5e1439170b97948e833285d588181b64550b829a031e1724e6430', '30baffef93f5e1439170b97948e833285d588181b64550b829a031e1724e6430', 'fadaffef93f5e1439170b97948e833285d588181b64550b829a031e1724e6430', 'c049000000000000000000000000000000000000000000000000000000000000', 'c823000000000000000000000000000000000000000000000000000000000000']], 'proof': '0x2c85cf5947af1dd6ef005de8096fbe5ac72b557d6a697e57a30b57ac6cb4d7440499ccca0097215371aed12169c33ef2fe55664a1cae7861139e5a789b648a29235c983608527470c6eace34b2c87450425d1685b430a1f31b8dfe2bc6500a8c0338fa6bcb376756d4992d97d753d1e8cf473635c0cabb94bbb22698731d88d628b4ddf3e5c19b2d452125e2e503e10494c7c1a8f504632426dcdc49a265449002c1bf816b47538d966b41324ae9c2e05b1424c0fe2622ae27b52e19b2815355281387a84336d22d6ef019f85fa6eab6862ad3ae57fb12a646a575dfc26a2174178bd18ff67a4c6bd00a64c1d6a303fd460d68e6c05993

In [29]:
# Verify the proof against the circuit settings and the verification key.
res = ezkl.verify(proof_path, settings_path, vk_path)

assert res == True
print("verified")

verified
