In [1]:
import pandas as pd
import numpy as np


from core import constants
from core.utils import *

# Notebook-wide logger; get_logger is presumably supplied by the star import from core.utils — confirm.
log = get_logger()

# Render every float that pandas displays with exactly two decimal places.
pd.set_option("display.float_format", "{:.2f}".format)

In [2]:
# Load the anonymized per-project votes. The path is relative, so the kernel's
# working directory must contain the data/ folder.
df = pd.read_csv("data/anonymized_project_votes.csv")
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,project_name,amount,number_of_votes
0,0xDeFiTH,"[100000.0, 1700.0, 0.0, 5000.0, 0.0, 2000.0, 4...",23
1,0xKhmer,"[1500.0, 1700.0, 50000.0, 10000.0, 5000.0, 240...",22
2,0xSalman,"[25000.0, 50000.0, 25000.0, 57000.0, 10000.0, ...",19
3,0xs34n,"[25000.0, 67000.0, 150000.0, 15000.0, 25000.0,...",31
4,1W3,"[150000.0, 75000.0, 2000.0, 5000.0, 150000.0, ...",27
...,...,...,...
638,zenbit.eth,"[23000.0, 10000.0, 20000.0, 10000.0, 20000.0, ...",11
639,zerokn0wledge,"[25000.0, 45000.0, 2000.0, 5000.0, 1800.0, 150...",24
640,‚ö°Ô∏è TurboETH,"[50000.0, 50000.0, 80000.0, 25000.0, 110000.0,...",47
641,üè∞ BuidlGuidl,"[500000.0, 50000.0, 50000.0, 790000.0, 100000....",88


### Calculate Voting Results using PyTorch

In [3]:
# Build the allocator from the project-wide constants (total budget, minimum amount, quorum).
allocator = ProjectAllocator(
    total_amount=constants.TOTAL_AMOUNT,
    min_amount=constants.MIN_AMOUNT,
    quorum=constants.QUORUM,
)


# Convert the vote frame into the inputs consumed by the allocator.
# NOTE(review): the return type is not visible here; later cells slice it and call
# .detach().numpy() on each element, so presumably a sequence of torch tensors — confirm.
project_tensors = allocator.convert_anonymized_df_to_tensors(df)


  project_name                                             amount  \
0     0xDeFiTH  [100000.0, 1700.0, 0.0, 5000.0, 0.0, 2000.0, 4...   
1      0xKhmer  [1500.0, 1700.0, 50000.0, 10000.0, 5000.0, 240...   
2     0xSalman  [25000.0, 50000.0, 25000.0, 57000.0, 10000.0, ...   
3       0xs34n  [25000.0, 67000.0, 150000.0, 15000.0, 25000.0,...   
4          1W3  [150000.0, 75000.0, 2000.0, 5000.0, 150000.0, ...   

   number_of_votes  
0               23  
1               22  
2               19  
3               31  
4               27  
      project_name                                             amount  \
638     zenbit.eth  [23000.0, 10000.0, 20000.0, 10000.0, 20000.0, ...   
639  zerokn0wledge  [25000.0, 45000.0, 2000.0, 5000.0, 1800.0, 150...   
640    ‚ö°Ô∏è TurboETH  [50000.0, 50000.0, 80000.0, 25000.0, 110000.0,...   
641   üè∞ BuidlGuidl  [500000.0, 50000.0, 50000.0, 790000.0, 100000....   
642     üêç snekmate  [25000.0, 7000.0, 68000.0, 10000.0, 25000.0, 1...   

     numbe

In [4]:
import json
import os

import torch

# export to onnx
# Switch the module to evaluation mode before it is traced for export.
allocator.eval()
# Only the first 32 entries are exported and proven in this notebook.
project_tensors_trunc = project_tensors[0:32]

# Run the PyTorch forward pass once on the same inputs; every tensor is passed
# as its own positional argument.
final_allocation_torch = allocator.forward(*project_tensors_trunc)

# One graph input per tensor: input_0, input_1, ...
input_names = ['input_' + str(i) for i in range(len(project_tensors_trunc))]

# Export the model
torch.onnx.export(
    allocator,                     # model being run
    tuple(project_tensors_trunc),  # model inputs, one tuple element per graph input
    "network.onnx",                # where to save the model
    export_params=False,           # parameters are NOT stored inside the model file
    opset_version=17,              # the ONNX opset version to export the model to
    do_constant_folding=False,     # do not fold constants during export
    input_names=input_names,       # the model's input names
    output_names=['output'],
)

data_path = os.path.join("input.json")
# Each input tensor is flattened to a plain list so it can be serialized as JSON.
data = dict(input_data=[tensor.detach().numpy().reshape([-1]).tolist() for tensor in project_tensors_trunc])
# Serialize data into file; the context manager guarantees the file is flushed
# and closed before ezkl reads it in later cells.
with open(data_path, 'w') as input_file:
    json.dump(data, input_file)

2024-02-07 18:22:10 INFO | Check - Current iteration: 1


Check - Median Amounts: tensor([[  8900.],
        [ 24000.],
        [ 25000.],
        [ 25000.],
        [ 20000.],
        [ 29500.],
        [ 33500.],
        [ 26000.],
        [ 10000.],
        [ 21000.],
        [  6750.],
        [ 77000.],
        [ 22500.],
        [250000.],
        [ 39000.],
        [  5750.],
        [110000.],
        [ 98000.],
        [215000.],
        [ 48500.],
        [ 75000.],
        [ 50000.],
        [ 78000.],
        [  5000.],
        [ 10000.],
        [ 28000.],
        [ 50000.],
        [ 35000.],
        [ 50000.],
        [100000.],
        [  5000.],
        [ 10000.]])
Check - Eligible Median: tensor([[  8900.],
        [ 24000.],
        [ 25000.],
        [ 25000.],
        [ 20000.],
        [     0.],
        [     0.],
        [     0.],
        [     0.],
        [     0.],
        [  6750.],
        [ 77000.],
        [ 22500.],
        [250000.],
        [ 39000.],
        [     0.],
        [110000.],
        [ 98000.],


  votes_count = torch.tensor([num_bids]).reshape(1, 1)
2024-02-07 18:22:11 INFO | Check - Current iteration: 1


Check - Median Amounts: tensor([[  8900.],
        [ 24000.],
        [ 25000.],
        [ 25000.],
        [ 20000.],
        [ 29500.],
        [ 33500.],
        [ 26000.],
        [ 10000.],
        [ 21000.],
        [  6750.],
        [ 77000.],
        [ 22500.],
        [250000.],
        [ 39000.],
        [  5750.],
        [110000.],
        [ 98000.],
        [215000.],
        [ 48500.],
        [ 75000.],
        [ 50000.],
        [ 78000.],
        [  5000.],
        [ 10000.],
        [ 28000.],
        [ 50000.],
        [ 35000.],
        [ 50000.],
        [100000.],
        [  5000.],
        [ 10000.]])
Check - Eligible Median: tensor([[  8900.],
        [ 24000.],
        [ 25000.],
        [ 25000.],
        [ 20000.],
        [     0.],
        [     0.],
        [     0.],
        [     0.],
        [     0.],
        [  6750.],
        [ 77000.],
        [ 22500.],
        [250000.],
        [ 39000.],
        [     0.],
        [110000.],
        [ 98000.],


We now include sanity checks for the data. We will check the following:
- that the pandas and pytorch dataframes are the same
- that the median and scaled median allocations are the same
- that eligibility is the same

TODO: investigate the small differences in the scaled median allocations. Seems like pandas adds some small epsilons during calculations that compound.

After which we can proceed to generate the settings file for `ezkl` and run calibrate settings to find the optimal settings for `ezkl`

In [5]:
import ezkl
import os

# File locations used by every ezkl step below (all relative to the working directory).
model_path = "network.onnx"
compiled_model_path = os.path.join('network.compiled')
pk_path = os.path.join('test.pk')
vk_path = os.path.join('test.vk')
settings_path = os.path.join('settings.json')

witness_path = os.path.join('witness.json')
data_path = os.path.join('input.json')

py_run_args = ezkl.PyRunArgs()
py_run_args.input_visibility = "private"
py_run_args.output_visibility = "public"
py_run_args.param_visibility = "fixed" # private by default
py_run_args.num_inner_cols = 2
# the inputs are integer values !
# NOTE(review): scales are presumably log2 fixed-point denominators, so 0 keeps the inputs unscaled — confirm.
py_run_args.input_scale = 0
# the params get a much larger scale than the inputs
py_run_args.param_scale = 14
# scale rebase multiplier is 1
py_run_args.scale_rebase_multiplier = 1

# Writes the settings file that the calibration and compilation cells read.
res = ezkl.gen_settings(model_path, settings_path, py_run_args=py_run_args)
assert res == True

In [None]:

# Calibrate settings.json against input.json for the "accuracy" target, trying scales 13-16.
# NOTE(review): the recorded output of this cell is a RuntimeError ("could not find any
# suitable parameters given the calibration dataset"), so the settings were not calibrated
# in the saved run — investigate before trusting the later cells.
ezkl.calibrate_settings(data_path, model_path, settings_path, "accuracy", lookup_safety_margin=2, scales=[13, 14, 15, 16], div_rebasing=False)


RuntimeError: Failed to calibrate settings: calibration failed, could not find any suitable parameters given the calibration dataset

Next, we will compile the model. The compilation step allows us to generate proofs faster.

In [None]:
# Compile the ONNX model together with the settings file; the result is written to compiled_model_path.
res = ezkl.compile_circuit(model_path, compiled_model_path, settings_path)
assert res == True

Run a mock proof as a sanity check to ensure that the model is working as expected.

Finally, we will generate the proofs and submit the results.

In [None]:
# Generate the witness file from the input data and the compiled circuit.

res = ezkl.gen_witness(data_path, compiled_model_path, witness_path)
# The return value is not inspected; success is inferred from the witness file existing on disk.
assert os.path.isfile(witness_path)

In [None]:
# Mock run over the witness and the compiled circuit; the result is stored in `res` but not asserted.
res = ezkl.mock(witness_path, compiled_model_path)


spawning module 2


Before we can set up the circuit params, we need an SRS (Structured Reference String). The SRS is used to generate the proofs.

In [None]:
# Obtain the SRS for the circuit described by the settings file.
# NOTE(review): no destination path is passed, so the SRS presumably lands in ezkl's
# default location — confirm. The result is stored in `res` but not checked.
res = ezkl.get_srs(settings_path)

Now run setup. This will generate a proving key (pk) and a verification key (vk). The proving key is used for proving, while the verification key is used for verification.

In [None]:

# Key generation for the compiled circuit: the verification key is written to
# vk_path and the proving key to pk_path.
res = ezkl.setup(compiled_model_path, vk_path, pk_path)

assert res == True
# All artifacts that the proving and verification cells rely on must now be on disk.
assert all(os.path.isfile(artifact) for artifact in (vk_path, pk_path, settings_path))

spawning module 2
spawning module 2


: 

In [None]:
# GENERATE A PROOF
# The proof is written next to the other artifacts in the working directory.
proof_path = 'test.pf'

res = ezkl.prove(witness_path, compiled_model_path, pk_path, proof_path, "single")

# Show what the prover returned, then confirm that the proof file was actually written.
print(res)
assert os.path.isfile(proof_path)

spawning module 2


{'instances': [['00c0050000000000000000000000000000000000000000000000000000000000', '0000b10800000000000000000000000000000000000000000000000000000000', '0040000000000000000000000000000000000000000000000000000000000000', '000047bb0c000000000000000000000000000000000000000000000000000000', '0080050000000000000000000000000000000000000000000000000000000000', '0000701700000000000000000000000000000000000000000000000000000000', '0040000000000000000000000000000000000000000000000000000000000000', '0000105522000000000000000000000000000000000000000000000000000000', '00c0040000000000000000000000000000000000000000000000000000000000', '00006a1800000000000000000000000000000000000000000000000000000000', '0040000000000000000000000000000000000000000000000000000000000000', '000046c323000000000000000000000000000000000000000000000000000000', '00c0070000000000000000000000000000000000000000000000000000000000', '00006a1800000000000000000000000000000000000000000000000000000000', '0040000000000000000000000000000

We can now verify the proof. 

In [None]:
# VERIFY IT
# Check the proof against the settings and the verification key.
# NOTE(review): the recorded output of this cell is "RuntimeError: Failed to run verify:
# The constraint system is not satisfied", so the saved run did not verify.
res = ezkl.verify(proof_path, settings_path, vk_path)

assert res == True
print("verified")

RuntimeError: Failed to run verify: The constraint system is not satisfied

We can now create an EVM / `.sol` verifier that can be deployed on chain to verify submitted proofs using a view function.

In [None]:

# Output locations for the generated Solidity verifier and its ABI.
sol_code_path = 'test_1.sol'
abi_path = 'test.abi'

# Build the on-chain verifier from the verification key and the circuit settings.
res = ezkl.create_evm_verifier(vk_path, settings_path, sol_code_path, abi_path)
assert res == True

# Sanity checks on circuit outputs

In [None]:

import onnx
import onnxruntime
import os
import json
import ezkl
import numpy as np

# Artifacts produced by the earlier cells; all of them live in the working directory.
# os.path.join with a single component returns that component unchanged.
witness_path, settings_path, model_path, data_path = (
    os.path.join(filename)
    for filename in ('witness.json', 'settings.json', 'network.onnx', 'input.json')
)

def get_ezkl_output(witness_file, settings_file):
    """Convert the quantized outputs stored in an ezkl witness file to floats.

    Args:
        witness_file: path to the witness JSON; its 'outputs' entry is read as a
            list of lists of encoded values.
        settings_file: path to the settings JSON; 'model_output_scales'[i] is the
            scale passed along with every value of outputs[i].

    Returns:
        A list of lists with the same nesting as 'outputs', holding whatever
        ezkl.string_to_float returns for each encoded value.
    """
    # Context managers make sure both files are closed even when JSON parsing fails.
    with open(witness_file) as f:
        outputs = json.load(f)['outputs']
    with open(settings_file) as f:
        settings = json.load(f)

    # The scale is looked up per output group, inside the comprehension, so an
    # empty 'outputs' list never touches 'model_output_scales'.
    return [
        [ezkl.string_to_float(value, settings['model_output_scales'][i]) for value in group]
        for i, group in enumerate(outputs)
    ]


def get_onnx_output(model_file, input_file):
    """Run the ONNX model on the inputs stored in a JSON file and return its first output.

    Args:
        model_file: path to the ONNX model; it is loaded and validated with
            onnx.checker before being executed with onnxruntime.
        input_file: path to a JSON file whose 'input_data' entry holds one flat
            list per graph input, in graph-input order. An optional 'output_data'
            entry is used as a fallback when onnxruntime fails.

    Returns:
        The first element of the onnxruntime outputs, or the first element of
        'output_data' when the runtime failed and that fallback is present.

    Raises:
        Exception: the original onnxruntime error is re-raised when the runtime
            fails and the input file provides no 'output_data' fallback.
    """
    # ONNX TensorProto.DataType codes that need a non-float numpy dtype.
    onnx_int64 = 7
    onnx_bool = 9

    # generate the ML model output from the ONNX file
    onnx_model = onnx.load(model_file)
    onnx.checker.check_model(onnx_model)

    with open(input_file) as f:
        inputs = json.load(f)

    # reshape the input to the model
    onnx_input = dict()
    for i, raw_values in enumerate(inputs['input_data']):
        input_node = onnx_model.graph.input[i]
        tensor_type = input_node.type.tensor_type
        # A dim_value of 0 is replaced by 1 so the reshape gets a concrete shape.
        dims = [dim.dim_value if dim.dim_value != 0 else 1 for dim in tensor_type.shape.dim]
        if tensor_type.elem_type == onnx_int64:
            dtype = np.int64
        elif tensor_type.elem_type == onnx_bool:
            dtype = bool
        else:
            dtype = np.float32
        onnx_input[input_node.name] = np.array(raw_values).astype(dtype).reshape(dims)

    try:
        onnx_session = onnxruntime.InferenceSession(model_file)
        onnx_output = onnx_session.run(None, onnx_input)
    except Exception as e:
        print("Error in ONNX runtime: ", e)
        # Without a recorded fallback the lookup below would raise a KeyError and
        # hide the real failure, so surface the runtime error instead.
        if 'output_data' not in inputs:
            raise
        print("using inputs[output_data]")
        onnx_output = inputs['output_data']
    return onnx_output[0]


def compare_outputs(zk_output, onnx_output):
    """Absolute relative difference between the zk and ONNX "scaled amount" outputs.

    Both outputs are flattened and only every 4th value, starting at index 3, is
    compared. Each pair where both values are non-zero is also printed together
    with its difference and percent difference.

    Args:
        zk_output: list (possibly a list of lists) of dequantized ezkl outputs.
        onnx_output: reference outputs from the ONNX run.

    Returns:
        numpy array of |zk - onnx| / |onnx| per compared pair, as a fraction
        (not multiplied by 100); a pair where both values are 0 yields 0.
        A zero reference with a non-zero zk value is not guarded against.
    """
    # Drop one nesting level of the zk output when the ONNX output has a single
    # leading entry, or when its length cannot be taken at all.
    if any(isinstance(item, list) for item in zk_output):
        try:
            unwrap = len(onnx_output) == 1
        except Exception:
            unwrap = True
        if unwrap:
            zk_output = zk_output[0]

    # select every 4th element
    zk_scaled = np.array(zk_output).flatten()[3::4]
    ref_scaled = np.array(onnx_output).flatten()[3::4]

    # print the diff where both values are non-zero
    print("scaled amount diff")
    for idx, zk_val in enumerate(zk_scaled):
        if zk_val != 0.0 and ref_scaled[idx] != 0.0:
            ref_val = ref_scaled[idx]
            delta = zk_val - ref_val
            print("scaled amount zk: ", zk_val)
            print("scaled amount onnx: ", ref_val)
            print("diff: ", delta)
            print("percent diff: ", 100 * delta / ref_val)

    relative_diffs = [
        0 if (zk_val == 0.0 and ref_val == 0.0) else (zk_val - ref_val) / ref_val
        for zk_val, ref_val in zip(zk_scaled, ref_scaled)
    ]

    return np.abs(relative_diffs)

In [None]:

import plotly.express as px
import pandas as pd

import matplotlib.pyplot as plt
    # get the ezkl output
# Dequantized outputs recorded in the ezkl witness.
ezkl_output = get_ezkl_output(witness_path, settings_path)
# Reference outputs from running the exported ONNX model on the same inputs.
onnx_output = get_onnx_output(model_path, data_path)
# Absolute relative difference per compared output (a fraction, not multiplied by 100).
l1_difference = compare_outputs(ezkl_output, onnx_output)

# Use a dedicated name instead of rebinding `df`: `df` holds the vote table loaded
# at the top of the notebook, and overwriting it would break re-running earlier cells.
percent_error_df = pd.DataFrame(l1_difference, columns=["percent error"])


# Create a histogram
fig = px.histogram(percent_error_df, x="percent error",
                  title="Distribution of percent error")
fig.show()