In [1]:
import pandas as pd
import numpy as np


from core import constants
from core.utils import *

log = get_logger()

pd.set_option("display.float_format", "{:.2f}".format)

## RPGF 3 Data Check and Cleanup

In [2]:
df = pd.read_csv("data/dummy_data_rpgf3.csv")

In [3]:
display(df.sample(10))

Unnamed: 0,Address,Has voted,Has published,Published at,Created at,Updated at,Projects in ballot,Votes
993,KJwWhxp7MO,True,True,2023-11-30 06:21:37,2023-10-31 20:09:24,2023-11-17 01:08:08,11,"[{'amount': '473239', 'projectId': 'proj499'},..."
295,yjuyaRVDzT,True,True,2023-11-22 15:30:31,2023-10-30 06:06:23,2023-11-18 07:33:31,16,"[{'amount': '74832', 'projectId': 'proj55'}, {..."
392,bQxWg6soYO,True,True,2023-11-21 14:11:08,2023-09-22 06:10:44,2023-10-17 16:40:49,6,"[{'amount': '23536', 'projectId': 'proj173'}, ..."
414,WcXzYUoTvm,True,True,2023-11-28 18:14:38,2023-09-30 23:40:10,2023-10-23 22:30:46,14,"[{'amount': '40748', 'projectId': 'proj387'}, ..."
293,OXlglsOeC9,True,True,2023-11-29 12:57:24,2023-10-29 04:40:05,2023-11-29 04:05:18,4,"[{'amount': '395220', 'projectId': 'proj44'}, ..."
226,IJyQdYDNBu,False,False,,2023-09-30 03:07:49,2023-10-24 11:56:31,0,[]
592,NXZ24pSeUQ,True,True,2023-11-15 23:01:07,2023-10-06 22:51:07,2023-10-09 03:47:32,18,"[{'amount': '140093', 'projectId': 'proj261'},..."
557,oVpqhWOiTq,True,True,2023-11-28 01:16:23,2023-09-29 04:11:09,2023-10-14 20:17:05,12,"[{'amount': '987033', 'projectId': 'proj169'},..."
737,9LLA6XCSHH,False,False,,2023-11-26 08:53:11,2023-11-29 13:53:20,0,[]
756,1mz9eQ8BiT,True,True,2023-11-16 21:46:16,2023-10-12 19:53:26,2023-10-14 10:10:42,3,"[{'amount': '144650', 'projectId': 'proj52'}, ..."


In [4]:
log.info("Check - Num Ballots: " + str(df["Has published"].count()))
log.info("Check - Num Submissions (Published): " + str(df["Has published"].sum()))

# Check if voter_address is unique
if df["Address"].nunique() == df.shape[0]:
    log.info("Check - Address is unique.")
else:
    diff = df.shape[0] - df["Address"].nunique()
    log.info(f"Check - Address is not unique. There are {diff} duplicates.")

# Check if all voters have voted
if df[df["Has voted"] == False].shape[0] > 0:
    not_voted = df[df["Has voted"] == False].shape[0]
    total = df["Address"].nunique()
    log.info(f"Check - {not_voted} voters out of {total} have not voted.")
else:
    log.info("Check - All voters have voted.")

# Check if all voters have published
if df[df["Has published"] == False].shape[0] > 0:
    not_voted = df[df["Has published"] == False].shape[0]
    total = df["Address"].nunique()
    log.info(f"Check - {not_voted} voters out of {total} have not published.")
else:
    log.info("Check - All voters have published.")

2024-02-07 18:18:55 INFO | Check - Num Ballots: 1000
2024-02-07 18:18:55 INFO | Check - Num Submissions (Published): 514
2024-02-07 18:18:55 INFO | Check - Address is unique.
2024-02-07 18:18:55 INFO | Check - 243 voters out of 1000 have not voted.
2024-02-07 18:18:55 INFO | Check - 486 voters out of 1000 have not published.


In [5]:
# Apply the function and concatenate results
expanded_list = [
    expand_json(safe_json_loads(row), idx) for idx, row in df["Votes"].items()
]
expanded_df = pd.concat(expanded_list, ignore_index=True)

result_df = expanded_df.set_index("original_index").join(df.set_index(df.index))

In [6]:
testing_address = "zgdSu8Yr87"
print_df = result_df[result_df["Address"] == testing_address]
print("Num Projects Voted : " + str(print_df["projectId"].count()))
display(print_df.head(10))

Num Projects Voted : 18


Unnamed: 0,amount,projectId,Address,Has voted,Has published,Published at,Created at,Updated at,Projects in ballot,Votes
830,703749,proj117,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,385034,proj51,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,470511,proj230,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,113619,proj204,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,156327,proj141,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,622540,proj390,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,32962,proj482,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,911617,proj46,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,755757,proj250,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,114436,proj82,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."


In [7]:
columns = [col for col in result_df.columns if col not in ["amount", "projectId"]]
columns += ["amount", "projectId"]  # Add the columns to the end of the list
result_df = result_df[columns]

# Update df columns names
result_df.columns = [
    "voter_address",
    "has_voted",
    "has_published",
    "published_at",
    "created_at",
    "updated_at",
    "projects_in_ballot",
    "votes",
    "amount",
    "project_id",
]

result_df.drop(columns="votes", inplace=True)

result_df["amount"] = pd.to_numeric(result_df["amount"])

In [8]:
# result_df.head()
result_df[result_df["voter_address"] == testing_address].head(70)

Unnamed: 0,voter_address,has_voted,has_published,published_at,created_at,updated_at,projects_in_ballot,amount,project_id
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,703749,proj117
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,385034,proj51
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,470511,proj230
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,113619,proj204
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,156327,proj141
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,622540,proj390
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,32962,proj482
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,911617,proj46
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,755757,proj250
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,114436,proj82


## Calculate Voting Results

In [9]:
allocator = ProjectAllocator(
    total_amount=constants.TOTAL_AMOUNT,
    min_amount=constants.MIN_AMOUNT,
    quorum=constants.QUORUM,
)

In [10]:
initial_allocation = allocator.calculate_initial_allocation(result_df)

In [11]:
display(initial_allocation.sample(1))

Unnamed: 0_level_0,votes_count,median_amount,is_eligible
project_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
proj97,10,598335.5,False


In [12]:
# Scaling the total to 30M OP by project and filter out those with < 1500 OP
allocation_iter = initial_allocation[initial_allocation["is_eligible"] == True].copy()
allocation_iter["scaled_amount"] = allocation_iter["median_amount"]
# display(allocation_iter)
# Set a maximum number of iterations to prevent infinite loop
max_iterations = 10
current_iteration = 0

while (
    allocation_iter["scaled_amount"].sum() != constants.TOTAL_AMOUNT
    and current_iteration <= max_iterations
):
    allocation_iter = allocator.scale_allocations_oneby(allocation_iter)
    current_iteration += 1

    log.info("Check - Current iteration: " + str(current_iteration))

2024-02-07 18:18:55 INFO | Check - Original Amount Eligible: 12322787.0
2024-02-07 18:18:55 INFO | Check - Scale Factor: 2.434514205268662
2024-02-07 18:18:55 INFO | Check - No projects below minimum OP
2024-02-07 18:18:55 INFO | Check - Current iteration: 1


In [13]:
# Check if the loop exited due to reaching max iterations
if (
    current_iteration == max_iterations
    and allocation_iter["scaled_amount"].sum() != constants.TOTAL_AMOUNT
):
    log.info("Maximum iterations reached without meeting the total amount condition.")
else:
    final_total = allocation_iter["scaled_amount"].sum()
    log.info(
        f"Condition met with {final_total} OP allocated through {current_iteration} iteration(s)."
    )

2024-02-07 18:18:55 INFO | Condition met with 30000000.0 OP allocated through 1 iteration(s).


In [14]:
# join the initial allocation with the final allocation, if scaled_amount is null then make it 0
final_allocation = initial_allocation.merge(
    allocation_iter["scaled_amount"],
    how="left",
    on="project_id",
).fillna({"scaled_amount": 0})

# check if the final allocation table still contains all projects.
if final_allocation.index.nunique() == result_df["project_id"].nunique():
    log.info("Check - Final allocation table has included all the projects.")
else:
    log.info(
        "Check - Final allocation table has missing projects. Printing out the missing projects below."
    )
    log.info(
        result_df[~result_df["project_id"].isin(final_allocation.index)]["project_id"]
    )

# check if the final allocation table still sums to the total amount.
if final_allocation["scaled_amount"].sum() == final_total:
    log.info(
        "Check - Final allocation table sums to the right amount of OP: "
        + str(final_total)
    )
else:
    log.info(
        "Check - Final allocation table does not sum to the total OP. Printing out the missing amount below."
    )
    log.info(str(final_total - final_allocation["scaled_amount"].sum()) + " OP")

2024-02-07 18:18:55 INFO | Check - Final allocation table has included all the projects.
2024-02-07 18:18:55 INFO | Check - Final allocation table sums to the right amount of OP: 30000000.0


In [15]:
# export csv
# allocation_iter.drop(columns="median_amount", inplace=True)
final_allocation.to_csv("data/rpgf3_allocation_final.csv")

log.info(f"Results saved in data/rpgf3_allocation_final.csv.")

2024-02-07 18:18:55 INFO | Results saved in data/rpgf3_allocation_final.csv.


In [16]:
final_allocation.head(10)

Unnamed: 0_level_0,votes_count,median_amount,is_eligible,scaled_amount
project_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
proj26,5,841223.0,False,0.0
proj323,11,838886.0,False,0.0
proj123,7,829909.0,False,0.0
proj170,10,817377.0,False,0.0
proj46,11,802627.0,False,0.0
proj269,10,793657.0,False,0.0
proj12,11,787796.0,False,0.0
proj196,7,782668.0,False,0.0
proj287,6,782357.0,False,0.0
proj130,5,778991.0,False,0.0


In [17]:
to_cut = (
    allocation_iter[allocation_iter["scaled_amount"] < 1500]
    .sort_values(by="scaled_amount")
    .head(1)
)

In [18]:
# check if to_cut is empty
to_cut.empty

True

### Calculate Voting Results using pytorch

In [26]:

result_tensor, num_projects = allocator.convert_df_to_tensor(result_df)
project_tensors = allocator.get_project_tensor(result_tensor, num_projects)


voter_address         object
has_voted               bool
has_published           bool
published_at          object
created_at            object
updated_at            object
projects_in_ballot     int64
amount                 int64
project_id            object
dtype: object
     voter_address  has_voted  has_published         published_at  \
689            491       True           True  2023-11-03 02:22:07   
897            240       True           True  2023-11-21 01:16:47   
790            260       True           True  2023-11-25 17:02:30   
643            156       True           True  2023-11-25 13:23:26   
160             74       True           True  2023-11-20 19:47:08   

              created_at           updated_at  projects_in_ballot  amount  \
689  2023-10-31 03:01:09  2023-11-02 13:18:21                   9   57138   
897  2023-11-17 09:56:08  2023-11-17 14:52:13                  15  199228   
790  2023-11-22 11:35:04  2023-11-24 11:25:56                   3  230502   
64

In [27]:
import os
# export to onnx
allocator.eval()
project_tensors = project_tensors[0:100]

# convert projects tensors to tuple
final_allocation_torch = allocator.forward(*project_tensors)
final_allocation.shape


input_names = ['input_' + str(i) for i in range(len(project_tensors))]
    # Export the model
torch.onnx.export(allocator,               # model being run
                      tuple(project_tensors),          # model input (or a tuple for multiple inputs)
                      "network.onnx",           # where to save the model (can be a file or file-like object)
                      export_params=False,       # store the trained parameter weights inside the model file
                      opset_version=17,         # the ONNX version to export the model to
                      do_constant_folding=False, # whether to execute constant folding for optimization
                      input_names = input_names,   # the model's input names
                      output_names = ['output'])

data_path = os.path.join("input.json")
data = dict(input_data = [tensor.detach().numpy().reshape([-1]).tolist() for tensor in project_tensors])
# Serialize data into file:
json.dump(data, open(data_path, 'w'))

2024-02-07 18:20:07 INFO | Check - Current iteration: 1


Check - Median Amounts: tensor([[408033.0000],
        [462152.0000],
        [398607.0000],
        [259604.0000],
        [379887.0000],
        [460381.0000],
        [526428.0000],
        [350640.0000],
        [538145.5000],
        [480345.0000],
        [311152.0000],
        [606407.0000],
        [539214.0000],
        [537648.0000],
        [323230.0000],
        [730417.0000],
        [544118.0000],
        [629848.0000],
        [578968.5000],
        [636894.0000],
        [631775.0000],
        [506211.0000],
        [397737.0000],
        [323280.0000],
        [787796.0000],
        [437577.0000],
        [563513.0000],
        [365750.0000],
        [829909.0000],
        [527195.0000],
        [346811.0000],
        [523752.0000],
        [484556.5000],
        [240050.0000],
        [417176.0000],
        [453285.5000],
        [778991.0000],
        [540730.0000],
        [180663.5000],
        [524460.0000],
        [320353.0000],
        [722584.5000],
        [4

  votes_count = torch.tensor([num_bids]).reshape(1, 1)
2024-02-07 18:20:07 INFO | Check - Current iteration: 1


Check - Median Amounts: tensor([[408033.0000],
        [462152.0000],
        [398607.0000],
        [259604.0000],
        [379887.0000],
        [460381.0000],
        [526428.0000],
        [350640.0000],
        [538145.5000],
        [480345.0000],
        [311152.0000],
        [606407.0000],
        [539214.0000],
        [537648.0000],
        [323230.0000],
        [730417.0000],
        [544118.0000],
        [629848.0000],
        [578968.5000],
        [636894.0000],
        [631775.0000],
        [506211.0000],
        [397737.0000],
        [323280.0000],
        [787796.0000],
        [437577.0000],
        [563513.0000],
        [365750.0000],
        [829909.0000],
        [527195.0000],
        [346811.0000],
        [523752.0000],
        [484556.5000],
        [240050.0000],
        [417176.0000],
        [453285.5000],
        [778991.0000],
        [540730.0000],
        [180663.5000],
        [524460.0000],
        [320353.0000],
        [722584.5000],
        [4

We now include sanity checks for the data. We will check the following:
- that the pandas and pytorch dataframes are the same
- that the median and scaled median allocations are the same
- that eligibility is the same

In [28]:

# a bunch of code to compare pytorch output and the pandas output as a sanity check
final_allocation_torch_np = final_allocation_torch.detach().numpy()
final_allocation_torch_df = pd.DataFrame(final_allocation_torch_np, columns=["votes_count", "median_amount", "is_eligible", "scaled_amount"])
# convert is eligible to boolean
final_allocation_torch_df["is_eligible"] = final_allocation_torch_df["is_eligible"].astype(bool)
# convert votes count to int
final_allocation_torch_df["votes_count"] = final_allocation_torch_df["votes_count"].astype(int)
# add index project_id
final_allocation_torch_df = final_allocation_torch_df.set_index(final_allocation.index.sort_values())

# sorted vy project id
final_allocation_torch_df = final_allocation_torch_df.sort_index()
# sort final allocation by project id
final_allocation_sorted = final_allocation.sort_index()

display(final_allocation_torch_df.head(10))
display(final_allocation_sorted.head(10))
# compare the two outputs
print("Are the two outputs equal?")
final_allocation_torch_df.compare(final_allocation_sorted)


ValueError: Length mismatch: Expected 100 rows, received array of length 500

TODO: investigate the small differences in the scaled median allocations. Seems like pandas adds some small epsilons during calculations that compound.

After which we can proceed to generate the settings file for `ezkl` and run calibrate settings to find the optimal settings for `ezkl`

In [None]:
import ezkl
import os

model_path = "network.onnx"
settings_path = "settings.json"
compiled_model_path = os.path.join('network.compiled')
pk_path = os.path.join('test.pk')
vk_path = os.path.join('test.vk')
settings_path = os.path.join('settings.json')

witness_path = os.path.join('witness.json')
data_path = os.path.join('input.json')

py_run_args = ezkl.PyRunArgs()
py_run_args.input_visibility = "private"
py_run_args.output_visibility = "public"
py_run_args.param_visibility = "fixed" # private by default
py_run_args.num_inner_cols = 2
# the inputs are integer values !
py_run_args.input_scale = 0
# the params should be very large
py_run_args.param_scale = 14
#  scale rebase multiplier is 10
py_run_args.scale_rebase_multiplier = 1

res = ezkl.gen_settings(model_path, settings_path, py_run_args=py_run_args)
assert res == True

In [None]:

ezkl.calibrate_settings(data_path, model_path, settings_path, "accuracy", lookup_safety_margin=2, scales=[9], div_rebasing=False, scale_rebase_multiplier=[10])




 <------------- Numerical Fidelity Report (input_scale: 0, param_scale: 9, scale_input_multiplier: 10) ------------->

+------------+--------------+-----------+-----------+----------------+------------------+---------------+---------------+--------------------+--------------------+------------------------+
| mean_error | median_error | max_error | min_error | mean_abs_error | median_abs_error | max_abs_error | min_abs_error | mean_squared_error | mean_percent_error | mean_abs_percent_error |
+------------+--------------+-----------+-----------+----------------+------------------+---------------+---------------+--------------------+--------------------+------------------------+
| 15000.01   | 0            | 1801616   | 0         | 15000.01       | 0                | 1801616       | 0             | 19577428000        | NaN                | NaN                    |
+------------+--------------+-----------+-----------+----------------+------------------+---------------+---------------+--

True

Next, we will compile the model. The compilation step allow us to generate proofs faster.

In [None]:
res = ezkl.compile_circuit(model_path, compiled_model_path, settings_path)
assert res == True

Mock prove as a sanity check to ensure that the model is working as expected.

Finally, we will generate the proofs and submit the results.

In [None]:
# now generate the witness file 

res = ezkl.gen_witness(data_path, compiled_model_path, witness_path)
assert os.path.isfile(witness_path)

In [None]:
res = ezkl.mock(witness_path, compiled_model_path)


Before we can setup the circuit params, we need a SRS (Structured Reference String). The SRS is used to generate the proofs.

In [None]:
# srs path
res = ezkl.get_srs(settings_path)

Now run setup, this will generate a proving key (pk) and verification key (vk). The proving key is used for proving while the verification key is used for verificaton.

In [None]:

# setup
res = ezkl.setup(
        compiled_model_path,
        vk_path,
        pk_path,
    )

assert res == True
assert os.path.isfile(vk_path)
assert os.path.isfile(pk_path)
assert os.path.isfile(settings_path)

In [None]:
# GENERATE A PROOF
proof_path = os.path.join('test.pf')

res = ezkl.prove(
        witness_path,
        compiled_model_path,
        pk_path,
        proof_path,
        "single",
    )

print(res)
assert os.path.isfile(proof_path)

We can now verify the proof. 

In [None]:
# VERIFY IT
res = ezkl.verify(
        proof_path,
        settings_path,
        vk_path,
    )

assert res == True
print("verified")

We can now create an EVM / `.sol` verifier that can be deployed on chain to verify submitted proofs using a view function.

In [None]:

abi_path = 'test.abi'
sol_code_path = 'test_1.sol'

res = ezkl.create_evm_verifier(
        vk_path,
        settings_path,
        sol_code_path,
        abi_path,
    )
assert res == True

# Sanity checks on circuit outputs

In [None]:

import onnx
import onnxruntime
import os
import json
import ezkl
import numpy as np

witness_path = os.path.join('witness.json')
settings_path = os.path.join('settings.json')
model_path = os.path.join('network.onnx')
data_path = os.path.join('input.json')

def get_ezkl_output(witness_file, settings_file):
    # convert the quantized ezkl output to float value
    witness_output = json.load(open(witness_file))
    outputs = witness_output['outputs']
    with open(settings_file) as f:
        settings = json.load(f)
    ezkl_outputs = [[ezkl.string_to_float(
        outputs[i][j], settings['model_output_scales'][i]) for j in range(len(outputs[i]))] for i in range(len(outputs))]
    return ezkl_outputs


def get_onnx_output(model_file, input_file):
    # generate the ML model output from the ONNX file
    onnx_model = onnx.load(model_file)
    onnx.checker.check_model(onnx_model)

    with open(input_file) as f:
        inputs = json.load(f)
    # reshape the input to the model
    num_inputs = len(inputs['input_data'])

    onnx_input = dict()
    for i in range(num_inputs):
        input_node = onnx_model.graph.input[i]
        dims = []
        elem_type = input_node.type.tensor_type.elem_type
        for dim in input_node.type.tensor_type.shape.dim:
            if dim.dim_value == 0:
                dims.append(1)
            else:
                dims.append(dim.dim_value)
        if elem_type == 7:
            inputs_onnx = np.array(inputs['input_data'][i]).astype(
                np.int64).reshape(dims)
        elif elem_type == 9:
            inputs_onnx = np.array(inputs['input_data'][i]).astype(
                bool).reshape(dims)
        else:
            inputs_onnx = np.array(inputs['input_data'][i]).astype(
                np.float32).reshape(dims)
        onnx_input[input_node.name] = inputs_onnx
    try:
        onnx_session = onnxruntime.InferenceSession(model_file)
        onnx_output = onnx_session.run(None, onnx_input)
    except Exception as e:
        print("Error in ONNX runtime: ", e)
        print("using inputs[output_data]")
        onnx_output = inputs['output_data']
    return onnx_output[0]


def compare_outputs(zk_output, onnx_output):
    # calculate hamming difference between the 2 outputs (which are lists)

    res = []


    contains_sublist = any(isinstance(sub, list) for sub in zk_output)
    if contains_sublist:
        try:
            if len(onnx_output) == 1:
                zk_output = zk_output[0]
        except Exception as e:
            zk_output = zk_output[0]

    flat_zk_output = np.array(zk_output).flatten()
    flat_onnx_output = np.array(onnx_output).flatten()

    # select every 4th element
    scaled_amount_zk = flat_zk_output[3::4]
    scaled_amount_onnx = flat_onnx_output[3::4]

    # print the diff where they are not both 0
    print("scaled amount diff")
    for i in range(len(scaled_amount_zk)):
        if scaled_amount_zk[i] != 0.0 and scaled_amount_onnx[i] != 0.0:
            print("scaled amount zk: ", scaled_amount_zk[i])
            print("scaled amount onnx: ", scaled_amount_onnx[i])
            print("diff: ", scaled_amount_zk[i] - scaled_amount_onnx[i])
            print("percent diff: ", 100*(scaled_amount_zk[i] - scaled_amount_onnx[i]) / scaled_amount_onnx[i])

    zip_object = zip(flat_zk_output[3::4], flat_onnx_output[3::4])

    for list1_i, list2_i in zip_object:
        if list1_i == 0.0 and list2_i == 0.0:
            res.append(0)
        else:
            percent_diff = (list1_i - list2_i) / list2_i
            res.append(percent_diff)


    return np.abs(res)

In [None]:

import plotly.express as px
import pandas as pd

import matplotlib.pyplot as plt
    # get the ezkl output
ezkl_output = get_ezkl_output(witness_path, settings_path)
    # get the onnx output
onnx_output = get_onnx_output(model_path, data_path)
    # compare the outputs
l1_difference = compare_outputs(ezkl_output, onnx_output)

df = pd.DataFrame(l1_difference, columns=["percent error"])


# Create a histogram
fig = px.histogram(df, x="percent error",
                  title="Distribution of percent error")
fig.show()