In [1]:
import pandas as pd
import numpy as np


from core import constants
from core.utils import *

log = get_logger()

pd.set_option("display.float_format", "{:.2f}".format)

## RPGF 3 Data Check and Cleanup

In [2]:
df = pd.read_csv("data/dummy_data_rpgf3.csv")

In [3]:
display(df.sample(10))

Unnamed: 0,Address,Has voted,Has published,Published at,Created at,Updated at,Projects in ballot,Votes
1,r1VjArnVgx,True,False,,2023-10-14 14:43:58,2023-11-12 05:16:37,0,[]
0,TP3fAbnFbm,True,True,2023-11-21 02:25:59,2023-09-03 10:16:45,2023-11-14 10:13:01,1,"[{'amount': '147316', 'projectId': 'proj0'}]"
3,ogrNwwmq6O,True,True,2023-11-14 04:23:37,2023-09-05 10:46:42,2023-11-08 15:38:00,1,"[{'amount': '897865', 'projectId': 'proj0'}]"
11,EHS2JpDzfO,True,True,2023-11-27 21:39:32,2023-11-23 11:05:44,2023-11-27 11:20:23,1,"[{'amount': '702474', 'projectId': 'proj0'}]"
4,q5QnuVdYXy,True,True,2023-11-28 17:47:00,2023-10-22 20:38:10,2023-11-27 06:07:49,1,"[{'amount': '398887', 'projectId': 'proj0'}]"
15,ndTdtdD5Gd,True,False,,2023-09-19 10:29:38,2023-10-18 05:52:23,0,[]
10,GhnuKoneNo,True,True,2023-11-26 11:18:32,2023-11-09 05:17:29,2023-11-16 18:52:45,1,"[{'amount': '944313', 'projectId': 'proj0'}]"
14,dx9qWCA79I,True,True,2023-11-29 12:44:31,2023-10-10 11:00:48,2023-11-26 14:38:21,1,"[{'amount': '947279', 'projectId': 'proj0'}]"
12,1dOOdTXglH,False,False,,2023-10-10 21:56:15,2023-11-25 06:11:01,0,[]
19,q3UjrnvQ0F,True,False,,2023-11-25 06:11:02,2023-11-26 01:44:29,0,[]


In [4]:
log.info("Check - Num Ballots: " + str(df["Has published"].count()))
log.info("Check - Num Submissions (Published): " + str(df["Has published"].sum()))

# Check if voter_address is unique
if df["Address"].nunique() == df.shape[0]:
    log.info("Check - Address is unique.")
else:
    diff = df.shape[0] - df["Address"].nunique()
    log.info(f"Check - Address is not unique. There are {diff} duplicates.")

# Check if all voters have voted
if df[df["Has voted"] == False].shape[0] > 0:
    not_voted = df[df["Has voted"] == False].shape[0]
    total = df["Address"].nunique()
    log.info(f"Check - {not_voted} voters out of {total} have not voted.")
else:
    log.info("Check - All voters have voted.")

# Check if all voters have published
if df[df["Has published"] == False].shape[0] > 0:
    not_voted = df[df["Has published"] == False].shape[0]
    total = df["Address"].nunique()
    log.info(f"Check - {not_voted} voters out of {total} have not published.")
else:
    log.info("Check - All voters have published.")

2024-01-03 03:49:12 INFO | Check - Num Ballots: 20
2024-01-03 03:49:12 INFO | Check - Num Submissions (Published): 12
2024-01-03 03:49:12 INFO | Check - Address is unique.
2024-01-03 03:49:12 INFO | Check - 2 voters out of 20 have not voted.
2024-01-03 03:49:12 INFO | Check - 8 voters out of 20 have not published.


In [5]:
# Apply the function and concatenate results
expanded_list = [
    expand_json(safe_json_loads(row), idx) for idx, row in df["Votes"].items()
]
expanded_df = pd.concat(expanded_list, ignore_index=True)

result_df = expanded_df.set_index("original_index").join(df.set_index(df.index))

In [6]:
testing_address = "zgdSu8Yr87"
print_df = result_df[result_df["Address"] == testing_address]
print("Num Projects Voted : " + str(print_df["projectId"].count()))
display(print_df.head(10))

Num Projects Voted : 0


Unnamed: 0_level_0,amount,projectId,Address,Has voted,Has published,Published at,Created at,Updated at,Projects in ballot,Votes
original_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1


In [7]:
columns = [col for col in result_df.columns if col not in ["amount", "projectId"]]
columns += ["amount", "projectId"]  # Add the columns to the end of the list
result_df = result_df[columns]

# Update df columns names
result_df.columns = [
    "voter_address",
    "has_voted",
    "has_published",
    "published_at",
    "created_at",
    "updated_at",
    "projects_in_ballot",
    "votes",
    "amount",
    "project_id",
]

result_df.drop(columns="votes", inplace=True)

result_df["amount"] = pd.to_numeric(result_df["amount"])

In [8]:
# result_df.head()
result_df[result_df["voter_address"] == testing_address].head(70)

Unnamed: 0_level_0,voter_address,has_voted,has_published,published_at,created_at,updated_at,projects_in_ballot,amount,project_id
original_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1


## Calculate Voting Results

In [9]:
allocator = ProjectAllocator(
    total_amount=constants.TOTAL_AMOUNT,
    min_amount=constants.MIN_AMOUNT,
    quorum=constants.QUORUM,
)

In [10]:
initial_allocation = allocator.calculate_initial_allocation(result_df)

In [11]:
display(initial_allocation.sample(5))

ValueError: Cannot take a larger sample than population when 'replace=False'

In [None]:
# Scaling the total to 30M OP by project and filter out those with < 1500 OP
allocation_iter = initial_allocation[initial_allocation["is_eligible"] == True].copy()
allocation_iter["scaled_amount"] = allocation_iter["median_amount"]
# display(allocation_iter)
# Set a maximum number of iterations to prevent infinite loop
max_iterations = 10
current_iteration = 0

while (
    allocation_iter["scaled_amount"].sum() != constants.TOTAL_AMOUNT
    and current_iteration <= max_iterations
):
    allocation_iter = allocator.scale_allocations_oneby(allocation_iter)
    current_iteration += 1

    log.info("Check - Current iteration: " + str(current_iteration))

In [None]:
# Check if the loop exited due to reaching max iterations
if (
    current_iteration == max_iterations
    and allocation_iter["scaled_amount"].sum() != constants.TOTAL_AMOUNT
):
    log.info("Maximum iterations reached without meeting the total amount condition.")
else:
    final_total = allocation_iter["scaled_amount"].sum()
    log.info(
        f"Condition met with {final_total} OP allocated through {current_iteration} iteration(s)."
    )

In [None]:
# join the initial allocation with the final allocation, if scaled_amount is null then make it 0
final_allocation = initial_allocation.merge(
    allocation_iter["scaled_amount"],
    how="left",
    on="project_id",
).fillna({"scaled_amount": 0})

# check if the final allocation table still contains all projects.
if final_allocation.index.nunique() == result_df["project_id"].nunique():
    log.info("Check - Final allocation table has included all the projects.")
else:
    log.info(
        "Check - Final allocation table has missing projects. Printing out the missing projects below."
    )
    log.info(
        result_df[~result_df["project_id"].isin(final_allocation.index)]["project_id"]
    )

# check if the final allocation table still sums to the total amount.
if final_allocation["scaled_amount"].sum() == final_total:
    log.info(
        "Check - Final allocation table sums to the right amount of OP: "
        + str(final_total)
    )
else:
    log.info(
        "Check - Final allocation table does not sum to the total OP. Printing out the missing amount below."
    )
    log.info(str(final_total - final_allocation["scaled_amount"].sum()) + " OP")

In [None]:
# export csv
# allocation_iter.drop(columns="median_amount", inplace=True)
final_allocation.to_csv("data/rpgf3_allocation_final.csv")

log.info(f"Results saved in data/rpgf3_allocation_final.csv.")

In [None]:
final_allocation.head(10)

In [None]:
to_cut = (
    allocation_iter[allocation_iter["scaled_amount"] < 1500]
    .sort_values(by="scaled_amount")
    .head(1)
)

In [None]:
# check if to_cut is empty
to_cut.empty

### Calculate Voting Results using pytorch

In [None]:

result_tensor, num_projects = allocator.convert_df_to_tensor(result_df)
project_tensors = allocator.get_project_tensor(result_tensor, num_projects)


In [None]:
# export to onnx
allocator.eval()

# convert projects tensors to tuple
final_allocation_torch = allocator.forward(*project_tensors)
final_allocation.shape

input_names = ['input_' + str(i) for i in range(len(project_tensors))]
    # Export the model
torch.onnx.export(allocator,               # model being run
                      tuple(project_tensors),          # model input (or a tuple for multiple inputs)
                      "network.onnx",           # where to save the model (can be a file or file-like object)
                      export_params=False,       # store the trained parameter weights inside the model file
                      opset_version=17,         # the ONNX version to export the model to
                      do_constant_folding=True, # whether to execute constant folding for optimization
                      input_names = input_names,   # the model's input names
                      output_names = ['output'])

We now include sanity checks for the data. We will check the following:
- that the pandas and pytorch dataframes are the same
- that the median and scaled median allocations are the same
- that eligibility is the same

In [None]:

# a bunch of code to compare pytorch output and the pandas output as a sanity check
final_allocation_torch_np = final_allocation_torch.detach().numpy()
final_allocation_torch_df = pd.DataFrame(final_allocation_torch_np, columns=["votes_count", "median_amount", "is_eligible", "scaled_amount"])
# convert is eligible to boolean
final_allocation_torch_df["is_eligible"] = final_allocation_torch_df["is_eligible"].astype(bool)
# convert votes count to int
final_allocation_torch_df["votes_count"] = final_allocation_torch_df["votes_count"].astype(int)
# add index project_id
final_allocation_torch_df = final_allocation_torch_df.set_index(final_allocation.index.sort_values())

# sorted vy project id
final_allocation_torch_df = final_allocation_torch_df.sort_index()
# sort final allocation by project id
final_allocation_sorted = final_allocation.sort_index()

display(final_allocation_torch_df.head(10))
display(final_allocation_sorted.head(10))
# compare the two outputs
print("Are the two outputs equal?")
final_allocation_torch_df.compare(final_allocation_sorted)


After which we can proceed to generate the settings file for `ezkl` and run calibrate settings to find the optimal settings for `ezkl`

In [None]:
import ezkl
import os

model_path = "network.onnx"
settings_path = "settings.json"
compiled_model_path = os.path.join('network.compiled')
pk_path = os.path.join('test.pk')
vk_path = os.path.join('test.vk')
settings_path = os.path.join('settings.json')

witness_path = os.path.join('witness.json')
data_path = os.path.join('input.json')

py_run_args = ezkl.PyRunArgs()
py_run_args.input_visibility = "public"
py_run_args.output_visibility = "public"
py_run_args.param_visibility = "fixed" # private by default

res = ezkl.gen_settings(model_path, settings_path, py_run_args=py_run_args)
assert res == True

We calibrate the settings to finetune the circuit to the data. This is done by running the `calibrate_settings` function.

In [None]:
data_path = os.path.join("input.json")

data = dict(input_data = [tensor.detach().numpy().reshape([-1]).tolist() for tensor in project_tensors])

# Serialize data into file:
json.dump(data, open(data_path, 'w'))


ezkl.calibrate_settings(data_path, model_path, settings_path, "resources")



In [None]:
# load settings and shrink run_args.logrows by 1 to allow for lookup overflow
settings = json.load(open(settings_path, 'r'))
settings['run_args']['logrows'] += 1
# now save
json.dump(settings, open(settings_path, 'w'))


Next, we will compile the model. The compilation step allow us to generate proofs faster.

In [None]:
res = ezkl.compile_circuit(model_path, compiled_model_path, settings_path)
assert res == True

Before we can setup the circuit params, we need a SRS (Structured Reference String). The SRS is used to generate the proofs.

In [None]:
# srs path
res = ezkl.get_srs( settings_path)

In [None]:
# now generate the witness file 

res = ezkl.gen_witness(data_path, compiled_model_path, witness_path)
assert os.path.isfile(witness_path)

Now run setup, this will generate a proving key (pk) and verification key (vk). The proving key is used for proving while the verification key is used for verificaton.

In [None]:

# setup
res = ezkl.setup(
        compiled_model_path,
        vk_path,
        pk_path,
    )

assert res == True
assert os.path.isfile(vk_path)
assert os.path.isfile(pk_path)
assert os.path.isfile(settings_path)

In [None]:
# GENERATE A PROOF


proof_path = os.path.join('test.pf')

res = ezkl.prove(
        witness_path,
        compiled_model_path,
        pk_path,
        proof_path,
        "single",
    )

print(res)
assert os.path.isfile(proof_path)

We can now verify the proof. 

In [None]:
# VERIFY IT

res = ezkl.verify(
        proof_path,
        settings_path,
        vk_path,
    )

assert res == True
print("verified")

We can now create an EVM / `.sol` verifier that can be deployed on chain to verify submitted proofs using a view function.

In [None]:

abi_path = 'test.abi'
sol_code_path = 'test_1.sol'

res = ezkl.create_evm_verifier(
        vk_path,
        settings_path,
        sol_code_path,
        abi_path,
    )
assert res == True

#### Deploying the Verifier
Now that we have the circuit setup, we can proceed to deploy the verifier onchain.

We will need to setup `solc=0.8.20` for this.

In [None]:
# check if notebook is in colab
try:
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "solc-select"])
    !solc-select install 0.8.20
    !solc-select use 0.8.20
    !solc --version

# rely on local installation if the notebook is not in colab
except:
    pass

In [None]:
onchain_input_array = []

proof = json.load(open(proof_path, 'r'))

# using a loop
# avoiding printing last comma
formatted_output = "["
for i, value in enumerate(proof["instances"]):
    for j, field_element in enumerate(value):
        onchain_input_array.append(ezkl.vecu64_to_felt(field_element))
        formatted_output += str(onchain_input_array[-1])
        if j != len(value) - 1:
            formatted_output += ", "
    formatted_output += "]"

# This will be the values you use onchain
# copy them over to remix and see if they verify
# What happens when you change a value?
print("pubInputs: ", formatted_output)
print("proof: ", "0x" + proof["proof"])

# Sanity checks on circuit outputs

In [None]:
!pip install onnxruntime plotly

In [None]:

import onnx
import onnxruntime
import os
import json
import ezkl
import numpy as np

witness_path = os.path.join('witness.json')
settings_path = os.path.join('settings.json')
model_path = os.path.join('network.onnx')
data_path = os.path.join('input.json')

def get_ezkl_output(witness_file, settings_file):
    # convert the quantized ezkl output to float value
    witness_output = json.load(open(witness_file))
    outputs = witness_output['outputs']
    with open(settings_file) as f:
        settings = json.load(f)
    ezkl_outputs = [[ezkl.vecu64_to_float(
        outputs[i][j], settings['model_output_scales'][i]) for j in range(len(outputs[i]))] for i in range(len(outputs))]
    return ezkl_outputs


def get_onnx_output(model_file, input_file):
    # generate the ML model output from the ONNX file
    onnx_model = onnx.load(model_file)
    onnx.checker.check_model(onnx_model)

    with open(input_file) as f:
        inputs = json.load(f)
    # reshape the input to the model
    num_inputs = len(inputs['input_data'])

    onnx_input = dict()
    for i in range(num_inputs):
        input_node = onnx_model.graph.input[i]
        dims = []
        elem_type = input_node.type.tensor_type.elem_type
        for dim in input_node.type.tensor_type.shape.dim:
            if dim.dim_value == 0:
                dims.append(1)
            else:
                dims.append(dim.dim_value)
        if elem_type == 7:
            inputs_onnx = np.array(inputs['input_data'][i]).astype(
                np.int64).reshape(dims)
        elif elem_type == 9:
            inputs_onnx = np.array(inputs['input_data'][i]).astype(
                bool).reshape(dims)
        else:
            inputs_onnx = np.array(inputs['input_data'][i]).astype(
                np.float32).reshape(dims)
        onnx_input[input_node.name] = inputs_onnx
    try:
        onnx_session = onnxruntime.InferenceSession(model_file)
        onnx_output = onnx_session.run(None, onnx_input)
    except Exception as e:
        print("Error in ONNX runtime: ", e)
        print("using inputs[output_data]")
        onnx_output = inputs['output_data']
    return onnx_output[0]


def compare_outputs(zk_output, onnx_output):
    # calculate hamming difference between the 2 outputs (which are lists)

    res = []
    
    print("zk_output", zk_output)
    print("onnx_output", onnx_output)

    contains_sublist = any(isinstance(sub, list) for sub in zk_output)
    if contains_sublist:
        try:
            if len(onnx_output) == 1:
                zk_output = zk_output[0]
        except Exception as e:
            zk_output = zk_output[0]

    zip_object = zip(np.array(zk_output).flatten(),
                     np.array(onnx_output).flatten())
    for list1_i, list2_i in zip_object:
        if list1_i == 0.0 and list2_i == 0.0:
            res.append(0)
        else:
            diff = list1_i - list2_i
            res.append(diff)


    return np.abs(res)

In [None]:

import plotly.express as px
import pandas as pd

import matplotlib.pyplot as plt
    # get the ezkl output
ezkl_output = get_ezkl_output(witness_path, settings_path)
    # get the onnx output
onnx_output = get_onnx_output(model_path, data_path)
    # compare the outputs
l1_difference = compare_outputs(ezkl_output, onnx_output)

df = pd.DataFrame(l1_difference, columns=["hamming distance"])


# Create a histogram
fig = px.histogram(df, x="hamming distance",
                  title="Distribution of hamming distance")
fig.show()