In [1]:
import pandas as pd
import numpy as np


from core import constants
from core.utils import *

# Shared logger for the "Check - ..." messages emitted throughout the notebook.
# get_logger is not defined here; presumably it is exposed by the star import
# from core.utils - confirm.
log = get_logger()

# Render floats with two decimal places in every DataFrame display below.
pd.set_option("display.float_format", "{:.2f}".format)

## RPGF 3 Data Check and Cleanup

In [2]:
# Load the raw ballot export. Judging by the preview in the next cell, each row
# is one voter's ballot, with the individual votes serialised in "Votes".
df = pd.read_csv("data/dummy_data_rpgf3.csv")

In [3]:
# Preview a random sample of ballots. The seed makes the displayed rows
# reproducible across runs, and the size is capped because DataFrame.sample
# raises when asked for more rows than the frame holds.
display(df.sample(n=min(10, len(df)), random_state=0))

Unnamed: 0,Address,Has voted,Has published,Published at,Created at,Updated at,Projects in ballot,Votes
59,bqV5vliOeT,True,False,,2023-10-30 03:03:39,2023-11-12 02:39:36,0,[]
539,a00HcogFhe,True,True,2023-11-29 05:54:52,2023-10-24 17:32:00,2023-11-28 13:18:56,2,"[{'amount': '272638', 'projectId': 'proj487'},..."
261,4Q4COfFlK3,True,True,2023-11-17 18:43:33,2023-10-21 13:12:01,2023-10-26 20:33:46,15,"[{'amount': '52690', 'projectId': 'proj423'}, ..."
298,oeTEb4MpOa,True,False,,2023-11-11 22:46:30,2023-11-26 00:16:06,0,[]
171,FNhbxKl1Kv,True,True,2023-11-25 18:47:10,2023-09-07 08:53:55,2023-10-07 13:52:04,13,"[{'amount': '61426', 'projectId': 'proj69'}, {..."
461,iUagxgaZ5i,True,True,2023-11-17 01:48:00,2023-11-02 20:56:02,2023-11-16 13:32:21,10,"[{'amount': '76548', 'projectId': 'proj83'}, {..."
815,Yr6JqpYWLS,False,False,,2023-09-22 05:59:41,2023-10-07 22:27:38,0,[]
47,e1Mo4iCppa,True,True,2023-11-24 20:14:40,2023-10-14 01:50:45,2023-10-22 03:40:29,10,"[{'amount': '153640', 'projectId': 'proj462'},..."
664,Wv8EMK9wpF,True,True,2023-11-27 05:38:47,2023-10-18 17:54:00,2023-11-20 08:33:32,12,"[{'amount': '157747', 'projectId': 'proj453'},..."
736,SXxaDZFnuO,True,True,2023-11-24 12:07:57,2023-11-09 23:32:57,2023-11-14 08:42:20,2,"[{'amount': '663215', 'projectId': 'proj394'},..."


In [4]:
# Sanity checks on the raw ballot table. Each figure is computed once and
# reused, so the numbers quoted in different log lines cannot drift apart.
num_ballots = len(df)  # every row is a ballot, even if "Has published" is missing
num_voters = df["Address"].nunique()

log.info("Check - Num Ballots: " + str(num_ballots))
log.info("Check - Num Submissions (Published): " + str(df["Has published"].sum()))

# Check if voter_address is unique
duplicate_count = num_ballots - num_voters
if duplicate_count == 0:
    log.info("Check - Address is unique.")
else:
    log.info(f"Check - Address is not unique. There are {duplicate_count} duplicates.")

# Check if all voters have voted.
# .eq(False) keeps the original semantics: missing values are not counted.
not_voted = int(df["Has voted"].eq(False).sum())
if not_voted > 0:
    log.info(f"Check - {not_voted} voters out of {num_voters} have not voted.")
else:
    log.info("Check - All voters have voted.")

# Check if all voters have published
not_published = int(df["Has published"].eq(False).sum())
if not_published > 0:
    log.info(f"Check - {not_published} voters out of {num_voters} have not published.")
else:
    log.info("Check - All voters have published.")

2024-01-02 20:35:07 INFO | Check - Num Ballots: 1000
2024-01-02 20:35:07 INFO | Check - Num Submissions (Published): 514
2024-01-02 20:35:07 INFO | Check - Address is unique.
2024-01-02 20:35:07 INFO | Check - 243 voters out of 1000 have not voted.
2024-01-02 20:35:07 INFO | Check - 486 voters out of 1000 have not published.


In [5]:
# Explode every ballot's "Votes" payload into per-vote rows. Each payload is
# parsed with safe_json_loads and handed to expand_json together with the index
# of the ballot it came from.
expanded_list = []
for ballot_idx, raw_votes in df["Votes"].items():
    parsed_votes = safe_json_loads(raw_votes)
    expanded_list.append(expand_json(parsed_votes, ballot_idx))

# Stack the per-ballot frames into one long table.
expanded_df = pd.concat(expanded_list, ignore_index=True)

# Re-attach the ballot-level columns by aligning "original_index" with df's index.
votes_by_ballot = expanded_df.set_index("original_index")
result_df = votes_by_ballot.join(df)

In [6]:
# Spot-check the expansion for one known voter: count the projects on their
# ballot and preview the first ten exploded rows.
testing_address = "zgdSu8Yr87"
is_test_voter = result_df["Address"] == testing_address
print_df = result_df.loc[is_test_voter]
num_projects_voted = print_df["projectId"].count()
print("Num Projects Voted : " + str(num_projects_voted))
display(print_df.head(10))

Num Projects Voted : 18


Unnamed: 0,amount,projectId,Address,Has voted,Has published,Published at,Created at,Updated at,Projects in ballot,Votes
830,703749,proj117,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,385034,proj51,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,470511,proj230,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,113619,proj204,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,156327,proj141,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,622540,proj390,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,32962,proj482,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,911617,proj46,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,755757,proj250,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."
830,114436,proj82,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,"[{'amount': '703749', 'projectId': 'proj117'},..."


In [7]:
# Normalise the exploded ballot table: snake_case column names, drop the raw
# "votes" payload, move the per-vote columns to the end, and make "amount" numeric.
#
# Columns are renamed by name rather than by position, so a change in the
# join's column order cannot silently mislabel data. Every step is a no-op when
# repeated, so re-running this cell does not fail.
column_renames = {
    "Address": "voter_address",
    "Has voted": "has_voted",
    "Has published": "has_published",
    "Published at": "published_at",
    "Created at": "created_at",
    "Updated at": "updated_at",
    "Projects in ballot": "projects_in_ballot",
    "Votes": "votes",
    "projectId": "project_id",
}
vote_columns = ["amount", "project_id"]  # per-vote columns, kept last

result_df = result_df.rename(columns=column_renames).drop(
    columns="votes", errors="ignore"
)

# Fail loudly if the input schema changed, rather than carrying on with a
# table that lacks columns the allocation step needs.
expected_columns = [
    name for name in column_renames.values() if name not in ("votes", "project_id")
] + vote_columns
missing_columns = [name for name in expected_columns if name not in result_df.columns]
if missing_columns:
    raise KeyError(f"result_df is missing expected columns: {missing_columns}")

# Ballot-level columns first (in their existing order), per-vote columns last.
columns = [col for col in result_df.columns if col not in vote_columns] + vote_columns
result_df = result_df[columns]

# assign() returns a new frame instead of writing into a column subset.
result_df = result_df.assign(amount=pd.to_numeric(result_df["amount"]))

In [8]:
# Show the cleaned rows for the spot-check voter (at most 70 of them).
voter_mask = result_df["voter_address"] == testing_address
result_df.loc[voter_mask].head(70)

Unnamed: 0,voter_address,has_voted,has_published,published_at,created_at,updated_at,projects_in_ballot,amount,project_id
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,703749,proj117
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,385034,proj51
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,470511,proj230
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,113619,proj204
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,156327,proj141
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,622540,proj390
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,32962,proj482
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,911617,proj46
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,755757,proj250
830,zgdSu8Yr87,True,True,2023-11-02 18:18:29,2023-09-21 02:51:22,2023-09-23 16:56:39,18,114436,proj82


## Calculate Voting Results

In [9]:
# Build the allocator from the shared round constants (total amount, minimum
# amount and quorum), collected in one mapping so the configuration reads as data.
allocator_settings = {
    "total_amount": constants.TOTAL_AMOUNT,
    "min_amount": constants.MIN_AMOUNT,
    "quorum": constants.QUORUM,
}
allocator = ProjectAllocator(**allocator_settings)

In [10]:
# First pass over the cleaned votes. Judging by the preview in the next cell,
# the result is indexed by project_id and carries votes_count, median_amount
# and is_eligible for each project.
initial_allocation = allocator.calculate_initial_allocation(result_df)

In [11]:
# Preview a random sample of projects. The seed makes the displayed rows
# reproducible across runs, and the size is capped because DataFrame.sample
# raises when asked for more rows than the frame holds.
display(initial_allocation.sample(n=min(10, len(initial_allocation)), random_state=0))

Unnamed: 0_level_0,votes_count,median_amount,is_eligible
project_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
proj272,9,770040.0,False
proj146,8,543065.0,False
proj109,17,539214.0,True
proj5,7,580733.0,False
proj469,6,758070.0,False
proj70,8,732811.5,False
proj11,13,537648.0,False
proj419,11,491106.0,False
proj401,11,554138.0,False
proj467,12,499044.5,False


In [12]:
# Scale the eligible projects' amounts until they sum to constants.TOTAL_AMOUNT
# (30M OP per the original note). Each pass is delegated to
# allocator.scale_allocations_oneby, which per the original note also filters
# out projects with < 1500 OP.
allocation_iter = initial_allocation[initial_allocation["is_eligible"] == True].copy()
# Start from the median vote as the un-scaled amount.
allocation_iter["scaled_amount"] = allocation_iter["median_amount"]

# Hard cap on the number of passes so the loop cannot run forever.
max_iterations = 10
current_iteration = 0

# `<` (not `<=`): at most max_iterations passes run, and when the cap is hit
# current_iteration equals max_iterations, which is the value the exhaustion
# check after the loop compares against.
while (
    allocation_iter["scaled_amount"].sum() != constants.TOTAL_AMOUNT
    and current_iteration < max_iterations
):
    allocation_iter = allocator.scale_allocations_oneby(allocation_iter)
    current_iteration += 1

    log.info("Check - Current iteration: " + str(current_iteration))

2024-01-02 20:35:07 INFO | Check - Original Amount Eligible: 12322787.0
2024-01-02 20:35:07 INFO | Check - Scale Factor: 2.434514205268662
2024-01-02 20:35:07 INFO | Check - No projects below minimum OP
2024-01-02 20:35:07 INFO | Check - Current iteration: 1


In [13]:
# Report how the scaling loop ended. final_total is computed before branching
# so it is defined on both paths; the consistency check further down reads it.
final_total = allocation_iter["scaled_amount"].sum()

# `>=` rather than `==`: the loop condition can leave current_iteration above
# max_iterations, and an equality test would then report exhaustion as success.
if current_iteration >= max_iterations and final_total != constants.TOTAL_AMOUNT:
    log.info("Maximum iterations reached without meeting the total amount condition.")
else:
    log.info(
        f"Condition met with {final_total} OP allocated through {current_iteration} iteration(s)."
    )

2024-01-02 20:35:07 INFO | Condition met with 30000000.0 OP allocated through 1 iteration(s).


In [14]:
# Join the initial allocation with the scaled amounts. Projects that did not go
# through scaling have no scaled_amount after the left join, so they get 0.
final_allocation = initial_allocation.merge(
    allocation_iter["scaled_amount"],
    how="left",
    on="project_id",
).fillna({"scaled_amount": 0})

# Check that the final allocation table still contains every voted-for project.
if final_allocation.index.nunique() == result_df["project_id"].nunique():
    log.info("Check - Final allocation table has included all the projects.")
else:
    log.info(
        "Check - Final allocation table has missing projects. Printing out the missing projects below."
    )
    # result_df has one row per vote, so de-duplicate to list each project once.
    missing_projects = result_df.loc[
        ~result_df["project_id"].isin(final_allocation.index), "project_id"
    ].drop_duplicates()
    log.info(missing_projects)

# Check that the join preserved the allocated total. The reference total is
# recomputed here so this cell does not depend on a variable that an earlier
# cell defines on only one of its branches.
expected_total = allocation_iter["scaled_amount"].sum()
allocated_total = final_allocation["scaled_amount"].sum()

# The two sums add the same values in a different row order, so compare with a
# tight tolerance instead of exact float equality.
if np.isclose(allocated_total, expected_total, rtol=1e-9, atol=1e-6):
    log.info(
        "Check - Final allocation table sums to the right amount of OP: "
        + str(expected_total)
    )
else:
    log.info(
        "Check - Final allocation table does not sum to the total OP. Printing out the missing amount below."
    )
    log.info(str(expected_total - allocated_total) + " OP")

2024-01-02 20:35:07 INFO | Check - Final allocation table has included all the projects.
2024-01-02 20:35:07 INFO | Check - Final allocation table sums to the right amount of OP: 30000000.0


In [15]:
# Export the final per-project allocation table to CSV. The path is defined
# once so the file written and the path reported in the log cannot diverge.
output_path = "data/rpgf3_allocation_final.csv"
final_allocation.to_csv(output_path)

log.info(f"Results saved in {output_path}.")

2024-01-02 20:35:07 INFO | Results saved in data/rpgf3_allocation_final.csv.


In [16]:
# Preview the first ten rows of the table that was just exported.
final_allocation.head(10)

Unnamed: 0_level_0,votes_count,median_amount,is_eligible,scaled_amount
project_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
proj26,5,841223.0,False,0.0
proj323,11,838886.0,False,0.0
proj123,7,829909.0,False,0.0
proj170,10,817377.0,False,0.0
proj46,11,802627.0,False,0.0
proj269,10,793657.0,False,0.0
proj12,11,787796.0,False,0.0
proj196,7,782668.0,False,0.0
proj287,6,782357.0,False,0.0
proj130,5,778991.0,False,0.0


In [17]:
# Pick the single scaled project with the smallest amount below 1500.
# The result is an empty frame when no project falls under that threshold.
below_threshold = allocation_iter["scaled_amount"] < 1500
candidates = allocation_iter.loc[below_threshold]
to_cut = candidates.sort_values(by="scaled_amount").head(1)

In [18]:
# True when to_cut holds no rows, i.e. no scaled amount is below the threshold
# and nothing needs to be cut.
to_cut.empty

True

### Calculate Voting Results using PyTorch

In [19]:

# Convert the cleaned vote table into tensors for the allocator's tensor path.
# NOTE(review): the layout of result_tensor and project_tensors is defined
# inside ProjectAllocator, which is not visible here - confirm before relying
# on it. The export cell below iterates over project_tensors and reads .shape
# on each element.
result_tensor, num_projects = allocator.convert_df_to_tensor(result_df)
project_tensors = allocator.get_project_tensor(result_tensor, num_projects)


In [20]:
# Export the allocator to ONNX.
# torch is imported explicitly rather than relying on a star import to expose it.
import torch

allocator.eval()

# Random stand-ins with the same shapes as the real per-project tensors; these
# are the example inputs handed to the exporter.
dummy_data = tuple(torch.randn(*x.shape) for x in project_tensors)

# Run the allocator on the real tensors. Calling the module (rather than
# .forward directly) is the supported entry point and keeps any registered
# hooks active.
# NOTE(review): this rebinds `initial_allocation`, which earlier cells use for
# the pandas allocation table. Re-running those cells after this one will see
# the tensor result instead. The name is kept in case later cells depend on it.
initial_allocation = allocator(*project_tensors)

# One ONNX input name per project tensor: input_0, input_1, ...
input_names = ["input_" + str(i) for i in range(len(project_tensors))]

# Export the model
torch.onnx.export(
    allocator,  # model being run
    dummy_data,  # model input (a tuple for multiple inputs)
    "network.onnx",  # where to save the model (can be a file or file-like object)
    export_params=False,  # do NOT store parameter weights inside the model file
    opset_version=17,  # the ONNX opset version to export the model to
    do_constant_folding=True,  # whether to execute constant folding for optimization
    input_names=input_names,  # the model's input names
    output_names=["output"],  # the model's output names
)

  votes_count = torch.tensor([num_bids]).reshape(1, 1)
  return fn(g, to_cast_func(g, input, False), to_cast_func(g, other, False))


In [21]:
import ezkl

# Paths shared by the ezkl steps: the exported ONNX model and the settings
# file that gen_settings writes.
model_path = "network.onnx"
settings_path = "settings.json"

# Visibility of each part of the circuit: inputs and outputs are public,
# parameters are fixed.
py_run_args = ezkl.PyRunArgs()
py_run_args.input_visibility = "public"
py_run_args.output_visibility = "public"
py_run_args.param_visibility = "fixed"  # private by default

res = ezkl.gen_settings(model_path, settings_path, py_run_args=py_run_args)
# Stop here with a readable message if settings generation did not succeed.
assert res, f"ezkl.gen_settings did not report success for {model_path} -> {settings_path}"