In [53]:
import random
import string
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd

In [54]:
def generate_random_address():
    """Return an address-like token of ten random ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    picked = random.choices(alphabet, k=10)
    return "".join(picked)


def generate_random_datetime(start, end):
    """Pick a random datetime between 'start' and 'end' (both inclusive).

    The result is 'start' plus a whole number of seconds; the span between
    the two bounds is truncated to an integer number of seconds first.
    """
    span_seconds = int((end - start).total_seconds())
    offset = random.randint(0, span_seconds)
    return start + timedelta(seconds=offset)


def generate_votes(max_projects, max_votes):
    """Generate a random votes array.

    Draws between 1 and 'max_votes' distinct project indices from
    range(max_projects) and pairs each one with a random amount.

    Args:
        max_projects: Number of available projects; indices are sampled
            without replacement from range(max_projects).
        max_votes: Upper bound on the number of votes to generate.

    Returns:
        A list of dicts of the form {"amount": "<int as str>",
        "projectId": "proj<index>"}, where the amount is an integer in
        [1000, 1000000] rendered as a string. The list is empty when either
        limit is below 1, because no vote can be cast then.
    """
    # Sampling is without replacement, so a ballot can never contain more
    # votes than there are projects. Clamping avoids the ValueError that
    # random.sample raises when asked for more items than the population.
    vote_cap = min(max_votes, max_projects)
    if vote_cap < 1:
        return []

    num_votes = random.randint(1, vote_cap)
    projects = random.sample(range(max_projects), num_votes)
    return [
        {"amount": str(random.randint(1000, 1000000)), "projectId": f"proj{proj_id}"}
        for proj_id in projects
    ]

In [55]:
# Parameters of the synthetic ballot data set
num_rows = 20
max_projects_in_ballot = 2
max_votes = 1
start_date = datetime(2023, 9, 1)
end_date = datetime(2023, 12, 1)

# Fix the seed so every run yields the same rows
random.seed(42)

# Build one record per ballot. The random draws happen in a fixed order
# (flags, timestamps, votes, address), so the seeded output is stable.
data = []
for _ in range(num_rows):
    has_published = random.choice([True, False])
    # A published ballot always counts as voted; otherwise flip a coin.
    has_voted = has_published or random.choice([True, False])

    # Timestamps are ordered: created <= updated (<= published, if any).
    created_at = generate_random_datetime(start_date, end_date)
    updated_at = generate_random_datetime(created_at, end_date)

    if has_published:
        published_at = generate_random_datetime(updated_at, end_date)
        votes = generate_votes(max_projects_in_ballot, max_votes)
    else:
        # Unpublished ballots carry neither a publish time nor votes.
        published_at = None
        votes = []

    projects_in_ballot = len(votes)

    data.append(
        {
            "Address": generate_random_address(),
            "Has voted": has_voted,
            "Has published": has_published,
            "Published at": published_at,
            "Created at": created_at,
            "Updated at": updated_at,
            "Projects in ballot": projects_in_ballot,
            "Votes": votes,
        }
    )

In [56]:
# Turn the list of row dicts into a DataFrame (one column per dict key)
df = pd.DataFrame(data=data)
df.head(n=5)  # Preview the first five rows

Unnamed: 0,Address,Has voted,Has published,Published at,Created at,Updated at,Projects in ballot,Votes
0,TP3fAbnFbm,True,True,2023-11-21 02:25:59,2023-09-03 10:16:45,2023-11-14 10:13:01,1,"[{'amount': '147316', 'projectId': 'proj0'}]"
1,r1VjArnVgx,True,False,NaT,2023-10-14 14:43:58,2023-11-12 05:16:37,0,[]
2,YTH8xIZM1J,False,False,NaT,2023-10-29 14:47:01,2023-11-11 10:58:21,0,[]
3,ogrNwwmq6O,True,True,2023-11-14 04:23:37,2023-09-05 10:46:42,2023-11-08 15:38:00,1,"[{'amount': '897865', 'projectId': 'proj0'}]"
4,q5QnuVdYXy,True,True,2023-11-28 17:47:00,2023-10-22 20:38:10,2023-11-27 06:07:49,1,"[{'amount': '398887', 'projectId': 'proj0'}]"


In [57]:
# Save the DataFrame to a CSV file. The target directory is created first so
# the export also works on a fresh checkout where "data/" does not exist yet;
# to_csv does not create missing parent directories on its own.
output_path = Path("data/dummy_data_rpgf3.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)