In [6]:
import random
import string
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pandas as pd

In [7]:
def generate_random_address():
    """Return a 10-character pseudo-address of random ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    # Sampling is with replacement, so characters may repeat.
    picked_chars = random.choices(alphabet, k=10)
    return "".join(picked_chars)


def generate_random_datetime(start, end):
    """Return a random datetime in the closed interval [start, end].

    The result is ``start`` plus a whole number of seconds, so it never
    exceeds ``end``.
    """
    # Fractional seconds in the span are truncated before drawing.
    span_seconds = int((end - start).total_seconds())
    offset_seconds = random.randint(0, span_seconds)
    return start + timedelta(seconds=offset_seconds)


def generate_votes(max_projects, max_votes):
    """Generate a random votes array.

    Picks between 1 and ``max_votes`` distinct project indices from
    ``range(max_projects)`` and assigns each one a random amount.

    Args:
        max_projects: Size of the project pool; IDs are drawn from
            ``proj0`` .. ``proj{max_projects - 1}``.
        max_votes: Upper bound on the number of votes. Capped at
            ``max_projects`` because each project can appear only once.

    Returns:
        A list of ``{"amount": str, "projectId": str}`` dicts with unique
        project IDs. The list is empty when no vote can be cast, i.e. when
        either bound is below 1.
    """
    # random.sample raises ValueError when asked for more items than the
    # population holds, so never request more votes than there are projects.
    vote_cap = min(max_votes, max_projects)
    if vote_cap < 1:
        return []

    num_votes = random.randint(1, vote_cap)
    projects = random.sample(range(max_projects), num_votes)
    return [
        # Amounts are stored as strings, not integers.
        {"amount": str(random.randint(1000, 1000000)), "projectId": f"proj{proj_id}"}
        for proj_id in projects
    ]

In [8]:
# Generation parameters
num_rows = 1_000  # number of rows to generate
max_projects_in_ballot = 500  # size of the project pool votes are drawn from
max_votes = 20  # upper bound on the number of votes in one ballot
start_date = datetime(year=2023, month=9, day=1)  # earliest possible timestamp
end_date = datetime(year=2023, month=12, day=1)  # latest possible timestamp

# Seed the stdlib RNG so re-running this cell reproduces the same data
random.seed(42)

# Generate data: one dict per row, collected into `data`.
# The order of the random draws below determines the seeded output, so
# statements that consume randomness must stay in this order.
data = []
for _ in range(num_rows):
    has_published = random.choice([True, False])
    # A published ballot always counts as voted; otherwise it is a coin flip.
    has_voted = has_published or random.choice([True, False])

    # Timestamps are chained so that created <= updated (<= published).
    created_at = generate_random_datetime(start_date, end_date)
    updated_at = generate_random_datetime(created_at, end_date)

    if has_published:
        published_at = generate_random_datetime(updated_at, end_date)
        votes = generate_votes(max_projects_in_ballot, max_votes)
    else:
        # Unpublished ballots carry no publish time and no votes.
        published_at = None
        votes = []

    projects_in_ballot = len(votes)

    row = {
        "Address": generate_random_address(),
        "Has voted": has_voted,
        "Has published": has_published,
        "Published at": published_at,
        "Created at": created_at,
        "Updated at": updated_at,
        "Projects in ballot": projects_in_ballot,
        "Votes": votes,
    }
    data.append(row)

In [9]:
# Build the DataFrame in a single call from the list of row dicts
df = pd.DataFrame(data)
df.head()  # Preview the first rows as this cell's output

Unnamed: 0,Address,Has voted,Has published,Published at,Created at,Updated at,Projects in ballot,Votes
0,FbmOHnKYaX,True,True,2023-11-21 02:25:59,2023-09-03 10:16:45,2023-11-14 10:13:01,8,"[{'amount': '92161', 'projectId': 'proj114'}, ..."
1,nVgxwvqcCh,False,False,NaT,2023-09-27 23:29:13,2023-10-13 01:46:49,0,[]
2,ZM1JRcoreo,True,False,NaT,2023-10-24 14:20:52,2023-11-07 19:55:30,0,[]
3,IQ0Wobtqn6,True,True,2023-11-25 21:44:40,2023-10-07 21:45:29,2023-11-03 21:29:00,12,"[{'amount': '666822', 'projectId': 'proj83'}, ..."
4,y4CqpIqK3y,True,False,NaT,2023-11-03 15:16:58,2023-11-27 20:55:45,0,[]


In [10]:
# Save the DataFrame to a CSV file. The target directory is created first
# because to_csv does not create missing parent directories and would
# otherwise fail on a fresh checkout.
output_path = Path("data/dummy_data_rpgf3.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)