In [22]:
import pandas as pd
import numpy as np
import random


In [37]:

def generate_demand(od_data):
    """
    randomly generate agent with weighted sampling
    """

    # Step 1: Calculate the total demand from the original dataset
    total_demand = od_data['Ton'].sum()

    # Step 2: Create dictionaries with the total demand from each origin and destination
    origin_demand = od_data.groupby('O')['Ton'].sum().to_dict()
    destination_demand = od_data.groupby('D')['Ton'].sum().to_dict()

    # Normalize the demands to get probabilities
    total_origin_demand = sum(origin_demand.values())
    total_destination_demand = sum(destination_demand.values())

    assert total_origin_demand == total_destination_demand

    origin_probabilities = {o: d / total_origin_demand for o, d in origin_demand.items()}
    destination_probabilities = {d: dmd / total_destination_demand for d, dmd in destination_demand.items()}

    # Step 3: Generate a new dataset
    new_data = []

    for _ in range(int(total_demand)):
        # Select origin based on weighted probability
        origin = np.random.choice(list(origin_probabilities.keys()), p=list(origin_probabilities.values()))
        
        # Select destination - 70% chance to be either '9', '10', '7', or '18'
        if random.random() < 0.7:
            destination = random.choice(['9', '10', '7', '18'])
        else:
            destination = np.random.choice(list(destination_probabilities.keys()), p=list(destination_probabilities.values()))
        
        # Assuming each entry in the new dataset has a demand of 1 to match the total demand
        new_data.append([origin, destination, 1])

    # Convert the new data into a DataFrame
    new_od_data = pd.DataFrame(new_data, columns=['O', 'D', 'Ton'])

    # Aggregate the demand for the same origin to destination pairs
    aggregated_od_data = new_od_data.groupby(['O', 'D']).count().reset_index()
    return aggregated_od_data





In [33]:
def generate_demand(od_data):
    # Step 1: Calculate the total demand from the original dataset
    total_demand = od_data['Ton'].sum()

    # Step 2: Create a list of all origin-destination pairs with their corresponding demand
    od_demand = od_data.groupby(['O', 'D'])['Ton'].sum().reset_index()

    # Normalize the demands to get probabilities
    od_demand['Probability'] = od_demand['Ton'] / total_demand

    # Create a cumulative probability distribution for efficient sampling
    od_demand['Cumulative_Prob'] = od_demand['Probability'].cumsum()

    # Step 3: Generate a new dataset
    new_data = []

    for _ in range(int(total_demand)):
        # Randomly select an origin-destination pair based on weighted probability
        random_prob = random.random()
        selected_pair = od_demand[od_demand['Cumulative_Prob'] >= random_prob].iloc[0]
        origin, destination = selected_pair['O'], selected_pair['D']

        # 70% chance to override the destination to be either '9', '10', '7', or '18'
        if random.random() < 0.7:
            destination = random.choice(['9', '10', '7', '18'])

        # Assuming each entry in the new dataset has a demand of 1 to match the total demand
        new_data.append([origin, destination, 1])

    # Convert the new data into a DataFrame
    new_od_data = pd.DataFrame(new_data, columns=['O', 'D', 'Ton'])

    # Aggregate the demand for the same origin to destination pairs
    aggregated_od_data = new_od_data.groupby(['O', 'D']).count().reset_index()
    aggregated_od_data.rename(columns={'Ton': 'Total_Demand'}, inplace=True)

    return aggregated_od_data




In [38]:
# Load the uploaded OD demand file
od_data = pd.read_csv("../data/SiouxFalls/SiouxFalls_od.csv")

od_df = generate_demand(od_data)

In [39]:
len(od_df)

672

In [40]:
od_df.to_csv("../data/SiouxFalls/SiouxFalls_od_dist2.csv", index=False)