In [43]:
import pandas as pd
from datetime import time

def calculate_distance_matrix(df):
    """
    Calculate a distance matrix based on the dataframe, df.

    Args:
        df (pandas.DataFrame)

    Returns:
        pandas.DataFrame: Distance matrix
    """
    unique_ids = df['id_start'].append(df['id_end']).unique()
    distance_matrix = pd.DataFrame(index=unique_ids, columns=unique_ids).fillna(0)

    for index, row in df.iterrows():
        distance_matrix.at[row['id_start'], row['id_end']] = row['distance']

    distance_matrix += distance_matrix.T
    return distance_matrix

def unroll_distance_matrix(df):
    """
    Unroll a distance matrix to a DataFrame in the style of the initial dataset.

    Args:
        df (pandas.DataFrame)

    Returns:
        pandas.DataFrame: Unrolled DataFrame containing columns 'id_start', 'id_end', and 'distance'.
    """
    unrolled_df = df.melt(id_vars=['id_start'], var_name='id_end', value_name='distance')
    unrolled_df = unrolled_df[unrolled_df['id_start'] != unrolled_df['id_end']]
    return unrolled_df

def find_ids_within_ten_percentage_threshold(df, reference_id):
    """
    Find all IDs whose average distance lies within 10% of the average distance of the reference ID.

    Args:
        df (pandas.DataFrame)
        reference_id (int)

    Returns:
        pandas.DataFrame: DataFrame with IDs whose average distance is within the specified percentage threshold
                          of the reference ID's average distance.
    """
    reference_avg_distance = df[df['id_start'] == reference_id]['distance'].mean()
    lower_threshold = reference_avg_distance * 0.9
    upper_threshold = reference_avg_distance * 1.1

    result_df = df.groupby('id_start')['distance'].mean().reset_index()
    result_df = result_df[(result_df['distance'] >= lower_threshold) & (result_df['distance'] <= upper_threshold)]
    return result_df

def calculate_toll_rate(df):
    """
    Calculate toll rates for each vehicle type based on the unrolled DataFrame.

    Args:
        df (pandas.DataFrame)

    Returns:
        pandas.DataFrame
    """
    rate_coefficients = {'moto': 0.8, 'car': 1.2, 'rv': 1.5, 'bus': 2.2, 'truck': 3.6}

    for vehicle_type, rate in rate_coefficients.items():
        df[vehicle_type] = df['distance'] * rate

    return df

def calculate_time_based_toll_rates(df):
    """
    Calculate time-based toll rates for different time intervals within a day.

    Args:
        df (pandas.DataFrame)

    Returns:
        pandas.DataFrame
    """
    time_ranges = [(time(0, 0), time(10, 0), 0.8),
                   (time(10, 0), time(18, 0), 1.2),
                   (time(18, 0), time(23, 59, 59), 0.8)]

    weekend_discount_factor = 0.7

    for start_time, end_time, discount_factor in time_ranges:
        mask = (df['start_time'] >= start_time) & (df['end_time'] <= end_time)
        df.loc[mask, ['car', 'rv', 'bus', 'truck']] *= discount_factor

    df.loc[df['start_day'].isin(['Saturday', 'Sunday']), ['car', 'rv', 'bus', 'truck']] *= weekend_discount_factor

    return df

# Example usage:
# df = pd.read_csv('dataset-3.csv')
# df_distance_matrix = calculate_distance_matrix(df)
# df_unrolled = unroll_distance_matrix(df_distance_matrix)
# df_threshold = find_ids_within_ten_percentage_threshold(df_unrolled, reference_id=1001400)
# df_toll_rate = calculate_toll_rate(df_unrolled)
# df_time_based_toll_rates = calculate_time_based_toll_rates(df_unrolled)


In [44]:
df=pd.read_csv('dataset-3.csv')
print(df)

    id_start   id_end  distance
0    1001400  1001402       9.7
1    1001402  1001404      20.2
2    1001404  1001406      16.0
3    1001406  1001408      21.7
4    1001408  1001410      11.1
5    1001410  1001412      15.6
6    1001412  1001414      18.2
7    1001414  1001416      13.2
8    1001416  1001418      13.6
9    1001418  1001420      12.9
10   1001420  1001422       9.6
11   1001422  1001424      11.4
12   1001424  1001426      18.6
13   1001426  1001428      15.8
14   1001428  1001430       8.6
15   1001430  1001432       9.0
16   1001432  1001434       7.9
17   1001434  1001436       4.0
18   1001436  1001438       9.0
19   1001436  1001437       5.0
20   1001438  1001437       4.0
21   1001438  1001440      10.0
22   1001440  1001442       3.9
23   1001442  1001488       4.5
24   1001488  1004356       4.0
25   1004356  1004354       2.0
26   1004354  1004355       2.0
27   1004355  1001444       0.7
28   1001444  1001446       6.6
29   1001446  1001448       9.6
30   100

In [47]:
result_matrix = unroll_distance_matrix(df)
print(result_matrix)

    id_start    id_end   distance
0    1001400    id_end  1001402.0
1    1001402    id_end  1001404.0
2    1001404    id_end  1001406.0
3    1001406    id_end  1001408.0
4    1001408    id_end  1001410.0
..       ...       ...        ...
83   1001462  distance       26.7
84   1001464  distance        8.5
85   1001466  distance       10.7
86   1001468  distance       10.6
87   1001470  distance       16.0

[88 rows x 3 columns]


  unrolled_df = df.melt(id_vars=['id_start'], var_name='id_end', value_name='distance')


{}
