# Task-1


In [51]:
import pandas as pd
import numpy as np

In [52]:

# df = pd.read_csv("datasets\dataset-1.csv")
# df2 = pd.read_csv("datasets\dataset-2.csv").dropna()
df = pd.read_csv("datasets/dataset-3.csv")
df.head()


Unnamed: 0,id_start,id_end,distance
0,1001400,1001402,9.7
1,1001402,1001404,20.2
2,1001404,1001406,16.0
3,1001406,1001408,21.7
4,1001408,1001410,11.1


In [53]:
# Assuming unrolled_df is the DataFrame from Question 2
all_reference_values = df['id_start'].unique()
# Choose one of the values from all_reference_values
reference_value = all_reference_values[0]  # Replace with the desired choice


In [54]:
def unroll_distance_matrix(df) -> pd.DataFrame():
    """
    Unroll a distance matrix to a DataFrame in the style of the initial dataset.

    Args:
        df (pandas.DataFrame)

    Returns:
        pandas.DataFrame: Unrolled DataFrame containing columns 'id_start', 'id_end', and 'distance'.
    """
    # Write your logic here

    stacked_distances = df.stack()

    unrolled_df = stacked_distances.reset_index()

    unrolled_df.columns = ['id_start', 'id_end', 'distance']

    unrolled_df = unrolled_df[unrolled_df['id_start'] != unrolled_df['id_end']]

    return unrolled_df

In [59]:
result_matrix = unroll_distance_matrix(df)
result_matrix
# df.head()

Unnamed: 0,id_start,id_end,distance
0,0,id_start,1001400.0
1,0,id_end,1001402.0
2,0,distance,9.7
3,1,id_start,1001402.0
4,1,id_end,1001404.0
...,...,...,...
127,42,id_end,1001470.0
128,42,distance,10.6
129,43,id_start,1001470.0
130,43,id_end,1001472.0


In [56]:
all_reference_values = result_matrix['id_start'].unique()

reference_value = all_reference_values[0]
reference_value

0

In [57]:
def find_ids_within_ten_percentage_threshold(df,reference_value) -> pd.DataFrame():
    """
    Find all IDs whose average distance lies within 10% of the average distance of the reference ID.

    Args:
        df (pandas.DataFrame)
        reference_id (int)

    Returns:
        pandas.DataFrame: DataFrame with IDs whose average distance is within the specified percentage threshold
                          of the reference ID's average distance.
    """
    # Write your logic here
    reference_df = df[df['id_start'] == reference_value]

    average_distance = reference_df['distance'].mean()

    lower_bound = 0.9 * average_distance
    upper_bound = 1.1 * average_distance

    result_df = df[(df['distance'] >= lower_bound) &
                   (df['distance'] <= upper_bound)]

    result_ids = sorted(result_df['id_start'].unique())

    return result_ids

In [58]:
result_matrix = find_ids_within_ten_percentage_threshold(df,reference_value)
print(result_matrix)


[]


In [62]:
def calculate_toll_rate(df):
    """
    Calculate toll rates based on vehicle types.

    Args:
        df (pandas.DataFrame): Input DataFrame with columns id_start, id_end, and distance.

    Returns:
        pandas.DataFrame: DataFrame with added columns for toll rates (moto, car, rv, bus, truck).
    """
    # Define rate coefficients
    rate_coefficients = {'moto': 0.8, 'car': 1.2, 'rv': 1.5, 'bus': 2.2, 'truck': 3.6}

    # Calculate toll rates for each vehicle type
    for vehicle_type, rate in rate_coefficients.items():
        df[vehicle_type] = df['distance'] * rate

    return df


result_df = calculate_toll_rate(df)
result_df.head()


Unnamed: 0,id_start,id_end,distance,moto,car,rv,bus,truck
0,1001400,1001402,9.7,7.76,11.64,14.55,21.34,34.92
1,1001402,1001404,20.2,16.16,24.24,30.3,44.44,72.72
2,1001404,1001406,16.0,12.8,19.2,24.0,35.2,57.6
3,1001406,1001408,21.7,17.36,26.04,32.55,47.74,78.12
4,1001408,1001410,11.1,8.88,13.32,16.65,24.42,39.96


In [66]:
import pandas as pd
import datetime

def calculate_time_based_toll_rates(df):
    """
    Calculate time-based toll rates for different time intervals within a day.

    Args:
        df (pandas.DataFrame): Input DataFrame with columns id_start, id_end, distance, and vehicle types.

    Returns:
        pandas.DataFrame: DataFrame with added columns for time-based toll rates.
    """
    # Define time ranges and discount factors
    time_ranges_weekdays = [(datetime.time(0, 0, 0), datetime.time(10, 0, 0)),
                            (datetime.time(10, 0, 0), datetime.time(18, 0, 0)),
                            (datetime.time(18, 0, 0), datetime.time(23, 59, 59))]

    time_ranges_weekends = [(datetime.time(0, 0, 0), datetime.time(23, 59, 59))]

    discount_factors_weekdays = [0.8, 1.2, 0.8]
    discount_factor_weekends = 0.7

    # Create new columns for start_day, start_time, end_day, and end_time
    df['start_day'] = df['startDay'].apply(lambda x: datetime.datetime.strptime(str(x), '%Y-%m-%d').strftime('%A'))
    df['start_time'] = df['startTime'].apply(lambda x: datetime.datetime.strptime(str(x), '%H:%M:%S').time())
    df['end_day'] = df['endDay'].apply(lambda x: datetime.datetime.strptime(str(x), '%Y-%m-%d').strftime('%A'))
    df['end_time'] = df['endTime'].apply(lambda x: datetime.datetime.strptime(str(x), '%H:%M:%S').time())

    # Apply discount factors based on time ranges
    for i, (start_time, end_time) in enumerate(time_ranges_weekdays):
        mask_weekdays = (df['start_time'] >= start_time) & (df['end_time'] <= end_time) & (df['start_day'].isin(['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']))
        df.loc[mask_weekdays, ['moto', 'car', 'rv', 'bus', 'truck']] *= discount_factors_weekdays[i]

    for start_time, end_time in time_ranges_weekends:
        mask_weekends = (df['start_time'] >= start_time) & (df['end_time'] <= end_time) & (df['start_day'].isin(['Saturday', 'Sunday']))
        df.loc[mask_weekends, ['moto', 'car', 'rv', 'bus', 'truck']] *= discount_factor_weekends

    return df

# Example usage:
# Assuming result_df is the DataFrame from Question 4
result_df1 = calculate_time_based_toll_rates(result_df)
print(result_df1)


KeyError: 'startDay'