<a href="https://colab.research.google.com/github/yogeshagre/submissions/blob/master/python_task_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Python Task 2




In [None]:
# Mounting google drive
# Colab-only step: exposes the user's Google Drive inside the runtime at
# /content/drive so that files stored on Drive can be read by later cells.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

def calculate_distance_matrix(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate the distance matrix between toll locations.

    Every row of ``df`` is treated as a bidirectional road segment. The
    distance between two locations that are not directly connected is the
    shortest sum of segment distances along the known routes (A -> B -> C
    gives A -> C = A -> B + B -> C).

    Args:
        df (pandas.DataFrame): Input DataFrame with columns 'id_start', 'id_end', and 'distance'.

    Returns:
        pandas.DataFrame: Square, symmetric distance matrix whose index and columns are the
                          sorted unique location IDs. The diagonal is 0, and pairs with no
                          connecting route are reported as 0 as well.
    """
    # Local import: numpy is not imported at file level in this notebook.
    import numpy as np

    # Use the union of both ID columns so the matrix is always square, even
    # when some IDs only ever appear as a start or only as an end.
    ids = sorted(set(df['id_start']) | set(df['id_end']))
    position = {location: offset for offset, location in enumerate(ids)}
    size = len(ids)

    # Start from "unreachable" everywhere except the zero diagonal.
    distances = np.full((size, size), np.inf)
    np.fill_diagonal(distances, 0.0)

    # Seed the matrix with the known segments, mirrored for symmetry. When a
    # pair is listed more than once the shortest segment wins; NaN distances
    # fail the comparison and are therefore ignored.
    for start, end, distance in zip(df['id_start'], df['id_end'], df['distance']):
        row, col = position[start], position[end]
        if distance < distances[row, col]:
            distances[row, col] = distances[col, row] = distance

    # Floyd-Warshall relaxation: allow each location in turn to act as an
    # intermediate stop and keep the shorter of the direct and routed distance.
    for via in range(size):
        distances = np.minimum(distances, distances[:, [via]] + distances[[via], :])

    # Pairs that are still unreachable are reported as 0, matching the
    # fill value used for missing entries.
    distances[np.isinf(distances)] = 0.0

    return pd.DataFrame(
        distances,
        index=pd.Index(ids, name='id_start'),
        columns=pd.Index(ids, name='id_end'),
    )

# Example usage:
# Reads the dataset from the Colab runtime's local filesystem, builds the
# distance matrix from it and prints the matrix.
# NOTE(review): the path points at /content, not at the /content/drive mount —
# confirm the CSV is uploaded to the runtime before running this cell.
df = pd.read_csv("/content/dataset-3.csv")
result = calculate_distance_matrix(df)
print(result)


         1001400  1001402  1001404  1001406  1001408  1001410  1001412  \
1001400      0.0      0.0      0.0      0.0      0.0      0.0      0.0   
1001402      0.0      0.0      0.0      0.0      0.0      0.0      0.0   
1001404      0.0      0.0      0.0      0.0      0.0      0.0      0.0   
1001406      0.0      0.0      0.0      0.0      0.0      0.0      0.0   
1001408      0.0      0.0      0.0      0.0      0.0      0.0      0.0   
1001410      0.0      0.0      0.0      0.0      0.0      0.0      0.0   
1001412      0.0      0.0      0.0      0.0      0.0      0.0      0.0   
1001414      0.0      0.0      0.0      0.0      0.0      0.0      0.0   
1001416      0.0      0.0      0.0      0.0      0.0      0.0      0.0   
1001418      0.0      0.0      0.0      0.0      0.0      0.0      0.0   
1001420      0.0      0.0      0.0      0.0      0.0      0.0      0.0   
1001422      0.0      0.0      0.0      0.0      0.0      0.0      0.0   
1001424      0.0      0.0      0.0    

Question 2: Unroll Distance Matrix


In [None]:
import pandas as pd

def unroll_distance_matrix(df: pd.DataFrame) -> pd.DataFrame:
    """
    Unroll a distance matrix to a DataFrame in the style of the initial dataset.

    Two input layouts are accepted:

    * a distance matrix (row labels are start IDs, column labels are end IDs,
      cells are distances) - the layout this function is documented for;
    * the long layout with 'id_start', 'id_end' and 'distance' columns, kept
      for callers that pass the raw dataset. Every ordered pair of the IDs
      found in either column is emitted; pairs without a row get a NaN
      distance, and for duplicated pairs the first row wins.

    Args:
        df (pandas.DataFrame): Input DataFrame representing a distance matrix.

    Returns:
        pandas.DataFrame: Unrolled DataFrame containing columns 'id_start', 'id_end', and 'distance',
                          one row per ordered pair with id_start != id_end.
    """
    output_columns = ['id_start', 'id_end', 'distance']
    pair_names = ['id_start', 'id_end']

    if set(output_columns).issubset(df.columns):
        # Long layout: look distances up in one indexed Series instead of
        # filtering the whole frame once per pair.
        unique_ids = pd.unique(df[pair_names].values.ravel('K'))
        known = (
            df.drop_duplicates(subset=pair_names)
            .set_index(pair_names)['distance']
        )
        all_pairs = pd.MultiIndex.from_product([unique_ids, unique_ids], names=pair_names)
        unrolled = known.reindex(all_pairs).reset_index()
    else:
        # Matrix layout: from_product walks (row, column) pairs in the same
        # row-major order that ravel() flattens the cell values in.
        all_pairs = pd.MultiIndex.from_product([df.index, df.columns], names=pair_names)
        unrolled = pd.DataFrame(
            {'distance': df.to_numpy().ravel()}, index=all_pairs
        ).reset_index()

    # Drop the diagonal: a location paired with itself carries no information.
    off_diagonal = unrolled['id_start'] != unrolled['id_end']
    return unrolled.loc[off_diagonal, output_columns].reset_index(drop=True)

# Example usage:
# Assuming df is the DataFrame from Question 1
# NOTE(review): at this point `df` is the raw frame loaded from the CSV, while
# the matrix returned by calculate_distance_matrix is bound to `result` —
# confirm which of the two is meant to be unrolled here.
result_df = unroll_distance_matrix(df)
print(result_df)


Question 3: Finding IDs within Percentage Threshold

In [None]:
import pandas as pd

def find_ids_within_ten_percentage_threshold(df: pd.DataFrame, reference_id: int) -> pd.DataFrame:
    """
    Find all IDs whose average distance lies within 10% of the average distance of the reference ID.

    The average is taken per 'id_start' over that ID's 'distance' values, and
    the 10% band is inclusive at both ends.

    Args:
        df (pandas.DataFrame): Input DataFrame with columns 'id_start', 'id_end', and 'distance'.
        reference_id (int): Reference ID for calculating the average distance.

    Returns:
        pandas.DataFrame: The rows of ``df`` (same columns) belonging to every 'id_start' other than
                          ``reference_id`` whose average distance is within the 10% band around the
                          reference ID's average distance, sorted by 'id_start'. Empty when
                          ``reference_id`` never appears as an 'id_start' or has no usable average.
    """
    # One average per starting ID; this is what gets compared, not the
    # distances of individual rows.
    average_by_start = df.groupby('id_start')['distance'].mean()

    if reference_id not in average_by_start.index:
        return df.iloc[0:0].reset_index(drop=True)

    reference_avg_distance = average_by_start.loc[reference_id]

    # abs() keeps lower <= upper even if the average were negative.
    tolerance = 0.1 * abs(reference_avg_distance)
    lower_threshold = reference_avg_distance - tolerance
    upper_threshold = reference_avg_distance + tolerance

    # A NaN reference average makes both comparisons False, so nothing matches.
    in_band = (average_by_start >= lower_threshold) & (average_by_start <= upper_threshold)
    matching_ids = average_by_start.index[in_band]
    # The reference ID trivially matches itself; leave it out of the result.
    matching_ids = matching_ids[matching_ids != reference_id]

    result_df = df[df['id_start'].isin(matching_ids)]

    # Stable sort keeps each ID's rows in their original relative order.
    return result_df.sort_values(by='id_start', kind='stable').reset_index(drop=True)

# Example usage:
# df_distance_matrix = calculate_distance_matrix(df)
# unrolled_result = unroll_distance_matrix(df_distance_matrix)
# result_within_threshold = find_ids_within_ten_percentage_threshold(unrolled_result, reference_id)


Question 4: Calculate Toll Rate

In [None]:
import pandas as pd

def calculate_toll_rate(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add one toll-rate column per vehicle type to the unrolled DataFrame.

    Each new column holds the row's 'distance' multiplied by that vehicle
    type's rate coefficient. The columns are written onto ``df`` itself, and
    that same object is returned.

    Args:
        df (pandas.DataFrame): Input DataFrame with columns 'id_start', 'id_end', 'distance'.

    Returns:
        pandas.DataFrame: ``df`` with the added columns 'moto', 'car', 'rv', 'bus' and 'truck'.
    """
    # (vehicle type, rate coefficient) pairs, in output column order
    vehicle_rates = (
        ('moto', 0.8),
        ('car', 1.2),
        ('rv', 1.5),
        ('bus', 2.2),
        ('truck', 3.6),
    )

    distances = df['distance']
    for column_name, coefficient in vehicle_rates:
        df[column_name] = distances * coefficient

    return df

# Example usage:
# df_distance_matrix = calculate_distance_matrix(df)
# unrolled_result = unroll_distance_matrix(df_distance_matrix)
# df_with_toll_rates = calculate_toll_rate(unrolled_result)


Question 5: Calculate Time-Based Toll Rates

In [None]:
from datetime import time
import pandas as pd

def calculate_time_based_toll_rates(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculate time-based toll rates for each vehicle type based on the unrolled DataFrame.

    Every input row is expanded into one row per day of the week and per time
    window (7 days x 3 windows). The vehicle toll columns of each expanded row
    are multiplied by the factor for that day and window:

    * Monday-Friday: 0.8 for 00:00:00-10:00:00, 1.2 for 10:00:00-18:00:00,
      0.8 for 18:00:00-23:59:59;
    * Saturday and Sunday: 0.7 for every window.

    The input DataFrame is not modified.

    Args:
        df (pandas.DataFrame): Input DataFrame with columns 'id_start', 'id_end', 'distance' and the
                               vehicle toll columns 'moto', 'car', 'rv', 'bus', 'truck'.

    Returns:
        pandas.DataFrame: The expanded rows with the scaled vehicle columns plus 'start_day',
                          'start_time', 'end_day' and 'end_time' (times are ``datetime.time`` objects).
                          The rows generated from one input row are adjacent, ordered by day and
                          then by time window.

    Raises:
        KeyError: If any of the vehicle toll columns is missing from ``df``.
    """
    vehicle_columns = ['moto', 'car', 'rv', 'bus', 'truck']
    missing_columns = [column for column in vehicle_columns if column not in df.columns]
    if missing_columns:
        raise KeyError(f"Missing vehicle toll rate columns: {missing_columns}")

    # Define time ranges and discount factors
    time_ranges = [(time(0, 0, 0), time(10, 0, 0)), (time(10, 0, 0), time(18, 0, 0)), (time(18, 0, 0), time(23, 59, 59))]
    weekday_discount_factors = [0.8, 1.2, 0.8]
    weekend_discount_factor = 0.7

    weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
    weekend_days = ['Saturday', 'Sunday']
    schedule_columns = ['start_day', 'start_time', 'end_day', 'end_time']

    # Work on a renumbered copy without any stale schedule columns, so the
    # caller's frame is untouched and index labels equal row positions.
    base = df.drop(columns=schedule_columns, errors='ignore').reset_index(drop=True)

    expanded = []
    for day in weekdays + weekend_days:
        for (start_time, end_time), weekday_factor in zip(time_ranges, weekday_discount_factors):
            factor = weekend_discount_factor if day in weekend_days else weekday_factor

            window = base.copy()
            window[vehicle_columns] = window[vehicle_columns] * factor
            window['start_day'] = day
            window['start_time'] = start_time
            window['end_day'] = day
            window['end_time'] = end_time
            expanded.append(window)

    # Combine the results; the stable sort on the original row position
    # groups each input row's windows together in day/time order.
    df_result = pd.concat(expanded).sort_index(kind='stable').reset_index(drop=True)

    return df_result

# Example usage:
# df_distance_matrix = calculate_distance_matrix(df)
# unrolled_result = unroll_distance_matrix(df_distance_matrix)
# df_with_time_based_toll_rates = calculate_time_based_toll_rates(unrolled_result)
