In [None]:
import pandas as pd

# Read the Islamic-calendar CSV. The file is decoded as latin-1 (instead of
# pandas' default UTF-8) to avoid decoding errors while loading.
file_path = '/content/Islamic calendar.csv'
df = pd.read_csv(
    filepath_or_buffer=file_path,
    encoding='latin-1',
)
df.head()  # preview the first five rows

Unnamed: 0.1,Unnamed: 0,Event,Day
0,06-Jan-23,Fasting Ayyamul Bidh,Friday
1,07-Jan-23,Fasting Ayyamul Bidh,Saturday
2,08-Jan-23,Fasting Ayyamul Bidh,Sunday
3,23-Jan-23,Start of Rajab (Sacred month),Monday
4,04-Feb-23,Fasting Ayyamul Bidh,Saturday


In [None]:
# Rename columns for clarity. The assignment is positional: the unnamed first
# column (which holds the date strings) becomes 'Date', while 'Event' and 'Day'
# keep their names. pandas raises ValueError if the frame does not have exactly
# three columns.
df.columns = ['Date', 'Event', 'Day']

In [None]:
# Parse the 'Date' strings (e.g. '06-Jan-23': day, abbreviated month, two-digit
# year) into datetime values. No string re-formatting happens here; the
# YYYY-MM-DD appearance is simply how pandas displays datetimes.
# NOTE: errors='coerce' turns any value that does not match the format into NaT
# instead of raising, so check df['Date'].isna().sum() if rows seem to be missing.
df['Date'] = pd.to_datetime(df['Date'], format='%d-%b-%y', errors='coerce')


In [None]:
# Show the frame after the column rename and date parsing.
df

Unnamed: 0,Date,Event,Day
0,2023-01-06,Fasting Ayyamul Bidh,Friday
1,2023-01-07,Fasting Ayyamul Bidh,Saturday
2,2023-01-08,Fasting Ayyamul Bidh,Sunday
3,2023-01-23,Start of Rajab (Sacred month),Monday
4,2023-02-04,Fasting Ayyamul Bidh,Saturday
...,...,...,...
198,2025-11-22,Start of Jumada al-Akhirah,Saturday
199,2025-12-04,Fasting Ayyamul Bidh,Thursday
200,2025-12-05,Fasting Ayyamul Bidh,Friday
201,2025-12-06,Fasting Ayyamul Bidh,Saturday


In [None]:
#from google.colab import files

#df.to_csv('ISC_df.csv', encoding='utf-8', index=False)
#files.download('ISC_df.csv')


In [None]:
# Step 1: Prediction period - one entry per calendar day, both endpoints included
start_date, end_date = (
    pd.to_datetime(boundary) for boundary in ('2023-11-06', '2024-08-26')
)
prediction_dates = pd.date_range(start_date, end_date, freq='D')

# Step 2: Count how many rows (events) fall in each look-back window before a prediction date
def create_aggregated_features(df, prediction_date, windows=(7, 14, 30)):
    """Count rows of ``df`` dated within each look-back window before ``prediction_date``.

    For every ``window`` the half-open interval
    ``[prediction_date - window days, prediction_date)`` is used, so exactly
    ``window`` calendar days are covered: the day before the prediction date is
    included and the prediction date itself is excluded.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime-like ``'Date'`` column.
    prediction_date : pandas.Timestamp
        Date the features are built for.
    windows : iterable of int, optional
        Window lengths in days. Defaults to ``(7, 14, 30)``.

    Returns
    -------
    dict
        Maps ``'pickups_past_{window}_days'`` to the integer row count for
        that window.
    """
    dates = df['Date']  # look the column up once instead of once per comparison
    features = {}

    for window in windows:
        window_start = prediction_date - pd.Timedelta(days=window)

        # The upper bound is the prediction date itself (exclusive). Comparing
        # with `< prediction_date - 1 day` would drop the most recent day and
        # shrink every window to `window - 1` days.
        in_window = (dates >= window_start) & (dates < prediction_date)

        # NaT dates compare as False on both sides, so they are never counted.
        features[f'pickups_past_{window}_days'] = int(in_window.sum())

    return features

# Steps 3-5: Build one feature record per prediction date, then assemble the table
aggregated_features_list = []
for prediction_date in prediction_dates:
    # Window counts first, then the date they belong to (keeps the column order)
    features = {
        **create_aggregated_features(df, prediction_date),
        'prediction_date': prediction_date,
    }
    aggregated_features_list.append(features)

# One row per prediction date
isc_date = pd.DataFrame(aggregated_features_list)



In [None]:
# Show the per-date window counts built above.
isc_date

Unnamed: 0,pickups_past_7_days,pickups_past_14_days,pickups_past_30_days,prediction_date
0,1,3,4,2023-11-06
1,0,3,4,2023-11-07
2,0,3,4,2023-11-08
3,0,3,4,2023-11-09
4,0,3,4,2023-11-10
...,...,...,...,...
290,3,3,4,2024-08-22
291,3,3,4,2024-08-23
292,3,3,4,2024-08-24
293,3,3,4,2024-08-25


In [None]:
import pandas as pd

# Date range that predictions are generated for
class PredictionWindow:
    """Span of dates between ``start`` and ``end`` at a fixed frequency.

    Attributes:
        start_date: ``start`` converted with ``pd.to_datetime``.
        end_date: ``end`` converted with ``pd.to_datetime``.
        prediction_dates: ``pd.date_range`` from ``start_date`` to ``end_date``
            using ``frequency`` (daily by default).
    """

    def __init__(self, start, end, frequency='D'):
        first_day, last_day = pd.to_datetime(start), pd.to_datetime(end)
        self.start_date = first_day
        self.end_date = last_day
        self.prediction_dates = pd.date_range(first_day, last_day, freq=frequency)

# Counts events in look-back windows before a prediction date
class FeatureAggregator:
    """Counts rows of a DataFrame whose ``'Date'`` falls in look-back windows.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a datetime-like ``'Date'`` column.
    windows : sequence of int, optional
        Window lengths in days. ``None`` or an empty sequence selects the
        default ``[7, 14, 30]``.
    """

    def __init__(self, dataframe, windows=None):
        self.df = dataframe
        # None or an empty sequence falls back to the default windows.
        self.windows = windows if windows else [7, 14, 30]

    def aggregate(self, prediction_date):
        """Return the row count for every window ending just before ``prediction_date``.

        Each window is the half-open interval
        ``[prediction_date - window days, prediction_date)``: it spans exactly
        ``window`` calendar days, includes the day before the prediction date
        and excludes the prediction date itself.

        Parameters
        ----------
        prediction_date : pandas.Timestamp
            Date the features are built for.

        Returns
        -------
        dict
            Maps ``'pickups_past_{window}_days'`` to the integer row count.
        """
        dates = self.df['Date']
        features = {}
        for window in self.windows:
            window_start = prediction_date - pd.Timedelta(days=window)

            # Upper bound is the prediction date (exclusive). Using
            # `< prediction_date - 1 day` would skip the most recent day and
            # leave only `window - 1` days in the window.
            in_window = (dates >= window_start) & (dates < prediction_date)

            # NaT dates compare as False, so they are never counted.
            features[f'pickups_past_{window}_days'] = int(in_window.sum())

        return features

# Build the feature table for every date in a prediction period
def create_features_for_predictions(df, start, end):
    """Return one row of window counts per prediction date between ``start`` and ``end``.

    A ``PredictionWindow`` supplies the dates and a ``FeatureAggregator`` with
    its default windows supplies the counts. Each row holds the aggregator's
    features followed by a ``'prediction_date'`` column.
    """
    dates = PredictionWindow(start, end).prediction_dates
    aggregator = FeatureAggregator(df)

    rows = [
        {**aggregator.aggregate(day), 'prediction_date': day}
        for day in dates
    ]
    return pd.DataFrame(rows)

# Generate the feature table for 2023-11-06 through 2024-08-26
aggregated_features_df = create_features_for_predictions(
    df,
    start='2023-11-06',
    end='2024-08-26',
)




In [None]:
# Show the feature table returned by create_features_for_predictions.
aggregated_features_df

Unnamed: 0,pickups_past_7_days,pickups_past_14_days,pickups_past_30_days,prediction_date
0,1,3,4,2023-11-06
1,0,3,4,2023-11-07
2,0,3,4,2023-11-08
3,0,3,4,2023-11-09
4,0,3,4,2023-11-10
...,...,...,...,...
290,3,3,4,2024-08-22
291,3,3,4,2024-08-23
292,3,3,4,2024-08-24
293,3,3,4,2024-08-25
