<a href="https://colab.research.google.com/github/ynakenya/dataset/blob/main/Carbon_Credit_Dataset_Generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import datetime

def generate_carbon_credit_dataset(start_date, end_date, num_motorcycles, distance_km, petrol_emission_factor_kg_per_km):
    """
    Generates a dataset for calculating carbon credits based on electric motorcycle usage.

    One row is produced per motorcycle per calendar day in the inclusive range
    [start_date, end_date]. Rows are ordered by motorcycle first, then by date.
    Every row carries the same distance and emission factor, so the avoided
    CO2 value (distance_km * petrol_emission_factor_kg_per_km) is identical
    across rows.

    Args:
        start_date (str): The start date for the dataset (YYYY-MM-DD).
        end_date (str): The end date for the dataset (YYYY-MM-DD), inclusive.
        num_motorcycles (int): The number of electric motorcycles.
        distance_km (int): The distance traveled by each motorcycle per day (km).
        petrol_emission_factor_kg_per_km (float): The baseline emission factor for a petrol motorcycle (kg CO2/km).

    Returns:
        pandas.DataFrame: A DataFrame with the columns 'date', 'motorcycle_id',
        'distance_km', 'petrol_emission_factor_kg_per_km' and 'avoided_co2_kg'.
        An empty DataFrame with those same columns is returned when a date
        cannot be parsed, when end_date is earlier than start_date, or when
        num_motorcycles is less than 1.
    """
    # Fixed schema so that even an empty result exposes the expected columns.
    columns = [
        'date',
        'motorcycle_id',
        'distance_km',
        'petrol_emission_factor_kg_per_km',
        'avoided_co2_kg',
    ]

    try:
        # Convert start and end dates to date objects
        first_day = datetime.datetime.strptime(start_date, '%Y-%m-%d').date()
        last_day = datetime.datetime.strptime(end_date, '%Y-%m-%d').date()
    except ValueError:
        print("Error: Invalid date format. Please use YYYY-MM-DD.")
        return pd.DataFrame(columns=columns)  # Return an empty DataFrame on error

    # A reversed range would otherwise produce an empty result with no explanation.
    if last_day < first_day:
        print("Error: end_date must not be earlier than start_date.")
        return pd.DataFrame(columns=columns)

    # Build the inclusive list of days once; it is shared by every motorcycle.
    num_days = (last_day - first_day).days + 1
    dates = [first_day + datetime.timedelta(days=offset) for offset in range(num_days)]

    # The avoided emissions do not vary per row, so compute them a single time.
    avoided_co2_kg = distance_km * petrol_emission_factor_kg_per_km

    # Generate data for each motorcycle and each day
    rows = []
    for i in range(1, num_motorcycles + 1):
        motorcycle_id = f"MC{i:03d}"  # Format motorcycle ID (e.g., MC001, MC002, ...)
        rows.extend(
            {
                'date': day,
                'motorcycle_id': motorcycle_id,
                'distance_km': distance_km,
                'petrol_emission_factor_kg_per_km': petrol_emission_factor_kg_per_km,
                'avoided_co2_kg': avoided_co2_kg,
            }
            for day in dates
        )

    # Create a pandas DataFrame from the list of rows
    return pd.DataFrame(rows, columns=columns)

def main():
    """
    Build the carbon credit dataset with fixed settings and write it to CSV.

    The dataset covers 120 motorcycles from 2023-01-01 through 2025-05-04,
    each travelling 100 km per day with a petrol baseline of 0.07 kg CO2/km.
    If the generator returns an empty DataFrame, an error is printed and no
    file is written; otherwise the data is saved to
    'carbon_credit_dataset.csv' in the current working directory.
    """
    # Scenario parameters, passed to the generator by keyword.
    settings = {
        'start_date': '2023-01-01',
        'end_date': '2025-05-04',
        'num_motorcycles': 120,
        'distance_km': 100,
        'petrol_emission_factor_kg_per_km': 0.07,
    }

    dataset = generate_carbon_credit_dataset(**settings)

    if dataset.empty:
        # Nothing was generated, so do not create a file.
        print("Error: Dataset generation failed.  No CSV file will be saved.")
    else:
        output_path = 'carbon_credit_dataset.csv'
        # index=False keeps the DataFrame's row index out of the CSV.
        dataset.to_csv(output_path, index=False)
        print(f"Dataset successfully generated and saved to {output_path}")

# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()