<a href="https://colab.research.google.com/github/ynakenya/dataset/blob/main/AI_Model_Training_for_Carbon_Credits.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

def train_carbon_credit_model(data):
    """
    Train a linear regression model that predicts avoided CO2 emissions
    (carbon credits) from motorcycle trip data.

    The CSV text must contain the feature columns 'trip_distance_1_km'
    through 'trip_distance_10_km', 'total_distance_km' and
    'petrol_emission_factor_kg_per_km', plus the target column
    'avoided_co2_kg'.  Any other columns are ignored.

    The rows are split 80/20 into training and test sets with a fixed
    random seed.  The test-set mean squared error and R-squared are printed,
    followed by the fitted coefficients sorted by absolute value.

    Args:
        data (str): A string containing the data in CSV format,
                      including the header row.

    Returns:
        tuple: A tuple containing:
            - model: The trained machine learning model.
            - X_test: The test features.
            - y_test: The test target values.
            - predictions: The model's predictions on the test set.
            Returns (None, None, None, None) if the data cannot be parsed,
            is missing a required column, or has fewer than two rows.
    """
    # Imported here so the function works regardless of what the surrounding
    # module imports; the name `io` was previously used without any import.
    import io

    try:
        # Wrap the CSV text in a file-like object so pandas can parse it.
        df = pd.read_csv(io.StringIO(data))
    except (TypeError, ValueError) as e:
        # TypeError: `data` is not text.  ValueError: covers pandas'
        # ParserError / EmptyDataError.  The handler is deliberately narrow:
        # a blanket `except Exception` would also hide programming errors
        # (such as a NameError) behind a misleading "bad data" message.
        print(f"Error: Could not read data.  Check the format.  Error details: {e}")
        return None, None, None, None

    # 1. Data Preprocessing
    # Select features (independent variables) and target (dependent variable)
    features = [
        'trip_distance_1_km', 'trip_distance_2_km', 'trip_distance_3_km',
        'trip_distance_4_km', 'trip_distance_5_km', 'trip_distance_6_km',
        'trip_distance_7_km', 'trip_distance_8_km', 'trip_distance_9_km',
        'trip_distance_10_km', 'total_distance_km', 'petrol_emission_factor_kg_per_km'
    ]
    target = 'avoided_co2_kg'

    # Check if all required columns are present
    if not all(col in df.columns for col in features + [target]):
        print("Error: Input data is missing required columns.")
        return None, None, None, None

    # An 80/20 split needs at least one row on each side; with fewer than two
    # rows train_test_split would raise instead of reporting invalid data.
    if len(df) < 2:
        print("Error: Input data must contain at least 2 rows to train and test a model.")
        return None, None, None, None

    X = df[features]
    y = df[target]

    # 2. Split Data into Training and Testing Sets
    #   Holding out 20% of the rows lets us measure how well the model
    #   generalizes to data it was not fitted on.  The fixed random_state
    #   makes the split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # 3. Model Selection and Training
    #   A plain Linear Regression is used as the baseline.  Swap this line for
    #   another regressor (e.g. RandomForestRegressor, GradientBoostingRegressor)
    #   if the relationship between features and target is non-linear.
    model = LinearRegression()
    model.fit(X_train, y_train)

    # 4. Model Evaluation
    #   Score on the held-out *test* set only, never on the training rows.
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)

    print("Model Evaluation:")
    print(f"Mean Squared Error: {mse:.2f}")
    print(f"R-squared: {r2:.2f}")  #  R-squared measures the proportion of variance explained by the model

    # 5. Feature Importance (for Linear Regression)
    #    The fitted coefficients are reported ranked by magnitude.  The
    #    isinstance guard keeps this section from breaking if the model above
    #    is replaced by one that has no `coef_` attribute.
    if isinstance(model, LinearRegression):
        feature_importance = pd.DataFrame({
            'Feature': features,
            'Coefficient': model.coef_
        })
        feature_importance['AbsCoefficient'] = np.abs(feature_importance['Coefficient'])
        feature_importance = feature_importance.sort_values('AbsCoefficient', ascending=False)
        print("\nFeature Importance (Linear Regression):")
        print(feature_importance)

    return model, X_test, y_test, predictions


def main():
    """
    Main function to load the data, train the model, and print the results.
    """
    # Load the data (replace with your actual data loading)
    data = """date,motorcycle_id,trip_distance_1_km,trip_distance_2_km,trip_distance_3_km,trip_distance_4_km,trip_distance_5_km,trip_distance_6_km,trip_distance_7_km,trip_distance_8_km,trip_distance_9_km,trip_distance_10_km,total_distance_km,petrol_emission_factor_kg_per_km,avoided_co2_kg
2023-01-01,MC001,2.5,1.0,3.0,5.0,10.0,15.0,20.0,20.0,13.5,10.0,100.0,0.07,7.0
2023-01-01,MC002,5.0,5.0,10.0,10.0,15.0,15.0,15.0,10.0,10.0,5.0,100.0,0.07,7.0
2023-01-01,MC003,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,100.0,0.07,7.0
2023-01-01,MC004,15.0,12.0,10.0,8.0,15.0,10.0,10.0,10.0,5.0,5.0,100.0,0.07,7.0
2023-01-01,MC005,20.0,18.0,15.0,12.0,10.0,8.0,7.0,5.0,3.0,2.0,100.0,0.07,7.0
2023-01-01,MC006,2.0,4.0,6.0,8.0,10.0,12.0,14.0,16.0,18.0,10.0,100.0,0.07,7.0
2023-01-01,MC007,7.0,14.0,21.0,7.0,14.0,7.0,7.0,7.0,7.0,5.0,100.0,0.07,7.0
2023-01-01,MC008,10.0,20.0,30.0,10.0,10.0,5.0,5.0,5.0,2.5,2.5,100.0,0.07,7.0
2023-01-01,MC009,5.0,10.0,15.0,20.0,25.0,10.0,5.0,5.0,2.5,2.5,100.0,0.07,7.0
2023-01-01,MC010,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,55.0,100.0,0.07,7.0
2023-01-01,MC011,2.5,5.0,7.5,10.0,12.5,15.0,17.5,20.0,5.0,5.0,100.0,0.07,7.0
2023-01-01,MC012,8.0,16.0,8.0,16.0,8.0,16.0,8.0,8.0,8.0,4.0,100.0,0.07,7.0
2023-01-01,MC013,3.0,6.0,9.0,12.0,15.0,18.0,12.0,10.0,8.0,7.0,100.0,0.07,7.0
2023-01-01,MC014,6.0,12.0,18.0,24.0,10.0,10.0,8.0,6.0,4.0,2.0,100.0,0.07,7.0
2023-01-01,MC015,9.0,18.0,27.0,10.0,10.0,8.0,6.0,4.0,4.0,4.0,100.0,0.07,7.0
2023-01-01,MC016,4.0,8.0,12.0,16.0,20.0,10.0,10.0,8.0,6.0,6.0,100.0,0.07,7.0
2023-01-01,MC017,7.0,14.0,21.0,28.0,10.0,8.0,6.0,4.0,1.0,1.0,100.0,0.07,7.0
2023-01-01,MC018,10.0,20.0,30.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.07,7.0
2023-01-01,MC019,2.0,4.0,6.0,8.0,10.0,12.0,14.0,16.0,18.0,10.0,100.0,0.07,7.0
2023-01-01,MC020,5.0,10.0,15.0,20.0,25.0,10.0,5.0,5.0,2.5,2.5,100.0,0.07,7.0
2023-01-01,MC021,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,55.0,100.0,0.07,7.0
2023-01-01,MC022,2.5,5.0,7.5,10.0,12.5,15.0,17.5,20.0,5.0,5.0,100.0,0.07,7.0
2023-01-01,MC023,8.0,16.0,8.0,16.0,8.0,16.0,8.0,8.0,8.0,4.0,100.0,0.07,7.0
2023-01-01,MC024,3.0,6.0,9.0,12.0,15.0,18.0,12.0,10.0,8.0,7.0,100.0,0.07,7.0
2023-01-01,MC025,6.0,12.0,18.0,24.0,10.0,10.0,8.0,6.0,4.0,2.0,100.0,0.07,7.0
2023-01-01,MC026,9.0,18.0,27.0,10.0,10.0,8.0,6.0,4.0,4.0,4.0,100.0,0.07,7.0
2023-01-01,MC027,4.0,8.0,12.0,16.0,20.0,10.0,10.0,8.0,6.0,6.0,100.0,0.07,7.0
2023-01-01,MC028,7.0,14.0,21.0,28.0,10.0,8.0,6.0,4.0,1.0,1.0,100.0,0.07,7.0
2023-01-01,MC029,10.0,20.0,30.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.07,7.0
2023-01-01,MC030,2.0,4.0,6.0,8.0,10.0,12.0,14.0,16.0,18.0,10.0,100.0,0.07,7.0
2023-01-01,MC031,2.5,1.0,3.0,5.0,10.0,15.0,20.0,20.0,13.5,10.0,100.0,0.07,7.0
2023-01-01,MC032,5.0,5.0,10.0,10.0,15.0,15.0,15.0,10.0,10.0,5.0,100.0,0.07,7.0
2023-01-01,MC033,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,100.0,0.07,7.0
2023-01-01,MC034,15.0,12.0,10.0,8.0,15.0,10.0,10.0,10.0,5.0,5.0,100.0,0.07,7.0
2023-01-01,MC035,20.0,18.0,15.0,12.0,10.0,8.0,7.0,5.0,3.0,2.0,100.0,0.07,7.0
2023-01-01,MC036,2.0,4.0,6.0,8.0,10.0,12.0,14.0,16.0,18.0,10.0,100.0,0.07,7.0
2023-01-01,MC037,7.0,14.0,21.0,7.0,14.0,7.0,7.0,7.0,7.0,5.0,100.0,0.07,7.0
2023-01-01,MC038,10.0,20.0,30.0,10.0,10.0,5.0,5.0,5.0,2.5,2.5,100.0,0.07,7.0
2023-01-01,MC039,5.0,10.0,15.0,20.0,25.0,10.0,5.0,5.0,2.5,2.5,100.0,0.07,7.0
2023-01-01,MC040,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,55.0,100.0,0.07,7.0
2023-01-01,MC041,2.5,5.0,7.5,10.0,12.5,15.0,17.5,20.0,5.0,5.0,100.0,0.07,7.0
2023-01-01,MC042,8.0,16.0,8.0,16.0,8.0,16.0,8.0,8.0,8.0,4.0,100.0,0.07,7.0
2023-01-01,MC043,3.0,6.0,9.0,12.0,15.0,18.0,12.0,10.0,8.0,7.0,100.0,0.07,7.0
2023-01-01,MC044,6.0,12.0,18.0,24.0,10.0,10.0,8.0,6.0,4.0,2.0,100.0,0.07,7.0
2023-01-01,MC045,9.0,18.0,27.0,10.0,10.0,8.0,6.0,4.0,4.0,4.0,100.0,0.07,7.0
2023-01-01,MC046,4.0,8.0,12.0,16.0,20.0,10.0,10.0,8.0,6.0,6.0,100.0,0.07,7.0
2023-01-01,MC047,7.0,14.0,21.0,28.0,10.0,8.0,6.0,4.0,1.0,1.0,100.0,0.07,7.0
2023-01-01,MC048,10.0,20.0,30.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.07,7.0
2023-01-01,MC049,2.0,4.0,6.0,8.0,10.0,12.0,14.0,16.0,18.0,10.0,100.0,0.07,7.0
2023-01-01,MC050,5.0,10.0,15.0,20.0,25.0,10.0,5.0,5.0,2.5,2.5,100.0,0.07,7.0
2023-01-01,MC051,2.5,1.0,3.0,5.0,10.0,15.0,20.0,20.0,13.5,10.0,100.0,0.07,7.0
2023-01-01,MC052,5.0,5.0,10.0,10.0,15.0,15.0,15.0,10.0,10.0,5.0,100.0,0.07,7.0
2023-01-01,MC053,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,100.0,0.07,7.0
2023-01-01,MC054,15.0,12.0,10.0,8.0,15.0,10.0,10.0,10.0,5.0,5.0,100.0,0.07,7.0
2023-01-01,MC055,20.0,18.0,15.0,12.0,10.0,8.0,7.0,5.0,3.0,2.0,100.0,0.07,7.0
2023-01-01,MC056,2.0,4.0,6.0,8.0,10.0,12.0,14.0,16.0,18.0,10.0,100.0,0.07,7.0
2023-01-01,MC057,7.0,14.0,21.0,7.0,14.0,7.0,7.0,7.0,7.0,5.0,100.0,0.07,7.0
2023-01-01,MC058,10.0,20.0,30.0,10.0,10.0,5.0,5.0,5.0,2.5,2.5,100.0,0.07,7.0
2023-01-01,MC059,5.0,10.0,15.0,20.0,25.0,10.0,5.0,5.0,2.5,2.5,100.0,0.07,7.0
2023-01-01,MC060,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,55.0,100.0,0.07,7.0
2023-01-01,MC061,2.5,5.0,7.5,10.0,12.5,15.0,17.5,20.0,5.0,5.0,100.0,0.07,7.0
2023-01-01,MC062,8.0,16.0,8.0,16.0,8.0,16.0,8.0,8.0,8.0,4.0,100.0,0.07,7.0
2023-01-01,MC063,3.0,6.0,9.0,12.0,15.0,18.0,12.0,10.0,8.0,7.0,100.0,0