In [None]:
import pandas as pd

# Name of the dataset file, resolved relative to the notebook's working
# directory; point this at a full path if the CSV lives somewhere else.
csv_name = 'FuelConsumptionCo2.csv'

# Read the CSV into the raw DataFrame that the rest of the notebook works from
df = pd.read_csv(csv_name)

In [None]:
# Print a concise summary of the raw frame: columns, non-null counts and dtypes
df.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

In [None]:
# Preview the first rows of the raw data
df.head()

In [None]:
# Count the missing (NaN/None) entries in every column
missing_values = df.isna().sum()

# Show the per-column counts as the cell output
missing_values

In [None]:
# Work on a copy so the encoding and scaling below do not modify the raw frame `df`
df2 = df.copy()

In [None]:
# List the column labels (used below to choose which columns to encode and scale)
df2.columns

In [None]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Columns of df2 that are replaced by integer codes below.
# NOTE(review): despite the variable name these look like multi-valued
# categorical columns rather than binary ones; the name is kept so that any
# other cell referring to `binary_columns` keeps working.
binary_columns = ['MAKE', 'MODEL', 'VEHICLECLASS', 'TRANSMISSION', 'FUELTYPE']

# Keep one fitted encoder per column. Re-fitting a single shared LabelEncoder
# overwrites its classes_ on every iteration, which makes it impossible to map
# the integer codes of the earlier columns back to their original labels.
encoders = {}
for column in binary_columns:
    encoder = LabelEncoder()
    df2[column] = encoder.fit_transform(df2[column])
    encoders[column] = encoder  # decode later with encoders[column].inverse_transform(...)

# NOTE(review): integer codes give these columns an arbitrary numeric order,
# which the linear regression fitted later treats as meaningful; consider
# one-hot encoding if that is not intended.


In [None]:
# Preview the first rows to check the integer-encoded columns
df2.head()

In [None]:
# (rows, columns) of the working frame
df2.shape

In [None]:
# Standardize the numeric predictor columns (zero mean, unit variance).
# CO2EMISSIONS is the regression target selected as `y` further down, so it is
# deliberately NOT scaled: leaving it in its original units keeps the reported
# MAE / MSE and the Actual-vs-Predicted table directly interpretable.
scaler = StandardScaler()
numeric_features = [
    'ENGINESIZE',
    'CYLINDERS',
    'FUELCONSUMPTION_CITY',
    'FUELCONSUMPTION_HWY',
    'FUELCONSUMPTION_COMB',
    'FUELCONSUMPTION_COMB_MPG',
]
# NOTE(review): the scaler is fit on every row, i.e. before the train/test
# split performed later, so test-set statistics influence the scaling. Fit the
# scaler on the training rows only if this preprocessing is reused with a model
# that is sensitive to feature scale.
df2[numeric_features] = scaler.fit_transform(df2[numeric_features])

In [None]:
# Preview the first rows to check the standardized columns
df2.head()

In [None]:
from sklearn.model_selection import train_test_split

# Target: the CO2 emissions column; predictors: every remaining column of df2
y = df2['CO2EMISSIONS']
X = df2.drop(columns=['CO2EMISSIONS'])

In [None]:
# Preview the predictor matrix
X.head()

In [None]:
# Preview the target values
y.head()

In [None]:
# Hold out 20% of the rows for testing and train on the remaining 80%.
# random_state is fixed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

# Instantiate a linear regression estimator with its default settings
model = LinearRegression()

In [None]:
# Fit the regression on the training split only; the test split stays unseen
# until evaluation. As the last expression, the fitted estimator is displayed.
model.fit(X_train, y_train)

In [None]:
# Predict the target for the held-out test rows
y_pred = model.predict(X_test)

In [None]:
# Score the test-set predictions: mean squared error, mean absolute error
# and the coefficient of determination (R^2)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r_squared = r2_score(y_test, y_pred)

# Display the three metrics as (MAE, MSE, R^2)
mae, mse, r_squared

In [None]:
# Side-by-side table of true and predicted target values for the test rows
# (indexed by the test rows' original index, taken from y_test)
data = pd.DataFrame({"Actual": y_test, "Predicted": y_pred})

In [None]:
# Preview the first Actual / Predicted pairs
data.head()

In [None]:
# Pair each predictor with its fitted coefficient and list them from the
# largest to the smallest coefficient for interpretation
coefficients = pd.DataFrame(
    {'Feature': X.columns, 'Coefficient': model.coef_}
).sort_values('Coefficient', ascending=False)

# Show the sorted table as the cell output
coefficients


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise correlations between all columns of the processed frame
correlation_matrix = df2.corr()

# Draw the annotated heatmap on an explicitly created 12x8 inch figure
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
ax.set_title("Correlation Matrix")
plt.show()