
<a href="https://www.zero-grad.com/">
         <img alt="Zero Grad" src="https://i.postimg.cc/pTkJnRy0/notebook-cover.png" >
      </a>

In [2]:
import pandas as pd
import plotly.express as px


# Load the fuel-consumption dataset from the working directory and preview
# its first five rows.
df = pd.read_csv(filepath_or_buffer='FuelConsumption.csv')
df.head(n=5)

Unnamed: 0,MODELYEAR,MAKE,MODEL,VEHICLECLASS,ENGINESIZE,CYLINDERS,TRANSMISSION,FUELTYPE,FUELCONSUMPTION_CITY,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,FUELCONSUMPTION_COMB_MPG,CO2EMISSIONS
0,2014,ACURA,ILX,COMPACT,2.0,4,AS5,Z,9.9,6.7,8.5,33,196
1,2014,ACURA,ILX,COMPACT,2.4,4,M6,Z,11.2,7.7,9.6,29,221
2,2014,ACURA,ILX HYBRID,COMPACT,1.5,4,AV7,Z,6.0,5.8,5.9,48,136
3,2014,ACURA,MDX 4WD,SUV - SMALL,3.5,6,AS6,Z,12.7,9.1,11.1,25,255
4,2014,ACURA,RDX AWD,SUV - SMALL,3.5,6,AS6,Z,12.1,8.7,10.6,27,244


In [3]:
# How many vehicles fall into each cylinder count (most frequent first).
df['CYLINDERS'].value_counts()

4     420
6     356
8     252
12     17
10      9
5       9
3       4
Name: CYLINDERS, dtype: int64

In [4]:
# Correlation of every numeric column with the target, strongest positive first.
# The frame also holds string columns (MAKE, MODEL, VEHICLECLASS, ...); pandas >= 2.0
# raises if they are passed to corr(), so restrict to numeric dtypes explicitly.
# select_dtypes is used because it behaves the same on old and new pandas.
df.select_dtypes(include='number').corr()['CO2EMISSIONS'].sort_values(ascending=False)

CO2EMISSIONS                1.000000
FUELCONSUMPTION_CITY        0.898039
FUELCONSUMPTION_COMB        0.892129
ENGINESIZE                  0.874154
FUELCONSUMPTION_HWY         0.861748
CYLINDERS                   0.849685
FUELCONSUMPTION_COMB_MPG   -0.906394
MODELYEAR                        NaN
Name: CO2EMISSIONS, dtype: float64

# Simple Linear Regression

In [5]:
# Single predictor (engine size) and the target, both kept as one-column
# DataFrames rather than Series.
X = df.loc[:, ['ENGINESIZE']]
y = df.loc[:, ['CO2EMISSIONS']]

In [6]:
# Count missing values per feature column.
X.isna().sum()

ENGINESIZE    0
dtype: int64

In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for the feature.
X.describe(include=None)

Unnamed: 0,ENGINESIZE
count,1067.0
mean,3.346298
std,1.415895
min,1.0
25%,2.0
50%,3.4
75%,4.3
max,8.4


In [8]:
# Distribution of the feature values, rendered as the cell's output.
px.histogram(
    data_frame=X,
    height=500,
    width=800,
)

In [9]:
# Keep only vehicles whose engine size is below 8, selecting rows and the
# wanted column in a single .loc call for feature and target alike.
X = df.loc[df['ENGINESIZE'] < 8, ['ENGINESIZE']]
y = df.loc[df['ENGINESIZE'] < 8, ['CO2EMISSIONS']]

In [10]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [11]:
from sklearn.linear_model import LinearRegression

# Fit ordinary least squares on the training split (fit() returns the
# estimator itself), then report the learned parameters.
lin_reg = LinearRegression().fit(X_train, y_train)

for label, value in (('Intercept: ', lin_reg.intercept_),
                     ('Coefficient: ', lin_reg.coef_)):
    print(label, value)

Intercept:  [123.73465169]
Coefficient:  [[39.97804374]]


In [12]:
# Predictions of the fitted simple model on the held-out split.
y_pred = lin_reg.predict(X=X_test)

In [13]:
# Scatter the held-out observations, then overlay the model's predictions
# as a red line on the same figure.
fig = px.scatter(
    x=X_test['ENGINESIZE'],
    y=y_test['CO2EMISSIONS'],
    title='Engine Size vs CO2 Emission',
    labels={'x':'Engine Size', 'y':'CO2 Emission'},
    width=800,
    height=600,
)
regression_line = px.line(
    x=X_test['ENGINESIZE'],
    y=y_pred.ravel(),
    color_discrete_sequence=['red'],
)
fig.add_traces(regression_line.data)
fig.show()

In [14]:
from sklearn.metrics import r2_score, mean_squared_error

# Score the simple model on the held-out split.
y_pred = lin_reg.predict(X_test)

r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print('R2 score: ', r2)
print('Mean Squared Error: ', mse)

R2 score:  0.7354400081255801
Mean Squared Error:  1006.5879602110778




<img alt="Zero Grad" src="https://i.postimg.cc/cLGvjHYy/machine-learning.png" >


      

# Multiple Linear Regression

In [15]:
# Three predictors for the multiple-regression model; the target stays a
# one-column DataFrame. Both are taken from the full frame.
X = df.loc[:, ['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_COMB']]
y = df.loc[:, ['CO2EMISSIONS']]

In [16]:
# Same 80/20 split and seed as the simple-regression section.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [17]:
# Fit the three-feature linear model (fit() returns the estimator itself)
# and report its learned parameters.
lin_reg2 = LinearRegression().fit(X_train, y_train)

for label, value in (('Intercept: ', lin_reg2.intercept_),
                     ('Coefficient: ', lin_reg2.coef_)):
    print(label, value)

Intercept:  [69.05220993]
Coefficient:  [[11.98993058  7.54469599  8.98695981]]


In [18]:
# Score the multiple-regression model on the held-out split.
y_pred = lin_reg2.predict(X_test)

r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print('R2 score: ', r2)
print('Mean Squared Error: ', mse)

R2 score:  0.8604199405627129
Mean Squared Error:  589.2351940837906


# Model Saving

In [19]:
import pickle

# Save the multiple-regression model to model.pkl.
# The context manager guarantees the file is flushed and closed even if
# pickling fails; the bare open(...) used before leaked the handle.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(lin_reg2, model_file)

In [20]:
%%writefile app.py

import streamlit as st
import pickle

# Load the pickled model
with open('model.pkl', 'rb') as f:
    model = pickle.load(f)

# Title
st.title('Predicting the Co2 Emission of a Car')

# Image
st.image('co2.jpg')

# Inputs
engine_size = st.number_input('Engine Size', min_value=0.0, max_value=10.0, value=1.0)
cylinders = st.number_input('Cylinders', min_value=0, max_value=10, value=1)
Fuel_Consumption_Comb = st.number_input('Fuel Consumption Combined', min_value=0.0, max_value=100.0, value=1.0)

# Output
output = model.predict([[engine_size, cylinders, Fuel_Consumption_Comb]])

# Display the output
st.write(' ## The predicted Co2 Emission is: ', round(output[0][0],2))

Writing app.py


In [29]:
def run_app():
    """Install the app's dependencies, start Streamlit, and open a localtunnel.

    Runs four shell commands in order through IPython's ``!`` escape, so this
    function only works inside an IPython/Jupyter kernel. It takes no
    arguments and returns nothing.

    NOTE(review): the hard-coded /content paths look like a Google Colab
    working directory — confirm before running elsewhere.
    """
    # Pinned Streamlit release, installed quietly (-q).
    !pip install streamlit==1.13.0 -q
    # localtunnel is installed through npm and invoked below via npx.
    !npm install localtunnel
    # The trailing & backgrounds the Streamlit process; its output is
    # redirected to /content/logs.txt.
    !streamlit run /content/app.py &>/content/logs.txt & 
    # Not backgrounded, so this call occupies the cell while the tunnel is up.
    # Port 8501 is presumably the port Streamlit serves on — TODO confirm.
    !npx localtunnel --port 8501

In [None]:
# Install dependencies, launch the Streamlit app, and expose it through
# localtunnel (requires app.py and model.pkl to have been written already).
run_app()