# Automobile Linear Regression
## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

## Importing the Automobile dataset and displaying the first few rows

In [13]:
# Read the automobile records from the CSV file (the path is relative to the
# notebook's working directory) and preview the first five rows as plain text.
dataset = pd.read_csv('Automobile.csv')
print(dataset.head(n=5))

     price  highway-mpg  city-mpg  peak-rpm  horsepower  compression-ratio  \
0  13495.0           27        21    5000.0       111.0                9.0   
1  16500.0           27        21    5000.0       111.0                9.0   
2  16500.0           26        19    5000.0       154.0                9.0   
3  13950.0           30        24    5500.0       102.0               10.0   
4  17450.0           22        18    5500.0       115.0                8.0   

   stroke  bore fuel-system  engine-size  ...  wheel-base engine-location  \
0    2.68  3.47        mpfi          130  ...        88.6           front   
1    2.68  3.47        mpfi          130  ...        88.6           front   
2    3.47  2.68        mpfi          152  ...        94.5           front   
3    3.40  3.19        mpfi          109  ...        99.8           front   
4    3.40  3.19        mpfi          136  ...        99.4           front   

   drive-wheels   body-style  num-of-doors  aspiration  fuel-type  \

# Check for missing data and drop the rows that contain it

In [14]:
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   price              201 non-null    float64
 1   highway-mpg        205 non-null    int64  
 2   city-mpg           205 non-null    int64  
 3   peak-rpm           203 non-null    float64
 4   horsepower         203 non-null    float64
 5   compression-ratio  205 non-null    float64
 6   stroke             201 non-null    float64
 7   bore               201 non-null    float64
 8   fuel-system        205 non-null    object 
 9   engine-size        205 non-null    int64  
 10  num-of-cylinders   205 non-null    int64  
 11  engine-type        205 non-null    object 
 12  curb-weight        205 non-null    int64  
 13  height             205 non-null    float64
 14  width              205 non-null    float64
 15  length             205 non-null    float64
 16  wheel-base         205 non

In [15]:
# Count the missing values in every column (isna() is the same check as isnull()).
print(dataset.isna().sum())

price                 4
highway-mpg           0
city-mpg              0
peak-rpm              2
horsepower            2
compression-ratio     0
stroke                4
bore                  4
fuel-system           0
engine-size           0
num-of-cylinders      0
engine-type           0
curb-weight           0
height                0
width                 0
length                0
wheel-base            0
engine-location       0
drive-wheels          0
body-style            0
num-of-doors          2
aspiration            0
fuel-type             0
make                  0
normalized-losses    41
symboling             0
dtype: int64


In [16]:
# Drop every row that has a missing value in any of the columns listed below.
# 'stroke' is listed explicitly: it also contains missing values and is a
# float64 column that ends up in the numeric feature matrix, so it should not
# be cleaned only because its gaps happen to coincide with those of 'bore'.
# 'normalized-losses' is intentionally not listed here; it is removed as a
# column during feature selection instead.
required_columns = [
    'price',
    'peak-rpm',
    'horsepower',
    'stroke',
    'bore',
    'num-of-doors',
]

# A single non-in-place dropna replaces five in-place calls; rebinding the
# name keeps the cell safe to re-run and leaves later cells unchanged.
dataset = dataset.dropna(subset=required_columns)

# info() prints its own report and returns None, so it is not wrapped in print().
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 193 entries, 0 to 204
Data columns (total 26 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   price              193 non-null    float64
 1   highway-mpg        193 non-null    int64  
 2   city-mpg           193 non-null    int64  
 3   peak-rpm           193 non-null    float64
 4   horsepower         193 non-null    float64
 5   compression-ratio  193 non-null    float64
 6   stroke             193 non-null    float64
 7   bore               193 non-null    float64
 8   fuel-system        193 non-null    object 
 9   engine-size        193 non-null    int64  
 10  num-of-cylinders   193 non-null    int64  
 11  engine-type        193 non-null    object 
 12  curb-weight        193 non-null    int64  
 13  height             193 non-null    float64
 14  width              193 non-null    float64
 15  length             193 non-null    float64
 16  wheel-base         193 non-null

# Feature Selection

In [19]:
# Target: the 'symboling' column.
y = dataset["symboling"]

# Features: every column except the target itself and 'normalized-losses'.
columns_to_drop = ["normalized-losses", "symboling"]
X = dataset.drop(columns_to_drop, axis="columns")

# Keep only the numeric (float64 / int64) columns; text columns are left out
# of the feature matrix.
X_numeric = X.select_dtypes(include=['float64', 'int64'])

## Splitting the dataset into the Training set and Test set (70:30)

In [20]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing (70:30 split). random_state pins the
# shuffle so that a fresh "Restart & Run All" reproduces the same split and
# therefore the same fitted model and evaluation metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X_numeric, y, test_size=0.3, random_state=42
)

## Training the Multiple Linear Regression model on the Training set

In [21]:
from sklearn.linear_model import LinearRegression

# Build the ordinary least-squares regressor, then fit it on the training
# split; fit() returns the estimator itself, so `model` is the fitted model.
model = LinearRegression()
model.fit(X_train, y_train)

## Predicting the Test set results

In [22]:
# Predict the target for the held-out test features with the fitted model.
y_pred = model.predict(X_test)

## Evaluate the model's performance

In [23]:
from sklearn.metrics import mean_squared_error, r2_score

# Compare the test-set predictions with the true target values.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Show the evaluation results.
print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

Mean Squared Error: 0.6463957774823057
R-squared: 0.5743833635837783


## Mean Squared Error (MSE)
Mean Squared Error (MSE) adalah salah satu metrik yang digunakan untuk mengevaluasi kualitas model regresi. MSE mengukur rata-rata kuadrat selisih antara nilai prediksi dan nilai aktual. Semakin kecil nilai MSE, semakin baik model dalam memprediksi data.

## R-squared (Koefisien Determinasi)
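R-squared adalah metrik evaluasi yang mengukur seberapa baik model regresi menjelaskan variabilitas dalam data target. Nilai R-squared biasanya berkisar antara 0 dan 1. Jika R-squared = 1, model memprediksi semua data dengan sempurna. Jika R-squared = 0, model tidak menjelaskan variabilitas data lebih baik daripada selalu memprediksi nilai rata-rata target. Pada data uji, R-squared bahkan bisa bernilai negatif, yaitu ketika prediksi model lebih buruk daripada sekadar memprediksi nilai rata-rata tersebut.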