<a href="https://colab.research.google.com/github/youssefjedidi/Aircraft_Noise_Predictor/blob/main/aircraft_noise_predictor_ML_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ML Regression models

## Importing the libraries

In [258]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [259]:
dataset = pd.read_csv('aircraft_noise.csv')
X = dataset.iloc[:, 1:-1].values
y = dataset.iloc[:, -1].values

In [260]:
print(X)

[['LAmax' 'A' 1500 'Piston' 'Small' 1]
 ['LAmax' 'A' 1600 'Piston' 'Small' 1]
 ['LAmax' 'A' 27 'Piston' 'Small' 1]
 ...
 ['SEL' 'D' 28000 'Jet' 'Heavy' 4]
 ['EPNL' 'D' 32000 'Jet' 'Heavy' 4]
 ['SEL' 'D' 32000 'Jet' 'Heavy' 4]]


## Encoding categorical data

In [261]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [  1 , 0 , 3 ,4  ])], remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [262]:
print(X)

[[1.0 0.0 0.0 ... 1.0 1500 1]
 [1.0 0.0 0.0 ... 1.0 1600 1]
 [1.0 0.0 0.0 ... 1.0 27 1]
 ...
 [0.0 1.0 0.0 ... 0.0 28000 4]
 [0.0 1.0 1.0 ... 0.0 32000 4]
 [0.0 1.0 0.0 ... 0.0 32000 4]]


## Splitting the dataset into the Training set and Test set

In [263]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

## Multiple Linear Regression


### Training the Multiple Linear Regression model on the Training set

In [264]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

### Predicting the Test set results

In [265]:
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[70.03 56.7 ]
 [64.99 68.4 ]
 [58.32 56.8 ]
 ...
 [71.37 65.1 ]
 [47.58 41.6 ]
 [54.87 48.  ]]


### Testing accuracy

In [266]:
def mean_absolute_percentage_error(y_true, y_pred):
    return (np.mean(np.abs((y_true - y_pred) / y_true)) * 100)

mape = mean_absolute_percentage_error(y_test, y_pred)

print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
#print(regressor.predict([[1, 0, 0, 1 ,0 ,0 , 0,0 ,1 , 300]]))

from sklearn.metrics import r2_score

# Assuming y_test contains the actual target values and y_pred contains the predicted values
# Calculate R squared
r2 = r2_score(y_test, y_pred)

# Calculate adjusted R squared
n = len(y_test)  # Number of samples
p = X.shape[1]   # Number of predictors (features) in your model
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)

# Print R-squared and adjusted R-squared
print(f"R squared (r2): {r2:.4f}")
print(f"Adjusted R squared (adj_r2): {adj_r2:.4f}")

Mean Absolute Percentage Error (MAPE): 8.74%
R squared (r2): 0.6140
Adjusted R squared (adj_r2): 0.6040


## Polynomial Regression

### Training

In [267]:
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree = 3)
X_poly = poly_reg.fit_transform(X_train)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y_train)

### Predecting the Test set Results

In [268]:
y_poly = lin_reg_2.predict(poly_reg.fit_transform(X_test))
np.set_printoptions(precision=2)
print(np.concatenate((y_poly.reshape(len(y_poly),1), y_test.reshape(len(y_test),1)),1))

[[67.4  56.7 ]
 [62.11 68.4 ]
 [54.92 56.8 ]
 ...
 [74.51 65.1 ]
 [46.41 41.6 ]
 [48.07 48.  ]]


### Testing accuracy

In [269]:

mape = mean_absolute_percentage_error(y_test, y_poly)

print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
#print(lin_reg_2.predict(poly_reg.fit_transform([[1, 0, 0, 1 ,0 ,0 , 0,0 ,1 , 300]])))

from sklearn.metrics import r2_score

# Assuming y_test contains the actual target values and y_pred contains the predicted values
# Calculate R squared
r2_poly = r2_score(y_test, y_poly)

# Calculate adjusted R squared
n = len(y_test)  # Number of samples
p = X.shape[1]   # Number of predictors (features) in your model
adj_r2 = 1 - (1 - r2_poly) * (n - 1) / (n - p - 1)

# Print R-squared and adjusted R-squared
print(f"R squared (r2): {r2_poly:.4f}")
print(f"Adjusted R squared (adj_r2): {adj_r2:.4f}")

Mean Absolute Percentage Error (MAPE): 7.10%
R squared (r2): 0.7061
Adjusted R squared (adj_r2): 0.6985


## Support Vector Regression (SVR)

### Feature Scaling

In [270]:
from sklearn.preprocessing import StandardScaler

# Reshape input data to 2D array
X_reshaped = X[:, 3].reshape(-1, 1)  # Reshape only the fourth column
X_test_reshaped = X_test[:, 3].reshape(-1, 1)

# Initialize StandardScaler for the fourth column
sc_X = StandardScaler()

# Fit and transform the fourth column
X_scaled = X.copy()  # Create a copy of X to keep other columns unchanged
X_scaled[:, -1] = sc_X.fit_transform(X_reshaped).flatten()  # Scale and update the fourth column
X_scaled[:, -2] = sc_X.fit_transform(X_reshaped).flatten()  # Scale and update the fourth column

X_test_scaled = X_test.copy()
X_test_scaled[:, -1] = sc_X.fit_transform(X_test_reshaped).flatten()
X_test_scaled[:, -2] = sc_X.fit_transform(X_test_reshaped).flatten()

# Initialize StandardScaler for the target variable
sc_y = StandardScaler()
y_scaled = sc_y.fit_transform(y.reshape(-1, 1))  # Fit and transform the target variable
y_test_scaled = sc_y.fit_transform(y_test.reshape(-1, 1))

# Now X_scaled and y_scaled are scaled versions of X and y using StandardScaler
print("Scaled X:")
print(X_scaled)
print("\nScaled y:")
print(y_scaled)

Scaled X:
[[1.0 0.0 0.0 ... 1.0 1.7320508075688774 1.7320508075688774]
 [1.0 0.0 0.0 ... 1.0 1.7320508075688774 1.7320508075688774]
 [1.0 0.0 0.0 ... 1.0 1.7320508075688774 1.7320508075688774]
 ...
 [0.0 1.0 0.0 ... 0.0 -0.5773502691896258 -0.5773502691896258]
 [0.0 1.0 1.0 ... 0.0 -0.5773502691896258 -0.5773502691896258]
 [0.0 1.0 0.0 ... 0.0 -0.5773502691896258 -0.5773502691896258]]

Scaled y:
[[-3.06]
 [-2.97]
 [-2.9 ]
 ...
 [ 3.36]
 [ 3.47]
 [ 3.5 ]]


### Training the SVR model

In [271]:
from sklearn.svm import SVR
sv_regressor = SVR(kernel = 'rbf')
sv_regressor.fit(X_scaled, y_scaled)

  y = column_or_1d(y, warn=True)


### Predicting

In [272]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [273]:
# Perform predictions using the SVR model on the scaled test features
y_svr_scaled = sv_regressor.predict(X_test_scaled)

# Inverse transform the scaled predictions to get them back to the original scale
y_svr = sc_y.inverse_transform(y_svr_scaled.reshape(-1, 1))

np.set_printoptions(precision=2)
print(np.concatenate((y_svr.reshape(len(y_svr),1), y_test.reshape(len(y_test),1)),1))

[[71.86 56.7 ]
 [59.04 68.4 ]
 [54.5  56.8 ]
 ...
 [72.17 65.1 ]
 [46.84 41.6 ]
 [47.43 48.  ]]


### Testing accuracy

In [274]:

mape = mean_absolute_percentage_error(y_test, y_svr)

print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

from sklearn.metrics import r2_score

# Assuming y_test contains the actual target values and y_pred contains the predicted values
# Calculate R squared
r2_svr = r2_score(y_test, y_svr)

# Calculate adjusted R squared
n = len(y_test)  # Number of samples
p = X.shape[1]   # Number of predictors (features) in your model
adj_r2 = 1 - (1 - r2_svr) * (n - 1) / (n - p - 1)

# Print R-squared and adjusted R-squared
print(f"R squared (r2): {r2_svr:.4f}")
print(f"Adjusted R squared (adj_r2): {adj_r2:.4f}")

Mean Absolute Percentage Error (MAPE): 19.77%
R squared (r2): 0.5740
Adjusted R squared (adj_r2): 0.5629


## Decision Tree

### Training the Decision Tree model on the Training set

In [275]:
from sklearn.tree import DecisionTreeRegressor
tree_regressor = DecisionTreeRegressor(random_state=42)
tree_regressor.fit(X_train, y_train)

###Predicting

In [276]:
y_tree = tree_regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_tree.reshape(len(y_tree),1), y_test.reshape(len(y_test),1)),1))

[[54.2  56.7 ]
 [61.05 68.4 ]
 [55.53 56.8 ]
 ...
 [81.99 65.1 ]
 [44.8  41.6 ]
 [47.77 48.  ]]


### Testing accuracy

In [277]:
mape = mean_absolute_percentage_error(y_test, y_tree)

print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
#print(tree_regressor.predict([[1, 0, 0, 1 ,0 ,0 , 0,0 ,1 , 300]]))

from sklearn.metrics import r2_score

# Assuming y_test contains the actual target values and y_pred contains the predicted values
# Calculate R squared
r2 = r2_score(y_test, y_tree)
# Calculate adjusted R squared
n = len(y_test)  # Number of samples
p = X.shape[1]   # Number of predictors (features) in your model
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)

# Print R-squared and adjusted R-squared
print(f"R squared (r2): {r2:.4f}")
print(f"Adjusted R squared (adj_r2): {adj_r2:.4f}")

Mean Absolute Percentage Error (MAPE): 7.17%
R squared (r2): 0.6792
Adjusted R squared (adj_r2): 0.6709


## Random Forest

### Training the Random Forest model on the Training set

In [278]:
from sklearn.ensemble import RandomForestRegressor
forest_regressor = RandomForestRegressor(n_estimators = 100)
forest_regressor.fit(X_train, y_train)

### Predicting

In [279]:
y_forest = forest_regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_forest.reshape(len(y_forest),1), y_test.reshape(len(y_test),1)),1))

[[62.24 56.7 ]
 [61.   68.4 ]
 [55.53 56.8 ]
 ...
 [82.18 65.1 ]
 [52.33 41.6 ]
 [47.88 48.  ]]


### Testing accuracy

In [280]:
mape = mean_absolute_percentage_error(y_test, y_forest)

print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
#print(tree_regressor.predict([[1, 0, 0, 1 ,0 ,0 , 0,0 ,1 , 300]]))

from sklearn.metrics import r2_score

# Assuming y_test contains the actual target values and y_pred contains the predicted values
# Calculate R squared
r2 = r2_score(y_test, y_forest)
# Calculate adjusted R squared
n = len(y_test)  # Number of samples
p = X.shape[1]   # Number of predictors (features) in your model
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)

# Print R-squared and adjusted R-squared
print(f"R squared (r2): {r2:.4f}")
print(f"Adjusted R squared (adj_r2): {adj_r2:.4f}")

Mean Absolute Percentage Error (MAPE): 6.98%
R squared (r2): 0.7044
Adjusted R squared (adj_r2): 0.6967
