In [1]:
import numpy as np     #imported all library
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from math import sqrt

In [2]:
df = pd.read_csv("https://raw.githubusercontent.com/vis2208/Air-Pollution-/main/Processed_data.csv")   #read data
df.head(5)

Unnamed: 0,Date,PM2.5,PM10,NO2,SO2,CO,RH,WS,WD,SR,AT
0,03/01/2018,287.71,488.92,86.46,20.49,1.82,67.28,0.32,248.7,167.41,12.41
1,15/01/2018,240.96,515.83,101.49,20.95,1.84,50.45,0.71,264.26,169.86,15.98
2,17/01/2018,294.38,402.21,72.23,17.61,1.2,64.78,1.19,88.52,166.5,14.02
3,23/01/2018,196.4,322.72,86.8,24.86,2.09,61.87,1.82,114.02,149.21,14.05
4,24/01/2018,162.97,239.81,72.06,25.29,2.08,66.09,0.71,240.56,198.31,13.42


In [3]:
# Let's say we want to split the data in 80:10:10 for train:valid:test dataset
train_size = 0.8

X = df.drop(columns = ['PM2.5','Date']).copy()
y = df['PM2.5']

# In the first step we will split the data in training and validation dataset
X_train, X_valid, y_train, y_valid = train_test_split(X,y, train_size=0.8, random_state = 42)

print(X_train.shape)
print(y_train.shape)
print(X_valid.shape)
print(y_valid.shape)

(2581, 9)
(2581,)
(646, 9)
(646,)


In [4]:
reg = MLPRegressor(hidden_layer_sizes=(64,64,64),activation="relu" ,random_state=1, max_iter=2000)
reg.fit(X_train, y_train)




In [5]:
y_pred = reg.predict(X_valid)   
y_pred                # predicted

array([110.32760411, 219.0220868 ,  72.17190539,  38.41906779,
       167.73180197, 109.86106253,  84.27064316, 161.50745042,
        68.03916594,  31.00073206, 111.89600406,  89.07703587,
       134.24366445, 163.4302444 , 113.07121068, 127.99749084,
        76.43409028, 113.41737123,  46.77521118, 117.95677627,
       126.78789175, 135.20451441, 136.46654246, 136.05370922,
        71.79507248, 102.46592398, 103.64365385,  82.51380334,
       248.09939166, 187.08476154, 129.23560149,  40.15517547,
       127.86821526,  89.40818983,  33.20782033, 106.2063825 ,
        96.22755737, 237.79016307, 193.43203814, 178.20225153,
        39.93248765, 106.01356553,  48.96821186,  57.29320808,
        44.12872788,  49.68221927,  34.9444732 ,  31.36067882,
        14.54526943,  74.71365325,  16.51991617, 273.78519838,
        71.41833803,  59.68841225, 105.27552654, 172.84618635,
       132.7589672 , 114.18623628, 130.26135029,  48.13125007,
        58.15593542,  69.90882983, 123.59259649, 177.13

In [6]:
print("Printing training stats")
y_pred_train = reg.predict(X_train) 
print("Mean absolute error: %.2f" % mean_absolute_error(y_train, y_pred_train)) # mean absolute error 
print("Mean squared error: %.2f" % mean_squared_error(y_train, y_pred_train)) # The mean squared error
print("Root mean square error: %.2f" % sqrt(mean_squared_error(y_train, y_pred_train))) # RMSE
print("Coefficient of determination: %.2f" % r2_score(y_train, y_pred_train)) # The coefficient of determination: 1 is perfect prediction

Printing training stats
Mean absolute error: 17.47
Mean squared error: 612.32
Root mean square error: 24.75
Coefficient of determination: 0.85


In [7]:
print("Printing testing stats")
print("Mean absolute error: %.2f" % mean_absolute_error(y_valid, y_pred)) # mean absolute error 
print("Mean squared error: %.2f" % mean_squared_error(y_valid, y_pred)) # The mean squared error
print("Root mean square error: %.2f" % sqrt(mean_squared_error(y_valid, y_pred))) # RMSE
print("Coefficient of determination: %.2f" % r2_score(y_valid, y_pred)) # The coefficient of determination: 1 is perfect prediction

Printing testing stats
Mean absolute error: 19.11
Mean squared error: 800.10
Root mean square error: 28.29
Coefficient of determination: 0.79


In [8]:
for Y, y in zip(y_valid, y_pred):
  print(round(Y), round(y))

56 110
178 219
61 72
95 38
81 168
117 110
86 84
183 162
69 68
39 31
135 112
94 89
179 134
99 163
104 113
153 128
58 76
143 113
39 47
102 118
86 127
176 135
117 136
250 136
105 72
106 102
132 104
72 83
190 248
191 187
105 129
44 40
149 128
103 89
78 33
112 106
70 96
272 238
193 193
144 178
32 40
136 106
54 49
37 57
92 44
79 50
25 35
27 31
9 15
52 75
40 17
63 274
81 71
60 60
87 105
242 173
178 133
75 114
109 130
53 48
38 58
66 70
150 124
211 177
107 61
155 116
148 112
40 36
26 42
20 17
72 75
39 49
69 69
84 49
38 45
77 81
110 114
54 52
159 157
52 53
70 67
85 149
58 62
61 48
111 99
80 76
145 108
44 62
103 98
82 75
129 106
159 176
109 127
221 237
78 99
48 59
166 139
49 72
36 60
61 51
77 110
69 44
141 110
28 56
259 184
102 110
115 57
126 104
138 113
42 37
33 51
40 33
267 230
178 63
77 85
52 97
132 73
121 92
40 67
106 128
192 178
121 83
58 39
18 33
32 23
201 170
104 128
31 39
155 176
15 12
74 65
100 73
57 43
51 57
51 44
68 114
70 100
16 20
64 50
206 163
91 99
89 56
40 45
98 90
141 107
149 191