In [26]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

from sklearn.metrics import r2_score

Adding Dataset

In [27]:
df=pd.read_csv('mumbai-monthly-rains.csv')
df=df.round(decimals = 2)
print("Data heads:")
print(df.head())

Data heads:
   Year    Jan   Feb    Mar  April     May    June    July     Aug    Sept  \
0  1901  13.12  0.00   0.00   3.95   17.14  640.71  888.37  545.05   64.27   
1  1902   0.00  0.00   0.00   0.00    0.36  248.00  408.43  566.60  688.91   
2  1903   0.00  0.00   0.84   0.00  220.57  370.85  902.45  602.42  264.59   
3  1904   0.00  0.00  11.38   0.00    0.00  723.08  390.89  191.58   85.70   
4  1905   0.66  1.71   0.00   0.00    0.00  123.87  581.83  167.38  172.30   

      Oct    Nov    Dec    Total  
0    9.87   0.00   0.00  2182.48  
1   28.65   0.49  19.53  1960.97  
2  157.89   0.00   0.00  2519.61  
3   38.68   0.00   0.00  1441.32  
4    7.37  24.90   0.00  1080.02  


Modelling

In [28]:
df_M=pd.melt(df,id_vars=['Year'],var_name='Month',value_name='RainFall(mm)')
df_M.head()

Unnamed: 0,Year,Month,RainFall(mm)
0,1901,Jan,13.12
1,1902,Jan,0.0
2,1903,Jan,0.0
3,1904,Jan,0.0
4,1905,Jan,0.66


Month column Encoding

In [29]:
Month_map={'Jan':1,'Feb':2,'Mar' :3,'April':4,'May':5,'June':6,'July':7,'Aug':8,'Sept':9,
   'Oct':10,'Nov':11,'Dec':12}
df_M['Month']=df_M['Month'].map(Month_map)

In [30]:
X=np.asanyarray(df_M[['Year','Month']]).astype('int')
y=np.asanyarray(df_M['RainFall(mm)']).astype('int')
print(y.shape)
print(X.shape)

(1573,)
(1573, 2)


  X=np.asanyarray(df_M[['Year','Month']]).astype('int')


Splitting data set

In [31]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=10)

Polynomial Regression

In [32]:
poly_reg = PolynomialFeatures(degree = 2)
X_poly = poly_reg.fit_transform(X_train)
p_regressor=LinearRegression()
p_regressor.fit(X_poly, y_train)

Model Evaluation using R2

In [33]:
p_y_pred_test=p_regressor.predict(poly_reg.transform(X_test))
np.set_printoptions(precision=2)
print(np.concatenate((p_y_pred_test.reshape(len(p_y_pred_test),1),y_test.reshape(len(y_test),1)),1))

[[1.64e+02 0.00e+00]
 [1.90e+02 3.43e+02]
 [2.22e+03 2.44e+03]
 [1.90e+02 0.00e+00]
 [1.95e+02 0.00e+00]
 [1.60e+02 6.00e+00]
 [1.68e+02 1.15e+03]
 [1.86e+02 0.00e+00]
 [1.59e+02 1.00e+00]
 [1.63e+02 0.00e+00]
 [1.84e+02 2.64e+02]
 [2.10e+03 2.43e+03]
 [1.87e+02 0.00e+00]
 [1.63e+02 5.40e+02]
 [1.64e+02 7.42e+02]
 [1.82e+02 8.92e+02]
 [1.90e+02 0.00e+00]
 [1.61e+02 5.00e+00]
 [1.87e+02 0.00e+00]
 [2.17e+03 1.84e+03]
 [1.69e+02 0.00e+00]
 [2.13e+03 2.31e+03]
 [1.77e+02 2.85e+02]
 [1.79e+02 0.00e+00]
 [1.86e+02 2.30e+01]
 [1.93e+02 0.00e+00]
 [1.82e+02 0.00e+00]
 [1.59e+02 0.00e+00]
 [1.70e+02 1.28e+02]
 [1.90e+02 2.10e+01]
 [1.73e+02 5.00e+01]
 [1.74e+02 8.73e+02]
 [1.86e+02 4.28e+02]
 [1.88e+02 0.00e+00]
 [1.67e+02 0.00e+00]
 [1.69e+02 0.00e+00]
 [1.95e+02 4.93e+02]
 [1.77e+02 0.00e+00]
 [2.24e+03 2.47e+03]
 [1.92e+02 0.00e+00]
 [1.91e+02 0.00e+00]
 [1.98e+02 0.00e+00]
 [1.78e+02 7.30e+01]
 [1.94e+02 0.00e+00]
 [1.81e+02 0.00e+00]
 [1.88e+02 0.00e+00]
 [1.97e+02 7.90e+01]
 [1.72e+02 6.

In [34]:
r2_score(y_test,p_y_pred_test)

0.7841691091479243

In [36]:
# Evaluate 
from sklearn.metrics import mean_squared_error
print("Mean Squared Error: %.4f"
      % mean_squared_error(y_test,p_y_pred_test, squared=False))

print('Variance Score: %.4f' % p_regressor.score(poly_reg.fit_transform(X_test), y_test))

Mean Squared Error: 318.4774
Variance Score: 0.7842


