In [76]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as px
import plotly.express as px 

import os 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error 
from sklearn.metrics import mean_squared_error 
from sklearn.metrics import median_absolute_error

## Loading dataset

In [77]:
df = pd.read_csv('Inflation Variaton Rate.csv', encoding='ISO-8859-1')

In [78]:
df

Unnamed: 0,country,Consumer Price Index,currency,from,End Date:,1 year variation in %,1 month variation in %
0,Argentina,Argentina CPI (Indec),ARS-Argentine peso,1/31/2003,3/31/2023,1.04,0.08
1,Austria,Austrian CPI (Statistik Austria (Vienna)),ATS-Austrian shilling,12/31/1958,3/31/2023,0.09,0.01
2,Australia,Australian CPI (ABS),AUD-Australian dollar,9/30/1948,3/31/2023,0.07,0.0
3,Belgium,Belgian CPI (Directorate-general Statistics an...,BEF-Belgian franc,12/31/1955,4/30/2023,0.06,-0.01
4,Brazil,Brazilian CPI (Instituto Brasileiro de Geograf...,BRL-Brazilian real,1/31/1985,3/31/2023,0.05,0.01
5,Canada,Canadian CPI (Statistics canada),CAD-Canadian dollar,1/31/1989,3/31/2023,0.04,0.01
6,Switzerland,Swiss CPI (Bundesamt f??r Statistik),CHF-Swiss franc,1/31/1983,3/31/2023,0.03,0.0
7,Chile,Chile CPI (Chile - National Statistics Institute),CLP-Chilean peso,12/31/1970,3/31/2023,0.11,0.01
8,Morocco,Moroccan CPI (Haut Commissariat au Plan du Maroc),MAD-Moroccan dirham,11/30/2008,3/31/2023,0.09,0.0
9,Czech Republic,Czech CPI (Czech Republic - Czech Statistical ...,CZK-Czech koruna,12/31/1991,3/31/2023,0.15,0.0


## Observe coorlation between columns


In [79]:
df.corr()

Unnamed: 0,1 year variation in %,1 month variation in %
1 year variation in %,1.0,0.885821
1 month variation in %,0.885821,1.0


In [80]:
#plt.figure(figsize=(8,2))
#sns.heatmap(df.corr(),annot = True, linewidths=.5)  # variables are high not correlated to each other

In [81]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49 entries, 0 to 48
Data columns (total 7 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   country                 49 non-null     object 
 1   Consumer Price Index    49 non-null     object 
 2   currency                49 non-null     object 
 3   from                    49 non-null     object 
 4   End Date:               49 non-null     object 
 5   1 year variation in %   49 non-null     float64
 6   1 month variation in %  49 non-null     float64
dtypes: float64(2), object(5)
memory usage: 2.8+ KB


In [82]:
# Convert 'from' and 'End Date:' columns to datetime format
df['from'] = pd.to_datetime(df['from'])
df['End Date:'] = pd.to_datetime(df['End Date:'])

In [83]:
df.head()

Unnamed: 0,country,Consumer Price Index,currency,from,End Date:,1 year variation in %,1 month variation in %
0,Argentina,Argentina CPI (Indec),ARS-Argentine peso,2003-01-31,2023-03-31,1.04,0.08
1,Austria,Austrian CPI (Statistik Austria (Vienna)),ATS-Austrian shilling,1958-12-31,2023-03-31,0.09,0.01
2,Australia,Australian CPI (ABS),AUD-Australian dollar,1948-09-30,2023-03-31,0.07,0.0
3,Belgium,Belgian CPI (Directorate-general Statistics an...,BEF-Belgian franc,1955-12-31,2023-04-30,0.06,-0.01
4,Brazil,Brazilian CPI (Instituto Brasileiro de Geograf...,BRL-Brazilian real,1985-01-31,2023-03-31,0.05,0.01


In [84]:
df.describe()

Unnamed: 0,1 year variation in %,1 month variation in %
count,49.0,49.0
mean,0.110816,0.007347
std,0.159136,0.012711
min,0.01,-0.01
25%,0.05,0.0
50%,0.07,0.01
75%,0.1,0.01
max,1.04,0.08


In [85]:
#df.columns

In [86]:
#df.boxplot()

In [87]:
df.loc[:, ['1 year variation in %']].mean()

1 year variation in %    0.110816
dtype: float64

In [88]:
df.corr()

Unnamed: 0,1 year variation in %,1 month variation in %
1 year variation in %,1.0,0.885821
1 month variation in %,0.885821,1.0


In [89]:
#Boxplot for Month by Weekly Sales: (Outliers obviously shown at October,November and December)
#fig=px.box(df,x=df['1 year variation in %'],y=df['Consumer Price Index'])
#fig.show()

## Removing column due to high correlation


In [90]:
df.drop(['1 month variation in %'],axis=1,inplace=True)

In [91]:
#df

## Encoding Data

In [92]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [93]:
# Convert categorical variables into numerical format using label encoding
from sklearn.preprocessing import OrdinalEncoder
oe = OrdinalEncoder()
cols_to_encode = ['country', 'Consumer Price Index', 'currency', 'from', 'End Date:', '1 year variation in %']
df[cols_to_encode] = oe.fit_transform(df[cols_to_encode])

In [94]:
#df

In [95]:
df.columns

Index(['country', 'Consumer Price Index', 'currency', 'from', 'End Date:',
       '1 year variation in %'],
      dtype='object')

## Scaling

In [96]:
from sklearn.preprocessing import MinMaxScaler
Scale = MinMaxScaler()
df[['country','currency','from','End Date:','Consumer Price Index']]=Scale.fit_transform(df[['country','currency','from','End Date:','Consumer Price Index']])

In [97]:
#df

## specify x and y

In [98]:
y = df.iloc[::,-1]
#y

In [99]:
#Drop the target y
df.drop(['1 year variation in %'],axis=1,inplace=True)  
#df

In [100]:
df.columns

Index(['country', 'Consumer Price Index', 'currency', 'from', 'End Date:'], dtype='object')

In [101]:
X = df.iloc[:,:]
X.shape

(49, 5)

In [102]:
df.corr()

Unnamed: 0,country,Consumer Price Index,currency,from,End Date:
country,1.0,0.78551,0.857041,-0.036837,0.122893
Consumer Price Index,0.78551,1.0,0.665918,-0.060082,0.081929
currency,0.857041,0.665918,1.0,-0.025147,-0.017556
from,-0.036837,-0.060082,-0.025147,1.0,-0.162914
End Date:,0.122893,0.081929,-0.017556,-0.162914,1.0


# Build Model


In [103]:
# Split data into train and test sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=2, random_state=48)

## fitting model to trainig

In [104]:
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

LinearRegression()

In [105]:
print('Linear Regression Train Score is :' ,lr_model.score(X_train,y_train))
print('Linear Regression Test Score is :' , lr_model.score(X_test,y_test))

Linear Regression Train Score is : 0.09880986878508546
Linear Regression Test Score is : 0.8622579638039235


## predict the test dataset


In [106]:
y_pred =lr_model.predict( X_test)

In [107]:
#coofficient 
print(lr_model.coef_)

[-8.29648511  2.17529339  5.39923277  1.0955094  -2.21997453]


In [108]:
#intercept
print(lr_model.intercept_)

6.825473932060281


## evaluating model

In [109]:
#Calculating Mean Absolute Error
MAEValue = mean_absolute_error(y_test, y_pred, multioutput='uniform_average') 
print('Mean Absolute Error Value is : ', MAEValue)

Mean Absolute Error Value is :  0.5564517457417568


In [110]:
#Calculating Mean Squared Error
MSEValue = mean_squared_error(y_test, y_pred, multioutput='uniform_average') 
print('Mean Squared Error Value is : ', MSEValue)

Mean Squared Error Value is :  0.30991958144117204


In [111]:
#Calculating Median Absolute Error
MdSEValue = median_absolute_error(y_test, y_pred)
print('Median Absolute Error Value is : ', MdSEValue )

Median Absolute Error Value is :  0.5564517457417568


In [112]:
from sklearn.metrics import r2_score
r2_score(y_test,y_pred)

0.8622579638039235

In [113]:
R2_accurecy = r2_score(y_test,y_pred)

In [123]:
print(np.round (R2_accurecy*100,2))

86.23


In [None]:
#Hazem Adel Elbatawy

In [None]:
"""It's difficult to say whether these scores are good or bad for the model without knowing more about the specific 
problem and the scale of the target variable. However, in general:

- A lower Mean Absolute Error (MAE) value indicates better predictive performance of the model. 
The MAE value you obtained is 0.556, which means that on average, the model's predictions are off
by 0.556 units from the actual values.

- A lower Mean Squared Error (MSE) value also indicates better predictive performance of the model.
 The MSE value you obtained is 0.3099, which means that the model's predictions have less variance from the actual values.

- A lower Median Absolute Error (MdAE) value indicates better predictive performance of the model. 
  The MdAE value you obtained is 0.556, which means that the median prediction error of the model is 0.556 units.

- An R-squared (R2) score of 0.8622 indicates that the model explains 86.22% of the variability in the target variable 
 around its mean. This is a good score, but it's important to note that R2 score alone should not be used to evaluate the
 model's performance.

Overall, these scores suggest that the model has some predictive power, but it's important to compare these scores 
with the performance of other models or with a baseline performance to get a sense of how well your model is performing."""
""