In [18]:
import pandas as pd
import numpy as np

In [19]:
# Load the exchange-rate table from the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer='exchange_rate data/exchange_rate.csv')
# Bare last expression: the notebook renders a preview of the loaded frame.
df

Unnamed: 0,Code,buying value,average value,selling value,date
0,RUB,16.845112,17.013546,17.181980,2022-09-22
1,SAR,273.661511,276.397850,279.134188,2022-09-22
2,SDG,1.816307,1.834468,1.852629,2022-09-22
3,SEK,92.656354,93.582824,94.509294,2022-09-22
4,SGD,724.852073,732.099862,739.347650,2022-09-22
...,...,...,...,...,...
114354,RS,11.280671,11.371644,11.462617,2012-01-02
114355,SRL,160.245385,161.537686,162.829987,2012-01-02
114356,ZAR,73.796963,74.3921,74.987237,2012-01-02
114357,USD,599.128064,603.959742,608.79142,2012-01-02


In [20]:
# Keep only the rows whose currency code is in the supported list.
currencies = ['GBP', 'EUR', 'USD', 'KES', 'ETB', 'TZS']
# The callable receives the frame being indexed and returns the boolean row mask.
df = df.loc[lambda frame: frame["Code"].isin(currencies)]
df

Unnamed: 0,Code,buying value,average value,selling value,date
8,TZS,0.441721,0.446138,0.450554,2022-09-22
10,USD,1029.652330,1039.947813,1050.243296,2022-09-22
26,ETB,19.492863,19.687772,19.882681,2022-09-22
27,EUR,1011.118588,1021.228752,1031.338917,2022-09-22
28,GBP,1156.917358,1168.485363,1180.053368,2022-09-22
...,...,...,...,...,...
114330,GBP,926.3119,933.782157,941.252414,2012-01-02
114334,ETB,35.135354,35.418704,35.702054,2012-01-02
114335,KES,7.158469,7.216199,7.273929,2012-01-02
114341,TZS,0.384747,0.38785,0.390953,2012-01-02


In [21]:
# Narrow the frame to the code, buying value, selling value and date columns.
df = df.filter(
    items=['Code', 'buying value', 'selling value', 'date'],
    axis='columns',
)
df.head()

Unnamed: 0,Code,buying value,selling value,date
8,TZS,0.441721,0.450554,2022-09-22
10,USD,1029.65233,1050.243296,2022-09-22
26,ETB,19.492863,19.882681,2022-09-22
27,EUR,1011.118588,1031.338917,2022-09-22
28,GBP,1156.917358,1180.053368,2022-09-22


In [22]:
# Report, for each column, whether it contains at least one missing value.
df.isnull().any(axis=0)

Code             False
buying value     False
selling value    False
date             False
dtype: bool

In [23]:
# Strip leading/trailing whitespace and remove comma characters from the
# buying value and selling value columns. The .str accessor is used, so both
# columns must hold strings when this cell runs.
df = df.assign(**{
    column: df[column].str.strip().str.replace(',', '', regex=False)
    for column in ('buying value', 'selling value')
})

In [24]:
# Cast the buying value and selling value columns to the float dtype, then
# print the frame summary so the resulting dtypes can be checked.
df = df.astype(dict.fromkeys(['buying value', 'selling value'], 'float'))
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15985 entries, 8 to 114357
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Code           15985 non-null  object 
 1   buying value   15985 non-null  float64
 2   selling value  15985 non-null  float64
 3   date           15985 non-null  object 
dtypes: float64(2), object(2)
memory usage: 624.4+ KB


In [25]:
# Keep only the rows whose selling value is strictly greater than the buying value.
df = df.loc[df['selling value'].gt(df['buying value'])]
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15967 entries, 8 to 114357
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Code           15967 non-null  object 
 1   buying value   15967 non-null  float64
 2   selling value  15967 non-null  float64
 3   date           15967 non-null  object 
dtypes: float64(2), object(2)
memory usage: 623.7+ KB


In [28]:
# Show rows that repeat an earlier row across all columns
# (the first occurrence of each repeated row is not flagged).
df.loc[df.duplicated(keep='first')]

Unnamed: 0,Code,buying value,selling value,date
10575,GBP,1339.220276,1366.001976,2021-11-08
10591,KES,8.902305,9.080334,2021-11-08
10636,TZS,0.432151,0.440793,2021-11-08
10638,USD,993.450002,1013.316996,2021-11-08
19054,TZS,0.416907,0.425245,2021-02-25
19058,USD,967.302327,986.646419,2021-02-25
19090,ETB,23.993451,24.473271,2021-02-25
19092,EUR,1177.884043,1201.439345,2021-02-25
19094,GBP,1369.84519,1397.239327,2021-02-25
19108,KES,8.809706,8.985882,2021-02-25


In [32]:
# Drop repeated rows, keeping the first occurrence of each, and rebind df to
# the de-duplicated frame.
df = df.drop_duplicates()
# Verify that no duplicated rows remain (expected result: False).
df.duplicated().any()

False

In [34]:
# Print the frame summary (entry count, columns, non-null counts, dtypes, memory usage).
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15943 entries, 8 to 114357
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Code           15943 non-null  object 
 1   buying value   15943 non-null  float64
 2   selling value  15943 non-null  float64
 3   date           15943 non-null  object 
dtypes: float64(2), object(2)
memory usage: 622.8+ KB
