In [1]:
# import all modules

In [2]:
import glob
import pandas as pd
import logging

In [3]:
# List the .json files in the current working directory

In [4]:
# Collect the input .json files from the current working directory.
# The load step writes its result into this same directory, so that file is
# excluded here; otherwise a second run would feed the pipeline its own output.
# sorted() makes the processing order independent of the directory listing order.
pipeline_output = 'bank_market_cap_gbp.json'
jsons = sorted(path for path in glob.glob('*.json') if path != pipeline_output)
jsons

['bank_market_cap_1.json', 'bank_market_cap_2.json']

In [5]:
# Extract function for .json files

In [6]:
def extract(json):
    """Read one JSON source into a DataFrame.

    Parameters
    ----------
    json : str or file-like
        Path (or buffer) handed straight to ``pd.read_json``.

    Returns
    -------
    pandas.DataFrame
        The frame parsed by ``pd.read_json``; columns are taken from the
        input as-is and are not validated here.
    """
    # No placeholder frame is needed: read_json builds the result on its own.
    return pd.read_json(json)

In [7]:
# Extract all .json files and combine them into one DataFrame

In [8]:
def extract_all(jsons):
    """Extract every file in ``jsons`` and stack the results.

    Parameters
    ----------
    jsons : iterable of str
        Paths of the JSON files to read; each one is passed to ``extract``.

    Returns
    -------
    pandas.DataFrame
        All extracted frames concatenated row-wise, with the index renumbered
        from zero so rows from different files do not share index labels.
    """
    frames = [extract(path) for path in jsons]
    combined = pd.concat(frames)
    return combined.reset_index(drop=True)

In [9]:
# Look up a currency's exchange rate in a rates .csv file (used below for GBP from exchange_rates.csv)

In [10]:
def rate(csv, currency):
    """Return the exchange rate stored for ``currency`` in a rates CSV.

    Parameters
    ----------
    csv : str or file-like
        CSV source read with its first column as the index.
    currency : str
        Index label of the row to look up.

    Returns
    -------
    The value in the ``'Rates'`` column of the row labelled ``currency``.
    """
    rates_table = pd.read_csv(csv, index_col=0)
    return rates_table.loc[currency, 'Rates']

In [11]:
# Transform function: convert the market cap with the exchange rate, round it, and rename the column

In [12]:
def transform(df, rate, round_to, new_column_name):
    """Convert, round, and rename the market-cap column.

    The input frame is left untouched; a new frame is returned. This keeps
    the calling cell safe to re-run, because the source column still exists
    on the original frame afterwards.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``'Market Cap (US$ Billion)'`` column.
    rate : number
        Multiplier applied to every value of that column.
    round_to : int
        Number of decimal places the converted values are rounded to.
    new_column_name : str
        Name given to the converted column in the result.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the market-cap column multiplied by ``rate``,
        rounded to ``round_to`` decimals, and renamed to ``new_column_name``.
        Column order is preserved.

    Raises
    ------
    KeyError
        If ``df`` has no ``'Market Cap (US$ Billion)'`` column.
    """
    source_column = 'Market Cap (US$ Billion)'

    # Multiply by the exchange rate, then round to the requested precision.
    converted = (df[source_column] * rate).round(round_to)

    # assign() and rename() both return new frames, so the caller's df is
    # never modified.
    return (
        df.assign(**{source_column: converted})
          .rename(columns={source_column: new_column_name})
    )

In [13]:
# Load function that loads the transformed data to a new .json file

In [14]:
def load(df, filename):
    """Serialize ``df`` as JSON to ``filename``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to write, using ``DataFrame.to_json`` with its default options.
    filename : str or file-like
        Destination passed to ``to_json``.

    Returns
    -------
    None
    """
    df.to_json(path_or_buf=filename)

In [15]:
# Logging setup

In [16]:
# Set logging level to Info

In [17]:
# Route root-logger records of level INFO and above to the file etl.log.
# NOTE: basicConfig does nothing when the root logger already has handlers,
# so this must run before anything else configures logging in the kernel.
logging.basicConfig(
    level=logging.INFO,
    filename='etl.log',
)

In [18]:
# Log function

In [19]:
def log(message):
    """Emit ``message`` on the root logger at INFO level.

    Parameters
    ----------
    message : str
        Text of the log record.
    """
    logging.log(logging.INFO, message)

In [20]:
# ETL PIPELINE

In [21]:
# Mark the start of the whole ETL run in the log.
log('Etl pipeline process started')

In [22]:
# Mark the start of the extract phase.
log('Extract process started')

In [23]:
# Read every file listed in `jsons` and combine them into a single DataFrame.
extracted_data = extract_all(jsons)

In [24]:
# Mark the end of the extract phase.
log('Extract process ended')

In [25]:
# Mark the start of the transform phase.
log('Transform process started')

In [26]:
# Store the looked-up value under its own name. Assigning it to `rate` would
# replace the rate() function with a number, and any later rate(...) call
# (including re-running this cell) would fail with "object is not callable".
gbp_rate = rate('exchange_rates.csv', 'GBP')

# Convert the market cap to GBP, round to 3 decimals, and rename the column.
transformed_data = transform(extracted_data, gbp_rate, 3, 'Market Cap (GBP$ Billion)')

In [27]:
# Mark the end of the transform phase.
log('Transform process ended')

In [28]:
# Mark the start of the load phase.
log('Load process started')

In [29]:
# Write the transformed frame to bank_market_cap_gbp.json.
load(transformed_data, 'bank_market_cap_gbp.json')

In [30]:
# Mark the end of the load phase.
log('Load process ended')

In [31]:
# Mark the end of the whole ETL run in the log.
log('Etl pipeline process ended')