## Importing required modules for the assignment


In [6]:
import os
import zipfile
import datetime
import pandas as pd
from pymongo import MongoClient
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine, Column, Float, String, Integer, DateTime, inspect
import numpy as np


## Data gathering
Gather the zip files, and extract them in a single directory so all time series csvs are in a single place for easier processing

In [2]:
import os
import zipfile

def extract_all_csvs(zip_file_path, output_folder):
    # Ensure the output directory exists
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        # List all contained files
        inner_files = zip_ref.namelist()
        for file in inner_files:
            # If the file is a zip, extract it and then process the extracted zip
            if file.endswith('.zip'):
                # Extract this zip to a temp directory
                temp_dir = os.path.join(output_folder, 'temp')
                if not os.path.exists(temp_dir):
                    os.makedirs(temp_dir)
                zip_ref.extract(file, temp_dir)
                # Recursively extract CSVs from it
                inner_zip_path = os.path.join(temp_dir, file)
                extract_all_csvs(inner_zip_path, output_folder)
                os.remove(inner_zip_path)  # Cleanup the temporary zip file
            elif file.endswith('.csv') and 'historical_fx_rates' in file.lower():
                # Extract CSVs directly to the output folder
                zip_ref.extract(file, output_folder)

def unzip_files(directory, output_folder):
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.zip'):
                file_path = os.path.join(root, file)
                extract_all_csvs(file_path, output_folder)
                print(f"Extracted: {file}")

# Define the directory path containing zip files
directory_path = '/Users/yogyasharma/Downloads/MG-GY Data Engineering/HW3/zip_files'
output_directory = '/Users/yogyasharma/Downloads/MG-GY Data Engineering/HW3/extracted_csvs'

# Run the extraction
unzip_files(directory_path, output_directory)


Extracted: drive-download-20240411T192414Z-001.zip
Extracted: drive-download-20240411T192414Z-003.zip
Extracted: drive-download-20240411T192414Z-002.zip
Extracted: drive-download-20240411T192414Z-004.zip


In [4]:
from datetime import datetime

timestamp_ms = 1262538060000 / 1000  # Converting milliseconds to seconds
date_time = datetime.fromtimestamp(timestamp_ms).strftime('%d-%m-%Y %H:%M:%S')

print(date_time)


03-01-2010 12:01:00


## Obtain common time interval among all fx pairs
To get a common time interval among all time series data, we obtain latest start date and earliest end date for all fx pairs


In [7]:
def get_start_end_time(directory):
    print('starting')
    first_datetime = None
    latest_datetime = None
    earliest_datetime = None
    for root, _, files in os.walk(directory):
        print('starting...')
        for file in files:
            print('in for loop')
            if file.endswith('.csv'):
                file_path = os.path.join(root, file)

                #Get the total number of rows in the CSV file
                total_rows = sum(1 for line in open(file_path))

                # Read the last row of the CSV file into a DataFrame
                df_end = pd.read_csv(file_path, skiprows=range(1, total_rows-1))

                if 'datetime' in df_end.columns:
                    last_row_index = len(df_end) - 1
                    last_datetime = df_end["datetime"].iloc[last_row_index]

                # Update earliest_datetime if it's None or last_datetime is earlier
                if earliest_datetime is None or last_datetime < earliest_datetime:
                    earliest_datetime = last_datetime
                print('last date for file:', file, ':', last_datetime)
                # Read only the second row of the CSV file into a DataFrame
                df_beg = pd.read_csv(file_path, nrows=2)
                # Check if the DataFrame has at least 2 rows and 'datetime' column exists
                if len(df_beg) >= 2 and 'datetime' in df_beg.columns:
                    first_datetime = df_beg["datetime"].iloc[1]

                if latest_datetime is None or first_datetime > latest_datetime:
                        latest_datetime = first_datetime
                print('first date for file: ', file,':', first_datetime)

    return latest_datetime, earliest_datetime



start_time, end_time = get_start_end_time(output_directory)
print('latest start date for all files: ',start_time)
print('earliest end date for all files:', end_time)

starting
starting...
starting...
starting...
in for loop
last date for file: SGDJPY.csv : 2023-01-25 06:10:00
first date for file:  SGDJPY.csv : 2021-01-08 00:09:00
in for loop
last date for file: AUDCAD.csv : 2023-01-24 23:58:00
first date for file:  AUDCAD.csv : 2010-01-03 17:43:00
in for loop
last date for file: HKDJPY.csv : 2023-01-25 05:55:00
first date for file:  HKDJPY.csv : 2010-01-03 17:20:00
in for loop
last date for file: GBPCAD.csv : 2023-01-25 05:41:00
first date for file:  GBPCAD.csv : 2010-01-03 17:18:00
in for loop
last date for file: USDNOK.csv : 2023-01-25 06:30:00
first date for file:  USDNOK.csv : 2010-01-03 17:02:00
in for loop
last date for file: EURSEK.csv : 2023-01-24 23:59:00
first date for file:  EURSEK.csv : 2010-01-03 17:33:00
in for loop
last date for file: GBPAUD.csv : 2023-01-25 05:39:00
first date for file:  GBPAUD.csv : 2010-01-03 17:02:00
in for loop
last date for file: EURUSD.csv : 2023-01-25 05:34:00
first date for file:  EURUSD.csv : 2010-01-03 17:1

In [13]:
directory_path = '/Users/yogyasharma/Downloads/MG-GY Data Engineering/HW3/extracted_csvs/historical_fx_rates'

Fix the time values so that they start at 12 am and end at 11 pm

In [10]:
from datetime import datetime

start_time = pd.Timestamp(datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S'))
end_time = pd.Timestamp(datetime.strptime(end_time, '%Y-%m-%d %H:%M:%S'))
if start_time.hour != 0:
    start_time = pd.Timestamp(start_time.date() + datetime.timedelta(days=1))
if end_time.hour != 23:
    end_time = pd.Timestamp(datetime.combine(end_time.date() - datetime.timedelta(days=1), datetime.time(23)))
print('start_time = ', start_time)
print("end_time = ", end_time)

start_time =  2021-01-08 00:09:00
end_time =  2023-01-24 23:54:00


## Initilaise MongoDB and sql DB for data storage

In [11]:
# Set up the MongoDB client and database
client = MongoClient('mongodb://localhost:27017/')
db = client["MMGY_DE_HW3"]

# Set up the MySQL engine and database
Base = declarative_base()
engine = create_engine(f'sqlite:///HW3_predictability_fx')
Session = sessionmaker(bind=engine)

class FXData(Base):
    __tablename__ = 'fx_data_hourly'
    id = Column(Integer, primary_key=True)
    vw = Column(Float)  # Change the data type to match your DataFrame
    n = Column(Integer)  # Change the data type to match your DataFrame
    timestamp = Column(String)  # Adjust the data type according to your needs


  Base = declarative_base()


## Get data points between start and end times and convert it to have 1 data point for every hour


After converting to hourly format, thus there 24 data points for every day.After this data has been converted, store this info in mongo db and sql db for further processing.

While converting and storing data, the NULL fields are updated by the mean of the previous and the next values, in case there is no next/previous value, it is updated by the nearest value


In [14]:

# Get a list of all csv files in the input folder
csv_files = [csv_file for csv_file in os.listdir(directory_path) if csv_file.endswith('.csv')]

for csv_file in csv_files:
    # Read the csv file and only extract three required columns
    df = pd.read_csv(os.path.join(directory_path, csv_file), usecols=["vw", "n", "datetime"])
    # Convert the 'datetime' column to a pandas datetime object
    df['datetime'] = pd.to_datetime(df['datetime'])
    df_time_interval = df[(df["datetime"] >= start_time) & (df["datetime"] <= end_time)]
    # Resample the data to hourly intervals, taking the mean of price column and sum of num of transaction column
    df_every_hour = df_time_interval.resample('H', on='datetime').agg({"vw": 'mean', "n": 'sum'})
    # Fill missing values with the average of previous and next values
    df_final_hourly = df_every_hour.interpolate(method='linear')
    # For edge cases where the first or last values are missing, you can use 'nearest' method
    df_final_hourly = df_final_hourly.fillna(method='ffill')
    # Re-adding the timestamp column to the dataframe
    df_final_hourly["timestamp"] = df_final_hourly.index

    collection_name = csv_file.split('.')[0] # get the filename without the extension
    collection = db[collection_name]
    collection.insert_many(df_final_hourly.to_dict("records"))

    # Convert DataFrame to SQL and insert into the database
    with Session() as session:
        try:
            df_final_hourly.to_sql('fx_data', con=engine, if_exists='replace', index=False)
            session.commit()
            print("Data inserted successfully into SQL database.")
        except Exception as e:
            session.rollback()
            print("Error:", e)


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


  df_final_hourly = df_final_hourly.fillna(method='ffill')


Data inserted successfully into SQL database.


Now data has been stored in mongoDB and sql DB for every hour.

## Convert hourly data points to 6-hourly data points and calculate additional info like max, min, FD, volatility

For this, we read the documents stored in MongoDB and aggregate data for six hours into a single entry. Also, add additional info about the data like volatility, FD, max, min in that time window.

In [15]:
# Function to calculate Keltner Bands
def calculate_keltner_bands(mean_val, vol_val):
    keltner_upper = [mean_val + n * 0.025 * vol_val for n in range(1, 101)]
    keltner_lower = [mean_val - n * 0.025 * vol_val for n in range(1, 101)]
    return keltner_upper, keltner_lower

# Function to calculate fractal dimension
def calc_fd(prices, keltner_upper, keltner_lower):
    if not prices or not keltner_upper or not keltner_lower:
        return None

    crossings = 0

    for i in range(1, len(prices)):
        prev_price, curr_price = prices[i-1], prices[i]

        # Iterate through Keltner channels
        for j in range(len(keltner_upper)-1):
            upper_crossed_upward = prev_price <= keltner_upper[j] and curr_price > keltner_upper[j+1]
            upper_crossed_downward = prev_price > keltner_upper[j+1] and curr_price <= keltner_upper[j]
            
            lower_crossed_downward = prev_price >= keltner_lower[j] and curr_price < keltner_lower[j+1]
            lower_crossed_upward = prev_price < keltner_lower[j+1] and curr_price >= keltner_lower[j]

            # Count crossings
            crossings += upper_crossed_upward + upper_crossed_downward + lower_crossed_downward + lower_crossed_upward

    price_range = max(prices) - min(prices)
    if price_range == 0:
        return 0  

    fd = crossings / price_range
    return fd

In [17]:
fx_collections = db.list_collection_names()
# Looping through each collection in the list of collections to calculate the asked parameters for each currency pair.

for collection_name in fx_collections:
    fx_collection = db[collection_name]
    # Converting collections to dataframes
    df_1_hour = pd.DataFrame(fx_collection.find({}, {"_id": 0}))

    # Reorganise the data from hourly basis to 6hour basis
    df_6_hour = df_1_hour.resample('6H', on='timestamp').agg(
        VWAP = pd.NamedAgg(column='vw', aggfunc=pd.Series.mean),
        Liquidity = pd.NamedAgg(column='n', aggfunc=pd.Series.mean),
        Max = pd.NamedAgg(column='vw', aggfunc=pd.Series.max),
        Min = pd.NamedAgg(column='vw', aggfunc=pd.Series.min)
    )

    # Re-adding the timestamp column to the dataframe as due to resampling the index shifts.
    df_6_hour["Timestamp"] = df_6_hour.index

    # Adding a new column which calculates "Volatility"
    df_6_hour["Volatility"] = (df_6_hour["Max"] - df_6_hour["Min"]) / df_6_hour["VWAP"]

    # Calculating FD
    fd_arr = [0]

    for i, (mean_val, max_val, min_val, vol) in df_6_hour[["VWAP", "Max", "Min", "Volatility"]][:-1].iterrows():
            #Deal with values when volume is 0, we assign FD value as NaN so it can be dealt with later on
            if vol == 0:
                fd_arr.append(np.nan)
                continue
            start_time, end_time = i, i + pd.Timedelta(hours=6)
            upper_bands, lower_bands = calculate_keltner_bands(mean_val, vol)
            prices = df_1_hour[(df_1_hour["timestamp"] >= start_time) & (df_1_hour["timestamp"] < end_time)]["vw"].tolist()
            fd_arr.append(calc_fd(prices, upper_bands, lower_bands))


    #add a new column to the dataframe and assign the calculated fd values to it.
    df_6_hour["FD"] = fd_arr
    df_6_hour = df_6_hour.interpolate(method='linear')
    df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


    file_path = r'/Users/yogyasharma/Downloads/MG-GY Data Engineering/HW3/{}.csv'.format(collection_name)

    # Save DataFrame to CSV at the specified path
    df_6_hour.to_csv(file_path)  
    print("CSV saved:", collection_name+".csv")


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: CADCHF.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: USDPLN.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: NZDCAD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: USDNOK.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: EURSEK.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: ZARJPY.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: AUDNZD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: USDSEK.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: AUDHKD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: GBPCAD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: GBPAUD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: CADHKD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: CADSGD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: EURCHF.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: EURUSD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: USDTHB.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: USDJPY.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: USDBRL.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: AUDCAD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: GBPJPY.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: CADJPY.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: AUDSGD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: USDHUF.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: CHFHKD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: GBPUSD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: USDCHF.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: EURJPY.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: AUDJPY.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: HKDJPY.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: AUDCHF.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: EURHKD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: EURCZK.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: SGDJPY.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: EURGBP.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: EURAUD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: EURZAR.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: EURHUF.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: USDZAR.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: USDTRY.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: USDCNY.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: GBPPLN.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: GBPHKD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: NZDCHF.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: EURCAD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: USDMXN.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: EURTRY.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: NZDSGD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: USDHKD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: USDSGD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: CHFJPY.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: CHFZAR.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: GBPNZD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: EURDKK.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: USDDKK.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: NZDHKD.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: NZDJPY.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


CSV saved: SGDCHF.csv
CSV saved: EURNOK.csv


  df_6_hour["FD"] = df_6_hour["FD"].fillna(method="ffill")


Now all data has been converted into 6 hourly data points along with data pointers like volatility, FD, max, min, mean etc. Next, we run pycaret module to see and compare the predictions.