# In [3]:
from datetime import datetime
import pandas as pd
import time 
import json 
import os
from urllib.error import HTTPError

# Data Mining 

# In [10]:
def generate_club_token_df(filename="Raw Data/clubs_and_token_names.csv"):
    """Load the club/token lookup table from a semicolon-delimited CSV file.

    Args:
        filename: Path of the CSV file to read; its first row is used as the
            header. Defaults to "Raw Data/clubs_and_token_names.csv".

    Returns:
        pandas.DataFrame holding the parsed contents of the file.
    """
    csv_path = filename
    if isinstance(csv_path, str):
        # The caller in this file spells the path with a Windows separator.
        # Forward slashes are accepted on Windows too, so normalising keeps
        # the same call working on every platform.
        csv_path = csv_path.replace("\\", "/")
    return pd.read_csv(filepath_or_buffer=csv_path, delimiter=";", header=0)

def download_dataset(token_name, *, period1=1525651200, period2=1683417600,
                     club_names=None):
    """Download one token's daily price history from Yahoo Finance and save it.

    The downloaded table gets two identifying columns (``Token_Name`` and
    ``Club_Name``), is reduced to the columns Club_Name, Token_Name, Date,
    Open, High, Low, Close, Volume (anything else in the download is dropped)
    and is written to
    "Raw Data/Individual Token Historical Data/<token_name> Price Historical Data.csv".

    Args:
        token_name: Ticker symbol inserted into the download URL and used in
            the output file name.
        period1: Start of the requested interval as a Unix timestamp. The
            default, 1525651200, is 2018-05-07 00:00:00 UTC.
        period2: End of the requested interval as a Unix timestamp. The
            default, 1683417600, is 2023-05-07 00:00:00 UTC.
        club_names: Optional mapping from token name to club name. When
            omitted, the module-level ``club_token_mapping`` is used, so that
            global must exist before the call.

    Returns:
        None. On an HTTP error a message naming the token is printed and no
        file is written.
    """
    if club_names is None:
        # Fall back to the global lookup built by the ``__main__`` block.
        club_names = club_token_mapping

    query_url = (
        f'https://query1.finance.yahoo.com/v7/finance/download/{token_name}'
        f'?period1={period1}&period2={period2}'
        '&interval=1d&events=history&includeAdjustedClose=true'
    )

    # Only the download itself can raise HTTPError, so keep the try narrow.
    try:
        df = pd.read_csv(query_url)
    except HTTPError as err:
        print(f"{token_name}: download failed ({err})")
        return

    # Append the primary key fields to the dataframe for identification.
    df["Token_Name"] = token_name
    df["Club_Name"] = df["Token_Name"].map(club_names)
    # Arrange the columns in a fixed order; this also drops unlisted columns.
    df = df[["Club_Name", "Token_Name", "Date", "Open", "High", "Low", "Close", "Volume"]]

    # Make sure the target folder exists before writing the CSV.
    out_dir = "Raw Data/Individual Token Historical Data"
    os.makedirs(out_dir, exist_ok=True)
    # NOTE: the row index is still written as the first column, as before,
    # so existing readers of these files see the same layout.
    df.to_csv(f"{out_dir}/{token_name} Price Historical Data.csv")
        
def concat_and_save_all_dfs():
    """Merge every per-token CSV into one combined CSV file.

    Reads all ``.csv`` files found in
    "<cwd>/Raw Data/Individual Token Historical Data", concatenates them in
    file-name order with a fresh integer index, and writes the result to
    "<cwd>/Raw Data/All Fan Tokens Price Historical Data.csv".

    Raises:
        ValueError: If the input folder contains no CSV files.
    """
    # Build paths with os.path.join: backslashes inside ordinary string
    # literals are invalid escape sequences and only work as separators on
    # Windows.
    raw_dir = os.path.join(os.getcwd(), "Raw Data")
    token_dir = os.path.join(raw_dir, "Individual Token Historical Data")

    # os.listdir returns every entry in arbitrary order; keep only CSV files
    # and sort them so the combined output is reproducible.
    csv_files = sorted(
        name for name in os.listdir(token_dir) if name.lower().endswith(".csv")
    )
    if not csv_files:
        raise ValueError(f"No CSV files to concatenate in {token_dir!r}")

    df_all = pd.concat(
        [pd.read_csv(os.path.join(token_dir, name)) for name in csv_files],
        ignore_index=True,
    )
    # NOTE: the row index is still written as the first column, as before,
    # so existing readers of this file see the same layout.
    df_all.to_csv(os.path.join(raw_dir, "All Fan Tokens Price Historical Data.csv"))

if __name__ == "__main__":
    # Load the lookup table. The path uses a forward slash: "\c" inside an
    # ordinary string literal is an invalid escape sequence.
    club_token_df = generate_club_token_df("Raw Data/clubs_and_token_names.csv")
    token_list = club_token_df["token_name"].to_list()
    # download_dataset reads ``club_token_mapping`` as a module-level global,
    # so it has to be defined before the download loop starts.
    club_token_mapping = dict(
        zip(club_token_df["token_name"], club_token_df["club_or_organisation_name"])
    )
    # Fetch each token's history, then merge the per-token files into one.
    for token_name in token_list:
        download_dataset(token_name)
    concat_and_save_all_dfs()
    