In [2]:
import pandas as pd 
import numpy as np
import re
from unidecode import unidecode
import os
pd.options.display.max_rows = 20

## Load the token_prices DataFrames from data mined previously 

In [3]:
df_token_prices_raw = pd.read_csv("Raw Data/All Fan Tokens Price Historical Data.csv",
                                 delimiter = ",", usecols = list(range(2,10)))
df_clubs_token_names = pd.read_csv("Raw Data/clubs_and_token_names.csv",
                                   delimiter = ",")

In [4]:
df_token_prices_raw 

Unnamed: 0,Club_Name,Token_Name,Date,Open,High,Low,Close,Volume
0,AC Milan Fan Token,ACM8538-USD,2021-02-23,6.434881,7.215720,4.278654,5.944448,0.0
1,AC Milan Fan Token,ACM8538-USD,2021-02-24,5.936040,24.811480,5.487044,18.385660,202569267.0
2,AC Milan Fan Token,ACM8538-USD,2021-02-25,17.907387,21.777693,14.779701,14.827024,103583152.0
3,AC Milan Fan Token,ACM8538-USD,2021-02-26,14.754595,16.035860,11.895096,12.321579,17046412.0
4,AC Milan Fan Token,ACM8538-USD,2021-02-27,12.336160,14.357585,12.148839,12.433650,10146625.0
...,...,...,...,...,...,...,...,...
45707,Young Boys Fan Token,YBO-USD,2023-05-03,0.501151,0.510026,0.474290,0.499208,21902.0
45708,Young Boys Fan Token,YBO-USD,2023-05-04,0.499161,0.509820,0.484603,0.494228,21723.0
45709,Young Boys Fan Token,YBO-USD,2023-05-05,0.494231,0.503893,0.471165,0.483334,20773.0
45710,Young Boys Fan Token,YBO-USD,2023-05-06,,,,,


In [5]:
df_token_prices_raw.dtypes

Club_Name      object
Token_Name     object
Date           object
Open          float64
High          float64
Low           float64
Close         float64
Volume        float64
dtype: object

In [6]:
df_clubs_token_names

Unnamed: 0,token_name;club_or_organisation_name
0,ACM8538-USD;AC Milan Fan Token
1,ADANA-USD;Adanaspor Fan Token
2,AFC11532-USD;Arsenal Fan Token
3,AFYON-USD;Afyonspor Fan Token
4,ALA12649-USD;Alanyaspor Fan Token
...,...
71,UFC11533-USD;UFC Fan Token
72,VATRENI-USD;Croatian FF Fan Token
73,VCF-USD;Valencia CF Fan Token
74,VIT-USD;Team Vitality Fan Token


In [7]:
df_clubs_token_names.dtypes

token_name;club_or_organisation_name    object
dtype: object

In [10]:
df_token_prices_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45712 entries, 0 to 45711
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Club_Name   45712 non-null  object 
 1   Token_Name  45712 non-null  object 
 2   Date        45712 non-null  object 
 3   Open        45495 non-null  float64
 4   High        45495 non-null  float64
 5   Low         45495 non-null  float64
 6   Close       45495 non-null  float64
 7   Volume      45495 non-null  float64
dtypes: float64(5), object(3)
memory usage: 2.8+ MB


In [11]:
df_token_prices_raw.isnull().any()

Club_Name     False
Token_Name    False
Date          False
Open           True
High           True
Low            True
Close          True
Volume         True
dtype: bool

## Clean the Club Names from "Fan Token" or "Token" substrings

In [8]:
def clean_club_names(club_name):
    substring1 = "Fan Token"
    substring2 = "Token"
    if substring1 in club_name:
        return club_name.replace(" Fan Token", "")
    elif substring2 in club_name:
        return club_name.replace(" Token", "")
    else:
        return club_name

df_token_prices_raw.Club_Name = df_token_prices_raw.Club_Name.transform(lambda x: clean_club_names(x))

In [9]:
df_clubs_token_names

Unnamed: 0,token_name;club_or_organisation_name
0,ACM8538-USD;AC Milan Fan Token
1,ADANA-USD;Adanaspor Fan Token
2,AFC11532-USD;Arsenal Fan Token
3,AFYON-USD;Afyonspor Fan Token
4,ALA12649-USD;Alanyaspor Fan Token
...,...
71,UFC11533-USD;UFC Fan Token
72,VATRENI-USD;Croatian FF Fan Token
73,VCF-USD;Valencia CF Fan Token
74,VIT-USD;Team Vitality Fan Token


## Filtering the token owning teams

In [14]:
df_token_prices_raw.dtypes

Club_Name      object
Token_Name     object
Date           object
Open          float64
High          float64
Low           float64
Close         float64
Volume        float64
dtype: object

In [15]:
# Convert the date column to datetime dtype
df_token_prices_raw["Date"] = pd.to_datetime(df_token_prices_raw["Date"], 
                                             format = "%Y-%m-%d", 
                                             yearfirst = True)

# Add 3 additional columns from Date => Day, Month, Year
df_token_prices_raw["Day"] = df_token_prices_raw["Date"].dt.day
df_token_prices_raw["Month"] = df_token_prices_raw["Date"].dt.month
df_token_prices_raw["Year"] = df_token_prices_raw["Date"].dt.year

# reorder the columns
cols = list(df_token_prices_raw.columns.values)
df_token_prices_raw1 = df_token_prices_raw[cols[0:3] + cols[8:11] + cols[3:8]]

In [16]:
# R
df_token_prices_raw1.dtypes

Club_Name             object
Token_Name            object
Date          datetime64[ns]
Day                    int64
Month                  int64
Year                   int64
Open                 float64
High                 float64
Low                  float64
Close                float64
Volume               float64
dtype: object

In [17]:
# find the smallest Date in the DataFrame
df_token_prices_raw1.Date.min()

Timestamp('2018-05-07 00:00:00')

In [40]:
# find the highest Date in the DataFrame
df_token_prices_raw1.Date.max()

Timestamp('2023-05-07 00:00:00')

In [41]:
# find the number of unique clubs in the DataFrame
len(df_token_prices_raw1.Club_Name.unique())

76

In [42]:
# Adjust some of the club names needed for further data merging.
df_token_prices_raw1.loc[df_token_prices_raw1["Club_Name"] == "Fatih Karag?mr?k SK", "Club_Name"] = "Fatih Karagumruk SK"
df_token_prices_raw1.loc[df_token_prices_raw1["Club_Name"] == "Fenerbah?e", "Club_Name"] = "Fenerbahce"

In [44]:
df_token_prices_raw1.Club_Name.unique()

array(['AC Milan', 'Adanaspor', 'Arsenal', 'Afyonspor', 'Alanyaspor',
       'Alliance', 'Alpine F1 Team', 'Aston Martin Cognizant',
       'Apollon Limassol', 'Argentine Football Association', 'AS Roma',
       'Atletico De Madrid', 'Aston Villa', 'FC Barcelona', 'Bodrumspor',
       'Brazil National Football Team', 'Baskonia',
       'Club Atletico Independiente', 'Manchester City',
       'Deportivo Alaves', 'Davis Cup', 'Dinamo Zagreb', 'Everton',
       'RCD Espanyol', 'Fenerbahce', 'Fatih Karagumruk SK',
       'Fortuna Sittard', 'Peruvian National Football Team',
       'Galatasaray', 'Clube Atletico Mineiro', 'Genclerbirligi',
       'Goztepe S.K.', 'Istanbul Basaksehir FK', 'Inter Milan',
       'Italian National Football Team', 'Juventus', 'Kocaelispor',
       'S.S. Lazio', 'Legia Warsaw', 'Levante U.D.', 'Leeds United',
       'McLaren F1', 'Flamengo', 'Millonarios FC', 'MotoGP', 'MIBR',
       'MXGP', 'Napoli', 'Natus Vincere', 'Novara Calcio', 'OG',
       'Professional F

In [18]:
# here are the teams and championships that are important for this analysis

teams_of_interest = ["AC Milan", "Arsenal", "Alanyaspor", 
                     "AS Roma", "Atletico De Madrid", "Aston Villa", 
                     "FC Barcelona", "Manchester City", 
                     "Everton", "RCD Espanyol", "Fenerbahce", 
                     "Fatih Karagumruk SK", "Galatasaray", "Inter Milan", 
                     "Juventus", "S.S. Lazio", "Leeds United", 
                     "Napoli", "FC Porto", "Paris Saint-Germain", "Valencia CF"]

championships = ["Premier League", "La Liga", "Serie A", 
                 "Ligue 1", "Fußball-Bundesliga", "Eredivisie", 
                 "Primeira Liga", "UEFA Champions League", "UEFA Europa League", 
                 "Süper Lig", "Belgian First Division A"]

needed_championships = ["Premier League", "La Liga", "Serie A", 
                        "Ligue 1", "Primeira Liga", "Süper Lig",      
                        "UEFA Champions League", "UEFA Europa League"]


len(teams_of_interest)

21

## Save the data as csv file

In [48]:
df_token_prices_raw1.to_csv("Datasets for Merging/token_prices_dataset.csv", index = False)