# Fetch historical cryptocurrency daily data from yahoo finance for past 5 years

In [1]:
import yfinance as yf
import pandas as pd
from datetime import datetime

# Define the list of cryptocurrencies and their Yahoo Finance tickers
cryptocurrencies = {
    'Bitcoin': 'BTC-USD',
    'Ethereum': 'ETH-USD',
    'Ripple': 'XRP-USD',
    'Litecoin': 'LTC-USD',
    'Cardano': 'ADA-USD'
}

# Define the time period (past 5 years)
end_date = datetime.now().strftime('%Y-%m-%d')
start_date = (datetime.now() - pd.DateOffset(years=5)).strftime('%Y-%m-%d')

# Create an empty DataFrame to store all the data
combined_data = pd.DataFrame()

# Function to fetch and merge data
def fetch_and_merge_data(ticker, crypto_name):
    data = yf.download(ticker, start=start_date, end=end_date, interval='1d')
    
    data['Cryptocurrency'] = crypto_name
    
    data.reset_index(inplace=True)
    
    return data


for name, ticker in cryptocurrencies.items():
    crypto_data = fetch_and_merge_data(ticker, name)
    combined_data = pd.concat([combined_data, crypto_data], ignore_index=True)


combined_data.to_csv('cryptocurrencies_combined_daily.csv', index=False)
print('Data saved to cryptocurrencies_combined_daily.csv')

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Data saved to cryptocurrencies_combined_daily.csv


In [2]:
# Load our dataset 

import os
import pandas as pd
import numpy as np
import math
import datetime as dt

maindf=pd.read_csv('cryptocurrencies_combined_daily.csv')

In [3]:
print('Total number of days present in the dataset: ',maindf.shape[0])
print('Total number of fields present in the dataset: ',maindf.shape[1])

Total number of days present in the dataset:  9135
Total number of fields present in the dataset:  8


In [4]:
maindf.shape

(9135, 8)

In [5]:
maindf.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Cryptocurrency
0,2019-08-15,10038.421875,10437.411133,9675.316406,10311.545898,10311.545898,22899115082,Bitcoin
1,2019-08-16,10319.419922,10524.349609,9855.478516,10374.338867,10374.338867,20228207096,Bitcoin
2,2019-08-17,10358.722656,10452.625,10086.698242,10231.744141,10231.744141,13778035685,Bitcoin
3,2019-08-18,10233.005859,10487.070312,10119.094727,10345.810547,10345.810547,12999813869,Bitcoin
4,2019-08-19,10350.283203,10916.053711,10313.204102,10916.053711,10916.053711,16038264603,Bitcoin


In [6]:
maindf.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Cryptocurrency
9130,2024-08-10,0.348547,0.351765,0.345273,0.346289,0.346289,155114866,Cardano
9131,2024-08-11,0.346292,0.356275,0.328037,0.328322,0.328322,203353395,Cardano
9132,2024-08-12,0.328319,0.343978,0.328052,0.338977,0.338977,276146893,Cardano
9133,2024-08-13,0.338977,0.344656,0.33189,0.340154,0.340154,208538494,Cardano
9134,2024-08-14,0.340146,0.345194,0.332437,0.335455,0.335455,206602286,Cardano


In [7]:
maindf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9135 entries, 0 to 9134
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Date            9135 non-null   object 
 1   Open            9135 non-null   float64
 2   High            9135 non-null   float64
 3   Low             9135 non-null   float64
 4   Close           9135 non-null   float64
 5   Adj Close       9135 non-null   float64
 6   Volume          9135 non-null   int64  
 7   Cryptocurrency  9135 non-null   object 
dtypes: float64(5), int64(1), object(2)
memory usage: 571.1+ KB


In [8]:
maindf.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,9135.0,9135.0,9135.0,9135.0,9135.0,9135.0
mean,6613.664178,6761.780345,6454.555634,6618.779778,6618.779778,10546870000.0
std,14826.247812,15159.29444,14462.494271,14834.795912,14834.795912,14875170000.0
min,0.023954,0.025993,0.01913,0.023961,0.023961,19588980.0
25%,0.515896,0.527335,0.501424,0.516445,0.516445,871360700.0
50%,74.911057,76.67543,72.61898,74.920151,74.920151,3260639000.0
75%,2662.135864,2753.648071,2565.137329,2662.890015,2662.890015,15989400000.0
max,73079.375,73750.070312,71334.09375,73083.5,73083.5,350967900000.0


In [9]:
#checking for null values
print('Null Values:',maindf.isnull().values.sum())

Null Values: 0


In [10]:
print('NA values:',maindf.isnull().values.any())

NA values: False


In [11]:
maindf.shape

(9135, 8)

In [12]:
sd=maindf.iloc[0][0]
ed=maindf.iloc[-1][0]


print('Starting Date',sd)
print('Ending Date',ed)

Starting Date 2019-08-15
Ending Date 2024-08-14
