In [248]:
import warnings
warnings.filterwarnings("ignore")

import yfinance as yf
import pandas as pd

In [249]:
!pip install plotly -q

import plotly.graph_objects as go

def linear_plot(df, title):
    """Show an interactive line chart of the closing price over time.

    Args:
        df: Table exposing 'date' (x-axis) and 'close' (y-axis) columns.
        title: Text placed at the top of the figure.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    close_trace = go.Scatter(x=df['date'], y=df['close'], mode='lines')
    figure = go.Figure([close_trace])
    layout_options = dict(
        plot_bgcolor='white',
        xaxis_title='Date',
        yaxis_title='Price',
        title=title,
    )
    figure.update_layout(**layout_options)
    figure.show()

def candlestick_plot(df, title):
    """Show an interactive OHLC candlestick chart.

    Args:
        df: Table exposing 'date', 'open', 'high', 'low' and 'close' columns.
        title: Text placed at the top of the figure.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    candles = go.Candlestick(
        x=df['date'],
        open=df['open'],
        high=df['high'],
        low=df['low'],
        close=df['close'],
    )
    figure = go.Figure([candles])
    layout_options = dict(
        xaxis_rangeslider_visible=False,  # hide the range slider under the chart
        plot_bgcolor='white',
        xaxis_title='Date',
        yaxis_title='Price',
        title=title,
    )
    figure.update_layout(**layout_options)
    # Explicitly mark the y-axis as not fixed-range.
    figure.update_yaxes(fixedrange=False)
    figure.show()
    

In [250]:
!pip install yfinance -q

# Download hourly AAPL bars starting 2024-01-01.
# auto_adjust is pinned to False so the OHLC values stay unadjusted no matter
# which default the installed yfinance version uses; the redundant
# 'Adj Close' column is then dropped. errors='ignore' keeps the cell from
# raising KeyError if that column is absent from the response.
df = yf.download(
    "AAPL",
    start="2024-01-01",
    interval="1h",
    auto_adjust=False,
).drop(columns=['Adj Close'], errors='ignore')
df.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-02 09:30:00-05:00,187.800995,188.440002,186.009995,186.189896,21173291
2024-01-02 10:30:00-05:00,186.179993,186.904999,185.184296,186.804993,9010595
2024-01-02 11:30:00-05:00,186.800003,187.350006,186.570007,186.615005,6143754
2024-01-02 12:30:00-05:00,186.615005,187.039993,185.259995,185.294998,5381701
2024-01-02 13:30:00-05:00,185.289993,185.539993,184.229996,184.494995,7228643


In [251]:
# (rows, columns) of the hourly AAPL frame downloaded above.
df.shape

(1172, 5)

In [252]:
# Column dtypes of the AAPL frame.
df.dtypes

Open      float64
High      float64
Low       float64
Close     float64
Volume      int64
dtype: object

In [253]:
# Number of missing values in each column.
df.isnull().sum()

Open      0
High      0
Low       0
Close     0
Volume    0
dtype: int64

In [254]:
# Descriptive statistics (count, mean, std, min, quartiles, max) per column.
df.describe()

Unnamed: 0,Open,High,Low,Close,Volume
count,1172.0,1172.0,1172.0,1172.0,1172.0
mean,194.362858,194.979136,193.711068,194.378666,7028475.0
std,20.549272,20.675305,20.387324,20.55223,5219153.0
min,164.369995,165.160004,164.080002,164.362396,0.0
25%,176.368755,176.968754,175.906254,176.413776,4067693.0
50%,188.787498,189.025002,188.295547,188.790001,5419038.0
75%,214.398746,215.567497,213.452503,214.365002,8098706.0
max,236.531998,237.229996,234.619995,235.600006,56496060.0


In [255]:
!pip install summarytools -q

from summarytools import dfSummary

# summarytools overview: per-column stats, distinct-value counts and missing counts.
dfSummary(df)

No,Variable,Stats / Values,Freqs / (% of Valid),Graph,Missing
1,Open [float64],Mean (sd) : 194.4 (20.5) min < med < max: 164.4 < 188.8 < 236.5 IQR (CV) : 38.0 (9.5),"1,093 distinct values",,0 (0.0%)
2,High [float64],Mean (sd) : 195.0 (20.7) min < med < max: 165.2 < 189.0 < 237.2 IQR (CV) : 38.6 (9.4),"1,076 distinct values",,0 (0.0%)
3,Low [float64],Mean (sd) : 193.7 (20.4) min < med < max: 164.1 < 188.3 < 234.6 IQR (CV) : 37.5 (9.5),"1,092 distinct values",,0 (0.0%)
4,Close [float64],Mean (sd) : 194.4 (20.6) min < med < max: 164.4 < 188.8 < 235.6 IQR (CV) : 38.0 (9.5),"1,111 distinct values",,0 (0.0%)
5,Volume [int64],Mean (sd) : 7028475.1 (5219152.6) min < med < max: 0.0 < 5419037.5 < 56496065.0 IQR (CV) : 4031013.2 (1.3),"1,170 distinct values",,0 (0.0%)


In [256]:
# Normalise the AAPL download:
#   * lower-case the column names,
#   * expose the index timestamps as a leading 'date' column, converted to
#     UTC with the timezone information removed (tz_convert(tz=None)),
#   * order rows chronologically and keep one row per timestamp,
#   * persist the result to AAPL_1h.csv.
utc_naive_stamps = pd.to_datetime(df.index).tz_convert(tz=None)
df = df.set_axis(df.columns.str.lower(), axis=1)
df.insert(0, "date", utc_naive_stamps)
df = (
    df.sort_values(by='date')
      .drop_duplicates(subset='date')
      .reset_index(drop=True)
)
df.to_csv('AAPL_1h.csv')
df

Unnamed: 0,date,open,high,low,close,volume
0,2024-01-02 14:30:00,187.800995,188.440002,186.009995,186.189896,21173291
1,2024-01-02 15:30:00,186.179993,186.904999,185.184296,186.804993,9010595
2,2024-01-02 16:30:00,186.800003,187.350006,186.570007,186.615005,6143754
3,2024-01-02 17:30:00,186.615005,187.039993,185.259995,185.294998,5381701
4,2024-01-02 18:30:00,185.289993,185.539993,184.229996,184.494995,7228643
...,...,...,...,...,...,...
1167,2024-08-30 15:30:00,228.220001,228.875000,227.550003,228.029999,3257176
1168,2024-08-30 16:30:00,228.020203,228.699997,227.479996,228.149994,2683350
1169,2024-08-30 17:30:00,228.134995,228.315002,227.660004,227.990005,2414533
1170,2024-08-30 18:30:00,228.000000,229.050003,227.949997,229.039993,3332557


In [257]:
# Tabulate the time difference between consecutive rows; any bucket other than
# 1 hour marks a break in the hourly series (presumably overnight / weekend /
# holiday market closures — confirm against the exchange calendar).
df['date'].diff().value_counts()

date
0 days 01:00:00    1004
0 days 18:00:00     131
2 days 18:00:00      29
3 days 18:00:00       4
2 days 17:00:00       1
1 days 18:00:00       1
1 days 22:00:00       1
Name: count, dtype: int64

In [258]:
# Line chart of the AAPL hourly closing price.
linear_plot(df, 'Apple')

In [259]:
# Candlestick (open/high/low/close) chart of the AAPL hourly bars.
candlestick_plot(df, 'Apple')

In [260]:
!pip install ccxt -q

import ccxt
import time

# Page through the exchange's hourly BTC/USD candles from 2024-01-01 (UTC)
# until the API returns an empty page.
exchange = ccxt.binance()
symbol = 'BTC/USD'
timeframe = '1h'
since = exchange.parse8601('2024-01-01T00:00:00Z')
all_ohlcvs = []

# Upper bound on back-to-back failed requests. Without it a persistent error
# (network down, rejected symbol, ban) would retry forever with no delay.
max_consecutive_failures = 5
consecutive_failures = 0

while True:
    try:
        ohlcvs = exchange.fetch_ohlcv(symbol, timeframe, since)
    except Exception as e:
        consecutive_failures += 1
        print(type(e).__name__, str(e))
        if consecutive_failures >= max_consecutive_failures:
            # Give up loudly so later cells do not run on partial data.
            raise
        # Back off (2s, 4s, 8s, ... capped at 30s) before retrying the same page.
        time.sleep(min(2 ** consecutive_failures, 30))
        continue

    consecutive_failures = 0
    if not ohlcvs:
        # Empty page: nothing newer than `since` is available.
        break

    all_ohlcvs.extend(ohlcvs)
    print('Fetched', len(ohlcvs), symbol, timeframe, 'candles from', exchange.iso8601(ohlcvs[0][0]))
    # Resume 1 ms after the last candle's timestamp so the next page does not
    # start with the candle we already have.
    since = ohlcvs[-1][0] + 1
    sleep_interval = exchange.rateLimit / 1000  # rateLimit is divided by 1000 to get seconds
    print('Sleep for', sleep_interval)
    time.sleep(sleep_interval)
print('Fetched', len(all_ohlcvs), symbol, timeframe, 'candles in total')

Fetched 500 BTC/USD 1h candles from 2024-01-01T00:00:00.000Z
Sleep for 0.05
Fetched 500 BTC/USD 1h candles from 2024-01-21T20:00:00.000Z
Sleep for 0.05
Fetched 500 BTC/USD 1h candles from 2024-02-11T16:00:00.000Z
Sleep for 0.05
Fetched 500 BTC/USD 1h candles from 2024-03-03T12:00:00.000Z
Sleep for 0.05
Fetched 500 BTC/USD 1h candles from 2024-03-24T08:00:00.000Z
Sleep for 0.05
Fetched 500 BTC/USD 1h candles from 2024-04-14T04:00:00.000Z
Sleep for 0.05
Fetched 500 BTC/USD 1h candles from 2024-05-05T00:00:00.000Z
Sleep for 0.05
Fetched 500 BTC/USD 1h candles from 2024-05-25T20:00:00.000Z
Sleep for 0.05
Fetched 500 BTC/USD 1h candles from 2024-06-15T16:00:00.000Z
Sleep for 0.05
Fetched 500 BTC/USD 1h candles from 2024-07-06T12:00:00.000Z
Sleep for 0.05
Fetched 500 BTC/USD 1h candles from 2024-07-27T08:00:00.000Z
Sleep for 0.05
Fetched 380 BTC/USD 1h candles from 2024-08-17T04:00:00.000Z
Sleep for 0.05
Fetched 5880 BTC/USD 1h candles in total


In [261]:
# Build the BTC/USD frame from the raw candle rows:
#   * label the six positional fields,
#   * order by timestamp and keep one row per timestamp,
#   * convert the millisecond epoch values in 'date' to datetimes,
#   * persist the result to BTC_USD_1h.csv.
ohlcv_columns = ['date', 'open', 'high', 'low', 'close', 'volume']
df = (
    pd.DataFrame(all_ohlcvs)
    .set_axis(ohlcv_columns, axis=1)
    .sort_values(by='date')
    .drop_duplicates(subset='date')
    .reset_index(drop=True)
    .assign(date=lambda frame: pd.to_datetime(frame['date'], unit='ms'))
)
df.to_csv('BTC_USD_1h.csv')
df

Unnamed: 0,date,open,high,low,close,volume
0,2024-01-01 00:00:00,42305.5,42585.1,42282.7,42494.9,1182.373496
1,2024-01-01 01:00:00,42494.7,42806.7,42450.6,42633.9,1286.649097
2,2024-01-01 02:00:00,42633.9,42659.9,42529.5,42612.3,553.320897
3,2024-01-01 03:00:00,42612.3,42617.1,42249.9,42356.2,1113.022829
4,2024-01-01 04:00:00,42356.2,42430.1,42233.8,42425.6,782.030291
...,...,...,...,...,...,...
5875,2024-09-01 19:00:00,58508.0,58740.5,58257.1,58481.7,666.598940
5876,2024-09-01 20:00:00,58481.7,58623.9,58348.2,58398.4,241.111758
5877,2024-09-01 21:00:00,58394.1,58399.6,58060.5,58313.0,143.663194
5878,2024-09-01 22:00:00,58313.1,58380.0,57165.2,57270.1,2053.329803


In [262]:
# (rows, columns) of the BTC/USD frame (df was rebound to it in the previous cell).
df.shape

(5880, 6)

In [263]:
# Column dtypes of the BTC/USD frame.
df.dtypes

date      datetime64[ns]
open             float64
high             float64
low              float64
close            float64
volume           float64
dtype: object

In [264]:
# Number of missing values in each column.
df.isnull().sum()

date      0
open      0
high      0
low       0
close     0
volume    0
dtype: int64

In [265]:
# Descriptive statistics per column of the BTC/USD frame.
df.describe()

Unnamed: 0,date,open,high,low,close,volume
count,5880,5880.0,5880.0,5880.0,5880.0,5880.0
mean,2024-05-02 11:29:59.999999744,60008.714354,60240.190255,59759.19,60011.22682,1831.793025
min,2024-01-01 00:00:00,38723.0,38850.0,38504.2,38723.8,66.890633
25%,2024-03-02 05:45:00,56447.075,56753.05,56152.925,56447.15,640.213941
50%,2024-05-02 11:30:00,62811.9,63060.4,62539.3,62811.85,1142.389604
75%,2024-07-02 17:15:00,66910.55,67147.9,66674.075,66910.325,2107.339675
max,2024-09-01 23:00:00,73712.4,73915.3,73312.0,73712.4,41310.160827
std,,9047.137354,9078.750082,9011.913479,9044.244216,2385.062005


In [266]:
# summarytools overview: per-column stats, distinct-value counts and missing counts.
dfSummary(df)

No,Variable,Stats / Values,Freqs / (% of Valid),Graph,Missing
1,date [datetime64[ns]],Min: 2024-01-01 Max: 2024-09-01 Duration: 244 days,5880 distinct values,,0 (0.0%)
2,open [float64],Mean (sd) : 60008.7 (9047.1) min < med < max: 38723.0 < 62811.9 < 73712.4 IQR (CV) : 10463.5 (6.6),"5,715 distinct values",,0 (0.0%)
3,high [float64],Mean (sd) : 60240.2 (9078.8) min < med < max: 38850.0 < 63060.4 < 73915.3 IQR (CV) : 10394.8 (6.6),"5,676 distinct values",,0 (0.0%)
4,low [float64],Mean (sd) : 59759.2 (9011.9) min < med < max: 38504.2 < 62539.3 < 73312.0 IQR (CV) : 10521.1 (6.6),"5,671 distinct values",,0 (0.0%)
5,close [float64],Mean (sd) : 60011.2 (9044.2) min < med < max: 38723.8 < 62811.8 < 73712.4 IQR (CV) : 10463.2 (6.6),"5,708 distinct values",,0 (0.0%)
6,volume [float64],Mean (sd) : 1831.8 (2385.1) min < med < max: 66.9 < 1142.4 < 41310.2 IQR (CV) : 1467.1 (0.8),"5,880 distinct values",,0 (0.0%)


In [267]:
# Line chart of the BTC/USD hourly closing price.
linear_plot(df, 'BTC/USD')

In [268]:
# Candlestick (open/high/low/close) chart of the BTC/USD hourly candles.
candlestick_plot(df, 'BTC/USD')

In [269]:
# Tabulate the time difference between consecutive rows; a gap-free hourly
# series produces only the "0 days 01:00:00" bucket.
df['date'].diff().value_counts()

date
0 days 01:00:00    5879
Name: count, dtype: int64