In [29]:
import os
import pandas as pd
from ta import add_all_ta_features
from ta.utils import dropna
import numpy as np
from matplotlib import pyplot as plt
pd.options.display.max_rows = 100
import time
from datetime import datetime
import requests
import json

# 1. Getting old data

In [6]:
# Folder and file name of the previously saved NVDA 1-minute dataset.
DATA_DIR = 'ExtrTAdata'
DSET_NAME = 'NVDA_1_min_corr_extrs_unique.csv'
# The first CSV column is used as the row index of the frame.
main_df = pd.read_csv(
    os.path.join(DATA_DIR, DSET_NAME),
    index_col=0,
)

In [36]:
# Show the newest stored row, limited to its first 15 columns.
main_df.iloc[-1:, :15]

Unnamed: 0,mon,wd,d,hr,min,v,o,c,h,l,t,volume_adi,volume_obv,volume_cmf,volume_fi
306140,1,1,8,3,49,786.0,248.8,248.8,248.8,248.8,1644281000000.0,385231300.0,569902520.0,-0.329566,-59.140043


# 2. Adding last data

In [11]:
# First five values of the newest stored row (columns mon, wd, d, hr, min)
# as a plain Python list.
dt = main_df.iloc[-1, :5].tolist()
dt

[1.0, 1.0, 8.0, 3.0, 49.0]

In [23]:
def back_to_timestamp(arg, year=2022):
    """Convert a ``[mon, wd, d, hr, min]`` row back to an epoch timestamp in milliseconds.

    Parameters
    ----------
    arg : sequence of numbers
        Date parts in this order: zero-based month, weekday (not used here),
        day of month, hour, minute. Only the first five items are read, so a
        longer row may be passed as-is. Values may be floats holding whole
        numbers.
    year : int, optional
        Calendar year to combine with the date parts, since the row itself
        carries no year. Defaults to 2022.

    Returns
    -------
    int
        Milliseconds since the epoch. The date parts are interpreted in the
        machine's local timezone, because ``time.mktime`` is used.
    """
    # The stored month is zero-based; datetime expects 1..12.
    mon = int(arg[0] + 1)
    # Index 1 (weekday) is skipped; the slice is bounded so extra trailing
    # columns do not break the unpacking.
    d, h, m = (int(part) for part in arg[2:5])
    moment = datetime(year, mon, d, h, m)
    return int(time.mktime(moment.timetuple()) * 1000)

In [30]:
# The Polygon.io API key is read from the environment so the secret is never
# stored in (or committed with) the notebook.
api_key = os.environ.get('POLYGON_API_KEY')
if not api_key:
    raise RuntimeError('Set the POLYGON_API_KEY environment variable to your Polygon.io API key.')
ticker = 'NVDA'
interval = 1
# Resume one minute (60 000 ms) after the newest bar already stored in main_df.
start_time = back_to_timestamp(dt) + 60000
end_time = int(time.time() * 1000)
output = None
while True:
    res = requests.get(
        f'https://api.polygon.io/v2/aggs/ticker/{ticker}/range/{interval}'
        f'/minute/{start_time}/{end_time}?limit=50000&apiKey={api_key}',
        timeout=60,
    )
    # Fail loudly on HTTP errors instead of tripping over a missing key below.
    res.raise_for_status()
    res_json = json.loads(res.content)
    # Read 'results' defensively: a payload without it (presumably an empty
    # range — confirm against the API docs) is treated as "no bars".
    results = res_json.get('results') or []
    # Each follow-up request starts at the last bar already received, so a
    # follow-up page holding only that one bar means nothing new is left.
    # A first page with a single bar is genuine new data and is kept.
    if not results or (output is not None and len(results) == 1):
        break
    if output is None:
        output = res_json
    else:
        output['results'].extend(results)
    print('Loaded:', time.ctime(results[0]['t'] / 1000), ',',
          time.ctime(results[-1]['t'] / 1000), '| length:', len(output['results']))
    start_time = results[-1]['t']
    # Pause between requests to stay under the API rate limit.
    time.sleep(15)

print(f'{ticker} data successfully loaded!')
print('-----------------------------------')
# An empty frame is produced when no new bars were returned at all.
df = pd.DataFrame(output['results'] if output is not None else [])
# Consecutive pages repeat their boundary bar: drop the repeats, then renumber
# the rows 0..n-1 so the index has no gaps.
last_data = df.drop_duplicates().reset_index(drop=True)

Loaded: Tue Feb  8 03:50:00 2022 , Sat Mar 12 03:59:00 2022 | length: 18203
NVDA data successfully loaded!
-----------------------------------


In [33]:
def month_to_number(arg):
    """Map English three-letter month abbreviations to zero-based month numbers.

    Parameters
    ----------
    arg : str or iterable of str
        A single abbreviation (``'Jan'`` ... ``'Dec'``) or an iterable of them.
        ``str`` subclasses (for example ``numpy.str_``) count as a single
        abbreviation.

    Returns
    -------
    int or list of int
        ``0`` for ``'Jan'`` through ``11`` for ``'Dec'``; a list in input order
        when an iterable was given.

    Raises
    ------
    ValueError
        If an abbreviation is not in the table.
    """
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    # isinstance (not an exact type comparison) keeps str subclasses from being
    # iterated character by character.
    if isinstance(arg, str):
        return months.index(arg)
    return [months.index(val) for val in arg]

def weekday_to_number(arg):
    """Map English three-letter weekday abbreviations to numbers, Monday = 0.

    Parameters
    ----------
    arg : str or iterable of str
        A single abbreviation (``'Mon'`` ... ``'Sun'``) or an iterable of them.
        ``str`` subclasses (for example ``numpy.str_``) count as a single
        abbreviation.

    Returns
    -------
    int or list of int
        ``0`` for ``'Mon'`` through ``6`` for ``'Sun'``; a list in input order
        when an iterable was given.

    Raises
    ------
    ValueError
        If an abbreviation is not in the table.
    """
    # 'Sun' is appended at the end so the numbers for Mon..Sat stay unchanged.
    wdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    # isinstance (not an exact type comparison) keeps str subclasses from being
    # iterated character by character.
    if isinstance(arg, str):
        return wdays.index(arg)
    return [wdays.index(val) for val in arg]

def parse_timestamp(arg):
    """Split an epoch timestamp in milliseconds into local-time date parts.

    Parameters
    ----------
    arg : int or float
        Milliseconds since the epoch.

    Returns
    -------
    list of int
        ``[month, weekday, day, hour, minute]`` in the machine's local
        timezone, where month is zero-based (January = 0) and weekday counts
        from Monday = 0 to Sunday = 6.
    """
    # The struct_time fields give the numbers directly, so no text has to be
    # parsed and every weekday, Sunday included, is handled.
    local = time.localtime(arg / 1000)
    return [local.tm_mon - 1, local.tm_wday, local.tm_mday, local.tm_hour, local.tm_min]

def add_datetime_to_df(df):
    """Add ``mon``, ``wd``, ``d``, ``hr`` and ``min`` columns derived from ``df['t']``.

    The frame is modified in place and nothing is returned. Columns that do
    not exist yet are inserted at positions 0-4; columns that already exist
    are overwritten where they are, so the call can be repeated on the same
    frame without raising.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``t`` column whose values are passed one by one to
        ``parse_timestamp``.
    """
    new_columns = ['mon', 'wd', 'd', 'hr', 'min']
    # One [mon, wd, d, hr, min] list per row, in row order.
    parsed = [parse_timestamp(cell) for cell in df['t']]
    for i, col in enumerate(new_columns):
        values = [row[i] for row in parsed]
        if col in df.columns:
            # Already present (e.g. the cell was re-run): refresh the values.
            df[col] = values
        else:
            df.insert(i, col, values)

In [34]:
add_datetime_to_df(last_data)

In [37]:
last_data.tail()

Unnamed: 0,mon,wd,d,hr,min,v,vw,o,c,h,l,t,n
18198,2,5,12,3,55,351.0,221.0781,221.08,221.08,221.08,221.08,1647046500000,9
18199,2,5,12,3,56,385.0,221.0976,221.01,221.01,221.01,221.01,1647046560000,14
18200,2,5,12,3,57,326.0,221.0431,221.0,221.0,221.0,221.0,1647046620000,16
18201,2,5,12,3,58,663.0,221.0279,221.1,221.0,221.1,221.0,1647046680000,10
18202,2,5,12,3,59,1434.0,221.0075,221.1,221.0,221.1,220.8,1647046740000,18


# 3. Getting real-time data 

In [97]:
import yfinance as yf
TK = yf.Ticker('NVDA')

In [102]:
# Value of the 't' column in the final row of last_data (epoch milliseconds).
start_time = last_data['t'].values[-1]
# Minutes elapsed between that moment and now, rounded to the nearest whole minute.
delta = round((time.time() - start_time / 1000) / 60)

In [105]:
start_time

1647046740000

In [135]:
df = yf.download('NVDA', period='3d', interval='1m')

[*********************100%***********************]  1 of 1 completed


In [136]:
df.index = df.index.tz_convert('Europe/Moscow')

In [137]:
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-03-14 22:56:00+03:00,213.369995,213.75,213.360107,213.554993,213.554993,168402
2022-03-14 22:57:00+03:00,213.570007,213.570007,213.229996,213.460007,213.460007,133085
2022-03-14 22:58:00+03:00,213.460007,213.740005,213.380005,213.581207,213.581207,218680
2022-03-14 22:59:00+03:00,213.600006,213.610001,213.009995,213.190002,213.190002,431198
2022-03-14 23:00:00+03:00,213.300003,213.300003,213.300003,213.300003,213.300003,0


In [138]:
df.index

DatetimeIndex(['2022-03-10 17:30:00+03:00', '2022-03-10 17:31:00+03:00',
               '2022-03-10 17:32:00+03:00', '2022-03-10 17:33:00+03:00',
               '2022-03-10 17:34:00+03:00', '2022-03-10 17:35:00+03:00',
               '2022-03-10 17:36:00+03:00', '2022-03-10 17:37:00+03:00',
               '2022-03-10 17:38:00+03:00', '2022-03-10 17:39:00+03:00',
               ...
               '2022-03-14 22:51:00+03:00', '2022-03-14 22:52:00+03:00',
               '2022-03-14 22:53:00+03:00', '2022-03-14 22:54:00+03:00',
               '2022-03-14 22:55:00+03:00', '2022-03-14 22:56:00+03:00',
               '2022-03-14 22:57:00+03:00', '2022-03-14 22:58:00+03:00',
               '2022-03-14 22:59:00+03:00', '2022-03-14 23:00:00+03:00'],
              dtype='datetime64[ns, Europe/Moscow]', name='Datetime', length=1170, freq=None)

In [139]:
# Integer timestamp column built from the index: the raw index values are cast
# to int64 and floor-divided by one million.
# NOTE(review): this yields epoch milliseconds only if the index values are
# nanosecond-resolution — confirm for the pandas version in use.
df['t'] = df.index.values.astype(np.int64) // 1_000_000

In [140]:
# Keep only the rows whose 't' is at or after start_time.
# NOTE(review): the bound is inclusive, so a bar stamped exactly start_time
# would be selected as well — confirm that is intended.
real_time_data = df.loc[df['t'] >= start_time]
real_time_data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,t
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-03-14 16:30:00+03:00,219.309998,219.320007,219.270004,219.285004,219.285004,859534,1647264600000
2022-03-14 16:31:00+03:00,219.059998,219.490005,218.210098,219.466003,219.466003,101083,1647264660000
2022-03-14 16:32:00+03:00,219.360001,219.520004,216.770004,217.090103,217.090103,233027,1647264720000
2022-03-14 16:33:00+03:00,217.133102,217.960007,216.320007,216.488007,216.488007,145346,1647264780000
2022-03-14 16:34:00+03:00,216.570007,217.429993,216.350006,216.869995,216.869995,133000,1647264840000
...,...,...,...,...,...,...,...
2022-03-14 22:56:00+03:00,213.369995,213.750000,213.360107,213.554993,213.554993,168402,1647287760000
2022-03-14 22:57:00+03:00,213.570007,213.570007,213.229996,213.460007,213.460007,133085,1647287820000
2022-03-14 22:58:00+03:00,213.460007,213.740005,213.380005,213.581207,213.581207,218680,1647287880000
2022-03-14 22:59:00+03:00,213.600006,213.610001,213.009995,213.190002,213.190002,431198,1647287940000
