# Ray start/stop

In [None]:
# Start the local Ray runtime used by the `ray_on=True` workflow further down.
# `ray` is imported in this cell because it runs before every other import in
# the notebook; without it a fresh kernel fails here with NameError.
import ray

# ignore_reinit_error=True lets this cell be re-run while Ray is already up.
ray.init(ignore_reinit_error=True)

In [None]:
# Shut down the Ray runtime started by the ray.init() cell above.
# The import is repeated here so this cell can be run on its own.
import ray
ray.shutdown()

# Imports

In [1]:
%load_ext autoreload
%autoreload 2

import datetime as dt
import numpy as np
import pandas as pd
pd.set_option('float_format', '{:f}'.format)
pd.set_option('display.max_rows', 70)
pd.set_option('display.max_columns', 99)
import pandas_bokeh
pandas_bokeh.output_file("data/bokeh_output.html")

import polygon_df as pdf
import polygon_ds as pds
import polygon_s3 as ps3
import bar_samples as bs
import bar_labels as bl
import bar_meta as bm
import utils_filters as ft
from utils_pickle import pickle_dump, pickle_load

import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import style
style.use('fivethirtyeight')
mpl.rcParams['figure.figsize'] = [20, 7]

# import pdb; pdb.set_trace()
# import pudb; pudb.set_trace()

# df.style.background_gradient(cmap='coolwarm', axis=None, vmin=-1, vmax=1)  # 'Blues', 'coolwarm', 'RdBu_r' & 'BrBG'
# pd.set_option('precision', 2)
# .plot_bokeh(sizing_mode="scale_height")

# Sample tick dates

In [2]:
# Sampling parameters: ticker symbol plus the date range passed to the
# multi-date bar workflow.
symbol = 'VTI'
start_date, end_date = '2020-04-20', '2020-04-25'

# Parameter mapping passed to the bar workflows, grouped by processing stage.
thresh = dict(
    # mad filter
    mad_value_winlen=11,
    mad_k=22,
    mad_deviation_winlen=333,
    # jma filter
    jma_winlen=7,
    jma_power=2,
    # bar thresholds
    renko_return='jma_return',
    renko_size=0.11,
    renko_reveral_multiple=2,  # key spelling is part of the runtime contract; keep as-is
    renko_range_frac=22,
    max_duration_td=dt.timedelta(minutes=33),
    min_duration_td=dt.timedelta(seconds=33),
    min_tick_count=33,
    # label params
    label_reward_ratios=list(np.arange(2.5, 11, 0.5)),
)

In [None]:
# bar workflow

# Single date to process. Note that it lies outside the
# start_date..end_date range configured in the sampling-params cell.
date = '2020-04-17'

# import ipdb; ipdb.set_trace()  # breakpoint
# Run the single-date workflow with labeling enabled. The result is read by key
# ('date', 'ticks_df', 'bars') in the following cells.
bar_date = bm.bar_workflow(symbol, date, thresh, add_label=True)

In [None]:
# Show which date was processed. A bare expression is only rendered when it is
# the last statement of a cell, so it has to be printed explicitly here.
print(bar_date['date'])

# Tick-level frame and bar-level frame for the processed date.
tdf = bar_date['ticks_df']
bdf = pd.DataFrame(bar_date['bars'])
# lbdf = pd.DataFrame(bar_date['labeled_bars'])

# Tick-level JMA series, restricted to rows whose status starts with 'clean_open'.
tdf[tdf.status.str.startswith('clean_open')].plot(x='utc_dt', y='jma')
# tdf[tdf.status.str.startswith('clean_open')].plot(y='jma')

# Bar-level JMA close against bar close time.
bdf.plot(x='close_at', y='jma_close')
# bdf.plot(y='jma_close')

In [None]:
# Fraction of bar rows that are exact duplicates of an earlier row.
bdf.duplicated().sum() / len(bdf)

In [None]:
# Share of all bars per absolute `label_rrr` value (denominator is the total row count).
bdf.label_rrr.abs().value_counts() / len(bdf)

In [None]:
# Add a whole-second epoch column derived from `utc_dt` (this mutates `tdf` in place).
# NOTE(review): the `// 10**9` assumes astype('int64') yields nanoseconds since
# the epoch — confirm the dtype/resolution of `utc_dt`.
tdf.loc[:, 'epoch_sec'] = tdf.utc_dt.astype('int64') // 10**9

# Ticks per second: count non-null `price` values in each epoch second, then
# summarize the distribution with extra upper percentiles.
tdf.groupby('epoch_sec').count().price.describe(percentiles=[.8,.9,.99])
# bar_date['ticks_df'].groupby('epoch_sec').count().price.mean()

In [None]:
# bar dates workflow

# Run the bar workflow over the start_date..end_date range with labeling enabled
# and Ray turned on (ray_on=True). Later cells index the result by position and
# read the 'date', 'ticks_df' and 'bars' keys of each item.
bar_dates = bm.bar_dates_workflow(symbol, start_date, end_date, thresh, add_label=True, ray_on=True)

# fetch and build bars

# Manual single-date alternative, kept commented out for reference:
# tdf = ps3.fetch_date_df(symbol, date='2020-04-02', tick_type='trades')
# bars, tdf = bm.build_bars(ticks_df, thresh)
# bdf = pd.DataFrame(bars)

2021-02-05 19:03:15,738	INFO services.py:1171 -- View the Ray dashboard at http://127.0.0.1:1111


[2m[36m(pid=31927)[0m VTI 2020-04-24 trying to get data from local file...
[2m[36m(pid=31930)[0m VTI 2020-04-22 trying to get data from local file...
[2m[36m(pid=31928)[0m VTI 2020-04-23 trying to get data from local file...


In [None]:
# Position (within `bar_dates`) of the processed date to inspect.
n = 0
# Rebinds `tdf` / `bdf` to the tick and bar frames of the selected date.
tdf = bar_dates[n]['ticks_df']
bdf = pd.DataFrame(bar_dates[n]['bars'])
print(bar_dates[n]['date'])

# bdf.plot(y='jma_close')
# Index the bars by close time, localize that index as UTC and convert it to
# New York time before plotting the `jma_wq50` column.
bdf.set_index('close_at').tz_localize('UTC').tz_convert('America/New_York').plot(y='jma_wq50')

# Tick-level JMA against `nyc_dt`, restricted to rows whose status starts with 'clean_open'.
tdf[tdf.status.str.startswith('clean_open')].plot(x='nyc_dt', y='jma')

In [None]:
# fill daily gaps
stacked_df = bm.fill_gaps_dates(bar_dates, fill_col='jma_wmean')

# stacked stats
stats_df = bm.stacked_df_stats(stacked_df)
stats_df.describe()

stacked_df[['jma_high','jma_low', 'jma_wmean']].plot_bokeh(sizing_mode="scale_height")

In [None]:
# Per-date share of each tick `status` value: one record per processed date,
# keyed by status value plus a 'date' entry.
# NOTE(review): this reads 'ft_ticks_df' while the other cells read 'ticks_df'
# — confirm that key is present in the items of `bar_dates`.
results = []

# The loop variable is deliberately not named `date`: that global holds the
# date string used by the single-date workflow cell and must not be rebound
# to a dict here.
for bar_day in bar_dates:
    ft_ticks = bar_day['ft_ticks_df']  # looked up once, used for counts and row total
    out = (ft_ticks.status.value_counts() / ft_ticks.shape[0]).to_dict()
    out['date'] = bar_day['date']
    results.append(out)

pd.DataFrame(results)

In [None]:
# Matplotlib version of the outlier plot on a synthetic random walk, kept for reference:
# rw, outlier_ind = ft.random_walk_with_outliers(0, 1000, 0.01)
# plt.plot(np.arange(len(rw)), rw)
# plt.scatter(outlier_ind, rw[outlier_ind], c='r', label='outlier')
# plt.title('Random Walk with outliers')
# plt.xlabel('Time steps')
# plt.ylabel('Values')
# plt.legend();

from bokeh.plotting import figure, output_file, show, output_notebook
# output_notebook()

TOOLS = "pan,wheel_zoom,box_zoom,reset,save,box_select"

# NOTE(review): `df` is not defined in any cell visible in this notebook. It
# must be a frame with `price` and `mad_outlier` columns — confirm which frame
# is intended before running on a fresh kernel.
# sizing_mode is a layout property of the figure; passing it to show() does not
# apply it to the plot, so it is set here instead.
p1 = figure(title="Legend Example", tools=TOOLS, sizing_mode="scale_height")
p1.line(df.index, df.price)

# Select the flagged rows once and reuse them for both coordinates.
outliers = df[df.mad_outlier==True]
p1.circle(outliers.index, outliers.price, legend_label="outlier", color="orange")

# output_file("legend.html", title="legend.py example")
show(p1)  # open a browser
