# ohlcv

> Set of functions and classes used to handle data with Open, High, Low, Close and Volume

In [None]:
#| default_exp ohlc

In [None]:
#| hide
from eccore.ipython import nb_setup
from nbdev.showdoc import show_doc
from ecfintools.core import load_test_df

In [None]:
#| hide
# Run the eccore notebook setup helper (per the cell output, it configures the autoreload extension)
nb_setup()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Set autoreload mode


In [None]:
#| export
from datetime import datetime
from pathlib import Path
from typing import Optional

import pandas as pd
from bokeh.plotting import figure, output_notebook, show

## Plotting OHLC data

In [None]:
#| export
def candlestick_plot(
    df:pd.DataFrame,      # df with datetime index, and at least following columns 'Open', 'High', 'Low', 'Close'
    width:int = 950,      # width of the plot figure (only used when `fig` is None)
    height:int = 600,     # height of the plot figure (only used when `fig` is None)
    chart_title:str = '', # title of the chart (only used when `fig` is None)
    fig:Optional[figure] = None # figure to allow superposition of other lines on candlestick plot
    )-> None:
    """Create a Bokeh candlestick chart using a dataframe with 'Open', 'High', 'Low', 'Close'.

    When `fig` is None, a new figure is created and shown. Otherwise the candles are drawn
    onto `fig` and the figure is not shown here, so that the caller can add other glyphs first.
    """

    owns_figure = fig is None
    p = figure(width=width, height=height, title=chart_title, x_axis_type="datetime") if owns_figure else fig
    p.xaxis.major_label_orientation = 3.1415 / 4
    p.grid.grid_line_alpha = 0.5

    # x_axis_type as 'datetime' means that the width dimensions are measured in milliseconds.
    # Consecutive bars are not always contiguous (e.g. bar[0]=Fri and bar[1]=Mon), so instead of
    # taking the first interval, we take the smallest interval across the full (sorted, unique) index.
    # diff() yields NaT for the first bar; min() skips it.
    bar_times = df.index.unique().sort_values()
    smallest_interval = bar_times.to_series().diff().min()
    if pd.isna(smallest_interval):
        # Fewer than two distinct bars: no interval can be measured, fall back to one day
        interval_in_ms = 24 * 60 * 60 * 1000
    else:
        interval_in_ms = smallest_interval.total_seconds() * 1000
    ratio = 0.60
    bar_width = ratio * interval_in_ms

    # One entry per candle type: (row mask, color, column for bar bottom, column for bar top)
    candle_groups = (
        (df['Close'] > df['Open'], 'darkgreen', 'Open', 'Close'),
        (df['Close'] < df['Open'], 'darkred', 'Close', 'Open'),
        (df['Close'] == df['Open'], 'black', 'Close', 'Open'),
    )

    # Wicks (High-Low segments) are drawn first, then the bodies on top of them
    for mask, color, _, _ in candle_groups:
        x = df.index[mask.to_numpy()]
        p.segment(x, df.loc[mask, 'High'], x, df.loc[mask, 'Low'], color=color)

    for mask, color, bottom_col, top_col in candle_groups:
        p.vbar(x=df.index[mask.to_numpy()],
               bottom=df.loc[mask, bottom_col],
               top=df.loc[mask, top_col],
               width=bar_width,
               fill_color=color,
               line_color=color)

    if owns_figure:
        show(p)


Before using the function in a notebook, you must load BokehJS, with:

In [None]:
# Load BokehJS so that Bokeh figures are rendered inline in this notebook
output_notebook()

In [None]:
# Load the test OHLCV data, then preview its first five bars as a table and as a candlestick chart
df = load_test_df()
display(df.head(5))
candlestick_plot(df.head(5), width=800, height=400, chart_title='Candlestick plot')

Unnamed: 0_level_0,Open,High,Low,Close,Volume
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-10-22,2759.02,2779.27,2747.27,2754.48,26562
2018-10-23,2753.11,2755.36,2690.69,2743.45,38777
2018-10-24,2744.83,2748.58,2651.23,2672.8,41777
2018-10-25,2670.8,2722.9,2657.93,2680.71,39034
2018-10-26,2675.59,2692.34,2627.59,2663.57,61436


## Handling OHLCV data

In [None]:
# NOTE(review): unlike `candlestick_plot`, this cell carries no nbdev export directive,
# so this function is not exported to the library module — confirm this is intended.
def resample_ohlcv(
    df:pd.DataFrame,        # df with datetime index, and at least following columns 'Open', 'High', 'Low', 'Close'. Optional 'Volume'
    rule_str:str = 'W-FRI'  # Time offset alias for resampling. Default: 'W-FRI'. Other commons: 'D', 'B', 'W', 'M'
    ) -> pd.DataFrame:      # df with datetime index and columns 'Open', 'High', 'Low', 'Close'. Optional 'Volume'
    """
    Resample a DataFrame with OHLCV format according to the given rule string.

    The re-sampling is applied to each of the OHLC columns and to the optional V column.
    Re-sampling aggregate applies first(), max(), min(), last() and sum() to OHLCV respectively.
    The 'Volume' column is only present in the result when it is present in `df`.
    Common 'rules' are: 'D', 'B', 'W', 'W-FRI', 'M'

    Offset string reference: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
    """

    # Column -> aggregation applied to the rows that fall into each resampling bin
    aggregations = {'Open': 'first', 'High': 'max', 'Low': 'min', 'Close': 'last'}
    if 'Volume' in df.columns:
        aggregations['Volume'] = 'sum'

    # resample() returns a Re-sampler object; agg() applies one function per column and
    # returns a DataFrame indexed by the bin labels, with columns in the order given above
    return df.resample(rule_str).agg(aggregations)

In [None]:
# Resample the test data into weekly bars anchored on Fridays and preview the first five
resample_ohlcv(df, rule_str='W-FRI').head(5)

Unnamed: 0,Open,High,Low,Close,Volumes
2018-10-26,2759.02,2779.27,2627.59,2663.57,207586
2018-11-02,2667.7,2766.28,2603.33,2723.76,201577
2018-11-09,2721.51,2817.01,2713.14,2778.6,118857
2018-11-16,2777.1,2794.23,2669.14,2740.15,170290
2018-11-23,2732.15,2746.53,2625.66,2630.36,132017


In [None]:
# NOTE(review): unlike `candlestick_plot`, this cell carries no nbdev export directive,
# so this function is not exported to the library module — confirm this is intended.
def autocorrelation_ohlcv(
    df:pd.DataFrame, # df with datetime index, with at least following one of columns 'Open', 'High', 'Low', 'Close' 
    max_lag: int = 10, # Maximum lag to consider for the autocorrelation
    ohlc:str='Close',  # Columns to use for the autocorrelation. Default: 'Close'. Options: 'Open', 'High', 'Low', 'Close'
    **kwarg            # Accepted but not used by this function
    ) -> pd.Series:
    """
    Return the autocorrelation of the selected column (O, H, L or C) for each lag from 1 to `max_lag`.

    The returned series is named 'Autocorrelation', has float values and is indexed by the lag.
    """
    lag_range = range(1, max_lag + 1)
    prices = df[ohlc]

    # Build the float series in one go: filling an integer-initialised series with floats
    # relies on implicit upcasting, which recent pandas versions warn about
    return pd.Series(
        [prices.autocorr(lag) for lag in lag_range],
        index=lag_range,
        name='Autocorrelation',
        dtype='float64',
    )

In [None]:
# Autocorrelation of the 'Open' column of the test data for lags 1 to 5
autocorrelation_ohlcv(df, max_lag=5, ohlc='Open')

1    0.948029
2    0.884840
3    0.826346
4    0.764883
5    0.713426
Name: Autocorrelation, dtype: float64

In [None]:
#| hide
# Run nbdev's export step (presumably writes the exported cells to the module named by default_exp — verify)
import nbdev
nbdev.nbdev_export()