In [None]:
import volstreet.datamodule as dm
import plotly.express as px
from sklearn.linear_model import LinearRegression
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit
from datetime import time, datetime, timedelta
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import volstreet as vs

In [None]:
# Using DataClient class
# Every credential is read from the environment so that no secret is stored in the notebook.
# A missing variable raises KeyError immediately.
import os

client = dm.DataClient(api_key=os.environ['EOD_API_KEY'])
kite_user = os.environ['KITE_USER']
kite_pass = os.environ['KITE_PASS']
kite_api_key = os.environ['KITE_API_KEY']
kite_api_secret = os.environ['KITE_API_SECRET']
kite_auth_key = os.environ['KITE_AUTH_KEY']

In [None]:
# Using get_data and analyser functions
# Raw price history for each index, fetched through the data client
nifty_data = client.get_data(symbol='NIFTY')
bnf_data = client.get_data(symbol='BANKNIFTY')
finnifty_data = client.get_data(symbol='FINNIFTY')
# Summaries of the raw data at daily, weekly and monthly frequency.
# NOTE(review): the '-THU' suffix presumably anchors periods to Thursday expiries — confirm against dm.analyser
nifty_daily_data = dm.analyser(nifty_data, frequency='D')
bnf_daily_data = dm.analyser(bnf_data, frequency='D')
nifty_weekly_data = dm.analyser(nifty_data, frequency='W-THU')
bnf_weekly_data = dm.analyser(bnf_data, frequency='W-THU')
nifty_monthly_data = dm.analyser(nifty_data, frequency='M-THU')
bnf_monthly_data = dm.analyser(bnf_data, frequency='M-THU')

# VIX open/close, re-indexed to business days with gaps forward-filled
vix = client.get_data("VIX", return_columns=["open", "close"])
vix = vix.resample("B").ffill()

In [None]:
# Using ratio_analysis function
rolling_periods = 5
ratio_data = dm.ratio_analysis(bnf_weekly_data, nifty_weekly_data, add_rolling=rolling_periods)
# Colour of each plotted series; the keys double as the list of columns to draw
series_colours = {
    'BANKNIFTY Change': 'red',
    'NIFTY Change': 'blue',
    f'Rolling {rolling_periods} Ratio': 'green',
}
px.line(ratio_data, x=ratio_data.index, y=list(series_colours), color_discrete_map=series_colours)

In [None]:
ratio_data

In [None]:
# Using gambler function for NIFTY and BANKNIFTY
# For each index, fetch its data once and run dm.gambler on 'abs_change' at every listed frequency
for index in ['NIFTY', 'BANKNIFTY']:
    print(f'{index}\n')
    data = client.get_data(symbol=index)
    # Thursday-anchored frequencies are used for these two indices
    for frequency in ['D', 'D-THU', 'W-THU', 'M-THU']:
        print(f'{frequency}\n')
        dm.gambler(data, frequency, 'abs_change')

In [None]:
# Using gambler function for FINNIFTY
# Same procedure as for NIFTY/BANKNIFTY, but with Tuesday-anchored frequencies
for index in ['FINNIFTY']:
    print(f'{index}\n')
    data = client.get_data(symbol=index)
    for frequency in ['D', 'D-TUE', 'W-TUE', 'M-TUE']:
        print(f'{frequency}\n')
        dm.gambler(data, frequency, 'abs_change')

In [None]:
# Rolling average of absolute change to support gambler function
# The frequency is spelled 'W-TUE' (upper case) to match the frequency strings used in the rest of the notebook
analysed_df = dm.analyser(finnifty_data, frequency='W-TUE')
rolling_periods = 5
# min_periods=1 lets the first rows average over however many observations exist so far
analysed_df['rolling'] = analysed_df['abs_change'].rolling(rolling_periods, min_periods=1).mean()
# Optional plot of the rolling mean against the overall mean (left disabled):
# fig = px.line(analysed_df, x=analysed_df.index, y='rolling')
# fig.add_hline(y=analysed_df['abs_change'].mean(), line_dash="dot", annotation_text="Mean", annotation_position="top right")

In [None]:
analysed_df

# One min data

In [None]:
# Kite session object built from the API key/secret, user, password and auth key defined above
kite_obj = dm.get_greenlit_kite(kite_api_key, kite_api_secret, kite_user, kite_pass, kite_auth_key)

In [None]:
# Updating one min data for NIFTY 50, NIFTY BANK, NIFTY FIN SERVICE and NIFTY MID SELECT
# A forward slash keeps the target directory identical to the 'data/...' paths the CSVs are read back from
# and is accepted on every OS, unlike a hard-coded backslash.
for index_symbol in ['NIFTY 50', 'NIFTY BANK', 'NIFTY FIN SERVICE', 'NIFTY MID SELECT']:
    dm.get_1m_data(kite_obj, index_symbol, path='data/')

In [None]:
# Update the one-minute data of the NIFTY constituents.
# Forward slash for the same reason as the reads below: 'data/' is valid on every OS, 'data\\' only on Windows.
dm.get_constituent_1m_data(kite_obj, 'NIFTY', path='data/')

In [None]:
# Load the stored one-minute prices of each index; the first CSV column becomes a parsed datetime index
nifty_onemin, bnf_onemin, fin_onemin, midcp_onemin = (
    pd.read_csv(f'data/{index_symbol}_onemin_prices.csv', index_col=0, parse_dates=True)
    for index_symbol in ('NIFTY 50', 'NIFTY BANK', 'NIFTY FIN SERVICE', 'NIFTY MID SELECT')
)

# Intraday Trend

In [None]:
# Run the intraday trend backtest on each index with one shared parameter set
trend_nifty, trend_bnf, trend_finnifty = (
    dm.backtest_intraday_trend(onemin_prices, vix, beta=0.8, rolling_days=90, max_entries=10)
    for onemin_prices in (nifty_onemin, bnf_onemin, fin_onemin)
)

In [None]:
# Join the three backtests on their index. Clashing column names get '_bnf' / '_finnifty' suffixes in the
# first merge; in the second merge the left side keeps its names and clashing NIFTY columns get '_nifty'.
all_indices_returns = trend_bnf.merge(trend_finnifty, left_index=True, right_index=True, suffixes=('_bnf', '_finnifty')).merge(trend_nifty, left_index=True, right_index=True, suffixes=('', '_nifty'))
# Keep every column whose name contains 'returns' and add them up row by row
all_indices_returns.filter(regex='returns').sum(axis=1)

In [None]:
trend_bnf

In [None]:
# Year wise summary of returns
# df_to_sum is only an alias, so switching the index under study means changing this one line
df_to_sum = trend_nifty
# Group rows by the calendar year of the index and total the 'total_returns' column
df_to_sum.groupby(df_to_sum.index.year).total_returns.sum()

In [None]:
# Plotting the distribution of returns for various entries
df_to_plot = trend_finnifty
# Pull the 'returns' of each numbered entry out of the per-day trade dictionaries;
# days without that entry yield NaN and are dropped
returns_1, returns_2, returns_3 = (
    df_to_plot.trade_data.apply(lambda trades, key=entry_key: trades.get(key, {}).get('returns', np.nan)).dropna()
    for entry_key in ('entry_1', 'entry_2', 'entry_3')
)

fig = go.Figure()
for trace_name, entry_returns in (('Entry 1', returns_1), ('Entry 2', returns_2), ('Entry 3', returns_3)):
    fig.add_trace(go.Histogram(x=entry_returns, name=trace_name, nbinsx=10))
# Overlay the three semi-transparent histograms instead of stacking them
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.75)
fig.show()

In [None]:
# Plotting the distribution of the ratio for stop loss and no stop loss days for all indices
# Stack the three backtests row-wise so every index-day is one observation
all_indices_with_drivers = pd.concat([trend_nifty, trend_bnf, trend_finnifty])
# A day counts as a "stop loss" day here when its total return is zero or negative
all_indices_with_drivers_stop_loss =  all_indices_with_drivers[(all_indices_with_drivers.total_returns <= 0)]
all_indices_with_drivers_no_stop_loss =  all_indices_with_drivers[(all_indices_with_drivers.total_returns > 0)]
fig = px.histogram(all_indices_with_drivers_stop_loss, x='ratio')
fig.add_trace(go.Histogram(x=all_indices_with_drivers_no_stop_loss.ratio, name='No Stop Loss'))
# Overlay the two semi-transparent histograms so their shapes can be compared directly
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.75)

In [None]:
# Plotting the rolling ratio of three indices
df_to_plot = all_indices_returns
fig = go.Figure()
# go.Scatter(mode='lines') replaces the deprecated go.Line graph object.
# NIFTY's column carries no suffix because it was merged last with an empty left suffix.
for ratio_column, trace_name in [
    ('rolling_ratio_bnf', 'BNF'),
    ('rolling_ratio_finnifty', 'Fin Nifty'),
    ('rolling_ratio', 'Nifty'),
]:
    fig.add_trace(go.Scatter(x=df_to_plot.index, y=df_to_plot[ratio_column], mode='lines', name=trace_name))
# (barmode only affects bar/histogram traces, so it is not set for this line chart)
fig.update_traces(opacity=0.75)
fig.show()

In [None]:
# Plotting the rolling ratio (secondary y axis) against the strategy NAV (primary y axis)
df_to_plot = trend_finnifty
fig = make_subplots(specs=[[{"secondary_y": True}]])
# go.Scatter(mode='lines') replaces the deprecated go.Line graph object
fig.add_trace(go.Scatter(x=df_to_plot.index, y=df_to_plot['rolling_ratio'], mode='lines', name='Ratio'), secondary_y=True)
fig.add_trace(go.Scatter(x=df_to_plot.index, y=df_to_plot['strat_nav'], mode='lines', name='Nav'), secondary_y=False)

In [None]:
# Trying different beta values
# Each sweep result goes into its own variable: reusing `trend_nifty` here would silently replace the
# main backtest (beta=0.8, rolling_days=90, max_entries=10) that later cells read.
for beta_pct in range(80, 105, 5):
    beta = beta_pct / 100
    trend_nifty_for_beta = dm.backtest_intraday_trend(nifty_onemin, vix, beta=beta)
    print(f'Beta: {beta}')
    print(f'NIFTY: {trend_nifty_for_beta["total_returns"].sum()}')

In [None]:
# If random strategy is truly profitable then which are the best parameters
# For every open_nth value 0..9, run the randomised backtest 10 times and report the mean and
# standard deviation of the summed total returns.
# NOTE(review): no random seed is set in this notebook, so these figures will presumably differ between
# runs unless dm seeds internally — confirm.
results = {}
for open_candle in range(0, 10):
    results[open_candle] = []
    for _ in range(10):
        bnf_random = dm.backtest_intraday_trend(bnf_onemin, vix, open_nth = open_candle, randomize=True)
        total_rets = bnf_random.total_returns.sum()
        results[open_candle].append(total_rets)
    print(f'Open candle: {open_candle}')
    print(f'Mean: {np.mean(results[open_candle])}')
    print(f'Std: {np.std(results[open_candle])}')

# Intraday one minute volatility

In [None]:
def add_vol_to_one_min_data(dataframe):
    """Return a copy of one-minute data with per-minute and intraday cumulative volatility.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        One-minute prices with a datetime index and a 'close' column.

    Returns
    -------
    pandas.DataFrame
        Copy of the input, same index and row order, with two extra columns:
        'vol'         - absolute one-minute percentage change of 'close', in percent;
        'rolling_vol' - expanding (cumulative) mean of that change, in percent.
        Both are computed within each calendar day, so the first row of every day is NaN.
    """
    dataframe = dataframe.copy()
    day = dataframe.index.date

    # transform() hands back results aligned to the original index. The earlier
    # groupby.apply + reset_index(level=0) only kept the datetime index on pandas
    # versions that prepend the group key; on others it discarded the index.
    abs_change = dataframe['close'].groupby(day).transform(lambda closes: closes.pct_change().abs())
    dataframe['vol'] = abs_change * 100
    # taking cumulative mean of vol
    dataframe['rolling_vol'] = abs_change.groupby(day).transform(lambda changes: changes.expanding().mean()) * 100
    return dataframe

In [None]:
# Add the 'vol' and 'rolling_vol' columns to the NIFTY one-minute data (the function works on a copy)
nifty_onemin = add_vol_to_one_min_data(nifty_onemin)

In [None]:
nifty_onemin

In [None]:
# Restrict the study to rows from 2015 onwards (label slice on the datetime index)
trial_df = nifty_onemin.loc['2015':]

In [None]:
def identify_dull_spurts(data, threshold_pct=0.6, threshold_window_size=90, desired_move=0.0027):
    """Label the first "dull" minute of one day and the candle at which the trade then exits.

    Parameters
    ----------
    data : pandas.DataFrame
        One day's candles in chronological order with the columns 'date' (timestamps),
        'close', 'high', 'low', 'vol' and 'rolling_vol'. The frame is modified in place.
    threshold_pct : float
        A minute is "dull" when the mean 'vol' of the preceding window is below
        ``threshold_pct`` times the 'rolling_vol' value at the start of that window.
    threshold_window_size : int
        Number of preceding rows in the look-back window.
    desired_move : float
        Fractional move away from the entry close that defines the upper and lower bounds.

    Returns
    -------
    pandas.DataFrame
        ``data`` with these columns added: 'dull_period' (1 on the entry row),
        'rolling_vol_window_mean', 'rolling_vol_prior' and 'dull_threshold' (filled on
        every scanned row), 'spot_at_entry', 'upper_bound', 'lower_bound' (entry row),
        and 'spot_at_exit' / 'successful_exit' on the exit row. 'successful_exit' is
        +1 when either bound is crossed and -1 when the position is closed at the last
        candle without a cross. At most one entry is labelled per call.
    """
    # Flags stay integer; price-like columns start as floats so that later float
    # assignments do not change the column dtype (pandas warns about that upcast).
    data['dull_period'] = 0
    data['successful_exit'] = 0
    for float_column in ('rolling_vol_window_mean', 'rolling_vol_prior', 'dull_threshold',
                         'spot_at_entry', 'upper_bound', 'lower_bound', 'spot_at_exit'):
        data[float_column] = 0.0

    if data.empty:
        return data

    print(f"Identifying dull periods and spurts for {data['date'].iloc[0].date()}")

    col = data.columns.get_loc
    scan_start = pd.Timestamp("12:20").time()

    # Iterate through the data to identify the first dull period and the subsequent spurt
    for i in range(len(data) - 1):

        # Skip the scanning process if before 12:20
        if data.iloc[i, col('date')].time() < scan_start:
            continue

        window_start = max(0, i - threshold_window_size)
        rolling_vol_window_mean = data.iloc[window_start:i, col('vol')].mean()
        rolling_vol_prior = data.iloc[window_start, col('rolling_vol')]
        dull_threshold = rolling_vol_prior * threshold_pct
        data.iloc[i, col('rolling_vol_window_mean')] = rolling_vol_window_mean
        data.iloc[i, col('rolling_vol_prior')] = rolling_vol_prior
        data.iloc[i, col('dull_threshold')] = dull_threshold

        # NaN on either side compares False, i.e. "not dull", and scanning continues
        if not rolling_vol_window_mean < dull_threshold:
            continue

        # Mark the dull period and record the entry levels
        spot_at_entry = data.iloc[i, col('close')]
        upper_bound = spot_at_entry * (1 + desired_move)
        lower_bound = spot_at_entry * (1 - desired_move)
        data.iloc[i, col('dull_period')] = 1
        data.iloc[i, col('spot_at_entry')] = spot_at_entry
        data.iloc[i, col('upper_bound')] = upper_bound
        data.iloc[i, col('lower_bound')] = lower_bound

        # Candles after the entry, excluding the day's last candle (which is the forced exit)
        future_prices = data.iloc[i + 1:-1]
        # Offsets within future_prices of every candle that breaches a bound
        high_hits = (future_prices['high'] > upper_bound).to_numpy().nonzero()[0]
        low_hits = (future_prices['low'] < lower_bound).to_numpy().nonzero()[0]

        if high_hits.size or low_hits.size:
            # Whichever bound is breached first decides the exit; a same-candle tie goes to the high
            exits_high = bool(high_hits.size) and (not low_hits.size or high_hits[0] <= low_hits[0])
            offset = high_hits[0] if exits_high else low_hits[0]
            # Positions are used throughout, so the result does not depend on the frame's index labels
            exit_pos = i + 1 + int(offset)
            data.iloc[exit_pos, col('spot_at_exit')] = data.iloc[exit_pos, col('high' if exits_high else 'low')]
            # A cross of either bound is a success. Previously a low-first cross was scored -1 when
            # both bounds were hit but +1 when only the low was hit, which confused it with "no exit".
            data.iloc[exit_pos, col('successful_exit')] = 1
        else:
            # Neither bound was reached: close out at the last candle and record a failure
            data.iloc[-1, col('spot_at_exit')] = data.iloc[-1, col('close')]
            data.iloc[-1, col('successful_exit')] = -1

        # Only the first dull period of the day is traded
        break

    return data

In [None]:
# Split the one-minute data into one frame per calendar day
daily_data = [day for _, day in trial_df.groupby(trial_df.index.date)]
# Applying the updated function to each day's data and concatenating the results.
# reset_index() turns the datetime index into a column for identify_dull_spurts and hands it a fresh frame.
# NOTE(review): the function reads a 'date' column, so the index of trial_df is presumably named 'date' — confirm
labeled_daily_data = pd.concat([identify_dull_spurts(day.reset_index(), threshold_pct=0.4, threshold_window_size=120) for day in daily_data])

In [None]:
# Per-day totals of the entry flag and of the exit outcome flag
labeled_daily_data_grouped = labeled_daily_data.groupby(labeled_daily_data['date'].dt.date).agg({'dull_period': 'sum', 'successful_exit': 'sum'})
# Grand totals over all days
labeled_daily_data_grouped.sum()

In [None]:
# Keep only days on which a dull period was flagged
filtered_labeled_data = labeled_daily_data_grouped[labeled_daily_data_grouped['dull_period'] > 0]
# Iterator over those dates; each run of the plotting cell below consumes the next one
iter_obj = iter(filtered_labeled_data.index)

In [None]:
labeled_daily_data.set_index('date')

In [None]:
# Plot the next day that has a dull period; next() raises StopIteration once every day has been shown
date_to_plot = next(iter_obj)
target_day_data = labeled_daily_data[labeled_daily_data['date'].dt.date == date_to_plot]
fig = px.line(target_day_data, x='date', y='close', title=f'Price movement on {date_to_plot}')

# Adding a shaded region to indicate the dull period.
# The look-back must equal the threshold_window_size used when the data was labelled (120 minutes);
# a 90-minute look-back shades a shorter span than the one that was actually measured.
dull_window_minutes = 120
dull_entry = target_day_data[target_day_data['dull_period'] == 1].iloc[0]
end_of_dull_period = dull_entry['date']
# The labelling window never reaches back before the day's first candle, so neither does the shading
start_of_dull_period = max(end_of_dull_period - timedelta(minutes=dull_window_minutes), target_day_data.iloc[0]['date'])
fig.add_vrect(x0=start_of_dull_period, x1=end_of_dull_period, fillcolor="lightslategrey", opacity=0.5, line_width=0)

# Prices from the entry onwards decide whether each bound was crossed
prices_after_entry = target_day_data.set_index('date').loc[end_of_dull_period:]
last_timestamp = target_day_data.iloc[-1]['date']
upper_bound = dull_entry['upper_bound']
lower_bound = dull_entry['lower_bound']
upper_bound_crossed = prices_after_entry['high'].max() > upper_bound
lower_bound_crossed = prices_after_entry['low'].min() < lower_bound

# One dotted line plus label per bound, starting at the end of the dull period on the x-axis;
# green when the bound was crossed, white otherwise
for bound_label, bound_level, bound_crossed in [
    ("Upper Bound", upper_bound, upper_bound_crossed),
    ("Lower Bound", lower_bound, lower_bound_crossed),
]:
    color = 'green' if bound_crossed else 'white'
    fig.add_shape(
            type="line",
            xref="x",
            yref="y",
            x0=end_of_dull_period,
            y0=bound_level,
            x1=last_timestamp,
            y1=bound_level,
            line=dict(color=color, dash="dot"),
    )
    fig.add_annotation(
            x=last_timestamp,
            y=bound_level,
            text=bound_label,
            showarrow=False,
            yshift=10,
            font=dict(color=color)
    )

fig.show()

# Working out the best stop loss for intraday trend

In [None]:
def analyse_stoploss(df, one_min_df, trend_buffer=0.1):

    stop_loss_days = df.trade_data.apply(lambda x: x.get('entry_1', {}).get('returns', np.nan)).dropna()
    stop_loss_days = stop_loss_days[stop_loss_days <= -0.3]

    first_trade_direction = df.trade_data.apply(lambda x: x.get('entry_1', {}).get('trend_direction', np.nan)).dropna()

    trend_days = df.open_to_close_trend_abs[df.open_to_close_trend_abs > df.threshold_movement + trend_buffer]
    days_with_stoploss_and_trend = stop_loss_days.index.intersection(trend_days.index)
    days_with_stoploss_and_trend = df.loc[days_with_stoploss_and_trend]
    days_with_stoploss_and_trend = days_with_stoploss_and_trend[['open_vix', 'day_open', 'open_to_close_trend', 'open_to_close_trend_abs', 'upper_bound', 'lower_bound']]

    days_with_stoploss_and_trend['first_trade_direction'] = first_trade_direction

    days_with_stoploss_and_trend['trend_direction'] = days_with_stoploss_and_trend.open_to_close_trend.apply(lambda x: 1 if x > 0 else -1)

    # Calculating the maximum other direction movement for the day
    days_with_stoploss_and_trend['extreme_price'] = days_with_stoploss_and_trend.apply(lambda x: one_min_df.loc[x.name.date().strftime('%Y-%m-%d')].close.min() if x.first_trade_direction == 1 else one_min_df.loc[x.name.date().strftime('%Y-%m-%d')].close.max(), axis=1)
    days_with_stoploss_and_trend['max_other_direction_movement'] = np.where(days_with_stoploss_and_trend.first_trade_direction == 1, days_with_stoploss_and_trend.upper_bound / days_with_stoploss_and_trend.extreme_price - 1, days_with_stoploss_and_trend.lower_bound / days_with_stoploss_and_trend.extreme_price - 1)
    return days_with_stoploss_and_trend

# NOTE(review): `analysed_df` is also the name of the FINNIFTY weekly frame built earlier in the
# notebook; after this line it refers to the BANKNIFTY stop-loss analysis instead.
analysed_df = analyse_stoploss(trend_bnf, bnf_onemin)
# Median size (sign ignored) of the adverse move on the selected days
analysed_df.max_other_direction_movement.abs().median()

# Intraday Trend - Constituent analysis

In [None]:
def get_index_with_constituent_trend_data(index_name, trend_df):
    """Describe the index constituents' moves at the moment each backtest trade was triggered.

    Parameters
    ----------
    index_name : str
        Index whose one-minute CSV ('data/<index_name>_onemin_prices.csv') and
        constituents (via ``vs.get_index_constituents``) are used; every constituent
        needs its own 'data/<ticker>_onemin_prices.csv'.
    trend_df : pandas.DataFrame
        Backtest output with a datetime index, a 'trade_data' column of per-day
        dictionaries (each non-'total_returns' item carries 'trigger_time' and
        'returns') and an 'open_to_close_trend' column.

    Returns
    -------
    pandas.DataFrame
        One row per trade: every constituent's change from the day's open, its
        weighted change and its contribution to the weighted sum, the index's own
        change from open, the trade's 'trigger_time', 'returns' and 'trend_at_close',
        and the dispersion measures 'sum_of_abs_movement', 'std_of_ratio',
        'std_of_constituents' and 'hhi_index' (sum of squared contributions).
    """

    def _load_change_from_open(name):
        # One-minute close relative to the 'open' of the day's SECOND candle (row 1 of each day)
        onemin = pd.read_csv(f'data/{name}_onemin_prices.csv', index_col=0, parse_dates=True)
        day_open = onemin['open'].groupby(onemin.index.date).transform(lambda opens: opens.iloc[1])
        return onemin['close'] / day_open - 1

    index_onemin = _load_change_from_open(index_name).to_frame(f'{index_name}_change_from_open')

    tickers, weights = vs.get_index_constituents(index_name)
    ticker_dfs = []
    for ticker, weight in zip(tickers, weights):
        change_from_open = _load_change_from_open(ticker)
        ticker_dfs.append(pd.DataFrame({
            f'{ticker}_change_from_open': change_from_open,
            # weights are given in percent
            f'{ticker}_weighted_change': change_from_open * (weight / 100),
        }))
    full_df = pd.concat(ticker_dfs, axis=1)
    full_df['proxy_index_change'] = full_df.filter(regex='weighted_change').sum(axis=1)
    full_df = full_df.merge(index_onemin, left_index=True, right_index=True)

    # Build all contribution columns first and attach them in one go, rather than
    # inserting two columns per ticker into an already wide frame
    contribution_columns = {}
    for ticker in tickers:
        contribution = full_df[f'{ticker}_weighted_change'] / full_df['proxy_index_change']
        contribution_columns[f'{ticker}_contribution'] = contribution
        contribution_columns[f'{ticker}_contribution_sq'] = contribution ** 2
    full_df = pd.concat([full_df, pd.DataFrame(contribution_columns)], axis=1)

    # Flatten the per-day trade dictionaries into one record per entry
    entries = [entry for day in trend_df.trade_data for key, entry in day.items() if key != 'total_returns']
    _trigger_times = [entry['trigger_time'] for entry in entries]
    _returns = [entry['returns'] for entry in entries]
    # Re-index by calendar date once instead of once per trigger time
    trend_by_date = trend_df.set_index(trend_df.index.date)
    _trend_at_close = [trend_by_date.loc[tt.date()].open_to_close_trend for tt in _trigger_times]
    _trigger_returns_trend = pd.DataFrame({'trigger_time': _trigger_times, 'returns': _returns, 'trend_at_close': _trend_at_close})

    # Keep only the minutes at which a trade was triggered
    df_to_ret = full_df.merge(_trigger_returns_trend, left_index=True, right_on='trigger_time')

    # Constituents' changes from open, without the index's own column
    constituent_changes = df_to_ret.drop(columns=[f'{index_name}_change_from_open']).filter(regex='change_from_open')
    df_to_ret['sum_of_abs_movement'] = constituent_changes.abs().sum(axis=1)
    df_to_ret['std_of_ratio'] = constituent_changes.div(df_to_ret['sum_of_abs_movement'], axis=0).std(axis=1)
    df_to_ret['std_of_constituents'] = constituent_changes.std(axis=1)
    df_to_ret['hhi_index'] = df_to_ret.filter(regex='contribution_sq').sum(axis=1)

    return df_to_ret

In [None]:
# Constituent view of every BANKNIFTY trade trigger
trend_bnf_consolidated = get_index_with_constituent_trend_data('NIFTY BANK', trend_bnf)
# Trades triggered after 1 January 2021 only
trend_bnf_consolidated_post_2021 = trend_bnf_consolidated[trend_bnf_consolidated.trigger_time > datetime(2021, 1, 1)]
trend_bnf_consolidated_post_2021

In [None]:
# Constituent view of every NIFTY trade trigger
trend_nifty_consolidated = get_index_with_constituent_trend_data('NIFTY 50', trend_nifty)

In [None]:
# Trades triggered after 1 January 2023 only
trend_nifty_consolidated_post_2023 = trend_nifty_consolidated[trend_nifty_consolidated.trigger_time > datetime(2023, 1, 1)]
trend_nifty_consolidated_post_2023

In [None]:
# Dispersion of constituent moves vs dispersion of their share of total movement, coloured by trade return
px.scatter(trend_bnf_consolidated_post_2021, x='std_of_constituents', y='std_of_ratio', hover_data=['trigger_time', 'returns', 'trend_at_close'], color='returns', range_color=[-0.3, 1])

# Index flat vs constituents move

In [None]:
# Recent volatility of BANKNIFTY versus its constituents, run with simulate_backtest enabled
bnf_index_vs_cons =  dm.get_index_vs_constituents_recent_vols('BANKNIFTY', return_all=False, simulate_backtest=True)

In [None]:
bnf_index_vs_cons

# Insights

In [None]:
# Confirming that there is a certain drift in absolute changes as time frame increases
# For each index, compare the mean absolute close-to-close change at weekly / monthly / yearly
# sampling with the daily figure, and set the ratio against a square-root-of-time benchmark.

for index_name, daily_df in zip(['NIFTY', 'BANKNIFTY', 'FINNIFTY'], [nifty_data, bnf_data, finnifty_data]):

    # Mean absolute percentage change of the forward-filled close at each sampling frequency
    daily_vol = daily_df.resample('B').ffill().close.pct_change().abs().mean()
    weekly_vol = daily_df.resample('W').ffill().close.pct_change().abs().mean()
    monthly_vol = daily_df.resample('M').ffill().close.pct_change().abs().mean()
    yearly_vol = daily_df.resample('Y').ffill().close.pct_change().abs().mean()

    # How much larger the longer-horizon move is than the daily move
    weekly_ratio = weekly_vol / daily_vol
    monthly_ratio = monthly_vol / daily_vol
    yearly_ratio = yearly_vol / daily_vol

    # Square root of the number of trading days assumed per period (5, 21, 252)
    weekly_benchmark = 5**0.5
    monthly_benchmark = 21**0.5
    yearly_benchmark = 252**0.5

    # A value of 1 means the observed ratio equals the benchmark; above 1 means larger moves than the benchmark
    weekly_deviation_from_benchmark = weekly_ratio/weekly_benchmark
    monthly_deviation_from_benchmark = monthly_ratio/monthly_benchmark
    yearly_deviation_from_benchmark = yearly_ratio/yearly_benchmark

    print(f'{index_name}\nDaily Volatility: {daily_vol:0.3f}\nWeekly Volatility: {weekly_vol: 0.3f}, Weekly Ratio: {weekly_ratio: 0.3f}, Weekly Benchmark: {weekly_benchmark: 0.3f}\nMonthly Volatility: {monthly_vol: 0.3f}, Monthly Ratio: {monthly_ratio: 0.3f}, Monthly Benchmark: {monthly_benchmark: 0.3f}\nYearly Volatility: {yearly_vol: 0.3f}, Yearly Ratio: {yearly_ratio: 0.3f}, Yearly Benchmark: {yearly_benchmark: 0.3f}\n')

    print(f'{index_name}\nWeekly Deviation from Benchmark: {weekly_deviation_from_benchmark: 0.3f}\nMonthly Deviation from Benchmark: {monthly_deviation_from_benchmark: 0.3f}\nYearly Deviation from Benchmark: {yearly_deviation_from_benchmark: 0.3f}\n')

In [None]:
# Confirming whether drift is present in intraday movements
# Compare the size of one-minute moves with the size of first-to-last-close moves of each day.

# Candles left out of the one-minute statistics
# NOTE(review): presumably because returns at the open and the close are distorted — confirm
excluded_times = [time(9, 15), time(9, 16), time(15, 30)]

for onemindf, index_name in zip([nifty_onemin, bnf_onemin, fin_onemin, midcp_onemin], ['NIFTY', 'BANKNIFTY', 'FINNIFTY', 'MIDCAP']):
    print(f'{index_name}\n')

    # A boolean mask selects the rows positionally; this avoids a Python-level filter over every
    # timestamp followed by a label lookup, and cannot duplicate rows when timestamps repeat.
    keep = ~pd.Index(onemindf.index.time).isin(excluded_times)
    minute_returns = onemindf.close.pct_change()[keep]

    minute_vol_sd = minute_returns.std()
    minute_vol_abs_change = minute_returns.abs().mean()

    print(f'Minute Volatility SD: {minute_vol_sd}')
    print(f'Minute Volatility Absolute Change: {minute_vol_abs_change}')

    # Return from the first close to the last close of each day
    open_to_close = onemindf.close.groupby(onemindf.index.date).apply(lambda x: (x.iloc[-1] / x.iloc[0] - 1))
    open_close_std = open_to_close.std()
    open_close_abs_change = open_to_close.abs().mean()

    print(f'Open Close SD: {open_close_std}')
    print(f'Open Close Absolute Change: {open_close_abs_change}')

    ratio_of_volatility = open_close_std / minute_vol_sd
    ratio_of_abs_change = open_close_abs_change / minute_vol_abs_change

    print(f'Ratio of Volatility: {ratio_of_volatility}')
    print(f'Ratio of Absolute Change: {ratio_of_abs_change}\n')

In [None]:
# Spot check on BANKNIFTY for 2017 only
df_to_test = bnf_onemin.loc['2017']
# Timestamps excluding the 09:15, 09:16 and 15:30 candles; only the commented-out line below uses them
filtered_index = filter(lambda i: i.time() not in [time(9, 15), time(9, 16), time(15, 30)], df_to_test.index)
filtered_index = list(filtered_index)
#df_to_test.close.pct_change()[filtered_index].std()
# Mean absolute return from the first close to the last close of each day
df_to_test.close.groupby(df_to_test.index.date).apply(lambda x: (x.iloc[-1] / x.iloc[0] - 1)).abs().mean()

In [None]:
# Determining the distribution of one min volatility
df = bnf_onemin
# Timestamps excluding the 09:15, 09:16 and 15:30 candles
filtered_index = filter(lambda i: i.time() not in [time(9, 15), time(9, 16), time(15, 30)], df.index)
filtered_index = list(filtered_index)
# Signed one-minute returns at the remaining timestamps (despite the name, this is a Series)
filtered_df = df.close.pct_change()[filtered_index]
px.histogram(x=filtered_df)

# Modelling IV surface 

In [None]:
# Reading the data and replacing the stored index column with a fresh 0..n-1 index
data = pd.read_csv('data/vol_surface.csv', index_col=0).reset_index(drop=True)

In [None]:
# Adding the moneyness feature (ratio of spot price to strike price)
# Note: both features are written into `data` in place
data['moneyness'] = data['spot'] / data['strike']

# Adding the interaction term between distance squared and time to expiry
data['distance_time_interaction'] = data['distance_squared'] * data['time_to_expiry']

# Display the frame to verify the added features
data

In [None]:
# Performing one-hot encoding with one column left out (drop_first=True) for the 'symbol' column
# The dropped first category becomes the baseline that the remaining dummy columns are measured against
data_encoded = pd.get_dummies(data, columns=['symbol'], drop_first=True)

# Display the first few rows to verify the encoding
data_encoded.head()

In [None]:
from sklearn.model_selection import train_test_split

# Selecting the features
# 'time_to_expiry' is listed first; it is used to pick a segment and is dropped before model fitting.
# The two symbol dummies are those produced by the one-hot encoding above.
features = ['time_to_expiry', 'distance', 'distance_squared', 'moneyness', 'distance_time_interaction', 'symbol_FINNIFTY', 'symbol_NIFTY']
X = data_encoded[features]

# Selecting the target variable
y = data_encoded['iv_multiple']

# Splitting the data into training and validation sets (80% training, 20% validation)
# random_state fixes the shuffle so the split is reproducible
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.20, random_state=42)

# Checking the shapes of the training and validation sets
X_train.shape, X_val.shape, y_train.shape, y_val.shape


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Defining the time-to-expiry ranges
# The bounds are rounded once, up front, so that the range a model is trained on is exactly the range
# stored in its dictionary key. The key is what later selects the model for a prediction; training on
# the unrounded bound (e.g. 1/365) while keying on the rounded one (0.0027) sends samples that lie
# between the two values to a model that never saw their range.
time_to_expiry_ranges = [
    (round(range_lower, 4), round(range_upper, 4))
    for range_lower, range_upper in [
        (0, 0.0003), (0.0003, 0.0008), (0.0008, (1/365)), ((1/365), (3/365)), ((3/365), (10/365)), ((10/365), 10.0)
    ]
]

# Dictionary of trained models keyed by (lower_bound, upper_bound) of the segment
random_forest_models = {}

# Looping through the ranges to train one model per segment
for lower_bound, upper_bound in time_to_expiry_ranges:
    # Filtering the training data for the current segment (lower bound inclusive, upper bound exclusive)
    in_segment = (X_train['time_to_expiry'] >= lower_bound) & (X_train['time_to_expiry'] < upper_bound)
    X_train_segment = X_train[in_segment]
    y_train_segment = y_train.loc[X_train_segment.index]

    # Create the Random Forest model (fixed hyperparameters; no grid search is performed)
    rf_segment = RandomForestRegressor(random_state=42, min_samples_split=5)

    # Fit the model; time_to_expiry only selects the segment and is not a model input
    rf_segment.fit(X_train_segment.drop(columns=['time_to_expiry']), y_train_segment)

    random_forest_models[(lower_bound, upper_bound)] = rf_segment


# Models trained for all segments
"Models trained successfully for all segments!"


In [None]:
from sklearn.metrics import mean_squared_error

# Function to get the appropriate model based on time to expiry
def get_model_for_time_to_expiry(time_to_expiry, models):
    """Return the model whose segment contains ``time_to_expiry``.

    Parameters
    ----------
    time_to_expiry : float
        Value to look up.
    models : dict
        Maps ``(lower_bound, upper_bound)`` keys to models. The lower bound is
        inclusive, the upper bound exclusive.

    Returns
    -------
    The value stored under the first key (in dictionary order) whose range
    contains ``time_to_expiry``.

    Raises
    ------
    IndexError
        If no segment contains ``time_to_expiry``.
    """
    for segment, model in models.items():
        if segment[0] <= time_to_expiry < segment[1]:
            return model
    raise IndexError(f'No model segment covers time_to_expiry={time_to_expiry!r}')

# Predicting on the validation set using the respective Random Forest model for each sample.
# Rows are predicted in one batch per segment instead of one predict() call per row, and the feature
# columns are taken by name rather than by assuming 'time_to_expiry' is the first column.
val_features = X_val.drop(columns=['time_to_expiry'])
y_pred_val = np.full(len(X_val), np.nan)
for (segment_lower, segment_upper), segment_model in random_forest_models.items():
    # Same membership rule as the model lookup: lower bound inclusive, upper bound exclusive
    in_segment = ((X_val['time_to_expiry'] >= segment_lower) & (X_val['time_to_expiry'] < segment_upper)).to_numpy()
    if in_segment.any():
        y_pred_val[in_segment] = segment_model.predict(val_features[in_segment])
# A row that no segment covers would otherwise surface later as an opaque NaN error
if np.isnan(y_pred_val).any():
    raise IndexError('Some validation rows have a time_to_expiry that no model segment covers')


# Calculating the mean squared error for the Random Forest models on the validation set
mse_rf = mean_squared_error(y_val, y_pred_val)
mse_rf

In [None]:
# Plotting the predicted vs actual values, coloured on a continuous scale by time to expiry
# range_color caps the colour scale at 0.03, so longer expiries all take the end colour
fig = px.scatter(x=y_val, y=y_pred_val, color=X_val['time_to_expiry'], color_continuous_scale='RdBu', range_color=(0, 0.03))
fig.update_layout(xaxis_title='Actual', yaxis_title='Predicted')
fig.show()

In [None]:
# Comparing the random forest model with the regression model from the volstreet package
from volstreet.blackscholes import iv_transformer_coeffs_wip

# One set of coefficients per validation row, looked up from its time to expiry
# NOTE(review): the column names below assume iv_transformer_coeffs_wip returns
# (distance-squared coefficient, distance coefficient, intercept) in that order — confirm
regression_coeffs = X_val.time_to_expiry.apply(iv_transformer_coeffs_wip)
regression_coeffs = regression_coeffs.apply(pd.Series)
regression_coeffs.columns = ['dis_sq_coeff', 'dis_coeff', 'intercept']
# Quadratic in distance: dis_sq_coeff * distance^2 + dis_coeff * distance + intercept
regression_coeffs['y_pred_reg'] = regression_coeffs.dis_sq_coeff * X_val['distance_squared'] + regression_coeffs.dis_coeff * X_val['distance'] + regression_coeffs.intercept

# Mean squared error of the regression model on the same validation rows
mse_reg = mean_squared_error(y_val, regression_coeffs['y_pred_reg'])
mse_reg

In [None]:
# Plotting the regression model's predicted vs actual values, coloured on a continuous scale by time to expiry
# Same colour scale and cap (0.03) as the Random Forest plot, so the two charts are directly comparable
fig = px.scatter(x=y_val, y=regression_coeffs['y_pred_reg'], color=X_val['time_to_expiry'], color_continuous_scale='RdBu', range_color=(0, 0.03))
fig.update_layout(xaxis_title='Actual', yaxis_title='Predicted')
fig.show()

In [None]:
# Plotting the vol curve, coloured by time to expiry, for actual values and for the predictions of both models
# Three separate figures are drawn: actual (blue scale), Random Forest (red scale), regression (green scale)

# Scatter plot of distance vs IV multiple - actual values
fig = px.scatter(x=X_val['distance'], y=y_val, color=X_val['time_to_expiry'], color_continuous_scale='Blues', range_color=(0, 0.03))
fig.update_layout(xaxis_title='Distance', yaxis_title='IV Multiple')

# Scatter plot of distance vs IV multiple - predicted values
fig2 = px.scatter(x=X_val['distance'], y=y_pred_val, color=X_val['time_to_expiry'], color_continuous_scale='Reds', range_color=(0, 0.03))
fig2.update_layout(xaxis_title='Distance', yaxis_title='IV Multiple')

# Scatter plot of distance vs IV multiple - regression model's predicted values
fig3 = px.scatter(x=X_val['distance'], y=regression_coeffs['y_pred_reg'], color=X_val['time_to_expiry'], color_continuous_scale='Greens', range_color=(0, 0.03))
fig3.update_layout(xaxis_title='Distance', yaxis_title='IV Multiple')

# Show the plots
fig.show()
fig2.show()
fig3.show()

In [None]:
# Feature importance for the Random Forest model

# The models were fitted without 'time_to_expiry', so the importances line up with the remaining columns
feature_names = X_train.drop(columns=['time_to_expiry']).columns

# One small frame per segment, concatenated once (instead of growing a frame inside the loop)
feature_importance_df = pd.concat([
    pd.DataFrame({
        'Feature': feature_names,
        'Importance': model.feature_importances_,
        # repeat the segment tuple explicitly; a bare tuple would be read as a column of values
        'Segment': [segment] * len(feature_names)
    })
    for segment, model in random_forest_models.items()
])

# Create a Plotly figure using the DataFrame
fig = px.bar(feature_importance_df, x='Importance', y='Feature', color='Segment',
             title='Random Forest Feature Importance by Segment',
             labels={'Importance': 'Feature Importance', 'Feature': 'Feature'},
             orientation='h',
             category_orders={'Segment': sorted(feature_importance_df['Segment'].unique(), reverse=True)})

# Show the figure
fig.show()

In [None]:
# Saving the models
import os

from joblib import dump

# dump() does not create missing directories, so make sure the target folder exists first
os.makedirs('iv_models', exist_ok=True)
# The segment tuple is embedded in the file name so that it can be recovered when loading
for segment, model in random_forest_models.items():
    dump(model, f'iv_models/random_forest_model_{segment}.joblib')

In [None]:
# Loading the models
from joblib import load
import os
import ast

# NOTE(review): joblib files are pickles, so loading one can execute arbitrary
# code. Only load files from iv_models that this notebook wrote itself.
random_forest_models_loaded = {}
for file in os.listdir('iv_models'):
    # splitext removes exactly the extension. str.rstrip('.joblib') strips any
    # trailing run of the characters . j o b l i, which is not the same thing.
    stem, extension = os.path.splitext(file)
    if extension != '.joblib':
        continue

    # Files are saved as random_forest_model_<segment>.joblib. Removing the
    # known prefix keeps segment labels that contain underscores intact; other
    # names fall back to the text after the last underscore.
    prefix = 'random_forest_model_'
    str_literal = stem[len(prefix):] if stem.startswith(prefix) else stem.split('_')[-1]

    # The segment label was written with str(); literal_eval turns it back
    # into the original Python literal used as the dictionary key.
    segment = ast.literal_eval(str_literal)
    random_forest_models_loaded[segment] = load(f'iv_models/{file}')

# Modelling IV surface (old)

In [None]:
# Modelling IV surface
# Load the saved surface data; the first CSV column is used as the index.
vol_surface = pd.read_csv('data/vol_surface.csv', index_col=0)
#vol_surface = vol_surface.drop(vol_surface[vol_surface.isna().all(axis=1)].index)
# Round time to expiry to 4 decimals into a separate `tte` column, which the
# per-expiry fits use as their grouping key.
vol_surface['tte'] = vol_surface.time_to_expiry.apply(lambda num: round(num, 4))
# Display the frame
vol_surface

In [None]:
# Modelling IV surface
# For every rounded time to expiry, fit
#     iv_multiple ~ dis_coeff * distance + dis_sq_coeff * distance_squared + intercept
# and keep the coefficients only when the fit explains more than 90% of the variance.
vol_surface_dict = {}
for tte in vol_surface.tte.unique():
    rows_for_tte = vol_surface.loc[vol_surface.tte == tte]
    X = rows_for_tte[['distance', 'distance_squared']]
    y = rows_for_tte['iv_multiple']

    model = LinearRegression()
    model.fit(X, y)

    # coef_ follows the column order of X: [distance, distance_squared]
    dis_coeff = model.coef_[0]
    dis_sq_coeff = model.coef_[1]
    intercept = model.intercept_
    score = model.score(X, y)

    # Skip poor fits
    if not score > 0.9:
        continue
    vol_surface_dict[tte] = {
        'dis_sq_coeff': dis_sq_coeff,
        'dis_coeff': dis_coeff,
        'intercept': intercept,
        'score': score,
    }
    # print(f'{tte} days to expiry: Coefficients: {model.coef_}, Intercept: {model.intercept_}, R2: {model.score(X, y)}')

# One row per retained expiry, ordered by time to expiry
vol_surface_weights = (
    pd.DataFrame(vol_surface_dict)
    .T
    .reset_index()
    .rename(columns={'index': 'time_to_expiry'})
    .sort_values('time_to_expiry')
)

In [None]:
# Display the per-expiry fit coefficients (one row per time to expiry)
vol_surface_weights

In [None]:
# Scatter of the distance-squared coefficient against time to expiry.
# The figure is kept in `fig` so a fitted curve can be added to it afterwards.
fig = px.scatter(vol_surface_weights, x='time_to_expiry', y='dis_sq_coeff')
fig.show()

In [None]:
def func(x, a, b, c):
    """Exponential decay model: a * exp(-b * x) + c.

    Used as the model function for curve fitting.

    Args:
        x: Scalar or array of inputs.
        a: Amplitude of the exponential term.
        b: Decay rate (positive values make the term shrink as x grows).
        c: Constant offset the curve tends to for large x when b > 0.

    Returns:
        The model value(s), with the same shape as x.
    """
    decay = np.exp(-b * x)
    return a * decay + c
# Fit the exponential model to the distance-squared coefficients.
# All three parameters (a, b, c) are left unbounded.
lower_bounds = [-np.inf] * 3
upper_bounds = [np.inf] * 3
popt, pcov = curve_fit(
    func,
    xdata=vol_surface_weights['time_to_expiry'],
    ydata=vol_surface_weights['dis_sq_coeff'],
    bounds=(lower_bounds, upper_bounds),
)

In [None]:
# Evaluate the fitted curve on a dense grid over [0, 1) and add it as a line
# trace to the existing figure.
# NOTE(review): this relies on `fig` still being the dis_sq_coeff scatter;
# other cells rebind `fig`, so the result depends on execution order.
dummy_range = np.arange(0, 1, 0.0001)
fig.add_trace(px.line(x=dummy_range, y=func(dummy_range, *popt)).data[0])

In [None]:
# Fitted parameters (a, b, c) of the exponential model
popt

In [None]:
# Modelling IV surface - Distance coefficient (dis_coeff) vs an inverse power of Time to Expiry
# Sweep the exponent `param` and, for each value, regress dis_coeff on
# 1 / time_to_expiry**param, printing the fit so the exponents can be compared by R2.
# Note: the 'tte_inverse' column of vol_surface_weights is overwritten on every
# iteration, so after the loop it holds the values for the last exponent only.
for param in np.arange(0.02, 1.5, 0.01):
    vol_surface_weights['tte_inverse'] = 1 / (vol_surface_weights.time_to_expiry**param)
    # Single feature, reshaped into the 2-D (n_samples, 1) layout that fit() is given
    X = vol_surface_weights['tte_inverse'].values.reshape(-1, 1)
    y = vol_surface_weights['dis_coeff']
    model = LinearRegression()
    model.fit(X, y)
    print(f'{param} param: Coefficients: {model.coef_}, Intercept: {model.intercept_}, R2: {model.score(X, y)}')

In [None]:
# Scatter of the distance coefficient against time to expiry
px.scatter(vol_surface_weights, x='time_to_expiry', y='dis_coeff')

In [None]:
def coefficients_for_surface(tte):
    """Return the hard-coded IV-surface coefficients for a given time to expiry.

    Args:
        tte: Time to expiry as a scalar. The thresholds below are written as
            fractions of 365, i.e. tte is compared against values in years.

    Returns:
        Tuple (dfs2, dfs, intercept):
            dfs2      - distance-squared coefficient
            dfs       - distance coefficient
            intercept - constant term
    """
    # distance squared coefficient: exponential decay in tte plus 100, capped at 20000
    dfs2 = min(3270.27*np.exp(-384.38*tte) + 100, 20000)

    # distance coefficient: fixed at 1 below 0.26 days, otherwise an inverse
    # power of tte (capped at 5) shifted down by 6
    if tte < 0.26/365:
        dfs = 1
    else:
        dfs = -6 + min(1 / ((tte ** 0.45) * 5), 5)

    # intercept: three steps, switching at 3 hours and at 0.27 days
    if tte < 3/(24*365):
        intercept = 1.07
    else:
        intercept = 1 if tte < 0.27/365 else 0.98

    return dfs2, dfs, intercept

In [None]:
# Example: coefficients for an input of 2/365
coefficients_for_surface(2/365)

# Gambler's Fallacy

In [None]:
import random
import logging

In [None]:
# Dedicated logger for the roulette simulation; records go to a log file.
gambler_logger = logging.getLogger('gambler')
gambler_logger.setLevel(logging.INFO)

# Setting up the file handler.
# logging.getLogger returns the same logger object every time, so re-running
# this cell must not attach a second handler for the same file; that would
# write every record twice. Reuse the handler if it is already attached.
file_handler = next(
    (
        handler for handler in gambler_logger.handlers
        if isinstance(handler, logging.FileHandler)
        and handler.baseFilename.endswith('gambler_calculations.log')
    ),
    None,
)
if file_handler is None:
    file_handler = logging.FileHandler('gambler_calculations.log')
    # Adding the handler to the logger
    gambler_logger.addHandler(file_handler)

file_handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)

In [None]:

def simulate_roulette_spins(
        n_spins, initial_money=None, drought_threshold=150, bet_amount=None, stop_loss=0, seed=None
):
    """Simulate a "bet on overdue numbers" strategy on a 37-pocket roulette wheel.

    A number's drought is the count of consecutive spins in which it has not
    come up. Before each spin's result is applied, a stake is placed on every
    number whose drought is above ``drought_threshold`` but still below
    ``drought_threshold + stop_loss``. Every step is written to
    ``gambler_logger``.

    Args:
        n_spins: Number of spins to simulate.
        initial_money: Starting bankroll; 100 when None.
        drought_threshold: A number is bet on only once its current drought
            exceeds this value.
        bet_amount: Stake per number as a fraction of the starting bankroll.
            When None, the fraction computed by the Kelly formula below is
            used. The stake is fixed at the start and is not rescaled as the
            bankroll changes.
        stop_loss: Extra spins beyond ``drought_threshold`` during which a
            number keeps being bet on; numbers whose drought reaches
            ``drought_threshold + stop_loss`` are dropped. With the default 0
            no number ever qualifies.
        seed: Optional seed for a private random generator, making the run
            reproducible. When None the module-level ``random`` state is used.

    Returns:
        Tuple ``(droughts, money_history)``:
            droughts - dict mapping each number 0..36 to the list of completed
                drought lengths observed for it.
            money_history - list of the bankroll before the first spin and
                after every spin (length ``n_spins + 1``).
    """
    # A private generator keeps seeded runs reproducible without touching the
    # global random state; unseeded runs draw from `random` directly.
    rng = random if seed is None else random.Random(seed)

    # Completed droughts per number, and the drought currently in progress
    droughts = {i: [] for i in range(37)}
    current_droughts = {i: 0 for i in range(37)}

    # Initialize money
    money = 100 if initial_money is None else initial_money
    money_history = [money]

    # Kelly fraction p - q / b for a bet paying `payout` to 1.
    # NOTE(review): the wheel simulated below has 37 pockets, so the chance of
    # hitting a single number is 1/37; the 1/35 used here is kept unchanged -
    # confirm it is intentional.
    chances_of_winning = 1/35
    chances_of_losing = 1 - chances_of_winning
    payout = 35
    kelly_percentage = chances_of_winning - (chances_of_losing/payout)
    bet_amount = kelly_percentage * money if bet_amount is None else bet_amount * money

    # From here on stop_loss is an absolute drought length, not an offset
    stop_loss = drought_threshold + stop_loss

    # Simulate n spins
    for spin_number in range(n_spins):
        gambler_logger.info(f'Spin number: {spin_number}')
        spin_result = rng.randint(0, 36)
        gambler_logger.info(f'Spin result: {spin_result}')

        # Determine which numbers to bet on
        numbers_to_bet_on = {number: dryness for number, dryness in current_droughts.items() if dryness > drought_threshold}
        gambler_logger.info(f'Numbers to bet on and their droughts: {numbers_to_bet_on}')
        numbers_to_bet_on_filtered = {number: dryness for number, dryness in numbers_to_bet_on.items() if dryness < stop_loss}

        if numbers_to_bet_on != numbers_to_bet_on_filtered:
            gambler_logger.info(f'Numbers filtered down to {numbers_to_bet_on_filtered}')

        # Determine how much to bet in total
        total_bet_amount = bet_amount * len(numbers_to_bet_on_filtered)
        gambler_logger.info(f'Total bet amount: {total_bet_amount}')

        # Update money based on bet results.
        # NOTE(review): the bankroll is not checked against the total stake, so
        # it can go negative.
        if spin_result in numbers_to_bet_on_filtered:
            # The winning number returns 36 times its stake (35 to 1 plus the
            # stake itself); the stakes on all numbers were paid up front.
            winnings = 36*bet_amount - total_bet_amount
            money += winnings
            gambler_logger.info(f'Won {winnings}')
        else:
            # If none of the numbers you bet on came up, you lose the total bet amount
            money -= total_bet_amount
            gambler_logger.info(f'Lost {total_bet_amount}')

        # Update money history
        money_history.append(money)
        gambler_logger.info(f'Money: {money}')

        # Update drought counts: the number that came up closes its drought and
        # restarts at 0, every other number's drought grows by one.
        droughts[spin_result].append(current_droughts[spin_result])
        for number in current_droughts:
            current_droughts[number] += 1
        current_droughts[spin_result] = 0
        gambler_logger.info(f'Number {spin_result} came up. Resetting drought count.')

    return droughts, money_history

In [None]:
# Run 25 independent simulations of `spins` spins each and record the final
# bankroll of every run in `monies`.
# After the loop, `droughts` and `money_history` hold the results of the last
# run only.
monies = []
spins = 10000

for _ in range(25):
    droughts, money_history = simulate_roulette_spins(spins, initial_money=100, drought_threshold=100, stop_loss=5)
    monies.append(money_history[-1])

In [None]:
# Histogram of the final bankroll across all simulation runs
px.histogram(x=monies)

In [None]:
# Plot the money history (bankroll after each spin) of the most recent run
px.line(y=money_history)

In [None]:
# Flatten the per-number drought lists into one list of drought lengths
all_droughts = [
    drought_length
    for number_droughts in droughts.values()
    for drought_length in number_droughts
]

In [None]:
# Cumulative, probability-normalised histogram of all drought lengths
drought_histogram = go.Histogram(
    x=all_droughts,
    nbinsx=20,
    cumulative={'enabled': True},
    histnorm='probability',
)
fig = go.Figure()
fig.add_trace(drought_histogram)
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.75)
fig.show()

In [None]:
# Longest completed drought observed for any number
max(all_droughts)