## What are the % returns in 15 min and 75 min period for NIFTY futures?

In [1]:
# :: Dev Plan :: #

# Load all futures data in a single dataframe file by file
# There may be some data points where volume is low or those futures are little ahead of the time, it is better to eliminate those.
# Then create a series of 15 min/75 min return in percent
# It is better to create a series on range of the candle in percent as well
# Plot typical returns over entire data (2 years -2019 -2020)
# Plot typical returns month
# Plot cummulative % return of the period

#Why I am plotting this?
# 1. Typical return gives idea how much I'll make/loose on the trade x% of time ~ 75% time
# 2. Is range significantly different for returns ~ This may translate in hitting stops or targets.
# 3. Typicall mean reversion behaviour ~ how much cummulative returns.

In [2]:
# Imports
import sys
import logging
from typing import Dict
import glob

import numpy as np
import pandas as pd

import unittest

In [3]:
# Logging (Requires restart for log level change)

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logger = logging.getLogger('LOGGER_NAME')

In [4]:
# unit test settings

runtests = True

In [5]:
# Resource Folders

DATA_DIR = './../data/'

In [6]:
# Constants and Statics

COLUMN_NAMES = ['Instrument', 'Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Open Interest']
OHLC_CONVERSION_DICT: Dict[str, str] = {'Instrument' : 'first',
                                        'Open': 'first',
                                        'High': 'max',
                                        'Low': 'min',
                                        'Close': 'last',
                                        'Volume': 'sum',
                                        'Open Interest': 'last'}

In [7]:
# A function for loading fut file in to a dataframe

def load_data(csv_file_path : str) -> pd.DataFrame:
    data = pd.read_csv(csv_file_path, header=None, index_col=None,
                                      names=COLUMN_NAMES,
                                      parse_dates=[['Date', 'Time']])
    data.set_index('Date_Time', inplace=True)
    return data

In [8]:
# Load data files in a list

data_file_path_list = []
for name in glob.glob(DATA_DIR+'NIFTY[0-9]*FUT.csv'):
    data_file_path_list.append(name)

logger.debug(data_file_path_list[:3])

DEBUG:LOGGER_NAME:['./../data/NIFTY19MARFUT.csv', './../data/NIFTY19SEPFUT.csv', './../data/NIFTY19JANFUT.csv']


In [9]:
# Convert a 1 min data to 15 min Candle

def aggregate_to_15Min(data : pd.DataFrame) -> pd.DataFrame:
    min15_data = data.resample('15min', origin='start').agg(OHLC_CONVERSION_DICT)
    return min15_data

In [10]:
# Convert a 1 min data to 75 min Candle

def aggregate_to_75min(data : pd.DataFrame) -> pd.DataFrame:
    min15_data = aggregate_to_15Min(data)
    index_dates = pd.Series(np.unique(min15_data.index.date))
    list_of_min75_data_by_date = []
    for idx_date in index_dates:
        idx_date_str = idx_date.strftime(format='%Y-%m-%d')
        min75_data_by_date = min15_data[idx_date_str].resample('75Min', origin='start').agg(OHLC_CONVERSION_DICT)
        list_of_min75_data_by_date.append(min75_data_by_date)

    min75_data = pd.concat(list_of_min75_data_by_date)
    return min75_data

In [29]:
# Calculate percent return for n period

def calculate_pct_returns(price_series : pd.Series, n : int) -> pd.Series:
    n_returns = (price_series/price_series.shift(n) - 1)*100
    n_returns = n_returns.dropna()
    return n_returns

In [49]:
# Create 15 min dataframes and combine across all future data files

pct_returns_15min_temp = [calculate_pct_returns(aggregate_to_15Min(load_data(csv_file_path= file_path))['Close'], n=1).values for file_path in data_file_path_list]
pct_returns_15min = [ret for series_ret in pct_returns_15min_temp for ret in series_ret]
logger.debug('Total number of series : %d' % len(pct_returns_15min_temp))
logger.debug('Total number of returns : %d' % len(pct_returns_15min))

DEBUG:LOGGER_NAME:Total number of series : 21
DEBUG:LOGGER_NAME:Total number of returns : 25668


In [50]:
[len(x) for x in pct_returns_15min_temp] 

[1460,
 1430,
 552,
 528,
 1032,
 1367,
 1392,
 1491,
 1400,
 48,
 1383,
 1389,
 1294,
 1499,
 1452,
 1316,
 1484,
 1269,
 1508,
 1415,
 959]

In [38]:
# Unit tests

class TestNotebook(unittest.TestCase):
    
    test_csv_file_path = './../data/NIFTY19MARFUT.csv'
    
    # test load data function for a single file
    def test_load_data(self):
        self.assertEqual(len(load_data(csv_file_path=self.test_csv_file_path)), 19773)
        
    def test_aggregate_to_15Min(self):
        data = load_data(csv_file_path=self.test_csv_file_path)
        data_15min = aggregate_to_15Min(data)
        self.assertEqual(len(data_15min), 8281)
        
    def test_aggregate_to_75min(self):
        data = load_data(csv_file_path=self.test_csv_file_path)
        data_75min = aggregate_to_75min(data)
        self.assertEqual(len(data_75min), 1725)
        
    def test_calculate_pct_returns_period_1(self):
        series = pd.Series([100.0,101.0,101.55])
        returns = calculate_pct_returns(series, n=1)
        self.assertAlmostEqual(returns.iloc[0], 1.0)

if runtests:
    unittest.main(argv=[''], verbosity=2, exit=False)

test_aggregate_to_15Min (__main__.TestNotebook) ... ok
test_aggregate_to_75min (__main__.TestNotebook) ... ok
test_calculate_pct_returns_period_1 (__main__.TestNotebook) ... ok
test_load_data (__main__.TestNotebook) ... ok

----------------------------------------------------------------------
Ran 4 tests in 0.663s

OK
