## DataLoader Class
The DataLoader class loads data from files into a SQLite database. All files in the folder must share the same format; they can be CSV files, zipped CSV files, or any other file type whose content is in CSV format.

In [None]:
# Import libraries
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sqlalchemy import create_engine

from loaders import DataLoader

## Parameters that could be changed

In [None]:
# --- Data source -----------------------------------------------------------
DIRECTORY = "data"          # Folder in which files are saved
DBNAME = "data.sqlite3"     # Name of the database
TABLENAME = "eod"           # Name of the table

# --- Strategy --------------------------------------------------------------
UNIVERSE = "NIFTY50"        # Sheet of universe.xlsx listing the symbols to trade
NUM_STOCKS = 5              # Number of stocks picked each day
STOP_LOSS = 4               # In percent, added on top of the entry price
REF_PRICE = "close"         # Column used when a price is outside the day's low-high range

# --- Money -----------------------------------------------------------------
CAPITAL = 20000             # Starting capital
LEVERAGE = 1                # Multiplier applied to CAPITAL to get the trading capital
COMMISSION = 0.03           # In percent

Don't change the cell below; just run it.

This loads the data from all the files in the folder into the database. If you add a file, rerun the cell so that the database is updated.

In [None]:
# SQLAlchemy URL for a SQLite file located at DBNAME
connection_string = 'sqlite:///{}'.format(DBNAME)
engine = create_engine(connection_string)

# Loader that reads the files in DIRECTORY and writes them to TABLENAME through the engine
dl = DataLoader(
    DIRECTORY,
    mode='SQL',
    engine=engine,
    tablename=TABLENAME,
)

# This call performs the actual load.
# NOTE(review): parse_dates / usecols are presumably forwarded to the CSV reader,
# i.e. only the first 13 columns are kept - confirm against loaders.DataLoader.
dl.load_data(parse_dates=['TIMESTAMP'], usecols=range(13))

In [None]:
# Pull the whole table back out of the database into a DataFrame
df = pd.read_sql_table(TABLENAME, engine)

# Row count; it grows as more files are put in the folder
df.shape[0]

Don't change any cells below.
Just change the **PARAMETERS** above

Of course, you could always *hack the below code*

Backtesting
------------
1. Filter data only for our stock universe
2. Calculate daily returns for each stock
3. Carry each return figure forward to the next day, so that each day's stocks are ranked on the previous day's returns
4. Group by each day, and apply our strategy
5. Append all the data
6. Calculate prices and performance
7. Evaluate metrics

In [None]:
# Symbols of the chosen universe: every cell of the sheet named by UNIVERSE (no header row)
symbols = pd.read_excel('universe.xlsx', sheet_name=UNIVERSE, header=None).values.ravel()

# Keep only rows of the "EQ" series that belong to the universe
df = df[df['symbol'].isin(symbols)]
df = df[df['series'] == "EQ"].reset_index(drop=True)

# Daily return, then lagged by one row per symbol (rows ordered by timestamp)
# so that each row carries the return of that symbol's previous row.
df['ret'] = (df['close']/df['prevclose']) - 1
df = df.sort_values(by='timestamp')
df['ret'] = df.groupby('symbol')['ret'].shift(1)

# A row without a lagged return (e.g. a symbol's first row) carries no signal.
# Ranking such rows would make the selection arbitrary - on the very first date
# every return is NaN - so they are excluded from the ranking.
ranked = df.dropna(subset=['ret'])

# For every day, pick the NUM_STOCKS rows with the highest lagged return
collect = [
    group.sort_values(by='ret', ascending=False).iloc[:NUM_STOCKS]
    for _, group in ranked.groupby('timestamp')
]
if not collect:
    # pd.concat([]) would raise a ValueError with a far less helpful message
    raise ValueError('No rows with a previous-day return; need at least two days of data for the universe')
orders = pd.concat(collect)
    

In [None]:
def isPrice(price, high, low):
    """Tell whether ``price`` lies within the ``low``-``high`` range, bounds included.

    Parameters
    ----------
    price : number
        Price to check.
    high : number
        Upper bound of the range.
    low : number
        Lower bound of the range.

    Returns
    -------
    bool
        True when ``low <= price <= high``, otherwise False. Any NaN operand
        makes the comparison fail, so the result is False.
    """
    # bool() guarantees a plain Python bool even for numpy scalars, and an
    # explicit False (rather than an implicit None) when the price is out of range.
    return bool(low <= price <= high)

In [None]:
# Capital actually deployed, split equally across the stocks picked each day
trading_capital = CAPITAL * LEVERAGE
per_stock_capital = trading_capital/NUM_STOCKS

# Entry at the open; the stop loss sits STOP_LOSS percent above the entry price
orders['price'] = orders['open']
orders['stop_loss'] = (orders['price'] * (1+STOP_LOSS*0.01)).round(2)
orders['qty'] = (per_stock_capital/orders['price']).round()

# A price is used only when it lies inside the day's low-high range (bounds
# included); otherwise the REF_PRICE column is used instead.
entry_in_range = orders['price'].between(orders['low'], orders['high'])
orders['sell'] = orders['price'].where(entry_in_range, orders[REF_PRICE])
stop_in_range = orders['stop_loss'].between(orders['low'], orders['high'])
orders['buy'] = orders['stop_loss'].where(stop_in_range, orders[REF_PRICE])

# Profit per share and per position
orders['profit_per_unit'] = orders['sell'] - orders['buy']
orders['total_profit'] = orders['profit_per_unit'] * orders['qty']

# NOTE(review): commission is charged on price + stop_loss, not on the realised
# sell + buy prices - confirm this is intended.
orders['commission'] = orders.eval('qty*(price+stop_loss)')*COMMISSION*0.01
orders['net_profit'] = orders['total_profit'] - orders['commission']

cols = ['timestamp', 'symbol', 'buy', 'sell', 'profit_per_unit', 'total_profit']


## Metrics

In [None]:
# Last few orders, limited to the price, quantity and profit columns
orders.tail().loc[:, [
    'symbol',
    'open', 'high', 'low', 'close',
    'qty', 'buy', 'sell',
    'total_profit', 'net_profit',
]]

Profit calculated on total_profit.
You can change this to **net profit** to see the real results

In [None]:

# Profit summed over every order, expressed as a percentage of the starting capital
total_profit = orders['total_profit'].sum()
returns_pct = (total_profit/CAPITAL)*100
'Returns for the period = {:.2f}%'.format(returns_pct)

In [None]:
# Equity curve: starting capital plus the running total of each day's profit.
# seaborn (sns) and matplotlib.pyplot (plt) are imported in the import cell at
# the top of the notebook.
sns.set()
by_day = orders.groupby('timestamp').total_profit.sum()

# Label the returned Axes directly instead of relying on pyplot's "current axes"
ax = (by_day.cumsum() + CAPITAL).plot(title='Portfolio by day')
ax.set_xlabel('Date')
ax.set_ylabel('Capital')
plt.show()

In [None]:
# Correlation matrix
# This is useful to find related factors
# Only numeric/bool columns are correlated: since pandas 2.0, DataFrame.corr()
# no longer drops text columns such as 'symbol' silently and raises instead.
orders.select_dtypes(include=['number', 'bool']).corr()

In [None]:
# Profit by date
# Extend the cols list with any other column you want summed per day
cols = ['total_profit', 'net_profit']
daily_totals = orders.groupby('timestamp')[cols].sum()
daily_totals.tail()

In [None]:
# Best and worst results
# Change n for the required number of days
n = 5

# Daily profit in ascending order: worst days first, best days last
daily_profit_sorted = orders.groupby('timestamp')['total_profit'].sum().sort_values()

print('Best results')
print(daily_profit_sorted.tail(n))

print('Worst results')
print(daily_profit_sorted.head(n))