<h1>1) Futures PNL

In [20]:
# your code goes here...
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show
from bokeh.io import output_notebook


output_notebook()

df_fut: pd.DataFrame = pd.read_csv('data/futures_data.csv')
df_fut['timestamp'] = pd.to_datetime(df_fut['timestamp'])  # convert timestamp col to datetime
symbols: np.ndarray = df_fut.loc[:,'symbol'].unique()  # get unique symbols in dataset
contract_unit = pd.Series([5,50,20,50], index=symbols)  # define contract units per symbol
# add contract size column for problem set 2
df_fut = pd.concat([df_fut, pd.Series(np.repeat(5, df_fut.shape[0]), index=df_fut.index, name='size')], axis=1)

all_pnl = []
for s in symbols:
    df_fut_s: pd.DataFrame = df_fut[df_fut.loc[:,'symbol'] == s]
    df_fut_s = df_fut_s.sort_values(by=['timestamp'])  # make sure timestamps are ordered correctly
    # print(df_fut_s.index)
    start_price = df_fut_s.loc[:,'open'].iloc[0]  # get first open price

    pnl: pd.Series = (df_fut_s.loc[:,'close'] - start_price) * df_fut_s.loc[:,'size'].iloc[0] * contract_unit[s]
    pnl.name = 'pnl'
    all_pnl.append(pnl)

    # plot results
    print('Final PnL for %s: %s' % (s,pnl.iloc[-1].round(3)))
    p = figure(title='%s PnL' % (s), x_axis_label='Date', y_axis_label='PnL', x_axis_type='datetime', width=500, height=300)
    p.line(x=df_fut_s.loc[:,'timestamp'], y=pnl)
    show(p)

all_pnl = pd.concat(all_pnl, axis=0)  # concat with full DF for problem set 2
df_fut = pd.concat([df_fut, all_pnl],axis=1)


Final PnL for YM: 314223.0


Final PnL for RTY: 257905.0


Final PnL for NQ: 962617.0


Final PnL for ES: 531367.5


<h1>2) Data Summary

In [21]:
import pandas as pd
def summary_stats(df_: pd.DataFrame, column_name="value"):
    
    df_ = df_.dropna(how='all')
    df_["count"] = int(df_["size"].count())
    df_["mean"] = round(df_.pnl.mean())
    df_["min"] = round(df_.pnl.min())
    df_["25(%)"] = round(df_.pnl.quantile(q=0.25))
    df_["50(%)"] = round(df_.pnl.quantile(q=0.5))
    df_["75(%)"] = round(df_.pnl.quantile(q=0.75))
    df_["max"] = round(df_.pnl.max())
    df_["wins"] = df_["pnl"].apply(lambda x: 1 if x > 0 else 0)
    df_["win(%)"] = round(int(len(df_[df_.wins > 0])/df_.shape[0] * 100), 2)
    df_["mean_unit_pnl"] = round(np.sum(df_.pnl)/ np.sum(np.absolute(df_["size"])))
    try:
        df_["std_unit_pnl"] = round(np.std(df_.pnl) / np.sum(np.absolute(df_["size"])))
    except:
        df_["std_unit_pnl"] = np.nan
    df_['average_daily_pnl'] = round(df_.groupby('date').pnl.sum().mean())
    df_['total_dates'] = round(len(df_.date.unique()))
    try:
        df_['std_daily_pnl'] = round(df_.groupby('date').pnl.sum().std())
    except:
        df_['std_daily_pnl'] = np.nan
    try:
        sharpe = np.round((252**0.5)*(df_["average_daily_pnl"] / df_["std_daily_pnl"]), 3)
    except:
        sharpe = np.nan
    df_["sharpe"] = round(sharpe, 2)
    df_["total_pnl"] = round(np.sum(df_.pnl))
    df_["total_volume"] = round(np.sum(np.absolute(df_["size"])))
    df_final = df_.iloc[0][['count', 'mean', 'min', '25(%)',
                            '50(%)', '75(%)', 'max', 'win(%)', 'mean_unit_pnl', 'std_unit_pnl',
                            'average_daily_pnl', 'total_dates', 'std_daily_pnl', 'sharpe',
                            'total_pnl', 'total_volume']].reset_index()
    df_final.columns = ['statistic', column_name]
    df_final = df_final.set_index('statistic')
    return df_final

In [22]:
# using saved variables from part 1

def convert_to_daily(
    df_fut: pd.DataFrame,  # full futures df with all symbols
    symbol: str,  # symbol for which to convert to daily
) -> pd.DataFrame:
    df_fut_s = df_fut[df_fut.loc[:,'symbol'] == symbol]
    df_fut_daily = df_fut_s.set_index('timestamp',drop=True).resample('D').mean()  # convert to avg daily
    df_fut_daily['pnl'] = df_fut_daily.loc[:,'pnl'] - df_fut_daily.loc[:,'pnl'].shift(1)  # calc daily pnl
    return df_fut_daily

pt2_symbols = ['ES','NQ']
for s in pt2_symbols:
    df_fut_daily = convert_to_daily(df_fut=df_fut, symbol=s)
    df_fut_daily['date'] = df_fut_daily.index.values
    display(summary_stats(df_=df_fut_daily, column_name=s))

Unnamed: 0_level_0,ES
statistic,Unnamed: 1_level_1
count,1046.0
mean,512.0
min,-57274.0
25(%),-1472.0
50(%),608.0
75(%),3144.0
max,33039.0
win(%),59.0
mean_unit_pnl,102.0
std_unit_pnl,1.0


Unnamed: 0_level_0,NQ
statistic,Unnamed: 1_level_1
count,1046.0
mean,925.0
min,-63724.0
25(%),-2055.0
50(%),724.0
75(%),4988.0
max,45013.0
win(%),59.0
mean_unit_pnl,185.0
std_unit_pnl,2.0


<h1>3) Percent returns

In [34]:
# using saved variables from pt1 and "convert_to_daily" from pt2
for s in symbols:
    df_fut_daily: pd.DataFrame = convert_to_daily(df_fut=df_fut, symbol=s)
    df_fut_daily['percent_return_pnl'] = df_fut_daily.loc[:,'pnl'].pct_change()
    df_fut_daily['percent_return_close'] = df_fut_daily.loc[:,'close'].pct_change()
    # print(df_fut_daily.loc[:,'pnl'].pct_change().sort_values(ascending=False))
    if s == 'YM':
        display(df_fut_daily.loc[:,['pnl','percent_return_pnl','close','percent_return_close']].loc['2020-09-16':'2020-09-18'].round(5))

    # plot
        p = figure(title='%s Percent Returns' % (s), x_axis_label='Date', y_axis_label='Percent ret', x_axis_type='datetime', width=800, height=200)
        p.line(x=df_fut_daily.index.values, y=df_fut_daily.loc[:,'percent_return_close'],line_width=2)
        show(p)

Unnamed: 0_level_0,pnl,percent_return_pnl,close,percent_return_close
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-09-16,-4.46875,-1.00138,27967.79417,-1e-05
2020-09-17,-6428.48958,1437.54312,27710.65458,-0.00919
2020-09-18,-777.57292,-0.87904,27679.55167,-0.00112


<h1>4) Web scrape economic events

In [24]:
events_url = 'https://www.forexfactory.com/calendar'

In [26]:
import random
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from datetime import datetime


def create_driver():
    user_agent_list = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 11.5; rv:90.0) Gecko/20100101 Firefox/90.0',
        'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_5_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:90.0) Gecko/20100101 Firefox/90.0',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'
    ]
    user_agent = random.choice(user_agent_list)

    browser_options = webdriver.ChromeOptions()
    browser_options.add_argument("--no-sandbox")
    browser_options.add_argument("--headless")
    browser_options.add_argument("start-maximized")
    browser_options.add_argument("window-size=1900,1080")
    browser_options.add_argument("disable-gpu")
    browser_options.add_argument("--disable-software-rasterizer")
    browser_options.add_argument("--disable-dev-shm-usage")
    browser_options.add_argument(f'user-agent={user_agent}')

    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=browser_options, 
        service_args=["--verbose", "--log-path=test.log"],
    )

    return driver


def parse_data(
    driver: webdriver,
    url: str,
) -> pd.DataFrame:
    driver.get(url)

    data_table = driver.find_element(By.CLASS_NAME, "calendar__table")  # main data table
    rows = data_table.find_elements(By.CSS_SELECTOR, "tr.calendar__row.calendar_row")  # rows in data table

    last_date = ''
    last_time = ''
    df_list = []
    for row in rows:
        date = row.find_element(By.CSS_SELECTOR, "td.calendar__cell.calendar__date.date").text
        time = row.find_element(By.CSS_SELECTOR, "td.calendar__cell.calendar__time.time").text
        currency = row.find_element(By.CSS_SELECTOR, "td.calendar__cell.calendar__currency.currency").text
        event = row.find_element(By.CSS_SELECTOR, "td.calendar__cell.calendar__event.event").text
        actual = row.find_element(By.CSS_SELECTOR, "td.calendar__cell.calendar__actual.actual").text
        forecast = row.find_element(By.CSS_SELECTOR, "td.calendar__cell.calendar__actual.actual").text
        previous = row.find_element(By.CSS_SELECTOR, "td.calendar__cell.calendar__actual.actual").text

        try:  # add formatted most recent date to final df row
            last_date = pd.to_datetime(date.split('\n')[1]+' %s'%(datetime.now().year), format='%b %d %Y', exact=True)
            date = last_date
        except:
            date = last_date
        try:  # add formatted most recent time to final df row
            last_time = pd.to_datetime(time, format='%I:%M%p', exact=True)
            time = last_time
        except:
            time = last_time
        date = datetime.combine(date=date.date(),time=time.time())  # combine date/time into one value
        df_list.append([date,currency,event,actual,forecast,previous])
    return pd.DataFrame(df_list, columns=['date','currency','event','actual','forecast','previous'])

driver = create_driver()
df_events: pd.DataFrame = parse_data(driver=driver, url=events_url)
display(df_events.head())

Unnamed: 0,date,currency,event,actual,forecast,previous
0,2022-11-06 02:00:00,CAD,Daylight Saving Time Shift,,,
1,2022-11-06 02:00:00,USD,Daylight Saving Time Shift,,,
2,2022-11-06 22:01:00,CNY,USD-Denominated Trade Balance,85.2B,85.2B,85.2B
3,2022-11-06 22:02:00,CNY,Trade Balance,587B,587B,587B
4,2022-11-07 01:45:00,CHF,Unemployment Rate,2.1%,2.1%,2.1%


<h1>5) JSON updating

In [27]:
import json
import pandas as pd
from datetime import datetime


f = open('data/RetailSales_CHI.json')
j = json.load(f)


def edit_params(
    params: dict,
) -> dict:
    for key in params.keys():
        params[key]['event_date'] = '2021-12-14'  # hard code changes
        params[key]['event_time'] = '09:45:00'
        event_time_datetime = datetime.strptime(params[key]['event_time'],"%H:%M:%S")  # convert to datetime
        # add 5 mins to datetime and save to hedge_start_time as str
        params[key]['hedge_start_time'] = str((event_time_datetime + pd.Timedelta(5,unit='m')).time())
    return params

j['params'] = edit_params(params=j['params'])
j['params']['NKD1']

{'quote_interval': 4,
 'aliases_to_listen': ['NKD1'],
 'max_position': 1,
 'contract_size': 1,
 'max_not_complete_orders_per_side': 1,
 'market_modes': ['Open'],
 'stop_profit_in_ticks': 500,
 'stop_loss_in_ticks': 20,
 'stop_out_ticks': 5,
 'max_orders_per_price_level': 1,
 'round_out_per_side': True,
 'event_date': '2021-12-14',
 'event_time': '09:45:00',
 'hedge_start_time': '09:50:00',
 'cancel_time_post_event_in_seconds': 10,
 'strategy_start_offset_in_seconds': 1,
 'min_tick_threshold': 2,
 'hedge_seconds_before_next_order': 3,
 'hedge_minimum_contract_size': 1,
 'hedge_period': '1m',
 'calendar_day_delta': 0}

<h1>6) Email function, with attachment sending

In [1]:
# your code goes here...
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText


def send_test_mail(body):
    """
    Sends email with "example.txt" attachment to smtpd debug server.
    Command to start server and view output:

    python -m smtpd -n -c DebuggingServer localhost:2500
    """
    SERVER = "localhost"
    PORT = 2500

    sender_email = "sender@email.com"
    receiver_email = "receiver@email.com"

    msg = MIMEMultipart()
    msg['Subject'] = '[Email Test]'
    msg['From'] = sender_email
    msg['To'] = receiver_email

    msgText = MIMEText('<b>%s</b>' % (body), 'html')
    msg.attach(msgText)

    filename = "example.txt"
    msg.attach(MIMEText(open(filename).read()))

    server = smtplib.SMTP(SERVER, PORT)
    server.ehlo()
    server.sendmail(sender_email, receiver_email, msg.as_string())
    server.quit()

ZIP_DIR_NAME = 'test'
send_test_mail(body=ZIP_DIR_NAME)