# Import Libraries

In [None]:
from yahoo_fin.stock_info import * #Yahoo Finance Library for historical data

import datetime
import time

import pandas as pd
import numpy as np

from scipy.stats.mstats import gmean #Include function for Geometrical Mean

In [None]:
#Ticker for S&P500
# Symbol handed to get_data() when the historical prices are downloaded
market_ticker = '^GSPC'

In [None]:
# Price data: boundaries of the sample window, rendered as MM/DD/YYYY strings
beginning_period, finishing_period = (
    datetime.datetime(*ymd).strftime('%m/%d/%Y')
    for ymd in ((1970, 1, 1), (2019, 12, 31))
)

# Download the historical quotes of the market ticker for that window
historical_data = get_data(
    market_ticker,
    start_date=beginning_period,
    end_date=finishing_period,
)

# Keep only the adjusted close and name the index 'Date'
df = pd.DataFrame(historical_data, columns=['adjclose'])
df.index.name = 'Date'

In [None]:
# Add two columns in one step:
#   returns - day-over-day percentage change of the adjusted close
#   Long    - position flag of the "always long" strategy (1 on every row)
df = df.assign(
    returns=df['adjclose'].pct_change(),
    Long=1,
)

In [None]:
#Strategy - Sell in May and Go Away (come back on St Leger day)
## Invested (1) when either condition holds, otherwise out of the market (0):
##   1) from October (inclusive) until May (exclusive);
##   2) in September, from the Monday after the 2nd Saturday onwards.
##
## The 2nd Saturday of a month falls on the 8th-14th, so the Monday after it
## falls on the 10th-16th. With dayofweek counting Monday as 0,
## `day - dayofweek` is the day-of-month of the Monday that opens the current
## week, so a date is on or after the re-entry Monday exactly when that value
## is at least 10. A threshold of 11 would re-enter one week late in years
## where 1 September is a Saturday (e.g. 2001, 2007, 2012, 2018).
trade_dates = df.index
october_to_april = (trade_dates.month < 5) | (trade_dates.month >= 10)
monday_of_week = trade_dates.day - trade_dates.dayofweek
september_reentry = (trade_dates.month == 9) & (monday_of_week >= 10)

df['SMGA'] = np.where(october_to_april | september_reentry, 1, 0)

In [None]:
# Strategy - Best Six Months of the year: invested (1) from November through
# April, out of the market (0) from May through October
df['BSM'] = np.where(df.index.month.isin([11, 12, 1, 2, 3, 4]), 1, 0)

In [None]:
# Mirror image of "Sell in May and Go Away", kept for comparison purposes:
# invested (1) exactly on the rows where SMGA is not invested
smga_is_out = df['SMGA'] != 1
df['Opposite'] = np.where(smga_is_out, 1, 0)

In [None]:
#Cumulative return of each strategy, shown as an index that starts at 100
# (the value over time of an initial investment of 100).
#
# The first row has no return (pct_change cannot compute one), so its strategy
# return is set to 0 and the index opens at exactly 100. The adjustment is made
# on a standalone Series before it is stored in the frame: writing through
# df[col].iloc[0] is chained assignment, which pandas warns about and which
# does not write back to the frame when copy-on-write is active.
for strategy in ('Long', 'SMGA', 'BSM', 'Opposite'):
    strategy_returns = df[strategy] * df['returns']
    strategy_returns.iloc[0] = 0
    df[strategy + '_Ret'] = (1 + strategy_returns).cumprod() * 100

In [None]:
#Build the yearly table

#Drop the index-based strategy columns: this table reports percentage returns
year_table = df.drop(columns=['Long_Ret', 'SMGA_Ret', 'Opposite_Ret', 'BSM_Ret'])

#Daily return of each strategy (position flag times the market's daily return)
strategy_return_columns = {
    'Long Returns': 'Long',
    'SMGA Returns': 'SMGA',
    'BSM Returns': 'BSM',
    'Opposite Returns': 'Opposite',
}
for return_column, strategy in strategy_return_columns.items():
    year_table[return_column] = df[strategy] * df['returns']

#Aggregate the daily rows into one row per calendar year.
# Simple returns compound rather than add up, so a year's return is
# prod(1 + r_daily) - 1 and not sum(r_daily). The remaining helper columns are
# still summed so that the frame keeps every column it had before.
daily_strategy_returns = year_table[list(strategy_return_columns)]
yearly_compounded = (1 + daily_strategy_returns).resample('Y').prod() - 1
year_table = year_table.resample('Y').sum()
year_table[yearly_compounded.columns] = yearly_compounded

In [None]:
# Keep only the yearly strategy-return columns; discard the price, the raw
# return and the position-flag columns
year_table = year_table.drop(
    ['adjclose', 'returns', 'Long', 'SMGA', 'Opposite', 'BSM'],
    axis='columns',
)

# Label each row with its year instead of the full date
year_table.index = year_table.index.year

In [None]:
#Include averages (geometric mean) in the table
## Returns compound, so the geometric mean of (1 + r) is the appropriate average.
## Every average is computed from a snapshot that holds only the yearly rows:
## slicing year_table itself after a summary row has been appended would count
## that summary row among the "last N years" and push real years out.
yearly_growth = year_table + 1
year_table.loc['Average'] = gmean(yearly_growth) - 1
year_table.loc['Avg Last 20Y'] = gmean(yearly_growth.iloc[-20:]) - 1
year_table.loc['Avg Last 10Y'] = gmean(yearly_growth.iloc[-10:]) - 1

#Express every value as a percentage rounded to two decimals
year_table = round(year_table*100,2)

# Generating the Plots

In [None]:
#Libraries for Plotting
import holoviews as hv
from holoviews import opts
from holoviews.plotting.links import RangeToolLink
from bokeh.models import HoverTool

#Libraries for Table
from bokeh.io import show, save, output_file
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import DataTable, TableColumn

import panel as pn

# Load the Bokeh plotting extension for HoloViews
hv.extension('bokeh')

In [None]:
# Frame used for plotting: keep only the Long / SMGA / BSM index series,
# rename them to the bare strategy names and round to whole units
graph_df = (
    df.drop(columns=['adjclose', 'returns', 'Long', 'SMGA', 'Opposite', 'Opposite_Ret', 'BSM'])
    .rename(columns={'Long_Ret': 'Long', 'SMGA_Ret': 'SMGA', 'BSM_Ret': 'BSM'})
    .round(0)
)

In [None]:
#Generate all curves
def getCurves(n=None, data=None):
    """Yield one hover-enabled ``hv.Curve`` per column of a strategy frame.

    Parameters
    ----------
    n : optional
        Not used; accepted only so that existing ``getCurves(2)`` calls keep
        working.
    data : pandas.DataFrame, optional
        Frame whose columns are plotted. When omitted, the module-level
        ``graph_df`` is used (looked up when iteration starts).

    Yields
    ------
    hv.Curve
        Curve of one column, labelled with the column name and carrying a
        HoverTool whose tooltip shows the date and that column's value.
    """
    frame = graph_df if data is None else data
    for column in frame.columns:
        hover = HoverTool(tooltips=[("Date", "@Date{%F}"), (column, "@"+column)], formatters={'Date': 'datetime'})
        curve = hv.Curve(frame[column], label = column).opts(opts.Curve(tools=[hover]))
        yield curve
        
# Two views of every strategy curve: a short, unlabelled strip without y-axis
# (the range-selector source) and the full-size chart (the target)
source_curves, target_curves = [], []
for strategy_curve in getCurves(2):
    source_curves.append(strategy_curve.relabel('').opts(width=800, height=100, yaxis=None))
    target_curves.append(strategy_curve.opts(width=800, ylabel = 'Value in $'))

# Link a RangeTool between the first source curve and the first target curve
RangeToolLink(source_curves[0], target_curves[0], axes=['x','y'])

# Overlay the curves of each view; only the target overlay gets a title
overlaid_plot_src = hv.Overlay(source_curves).relabel('')
main_title = 'Long Always, "Sell in May and Go Away" and Best Six Months'
overlaid_plot_tgt = hv.Overlay(target_curves).relabel(main_title).opts(
    height=400, legend_position='top_left')

# Stack the main chart above the selector strip in a single column, keeping
# the toolbars and the axes of the two plots independent
full_graph = (overlaid_plot_tgt + overlaid_plot_src).cols(1).opts(
    merge_tools=False, shared_axes=False)

Note: see the Extra section at the end of the notebook for a simpler way of generating the graph; its drawback is that
the RangeToolLink cannot be used.

In [None]:
#Save the Plot
# Wrap the HoloViews layout in a Panel object, then write it to an HTML file
# (relative path, so it is created in the current working directory).
# NOTE(review): the file name reads 'Gaph' - presumably 'Graph' was intended; it is
# left untouched here because anything that opens the file by name would break.
p = pn.panel(full_graph)
p.save('Full_Gaph.html', embed = True)

## Obtaining the same graph for the last 20 years only

In [None]:
#Filter the DataFrame for the last 20 Years
# .copy() makes the window an independent frame, so the columns written below
# do not trigger SettingWithCopyWarning or depend on view-vs-copy behaviour.
last20Y_df = df.loc[df.index >= '2000-01-03'].copy()

#Re-calculate the cumulative return of each strategy as an index starting at 100.
# The return on the first row is measured against a price from before the
# window opens, so it is zeroed: the index equals exactly 100 on the first row
# and every later row compounds only returns earned inside the window.
# (Overwriting just the first value with 100 would leave that first return
# compounded into all later rows.)
for strategy in ('Long', 'SMGA', 'BSM', 'Opposite'):
    strategy_returns = last20Y_df[strategy] * last20Y_df['returns']
    strategy_returns.iloc[0] = 0
    last20Y_df[strategy + '_Ret'] = (1 + strategy_returns).cumprod() * 100

In [None]:
# Plotting frame for the last-20-years window: keep only the Long / SMGA / BSM
# index series, rename them to the bare strategy names and round the values so
# that they carry no decimals
graph_last20Y_df = (
    last20Y_df.drop(columns=['adjclose', 'returns', 'Long', 'SMGA', 'Opposite', 'Opposite_Ret', 'BSM'])
    .rename(columns={'Long_Ret': 'Long', 'SMGA_Ret': 'SMGA', 'BSM_Ret': 'BSM'})
    .round(0)
)

In [None]:
#Generate all curves
def getCurves(n=None, data=None):
    """Yield one hover-enabled ``hv.Curve`` per column of a strategy frame.

    Parameters
    ----------
    n : optional
        Not used; accepted only so that existing ``getCurves(2)`` calls keep
        working.
    data : pandas.DataFrame, optional
        Frame whose columns are plotted. When omitted, the module-level
        ``graph_last20Y_df`` is used (looked up when iteration starts).

    Yields
    ------
    hv.Curve
        Curve of one column, labelled with the column name and carrying a
        HoverTool whose tooltip shows the date and that column's value.
    """
    frame = graph_last20Y_df if data is None else data
    for column in frame.columns:
        hover = HoverTool(tooltips=[("Date", "@Date{%F}"), (column, "@"+column)], formatters={'Date': 'datetime'})
        curve = hv.Curve(frame[column], label = column).opts(opts.Curve(tools=[hover]))
        yield curve
        
# Two views of every strategy curve: a short, unlabelled strip without y-axis
# (the range-selector source) and the full-size chart (the target)
source_curves, target_curves = [], []
for strategy_curve in getCurves(2):
    source_curves.append(strategy_curve.relabel('').opts(width=800, height=100, yaxis=None))
    target_curves.append(strategy_curve.opts(width=800, ylabel = 'Value in $'))

# Link a RangeTool between the first source curve and the first target curve
RangeToolLink(source_curves[0], target_curves[0], axes=['x','y'])

# Overlay the curves of each view; only the target overlay gets a title
overlaid_plot_src = hv.Overlay(source_curves).relabel('')
main_title = 'Long Always, "Sell in May and Go Away" and Best Six Months - Last 20 Years'
overlaid_plot_tgt = hv.Overlay(target_curves).relabel(main_title).opts(
    height=400, legend_position='top_left')

# Stack the main chart above the selector strip in a single column, keeping
# the toolbars and the axes of the two plots independent
last20Y_graph = (overlaid_plot_tgt + overlaid_plot_src).cols(1).opts(
    merge_tools=False, shared_axes=False)

In [None]:
#Save the Plot
# Wrap the last-20-years HoloViews layout in a Panel object, then write it to
# an HTML file (relative path, so it is created in the current working directory).
# NOTE(review): the file name reads 'Gaph' - presumably 'Graph' was intended; it is
# left untouched here because anything that opens the file by name would break.
p = pn.panel(last20Y_graph)
p.save('Last20Year_Gaph.html', embed = True)

# Generating the Table

In [None]:
# Move the row labels (the years and the average rows) into a regular column
# so that they are part of the data shown in the table
year_table = year_table.reset_index()
year_table_columns = year_table.columns

# Bokeh data source backed by the yearly frame
source = ColumnDataSource(year_table)

# One table column per frame column, titled with the column's own name
columns = [
    TableColumn(field=column_name, title=column_name, width=20)
    for column_name in year_table_columns
]

# Build the DataTable widget
year_table_datatable = DataTable(
    source=source,
    columns=columns,
    width=600,
    height=500,
    selectable=True,
    index_position=None,
)

# Write the DataTable to an HTML file
output_file('Historical_Comparison_Table.html')
save(year_table_datatable)

# Extra

Note: this is a simpler way of generating the graph; its drawback is that the RangeToolLink cannot be used.

In [None]:
# Hover tooltips: the date plus the value of the matching strategy
hover_1 = HoverTool(
    tooltips=[("Date", "@Date{%F}"), ("Long", "@Long")],
    formatters={'Date': 'datetime'},
)
hover_2 = HoverTool(
    tooltips=[("Date", "@Date{%F}"), ("SMGA", "@SMGA")],
    formatters={'Date': 'datetime'},
)

# Instantiate one Curve per strategy: Long in red, SMGA in blue
c_Long = hv.Curve(graph_df['Long'], label='Long').opts(
    tools=[hover_1], color='red', xlabel='Date', ylabel = 'Value in $')
c_SMGA = hv.Curve(graph_df['SMGA'], label='SMGA').opts(
    tools=[hover_2], color='blue', xlabel='Date', ylabel = 'Value in $')

# Overlay both curves on a single plot
both_curves = c_Long * c_SMGA
full_graph = hv.Overlay(both_curves).opts(width=700, height = 400, legend_position='top_left')