In [1]:
import os
import pandas as pd

## load data

In [94]:
# Folder containing the processed per-ETF CSV files (relative to this notebook).
# Keep the trailing slash: file names are appended to this string directly.
processed_data_folder_path = "../data/etf_data/processed/"

In [95]:
# List the processed files, skipping the macOS '.DS_Store' metadata file.
# Filtering (instead of list.remove) avoids a ValueError on machines where
# that file does not exist.
etf_list = [
    file_name
    for file_name in os.listdir(processed_data_folder_path)
    if file_name != '.DS_Store'
]
etf_list

['SPYD.csv', 'SPY.csv']

In [96]:
# Read each ETF's processed CSV into its own DataFrame.
spyd = pd.read_csv(f"{processed_data_folder_path}SPYD.csv")
spy = pd.read_csv(f"{processed_data_folder_path}SPY.csv")

In [97]:
# Show the distinct values of the `category` column in the SPYD table.
spyd["category"].unique()

array(['profile', 'valuation_and_dividend', 'expense', 'tax_analysis',
       'holdings_analysis', 'performance'], dtype=object)

In [100]:
# Preview the SPY rows whose category is "holdings_analysis".
spy[spy["category"]=="holdings_analysis"]

Unnamed: 0,category,key,value
63,holdings_analysis,Apple Inc. (AAPL),7.19%
64,holdings_analysis,Microsoft Corporation (MSFT),6.51%
65,holdings_analysis,"Amazon.com, Inc. (AMZN)",3.33%
66,holdings_analysis,NVIDIA Corporation (NVDA),2.95%
67,holdings_analysis,Alphabet Inc. Class A (GOOGL),2.03%
68,holdings_analysis,Meta Platforms Inc. Class A (META),1.84%
69,holdings_analysis,"Tesla, Inc. (TSLA)",1.83%
70,holdings_analysis,Alphabet Inc. Class C (GOOG),1.76%
71,holdings_analysis,Berkshire Hathaway Inc. Class B (BRK.B),1.67%
72,holdings_analysis,UnitedHealth Group Incorporated (UNH),1.25%


## visualization

In [68]:
# Container for the values behind each chart, keyed by a short chart name.
vis = {}

### 1. Annual Dividend Yield

In [69]:
# Raw "Annual Dividend Yield" value for each ETF, taken from the first row
# matching both the category and the key.
vis["ady"] = {
    etf_name: etf_frame.loc[
        (etf_frame["category"] == "valuation_and_dividend")
        & (etf_frame["key"] == "Annual Dividend Yield"),
        "value",
    ].tolist()[0]
    for etf_name, etf_frame in (("spyd", spyd), ("spy", spy))
}

In [70]:
def clean_string(text):
    """Convert a formatted numeric string such as ``"4.81%"`` to a float.

    Every character that is not a digit, ``.`` or ``-`` is discarded before
    conversion, so decorations like ``%``, ``$`` or thousands separators are
    ignored.

    Parameters
    ----------
    text : str or int or float
        The value to convert. Values that are already numeric are returned
        as ``float`` unchanged, which makes re-running a cleaning cell safe.

    Returns
    -------
    float
        The numeric value contained in ``text``.

    Raises
    ------
    TypeError
        If ``text`` is neither a string nor an int/float.
    ValueError
        If no valid number remains after stripping (e.g. ``"N/A"``).
    """
    # Already numeric (a cleaned value, or NaN for a missing cell): nothing to strip.
    if isinstance(text, (int, float)):
        return float(text)
    if not isinstance(text, str):
        raise TypeError(f"clean_string expects a str or number, got {type(text).__name__}")

    # Map U+2212 MINUS SIGN to an ASCII hyphen so a negative sign written
    # with the typographic character is not silently dropped.
    normalized = text.replace("\u2212", "-")
    num = ''.join(c for c in normalized if c.isdigit() or c in ('.', '-'))
    try:
        return float(num)
    except ValueError:
        raise ValueError(f"cannot parse a number from {text!r}") from None

In [71]:
# Convert both stored yield values with clean_string, then display `vis`.
vis["ady"].update(
    {etf_name: clean_string(vis["ady"][etf_name]) for etf_name in ("spyd", "spy")}
)
vis

{'ady': {'spyd': 4.81, 'spy': 1.43}}

In [72]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, CustomJS, Select
from bokeh.layouts import column
from bokeh.transform import factor_cmap

output_notebook()

# Dataset 1: annual dividend yield per ETF, read from vis["ady"].
# (Indexing a bare `ady` name here fails with NameError on a fresh kernel,
# because no such variable is defined anywhere in the notebook.)
categories1 = list(vis["ady"].keys())
values1 = [vis["ady"][c] for c in categories1]

# Dataset 2: hard-coded placeholder values offered by the dropdown.
categories2 = ['X', 'Y', 'Z']
values2 = [1, 4, 3]

source = ColumnDataSource(data=dict(categories=categories1, values=values1))

# Bar chart with one bar per category; starts on dataset 1.
p = figure(x_range=categories1, height=250, title="Annual Dividend Yield",
           toolbar_location=None, tools="",
           y_axis_label="Percentage (%)")

p.vbar(x='categories', top='values', width=0.9, source=source,
       legend_field="categories", 
       line_color='white', 
       fill_color=factor_cmap('categories', palette="Spectral5", factors=categories1))

# Browser-side callback: swaps the axis factors and the bar data to match
# the dataset chosen in the dropdown.
callback = CustomJS(args=dict(source=source, 
                              categories1=categories1, values1=values1,
                              categories2=categories2, values2=values2, 
                              p=p), code="""
    var data = source.data;
    var f = cb_obj.value;
    p.x_range.factors = (f === 'Dataset 1') ? categories1 : categories2;
    data['categories'] = (f === 'Dataset 1') ? categories1 : categories2;
    data['values'] = (f === 'Dataset 1') ? values1 : values2;
    source.change.emit();
""")

select = Select(title="Dataset", value="Dataset 1", options=["Dataset 1", "Dataset 2"])
select.js_on_change('value', callback)

# Display the dropdown above the plot
layout = column(select, p)
show(layout)


### 2. Expense

In [77]:
# Expense data for the charts, filled in per ETF below.
vis["exp"] = {}

In [82]:
# SPYD expense rows: labels as-is, values converted with clean_string.
vis["exp"]["spyd"] = {
    "key": spyd.loc[spyd["category"] == "expense", "key"].tolist(),
    "value": spyd.loc[spyd["category"] == "expense", "value"].apply(clean_string).tolist(),
}

In [84]:
# SPY expense rows: labels as-is, values converted with clean_string.
vis["exp"]["spy"] = {
    "key": spy.loc[spy["category"] == "expense", "key"].tolist(),
    "value": spy.loc[spy["category"] == "expense", "value"].apply(clean_string).tolist(),
}

In [89]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, CustomJS, Select
from bokeh.layouts import column
from bokeh.transform import factor_cmap

output_notebook()

# Expense labels and values for each ETF, prepared in vis["exp"].
spyd_key = vis["exp"]["spyd"]["key"]
spyd_value = vis["exp"]["spyd"]["value"]

spy_key = vis["exp"]["spy"]["key"]
spy_value = vis["exp"]["spy"]["value"]

# The colour mapper must know every label that can be displayed; a label
# present only in SPY would otherwise fall back to the mapper's NaN colour
# after switching. SPYD labels come first so their colours are unchanged.
expense_factors = list(dict.fromkeys(spyd_key + spy_key))

source = ColumnDataSource(data=dict(categories=spyd_key, values=spyd_value))

# Bar chart with one bar per expense label; starts on SPYD.
p = figure(x_range=spyd_key, height=250, title="Expense",
           toolbar_location=None, tools="",
           y_axis_label="Percentage (%)")

p.vbar(x='categories', top='values', width=0.9, source=source,
       # legend_field="categories", 
       line_color='white', 
       fill_color=factor_cmap('categories', palette="Spectral5", factors=expense_factors))

# Browser-side callback: swaps the bar data and the axis factors to match
# the ETF chosen in the dropdown.
callback = CustomJS(args=dict(source=source, 
                              categories1=spyd_key, values1=spyd_value,
                              categories2=spy_key, values2=spy_value, 
                              p=p), code="""
    var data = source.data;
    var f = cb_obj.value;
    if (f === 'SPYD') {
       data['categories'] = categories1;
       data['values'] = values1;
       p.x_range.factors = categories1;
    } 
    if (f === 'SPY') {
       data['categories'] = categories2;
       data['values'] = values2;
       p.x_range.factors = categories2;
    }
    source.change.emit();
    p.x_range.change.emit();
""")

select = Select(title="ETF List", value="SPYD", options=["SPYD", "SPY"])
select.js_on_change('value', callback)

# Display the dropdown above the plot
layout = column(select, p)
show(layout)


### 3. Top 15 Holdings

In [156]:
# Top-holdings data for the charts, filled in per ETF below.
vis["top15"] = {}

In [157]:
# SPYD holdings rows: holding names as-is, weights converted with clean_string.
vis["top15"]["spyd"] = {
    "key": spyd.loc[spyd["category"] == "holdings_analysis", "key"].tolist(),
    "value": spyd.loc[
        spyd["category"] == "holdings_analysis", "value"
    ].apply(clean_string).tolist(),
}

In [158]:
# SPY holdings rows: holding names as-is, weights converted with clean_string.
vis["top15"]["spy"] = {
    "key": spy.loc[spy["category"] == "holdings_analysis", "key"].tolist(),
    "value": spy.loc[
        spy["category"] == "holdings_analysis", "value"
    ].apply(clean_string).tolist(),
}

In [168]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, CustomJS, Select, LabelSet
from bokeh.layouts import column
from bokeh.palettes import plasma
import numpy as np

output_notebook()

# Holding names, weights and one palette colour per holding, for each ETF.
spyd_key = vis["top15"]["spyd"]["key"]
spyd_value = vis["top15"]["spyd"]["value"]
spyd_colors = plasma(len(spyd_key))

spy_key = vis["top15"]["spy"]["key"]
spy_value = vis["top15"]["spy"]["value"]
spy_colors = plasma(len(spy_key))

# Reverse both lists so the first holding in the data ends up at the top of
# the horizontal bar chart (the categorical y-axis lists factors bottom-up).
# The colour lists are left in their original order.
spyd_key = list(reversed(spyd_key))
spyd_value = list(reversed(spyd_value))

spy_key = list(reversed(spy_key))
spy_value = list(reversed(spy_value))

# Titles report the number of holdings actually present in the data rather
# than a hard-coded 15, so they stay correct when a file lists fewer rows.
title_spyd = f"SPYD - Top {len(spyd_key)} Holdings Total Percentage {round(sum(spyd_value),1)}"
title_spy = f"SPY - Top {len(spy_key)} Holdings Total Percentage {round(sum(spy_value),1)}"

source = ColumnDataSource(data=dict(categories=spyd_key, values=spyd_value, colors=spyd_colors))

# Horizontal bar chart; starts on SPYD.
p = figure(y_range=spyd_key, width=800, height=800, title=title_spyd,
           toolbar_location=None, tools="",
           x_axis_label="Percentage (%)")

bar_height = 0.7
bars = p.hbar(y='categories', right='values', height=bar_height, source=source, line_color='white', fill_color='colors')

# Print each bar's value just past its right end
labels = LabelSet(y='categories', x='values', text='values', level='glyph',
                  y_offset=-13.5, x_offset=5, source=source)
p.add_layout(labels)

# Browser-side callback: swaps data, colours, axis factors and the title to
# match the ETF chosen in the dropdown.
callback = CustomJS(args=dict(source=source, 
                              categories1=spyd_key, values1=spyd_value, colors1=spyd_colors,
                              categories2=spy_key, values2=spy_value, colors2=spy_colors,
                              p=p, title_spyd=title_spyd, title_spy=title_spy), code="""
    var data = source.data;
    var f = cb_obj.value;
    if (f === 'SPYD') {
       data['categories'] = categories1;
       data['values'] = values1;
       data['colors'] = colors1;
       p.y_range.factors = categories1;
       p.title.text = title_spyd;
    } else if (f === 'SPY') {
       data['categories'] = categories2;
       data['values'] = values2;
       data['colors'] = colors2;
       p.y_range.factors = categories2;
       p.title.text = title_spy;
    }
    source.change.emit();
    p.y_range.change.emit();
""")

select = Select(title="ETF List", value="SPYD", options=["SPYD", "SPY"])
select.js_on_change('value', callback)

# Display the dropdown above the plot
layout = column(select, p)
show(layout)


### 4. 1, 3, 5 Year Return

In [228]:
# Performance (return) data for the charts, filled in per ETF below.
vis["yr"] = {}

In [229]:
# SPYD performance rows: labels as-is, values converted with clean_string.
vis["yr"]["spyd"] = {
    "key": spyd.loc[spyd["category"] == "performance", "key"].tolist(),
    "value": spyd.loc[
        spyd["category"] == "performance", "value"
    ].apply(clean_string).tolist(),
}

In [230]:
# SPY performance rows: labels as-is, values converted with clean_string.
vis["yr"]["spy"] = {
    "key": spy.loc[spy["category"] == "performance", "key"].tolist(),
    "value": spy.loc[
        spy["category"] == "performance", "value"
    ].apply(clean_string).tolist(),
}

In [233]:
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.plotting import figure, show, output_notebook
from bokeh.transform import dodge
from bokeh.palettes import plasma

output_notebook()

# The x-axis categories are the SPYD performance labels. SPY values are
# looked up by label instead of being paired by position, so the two series
# stay aligned even if the two CSVs list their rows in a different order.
# A label missing from SPY gets NaN.
categories = vis["yr"]["spyd"]["key"]
class_a_values = vis["yr"]["spyd"]["value"]  # SPYD series
spy_by_key = dict(zip(vis["yr"]["spy"]["key"], vis["yr"]["spy"]["value"]))
class_b_values = [spy_by_key.get(label, float("nan")) for label in categories]  # SPY series

# One column per series, all sharing the same category column
data = {'categories': categories,
        'SPYD': class_a_values,
        'SPY': class_b_values}

source = ColumnDataSource(data=data)

# Grouped bar chart: two bars per category
p = figure(x_range=categories, width=800, height=400, title="Year Return",
           toolbar_location=None, tools="")

# Horizontal offset applied to each series so the paired bars sit side by side
dodge_value = 0.1

# SPYD bars, shifted left
p.vbar(x=dodge('categories', -dodge_value, range=p.x_range), top='SPYD', width=0.2, source=source,
       color=plasma(10)[9], legend_label="SPYD")

# SPY bars, shifted right
p.vbar(x=dodge('categories', dodge_value, range=p.x_range), top='SPY', width=0.2, source=source,
       color=plasma(10)[4], legend_label="SPY")

# Value labels above the SPYD bars
labels_a = LabelSet(x=dodge('categories', -dodge_value, range=p.x_range), y='SPYD', text='SPYD', 
                    y_offset=4, text_font_size="8pt", text_color="black", source=source, text_align='center')
p.add_layout(labels_a)

# Value labels above the SPY bars
labels_b = LabelSet(x=dodge('categories', dodge_value, range=p.x_range), y='SPY', text='SPY', 
                    y_offset=4, text_font_size="8pt", text_color="black", source=source, text_align='center')
p.add_layout(labels_b)

# Rotate x-axis labels
# p.xaxis.major_label_orientation = np.pi/4

# Show plot
show(p)
