In [19]:
# !pip install numpy matplotlib cvxopt pandas yfinance finquant plotly cufflinks chart_studio pandas_datareader atoti atoti-jupyterlab

In [20]:
%matplotlib inline
import random

import cvxopt as opt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf
from cvxopt import blas, solvers
from finquant.portfolio import build_portfolio
from scipy.optimize import minimize

np.random.seed(123)

# Turn off progress printing
solvers.options["show_progress"] = False
import concurrent.futures
import os
import random
import urllib
import urllib.request
from concurrent.futures import ALL_COMPLETED, wait

import atoti as tt
import cufflinks
import numpy as np
import pandas_market_calendars as mcal

# (*) To communicate with Plotly's server, sign in with credentials file
import plotly as py

# (*) Useful Python/Plotly tools
import plotly.tools as tls

# (*) Graph objects to piece together plots
from plotly.graph_objs import *

In [21]:
# ndays = 252*10+7
ndays = 252 * 1.5 + 7
# ~training
dback = 252
# ~forecast
dahead = 20
# sample size
# if to randomize
rand = True
nran = 20
nstocks = 100
# this should match tbill 20, 91, 126, 252

end = pd.Timestamp.utcnow()
start = end - int(ndays) * pd.tseries.offsets.BDay()
one_week_end = end - int(ndays - 7) * pd.tseries.offsets.BDay()
one_week_end = one_week_end.strftime("%Y-%m-%d")
start = start.strftime("%Y-%m-%d")
end = end.strftime("%Y-%m-%d")

In [22]:
# pd.tseries.offsets.BDay(100)
nyse = mcal.get_calendar("NYSE")

# Show available calendars
# print(mcal.get_calendar_names())
tradingDays = list(
    nyse.schedule(start_date=start, end_date=end).index.strftime("%Y-%m-%d")
)

In [23]:
def rand_weights(n):
    """ Produces n random weights that sum to 1 """
    k = np.random.rand(n)
    return k / sum(k)


def weighted_return(returns, weights):
    w = weights

    p = np.asmatrix(np.mean(returns, axis=0))

    mu = np.asmatrix(w) * p.T
    return mu


def random_portfolio(returns):

    w = rand_weights(len(pd.DataFrame(returns).columns))

    p = np.asmatrix(np.mean(returns, axis=0))

    C = np.array(pd.DataFrame(returns).cov().values)

    mu = np.asmatrix(w) * p.T
    sigma = np.sqrt(np.asmatrix(w) * C * np.asmatrix(w).T)

    sharpe = mu / sigma

    return np.float(mu), np.float(sigma), np.float(sharpe), np.array(w, dtype=object)


def unique(list1):

    # intilize a null list
    unique_list = []

    # traverse for all elements
    for x in list1:
        # check if exists in unique_list or not
        if x not in unique_list:
            unique_list.append(x)

    return unique_list


def split_sequences(sequences, n_steps_in, n_steps_out):

    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps_in
        out_end_ix = end_ix + n_steps_out
        # check if we are beyond the dataset
        if out_end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :], sequences[end_ix:out_end_ix, :]
        X.append(seq_x)
        y.append(seq_y)

    return np.array(X), np.array(y)

In [24]:
url = "ftp://ftp.nasdaqtrader.com/symboldirectory/nasdaqtraded.txt"

urllib.request.urlretrieve(url, "nasdaqtraded.txt")
urllib.request.urlretrieve(url, "mfundslist.txt")
urllib.request.urlretrieve(url, "bonds.txt")

df1 = pd.read_csv("nasdaqtraded.txt", sep="|")[0:-1]
df2 = pd.read_csv("mfundslist.txt", sep="|")[0:-1]
df3 = pd.read_csv("bonds.txt", sep="|")[0:-1]

# combined = pd.concat([df1['Symbol'],df2['Symbol'],df3['Symbol']],axis=0)

# process symbols for bad characters
BAD_CHARS = ["$", "."]
# pat = '|'.join(['({})'.format(re.escape(c)) for c in BAD_CHARS])
# cleaned = unique(combined.replace(BAD_CHARS,'-'))

# choose size
size = nstocks
# stocks = list(df1["Symbol"].sample(n=int(size/3)))
stocks = list(
    df1["Symbol"]
    .replace(".", "-")
    .replace("\\$", "-P", regex=True)
    .sample(n=int(size / 3))
)
mfunds = list(
    df2["Symbol"]
    .replace(".", "-")
    .replace("\\$", "-P", regex=True)
    .sample(n=int(size / 3))
)
bonds = list(
    df3["Symbol"]
    .replace(".", "-")
    .replace("\\$", "-P", regex=True)
    .sample(n=int(size / 3))
)
symbols = list(set(stocks + mfunds + bonds))  # unique(stocks + mfunds + bonds)
# symbols = unique(stocks)

In [25]:
print(start, one_week_end)

2020-05-18 2020-05-27


In [26]:
pf_pre = build_portfolio(
    names=symbols, start_date=start, end_date=one_week_end, data_api="yfinance"
)

[*********************100%***********************]  99 of 99 completed

25 Failed downloads:
- RNR-PG: Data doesn't exist for startDate = 1589778000, endDate = 1590555600
- QPX: Data doesn't exist for startDate = 1589778000, endDate = 1590555600
- WSO.B: No data found, symbol may be delisted
- PTRA: Data doesn't exist for startDate = 1589778000, endDate = 1590555600
- VMACU: Data doesn't exist for startDate = 1589778000, endDate = 1590555600
- MDWT: Data doesn't exist for startDate = 1589778000, endDate = 1590555600
- IIAC.U: No data found, symbol may be delisted
- IRNT.W: No data found, symbol may be delisted
- YALA: Data doesn't exist for startDate = 1589778000, endDate = 1590555600
- TGH-PB: Data doesn't exist for startDate = 1589778000, endDate = 1590555600
- EFIV: Data doesn't exist for startDate = 1589778000, endDate = 1590555600
- GWH.W: No data found, symbol may be delisted
- MUDSU: Data doesn't exist for startDate = 1589778000, endDate = 1590555600
- TUYA: Data doesn't exist f

In [27]:
vetted_symbols = list(
    pf_pre.data.loc[
        (np.intersect1d(list(pf_pre.data.index.strftime("%Y-%m-%d")), tradingDays))
    ]
    .head(-1)
    .tail(-1)
    .dropna(axis=1)
    .columns
)

In [28]:
# combinedList = unique(list(np.append(random.sample(symbols_sp500,int(len(symbols_sp500)*.05)),np.append(random.sample(dividendAristocraft,int(len(dividendAristocraft)*.25)),np.append(top10,random.sample(symbols_ns100,int(len(symbols_ns100)*.25)))))))

# add index
# vetted_symbols.append('TQQQ')

pf = build_portfolio(
    names=list(vetted_symbols), start_date=start, end_date=end, data_api="yfinance"
)

[*********************100%***********************]  74 of 74 completed


In [29]:
"""
import pandas_datareader.data as web

import datetime

# Multiple series:
fred_data = web.DataReader(FRED_Indicators, 'fred', start, end)
"""

FRED_Indicators = ["DTB4WK", "DTB3", "DTB6", "DTB1YR"]

import pandas_datareader.data as web


def Fred_Data(name):
    temp = web.DataReader(str(name), "fred", start, end)
    temp.index = pd.to_datetime(temp.index)
    # temp = temp.resample(frequency).mean().dropna()
    return temp


FRED_set = []
FRED_completed = []
for i in FRED_Indicators:
    FRED_completed.append(i)
    FRED_set.append(Fred_Data(i))

FRED_pvt = pd.DataFrame()

for x in range(0, len(FRED_completed)):
    values = FRED_set[x]
    values = values.loc[~values.index.duplicated(keep="last")]

    FRED_pvt = pd.concat([FRED_pvt, values], axis=1)

In [30]:
returns = np.log(
    1
    + pf.data.loc[
        (np.intersect1d(list(pf.data.index.strftime("%Y-%m-%d")), tradingDays))
    ]
    .head(-1)
    .interpolate(method="time")
    .pct_change()
    .tail(-1)
)
returns = returns.dropna(axis=1)
# returns.columns = stocks.dropna(axis=1)
len(returns.columns)
# np.sum(returns.isin([np.inf, -np.inf, np.NaN])).sort_values(kind="quicksort", ascending=True)

74

In [31]:
selectedmfunds = set(mfunds) & set(returns.columns)
selectedstocks = set(stocks) & set(returns.columns)
selectedbonds = set(bonds) & set(returns.columns)

In [32]:
n_steps_in = dback

n_steps_out = dahead

Training, Holdout = split_sequences(
    np.array(pd.DataFrame(returns.index.strftime("%Y-%m-%d"))), n_steps_in, n_steps_out
)

In [33]:
randomSet = list(np.sort(np.random.choice(range(0, len(Training)), nran)))

randomsymbols = list(np.random.choice(pf.data.columns, 10, replace=False))

# random columns
stocks = returns.loc[[i[0] for i in Training[randomSet[1]]]][randomsymbols]

"""
returns_table = session.read_csv(
    "returns.csv", keys=["stock"], store_name="Returns", array_sep=";"
)

returns_table.head()
"""
return_vec = np.array(stocks)

In [54]:
#stocks.melt(ignore_index=False)
prices = stocks.melt(ignore_index=False).reset_index()

In [56]:
prices

Unnamed: 0,Date,variable,value
0,2020-05-21,DXC,-0.068867
1,2020-05-22,DXC,-0.023621
2,2020-05-26,DXC,0.055291
3,2020-05-27,DXC,0.077595
4,2020-05-28,DXC,-0.064840
...,...,...,...
2515,2021-05-14,SPWR,0.061665
2516,2021-05-17,SPWR,-0.013125
2517,2021-05-18,SPWR,0.011325
2518,2021-05-19,SPWR,0.038007


In [55]:
session = tt.create_session(config={"user_content_storage": "./content"})

Deleting existing "Unnamed" session to create the new one.


In [58]:
price_table = session.read_pandas(
    prices,
    keys=["Date", "variable"],
    table_name="prices",
)
price_table.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,value
variable,Date,Unnamed: 2_level_1
DXC,2020-05-21,-0.068867
DXC,2020-05-22,-0.023621
DXC,2020-05-26,0.055291
DXC,2020-05-27,0.077595
DXC,2020-05-28,-0.06484


In [34]:
stocks[randomsymbols]

Unnamed: 0_level_0,DXC,UBP,ACST,HPP,GER,KAMN,IBTF,UBX,SREV,SPWR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-05-21,-0.068867,0.052993,-0.015748,0.020619,-0.014737,0.006195,-0.000191,0.004216,-0.024391,-0.038549
2020-05-22,-0.023621,-0.047179,0.031253,-0.015896,0.002373,0.006691,0.000574,0.029028,0.024391,-0.022076
2020-05-26,0.055291,0.055439,0.000000,0.086619,0.016452,0.035119,-0.000574,0.059502,0.029676,0.080043
2020-05-27,0.077595,-0.008261,-0.015504,0.043549,0.008125,0.051448,-0.000192,-0.016829,-0.041797,0.001373
2020-05-28,-0.064840,0.045053,0.000000,0.055134,-0.011628,-0.003185,0.000192,0.024502,-0.018462,-0.037740
...,...,...,...,...,...,...,...,...,...,...
2021-05-14,0.037313,0.003361,0.022989,0.005204,0.041952,0.019339,0.000039,-0.014185,0.007435,0.061665
2021-05-17,-0.002717,0.014657,0.000000,-0.007070,0.005231,-0.009437,-0.000039,0.044245,-0.037740,-0.013125
2021-05-18,0.000000,-0.008635,0.022473,0.024346,-0.003484,-0.023893,0.000311,0.006811,-0.023347,0.011325
2021-05-19,0.004614,-0.011406,0.182322,-0.007682,-0.013175,-0.013804,-0.001477,-0.036870,-0.007905,0.038007


In [None]:
# data = pd.DataFrame()
# for i in randomsymbols:
#     data = pd.concat([pd.DataFrame({i: [stocks[i].values]}), data], axis=1)

In [None]:
# data.transpose()

### Converting daily returns into list

In [None]:
data_transpose = stocks[randomsymbols].T
data_transpose.head()

In [None]:
data_transpose['returns_vector'] = data_transpose.values.tolist()
data_transpose['returns_vector'].head()

In [None]:
return_vector = data_transpose['returns_vector'].to_frame()
return_vector["symbol"] = return_vector.index
return_vector.head()

### Getting list of dates for the return list

In [None]:
ret = stocks.reset_index().copy()
ret["Date"].head()

In [None]:
import atoti as tt


In [None]:
session = tt.create_session()

In [None]:
# https://towardsdatascience.com/shape-tables-like-jelly-with-pandas-melt-and-pivot-f2e13e666d6

returns_table = session.read_pandas(
    #"Returns
    return_vector,
    table_name="Returns",
    keys=["symbol"],
    #store_name="Returns",
)

returns_table.head()

In [None]:
cube = session.create_cube(returns_table, "Return Cube")

In [None]:
h, l, m = cube.hierarchies, cube.levels, cube.measures

In [None]:
# import graphviz
cube.schema

In [None]:
# pip list

In [None]:
# years = 10

import contextlib

stocksp = 0.2
mfundsp = 0.1
bondsp = 0.2
commp = 0.2

n_portfolio = 100

bestSharpe = 0
runs = 0
weight = None
bestMean = 0
bestStd = 0
means = np.array([])
stds = np.array([])
sharpes = np.array([])
bestWeights = None
weights = np.array([])

# random dates using X_index

randomSet = list(np.sort(np.random.choice(range(0, len(Training)), nran)))

for tensor in randomSet:

    randomsymbols = list(np.random.choice(pf.data.columns, 10, replace=False))
    # random columns
    stocks = returns.loc[[i[0] for i in Training[tensor]]][randomsymbols]

    return_vec = np.array(stocks)

    # display(pd.DataFrame(stocks.cumsum().iloc[-1]).transpose())

    for _ in range(n_portfolio):
        means2, stds2, sharpes2, weights2 = np.column_stack(
            [random_portfolio(return_vec) for _ in range(len(stocks.columns))]
        )
        means_ = means2
        stds_ = stds2
        sharpes_ = sharpes2
        weights_ = weights2

        bestWeights = weights_[sharpes_.argmax()]

        means = np.append(means, means_)
        stds = np.append(stds, stds_)
        sharpes = np.append(sharpes, sharpes_)
        weights = np.append(weights, weights_)

    df_ = pd.DataFrame(weights[sharpes.argmax()]).set_index(stocks.columns)
    df_.columns = ["Weights"]

    next4weeks = returns.loc[[i[0] for i in Holdout[tensor]]][randomsymbols]

    df_.sort_values(by=["Weights"], ascending=False, inplace=True)

    wr = np.array(
        weighted_return(
            np.array((np.exp(next4weeks) - 1).cumsum().tail(1)),
            weights[sharpes.argmax()],
        )
    )[0][0]

    max_sr_ret = means[sharpes.argmax()]
    max_sr_vol = stds[sharpes.argmax()]
    max_sharpe = sharpes[sharpes.argmax()]

    display([bestWeights, max_sr_ret, max_sr_vol, max_sharpe, wr])

In [None]:
cube.create_parameter_hierarchy_from_members(
    "Dates", ret["Date"].tolist(), 
    index_measure_name="Date Index",
    #name="Dates"
    #store_name="Dates"
)
# Setting the hierarchy to non-slicing, as required by
# the max_member aggregation function - see later - this behavior might change in future versions.
h["Dates"].slicing = False

In [None]:
cube.schema

In [None]:
m["daily returns"] = m["returns_vector.SUM"][m["Date Index"]]


In [None]:
m["daily returns"].formatter = "DOUBLE[#,###.0000000000]"

In [None]:
randomsymbols[1]

In [None]:
session.visualize()

In [None]:
session.visualize("Weight 0 simulation")