In [1]:
import yfinance as yf
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from pandas.io.html import read_html
import re
import requests
import gurobipy as gp
from gurobipy import GRB
from math import *
from tqdm import tqdm

In [2]:
base_url = "https://www.investopedia.com/top-stocks-4581225"

content = requests.get(base_url)

urls_raw = re.findall("<li class=\"journey-nav__sublist-item \">\n.*\n.*\n", content.text) 

urls=[]

for url in urls_raw:
    url_clean = re.findall("https.*\"",url)
    url_cleaner = url_clean[0][:-1]
    urls.append(url_cleaner)

In [3]:
urls_m = urls[0:10+1] + urls[18:]
tables = []
try:
    for url in urls_m:
        table = read_html(url)
        tables.append(table)
except Exception as e:
    print(url)
    
del urls
# dividend stocks doesn't tables, may have to manually key in

In [4]:
# Check if very webpage follow the value, growth, momentum format.
for table in tables:
    if(len(table) != 3):
        print(len(table))

In [5]:
stocks_raw = []
tags = []

for table in tables:
    for i in range(3):
        stocks =  table[i].iloc[1:3+1,0]
        identifier = stocks.name
        identifier = identifier.split(" ")
        if("Value" in identifier): identifier = [e for e in identifier if e not in {"Best","Stocks"}]
        if("Growing" in identifier) : identifier = [e for e in identifier if e not in {"Fastest","Stocks"}]
        if("Momentum" in identifier) : identifier = [e for e in identifier if e not in {"with","the","Most","Stocks"}]

        stocks_raw.append(stocks)
        tags.append(identifier)
del tables

In [6]:
stocks_cleaned = []
for i in range(len(stocks_raw)):
    for j in range(len(stocks_raw[i])):
        stocks_df = list(stocks_raw[i])[j]
        stocks_cleaned.append([stocks_df,tags[i]])
del stocks_raw

In [7]:
stocks_rows = []

for i in range(len(stocks_cleaned)):
    stock = stocks_cleaned[i][0]
    tag = stocks_cleaned[i][1].copy()
    
    stock_raw = stock.split(" ")
    stock_name = " ".join(stock_raw[:-1])
    stock_ticker = stock_raw[-1][1:-1]
    # type: value,growing or momentum
    if ("Value" in tag):
        type_ = "Value"
        tag.remove("Value")
    if ("Growing" in tag):
        type_ = "Growth"
        tag.remove("Growing")
    if ("Momentum" in tag):
        type_ = "Momentum"
        tag.remove("Momentum")
    
    # Sub-categories
    sub = " ".join(tag)
    stocks_rows.append([stock_name,stock_ticker,type_,sub])
    
del stocks_cleaned

In [8]:
def expand_rows(lst):
    types = ['Growth', 'Momentum', 'Value']
    a = lst[0:2]
    b = [int(lst[2] in type) for type in types]
    c = [lst[3]]
    return(a+b+c)

In [9]:
stocks_expanded = list(map(lambda x:expand_rows(x),stocks_rows))

df_stocks = pd.DataFrame(stocks_expanded,columns=["stock","ticker",'is.Growth', 'is.Momentum', 'is.Value',
                                                  "Tag"])
del stocks_rows

In [10]:
df_grouped = df_stocks.groupby(df_stocks["ticker"]).mean().round()
df_grouped.reset_index(level=0, inplace=True)
df_grouped.loc[df_grouped.ticker == "BRK.B", 'ticker'] = "BRK-B"
df_grouped["is.Momentum"] = list(map(lambda x: int(x),df_grouped["is.Momentum"]))
df_grouped["is.Growth"] = list(map(lambda x: int(x),df_grouped["is.Growth"]))
df_grouped["is.Value"] = list(map(lambda x: int(x),df_grouped["is.Value"]))

In [11]:
tickers = np.unique(df_grouped["ticker"])
maindata = yf.download(list(tickers))["Adj Close"]

[*********************100%***********************]  175 of 175 completed


In [12]:
# df_stocks[df_stocks['stock'].isin(stocks)]
df_stocks

Unnamed: 0,stock,ticker,is.Growth,is.Momentum,is.Value,Tag
0,ViacomCBS Inc.,VIAC,0,0,1,Communications
1,CenturyLink Inc.,CTL,0,0,1,Communications
2,Discovery Inc.,DISCK,0,0,1,Communications
3,Netflix Inc.,NFLX,1,0,0,Communications
4,Charter Communications Inc.,CHTR,1,0,0,Communications
...,...,...,...,...,...,...
230,Orsted A/S,DNNGY,1,0,0,Wind
231,Boralex Inc.,BLX.TO,1,0,0,Wind
232,Boralex Inc.,BLX.TO,0,1,0,Wind
233,Orsted A/S,DNNGY,0,1,0,Wind


In [13]:
data = maindata.dropna()
ret = data.pct_change().dropna()
ret_mean = ret.mean()
ret_std = ret.std()
ret_cov = ret.cov()
stocks = data.columns


In [14]:
m = gp.Model('portfolio')
n = len(stocks)

weights = pd.Series(m.addVars(stocks, name = "stock",vtype = GRB.INTEGER), index=stocks)

portfolio_risk = ret_cov.dot(weights*0.01).dot(weights*0.01)
m.setObjective(portfolio_risk, GRB.MINIMIZE)

m.addConstr(weights.sum() == 100, 'budget')


m.setParam('OutputFlag', 0) # don't print the whole Chunk out
m.optimize()

# portfolio_return = ret_mean.dot(weights)

# mm.setObjective(sum(var[i]*c[i] for i in range(m)), GRB.MAXIMIZE)

Using license file c:\gurobi\gurobi.lic
Academic license - for non-commercial use only


In [15]:
weights
# print('Minimum Risk Portfolio:\n')
# for weight in weights:
#     if weight.x > 0:
#         print("{} : {}%".format(weight.varname, weight.x))
# minrisk_volatility = sqrt(portfolio_risk.getValue())
# print('Volatility      = {}'.format(minrisk_volatility))
# minrisk_return = portfolio_return.getValue()
# print('Expected Return = {}'.format(minrisk_return))

AAPL        <gurobi.Var stock[AAPL] (value -0.0)>
ABEV        <gurobi.Var stock[ABEV] (value -0.0)>
ABX.TO    <gurobi.Var stock[ABX.TO] (value -0.0)>
ADM          <gurobi.Var stock[ADM] (value -0.0)>
AEM.TO    <gurobi.Var stock[AEM.TO] (value -0.0)>
                           ...                   
WRK          <gurobi.Var stock[WRK] (value -0.0)>
WST          <gurobi.Var stock[WST] (value -0.0)>
XBIT        <gurobi.Var stock[XBIT] (value -0.0)>
XRX          <gurobi.Var stock[XRX] (value -0.0)>
ZNGA        <gurobi.Var stock[ZNGA] (value -0.0)>
Length: 175, dtype: object

In [16]:
# target = m.addConstr(portfolio_return == minrisk_return, 'target')
# frontier = pd.Series()
# for r in tqdm(np.linspace(ret_mean.min(), ret_mean.max(), 100)):
#     target.rhs = r
#     m.optimize()
#     frontier.loc[sqrt(portfolio_risk.getValue())] = r

In [17]:
ceil(0.1)

1

In [18]:
len(if([1,2,3,4] > 1))

SyntaxError: invalid syntax (<ipython-input-18-7136f5137cb8>, line 1)