In [1]:
import pandas as pd
import numpy as np
import gurobipy as gp

In [2]:
# Solver time limit in seconds (one hour); read by use_big_m.
RUNTIME = 60*60


def _simple_returns(prices):
    """Return period-over-period simple returns, (p_t - p_{t-1}) / p_{t-1}.

    The first row has no predecessor, so it is NaN in every column.
    """
    previous = prices.shift(1)
    return (prices - previous) / previous


# 2019 data. The functions below treat the first column as the target series
# to be tracked and the remaining columns as the candidate stocks.
df1 = pd.read_csv('stocks2019.csv', index_col=0)
stock_return1 = _simple_returns(df1)
# Correlation matrix of the candidate stocks (first column dropped).
# The rounding is kept from the original analysis: the author reported that
# without it the stock-selection model did not return exactly m stocks.
# The root cause was not confirmed.
p1 = stock_return1.drop(df1.columns[0], axis=1).corr().round(decimals=5)
# Number of candidate stocks (all columns except the first).
n = len(df1.columns) - 1
# Number of usable return rows in 2019 (the leading NaN row is excluded).
t1 = len(stock_return1) - 1

# 2020 data, used only for evaluation.
df2 = pd.read_csv('stocks2020.csv', index_col=0)
stock_return2 = _simple_returns(df2)
# Number of usable return rows in 2020 (the leading NaN row is excluded).
t2 = len(stock_return2) - 1

In [3]:
def get_stocks(m):
    """Select m stocks by solving a binary assignment model on correlations.

    Decision variables (all binary) are stored in a single vector:
      * y_j at position j (0 <= j < n): 1 if stock j is selected;
      * x_ij at position n + i*n + j: 1 if stock i is assigned to stock j.

    The model maximises sum_ij p1[i, j] * x_ij subject to
      * sum_j y_j = m,
      * sum_j x_ij = 1 for every i,
      * x_ij - y_j <= 0 for every i, j.

    Reads the module-level ``n`` (number of candidate stocks) and ``p1``
    (their correlation matrix).

    Parameters
    ----------
    m : int
        Number of stocks to select.

    Returns
    -------
    list of int
        0-based positions (in the column order of ``p1``) of the selected
        stocks, in ascending order.
    """
    # Objective: zeros for the y block, then the correlations in row-major
    # order so that entry n + i*n + j multiplies x_ij.
    similarity = p1.iloc[:n, :n].to_numpy(dtype=float)
    obj = np.concatenate([np.zeros(n), similarity.ravel()])

    A = np.zeros((n**2 + n + 1, n**2 + n))
    # Row 0: exactly m stocks are selected.
    A[0, :n] = 1
    # Rows 1..n: every stock i is assigned to exactly one stock.
    for i in range(n):
        A[i + 1, n * (i + 1):n * (i + 2)] = 1
    # Rows n+1..: x_ij - y_j <= 0, one row per (i, j) in row-major order.
    A[n + 1:, :n] = -np.tile(np.identity(n), (n, 1))
    # Write the x_ij coefficients in place instead of materialising a dense
    # n^2-by-n^2 identity matrix.
    np.fill_diagonal(A[n + 1:, n:], 1)

    sense = np.array(['='] * (n + 1) + ['<'] * (n**2))

    b = np.array([m] + [1] * n + [0] * (n**2))

    # NOTE(review): addMConstrs appears to be deprecated in newer gurobipy
    # releases in favour of addMConstr - confirm against the installed version.
    ndxMod = gp.Model()
    ndxMod_x = ndxMod.addMVar(len(obj), vtype=['B'] * len(obj))
    ndxMod_con = ndxMod.addMConstrs(A, ndxMod_x, sense, b)
    ndxMod.setMObjective(None, obj, 0, sense=gp.GRB.MAXIMIZE)
    ndxMod.Params.OutputFlag = 0
    ndxMod.optimize()

    # Binary variables are only integral up to the solver's tolerance, so a
    # selected stock can come back as 0.999999... An exact `== 1` test would
    # silently drop it; threshold at 0.5 instead.
    selection = ndxMod_x.x[:n]
    return np.flatnonzero(selection > 0.5).tolist()

    
def get_weights(stock_lst):
    """Fit portfolio weights for the given stocks with a linear program.

    Variables: one weight per chosen stock, followed by one deviation
    variable per return row. The LP minimises the sum of the deviation
    variables subject to
      * the weights summing to 1,
      * (weighted return) - deviation <= target return, for every row,
      * (weighted return) + deviation >= target return, for every row.

    Reads the module-level ``stock_return1`` and ``t1``; the target return
    is column 0 of ``stock_return1`` and row 0 is skipped.

    Parameters
    ----------
    stock_lst : list of int
        0-based stock positions; position k maps to column k + 1 of
        ``stock_return1``.

    Returns
    -------
    numpy.ndarray
        The solved weights, in the same order as ``stock_lst``.
    """
    m = len(stock_lst)
    upper_rows = slice(1, t1 + 1)
    lower_rows = slice(t1 + 1, 2 * t1 + 1)

    # Returns of the chosen stocks and of the target, without the first row.
    chosen_cols = [position + 1 for position in stock_lst]
    chosen_returns = stock_return1.iloc[1:t1 + 1, chosen_cols].to_numpy()
    target = stock_return1.iloc[1:t1 + 1, 0].to_numpy()

    # Only the deviation variables carry objective weight.
    obj = np.array([0] * m + [1] * t1)

    A = np.zeros((t1 * 2 + 1, t1 + m))
    # Row 0: weights add up to one.
    A[0, :m] = 1
    # Both halves of the absolute-value linearisation share the return
    # coefficients and differ only in the sign on the deviation variable.
    A[upper_rows, :m] = chosen_returns
    A[lower_rows, :m] = chosen_returns
    day = np.arange(t1)
    A[1 + day, m + day] = -1
    A[t1 + 1 + day, m + day] = 1

    sense = np.array(['='] + ['<'] * t1 + ['>'] * t1)

    # Right-hand side: 1 for the budget row, then the target twice.
    b = np.concatenate(([1], target, target))

    ndxMod = gp.Model()
    ndxMod_x = ndxMod.addMVar(len(obj), vtype=['C'] * len(obj))
    ndxMod_con = ndxMod.addMConstrs(A, ndxMod_x, sense, b)
    ndxMod.setMObjective(None, obj, 0, sense=gp.GRB.MINIMIZE)
    ndxMod.Params.OutputFlag = 0
    ndxMod.optimize()

    solution = ndxMod_x.x
    return solution[:m]

def get_evaluation(stock_lst, weight_lst, year):
    """Return the total absolute tracking error of a weighted portfolio.

    The result is sum_t |target_t - sum_k w_k * r_{t,k}|, taken over every
    return row except the first of the chosen year's return table.

    Reads the module-level ``stock_return1``/``t1`` (when ``year == 2019``)
    or ``stock_return2``/``t2`` (for any other value of ``year``).

    Parameters
    ----------
    stock_lst : sequence of int
        0-based stock positions; position k maps to column k + 1 of the
        return table (column 0 is the target series).
    weight_lst : sequence of float or numpy.ndarray
        One weight per entry of ``stock_lst``, in the same order.
    year : int
        2019 selects the 2019 returns; anything else selects the 2020 returns.

    Returns
    -------
    float
        The summed absolute deviation between target and portfolio returns.
    """
    if year == 2019:
        stock_return = stock_return1
        t = t1
    else:
        stock_return = stock_return2
        t = t2

    # Row 0 is skipped; rows 1..t hold the usable returns.
    target = stock_return.iloc[1:t + 1, 0].to_numpy(dtype=float)

    # Shift by one because column 0 holds the target series.
    columns = [position + 1 for position in stock_lst]
    stocks = stock_return.iloc[1:t + 1, columns].to_numpy(dtype=float)

    # Accept plain lists as well as arrays for the weights.
    portfolio = stocks @ np.asarray(weight_lst, dtype=float)

    return float(np.abs(target - portfolio).sum())

In [4]:
def use_big_m(m):
    """Choose m stocks and their weights jointly with one mixed-integer model.

    Variables, stored in a single vector:
      * y_j (binary), positions 0 .. n-1: 1 if stock j is held;
      * w_j (continuous), positions n .. 2n-1: weight of stock j;
      * d_t (continuous), positions 2n .. 2n+t1-1: deviation for return row t.

    The model minimises sum_t d_t subject to
      * sum_j y_j = m and sum_j w_j = 1,
      * (weighted return) - d_t <= target_t and
        (weighted return) + d_t >= target_t for every row t,
      * w_j >= 0 and y_j - w_j >= 0 (a big-M link with M = 1).

    Reads the module-level ``n``, ``t1``, ``stock_return1`` and ``RUNTIME``
    (the solver time limit in seconds).

    Parameters
    ----------
    m : int
        Number of stocks to hold.

    Returns
    -------
    tuple (list of int, list of float)
        The 0-based positions of the held stocks in ascending order, and
        their weights in the same order.
    """
    # Usable returns: row 0 is skipped, column 0 is the target series.
    returns = stock_return1.iloc[1:t1 + 1, 1:n + 1].to_numpy(dtype=float)
    target = stock_return1.iloc[1:t1 + 1, 0].to_numpy(dtype=float)

    # Only the deviation variables carry objective weight.
    obj = np.array([0] * n + [0] * n + [1] * t1)

    A = np.zeros((2 + t1 * 2 + n * 2, n * 2 + t1))
    # m stocks are chosen
    A[0, 0:n] = 1
    # all weights add up to 1
    A[1, n:n * 2] = 1
    # absolute value constraint: both halves share the return coefficients
    # and differ only in the sign on the deviation variable
    A[2:2 + t1, n:n * 2] = returns
    A[2 + t1:2 + t1 * 2, n:n * 2] = returns
    np.fill_diagonal(A[2:2 + t1, n * 2:], -1)
    np.fill_diagonal(A[2 + t1:2 + t1 * 2, n * 2:], 1)
    # big M constraint: weight >= 0
    np.fill_diagonal(A[2 + t1 * 2:2 + t1 * 2 + n, n:n * 2], 1)
    # big M constraint: M*y >= weight, with M = 1
    np.fill_diagonal(A[2 + t1 * 2 + n:2 + t1 * 2 + n * 2, :n], 1)
    np.fill_diagonal(A[2 + t1 * 2 + n:2 + t1 * 2 + n * 2, n:n * 2], -1)

    sense = np.array(['='] + ['='] + ['<'] * t1 + ['>'] * t1 + ['>'] * n + ['>'] * n)

    b = np.concatenate(([m, 1], target, target, np.zeros(2 * n)))

    # NOTE(review): addMConstrs appears to be deprecated in newer gurobipy
    # releases in favour of addMConstr - confirm against the installed version.
    ndxMod = gp.Model()
    ndxMod_x = ndxMod.addMVar(len(obj), vtype=['B'] * n + ['C'] * n + ['C'] * t1)
    ndxMod_con = ndxMod.addMConstrs(A, ndxMod_x, sense, b)
    ndxMod.setMObjective(None, obj, 0, sense=gp.GRB.MINIMIZE)
    ndxMod.Params.OutputFlag = 0
    ndxMod.setParam('TimeLimit', RUNTIME)
    ndxMod.optimize()

    # Binary variables are only integral up to the solver's tolerance, so a
    # held stock can come back as 0.999999... An exact `== 1` test would drop
    # both the stock and its weight; threshold at 0.5 instead.
    solution = ndxMod_x.x
    stock_lst = np.flatnonzero(solution[:n] > 0.5).tolist()
    weight_lst = [float(solution[n + j]) for j in stock_lst]
    return stock_lst, weight_lst

### Question 2

In [5]:
# Portfolio size used for the two-step approach.
m = 5

# Step 1: pick m stocks. Step 2: fit their weights.
# Step 3: score the resulting portfolio on the 2020 returns.
stock_lst = get_stocks(m)
weight_lst = get_weights(stock_lst)
score = get_evaluation(stock_lst, weight_lst, 2020)

# Report the three results in the order they were computed.
print('Index of the', m, 'stocks we choose:', stock_lst)
print('Weights of the', m, 'stocks:', weight_lst)
print('Evaluation using 2020 data:', score)

Academic license - for non-commercial use only - expires 2022-08-24
Using license file C:\Users\chiay\gurobi.lic


  stock_lst = get_stocks(m)


Index of the 5 stocks we choose: [56, 59, 63, 94, 98]
Weights of the 5 stocks: [0.04886175 0.21038806 0.58035198 0.07119022 0.089208  ]
Evaluation using 2020 data: 1.1124373455076457


  weight_lst = get_weights(stock_lst)


### Question 3

In [6]:
# Portfolio sizes to compare with the two-step approach
# (select stocks first, then fit weights).
size = [5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

# Collect one record per portfolio size and build the frame once:
# DataFrame.append is deprecated (removed in pandas 2.0) and copies the
# whole frame on every iteration.
records = []
for m in size:
    stock_lst = get_stocks(m)
    weight_lst = get_weights(stock_lst)
    records.append({
        'Portfolio Size': m,
        '2019 Evaluation': get_evaluation(stock_lst, weight_lst, 2019),
        '2020 Evaluation': get_evaluation(stock_lst, weight_lst, 2020),
    })

df = pd.DataFrame(records, columns=['Portfolio Size', '2019 Evaluation', '2020 Evaluation'])

df

  stock_lst = get_stocks(m)
  weight_lst = get_weights(stock_lst)


Unnamed: 0,Portfolio Size,2019 Evaluation,2020 Evaluation
0,5.0,0.789178,1.112437
1,10.0,0.701218,1.102404
2,20.0,0.478836,0.899598
3,30.0,0.418015,0.76911
4,40.0,0.367439,0.788335
5,50.0,0.33401,0.773216
6,60.0,0.343788,1.166438
7,70.0,0.168587,0.545744
8,80.0,0.147683,0.537323
9,90.0,0.053779,0.36779


### Question 4

In [7]:
# Portfolio sizes to compare with the joint (big-M) model, which chooses
# stocks and weights in a single optimisation.
size = [5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

# Collect one record per portfolio size and build the frame once:
# DataFrame.append is deprecated (removed in pandas 2.0) and copies the
# whole frame on every iteration.
records = []
for m in size:
    stock_lst, weight_lst = use_big_m(m)
    # Show the detailed composition only for the smallest portfolio.
    if m == 5:
        print('Index of the', m, 'stocks we choose:', stock_lst)
        print('Weights of the', m, 'stocks:', weight_lst)
    records.append({
        'Portfolio Size': m,
        '2019 Evaluation': get_evaluation(stock_lst, weight_lst, 2019),
        '2020 Evaluation': get_evaluation(stock_lst, weight_lst, 2020),
    })

df = pd.DataFrame(records, columns=['Portfolio Size', '2019 Evaluation', '2020 Evaluation'])

df

  stock_lst, weight_lst = use_big_m(m)


Index of the 5 stocks we choose: [7, 9, 11, 63, 65]
Weights of the 5 stocks: [0.25012259799983066, 0.11375807105291337, 0.19169220616295285, 0.28986928001353274, 0.1545578447707704]


Unnamed: 0,Portfolio Size,2019 Evaluation,2020 Evaluation
0,5.0,0.499259,0.777362
1,10.0,0.300098,0.642562
2,20.0,0.157501,0.559464
3,30.0,0.107281,0.513487
4,40.0,0.079429,0.400889
5,50.0,0.061429,0.406563
6,60.0,0.052509,0.392672
7,70.0,0.047743,0.368295
8,80.0,0.045227,0.370629
9,90.0,0.044911,0.368682
