In [None]:
import os
import pandas as pd
import numpy as np
import statsmodels.api as sm
import scipy.stats as scs
import matplotlib.pyplot as plt

In [None]:
def get_price(stock, path):
    """Load the closing prices of several stocks into one date-indexed table.

    Every symbol ``s`` in ``stock`` is read from ``<path>/<s>.csv``; each file
    must provide a ``Date`` and a ``Close`` column.

    When all files list exactly the same dates, the close columns are simply
    placed side by side.  When the files cover different dates, only the dates
    present in every file are kept, so each row holds prices of one and the
    same day.  (Previously the dates of the last file read were applied to
    every column, or the call failed when the files differed in length.)
    In that second case the dates inside each file are expected to be unique.

    Parameters
    ----------
    stock : iterable of str
        Symbols, i.e. CSV file names without the ``.csv`` extension.
    path : str
        Directory that contains the CSV files.

    Returns
    -------
    pandas.DataFrame
        Indexed by date, with one close-price column per symbol plus a
        ``"Date"`` column repeating the index.  When ``stock`` is empty the
        frame has no rows and only the ``"Date"`` column.
    """
    closes = {}
    for symbol in stock:
        frame = pd.read_csv(os.path.join(path, symbol + '.csv'))
        closes[symbol] = pd.Series(frame["Close"].values, index=frame["Date"].values)

    # Nothing to load: still hand back a frame with the "Date" column callers expect.
    if not closes:
        return pd.DataFrame({"Date": []})

    series = list(closes.values())
    reference = series[0].index
    if all(s.index.equals(reference) for s in series[1:]):
        # Identical calendars: assemble positionally, no alignment needed.
        result = pd.DataFrame(
            {symbol: s.values for symbol, s in closes.items()}, index=reference
        )
    else:
        # Different calendars: align on the date and keep the common dates only.
        result = pd.concat(closes, axis=1, join="inner")

    # Position 1 reproduces the historical column layout: first symbol, "Date", rest.
    result.insert(1, "Date", result.index.tolist())
    return result
def get_stock(path):
    """Return the symbols of the CSV files stored directly in ``path``.

    Only files located in ``path`` itself are listed, because ``get_price``
    reads ``<path>/<symbol>.csv`` and could not open a file found in a
    sub-directory.  The names are sorted so that the column order of the
    resulting price table does not depend on the file-system listing order.

    Parameters
    ----------
    path : str
        Directory to scan.

    Returns
    -------
    list of str
        File names ending in ``.csv`` with that extension removed, in sorted
        order.  Empty when ``path`` is not an existing directory.
    """
    if not os.path.isdir(path):
        return []
    return sorted(
        name[:-4]
        for name in os.listdir(path)
        if name.endswith('.csv') and os.path.isfile(os.path.join(path, name))
    )

In [None]:
# Find the per-stock CSV files, load their closing prices and preview the table
path = r'Stock'
stock = get_stock(path)  # e.g. ['0066.HK', '0168.HK', '0857.HK']

# The dates are already the index, so the duplicate "Date" column is removed
data = get_price(stock, path)
del data["Date"]
data.head()

In [None]:
# Rebase every price series to 100 at the first row so all stocks share one scale
data.div(data.iloc[0]).mul(100).plot(figsize=(15, 6))
plt.legend(loc="best")
plt.show()

In [None]:
# Daily log returns; the mean is scaled by 252 trading days to annualize it
returns = np.log(data.div(data.shift(1)))
252 * returns.mean()

In [None]:
# Annualized covariance matrix of the daily log returns (pandas' built-in estimator)
returns.cov().mul(252)

In [None]:
# Draw one random weight per asset and rescale them so that they sum to 1
noa = len(stock)
weights = np.random.random(noa)
weights = weights / weights.sum()
weights

In [None]:
# Annualized portfolio return: weighted sum of the mean daily returns times 252
(returns.mean() * weights).sum() * 252

In [None]:
# Annualized portfolio variance: w' * (252 * covariance) * w
weights.T.dot(np.dot(returns.cov() * 252, weights))

In [None]:
# Annualized portfolio standard deviation (volatility): square root of the variance
np.sqrt(weights.T.dot(np.dot(returns.cov() * 252, weights)))


In [None]:
# Monte Carlo simulation: draw a large number of random weight vectors and
# record the annualized expected return and volatility of each portfolio.
n_portfolios = 4000

# Fixed seed so that the simulated cloud is identical on every run.
rng = np.random.RandomState(42)

# The annualized mean vector and covariance matrix do not depend on the
# weights, so they are computed once instead of once per simulated portfolio.
annual_mean = returns.mean().values * 252
annual_cov = returns.cov().values * 252

port_returns = []
# NOTE: despite its name this holds standard deviations (volatility), not
# variances; the name is kept because the final plot refers to it.
port_variance = []
for p in range(n_portfolios):
    weights = rng.random_sample(noa)
    weights /= np.sum(weights)
    port_returns.append(np.dot(weights, annual_mean))
    port_variance.append(np.sqrt(np.dot(weights, np.dot(annual_cov, weights))))

port_returns = np.array(port_returns)
port_variance = np.array(port_variance)

# The risk-free rate is set at 4%
risk_free = 0.04
plt.figure(figsize = (8,4))
plt.scatter(port_variance, port_returns, c=(port_returns-risk_free)/port_variance, marker = 'o')
plt.grid(True)
plt.xlabel('expected volatility')
plt.ylabel('expected return')
plt.colorbar(label = 'Sharpe ratio')
plt.show()

In [None]:
# Portfolio optimization 1 -- maximize the Sharpe ratio
# `statistics` gathers the key figures of a portfolio (return, volatility and Sharpe ratio).
# The optimum is obtained by solving a constrained problem in which the weights must sum to 1.

def statistics(weights):
    """Return the annualized key figures of the portfolio given by ``weights``.

    The figures are derived from the global ``returns`` table of daily log
    returns, using 252 trading days per year.

    Parameters
    ----------
    weights : array-like
        One weight per column of ``returns``.

    Returns
    -------
    numpy.ndarray
        ``[expected return, volatility, expected return / volatility]``.
        The last entry is the Sharpe ratio computed without subtracting a
        risk-free rate.
    """
    w = np.array(weights)
    annual_return = np.sum(returns.mean() * w) * 252
    annual_cov = returns.cov() * 252
    annual_vol = np.sqrt(np.dot(w.T, np.dot(annual_cov, w)))
    return np.array([annual_return, annual_vol, annual_return / annual_vol])
#The derivation of portfolio optimization is a constrained optimization problem
import scipy.optimize as sco

# Objective for the optimizer: minimizing the negated Sharpe ratio maximizes the ratio itself
def min_sharpe(weights):
    """Return the negative of the Sharpe ratio reported by ``statistics(weights)``."""
    _, _, sharpe = statistics(weights)
    return -sharpe

# Equality constraint passed to `minimize`: all weights must add up to 1
cons = {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}

# Every weight is restricted to the interval [0, 1]; the bounds are given as a tuple of (low, high) pairs
bnds = ((0, 1),) * noa

# The solver needs an initial guess for the weights; an equally weighted portfolio is used
opts = sco.minimize(min_sharpe, [1. / noa] * noa, method='SLSQP', bounds=bnds, constraints=cons)
opts

In [None]:
# Weight vector of the maximum-Sharpe portfolio, rounded to three decimals
np.round(opts['x'], 3)

In [None]:
# Key figures of the maximum-Sharpe portfolio:
# expected return, expected volatility and Sharpe ratio
np.round(statistics(opts['x']), 3)

In [None]:
# Portfolio optimization 2 -- minimum variance
# Here the optimal portfolio is the one with the smallest risk.

# Objective for the optimizer
def min_variance(weights):
    """Return the volatility entry (index 1) of ``statistics(weights)``."""
    _, volatility, _ = statistics(weights)
    return volatility

# Same bounds, constraint and equally weighted starting point as before; only the objective differs
optv = sco.minimize(min_variance, [1. / noa] * noa, method='SLSQP', bounds=bnds, constraints=cons)
optv

In [None]:
# Weight vector of the minimum-variance portfolio, rounded to three decimals
np.round(optv['x'], 3)

In [None]:
# Expected return, volatility and Sharpe ratio of the minimum-variance portfolio
np.round(statistics(optv['x']), 3)

In [None]:
# Efficient frontier: for every target return, find the portfolio with the
# smallest volatility.  Each optimization uses two equality constraints:
# 1. the portfolio return equals the target, 2. the weights sum to 1.
# The objective is `min_variance`, defined in the minimum-variance section.

# The return constraint changes with each target return level in the loop.
target_returns = np.linspace(0.0,0.5,50)
target_variance = []
for tar in target_returns:
    # Stored under its own name so the single-constraint `cons` used by the
    # earlier optimizations is not overwritten.  `tar` is bound as a default
    # argument so the constraint keeps the target it was built for.
    frontier_cons = (
        {'type': 'eq', 'fun': lambda x, tar=tar: statistics(x)[0] - tar},
        {'type': 'eq', 'fun': lambda x: np.sum(x) - 1},
    )
    res = sco.minimize(min_variance, noa*[1./noa,], method = 'SLSQP', bounds = bnds, constraints = frontier_cons)
    # A target that cannot be reached makes the solver stop without a feasible
    # solution; record NaN instead of a meaningless volatility so that the
    # point is left out of the frontier.
    target_variance.append(res['fun'] if res['success'] else np.nan)

# Same length as `target_returns`; NaN marks targets without a solution.
target_variance = np.array(target_variance)

The following plot shows the optimization results:
Crosses: the curve they form is the efficient frontier (the minimum-volatility portfolio for each target return)

Red star: the portfolio with the highest Sharpe ratio

Yellow star: the portfolio with the minimum variance

In [None]:
plt.figure(figsize=(8, 4))

# Circles: the randomly weighted portfolios from the Monte Carlo simulation
plt.scatter(port_variance, port_returns, c=port_returns / port_variance, marker='o')
# Crosses: the efficient frontier
plt.scatter(target_variance, target_returns, c=target_returns / target_variance, marker='x')

# Red star: maximum-Sharpe portfolio; yellow star: minimum-variance portfolio
for optimum, star in ((opts, 'r*'), (optv, 'y*')):
    ret, vol, _ = statistics(optimum['x'])
    plt.plot(vol, ret, star, markersize=15.0)

plt.grid(True)
plt.xlabel('expected volatility')
plt.ylabel('expected return')
plt.colorbar(label='Sharpe ratio')
plt.show()