In [1]:
import pandas as pd

# Load the provided dataset
file_path = r'C:\Users\jianbai\Desktop\protfolio\data.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the dataset to understand its structure
data.head()


Unnamed: 0,Company,Date,Close/Last,Volume,Open,High,Low
0,AAPL,07/17/2023,$193.99,50520160,$191.90,$194.32,$191.81
1,AAPL,07/14/2023,$190.69,41616240,$190.23,$191.1799,$189.63
2,AAPL,07/13/2023,$190.54,41342340,$190.50,$191.19,$189.78
3,AAPL,07-12-2023,$189.77,60750250,$189.68,$191.70,$188.47
4,AAPL,07-11-2023,$188.08,46638120,$189.16,$189.30,$186.60


In [23]:
# Check the data type of 'Close/Last' column
data_type_close_last = data['Close/Last'].dtype

# If 'Close/Last' is not of string type, convert it to string and then apply the replace operation
if data_type_close_last != 'object':
    data['Close/Last'] = data['Close/Last'].astype(str).str.replace('$', '').astype(float)
else:
    data['Close/Last'] = data['Close/Last'].str.replace('$', '').astype(float)

# Convert 'Date' to datetime format
data['Date'] = pd.to_datetime(data['Date'])

# Recalculate daily returns for each stock
data.sort_values(by=['Company', 'Date'], inplace=True)
data['Daily Return'] = data.groupby('Company')['Close/Last'].pct_change()

# Display the modified dataset with daily returns
data.head()





Unnamed: 0,Company,Date,Close/Last,Volume,Open,High,Low,Daily Return
2515,AAPL,2013-07-18,15.4199,218632537,15.4779,15.5311,15.3789,
2514,AAPL,2013-07-19,15.1768,268548901,15.4679,15.4993,15.1554,-0.015765
2513,AAPL,2013-07-22,15.2254,207648981,15.3379,15.3482,15.1953,0.003202
2512,AAPL,2013-07-23,14.9639,354477618,15.2143,15.2486,14.9539,-0.017175
2511,AAPL,2013-07-24,15.7325,591624923,15.6761,15.8782,15.545,0.051364


In [25]:
# Check for duplicate entries in the dataset for the combination of 'Date' and 'Company'
duplicate_entries = data.duplicated(subset=['Date', 'Company'], keep=False)
duplicates = data[duplicate_entries]

duplicates.head() if not duplicates.empty else "No duplicates found"





Unnamed: 0,Company,Date,Close/Last,Volume,Open,High,Low,Daily Return
3,AAPL,NaT,189.77,60750250,189.68,191.7,188.47,-0.021754
4,AAPL,NaT,188.08,46638120,189.16,189.3,186.6,-0.008906
5,AAPL,NaT,188.61,59922160,189.26,189.99,187.035,0.002818
6,AAPL,NaT,190.68,46815000,191.41,192.67,190.24,0.010975
7,AAPL,NaT,191.81,45156010,189.84,192.02,189.2,0.005926


In [32]:
# Remove duplicate entries if any
data_no_duplicates = data.drop_duplicates(subset=['Date', 'Company'])

# Recalculate the covariance matrix of daily returns using the data with no duplicates
cov_matrix = data_no_duplicates.pivot(index='Date', columns='Company', values='Daily Return').cov()


In [33]:
from scipy.optimize import minimize
import numpy as np

# Convert the mean returns and covariance matrix to numpy arrays
mean_returns = mean_daily_returns.values
cov_matrix_np = cov_matrix.values

# Number of assets in the portfolio
num_assets = len(mean_returns)

# Function to calculate portfolio volatility
def portfolio_volatility(weights, mean_returns, cov_matrix):
    return np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))

# Constraint: Sum of weights is 1 (all investment is allocated)
constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})

# Constraint: Non-negative weights
bounds = tuple((0, 1) for asset in range(num_assets))

# Constraint: Target return (set to the median of mean returns as an example)
target_return = np.median(mean_returns)
return_constraint = {'type': 'eq', 'fun': lambda x: np.sum(mean_returns * x) - target_return}

# Initial guess (equal distribution)
init_guess = num_assets * [1. / num_assets,]

# Minimize the portfolio volatility
optimal_portfolio = minimize(portfolio_volatility, init_guess, args=(mean_returns, cov_matrix_np),
                             method='SLSQP', bounds=bounds, constraints=[constraints, return_constraint])

optimal_portfolio_weights = optimal_portfolio.x if optimal_portfolio.success else None
optimal_portfolio_weights


array([0.06211833, 0.01479293, 0.0185496 , 0.34674463, 0.078245  ,
       0.        , 0.06974958, 0.06839196, 0.25082792, 0.09058005])

In [34]:
# Function to calculate expected portfolio return
def portfolio_return(weights, mean_returns):
    return np.sum(mean_returns * weights)

# Function to calculate Sharpe ratio (assuming risk-free rate is 0 for simplicity)
def sharpe_ratio(weights, mean_returns, cov_matrix):
    return portfolio_return(weights, mean_returns) / portfolio_volatility(weights, mean_returns, cov_matrix)

# Calculate portfolio Expected Return, Risk (Volatility), and Sharpe Ratio
expected_return = portfolio_return(optimal_portfolio_weights, mean_returns)
risk = portfolio_volatility(optimal_portfolio_weights, mean_returns, cov_matrix_np)
sharpe_ratio_value = sharpe_ratio(optimal_portfolio_weights, mean_returns, cov_matrix_np)

# Calculate Diversification
# Diversification can be measured as 1 - Herfindahl Index (sum of the squared portfolio weights)
diversification = 1 - np.sum(optimal_portfolio_weights ** 2)

expected_return, risk, sharpe_ratio_value, diversification


(0.0004586638988146737,
 0.016919727216098274,
 0.02710823247660151,
 0.7885624193070495)

In [38]:
# Calculate portfolio Expected Return, Risk (Volatility), and Sharpe Ratio
expected_return = portfolio_return(optimal_portfolio_weights, mean_returns)
risk = portfolio_volatility(optimal_portfolio_weights, mean_returns, cov_matrix_np)
sharpe_ratio_value = sharpe_ratio(optimal_portfolio_weights, mean_returns, cov_matrix_np)

# Calculate Diversification
# Diversification can be measured as 1 - Herfindahl Index (sum of the squared portfolio weights)
diversification = 1 - np.sum(optimal_portfolio_weights ** 2)

print("Expected Return:", expected_return)
print("Risk (Volatility):", risk)
print("Sharpe Ratio:", sharpe_ratio_value)
print("Diversification:", diver)


Expected Return: 0.0004586638988146737
Risk (Volatility): 0.016919727216098274
Sharpe Ratio: 0.02710823247660151
Diversification: 0.6730260279613982


In [37]:
# Individual risks (standard deviation) of each stock
individual_risks = np.sqrt(np.diag(cov_matrix))

# Weighted average of individual risks
weighted_average_individual_risk = np.dot(optimal_portfolio_weights, individual_risks)

# Diversification measure (the lower, the better)
diver = risk / weighted_average_individual_risk
diver

0.6730260279613982