In [1]:
import pandas as pd

# Load the dataset
file_path = r'C:\Users\jianbai\Desktop\protfolio\data.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the dataset
data.head()


Unnamed: 0,Company,Date,Close/Last,Volume,Open,High,Low
0,AAPL,07/17/2023,$193.99,50520160,$191.90,$194.32,$191.81
1,AAPL,07/14/2023,$190.69,41616240,$190.23,$191.1799,$189.63
2,AAPL,07/13/2023,$190.54,41342340,$190.50,$191.19,$189.78
3,AAPL,07-12-2023,$189.77,60750250,$189.68,$191.70,$188.47
4,AAPL,07-11-2023,$188.08,46638120,$189.16,$189.30,$186.60


In [2]:
# Preprocessing the data
# Convert 'Close/Last' to float and handle the date formatting

# Removing $ sign and converting to float
data['Close/Last'] = data['Close/Last'].replace('[\$,]', '', regex=True).astype(float)

# Converting 'Date' to datetime format
data['Date'] = pd.to_datetime(data['Date'], errors='coerce')

# Handling possible parsing errors
data.dropna(subset=['Date'], inplace=True)

# Sorting values by company and date
data.sort_values(by=['Company', 'Date'], inplace=True)

# Calculate daily returns for each stock
data['Return'] = data.groupby('Company')['Close/Last'].pct_change()

# Removing the rows with NaN returns (first row for each stock)
data.dropna(subset=['Return'], inplace=True)

# Display the first few rows of the processed dataset
data.head()


Unnamed: 0,Company,Date,Close/Last,Volume,Open,High,Low,Return
2514,AAPL,2013-07-19,15.1768,268548901,$15.4679,$15.4993,$15.1554,-0.015765
2513,AAPL,2013-07-22,15.2254,207648981,$15.3379,$15.3482,$15.1953,0.003202
2512,AAPL,2013-07-23,14.9639,354477618,$15.2143,$15.2486,$14.9539,-0.017175
2511,AAPL,2013-07-24,15.7325,591624923,$15.6761,$15.8782,$15.545,0.051364
2510,AAPL,2013-07-25,15.6607,229432412,$15.7393,$15.7643,$15.5646,-0.004564


In [3]:
# Calculating the mean returns and the covariance matrix
mean_returns = data.groupby('Company')['Return'].mean()
covariance_matrix = data.pivot(index='Date', columns='Company', values='Return').cov()

# Displaying the mean returns and the covariance matrix
mean_returns, covariance_matrix


(Company
 AAPL    0.001908
 AMD     0.003169
 AMZN    0.001754
 CSCO    0.000627
 META    0.002104
 MSFT    0.001706
 NFLX    0.002252
 QCOM    0.000844
 SBUX    0.000918
 TSLA    0.003369
 Name: Return, dtype: float64,
 Company      AAPL       AMD      AMZN      CSCO      META      MSFT      NFLX  \
 Company                                                                         
 AAPL     0.000487  0.000374  0.000283  0.000199  0.000315  0.000281  0.000283   
 AMD      0.000374  0.002140  0.000446  0.000294  0.000469  0.000393  0.000528   
 AMZN     0.000283  0.000446  0.000654  0.000165  0.000394  0.000300  0.000459   
 CSCO     0.000199  0.000294  0.000165  0.000366  0.000171  0.000198  0.000183   
 META     0.000315  0.000469  0.000394  0.000171  0.000948  0.000313  0.000468   
 MSFT     0.000281  0.000393  0.000300  0.000198  0.000313  0.000415  0.000309   
 NFLX     0.000283  0.000528  0.000459  0.000183  0.000468  0.000309  0.001238   
 QCOM     0.000312  0.000540  0.000266  0.

In [4]:
mu_p = mean_returns.median()
mu_p


0.0018309733742580036

In [5]:
import numpy as np
from scipy.optimize import minimize

# Number of assets
n_assets = len(mean_returns)

# Initial guess for the weights (equal distribution)
w_initial = np.ones(n_assets) / n_assets

# Constraints
constraints = (
    {'type': 'eq', 'fun': lambda w: np.sum(w) - 1},  # Sum of weights is 1
    {'type': 'eq', 'fun': lambda w: np.dot(w, mean_returns) - 1.4*mu_p}  # Target return
)

# Bounds for the weights (0 to 1 for each weight)
bounds = tuple((0, 1) for _ in range(n_assets))

# Objective function (portfolio variance)
def portfolio_variance(w, covariance_matrix):
    return np.dot(w.T, np.dot(covariance_matrix, w))

# Optimize using gradient method (SLSQP algorithm)
opt_result = minimize(portfolio_variance, w_initial, args=(covariance_matrix,), 
                      method='SLSQP', bounds=bounds, constraints=constraints)

# Optimized portfolio weights
optimized_weights = opt_result.x

# Check if optimization was successful
optimization_success = opt_result.success
optimization_message = opt_result.message

optimized_weights, optimization_success, optimization_message



(array([1.02351636e-01, 2.20433073e-01, 8.78453730e-02, 4.16333634e-17,
        1.20622645e-01, 8.33737446e-02, 1.34437816e-01, 2.19160868e-03,
        9.41865585e-03, 2.39325449e-01]),
 True,
 'Optimization terminated successfully')

In [6]:
# Expected return of the optimized portfolio
expected_return = np.dot(optimized_weights, mean_returns)
expected_return

# Risk (standard deviation) of the optimized portfolio
risk = np.sqrt(portfolio_variance(optimized_weights, covariance_matrix))
risk

# Assuming a risk-free rate of 0 for simplicity
risk_free_rate = 0

# Sharpe ratio of the optimized portfolio
sharpe_ratio = (expected_return - risk_free_rate) / risk
sharpe_ratio


# Individual risks (standard deviation) of each stock
individual_risks = np.sqrt(np.diag(covariance_matrix))

# Weighted average of individual risks
weighted_average_individual_risk = np.dot(optimized_weights, individual_risks)

# Diversification measure (the lower, the better)
diversification = risk / weighted_average_individual_risk
diversification


0.7082457171432429

In [7]:
# Printing the portfolio characteristics in English with their names
print("Portfolio Characteristics:")
print("--------------------------")
print(f"Expected Return: {expected_return:.6f} (The anticipated level of return from the optimized portfolio)")
print(f"Risk (Standard Deviation): {risk:.6f} (Measures the volatility or uncertainty of returns)")
print(f"Sharpe Ratio: {sharpe_ratio:.6f} (Indicates the excess return per unit of risk)")
print(f"Diversification: {diversification:.6f} (A measure of risk reduction due to diversification)")


Portfolio Characteristics:
--------------------------
Expected Return: 0.002563 (The anticipated level of return from the optimized portfolio)
Risk (Standard Deviation): 0.025427 (Measures the volatility or uncertainty of returns)
Sharpe Ratio: 0.100813 (Indicates the excess return per unit of risk)
Diversification: 0.708246 (A measure of risk reduction due to diversification)
