# Get the data

Depending on each case your data may look a little different, but we should start our portfolio optimization by acquiring information on a set of assets to better understand their behaviour through, in this case, about two months of daily activity in the market.

In [44]:
import pandas as pd

# Read the price history from a local CSV file into a DataFrame.
# NOTE(review): the columns look like a Binance kline export with one row per
# asset per day — confirm the source of the file.
data = pd.read_csv("tonghop2thang4.csv")
# Bare expression so the notebook renders the frame as the cell output.
data

Unnamed: 0,Asset,Open time,Open,High,Low,Close,Volume,Close time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore
0,BTCUSDT,1682812800000,29230.4500,29969.3900,29079.5900,29233.2100,3.975254e+04,1682899199999,1.172155e+09,949458,1.989427e+04,5.867117e+08,0
1,BTCUSDT,1682899200000,29233.2000,29337.3400,27666.9500,28068.2600,6.443366e+04,1682985599999,1.828221e+09,1362793,3.076193e+04,8.727120e+08,0
2,BTCUSDT,1682985600000,28068.2600,28879.8800,27872.0000,28669.8600,5.082452e+04,1683071999999,1.441734e+09,1174697,2.478937e+04,7.032068e+08,0
3,BTCUSDT,1683072000000,28669.8500,29266.6600,28113.6900,29026.1600,6.461579e+04,1683158399999,1.845961e+09,1502909,3.229121e+04,9.225500e+08,0
4,BTCUSDT,1683158400000,29026.1600,29379.8300,28663.6400,28838.1600,4.257548e+04,1683244799999,1.233816e+09,961542,2.047021e+04,5.932814e+08,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
243,XRPUSDT,1687737600000,0.4903,0.4919,0.4701,0.4790,3.053380e+08,1687823999999,1.470005e+08,163717,1.643139e+08,7.912598e+07,0
244,XRPUSDT,1687824000000,0.4790,0.4851,0.4759,0.4838,2.915839e+08,1687910399999,1.401223e+08,132593,1.704094e+08,8.189001e+07,0
245,XRPUSDT,1687910400000,0.4839,0.4844,0.4493,0.4647,3.258378e+08,1687996799999,1.533296e+08,164563,1.600544e+08,7.536062e+07,0
246,XRPUSDT,1687996800000,0.4648,0.4773,0.4612,0.4747,2.112146e+08,1688083199999,9.914728e+07,102466,9.942828e+07,4.665360e+07,0


Following this, we will calculate the average expected return for each asset. This is done by taking the difference between the *closing* and *opening* prices, scaled by the opening price. This way we make sure each asset is evaluated independently of its price level.

We will also compute the covariance between the assets' returns so that we can use these values as part of our portfolio diversification constraint.

In [45]:
from cmath import exp
import numpy as np

# Tickers present in the data, in order of first appearance
asset_list = data["Asset"].unique()

# Per-asset mean return (keyed by ticker) and the matching return series
exp_ret = {}
return_list = []
for ticker in asset_list:
    ticker_rows = data[data["Asset"] == ticker]
    opens = ticker_rows["Open"].astype("float").to_numpy()
    closes = ticker_rows["Close"].astype("float").to_numpy()

    # Relative open-to-close change of every row; the sign tells whether
    # the price closed above (+) or below (-) its open
    period_returns = (closes - opens) / opens
    exp_ret[ticker] = period_returns.mean()
    return_list.append(period_returns)

# Stack the per-asset series, one row per asset
# (assumes every asset has the same number of rows — TODO confirm)
return_list = np.array(return_list)

# Mean returns as a plain list, in the same order as asset_list
mu = list(exp_ret.values())

# Covariance matrix of the returns; np.cov treats each row as one variable
sigma = np.cov(return_list)

Here $\mu$ is the value associated with the expected average return for each asset.

In [46]:
# Report each ticker next to its mean return, one line per asset.
for ticker_name, mean_return in zip(asset_list, mu):
    print(f"Expected average return for asset {ticker_name} is {mean_return}")

Expected average return for asset BTCUSDT is 0.0008997860530971459
Expected average return for asset ETHUSDT is 0.00048031255222815144
Expected average return for asset SOLUSDT is -0.0026702570589396614
Expected average return for asset XRPUSDT is 0.00011490019883617948


And $\sigma$ is the covariance between those very same assets.

In [47]:
# Display the covariance matrix of the asset returns as the cell output.
sigma

array([[0.00046808, 0.00042506, 0.00055679, 0.00029436],
       [0.00042506, 0.00053332, 0.00065014, 0.00035847],
       [0.00055679, 0.00065014, 0.00145862, 0.00054303],
       [0.00029436, 0.00035847, 0.00054303, 0.00059225]])

It is important to know the cost of each asset so that we can also limit the budget we would like to spend on our investment.

In [48]:
# Cost of each asset = opening price of its most recent candle.
#
# Step 1: find the latest "Open time" per asset. The aggregation is requested
# by name ("max") instead of passing the builtin `max`, which recent pandas
# versions flag with a FutureWarning. The result is also no longer bound to
# `filter`, so the Python builtin of that name stays usable in later cells.
latest_open_time = data.groupby("Asset").agg({"Open time": "max"}).reset_index()

# Step 2: keep only the rows of `data` that match (Asset, latest Open time).
# The join keys are spelled out rather than inferred from the common columns;
# the inner merge keeps the row order of `data`.
costs = data.merge(
    latest_open_time, how="inner", on=["Asset", "Open time"]
).drop_duplicates()

# Step 3: one {"Asset": ..., "Open": ...} record per asset.
cost_list = costs[["Asset", "Open"]].to_dict("records")
print(cost_list)

[{'Asset': 'BTCUSDT', 'Open': 30447.31}, {'Asset': 'ETHUSDT', 'Open': 1851.99}, {'Asset': 'SOLUSDT', 'Open': 18.0}, {'Asset': 'XRPUSDT', 'Open': 0.4747}]


  filter = data.groupby("Asset").agg({"Open time":max}).reset_index()


We will store this information so that it can be used later.

In [43]:
import json

# Bundle the optimisation inputs for serialisation:
#   "mu"     - list of mean returns, one per asset
#   "sigma"  - covariance matrix converted to nested lists (JSON has no arrays)
#   "assets" - per-asset records holding the ticker and its opening price
# A dedicated name is used on purpose: rebinding `data` here would replace the
# price DataFrame with this dict and break any earlier cell that is re-run.
portfolio_inputs = {"mu": mu, "sigma": sigma.tolist(), "assets": cost_list}
json_object = json.dumps(portfolio_inputs, indent=4)

# Persist the inputs so they can be loaded again later.
with open("binance-data.json", "w") as file:
    file.write(json_object)