In [8]:
import numpy as np
import pandas as pd
import pandas_datareader.data as web
import os
import warnings

warnings.filterwarnings('ignore')

In [39]:
def mySVD(A):
    V1 = np.dot(A.T, A)
    U1 = np.dot(A, A.T)
    V_val, V = np.linalg.eig(V1)
    U_val, U = np.linalg.eig(U1)
    S = np.sqrt(U_val)
    if np.allclose(np.eye(len(U)), U.dot(U.T.conj())) == True and np.allclose(np.eye(len(V)), V.dot(V.T.conj())) == True:
        return -np.sort(-S)
    else:
        print('\nError: matrix U or V is not unitary matrix.\n')
        return -np.sort(-S)

In [42]:
def compute_distribution(df):
    s = mySVD(df)
    rate = s/np.sum(s)
    return rate


def compute_entropy(x):
    s = compute_distribution(x)
    entropy = -np.log2(np.inner(s,np.log2(1+s)))
    return entropy


def relative_entropy(p,q):
    if not(p.shape == q.shape):
        print("\n Error: dimension must be aligned for two vectors\n")
        quit()
    p_over_q = np.zeros(len(p))
    with np.errstate(divide='ignore'):
        p_over_q = p/q
    t = np.log2(p_over_q)
    t[np.isnan(t)] = 0.0
    t[np.isinf(t)] = 0.0
    ans = np.inner(p,t)
    return ans

In [12]:
def retrieve_stock_data(stock_list, s_date, e_date):
    i = 0
    stock_data = {}
    for stock in stock_list:
        data = web.DataReader(stock, 'yahoo', s_date, e_date)
        stock_data[stock] = data
        filename = stock + '.csv'
        data.to_csv(filename)
        i += 1
    return stock_data

In [15]:
stock_list = ['GM', 'F', 'TM', 'TSLA', 'HMC']

dir_name = 'Stock_data'
if not os.path.isfile(dir_name):
    try:
        os.mkdir(dir_name)
    except FileExistsError:
        print("Can't create " + dir_name + "!\t")
        print("A same name folder may exist\n")

os.chdir(dir_name)
stock_data = retrieve_stock_data(stock_list, '10/01/2018', '11/04/2019')
os.chdir('..')
print("\nData retrival is complete. Check your directory**" + dir_name + "**to find your data!\n")

Can't create Stock_data!	
A same name folder may exist


Data retrival is complete. Check your directory**Stock_data**to find your data!



In [48]:
stock_entropy = np.zeros(len(stock_list))
p_distribution = {}
i = 0
for stock in stock_list:
    df = stock_data[stock].iloc[:, 2:7]
    p_distribution[stock] = compute_distribution(df)
    stock_entropy[i] = compute_entropy(df)
    i += 1

print('\nChecking stock entropy\n')
print(stock_entropy)
    
relative_entropy_matrix = np.zeros([len(p_distribution),len(p_distribution)])
for i in range(len(stock_list)):
    for j in range(len(stock_list)):
        relative_entropy_matrix[i,j] = relative_entropy(p_distribution[stock_list[i]], p_distribution[stock_list[j]])
relative_entropy_df = pd.DataFrame(data=relative_entropy_matrix, index= stock_list, columns= stock_list)
print('\nChecking Relative Entropy\n')
display(relative_entropy_df)


Error: matrix U or V is not unitary matrix.


Error: matrix U or V is not unitary matrix.


Error: matrix U or V is not unitary matrix.


Error: matrix U or V is not unitary matrix.


Error: matrix U or V is not unitary matrix.


Error: matrix U or V is not unitary matrix.


Error: matrix U or V is not unitary matrix.


Error: matrix U or V is not unitary matrix.


Error: matrix U or V is not unitary matrix.


Error: matrix U or V is not unitary matrix.


Checking stock entropy

[7.92238789e-06 1.57281394e-06 1.28272087e-03 6.09982212e-05
 6.97592246e-05]

Checking Relative Entropy



Unnamed: 0,GM,F,TM,TSLA,HMC
GM,-1.601708e-16,6.961928e-06,0.0007203337,2.185338e-05,2.62009e-05
F,2.395623e-06,-1.601712e-16,0.000742077,3.265958e-05,3.809773e-05
TM,0.00318334,0.005368681,-4.908151e-22,0.001577918,0.00148061
TSLA,4.510113e-05,0.0001427462,0.0006032564,-1.1931270000000002e-23,-2.762163e-07
HMC,5.619704e-05,0.0001692869,0.0005888465,1.610801e-08,-6.780950000000001e-23


# Conclution
#### The entropy measures the uncertainty of a stock, the lower the entropy , greater uncertainty the stock will have.   GM are most stable in stock price among the fives stocks, while Tesla Ford and Honda also stable in stock price trends. Toyota are more risky in stock prices’ trend. Relative entropy compares it’s stability with other stocks. GM has high stability in stock prices among the five stocks,Ford Motor has second highest stability  in stock prices among the five stocks. Toyota has highest uncertainty among the five stocks. Tesla and Honda has similar satibilty in stock prices.