In [1]:
#Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [2]:
#Reading data file
df2 = pd.read_csv('data_prepared.csv')

In [3]:
df2 = df2.loc[df2['Security Type'] == 'Common Stock']

In [4]:
df2 = df2.dropna()

In [5]:
#Select appropriate features
features = ["Security Price", "Volume (90 Day Avg)", "Market Capitalization", "Dividend Yield", "Total Return (1 Yr Annualized)", "Beta (1 Year Annualized)", "Standard Deviation (1 Yr Annualized)", "S&P Global Market Intelligence Valuation", "S&P Global Market Intelligence Quality", "S&P Global Market Intelligence Growth Stability", "S&P Global Market Intelligence Financial Health", "Institutional Ownership", "Institutional Ownership (Last vs. Prior Qtr)"]

X = df2.copy()
y = X.pop('Analyst Sentiment')
X = X.loc[:, features]

In [6]:
y.value_counts()

neutral         289
bearish         175
bullish         149
very bullish     68
very bearish     45
Name: Analyst Sentiment, dtype: int64

In [7]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()

from sklearn.model_selection import cross_val_score
accuracy = cross_val_score(model, X, y, cv = 10, scoring='accuracy')
error_rate = 1 - (1/10)*np.sum(accuracy)
print('Cross-validation error rate:{}'.format(error_rate))

Cross-validation error rate:0.6199771689497717


In [8]:
# 10-Fold Cross Validation
def cross_validation_clas (df, func):
    from sklearn.model_selection import KFold
    kf = KFold(n_splits = 10, shuffle = True, random_state = 10)
    kf_accuracy = []
    y_hat_array = []
    
    for train, test in kf.split(df):
        X_train = df.iloc[train].loc[:, df.columns != 'Analyst Sentiment']
        X_train = X_train.squeeze()
        X_test = df.iloc[test].loc[:, df.columns != 'Analyst Sentiment']
        y_train = df.iloc[train].loc[:,'Analyst Sentiment']
        y_test = df.iloc[test].loc[:,'Analyst Sentiment']
        
        reg = func.fit(X_train, y_train)
        y_hat = reg.predict(X_test)
        y_hat_array.append(y_hat)
        from sklearn.metrics import accuracy_score
        kf_accuracy.append(accuracy_score(y_test, y_hat))
                
    kf_ACCURACY = (1/10) * np.sum(kf_accuracy)
        
    return (kf_ACCURACY, y_hat_array)

In [9]:
df2_2 = pd.concat([X, y], axis = 1)
model_cv_accuracy = cross_validation_clas(df2_2, LogisticRegression())
1-model_cv_accuracy[0]

0.6018835616438356

In [10]:
model = LogisticRegression(random_state=0)
model.fit(X,y)
y_hat = model.predict(X)

In [11]:
def conf_mat(y, y_hat):
    from sklearn.metrics import confusion_matrix
    cm = confusion_matrix(y, y_hat)
    
    return(cm)

In [12]:
cm = conf_mat(y, y_hat)
print('Confusion matrix\n\n', cm)
print('\nTrue Positives(TP) = ', cm[0,0])
print('\nTrue Negatives(TN) = ', cm[1,1])
print('\nFalse Positives(FP) = ', cm[0,1])
print('\nFalse Negatives(FN) = ', cm[1,0])

Confusion matrix

 [[  0   0 175   0   0]
 [  0   0 149   0   0]
 [  0   0 289   0   0]
 [  0   0  45   0   0]
 [  0   0  68   0   0]]

True Positives(TP) =  0

True Negatives(TN) =  0

False Positives(FP) =  0

False Negatives(FN) =  0


In [13]:
y_hat

array(['neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral',
       'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral',
       'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral',
       'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral',
       'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral',
       'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral',
       'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral',
       'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral',
       'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral',
       'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral',
       'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral',
       'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral',
       'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral',
       'neutral', 'neutral', 'neutral', 'neutral', 