In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import date, timedelta
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [2]:
tickers = ['htz','cvx','bcs','tm','amzn','wmt','googl']
df = pd.DataFrame(columns=['Ticker', 'date', 'Time', 'Title']) 
for ticker in tickers:
    url = f'https://finviz.com/quote.ashx?t={ticker}'
    req = Request(url=url, headers={'user-agent': 'news'})
    response = urlopen(req)
    news_tables ={}
    html = BeautifulSoup(response, features = 'html.parser')
    news_table = html.find(id = 'news-table')
    news_tables[ticker] = news_table
    ignore_source = ['(Motley Fool)', '(TheStreet.com)']
    parsed = []    
    for ticker, news_table in news_tables.items():  
        for row in news_table.findAll('tr'): 
            if row.a != None:
                title = row.a.text
                source = row.span.text
                date = row.td.text.strip().split(' ')
                if len(date) > 1:    
                    date1 = date[0]
                    time = date[1]
                else:
                    time = date[0] 
            if source.strip() not in ignore_source:
                parsed.append([ticker, date1, time, title])  
    temp = pd.DataFrame(parsed, columns=['Ticker', 'date', 'Time', 'Title']) 
    df = pd.concat([df,temp],axis=0)


In [3]:
vader = SentimentIntensityAnalyzer()
p_score = lambda title: vader.polarity_scores(title)['pos']
n_score = lambda title: vader.polarity_scores(title)['neu']
neg_score = lambda title: vader.polarity_scores(title)['neg']
score = lambda title: vader.polarity_scores(title)['compound']
df['postive'] = df['Title'].apply(p_score) 
df['negative'] = df['Title'].apply(neg_score) 
df['neutral'] = df['Title'].apply(n_score) 
df['compound'] = df['Title'].apply(score) 

In [4]:
df['date'] = df['date'].replace('Today','Dec-03-23')
df

Unnamed: 0,Ticker,date,Time,Title,postive,negative,neutral,compound
0,htz,Nov-30-23,10:35AM,Hertz and Carvana: what their diverging fortun...,0.000,0.000,1.000,0.0000
1,htz,Nov-17-23,07:00AM,Hertz and EVgo Partner to Offer EV Renters One...,0.172,0.000,0.828,0.4019
2,htz,Nov-07-23,03:12PM,WeBroke: Investors Must Avoid WE Stock if It T...,0.000,0.196,0.804,-0.2960
3,htz,Oct-30-23,01:57AM,The Hertz Global Holdings Inc (HTZ) Company: A...,0.000,0.000,1.000,0.0000
4,htz,Oct-28-23,03:52PM,"Hertz Global Holdings, Inc. (NASDAQ:HTZ) Q3 20...",0.000,0.000,1.000,0.0000
...,...,...,...,...,...,...,...,...
69,googl,Nov-27-23,01:59PM,Jeff Bezos Was An Early Investor In Google A ...,0.148,0.000,0.852,0.7096
70,googl,Nov-27-23,01:19PM,Google will start deleting 'inactive' accounts...,0.000,0.000,1.000,0.0000
71,googl,Nov-27-23,01:16PM,The World's Most Profitable Company Is Not App...,0.166,0.000,0.834,0.4927
72,googl,Nov-27-23,01:06PM,Warren Buffett Stocks: Google Among 27 Names O...,0.000,0.000,1.000,0.0000


In [5]:
df['date'] = pd.to_datetime(df.date).dt.date 

  df['date'] = pd.to_datetime(df.date).dt.date


In [6]:
df = df.drop(['Time'],axis=1)
df

Unnamed: 0,Ticker,date,Title,postive,negative,neutral,compound
0,htz,2023-11-30,Hertz and Carvana: what their diverging fortun...,0.000,0.000,1.000,0.0000
1,htz,2023-11-17,Hertz and EVgo Partner to Offer EV Renters One...,0.172,0.000,0.828,0.4019
2,htz,2023-11-07,WeBroke: Investors Must Avoid WE Stock if It T...,0.000,0.196,0.804,-0.2960
3,htz,2023-10-30,The Hertz Global Holdings Inc (HTZ) Company: A...,0.000,0.000,1.000,0.0000
4,htz,2023-10-28,"Hertz Global Holdings, Inc. (NASDAQ:HTZ) Q3 20...",0.000,0.000,1.000,0.0000
...,...,...,...,...,...,...,...
69,googl,2023-11-27,Jeff Bezos Was An Early Investor In Google A ...,0.148,0.000,0.852,0.7096
70,googl,2023-11-27,Google will start deleting 'inactive' accounts...,0.000,0.000,1.000,0.0000
71,googl,2023-11-27,The World's Most Profitable Company Is Not App...,0.166,0.000,0.834,0.4927
72,googl,2023-11-27,Warren Buffett Stocks: Google Among 27 Names O...,0.000,0.000,1.000,0.0000


In [7]:
amzn = pd.read_csv('AMZN.csv')
amzn['Ticker'] = 'amzn'
data = amzn
data['Date'] = pd.to_datetime(data['Date'])
data=data.sort_values(by="Date",ascending=True)
data.set_index("Date",inplace=True)
start_date = data.index.min()
end_date = data.index.max()
all_dates = pd.date_range(start=start_date, end=end_date, freq='D')
missing_dates = all_dates.difference(data.index)
print("The count of missing dates are: ",len(missing_dates))
data = data.reindex(all_dates)
data.ffill(inplace=True)
data['Difference'] = data['Adj Close'].diff()
data = data.dropna()
data['Invest'] = 'No'
data.loc[data['Difference'] >= 0, 'Invest'] = 'Yes'
amzn = data

The count of missing dates are:  908


In [8]:
bcs = pd.read_csv('BCS.csv')
bcs['Ticker'] = 'bcs'
data = bcs
data['Date'] = pd.to_datetime(data['Date'])
data=data.sort_values(by="Date",ascending=True)
data.set_index("Date",inplace=True)
start_date = data.index.min()
end_date = data.index.max()
all_dates = pd.date_range(start=start_date, end=end_date, freq='D')
missing_dates = all_dates.difference(data.index)
print("The count of missing dates are: ",len(missing_dates))
data = data.reindex(all_dates)
data.ffill(inplace=True)
data['Difference'] = data['Adj Close'].diff()
data = data.dropna()
data['Invest'] = 'No'
data.loc[data['Difference'] >= 0, 'Invest'] = 'Yes'
bcs = data

The count of missing dates are:  112


In [9]:
cvx = pd.read_csv('CVX.csv')
cvx['Ticker'] = 'cvx'
data = cvx
data['Date'] = pd.to_datetime(data['Date'])
data=data.sort_values(by="Date",ascending=True)
data.set_index("Date",inplace=True)
start_date = data.index.min()
end_date = data.index.max()
all_dates = pd.date_range(start=start_date, end=end_date, freq='D')
missing_dates = all_dates.difference(data.index)
print("The count of missing dates are: ",len(missing_dates))
data = data.reindex(all_dates)
data.ffill(inplace=True)
data['Difference'] = data['Adj Close'].diff()
data = data.dropna()
data['Invest'] = 'No'
data.loc[data['Difference'] >= 0, 'Invest'] = 'Yes'
cvx = data

The count of missing dates are:  908


In [10]:
googl = pd.read_csv('GOOGL.csv')
googl['Ticker'] = 'googl'
data = googl
data['Date'] = pd.to_datetime(data['Date'])
data=data.sort_values(by="Date",ascending=True)
data.set_index("Date",inplace=True)
start_date = data.index.min()
end_date = data.index.max()
all_dates = pd.date_range(start=start_date, end=end_date, freq='D')
missing_dates = all_dates.difference(data.index)
print("The count of missing dates are: ",len(missing_dates))
data = data.reindex(all_dates)
data.ffill(inplace=True)
data['Difference'] = data['Adj Close'].diff()
data = data.dropna()
data['Invest'] = 'No'
data.loc[data['Difference'] >= 0, 'Invest'] = 'Yes'
googl = data

The count of missing dates are:  1011


In [11]:
htz = pd.read_csv('HTZ.csv')
htz['Ticker'] = 'htz'
data = htz
data['Date'] = pd.to_datetime(data['Date'])
data=data.sort_values(by="Date",ascending=True)
data.set_index("Date",inplace=True)
start_date = data.index.min()
end_date = data.index.max()
all_dates = pd.date_range(start=start_date, end=end_date, freq='D')
missing_dates = all_dates.difference(data.index)
print("The count of missing dates are: ",len(missing_dates))
data = data.reindex(all_dates)
data.ffill(inplace=True)
data['Difference'] = data['Adj Close'].diff()
data = data.dropna()
data['Invest'] = 'No'
data.loc[data['Difference'] >= 0, 'Invest'] = 'Yes'
htz = data

The count of missing dates are:  112


In [12]:
tm = pd.read_csv('TM.csv')
tm['Ticker'] = 'tm'
data = tm
data['Date'] = pd.to_datetime(data['Date'])
data=data.sort_values(by="Date",ascending=True)
data.set_index("Date",inplace=True)
start_date = data.index.min()
end_date = data.index.max()
all_dates = pd.date_range(start=start_date, end=end_date, freq='D')
missing_dates = all_dates.difference(data.index)
print("The count of missing dates are: ",len(missing_dates))
data = data.reindex(all_dates)
data.ffill(inplace=True)
data['Difference'] = data['Adj Close'].diff()
data = data.dropna()
data['Invest'] = 'No'
data.loc[data['Difference'] >= 0, 'Invest'] = 'Yes'
tm = data

The count of missing dates are:  112


In [13]:
wmt = pd.read_csv('WMT.csv')
wmt['Ticker'] = 'wmt'
data = wmt
data['Date'] = pd.to_datetime(data['Date'])
data=data.sort_values(by="Date",ascending=True)
data.set_index("Date",inplace=True)
start_date = data.index.min()
end_date = data.index.max()
all_dates = pd.date_range(start=start_date, end=end_date, freq='D')
missing_dates = all_dates.difference(data.index)
print("The count of missing dates are: ",len(missing_dates))
data = data.reindex(all_dates)
data['Difference'] = data['Adj Close'].diff()
data.ffill(inplace=True)
data = data.dropna()
data['Invest'] = 'No'
data.loc[data['Difference'] >= 0, 'Invest'] = 'Yes'
wmt = data

The count of missing dates are:  908


In [14]:
finance = pd.concat([amzn,bcs,cvx,googl,htz,tm,wmt],axis=0)

In [15]:
finance['date'] = pd.to_datetime(finance.index)
finance = finance.reset_index(drop=True)
finance.index = finance.index + 1

In [16]:
df = df.drop(['Title'],axis=1)

In [17]:
avg_df = df.groupby(['Ticker', 'date'])[['postive','negative','neutral','compound']].mean().reset_index()
avg_df['date'] = pd.to_datetime(avg_df['date'])

In [18]:
final =  pd.merge(avg_df, finance, on=['date','Ticker'], how='inner')

In [19]:
final

Unnamed: 0,Ticker,date,postive,negative,neutral,compound,Open,High,Low,Close,Adj Close,Volume,Difference,Invest
0,amzn,2023-11-29,0.196462,0.032231,0.771308,0.240346,147.850006,148.539993,145.970001,146.320007,146.320007,40610900.0,-0.709992,No
1,amzn,2023-11-30,0.161952,0.037286,0.800762,0.166767,144.759995,146.929993,144.330002,146.089996,146.089996,65814000.0,-0.230011,No
2,bcs,2023-10-02,0.000000,0.174000,0.826000,-0.226300,7.770000,7.800000,7.570000,7.590000,7.590000,9308300.0,-0.200000,No
3,bcs,2023-10-04,0.076667,0.079667,0.843667,-0.008600,7.550000,7.570000,7.480000,7.570000,7.570000,7859600.0,0.070000,Yes
4,bcs,2023-10-05,0.000000,0.217000,0.783000,-0.361200,7.460000,7.540000,7.450000,7.530000,7.530000,8171000.0,-0.040000,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157,wmt,2023-11-25,0.295000,0.000000,0.705000,0.505100,155.149994,156.130005,154.699997,156.059998,156.059998,4658400.0,-1.190003,No
158,wmt,2023-11-27,0.075429,0.000000,0.924571,0.100657,155.949997,157.360001,155.949997,156.770004,156.770004,7797900.0,-1.190003,No
159,wmt,2023-11-28,0.112000,0.077000,0.811000,0.058617,156.660004,158.919998,156.660004,158.639999,158.639999,7845000.0,1.869995,Yes
160,wmt,2023-11-29,0.093750,0.134000,0.772250,0.011225,158.770004,158.770004,155.610001,156.080002,156.080002,9965500.0,-2.559997,No


In [20]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import *
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

2023-12-03 23:20:33.257870: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [21]:
import numpy as np

In [85]:
# def nn_data_creation(data):
#     adj_values = data[['Adj Close']].values
#     needed = int(len(adj_values)*0.8)
#     train = adj_values[0:needed, :]
#     scaler = MinMaxScaler(feature_range=(0,1))
#     train_scaled = scaler.fit_transform(train)
#     X_train = []
#     y_train = []
#     for i in range(7, len(train)):
#         X_train.append(train_scaled[i-7:i, 0])
#         y_train.append(train_scaled[i:i+7, 0]) 
#     X_train, y_train = np.array(X_train), np.array(y_train)
#     X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
#     y_train = np.reshape(y_train, (y_train.shape[0],1))
    
#     test = data[['Adj Close']][needed:].values
#     #inputs = data[['Adj Close']][len(adj_values) - len(test) - 7:].values
#     test = np.reshape(test,(-1,1))
#     test  = scaler.transform(test)
    
#     X_test = []
#     y_test = []
#     for i in range(7,len(data)-needed):
#         X_test.append(test[i-7:i,0])
#         y_test.append(test[i:i+7,0])
#     X_test = np.array(X_test)
#     y_test = np.array(y_test)
#     X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
#     y_test = np.reshape(y_test, (y_test.shape[0],1))
#     return X_train,y_train,X_test,y_test,scaler

In [397]:
def nn_data_creation(data):
    adj_values = data[['Adj Close']].values
    needed = int(len(adj_values) * 0.8)
    
    train = adj_values[0:needed, :]
    scaler = MinMaxScaler(feature_range=(0, 1))
    train_scaled = scaler.fit_transform(train)
    
    X_train = []
    y_train = []
    
    for i in range(7, len(train)-7):
        X_train.append(train_scaled[i-7:i, 0])
        y_train.append(train_scaled[i:i+7, 0]) 
        
    X_train, y_train = np.array(X_train), np.array(y_train)
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    y_train = np.reshape(y_train, (y_train.shape[0], 7, 1))  # Corrected to match the model output
    
    test = data[['Adj Close']][needed:].values
    test = np.reshape(test, (-1, 1))
    test = scaler.transform(test)
    
    X_test = []
    y_test = []
    
    for i in range(7, len(data) - needed-7):
        X_test.append(test[i-7:i, 0])
        y_test.append(test[i:i+7, 0])
    
    X_test = np.array(X_test)
    y_test = np.array(y_test)
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
    y_test = np.reshape(y_test, (y_test.shape[0], 7, 1))  # Corrected to match the model output
    
    return X_train, y_train, X_test, y_test, scaler

In [127]:
from keras.layers import SimpleRNN
def rnn_model(X_train,y_train):
    model = Sequential()
    model.add(SimpleRNN(32, return_sequences=True, input_shape=(X_train.shape[1],1)))  
    model.add(Dropout(0.2))
    model.add(SimpleRNN(32, return_sequences=True)) 
    model.add(Dropout(0.2)) 
    model.add(SimpleRNN(32))
    model.add(Dense(7))

    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=200, batch_size=64, verbose=0)
    return model

In [478]:
X_train,y_train,X_test,y_test,scaler = nn_data_creation(amzn)

In [None]:
from sklearn.metrics import mean_squared_error, r2_score
X_train,y_train,X_test,y_test,scaler = nn_data_creation(amzn)
amzn_model_1 = rnn_model(X_train,y_train)
predictions = amzn_model_1.predict(X_test,verbose=0)
predictions = scaler.inverse_transform(predictions)
y_test = np.reshape(y_test,(y_test.shape[0],y_test.shape[1]))
y_test = scaler.inverse_transform(y_test)
print('RMSE: {}'.format(round(mean_squared_error(predictions ,y_test, squared=False), 3)))
print('R^2 score: {}'.format(round(r2_score(predictions, y_test), 3)))

In [131]:
from sklearn.metrics import mean_squared_error, r2_score
X_train,y_train,X_test,y_test,scaler = nn_data_creation(bcs)
bcs_model_1 = rnn_model(X_train,y_train)
predictions = bcs_model_1.predict(X_test,verbose=0)
predictions = scaler.inverse_transform(predictions)
y_test = np.reshape(y_test,(y_test.shape[0],y_test.shape[1]))
y_test = scaler.inverse_transform(y_test)
print('RMSE: {}'.format(round(mean_squared_error(predictions ,y_test, squared=False), 3)))
print('R^2 score: {}'.format(round(r2_score(predictions, y_test), 3)))

RMSE: 0.319
R^2 score: 0.276


In [132]:
from sklearn.metrics import mean_squared_error, r2_score
X_train,y_train,X_test,y_test,scaler = nn_data_creation(cvx)
cvx_model_1 = rnn_model(X_train,y_train)
predictions = cvx_model_1.predict(X_test,verbose=0)
predictions = scaler.inverse_transform(predictions)
y_test = np.reshape(y_test,(y_test.shape[0],y_test.shape[1]))
y_test = scaler.inverse_transform(y_test)
print('RMSE: {}'.format(round(mean_squared_error(predictions ,y_test, squared=False), 3)))
print('R^2 score: {}'.format(round(r2_score(predictions, y_test), 3)))

RMSE: 6.439
R^2 score: -0.433


In [133]:
from sklearn.metrics import mean_squared_error, r2_score
X_train,y_train,X_test,y_test,scaler = nn_data_creation(googl)
googl_model_1 = rnn_model(X_train,y_train)
predictions = googl_model_1.predict(X_test,verbose=0)
predictions = scaler.inverse_transform(predictions)
y_test = np.reshape(y_test,(y_test.shape[0],y_test.shape[1]))
y_test = scaler.inverse_transform(y_test)
print('RMSE: {}'.format(round(mean_squared_error(predictions ,y_test, squared=False), 3)))
print('R^2 score: {}'.format(round(r2_score(predictions, y_test), 3)))

RMSE: 4.916
R^2 score: 0.9


In [134]:
from sklearn.metrics import mean_squared_error, r2_score
X_train,y_train,X_test,y_test,scaler = nn_data_creation(htz)
htz_model_1 = rnn_model(X_train,y_train)
predictions = htz_model_1.predict(X_test,verbose=0)
predictions = scaler.inverse_transform(predictions)
y_test = np.reshape(y_test,(y_test.shape[0],y_test.shape[1]))
y_test = scaler.inverse_transform(y_test)
print('RMSE: {}'.format(round(mean_squared_error(predictions ,y_test, squared=False), 3)))
print('R^2 score: {}'.format(round(r2_score(predictions, y_test), 3)))

RMSE: 1.429
R^2 score: -3.702


In [135]:
from sklearn.metrics import mean_squared_error, r2_score
X_train,y_train,X_test,y_test,scaler = nn_data_creation(tm)
tm_model_1 = rnn_model(X_train,y_train)
predictions = tm_model_1.predict(X_test,verbose=0)
predictions = scaler.inverse_transform(predictions)
y_test = np.reshape(y_test,(y_test.shape[0],y_test.shape[1]))
y_test = scaler.inverse_transform(y_test)
print('RMSE: {}'.format(round(mean_squared_error(predictions ,y_test, squared=False), 3)))
print('R^2 score: {}'.format(round(r2_score(predictions, y_test), 3)))

RMSE: 6.88
R^2 score: -2.604


In [136]:
from sklearn.metrics import mean_squared_error, r2_score
X_train,y_train,X_test,y_test,scaler = nn_data_creation(wmt)
wmt_model_1 = rnn_model(X_train,y_train)
predictions = wmt_model_1.predict(X_test,verbose=0)
predictions = scaler.inverse_transform(predictions)
y_test = np.reshape(y_test,(y_test.shape[0],y_test.shape[1]))
y_test = scaler.inverse_transform(y_test)
print('RMSE: {}'.format(round(mean_squared_error(predictions ,y_test, squared=False), 3)))
print('R^2 score: {}'.format(round(r2_score(predictions, y_test), 3)))

RMSE: 5.045
R^2 score: 0.724


In [24]:
from sklearn.metrics import mean_squared_error, r2_score
X_train,y_train,X_test,y_test,scaler = nn_data_creation(amzn)
amzn_model = rnn_model(X_train,y_train)
predictions = amzn_model.predict(X_test,verbose=0)
predictions = scaler.inverse_transform(predictions)
y_test = scaler.inverse_transform(y_test)
print('RMSE: {}'.format(round(mean_squared_error(predictions ,y_test, squared=False), 3)))
print('R^2 score: {}'.format(round(r2_score(predictions, y_test), 3)))

RMSE: 3.175
R^2 score: 0.967


In [25]:
from sklearn.metrics import mean_squared_error, r2_score
X_train,y_train,X_test,y_test,scaler = nn_data_creation(bcs)
bcs_model = rnn_model(X_train,y_train)
predictions = bcs_model.predict(X_test,verbose=0)
predictions = scaler.inverse_transform(predictions)
y_test = scaler.inverse_transform(y_test)
print('RMSE: {}'.format(round(mean_squared_error(predictions ,y_test, squared=False), 3)))
print('R^2 score: {}'.format(round(r2_score(predictions, y_test), 3)))

RMSE: 0.207
R^2 score: 0.788


In [32]:
from sklearn.metrics import mean_squared_error, r2_score
X_train,y_train,X_test,y_test,scaler = nn_data_creation(cvx)
cvx_model = rnn_model(X_train,y_train)
predictions = cvx_model.predict(X_test,verbose=0)
predictions = scaler.inverse_transform(predictions)
y_test = scaler.inverse_transform(y_test)
print('RMSE: {}'.format(round(mean_squared_error(predictions ,y_test, squared=False), 3)))
print('R^2 score: {}'.format(round(r2_score(predictions, y_test), 3)))

RMSE: 5.563
R^2 score: 0.08


In [28]:
from sklearn.metrics import mean_squared_error, r2_score
X_train,y_train,X_test,y_test,scaler = nn_data_creation(googl)
googl_model = rnn_model(X_train,y_train)
predictions = googl_model.predict(X_test,verbose=0)
predictions = scaler.inverse_transform(predictions)
y_test = scaler.inverse_transform(y_test)
print('RMSE: {}'.format(round(mean_squared_error(predictions ,y_test, squared=False), 3)))
print('R^2 score: {}'.format(round(r2_score(predictions, y_test), 3)))

RMSE: 2.41
R^2 score: 0.977


In [29]:
from sklearn.metrics import mean_squared_error, r2_score
X_train,y_train,X_test,y_test,scaler = nn_data_creation(htz)
htz_model = rnn_model(X_train,y_train)
predictions = htz_model.predict(X_test,verbose=0)
predictions = scaler.inverse_transform(predictions)
y_test = scaler.inverse_transform(y_test)
print('RMSE: {}'.format(round(mean_squared_error(predictions ,y_test, squared=False), 3)))
print('R^2 score: {}'.format(round(r2_score(predictions, y_test), 3)))

RMSE: 0.88
R^2 score: 0.228


In [30]:
from sklearn.metrics import mean_squared_error, r2_score
X_train,y_train,X_test,y_test,scaler = nn_data_creation(tm)
tm_model = rnn_model(X_train,y_train)
predictions = tm_model.predict(X_test,verbose=0)
predictions = scaler.inverse_transform(predictions)
y_test = scaler.inverse_transform(y_test)
print('RMSE: {}'.format(round(mean_squared_error(predictions ,y_test, squared=False), 3)))
print('R^2 score: {}'.format(round(r2_score(predictions, y_test), 3)))

RMSE: 2.925
R^2 score: 0.74


In [31]:
from sklearn.metrics import mean_squared_error, r2_score
X_train,y_train,X_test,y_test,scaler = nn_data_creation(wmt)
wmt_model = rnn_model(X_train,y_train)
predictions = wmt_model.predict(X_test,verbose=0)
predictions = scaler.inverse_transform(predictions)
y_test = scaler.inverse_transform(y_test)
print('RMSE: {}'.format(round(mean_squared_error(predictions ,y_test, squared=False), 3)))
print('R^2 score: {}'.format(round(r2_score(predictions, y_test), 3)))

RMSE: 5.594
R^2 score: 0.701


In [395]:
def model_name(tic):
    if tic=='amzn':
        model = amzn_model_1
        data = amzn
    elif tic =='bcs':
        model = bcs_model_1
        data = bcs
    elif tic == 'cvx':
        model = cvx_model_1
        data = cvx
    elif tic == 'googl':
        model = googl_model_1
        data = googl
    elif tic == 'htz':
        model = htz_model_1
        data = htz
    elif tic == 'tm':
        model = tm_model_1
        data = tm
    else:
        model = wmt_model_1
        data = wmt
    return model,data

In [396]:
from datetime import datetime, timedelta
def next_seven(tic,date):
    model,data = model_name(tic)
    check_date = pd.to_datetime(date)
    past_val = []
    for i in range(7):
        temp = check_date - timedelta(days=i)
        if pd.to_datetime(temp) in data.index:
            val = data.loc[pd.to_datetime(temp), 'Adj Close']
            past_val.append(val)
        else:
            pass
    if len(past_val) == 7:
        past_val_arr = np.reshape(np.array(past_val),(-1,1))
        past_val_reshaped = past_val_arr.reshape(1,-1,1)
        t = model.predict(past_val_reshaped,verbose=0) 
        f = scaler.inverse_transform(t)
        return f
    else:
        return None

In [None]:
# from datetime import datetime, timedelta
# def next_seven(tic,date):
#     model,data = model_name(tic)
#     f1 = []
#     for j in range(7):
#         check_date = pd.to_datetime(date)
#         past_val = f1.copy()
#         for i in range(7-j):
#             temp = check_date - timedelta(days=i+10+j)
#             if pd.to_datetime(temp) in data.index:
#                 val = data.loc[pd.to_datetime(temp), 'Adj Close']
#                 past_val.append(val)
#             else:
#                 # Handle missing data (e.g., use a default value or skip)
#                 pass
#             #val = data.loc[pd.to_datetime(temp),'Adj Close']
#             #past_val.append(val)
#         if len(past_val)==7:
#             past_val = np.reshape(np.array(past_val),(-1,1))
#             past_val = scaler.transform(past_val)
#             past_val_reshaped = past_val.reshape(1, -1, 1)
#             t = model.predict(past_val_reshaped,verbose=0) 
#             f = scaler.inverse_transform(t)
#             f1.append(f[0][0])
#         else:
#             pass
#     return f1

In [153]:
new_f = finance
new_f['result'] = new_f.apply(lambda row: next_seven(row['Ticker'], row['date'],), axis=1)

In [171]:
new_f = new_f.replace(to_replace='None', value=np.nan).dropna()
new_f = new_f.reset_index()
new_f = new_f.drop(['index'],axis=1)
new_f

  new_mask[arr_mask] = arr[arr_mask] == x


Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Ticker,Difference,Invest,date,result
0,33.156502,33.999500,32.989498,33.866501,33.866501,73038000.0,amzn,0.375000,Yes,2015-12-08,"[[80.909454, 172.63925, 69.60054, 178.16803, 2..."
1,33.900002,33.950001,32.784000,33.239498,33.239498,103164000.0,amzn,-0.627003,No,2015-12-09,"[[80.853905, 172.72963, 69.61179, 178.19397, 2..."
2,33.279499,33.426498,32.978001,33.116001,33.116001,69110000.0,amzn,-0.123497,No,2015-12-10,"[[80.88811, 172.6776, 69.6176, 178.17477, 234...."
3,32.561501,32.894001,31.981001,32.007500,32.007500,109488000.0,amzn,-1.108501,No,2015-12-11,"[[80.88763, 172.67921, 69.61818, 178.17534, 23..."
4,32.561501,32.894001,31.981001,32.007500,32.007500,109488000.0,amzn,0.000000,Yes,2015-12-12,"[[80.88924, 172.6724, 69.61162, 178.17453, 234..."
...,...,...,...,...,...,...,...,...,...,...,...
13054,155.149994,156.130005,154.699997,156.059998,156.059998,4658400.0,wmt,-1.190003,No,2023-11-26,"[[168.90338, 204.81299, 154.91382, 218.90895, ..."
13055,155.949997,157.360001,155.949997,156.770004,156.770004,7797900.0,wmt,-1.190003,No,2023-11-27,"[[168.89877, 204.81972, 154.91498, 218.91777, ..."
13056,156.660004,158.919998,156.660004,158.639999,158.639999,7845000.0,wmt,1.869995,Yes,2023-11-28,"[[168.89474, 204.80936, 154.88419, 218.88518, ..."
13057,158.770004,158.770004,155.610001,156.080002,156.080002,9965500.0,wmt,-2.559997,No,2023-11-29,"[[168.90019, 204.80511, 154.89317, 218.88974, ..."


In [175]:
def create_next_seven(data):
    temp = pd.DataFrame(data,columns=['next_1','next_2','next_3','next_4','next_5','next_6','next_7'])
    return temp

In [166]:
# Drawback, only works from 2015, 2023

In [186]:
new_f

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Ticker,Difference,Invest,date,result
0,33.156502,33.999500,32.989498,33.866501,33.866501,73038000.0,amzn,0.375000,Yes,2015-12-08,"[[80.909454, 172.63925, 69.60054, 178.16803, 2..."
1,33.900002,33.950001,32.784000,33.239498,33.239498,103164000.0,amzn,-0.627003,No,2015-12-09,"[[80.853905, 172.72963, 69.61179, 178.19397, 2..."
2,33.279499,33.426498,32.978001,33.116001,33.116001,69110000.0,amzn,-0.123497,No,2015-12-10,"[[80.88811, 172.6776, 69.6176, 178.17477, 234...."
3,32.561501,32.894001,31.981001,32.007500,32.007500,109488000.0,amzn,-1.108501,No,2015-12-11,"[[80.88763, 172.67921, 69.61818, 178.17534, 23..."
4,32.561501,32.894001,31.981001,32.007500,32.007500,109488000.0,amzn,0.000000,Yes,2015-12-12,"[[80.88924, 172.6724, 69.61162, 178.17453, 234..."
...,...,...,...,...,...,...,...,...,...,...,...
13054,155.149994,156.130005,154.699997,156.059998,156.059998,4658400.0,wmt,-1.190003,No,2023-11-26,"[[168.90338, 204.81299, 154.91382, 218.90895, ..."
13055,155.949997,157.360001,155.949997,156.770004,156.770004,7797900.0,wmt,-1.190003,No,2023-11-27,"[[168.89877, 204.81972, 154.91498, 218.91777, ..."
13056,156.660004,158.919998,156.660004,158.639999,158.639999,7845000.0,wmt,1.869995,Yes,2023-11-28,"[[168.89474, 204.80936, 154.88419, 218.88518, ..."
13057,158.770004,158.770004,155.610001,156.080002,156.080002,9965500.0,wmt,-2.559997,No,2023-11-29,"[[168.90019, 204.80511, 154.89317, 218.88974, ..."


In [328]:
new_f.to_csv('next_7.csv')

In [189]:
temp = pd.DataFrame(new_f['result'])
temp[['next_1','next_2','next_3','next_4','next_5','next_6','next_7']] = temp['result'].apply(lambda x: pd.Series(x[0]))

In [301]:
f_new = pd.concat([temp,new_f],axis=1)
f_new = f_new.drop(['result'],axis=1)

In [302]:
f_new = f_new.drop(['Open','Close','High','Low','Close','Volume','Adj Close'],axis=1)

In [303]:
no_senti = f_new.drop(['Difference'],axis=1)

In [304]:
senti = pd.merge(final,no_senti,on=['Ticker','date','Invest'])
senti = senti.dropna()
senti = senti.drop(['Open','High', 'Low', 'Close', 'Adj Close', 'Volume', 'Difference'],axis=1)

In [305]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
no_senti['Ticker']= le.fit_transform(no_senti['Ticker'])
senti['Ticker']= le.transform(senti['Ticker'])
no_senti = no_senti.drop(['date'],axis=1)
senti = senti.drop(['date'],axis=1)

In [306]:
le = LabelEncoder()
no_senti['Invest']= le.fit_transform(no_senti['Invest'])
senti['Invest']= le.transform(senti['Invest'])

In [370]:
no_senti = no_senti.drop(['Ticker'],axis=1)

In [463]:
senti = senti.drop(['Ticker'],axis=1)

In [464]:
X_train_no_senti,X_test_no_senti,y_train_no_senti,y_test_no_senti = train_test_split(no_senti.drop(['Invest'],axis=1).values,no_senti['Invest'].values,test_size=0.3,random_state=42)
X_train_senti,X_test_senti,y_train_senti,y_test_senti = train_test_split(senti.drop(['Invest'],axis=1).values,senti['Invest'].values,test_size=0.1,random_state=42)

In [465]:
scaler = MinMaxScaler(feature_range=(0,1))
X_train_no_senti = scaler.fit_transform(X_train_no_senti)
X_test_no_senti = scaler.transform(X_test_no_senti)
X_train_senti = scaler.fit_transform(X_train_senti)
X_test_senti = scaler.transform(X_test_senti)

In [466]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

model_no_senti = LogisticRegression()

model_no_senti.fit(X_train_no_senti,y_train_no_senti)


In [467]:
y_pred = model_no_senti.predict(X_test_no_senti)
accuracy = accuracy_score(y_test_no_senti, y_pred)
print(f'Accuracy: {accuracy}')

Accuracy: 0.630423685553854


In [468]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

model_senti = LogisticRegression()
model_senti.fit(X_train_senti,y_train_senti)
y_pred = model_senti.predict(X_test_senti)
accuracy = accuracy_score(y_test_senti, y_pred)
print(f'Accuracy: {accuracy}')

Accuracy: 0.47058823529411764


In [479]:
from datetime import datetime, timedelta
def next_seven(tic,date):
    model,data = model_name(tic)
    check_date = pd.to_datetime(date)
    past_val = []
    for i in range(7):
        temp = check_date - timedelta(days=i)
        if pd.to_datetime(temp) in data.index:
            val = data.loc[pd.to_datetime(temp), 'Adj Close']
            past_val.append(val)
        else:
            pass
    if len(past_val) == 7:
        past_val_arr = np.reshape(np.array(past_val),(-1,1))
        past_val_reshaped = past_val_arr.reshape(1,-1,1)
        t = model.predict(past_val_reshaped,verbose=0) 
        f = scaler.inverse_transform(t)
        return f
    else:
        return None

In [None]:
user_input = input('Enter the date and Stock Name [AMZN,BCS,CVX,GOOGL,HTZ,TM,WMT]')
date,tick = user_input.split(',')[0],user_input.split(',')[1]
next_seven_days = next_seven(tick,date)
l = final[(final['Ticker'] == tick) & (final['date'] == date)].index.to_list()
if len(l)>0:
        id = l[0]
        next_seven_days= np.insert(next_seven_days,0,final.iloc[id]['postive'])
        next_seven_days= np.insert(next_seven_days,1,final.iloc[id]['negative'])
        next_seven_days= np.insert(next_seven_days,2,final.iloc[id]['neutral'])
        next_seven_days =np.insert(next_seven_days,3,final.iloc[id]['compound'])
        next_seven_days = next_seven_days.reshape(1,-1)
        predict = model_senti.predict(next_seven_days)
else:
    next_seven_days = next_seven_days.reshape(1,-1)
    predict = model_no_senti.predict(next_seven_days)

if predict == 1:
      print('You can')
else:
      print('You Cannot')

You can
