# TESLA Stock Article extractions

In [23]:
import requests

# api_key = '23f5208b7e2940aeb8b72c67195565b2'
api_key = 'bd9bb272078b41ebb0ee0a905d4a9532'
date_from = '2023-12-09'
date_to = '2024-01-08'
# Expanded list of sources
sources = 'bloomberg,cnbc,reuters,financial-times,techcrunch,the-wall-street-journal,the-verge,business-insider,the-economist,wired,engadget,bbc-news,fortune,techradar'

# Broadening the search query
query = 'Elon Musk OR Tesla OR "Electric Vehicles" OR "SpaceX" OR "Neuralink" OR "Twitter acquisition" OR "Tesla stock" OR "Autonomous driving" OR "Battery technology" OR "Solar energy" OR "Hyperloop" OR "Mars colonization"'

url = f'https://newsapi.org/v2/everything?q={query}&from={date_from}&to={date_to}&sources={sources}&apiKey={api_key}'
response = requests.get(url)
data = response.json()

# Check for 'articles' in response and extract headlines with publication dates
if 'articles' in data:
    news_data = [(article['title'], article['publishedAt'][:10]) for article in data['articles']]
    for headline, date in news_data:
        print(f"Date: {date}, Headline: {headline}")
else:
    print("Error or no articles found. Response:")
    print(data)


Date: 2023-12-23, Headline: The Race to Put Brain Implants in People Is Heating Up
Date: 2023-12-18, Headline: Jeff Bezos wants Elon Musk to know Blue Origin is serious now
Date: 2023-12-28, Headline: The Most Dangerous People on the Internet in 2023
Date: 2023-12-28, Headline: Musk's latest threat from China is a new luxury EV built by one of its biggest smartphone firms
Date: 2023-12-22, Headline: The startup that attempted to bring Elon Musk's futuristic hyperloop dream to life is shutting down
Date: 2023-12-27, Headline: Elon Musk trolled his way through 2023. He's still ending the year $100 billion up.
Date: 2023-12-14, Headline: My Surprisingly Unbiased Week With Elon Musk's 'Politically Biased' Chatbot
Date: 2024-01-04, Headline: SpaceX is accused of illegally firing employees who wrote a letter criticizing Elon Musk
Date: 2023-12-17, Headline: Ro Khanna says Elon Musk is 'unparalleled in genius' as an entrepreneur, but 'then you see his tweet that's like a seventh grader'
Date:

# Sentiment Analysis on News Articles

In [24]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Initialize tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('yiyanghkust/finbert-tone')
model = AutoModelForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone')

# Extract just the headlines from news_data
headlines = [headline for headline, _ in news_data]

# Tokenize and encode headlines
max_length = 512  # You can adjust this based on your requirements
inputs = tokenizer(headlines, padding=True, truncation=True, max_length=max_length, return_tensors="pt")

# Predict sentiment
with torch.no_grad():
    outputs = model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

# Pair each sentiment score with its corresponding date
sentiment_data = []
for (headline, date), prediction in zip(news_data, predictions):
    sentiment_data.append({
        'date': date,
        'headline': headline,
        'sentiment_positive': prediction[0].item(),
        'sentiment_neutral': prediction[1].item(),
        'sentiment_negative': prediction[2].item()
    })


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [25]:
sentiment_data

[{'date': '2023-12-23',
  'headline': 'The Race to Put Brain Implants in People Is Heating Up',
  'sentiment_positive': 0.9928834438323975,
  'sentiment_neutral': 6.643677625106648e-05,
  'sentiment_negative': 0.007050192449241877},
 {'date': '2023-12-18',
  'headline': 'Jeff\xa0Bezos\xa0wants\xa0Elon Musk\xa0to know Blue Origin is serious now',
  'sentiment_positive': 0.9647932052612305,
  'sentiment_neutral': 0.00013012185809202492,
  'sentiment_negative': 0.03507668524980545},
 {'date': '2023-12-28',
  'headline': 'The Most Dangerous People on the Internet in 2023',
  'sentiment_positive': 0.41556516289711,
  'sentiment_neutral': 0.00012190965207992122,
  'sentiment_negative': 0.584312915802002},
 {'date': '2023-12-28',
  'headline': "Musk's latest threat from China is a new luxury EV built by one of its biggest smartphone firms",
  'sentiment_positive': 0.0002295366721227765,
  'sentiment_neutral': 1.0273206640931676e-07,
  'sentiment_negative': 0.9997703433036804},
 {'date': '2023

# Historical Stock Data and Sentiment Analysis Combined

In [26]:
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import StandardScaler

# Convert sentiment_data to a DataFrame
sentiment_df = pd.DataFrame(sentiment_data)

# Convert date to datetime and set as index
sentiment_df['date'] = pd.to_datetime(sentiment_df['date'])
sentiment_df.set_index('date', inplace=True)

# Aggregate sentiment scores by date
average_sentiment = sentiment_df.groupby('date').mean()

# Download stock data
ticker_symbol = 'TSLA'
stock_data = yf.download(ticker_symbol, start='2023-12-09', end='2024-01-08')

# Selecting the required columns
stock_data = stock_data[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]

# Convert stock_data index to datetime
stock_data.index = pd.to_datetime(stock_data.index)

# Combine stock data with sentiment data
combined_data = stock_data.join(average_sentiment)

# Interpolate missing values
combined_data = combined_data.interpolate(method='time')

# Scale price-related columns
price_cols = ['Open', 'High', 'Low', 'Close', 'Adj Close']
scaler_prices = StandardScaler()
combined_data[price_cols] = scaler_prices.fit_transform(combined_data[price_cols])

# Scale Volume column independently
scaler_volume = StandardScaler()
combined_data['Volume'] = scaler_volume.fit_transform(combined_data[['Volume']])

print(combined_data)

[*********************100%%**********************]  1 of 1 completed
                Open      High       Low     Close  Adj Close    Volume  \
Date                                                                      
2023-12-11 -0.764383 -1.092344 -0.895237 -1.094194  -1.094194 -0.735566   
2023-12-12 -1.271462 -1.638720 -1.339923 -1.450870  -1.450870 -0.870829   
2023-12-13 -1.799114 -1.477877 -2.044215 -1.152988  -1.152988  1.794988   
2023-12-14 -0.948336  0.189493 -0.480363  0.383459   0.383459  2.555786   
2023-12-15  0.260667  0.220188  0.452484  0.703551   0.703551  1.242265   
2023-12-18  0.571690  0.786208  0.832578  0.518028   0.518028  0.232379   
2023-12-19  0.535383  0.737096  1.037530  1.189570   1.189570 -0.273973   
2023-12-20  0.889976  0.921268  0.291006 -0.127384  -0.127384  0.686491   
2023-12-21  0.344170  0.302452  0.483537  0.834202   0.834202 -0.124523   
2023-12-22  0.932335  0.722363  0.833819  0.578126   0.578126 -0.979564   
2023-12-26  0.657616  0.691668

  average_sentiment = sentiment_df.groupby('date').mean()


In [27]:
import pandas as pd
df = pd.DataFrame(combined_data)
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,sentiment_positive,sentiment_neutral,sentiment_negative
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-12-11,-0.764383,-1.092344,-0.895237,-1.094194,-1.094194,-0.735566,0.886077,7.668485e-06,0.113915
2023-12-12,-1.271462,-1.63872,-1.339923,-1.45087,-1.45087,-0.870829,0.771475,0.2236879,0.004837
2023-12-13,-1.799114,-1.477877,-2.044215,-1.152988,-1.152988,1.794988,0.436152,0.2644239,0.299424
2023-12-14,-0.948336,0.189493,-0.480363,0.383459,0.383459,2.555786,0.997672,2.167144e-05,0.002306
2023-12-15,0.260667,0.220188,0.452484,0.703551,0.703551,1.242265,0.930649,0.05226901,0.017082
2023-12-18,0.57169,0.786208,0.832578,0.518028,0.518028,0.232379,0.741102,0.0001232745,0.258774
2023-12-19,0.535383,0.737096,1.03753,1.18957,1.18957,-0.273973,0.992385,0.0004074428,0.007207
2023-12-20,0.889976,0.921268,0.291006,-0.127384,-0.127384,0.686491,0.594985,0.2030768,0.201938
2023-12-21,0.34417,0.302452,0.483537,0.834202,0.834202,-0.124523,0.99952,9.498297e-07,0.000479
2023-12-22,0.932335,0.722363,0.833819,0.578126,0.578126,-0.979564,0.756659,0.1999968,0.043344


# Transformer Architecture

In [28]:
# import pandas as pd
# import numpy as np
# import tensorflow as tf
# from tensorflow.keras.models import Model
# from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LayerNormalization, MultiHeadAttention
# from tensorflow.keras.optimizers import Adam
# from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# from tensorflow.keras import Sequential

# # Assuming 'combined_data' is your pre-processed DataFrame

# # Feature selection
# selected_features = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'sentiment_positive', 'sentiment_neutral', 'sentiment_negative']
# data = combined_data[selected_features]

# # Time series split
# train_size = int(len(data) * 0.7)
# val_size = int(len(data) * 0.15)
# train_data = data.iloc[:train_size]
# val_data = data.iloc[train_size:train_size + val_size]
# test_data = data.iloc[train_size + val_size:]

# # Separating features and target
# # Assuming 'Close' is the target variable
# X_train = train_data.drop('Close', axis=1)
# y_train = train_data['Close']
# X_val = val_data.drop('Close', axis=1)
# y_val = val_data['Close']
# X_test = test_data.drop('Close', axis=1)
# y_test = test_data['Close']

# # Reshaping the data - assuming your data does not have a sequence dimension
# # Adding a dummy sequence dimension
# X_train = np.expand_dims(X_train, axis=1)
# X_val = np.expand_dims(X_val, axis=1)
# X_test = np.expand_dims(X_test, axis=1)

# # Ensure that the data is of type float32 (if it's not already)
# X_train = X_train.astype('float32')
# X_val = X_val.astype('float32')
# X_test = X_test.astype('float32')

# # Define your custom transformer layer
# class CustomTransformerLayer(tf.keras.layers.Layer):
#     def __init__(self, num_layers=6, d_model=64, num_heads=8, dff=256, rate=0.1):
#         super(CustomTransformerLayer, self).__init__()
#         self.num_layers = num_layers
#         self.d_model = d_model

#         self.mhas = [MultiHeadAttention(key_dim=d_model, num_heads=num_heads) for _ in range(num_layers)]
#         self.ffns = [tf.keras.Sequential([
#                       tf.keras.layers.Dense(dff, activation='relu'),
#                       tf.keras.layers.Dense(d_model)]) for _ in range(num_layers)]
#         self.layernorms1 = [LayerNormalization(epsilon=1e-6) for _ in range(num_layers)]
#         self.layernorms2 = [LayerNormalization(epsilon=1e-6) for _ in range(num_layers)]
#         self.dropout = tf.keras.layers.Dropout(rate)

#     def call(self, x, training):
#         for i in range(self.num_layers):
#             attn_output = self.mhas[i](x, x)
#             attn_output = self.dropout(attn_output, training=training)
#             out1 = self.layernorms1[i](x + attn_output)

#             ffn_output = self.ffns[i](out1)
#             ffn_output = self.dropout(ffn_output, training=training)
#             x = self.layernorms2[i](out1 + ffn_output)

#         return x

# # Create the StockTransformer model
# class StockTransformer(tf.keras.Model):
#     def __init__(self, num_layers=6, d_model=64, num_heads=8, dff=256, rate=0.1):
#         super(StockTransformer, self).__init__()
#         self.transformer = CustomTransformerLayer(num_layers=num_layers, d_model=d_model, num_heads=num_heads, dff=dff, rate=rate)
#         self.flatten = tf.keras.layers.Flatten()
#         self.final_layer = tf.keras.layers.Dense(1, activation='linear')

#     def call(self, x, training=False):
#         x = self.transformer(x, training)
#         x = self.flatten(x)
#         return self.final_layer(x)

# # Instantiate the model
# model = StockTransformer()

# # Compile the model
# model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# # Training
# history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val))

# # Evaluation
# predictions = model.predict(X_test)
# mse = mean_squared_error(y_test, predictions)
# mae = mean_absolute_error(y_test, predictions)
# r2 = r2_score(y_test, predictions)

# print(f'MSE: {mse}, MAE: {mae}, R^2: {r2}')


In [29]:
# import matplotlib.pyplot as plt

# # Extracting loss and validation loss from the history object
# loss = history.history['loss']
# val_loss = history.history['val_loss']
# epochs = range(1, len(loss) + 1)

# # Plotting
# plt.figure(figsize=(12, 6))
# plt.plot(epochs, loss, 'bo', label='Training loss')
# plt.plot(epochs, val_loss, 'b', label='Validation loss')
# plt.title('Training and Validation Loss')
# plt.xlabel('Epochs'); plt.grid();
# plt.ylabel('Loss')
# plt.legend()
# plt.show()


In [30]:
# import pandas as pd
# import numpy as np
# import tensorflow as tf
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense, Dropout
# from tensorflow.keras.optimizers import Adam
# from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# import matplotlib.pyplot as plt

# # Separating features and target
# X_train = train_data.drop('Close', axis=1)
# y_train = train_data['Close']
# X_val = val_data.drop('Close', axis=1)
# y_val = val_data['Close']
# X_test = test_data.drop('Close', axis=1)
# y_test = test_data['Close']

# # Simple Neural Network Model
# model = Sequential()
# model.add(Dense(32, activation='relu', input_shape=(X_train.shape[1],)))
# model.add(Dropout(0.2))
# model.add(Dense(16, activation='relu'))
# model.add(Dense(1))

# # Compile the model
# model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# # Training
# history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val))

# # Plotting Training and Validation Loss
# plt.figure(figsize=(10, 6))
# plt.plot(history.history['loss'], label='Training Loss')
# plt.plot(history.history['val_loss'], label='Validation Loss')
# plt.title('Training and Validation Loss')
# plt.xlabel('Epochs'); plt.grid();
# plt.ylabel('Loss')
# plt.legend()
# plt.show()

In [31]:
# import pandas as pd
# import numpy as np
# import tensorflow as tf
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense, Dropout
# from tensorflow.keras.optimizers import Adam
# from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# from sklearn.preprocessing import StandardScaler
# import plotly.graph_objects as go

# # Extract 'Close' prices from the training data
# close_prices_train = train_data[['Close']]

# # Scale 'Close' prices using only training data
# scaler_close = StandardScaler()
# scaler_close.fit(close_prices_train)

# # Now scale the 'Close' column in train, validation, and test sets
# y_train_scaled = scaler_close.transform(train_data[['Close']])
# y_val_scaled = scaler_close.transform(val_data[['Close']])
# y_test_scaled = scaler_close.transform(test_data[['Close']])

# # Prepare the features (excluding 'Close' column)
# X_train = train_data.drop('Close', axis=1)
# X_val = val_data.drop('Close', axis=1)
# X_test = test_data.drop('Close', axis=1)

# # Simple Neural Network Model
# model = Sequential()
# model.add(Dense(32, activation='relu', input_shape=(X_train.shape[1],)))
# model.add(Dropout(0.2))
# model.add(Dense(16, activation='relu'))
# model.add(Dense(1))

# # Compile the model
# model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# # Training
# history = model.fit(X_train, y_train_scaled, epochs=50, batch_size=32, validation_data=(X_val, y_val_scaled))

# # Evaluation
# predictions_scaled = model.predict(X_test)
# mse = mean_squared_error(y_test_scaled, predictions_scaled)
# mae = mean_absolute_error(y_test_scaled, predictions_scaled)
# r2 = r2_score(y_test_scaled, predictions_scaled)

# print(f'MSE: {mse}, MAE: {mae}, R^2: {r2}')

# # For inverse scaling, extract 'Close' column index from original dataset
# close_col_index = combined_data.columns.get_loc('Close')

# # Reshape y_test and predictions for inverse transform
# y_test_reshaped = np.expand_dims(y_test.values, axis=1)
# predictions_reshaped = np.expand_dims(predictions.flatten(), axis=1)

# # Create dummy array for inverse scaling
# dummy_array = np.zeros((len(y_test_reshaped), len(combined_data.columns)))
# dummy_array[:, close_col_index] = y_test_reshaped.flatten()

# # Inverse transform using the dummy array
# y_test_original = scaler_prices.inverse_transform(dummy_array)[:, close_col_index]

# # Repeat for predictions
# dummy_array[:, close_col_index] = predictions_reshaped.flatten()
# predictions_original = scaler_prices.inverse_transform(dummy_array)[:, close_col_index]

# # Plotting with Plotly
# fig = go.Figure()

# # Actual values
# fig.add_trace(go.Scatter(x=np.arange(len(y_test_original)), y=y_test_original, mode='lines', name='Actual'))

# # Predictions
# fig.add_trace(go.Scatter(x=np.arange(len(predictions_original)), y=predictions_original, mode='lines', name='Predicted'))

# fig.update_layout(title='Actual vs Predicted Stock Prices',
#                   xaxis_title='Data Points',
#                   yaxis_title='Stock Price',
#                   legend_title='Legend')

# fig.show()


In [86]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import plotly.graph_objects as go
from tensorflow.keras.callbacks import EarlyStopping

selected_features = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'sentiment_positive', 'sentiment_neutral', 'sentiment_negative']
data = combined_data[selected_features]

train_size = int(len(data) * 0.7)
val_size = int(len(data) * 0.15)
train_data = data.iloc[:train_size]
val_data = data.iloc[train_size:train_size + val_size]
test_data = data.iloc[train_size + val_size:]

# Prepare features and target for model training
X_train = train_data.drop('Close', axis=1)
y_train = train_data['Close']
X_val = val_data.drop('Close', axis=1)
y_val = val_data['Close']
X_test = test_data.drop('Close', axis=1)
y_test = test_data['Close']

# Define and compile the neural network model
model = Sequential()
model.add(Dense(32, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.2))
model.add(Dense(16, activation='relu'))
model.add(Dense(1))
model.compile(optimizer=Adam(learning_rate=0.01), loss='mean_squared_error')

# Train the model
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

# Train the model with early stopping
history = model.fit(
    X_train, y_train,
    epochs=50,  # Set back to 50 or an arbitrarily large number
    batch_size=8,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping]  # Add the early stopping callback here
)

train_loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(train_loss) + 1)

# Convert the epochs range object to a list
epochs_list = list(epochs)

# Plotting the training and validation loss
loss_fig = go.Figure()
loss_fig.add_trace(go.Scatter(x=epochs_list, y=train_loss, mode='lines', name='Training Loss'))
loss_fig.add_trace(go.Scatter(x=epochs_list, y=val_loss, mode='lines', name='Validation Loss'))
loss_fig.update_layout(title='Training and Validation Loss per Epoch',
                       xaxis_title='Epochs',
                       yaxis_title='Loss',
                       legend_title='Type')
loss_fig.show()

# Make predictions
predictions = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)
print(f'MSE: {mse}, MAE: {mae}, R^2: {r2}')

# Reshape y_test and predictions for inverse transform
y_test_reshaped = y_test.values.reshape(-1, 1)
predictions_reshaped = predictions.reshape(-1, 1)

num_scaled_cols = 5

# Create separate dummy arrays for inverse scaling
dummy_array_y_test = np.zeros((len(y_test_reshaped), num_scaled_cols))
dummy_array_predictions = np.zeros((len(predictions_reshaped), num_scaled_cols))

# Fill in the 'Close' column values in the dummy arrays
# Assuming 'Close' is the last of the scaled columns
dummy_array_y_test[:, -1] = y_test_reshaped.flatten()
dummy_array_predictions[:, -1] = predictions_reshaped.flatten()

# Inverse transform the 'Close' prices using the dummy arrays
y_test_original = scaler_prices.inverse_transform(dummy_array_y_test)[:, -1]
predictions_original = scaler_prices.inverse_transform(dummy_array_predictions)[:, -1]

# Extracting testing dates
test_dates = test_data.index

# Plotting with Plotly
fig = go.Figure()
fig.add_trace(go.Scatter(x=test_dates, y=y_test_original, mode='lines', name='Actual'))
fig.add_trace(go.Scatter(x=test_dates, y=predictions_original, mode='lines', name='Predicted'))
fig.update_layout(title='Actual vs Predicted Stock Prices', xaxis_title='Date', yaxis_title='Stock Price', legend_title='Legend')
fig.show()

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 12: early stopping


MSE: 0.039367231334915334, MAE: 0.17935213626856833, R^2: 0.8882782790222558


In [42]:
model.summary()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_27 (Dense)            (None, 32)                288       
                                                                 
 dropout_9 (Dropout)         (None, 32)                0         
                                                                 
 dense_28 (Dense)            (None, 16)                528       
                                                                 
 dense_29 (Dense)            (None, 1)                 17        
                                                                 
Total params: 833 (3.25 KB)
Trainable params: 833 (3.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
