<a href="https://colab.research.google.com/github/xanderrp2/StockAI/blob/main/ModelTraining.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Concatenate
from sklearn.model_selection import train_test_split

# ---------------------------------------------------------------------------
# Data loading, feature preparation and train/test split
# ---------------------------------------------------------------------------
# Read the dataset from the mounted Google Drive.
data = pd.read_csv('/content/drive/My Drive/stockNews.csv')

# The 'headlines', 'contents' and 'MACD' columns store Python objects as text,
# so every cell is evaluated back into an object.
# NOTE(review): eval() executes whatever code the CSV cell contains. Only run
# this on a file you trust. ast.literal_eval is the usual safer replacement if
# the cells are plain literals -- confirm the stored format before switching.
headlines = [eval(cell) for cell in data['headlines']]
contents = [eval(cell) for cell in data['contents']]
macd = np.array([eval(cell) for cell in data['MACD']])

# Numeric columns are taken as-is.
percent_changes = data['percent_change'].values
ema = data['EMA'].values

# Post-pad the headline and content sequences so all rows share one length.
X_headlines = pad_sequences(headlines, padding='post')
X_contents = pad_sequences(contents, padding='post')

# EMA and the first two MACD columns each become an (n_samples, 1) column.
X_ema = np.array(ema).reshape(-1, 1)
X_macd = macd
X_macd_first = X_macd[:, 0].reshape(-1, 1)
X_macd_second = X_macd[:, 1].reshape(-1, 1)

# Regression target.
y = np.array(percent_changes)

# 80/20 train/test split; all arrays are split together so rows stay aligned,
# and the fixed random_state makes the split reproducible.
_arrays_to_split = (X_headlines, X_contents, X_ema, X_macd_first, X_macd_second, y)
_split_result = train_test_split(*_arrays_to_split, test_size=0.2, random_state=42)
(X_headlines_train, X_headlines_test,
 X_contents_train, X_contents_test,
 X_ema_train, X_ema_test,
 X_macd_first_train, X_macd_first_test,
 X_macd_second_train, X_macd_second_test,
 y_train, y_test) = _split_result

# ---------------------------------------------------------------------------
# Model architecture: five input branches -> concatenate -> dense -> 1 output
# ---------------------------------------------------------------------------
def _dense_branch(width, units, name):
    """Return (input tensor, ReLU Dense output) for one named model input."""
    branch_input = Input(shape=(width,), name=name)
    branch_output = Dense(units, activation="relu")(branch_input)
    return branch_input, branch_output


# Text branches: input width equals the padded sequence length of each matrix.
input_headlines, headlines_dense = _dense_branch(
    X_headlines.shape[1], 64, "headlines_input"
)
input_contents, contents_dense = _dense_branch(
    X_contents.shape[1], 64, "contents_input"
)

# Indicator branches: one scalar per sample each.
input_ema, ema_dense = _dense_branch(1, 16, "ema_input")
input_macd_first, macd_first_dense = _dense_branch(1, 16, "macd_first_input")
input_macd_second, macd_second_dense = _dense_branch(1, 16, "macd_second_input")

# Merge every branch and regress to a single linear output.
_branch_outputs = [
    headlines_dense,
    contents_dense,
    ema_dense,
    macd_first_dense,
    macd_second_dense,
]
merged = Concatenate()(_branch_outputs)
combined_dense = Dense(64, activation="relu")(merged)
out = Dense(1, activation="linear", name="output_layer")(combined_dense)

# The order of this list is the order in which arrays must be passed to
# fit / evaluate / predict.
_model_inputs = [
    input_headlines,
    input_contents,
    input_ema,
    input_macd_first,
    input_macd_second,
]
model = Model(inputs=_model_inputs, outputs=out)

# Mean squared error loss, with mean absolute error reported as a metric.
model.compile(optimizer="adam", loss="mse", metrics=["mae"])

# ---------------------------------------------------------------------------
# Training and evaluation
# ---------------------------------------------------------------------------
# Arrays are listed in the same order as the model's inputs.
_train_inputs = [
    X_headlines_train,
    X_contents_train,
    X_ema_train,
    X_macd_first_train,
    X_macd_second_train,
]
_test_inputs = [
    X_headlines_test,
    X_contents_test,
    X_ema_test,
    X_macd_first_test,
    X_macd_second_test,
]

# Fit for 8 epochs; 20% of the training rows are held out for validation.
history = model.fit(
    _train_inputs,
    y_train,
    validation_split=0.2,
    epochs=8,
    batch_size=32,
    verbose=1,
)

# Score the held-out test set; evaluate() yields the loss and then the MAE metric.
_test_scores = model.evaluate(_test_inputs, y_test, verbose=0)
test_loss, test_mae = _test_scores

# Report the test results.
print(f"Test Loss: {test_loss}, Test MAE: {test_mae}")



Epoch 1/8
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - loss: 8947.9395 - mae: 68.0841 - val_loss: 4311.5859 - val_mae: 58.6804
Epoch 2/8
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 190ms/step - loss: 6669.2803 - mae: 58.4859 - val_loss: 3078.8088 - val_mae: 49.4973
Epoch 3/8
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step - loss: 4753.5073 - mae: 49.0792 - val_loss: 2065.0547 - val_mae: 40.4207
Epoch 4/8
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - loss: 3183.4019 - mae: 39.7950 - val_loss: 1265.6163 - val_mae: 31.4908
Epoch 5/8
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step - loss: 1947.5564 - mae: 30.6318 - val_loss: 673.9673 - val_mae: 22.7657
Epoch 6/8
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step - loss: 1036.6542 - mae: 21.6599 - val_loss: 280.1128 - val_mae: 14.3622
Epoch 7/8
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

def predict_todays_percent_change(company_name, headlines, contents, ema, macd):
    """Predict a percent change from today's news items and indicator values.

    Reads these module-level names, which must already exist when the function
    is called: ``vectorize_text``, ``words``, ``model``, ``np``, and the
    training matrices ``X_headlines`` / ``X_contents`` (only their second
    dimension is used, to pad to the same length as in training).

    Args:
        company_name: Accepted for the caller's convenience; not used here.
        headlines: Iterable of items, each passed to ``vectorize_text``.
        contents: Iterable of items, each passed to ``vectorize_text``.
        ema: A single EMA value.
        macd: Sequence whose elements 0 and 1 feed the two MACD inputs
            (presumably (MACD, Signal) -- confirm against the MACD helper).

    Returns:
        Element ``[0][0]`` of the array returned by ``model.predict``.
    """
    # Vectorize each item with the shared vocabulary.
    headline_vectors = [vectorize_text(item, words) for item in headlines]
    content_vectors = [vectorize_text(item, words) for item in contents]

    # NOTE(review): the vector lists are wrapped in one more list before
    # padding, so pad_sequences receives a single sequence whose elements are
    # whatever vectorize_text returns. Confirm this matches the row layout
    # used for the training matrices.
    headline_batch = pad_sequences(
        [headline_vectors], padding='post', maxlen=X_headlines.shape[1]
    )
    content_batch = pad_sequences(
        [content_vectors], padding='post', maxlen=X_contents.shape[1]
    )

    # Shape the scalar indicators as single-row columns.
    ema_column = np.array([ema]).reshape(-1, 1)
    macd_values = np.array(macd)
    macd_first_column = macd_values[0].reshape(-1, 1)
    macd_second_column = macd_values[1].reshape(-1, 1)

    # Inputs go in the same order the model was built with.
    model_inputs = [
        headline_batch,
        content_batch,
        ema_column,
        macd_first_column,
        macd_second_column,
    ]
    prediction = model.predict(model_inputs)
    return prediction[0][0]

In [None]:
# Example usage of the predict_todays_percent_change function.
# Relies on helpers defined elsewhere: fetch_news, organize_news,
# vectorizeNews, getEMA, MACD and predict_todays_percent_change.
from datetime import datetime  # local import so this cell runs on its own

company_name = "TSLA"  # Replace with the desired company ticker
today = datetime.now().strftime('%Y-%m-%d')

# Fetch today's news for the company.
# The query previously read "Telsa", a misspelling that would search for the
# wrong term; keep it in sync with company_name when changing the ticker.
articles = fetch_news(query="Tesla", day=today)

# Organize and vectorize the news.
news_data = organize_news(articles)
# NOTE(review): vectorizedNews is not used below; the call is kept in case
# vectorizeNews has side effects on news_data -- confirm and remove if not.
vectorizedNews = vectorizeNews(news_data)

# Get the EMA and MACD values (adjust this based on how you obtain them in
# your real-time environment).
ema = getEMA(company_name)
macd = MACD(company_name)

# Prepare the input.
headlines = news_data['headlines']
contents = news_data['contents']

# Guard against missing indicator values: the predictor indexes into macd and
# reshapes ema, so a None here would otherwise fail with an unrelated error.
if ema is None or macd is None:
    print(f"Cannot predict for {company_name}: EMA or MACD value is missing.")
else:
    predicted_change = predict_todays_percent_change(
        company_name, headlines, contents, ema, macd
    )
    print(f"Predicted percentage change for {company_name}: {predicted_change}")