<a href="https://colab.research.google.com/github/venki05/Mavericks_oneAPI_hack_kpr/blob/main/Stock_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

# Load the trained model (imported from the train.py)
from train import model, scaler, predictors

# Load the raw price history.
df = pd.read_csv('TCS.csv')

# Remove 'Adj Close' column (if it exists); errors='ignore' makes this a no-op otherwise.
df = df.drop(columns=['Adj Close'], errors='ignore')

# Add technical indicators: 10-row and 50-row simple moving averages of 'Close'.
df["MA_10"] = df["Close"].rolling(window=10).mean()
df["MA_50"] = df["Close"].rolling(window=50).mean()

# The first rows have no complete rolling window, so their averages are NaN; drop them.
df = df.dropna()

# Target column: the next row's 'Close'. The final row has no successor and is dropped.
df["Tomorrow"] = df["Close"].shift(-1)
df = df.dropna()

# Input features, including the moving averages.
# NOTE(review): this rebinds the `predictors` name imported from train.py; it
# presumably has to match the columns the scaler/model were fitted on -- confirm.
predictors = ["Close", "Volume", "Open", "High", "Low", "MA_10", "MA_50"]

# Split into training (all but the last 100 rows) and testing (last 100 rows).
# Explicit copies: `test` is modified below, and assigning into a slice of `df`
# is chained assignment (SettingWithCopyWarning, ambiguous view/copy semantics).
train = df.iloc[:-100].copy()
test = df.iloc[-100:].copy()

# Use a datetime 'Date' index on the test frame when the column is available.
if 'Date' in test.columns:
    test['Date'] = pd.to_datetime(test['Date'])
    test = test.set_index('Date')

# Scale the test features with the already-fitted scaler imported from train.py.
test_scaled = scaler.transform(test[predictors])

# Predict the next close for every test row.
test["Predicted"] = model.predict(test_scaled)

# Mean squared error between the real next close and the prediction.
mse = mean_squared_error(test["Tomorrow"], test["Predicted"])
print(f"Mean Squared Error: {mse}")

# Predict for the next 30 days
def predict_next_days(model, last_known_data, n_days=30, close_history=None,
                      feature_scaler=None, feature_names=None):
    """Iteratively forecast the next ``n_days`` closing prices.

    Every prediction is fed back as the input row of the following step:
    ``Close``, ``Open``, ``High`` and ``Low`` are set to the predicted value,
    ``Volume`` is set to 0 (future volume is unknown), and ``MA_10`` /
    ``MA_50`` are advanced so the model never receives NaN features.

    Args:
        model: Fitted estimator exposing ``predict(X)``.
        last_known_data: ``pandas.Series`` with the most recent row. It must
            contain every feature column plus ``MA_10`` and ``MA_50``. The
            caller's object is not modified.
        n_days: Number of future steps to predict.
        close_history: Optional iterable of past closing prices, oldest
            first, ending with the close of ``last_known_data``
            (e.g. ``df["Close"]``). Enables exact moving averages.
        feature_scaler: Object exposing ``transform(X)``. Defaults to the
            module-level ``scaler``.
        feature_names: Ordered feature columns. Defaults to the module-level
            ``predictors``.

    Returns:
        list[float]: One predicted closing price per step, in order.
    """
    if feature_scaler is None:
        feature_scaler = scaler
    if feature_names is None:
        feature_names = predictors
    feature_names = list(feature_names)

    ma_windows = {"MA_10": 10, "MA_50": 50}
    longest_window = max(ma_windows.values())

    # Only the trailing `longest_window` closes can ever influence an average.
    closes = [] if close_history is None else [float(c) for c in close_history]
    closes = closes[-longest_window:]

    current_data = last_known_data.copy()
    predictions = []

    for _ in range(n_days):
        # Scale the current feature row (shape 1 x n_features) and predict.
        feature_row = current_data[feature_names].to_numpy(dtype=float)
        scaled_current_data = feature_scaler.transform([feature_row])
        predicted_value = float(model.predict(scaled_current_data)[0])
        predictions.append(predicted_value)

        # Track the predicted close so the moving averages can follow it.
        closes.append(predicted_value)
        del closes[:-longest_window]

        for column, window in ma_windows.items():
            if len(closes) >= window:
                # Full window known: exact simple moving average.
                current_data[column] = sum(closes[-window:]) / window
            else:
                # The close leaving the window is unknown, so approximate it
                # by the current average (running-average update). A rolling
                # mean over a single row would be NaN instead.
                previous_average = float(current_data[column])
                current_data[column] = (
                    previous_average + (predicted_value - previous_average) / window
                )

        # Roll the row forward: the prediction stands in for every price field.
        current_data["Close"] = predicted_value
        current_data["Volume"] = 0  # Assuming future volume data is unknown
        current_data["Open"] = predicted_value
        current_data["High"] = predicted_value
        current_data["Low"] = predicted_value

    return predictions

# Seed the forecast with the most recent row that survived the dropna() steps.
last_known_data = df.iloc[-1]

# Make predictions for the next 30 days
predicted_next_30_days = predict_next_days(model, last_known_data, n_days=30)

# 30 business days starting the day after the last test date
# (requires `test` to be indexed by datetime).
forecast_start = test.index[-1] + pd.Timedelta(days=1)
predicted_dates = pd.date_range(start=forecast_start, periods=30, freq='B')

# Create a DataFrame for the predicted values
predicted_df = pd.DataFrame(predicted_next_30_days, index=predicted_dates, columns=["Predicted Close Price"])

# Keep the actual data from 2015-01-01 through 2023-12-29.
# 'Date' is compared as text here, which only acts as a date filter when the
# strings are ISO formatted (YYYY-MM-DD).
in_plot_range = (df['Date'] >= '2015-01-01') & (df['Date'] <= '2023-12-29')

# Explicit copy: the filtered frame is modified below, and assigning into a
# slice of `df` is chained assignment (SettingWithCopyWarning).
actual_data = df[in_plot_range].copy()

# Index by datetime for plotting.
actual_data['Date'] = pd.to_datetime(actual_data['Date'])
actual_data = actual_data.set_index('Date')

# Plot the actual close prices (in blue)
plt.figure(figsize=(12, 6))
plt.plot(actual_data.index, actual_data["Close"], label="Actual Close Price (2015-2024)", color='blue', linestyle='-', markersize=4)

# Plot the predicted close prices for the next 30 days (in red)
plt.plot(predicted_df.index, predicted_df["Predicted Close Price"], label="Predicted Close Price for Next 30 Days", color='red', linestyle='--', markersize=4)

# Add title and labels for clarity
plt.title("Actual Close Price (2015-2024) vs Predicted Close Price (Next 30 Days)")
plt.xlabel("Date")
plt.ylabel("Close Price")
plt.legend(loc='best')
plt.grid(True)
plt.xticks(rotation=45)

# Show the plot
plt.show()
