In [None]:
# Load the price history stored in yahoo_data.xlsx into a DataFrame.
import pandas as pd

ds = pd.read_excel(io="yahoo_data.xlsx")
# Show the first five rows as a quick check of the parsed columns.
ds.head(n=5)


In [None]:
# Count missing values per column, then parse 'Date', order the rows
# chronologically and use the dates as the index.
null_counts = ds.isnull().sum()

# The guard keeps this cell re-runnable: once 'Date' has become the index the
# column no longer exists and repeating the conversion would raise a KeyError.
if 'Date' in ds.columns:
    ds['Date'] = pd.to_datetime(ds['Date'])
    ds = ds.sort_values(by='Date').set_index('Date')

# Last expression of the cell, so the null counts are actually displayed
# (previously the result was computed and silently discarded).
null_counts


In [None]:
# Simple moving average of the closing price over a 200-row window.
ds['SMA_200'] = ds['Close*'].rolling(200).mean()

In [None]:
# Exponential moving average of the closing price with span 200,
# computed with adjust=False.
ds['EMA_200'] = ds['Close*'].ewm(adjust=False, span=200).mean()
ds.head(n=5)

In [None]:
# Relative Strength Index built from simple rolling means of gains and losses.
RSI_WINDOW = 14

delta = ds['Close*'].diff()
# clip() preserves the leading NaN produced by diff(). The previous
# where(..., 0) turned it into a fake zero change, so the first RSI value was
# averaged over only 13 real price changes instead of 14.
gain = delta.clip(lower=0)
loss = -delta.clip(upper=0)
avg_gain = gain.rolling(window=RSI_WINDOW).mean()
avg_loss = loss.rolling(window=RSI_WINDOW).mean()
# A window without losses gives rs = inf and therefore RSI = 100;
# a completely flat window gives 0/0 and stays NaN.
rs = avg_gain / avg_loss
rsi = 100 - (100 / (1 + rs))
ds['RSI'] = rsi


In [None]:
# Inspect the first 400 rows, which reaches past the 200-row windows used for
# the moving averages.
ds.iloc[:400]

In [None]:
# Anomaly detection on (closing price, volume) pairs with an Isolation Forest.
from sklearn.ensemble import IsolationForest

data = ds.loc[:, ['Close*', 'Volume']]
# Fixed random_state so repeated runs flag the same rows.
model = IsolationForest(random_state=42, contamination=0.01)
# fit() returns the fitted estimator, so fitting and labelling can be chained.
ds['Anamoly'] = model.fit(data).predict(data)
# Rows labelled -1 are the ones treated as anomalies below.
anamolies = ds[ds['Anamoly'].eq(-1)]


In [None]:
# Leave the frame as the last expression so the notebook renders it as a
# table instead of the plain-text dump produced by print().
anamolies

In [None]:
# Save the flagged rows to a CSV file next to the notebook.
anamolies.to_csv(path_or_buf="anomalies_detected.csv")


In [None]:
pip install tensorflow

In [None]:
#importing the important libraries
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import  LSTM, Dense

In [None]:
# Keep the closing price as a one-column DataFrame (2-D), which is the layout
# the scaler in the next step is fitted on.
data = ds.loc[:, ['Close*']]

In [None]:
#scaling the data
# Fit the scaler on the chronologically first 80% of rows only, so the min/max
# of the later, held-out prices cannot leak into the training features. The
# same transform is then applied to the whole series, which means later prices
# may scale slightly outside the [0, 1] range.
TRAIN_FRACTION = 0.8  # keep in step with the 0.8 train/test split used further down
n_fit_rows = int(TRAIN_FRACTION * len(data))

scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data.iloc[:n_fit_rows])
scaled_data = scaler.transform(data)

In [None]:
#sequence creation for lstm
def create_sequences(data, time_steps):
    """Turn a series into sliding-window samples for next-step prediction.

    Parameters
    ----------
    data : array-like
        Either a 2-D array of shape (n_rows, n_features), of which only the
        first column is used, or a 1-D array of values.
    time_steps : int
        Number of consecutive past values that make up one input window.

    Returns
    -------
    X : numpy.ndarray of shape (n_samples, time_steps)
        Window i holds rows i .. i + time_steps - 1 of the series.
    Y : numpy.ndarray of shape (n_samples,)
        The value that immediately follows each window.

    Here n_samples = max(len(data) - time_steps, 0). When the series is too
    short to form a single window, correctly shaped empty arrays are returned.

    Raises
    ------
    ValueError
        If time_steps is smaller than 1.
    """
    if time_steps < 1:
        raise ValueError("time_steps must be a positive integer")

    values = np.asarray(data)
    if values.ndim == 1:
        # Accept a plain 1-D series as well as the (n_rows, 1) scaler output.
        values = values.reshape(-1, 1)
    series = values[:, 0]

    n_samples = len(series) - time_steps
    if n_samples <= 0:
        # Keep the second axis so callers can still rely on X.shape[1].
        return (np.empty((0, time_steps), dtype=series.dtype),
                np.empty((0,), dtype=series.dtype))

    X = np.array([series[start:start + time_steps] for start in range(n_samples)])
    Y = series[time_steps:].copy()
    return X, Y
# Each sample is a window of the 60 previous scaled closing prices.
time_steps = 60
X, y = create_sequences(scaled_data, time_steps)
print(X.shape, y.shape, sep="\n")

    
    


In [None]:
# Ordered 80/20 hold-out: the first 80% of the windows train the model and
# the remaining windows are kept for testing (no shuffling).
split = int(len(X) * 0.8)
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)


In [None]:
#creating the lstm model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input

# Seed the random number generators before any weights are created so the
# training run is repeatable, in line with the seeded Isolation Forest above.
tf.keras.utils.set_random_seed(42)

# One LSTM layer reading windows of X_train.shape[1] steps with a single
# feature per step, followed by a one-unit dense output for the next value.
model = Sequential([
    Input(shape=(X_train.shape[1], 1)),
    LSTM(units=50, return_sequences=False),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')


In [None]:
 #training the lstm model
# Train for 10 epochs in mini-batches of 32. The held-out test windows are
# also passed as validation data, so the validation loss reported per epoch
# is measured on the same samples that are evaluated afterwards.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    verbose=1,
    validation_data=(X_test, y_test),
)


In [None]:
# Loss on the held-out windows, using the loss configured in compile()
# (mean squared error) and measured on the scaled values.
test_loss = model.evaluate(x=X_test, y=y_test)
print(f'Test Loss: {test_loss}')


In [None]:
# Model output for every held-out window (still in the scaler's units).
predictions = model.predict(x=X_test)


In [None]:
# Undo the min-max scaling so the predictions are expressed as prices again.
predictions_rescaled = scaler.inverse_transform(X=predictions)


In [None]:
import matplotlib.pyplot as plt

# Plot actual vs predicted stock prices in original price units.
# Previously both curves were drawn from the scaled values while the axis was
# labelled 'Stock Price', and predictions_rescaled was never used.
actual_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

fig, ax = plt.subplots()
ax.plot(actual_prices, color='blue', label='Actual Price')
ax.plot(predictions_rescaled, color='red', label='Predicted Price')
ax.set_title('Stock Price Prediction')
ax.set_xlabel('Time')
ax.set_ylabel('Stock Price')
ax.legend()
plt.show()


In [None]:
#evaluating model performance using mse and rmse
from sklearn.metrics import mean_squared_error
import numpy as np

# Score in original price units: the scaled-space MSE only repeats the test
# loss reported by model.evaluate and is hard to interpret as a price error.
actual_prices = scaler.inverse_transform(y_test.reshape(-1, 1))
predicted_prices = scaler.inverse_transform(predictions)

mse = mean_squared_error(actual_prices, predicted_prices)
rmse = np.sqrt(mse)
print(f'Mean Squared Error: {mse}')
print(f'Root Mean Squared Error: {rmse}')


In [None]:
 import matplotlib.pyplot as plt

# Closing-price history with the rows flagged by the Isolation Forest
# (label -1 in the 'Anamoly' column) marked on top.
flagged = ds.loc[ds['Anamoly'] == -1]

fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(ds.index, ds['Close*'], label='Stock Price', color='blue')
ax.scatter(flagged.index, flagged['Close*'],
           color='red', marker='x', label='Anomalies')

ax.set_title('Stock Price with Detected Anomalies (Isolation Forest)')
ax.set_xlabel('Date')
ax.set_ylabel('Stock Price')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()


In [None]:
!pip install streamlit


In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
from sklearn.ensemble import IsolationForest
import plotly.graph_objects as go

st.title("📈 Excel-Based Stock Anomaly Detector")
st.write("Upload an Excel file with 'Close*' column and Date as index")

# File uploader
uploaded_file = st.file_uploader("Choose an Excel file", type=["xlsx"])

if uploaded_file is not None:
    try:
        # Read Excel with index column (Date)
        df = pd.read_excel(uploaded_file, index_col=0)
        df.index = pd.to_datetime(df.index)  # ensure index is datetime
        st.success("✅ File uploaded successfully!")

        # Preview
        st.subheader("📊 Preview of Uploaded Data")
        st.write(df.head())

        # Check for 'Close*' column
        if 'Close*' in df.columns:
            # Isolation Forest
            model = IsolationForest(contamination=0.05)
            df['Anomaly'] = model.fit_predict(df[['Close*']])
            anomalies = df[df['Anomaly'] == -1]

            # Plot
            st.subheader("📉 Stock Close* with Anomalies")
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=df.index, y=df['Close*'], name='Close* Price'))
            fig.add_trace(go.Scatter(x=anomalies.index, y=anomalies['Close*'], mode='markers',
                                     marker=dict(color='red', size=10), name='Anomalies'))
            st.plotly_chart(fig)

            # Show anomaly rows
            st.subheader("📌 Detected Anomalies")
            st.write(anomalies[['Close*']])
        else:
            st.error("❌ 'Close*' column missing in Excel file.")
    except Exception as e:
        st.error(f"Error reading Excel file: {e}")


In [None]:
# Start the Streamlit app from app.py, the file written by the %%writefile cell above.
# NOTE(review): `streamlit run` presumably keeps serving until it is interrupted,
# so this cell would not finish on its own — confirm before using "Run All".
!streamlit run app.py
