<a href="https://colab.research.google.com/github/yashkochar14/SA/blob/main/Stock_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [86]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set visualization style: apply matplotlib's 'fivethirtyeight' style sheet
# so that every figure drawn after this point uses it.
plt.style.use('fivethirtyeight')



In [None]:
import io

from google.colab import files

# Upload the CSV file. files.upload() returns a dict that maps each uploaded
# file name to the raw bytes of that file.
uploaded = files.upload()

# Load the dataset from the bytes that were just uploaded rather than from a
# hard-coded path: when this cell is re-run, Colab may save the new upload
# under a different (suffixed) name, and reading 'HCLTECH.csv' from disk would
# then silently load the stale copy.
if uploaded:
    uploaded_name, uploaded_bytes = next(iter(uploaded.items()))
    hcl_data = pd.read_csv(io.BytesIO(uploaded_bytes))
else:
    # Nothing was uploaded (e.g. the dialog was cancelled): fall back to a
    # file that is already present in the working directory.
    hcl_data = pd.read_csv('HCLTECH.csv')
hcl_data.head()  # Display first 5 rows


In [None]:
# Per-column count of missing entries
null_counts = hcl_data.isna().sum()
print("Missing Values:\n", null_counts)

# Column dtypes and non-null counts (printed by .info() itself)
hcl_data.info()

# Summary statistics; as the last expression this is the cell's rendered output
hcl_data.describe()


In [None]:
# Missing entries per column before imputation
missing_values = hcl_data.isna().sum()
print("Missing Values:\n", missing_values)

# Replace gaps in every numeric column with that column's mean.
# NOTE(review): mean imputation ignores time order; confirm that is acceptable
# for price columns.
numerical_cols = hcl_data.select_dtypes(include=[np.number]).columns
column_means = hcl_data[numerical_cols].mean()
hcl_data[numerical_cols] = hcl_data[numerical_cols].fillna(column_means)

# Missing entries per column after imputation
remaining_missing = hcl_data.isna().sum()
print("Missing Values After Filling:\n", remaining_missing)


In [None]:

# Convert the 'Date' column to datetime; errors='coerce' turns any value that
# cannot be parsed into NaT instead of raising.
parsed_dates = pd.to_datetime(hcl_data['Date'], errors='coerce')
hcl_data['Date'] = parsed_dates


In [None]:
# Preview the first five rows of hcl_data (rendered as the cell's output).
hcl_data.head(5)

In [None]:
# Flag rows that exactly repeat an earlier row, then report how many there are
duplicate_mask = hcl_data.duplicated()
duplicate_count = duplicate_mask.sum()
print(f"Number of duplicate rows: {duplicate_count}")

# Drop the repeated rows (first occurrence is kept); a no-op when there are none
hcl_data.drop_duplicates(inplace=True)


In [None]:

# Print column dtypes and non-null counts for the current state of hcl_data.
hcl_data.info()
# Show the first five rows as the cell's output.
hcl_data.head()


In [None]:
# Every indicator below (pct_change, rolling, ewm) is computed over row order,
# so the rows must be in chronological order first. Sorting here makes the
# features correct even when the CSV is not stored oldest-to-newest.
hcl_data = hcl_data.sort_values(by='Date').reset_index(drop=True)

# Price Range (High - Low)
hcl_data['Price Range'] = hcl_data['High'] - hcl_data['Low']

# Daily Return (%): percentage change of Close versus the previous row
hcl_data['Daily Return'] = hcl_data['Close'].pct_change() * 100

# Moving Averages (7-day, 21-day, 50-day); the first window-1 rows are NaN
hcl_data['MA_7'] = hcl_data['Close'].rolling(window=7).mean()
hcl_data['MA_21'] = hcl_data['Close'].rolling(window=21).mean()
hcl_data['MA_50'] = hcl_data['Close'].rolling(window=50).mean()

# Exponential Moving Average (14-day)
hcl_data['EMA_14'] = hcl_data['Close'].ewm(span=14, adjust=False).mean()

# Display the first few rows to check
hcl_data.head(10)



In [None]:

# Backward fill for missing values: each NaN takes the next valid value below
# it in the same column. DataFrame.bfill() replaces fillna(method='bfill'),
# whose `method` argument is deprecated in current pandas releases.
# NOTE(review): for the leading NaNs of rolling/pct_change columns this copies
# a later value into earlier rows - confirm that is acceptable for modelling.
hcl_data = hcl_data.bfill()
hcl_data.head(5)

In [None]:
# Order rows chronologically (oldest first) and renumber the index from 0
hcl_data = hcl_data.sort_values(by='Date', ignore_index=True)

# Columns kept for the LSTM and the exploratory analysis below
lstm_feature_columns = [
    'Date', 'Close', 'Price Range', 'Daily Return',
    'MA_7', 'MA_21', 'MA_50', 'EMA_14',
]
lstm_data = hcl_data[lstm_feature_columns]

# Preview the prepared data
lstm_data.head(10)


In [None]:
# Closing price over time, drawn through the explicit Axes interface
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(lstm_data['Date'], lstm_data['Close'], color='dodgerblue', label='Closing Price')
ax.set(xlabel='Date', ylabel='Closing Price (₹)', title='Closing Price Trend Over Time')
plt.setp(ax.get_xticklabels(), rotation=45)  # tilt the date labels
ax.grid(True)
ax.legend()
plt.show()


In [None]:
# Closing price overlaid with its 7/21/50-day moving averages
fig, ax = plt.subplots(figsize=(8, 4))

# (column, line colour, legend label) for each series, in drawing order
line_specs = [
    ('Close', 'dodgerblue', 'Closing Price'),
    ('MA_7', 'red', '7-Day MA'),
    ('MA_21', 'green', '21-Day MA'),
    ('MA_50', 'purple', '50-Day MA'),
]
for column, line_color, legend_label in line_specs:
    ax.plot(lstm_data['Date'], lstm_data[column], color=line_color, label=legend_label)

ax.set(xlabel='Date', ylabel='Price (₹)', title='Closing Price with Moving Averages')
plt.setp(ax.get_xticklabels(), rotation=45)  # tilt the date labels
ax.grid(True)
ax.legend()
plt.show()


In [None]:
# Histogram (50 bins) of daily returns with a KDE curve on top
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(lstm_data['Daily Return'], bins=50, color='orange', kde=True, ax=ax)
ax.set(xlabel='Daily Return (%)', ylabel='Frequency', title='Distribution of Daily Returns')
ax.grid(True)
plt.show()

# How to read this chart:
# - Volatility: a wide distribution or extreme outliers indicate higher volatility.
# - Shape: compare the histogram with a bell curve to judge how close returns are to normal.
# - Risk: a negative skew (longer left tail) indicates more risk of sharp price drops.


In [None]:
# Pairwise correlation of every feature except the Date column
numeric_features = lstm_data.drop(columns='Date')
correlation_matrix = numeric_features.corr()

fig, ax = plt.subplots(figsize=(7, 4))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', linewidths=0.5, ax=ax)
ax.set_title('Correlation Heatmap of Features')
plt.show()

# Feature correlations help decide which inputs to keep or drop for LSTM training:
# two or more strongly correlated features carry redundant information and can
# cause issues in modelling.


In [None]:
from sklearn.preprocessing import MinMaxScaler  # MinMaxScaler rescales each feature to a 0-1 range, which makes the network easier to train

# Selecting features to scale (every column except Date)
features_to_scale = lstm_data.drop(columns='Date').columns

# Fit the scaler on the chronologically earliest 80% of rows only. Fitting on
# the whole frame would let the min/max of the evaluation period leak into the
# training inputs. These rows all fall inside the training part of the later
# 80/20 split over 60-step sequences.
scaler_fit_rows = max(1, int(len(lstm_data) * 0.8))

# Scaling features
scaler = MinMaxScaler(feature_range=(0, 1))
# fit: learns the minimum and maximum of each feature from the fitting rows.
scaler.fit(lstm_data[features_to_scale].iloc[:scaler_fit_rows])
# transform: scales each value using the formula
#   Scaled Value = (Value - Min) / (Max - Min)
# lstm_data[features_to_scale]: selects all numeric features for scaling.
# Result: a NumPy array. Rows used for fitting lie between 0 and 1; later rows
# can fall outside that range when they exceed the fitted min/max.
scaled_data = scaler.transform(lstm_data[features_to_scale])

# Creating a DataFrame with scaled values
scaled_df = pd.DataFrame(scaled_data, columns=features_to_scale)
# Adding Date back for reference; assigned by position because scaled_df has a
# fresh 0..n-1 index.
scaled_df['Date'] = lstm_data['Date'].to_numpy()
scaled_df.head(10)


In [None]:
# Scaled feature matrix without the Date column, as a NumPy array
scaled_array = scaled_df.drop(columns='Date').to_numpy()

sequence_length = 60  # use the previous 60 rows to predict the next row's Close

# Each target row `end` is paired with the window of rows [end - 60, end).
# X collects the windows; y collects column 0 of the target row (the first
# scaled feature, i.e. the next day's Close).
window_ends = range(sequence_length, len(scaled_array))
X = np.array([scaled_array[end - sequence_length:end] for end in window_ends])
y = np.array([scaled_array[end, 0] for end in window_ends])

# Checking shapes for LSTM input to ensure compatibility
print(f"Shape of X: {X.shape}")  # (number of sequences, sequence length, number of features per row)
print(f"Shape of y: {y.shape}")


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Input shape of one sample: (time steps, features per step)
timesteps, n_features = X.shape[1], X.shape[2]

# Two stacked 64-unit LSTM layers, each followed by 20% dropout to limit
# overfitting, then a single-unit Dense layer that outputs the prediction.
model = Sequential([
    LSTM(units=64, return_sequences=True, input_shape=(timesteps, n_features)),
    Dropout(0.2),
    LSTM(units=64, return_sequences=False),
    Dropout(0.2),
    Dense(units=1),
])

# Adam optimizer with mean-squared-error loss
model.compile(optimizer='adam', loss='mean_squared_error')

# Print the layer-by-layer summary
model.summary()


In [None]:
# Chronological split: the first 80% of sequences train the model, the rest test it
train_size = int(len(X) * 0.8)
X_train, X_test = np.split(X, [train_size])
y_train, y_test = np.split(y, [train_size])

print(f"Training data shape: X_train: {X_train.shape}, y_train: {y_train.shape}")
print(f"Testing data shape: X_test: {X_test.shape}, y_test: {y_test.shape}")


In [None]:
# Training settings gathered in one place
fit_options = dict(
    epochs=20,
    batch_size=32,
    validation_split=0.1,  # 10% of training data for validation
    verbose=1,             # display training progress
)

# Train the model; `history` keeps the object returned by fit()
history = model.fit(X_train, y_train, **fit_options)


In [None]:
from sklearn.metrics import mean_squared_error
import math

# Predict on the held-out sequences (values are still in scaled units)
y_pred = model.predict(X_test)

# MSE and its square root, both measured on the scaled targets
mse = mean_squared_error(y_test, y_pred)
rmse = math.sqrt(mse)

for metric_label, metric_value in (
    ("Mean Squared Error (MSE)", mse),
    ("Root Mean Squared Error (RMSE)", rmse),
):
    print(f"{metric_label}: {metric_value:.4f}")



In [None]:
import matplotlib.pyplot as plt

# Actual vs. predicted Close on the test set, still in scaled units
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test, label='Actual Prices', color='blue')
ax.plot(y_pred, label='Predicted Prices', color='red')
ax.set(title='Actual vs. Predicted Prices', xlabel='Time', ylabel='Scaled Close Price')
ax.legend()
ax.grid(True)
plt.show()
# The closer the predicted line is to the actual line, the better the model performs.



In [None]:
# Column vectors of the scaled predictions and targets
y_pred_reshaped = y_pred.reshape(-1, 1)
y_test_reshaped = y_test.reshape(-1, 1)


def _unscale_first_feature(scaled_column):
    """Map scaled values of the first scaled feature ('Close') back to prices.

    The scaler was fitted on all of `features_to_scale`, so the single column
    is placed in column 0 of a zero-filled matrix of that width, the whole
    matrix is inverse-transformed, and only column 0 is returned.
    """
    padded = np.zeros((scaled_column.shape[0], len(features_to_scale)))
    padded[:, 0] = scaled_column[:, 0]
    return scaler.inverse_transform(padded)[:, 0]


# Use the same scaler to inverse transform only the 'Close' price
y_pred_original = _unscale_first_feature(y_pred_reshaped)
y_test_original = _unscale_first_feature(y_test_reshaped)

# Display some actual vs predicted values
comparison_df = pd.DataFrame({'Actual': y_test_original, 'Predicted': y_pred_original})
print(comparison_df.head(10))  # Show first 10 comparisons


In [None]:
# Actual vs. predicted Close on the test set, converted back to prices
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test_original, label='Actual Prices', color='blue')
ax.plot(y_pred_original, label='Predicted Prices', color='red')
ax.set(title='Actual vs. Predicted Stock Prices', xlabel='Time', ylabel='Stock Price (INR)')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Make predictions again to ensure 'predictions' is defined
predictions = model.predict(X_test)

from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import numpy as np

# The model predicts min-max-scaled Close values. Errors measured in that
# space are hard to interpret, and MAPE in particular is distorted because the
# scaling shifts the zero point (targets near the fitted minimum sit near 0).
# Convert both series back to price units before scoring.
close_col = features_to_scale.get_loc('Close')


def _to_price(scaled_close):
    """Invert the MinMaxScaler for the 'Close' column only.

    MinMaxScaler computes scaled = value * scale_ + min_, so the inverse for a
    single feature is (scaled - min_) / scale_.
    """
    return (np.ravel(scaled_close) - scaler.min_[close_col]) / scaler.scale_[close_col]


actual_prices_inr = _to_price(y_test)
predicted_prices_inr = _to_price(predictions)

# Mean Absolute Error (MAE), in price units
mae = mean_absolute_error(actual_prices_inr, predicted_prices_inr)

# Mean Squared Error (MSE), in squared price units
mse = mean_squared_error(actual_prices_inr, predicted_prices_inr)

# Root Mean Squared Error (RMSE), in price units
rmse = np.sqrt(mse)

# Mean Absolute Percentage Error (MAPE), relative to the actual price
mape = mean_absolute_percentage_error(actual_prices_inr, predicted_prices_inr) * 100  # In percentage

print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")


In [None]:
pip install streamlit matplotlib pandas numpy


In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Title
st.title("📊 Stock Price Prediction Dashboard")

# Display error metrics. These are the values computed by the evaluation cell
# earlier in this notebook, not hard-coded numbers, so the dashboard always
# matches the model that was actually trained.
st.subheader("Model Evaluation Metrics")
st.write(f"**Mean Absolute Error (MAE):** {mae:.2f}")
st.write(f"**Mean Squared Error (MSE):** {mse:.2f}")
st.write(f"**Root Mean Squared Error (RMSE):** {rmse:.2f}")
st.write(f"**Mean Absolute Percentage Error (MAPE):** {mape:.2f}%")

# Load the real test-set series (already converted back to prices by the
# inverse-transform cell earlier in this notebook) instead of random data.
actual_prices = np.asarray(y_test_original)
predicted_prices = np.asarray(y_pred_original)

# Plot actual vs predicted prices
st.subheader("📈 Actual vs. Predicted Stock Prices")
fig, ax = plt.subplots()
ax.plot(actual_prices, color='blue', label='Actual Prices')
ax.plot(predicted_prices, color='red', label='Predicted Prices')
ax.set_xlabel("Time")
ax.set_ylabel("Stock Price (INR)")
ax.legend()
st.pyplot(fig)

# Future prediction section
# NOTE(review): the values below are random noise around the last predicted
# price, not output of the model. TODO: replace with a real multi-step forecast.
st.subheader("🔮 Make Future Predictions")
future_days = st.number_input("Enter the number of future days:", min_value=1, max_value=30, value=5)
future_predictions = predicted_prices[-1] + np.random.uniform(-10, 10, future_days)
st.write(f"Predicted prices for the next {future_days} days: {future_predictions}")


In [None]:
!pip install streamlit pyngrok


In [None]:
%%writefile app.py
# Minimal placeholder Streamlit app; the %%writefile magic at the top of this
# cell saves these lines to app.py.
import streamlit as st

# Page title followed by one line of body text.
st.title("Stock Price Prediction Dashboard")
st.write("This is a demo dashboard using Streamlit in Google Colab!")


In [None]:
# from pyngrok import ngrok

# # Start the Streamlit app
# !streamlit run app.py &

# # Create a public URL using ngrok
# public_url = ngrok.connect(port='8501')
# print(f"Access the Streamlit app here: {public_url}")


In [None]:
!pip install dash dash-bootstrap-components pandas plotly


In [None]:
# The separate jupyter_dash package is not installed by this notebook's pip
# cell. Recent Dash releases render inside notebooks on their own, so the
# plain Dash class is used here (the same class the next cell imports).
from dash import Dash, dcc, html, Input, Output
import dash_bootstrap_components as dbc
import plotly.express as px
import pandas as pd

# Create a sample DataFrame
data = pd.DataFrame({
    'Category': ['A', 'B', 'C', 'D'],
    'Values': [10, 20, 30, 40]
})

# Initialize the Dash app with the Bootstrap stylesheet
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# App Layout: a heading, a category dropdown, and the bar chart it controls
app.layout = dbc.Container([
    html.H1("Power BI-like Dashboard on Google Colab"),
    dcc.Dropdown(
        id='category-dropdown',
        options=[{'label': cat, 'value': cat} for cat in data['Category']],
        value='A',
        clearable=False
    ),
    dcc.Graph(id='bar-chart')
])

# Callback for interactivity
@app.callback(
    Output('bar-chart', 'figure'),
    [Input('category-dropdown', 'value')]
)
def update_chart(selected_category):
    """Return a bar chart of the rows whose Category equals the dropdown value.

    Args:
        selected_category: the value currently selected in 'category-dropdown'.

    Returns:
        A plotly figure that Dash assigns to the 'figure' property of 'bar-chart'.
    """
    filtered_data = data[data['Category'] == selected_category]
    fig = px.bar(filtered_data, x='Category', y='Values', title=f"Data for Category {selected_category}")
    return fig

# Run the app and display it inline in the notebook output. app.run with
# jupyter_mode replaces the legacy run_server(mode='inline') call.
app.run(jupyter_mode='inline')


In [None]:
from dash import Dash, dcc, html, Input, Output
import dash_bootstrap_components as dbc
import plotly.express as px
import pandas as pd

# Sample data: four categories with one value each
data = pd.DataFrame({
    'Category': ['A', 'B', 'C', 'D'],
    'Values': [10, 20, 30, 40]
})

# Dash application styled with the Bootstrap theme
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Build the page pieces first, then assemble them into the layout
page_heading = html.H1("Power BI-like Dashboard on Google Colab")
category_picker = dcc.Dropdown(
    id='category-dropdown',
    options=[dict(label=name, value=name) for name in data['Category']],
    value='A',
    clearable=False,
)
bar_chart = dcc.Graph(id='bar-chart')
app.layout = dbc.Container([page_heading, category_picker, bar_chart])


# Redraw the chart whenever the dropdown selection changes
@app.callback(
    Output('bar-chart', 'figure'),
    [Input('category-dropdown', 'value')]
)
def update_chart(selected_category):
    """Build the bar chart for the category currently chosen in the dropdown."""
    matches_selection = data['Category'] == selected_category
    return px.bar(
        data.loc[matches_selection],
        x='Category',
        y='Values',
        title=f"Data for Category {selected_category}",
    )


# Start the development server only when this code runs as the main module
if __name__ == '__main__':
    app.run(debug=True)
