In [None]:
pip install pandas numpy yfinance scikit-learn matplotlib seaborn plotly flask python-dotenv tensorflow

In [None]:
import os
import yfinance as yf
import pandas as pd
import numpy as np
import pandas as pd

# # IT Companies
# IT_companies = [
#     'TCS.NS',
#     'INFY.NS',
#     'LTIM.NS',
#     'TECHM.NS',
#     'WIPRO.NS',
#     'ACN',
#     'CTSH',
#     'BSOFT.NS'
# ]

# # Non-IT Companies
# NonIT_companies = [
#     'HDFCBANK.NS',
#     'G',
#     'LT.NS',
#     'RELIANCE.NS',
#     'TATASTEEL.NS'   # Replaced TATAMOTORS (more stable)
# ]

# Display name -> ticker symbol handed to yf.download further down.
# The two groups are kept apart only for readability; COMPANIES preserves
# the order "IT first, then Non-IT".
_IT_COMPANIES = [
    ("TCS", "TCS.NS"),
    ("Infosys", "INFY.NS"),
    ("LTIMindtree", "LTIM.NS"),
    ("Tech Mahindra", "TECHM.NS"),
    ("Wipro", "WIPRO.NS"),
    ("HCL Tech", "HCLTECH.NS"),
    ("Accenture", "ACN"),
    ("Cognizant", "CTSH"),
    ("Birlasoft", "BSOFT.NS"),
]

_NON_IT_COMPANIES = [
    ("Sagility", "SAGILITY.NS"),
    ("Genpact", "G"),
    ("L&T", "LT.NS"),
    ("Reliance", "RELIANCE.NS"),
    ("Tata Consumer", "TATACONSUM.NS"),
]

COMPANIES = dict(_IT_COMPANIES + _NON_IT_COMPANIES)


all_stocks = list(COMPANIES.values())

# Download data safely: one request per ticker, so a single failing symbol
# cannot abort the whole run. Failures are reported and the ticker is skipped.
all_data = {}

for ticker in all_stocks:
    print(f"Downloading {ticker}...")
    try:
        df = yf.download(
            ticker,
            period="5y",        # relative window: better than hard-coded start/end dates
            auto_adjust=True,   # flagged as important by the original author
            threads=False,
            progress=False
        )

        if not df.empty:
            all_data[ticker] = df[['Open', 'High', 'Low', 'Close', 'Volume']]
        else:
            print(f"{ticker} returned empty data.")

    except Exception as e:
        print(f"Error downloading {ticker}: {e}")

# Make skipped tickers visible in one place instead of only in the scrolling log.
missing_tickers = [ticker for ticker in all_stocks if ticker not in all_data]
if missing_tickers:
    print(f"No data for: {', '.join(missing_tickers)}")

# pd.concat on an empty dict fails with an unhelpful "No objects to concatenate";
# stop here with a message that names the real problem.
if not all_data:
    raise ValueError("No price data could be downloaded for any ticker; nothing to combine.")

# Combine: the dict keys (tickers) become the outermost column level.
data = pd.concat(all_data, axis=1)

# Flatten columns to "<ticker>_<field>", e.g. "TCS.NS_Close".
# Only the first two levels are used, so this also works if the downloaded frames
# already carry multi-level columns and the concat produces more than two levels.
data.columns = [f"{col[0]}_{col[1]}" for col in data.columns]

# Save CSV
# Save CSV
# The cells that load this file open "stock_price_5yrs.csv" by its bare name,
# i.e. relative to the working directory. Writing to a hard-coded user folder
# only worked when the notebook happened to run from that exact folder, so the
# file is written to the working directory instead.
output_folder = os.getcwd()
os.makedirs(output_folder, exist_ok=True)

file_path = os.path.join(output_folder, 'stock_price_5yrs.csv')
# The feature-engineering cell parses a column called 'Date'; name the index
# column explicitly so that does not depend on the index keeping its name.
data.to_csv(file_path, index_label='Date')

print("\nData saved successfully!")
print("Start Date:", data.index.min())
print("End Date:", data.index.max())


In [None]:
# Loading the dataset
import pandas as pd

pd.options.display.max_rows = 100
df = pd.read_csv("stock_price_5yrs.csv")

# Preview the first rows only. Printing df.to_string() rendered every row and
# column of the table and bypassed the max_rows limit set above.
df.head()

In [5]:
# (rows, columns) of the raw price table; 'Date' is still an ordinary column at
# this point because the CSV was read without an index column.
df.shape

(1298, 61)

In [None]:
# Column dtypes and non-null counts of the raw price table.
df.info()

In [None]:
# Summary statistics for the numeric columns.
df.describe()

In [None]:
# Number of missing values per column (input for the null handling below).
df.isna().sum()

<h3>Code for handling null values, engineering new features, and creating targets/features</h3>

In [None]:
import pandas as pd

# Load dataset, ordered by date and indexed by it
df = (
    pd.read_csv("stock_price_5yrs.csv", parse_dates=['Date'])
    .sort_values('Date')
    .set_index('Date')
)

# Separate columns: every column is either a price field or a volume field
PRICE_FIELDS = ('Open', 'High', 'Low', 'Close')
volume_cols = [name for name in df.columns if 'Volume' in name]
price_cols = [
    name for name in df.columns
    if 'Volume' not in name and any(field in name for field in PRICE_FIELDS)
]

# 1) Handle nulls: carry the last known value forward, then fill leading gaps
#    backwards; volumes that are still missing become 0.
# NOTE(review): the backward fill copies the first available value into earlier
# dates, so a column with a long leading gap gets a flat stretch there.
df[price_cols] = df[price_cols].ffill().bfill()
df[volume_cols] = df[volume_cols].ffill().bfill().fillna(0)


def _engineer_features(frame, col):
    """Return the derived columns for one price column of ``frame``.

    Every price column gets a 20- and 50-row moving average, a percentage
    return and a 20-row rolling standard deviation. Columns whose name
    contains 'Close' additionally get ``<col>_Next``, the value one row ahead,
    which serves as the prediction target. Columns without any non-null value
    are left out.
    """
    series = frame[col]
    derived = {
        f'{col}_MA20': series.rolling(20).mean(),
        f'{col}_MA50': series.rolling(50).mean(),
        f'{col}_Return': series.pct_change() * 100,
        f'{col}_Volatility': series.rolling(20).std(),
    }
    if 'Close' in col:
        derived[f'{col}_Next'] = series.shift(-1)

    block = pd.DataFrame(derived, index=frame.index)
    populated = [name for name in block.columns if block[name].notna().any()]
    return block[populated]


# 2) Build all features as separate frames and attach them in one concat,
#    which avoids fragmenting df with many single-column inserts.
feature_dfs = [_engineer_features(df, col) for col in price_cols]
df = pd.concat([df] + feature_dfs, axis=1)

# 3) Drop rows with NaNs created by rolling / shift
df = df.dropna()

# 4) Save final dataset
df.to_csv("stock_features_ready.csv")

print("Dataset ready! Columns now include features and targets.")


In [10]:
# (rows, columns) after feature engineering and dropna(); 'Date' is the index now.
df.shape

(1248, 264)

In [11]:
# Engineered inputs are recognised by the keyword in their name, targets by the
# '_Next' marker. Raw price and volume columns match neither and are left out.
_FEATURE_KEYWORDS = ('MA20', 'MA50', 'Return', 'Volatility')

feature_cols = list(filter(
    lambda name: any(keyword in name for keyword in _FEATURE_KEYWORDS),
    df.columns,
))
target_cols = list(filter(lambda name: '_Next' in name, df.columns))

X = df[feature_cols]
y = df[target_cols]


In [None]:
import numpy as np
import tensorflow as tf
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Train one LSTM per target column and compare it with a linear-regression
# baseline on the same chronological train/test split.
#
# Outputs of this cell (all keyed by target column name):
#   company_models        - fitted Keras model
#   scalers_X / scalers_y - MinMaxScalers fitted on the training part only
#   company_features_dict - feature column names, in the order the model expects
#   results               - rows of [target, LSTM rmse/mae/mape, LR rmse/mae/mape]

# Seed Python, NumPy and TensorFlow so that repeated runs are comparable.
tf.keras.utils.set_random_seed(42)

timesteps = 60

results = []

company_models = {}
scalers_X = {}
scalers_y = {}
company_features_dict = {}

for target in target_cols:

    print(f"\nTraining model for: {target}")

    # Select features only related to this company.
    # Match on the "<ticker>_" prefix: a plain substring test lets a short ticker
    # such as "G" also pick up columns of other tickers that merely contain it
    # (e.g. "SAGILITY.NS_...").
    company_prefix = target.replace("_Close_Next", "")
    company_features = [
        c for c in feature_cols
        if c.startswith(f"{company_prefix}_") and c != target
    ]
    if not company_features:
        raise ValueError(f"No feature columns found for target {target!r}")

    # Save features for prediction
    company_features_dict[target] = company_features

    X = df[company_features]
    y = df[[target]]

    # --- Create Sequences ---
    # A window covers rows [end - timesteps, end). The target column already
    # holds the NEXT row's close, so the label for a window is the target value
    # in the window's LAST row (end - 1). Taking the value at row `end` instead
    # would train the model to predict two rows ahead.
    X_values = X.to_numpy()
    y_values = y.to_numpy()

    window_ends = range(timesteps, len(df) + 1)
    if not window_ends:
        raise ValueError(
            f"Need at least {timesteps} rows to build a sequence, got {len(df)}"
        )

    X_seq = np.array([X_values[end - timesteps:end] for end in window_ends])
    y_seq = np.array([y_values[end - 1] for end in window_ends])

    # --- Train/Test Split (BEFORE scaling, chronological) ---
    train_size = int(len(X_seq) * 0.8)

    X_train = X_seq[:train_size]
    X_test = X_seq[train_size:]

    y_train = y_seq[:train_size]
    y_test = y_seq[train_size:]

    # --- Scaling (fit ONLY on train, separate scalers per company) ---
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    nsamples, ntimesteps, nfeatures = X_train.shape

    # The scaler works on 2-D data: flatten (samples, timesteps) into rows,
    # scale per feature, then restore the 3-D shape.
    X_train_flat = X_train.reshape((nsamples * ntimesteps, nfeatures))
    scaler_X.fit(X_train_flat)

    # Transform train
    X_train_scaled = scaler_X.transform(X_train_flat).reshape(X_train.shape)

    # Transform test (do NOT fit again)
    X_test_flat = X_test.reshape((X_test.shape[0] * ntimesteps, nfeatures))
    X_test_scaled = scaler_X.transform(X_test_flat).reshape(X_test.shape)

    # Scale y
    scaler_y.fit(y_train.reshape(-1, 1))

    y_train_scaled = scaler_y.transform(y_train.reshape(-1, 1))
    y_test_scaled = scaler_y.transform(y_test.reshape(-1, 1))

    # --- Model ---
    model = Sequential()
    model.add(LSTM(126, return_sequences=True, input_shape=(timesteps, nfeatures)))
    model.add(Dropout(0.2))
    model.add(LSTM(64))
    model.add(Dropout(0.2))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1))

    optimizer = Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='mse')

    early_stop = EarlyStopping(monitor='val_loss',
                               patience=10,
                               restore_best_weights=True)

    model.fit(
        X_train_scaled,
        y_train_scaled,
        epochs=100,
        batch_size=16,
        validation_split=0.2,
        callbacks=[early_stop],
        verbose=0
    )

    # --- Evaluation (metrics are computed in original price units) ---
    y_pred_scaled = model.predict(X_test_scaled, verbose=0)

    y_pred = scaler_y.inverse_transform(y_pred_scaled)
    y_actual = y_test

    rmse = np.sqrt(mean_squared_error(y_actual, y_pred))
    mae = mean_absolute_error(y_actual, y_pred)
    # The small epsilon guards against division by zero.
    mape = np.mean(np.abs((y_actual - y_pred) / (y_actual + 1e-8))) * 100

    # --- Baseline: linear regression on the flattened windows ---
    X_flat_train = X_train_scaled.reshape(X_train_scaled.shape[0], -1)
    X_flat_test = X_test_scaled.reshape(X_test_scaled.shape[0], -1)

    lr_model = LinearRegression()
    lr_model.fit(X_flat_train, y_train)

    lr_pred = lr_model.predict(X_flat_test)

    lr_rmse = np.sqrt(mean_squared_error(y_actual, lr_pred))
    lr_mae = mean_absolute_error(y_actual, lr_pred)
    lr_mape = np.mean(np.abs((y_actual - lr_pred) / (y_actual + 1e-8))) * 100

    results.append([
        target,
        rmse, mae, mape,
        lr_rmse, lr_mae, lr_mape
    ])

    # Save model and scalers for this company
    company_models[target] = model
    scalers_X[target] = scaler_X
    scalers_y[target] = scaler_y



In [None]:
# --- 5. Evaluate Model ---
# One row per target: error metrics of the LSTM next to the linear-regression
# baseline, in the same order in which the training loop appended them.
lstm_metric_columns = ["LSTM_RMSE", "LSTM_MAE", "LSTM_MAPE(%)"]
lr_metric_columns = ["LR_RMSE", "LR_MAE", "LR_MAPE(%)"]

evaluation_df = pd.DataFrame(
    results,
    columns=["Company", *lstm_metric_columns, *lr_metric_columns],
)

print("\nFinal Model Comparison:", evaluation_df, sep="\n")


In [None]:
import numpy as np
import plotly.graph_objects as go
import pandas as pd

# Recursive multi-day forecast per company, plotted next to the last 30 actual closes.
FORECAST_DAYS = 7

companies = [c.replace('_Next','') for c in target_cols]

for target in target_cols:
    comp = target.replace('_Next','')   # name of the Close column, e.g. "TCS.NS_Close"
    print(f"\nPredicting {FORECAST_DAYS}-day future for: {comp}")

    model = company_models[target]
    scaler_X = scalers_X[target]
    scaler_y = scalers_y[target]

    # Features for this company, in the order the model was trained on
    company_features = company_features_dict[target]
    feature_pos = {name: pos for pos, name in enumerate(company_features)}
    X = df[company_features].values  # full feature data

    # Last `timesteps` rows, kept UNSCALED so new rows can be built in feature units
    raw_window = X[-timesteps:].astype(float)
    close_history = df[comp].to_numpy(dtype=float).tolist()

    future_preds = []
    for _ in range(FORECAST_DAYS):
        last_seq_scaled = scaler_X.transform(raw_window).reshape(1, timesteps, len(company_features))
        next_pred_scaled = model.predict(last_seq_scaled, verbose=0)
        next_pred = float(scaler_y.inverse_transform(next_pred_scaled)[0, 0])
        future_preds.append(next_pred)

        # Build the feature row for the predicted day.
        # The model inputs are moving averages, returns and volatilities, not
        # prices, so the predicted price must not be written into a feature slot
        # directly (doing so overwrote the last feature with a price scaled by the
        # target scaler). Instead the Close-derived features are recomputed from
        # the extended close history with the same definitions used when the
        # features were engineered; the remaining features are carried forward
        # from the previous row because no prediction exists for them.
        close_history.append(next_pred)
        recent_closes = pd.Series(close_history[-50:])
        previous_close = close_history[-2]
        refreshed = {
            f'{comp}_MA20': recent_closes.tail(20).mean(),
            f'{comp}_MA50': recent_closes.mean(),
            f'{comp}_Return': (next_pred / previous_close - 1) * 100,
            f'{comp}_Volatility': recent_closes.tail(20).std(),
        }

        new_row = raw_window[-1].copy()
        for feature_name, value in refreshed.items():
            if feature_name in feature_pos:
                new_row[feature_pos[feature_name]] = value

        # Slide the window: drop the oldest row, append the new one
        raw_window = np.vstack([raw_window[1:], new_row])

    # Dates: forecasts are placed on business days so weekends are skipped
    # (exchange holidays are not accounted for).
    actual_prices = df[comp].values[-30:]
    last_date = df.index[-1]
    future_dates = pd.bdate_range(start=last_date + pd.offsets.BDay(1), periods=FORECAST_DAYS)

    # Tickers ending in ".NS" are shown in rupees; the other tickers in this
    # notebook (ACN, CTSH, G) are US listings and are shown in dollars.
    ticker = comp.split('_')[0]
    currency = '₹' if ticker.endswith('.NS') else '$'

    # Plotly figure
    fig = go.Figure()

    # Actual Prices
    fig.add_trace(go.Scatter(
        x=df.index[-30:],
        y=actual_prices,
        mode='lines+markers',
        name='Actual Price',
        fill='tozeroy',
        hovertemplate=f'<b>Actual</b><br>Date: %{{x}}<br>Price: {currency}%{{y:,.2f}}<extra></extra>'  # Custom tooltip

    ))

    # Predicted Prices
    fig.add_trace(go.Scatter(
        x=future_dates,
        y=future_preds,
        mode='lines+markers',
        name='Predicted Price',
        line=dict(dash='dash', color='red'),
        hovertemplate=f'<b>Predicted</b><br>Date: %{{x}}<br>Price: {currency}%{{y:,.2f}}<extra></extra>'  # Custom tooltip

    ))

    fig.update_layout(
        title=f"{comp} Close Price: Actual vs Predicted ({FORECAST_DAYS}-day forecast)",
        xaxis_title='Date',
        yaxis_title='Price',
        legend=dict(x=1,      # Right edge (0=left, 1=right)
                    y=1,      # Top edge (0=bottom, 1=top)
                    # xanchor='right',  # Anchor legend's right side to x position
                    # yanchor='top' # Anchor legend's top to y position
                ),
        template='plotly_white',
        width=500,   # set width in pixels
        height=300 ,
        hovermode='x unified'
    )
    fig.show()


In [None]:
import os
import joblib

os.makedirs("models", exist_ok=True)

# One .keras file per trained model, named after its target column
for name, model in company_models.items():
    model.save(f"models/{name}.keras")

# Scalers and the per-company feature lists go into the working directory,
# each dictionary in its own pickle file.
artifacts = (
    (scalers_X, "scalers_X.pkl"),
    (scalers_y, "scalers_y.pkl"),
    (company_features_dict, "features.pkl"),
)
for payload, filename in artifacts:
    joblib.dump(payload, filename)

print("Models and scalers saved properly.")
