In [2]:
import pandas as pd
import os
# -------------------------------
# Configuration
# -------------------------------
RAW_CSV_PATH = "../data/raw/TCS_raw.csv"
PROCESSED_CSV_PATH = "../data/processed/TCS_features.csv"

# Price/volume columns that are converted explicitly with pd.to_numeric
# after the junk first row has been removed.
numeric_cols = ['Open', 'High', 'Low', 'Close', 'Volume']


# -------------------------------
# 1-2. Load raw dataset and fix data types
# -------------------------------
def load_raw_prices(path):
    """Read the raw price CSV at *path* and return it indexed by ``Date``.

    The first data row is discarded because it is corrupted (it contains
    the ticker string 'TCS.NS' instead of prices).  ``Date`` is parsed to
    datetime and the columns listed in ``numeric_cols`` are converted to
    numeric dtypes; a value that cannot be converted raises, so bad input
    is not silently turned into NaN.
    """
    raw = pd.read_csv(path)
    raw = raw.iloc[1:].reset_index(drop=True)
    raw['Date'] = pd.to_datetime(raw['Date'])
    raw[numeric_cols] = raw[numeric_cols].apply(pd.to_numeric)
    return raw.set_index('Date')


# -------------------------------
# 3. Feature Engineering
# -------------------------------
def add_technical_features(frame):
    """Return a copy of *frame* with indicator columns derived from ``Close``.

    Added columns: ``SMA_20``, ``SMA_50``, ``EMA_20``, ``RSI`` (14-period),
    ``MACD``, ``MACD_signal`` and ``Daily_Return``.  Every row that still
    contains a NaN in any column afterwards is dropped, which removes the
    warm-up rows of the rolling windows (the 50-row SMA is the longest).
    The input frame is not modified.
    """
    out = frame.copy()
    close = out['Close']

    # Simple Moving Averages
    out['SMA_20'] = close.rolling(window=20).mean()
    out['SMA_50'] = close.rolling(window=50).mean()

    # Exponential Moving Average
    out['EMA_20'] = close.ewm(span=20, adjust=False).mean()

    # RSI (14-period), using plain rolling means of gains and losses.
    # If a window has neither gains nor losses the ratio is 0/0 = NaN and
    # the row is removed by the dropna() below.
    delta = close.diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.rolling(window=14).mean()
    avg_loss = loss.rolling(window=14).mean()
    rs = avg_gain / avg_loss
    out['RSI'] = 100 - (100 / (1 + rs))

    # MACD: 12-span EMA minus 26-span EMA, with a 9-span EMA signal line
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()
    out['MACD'] = ema_12 - ema_26
    out['MACD_signal'] = out['MACD'].ewm(span=9, adjust=False).mean()

    # Daily Returns
    out['Daily_Return'] = close.pct_change()

    # Remove rows with NaN values
    return out.dropna()


# -------------------------------
# 4. Target Variable (Classification)
# -------------------------------
def add_target(frame):
    """Return a copy of *frame* with a binary next-day ``Target`` column.

    ``Target`` is 1 when the next row's ``Close`` is strictly greater than
    the current row's ``Close`` and 0 otherwise.  Rows with no next close
    (the final row) are dropped: comparing against the NaN produced by
    ``shift(-1)`` evaluates to False, which would otherwise be stored as a
    fabricated 0 ("DOWN") label that a later ``dropna`` cannot detect.
    The input frame is not modified.
    """
    out = frame.copy()
    next_close = out['Close'].shift(-1)
    out['Target'] = (next_close > out['Close']).astype(int)
    has_next = next_close.notna()
    return out.loc[has_next].copy()


if __name__ == "__main__":
    df = load_raw_prices(RAW_CSV_PATH)
    df = add_technical_features(df)
    df = add_target(df)

    # -------------------------------
    # 5. Save processed dataset
    # -------------------------------
    df = df.reset_index()
    # Create the output folder first so to_csv does not fail on a fresh checkout.
    os.makedirs(os.path.dirname(PROCESSED_CSV_PATH), exist_ok=True)
    df.to_csv(PROCESSED_CSV_PATH, index=False)

    print("Feature engineering completed successfully")


Feature engineering completed successfully
