In [48]:
#libraries
import pandas as pd
import pandas_ta as ta
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [49]:
# Model input columns, in the order they are handed to the regressor.
# The prediction cell builds its sample row positionally against this list,
# so the order here must not change without updating that cell too.
features = [
    # calendar features derived from DATE
    "Year",
    "Month",
    "DayOfWeek",
    # technical indicators (added to df by the indicator cell)
    "bb_middle",
    "bb_std",
    "bb_upper",
    "bb_lower",
    "sma",
    "rsi",
    # raw columns from the CSV
    "OPEN",
    "HIGH",
    "LOW",
    "PREV CLOSE",
    "LTP",
    "VWAP",
    "52W H",
    "52W L",
    "VALUE",
    "VOLUME",
    "NO OF TRADES",
    "CLOSE",
]

# Show floats with two decimals whenever pandas renders a frame.
pd.options.display.float_format = '{:.2f}'.format

# Read the raw stock data.
df = pd.read_csv('merged_data.csv')

# Replace the file's header with fixed names. This is positional: it raises if
# the CSV does not have exactly 14 columns, and it silently mislabels columns
# if the file's column order differs from this list.
new_column_names = ["DATE", "SERIES", "OPEN", "HIGH", "LOW", "PREV CLOSE", "LTP", "CLOSE",
                    "VWAP", "52W H", "52W L", "VALUE", "VOLUME", "NO OF TRADES"]
df.columns = new_column_names

# Drop every row that has at least one missing value.
df.dropna(inplace=True)

# VALUE, NO OF TRADES and VOLUME may carry thousands separators ("1,018,438").
# Strip the commas and convert to float. Casting to str first keeps this
# working when pandas has already parsed a column as numeric (no commas in the
# file), where the .str accessor alone would raise AttributeError.
for column in ("VALUE", "NO OF TRADES", "VOLUME"):
    df[column] = (
        df[column]
        .astype(str)
        .str.replace(',', '', regex=False)
        .astype(float)
    )

# SERIES is not used as a model input.
df.drop("SERIES", axis=1, inplace=True)

# Parse DATE and expand it into numeric calendar features.
df['DATE'] = pd.to_datetime(df['DATE'])
df['Year'] = df['DATE'].dt.year
df['Month'] = df['DATE'].dt.month
df['DayOfWeek'] = df['DATE'].dt.dayofweek

# The raw timestamp itself is not a model input.
df.drop('DATE', axis=1, inplace=True)

# Regression target: the CLOSE of the row directly above, moved one row down.
# NOTE(review): this is only the *next* trading day's close if the rows of
# merged_data.csv are ordered newest-first; the file is never sorted here, so
# confirm its ordering (the rolling indicators computed later depend on it too).
df['NEXT_PRICE'] = df['CLOSE'].shift(1)

# The first row has no row above it, so its NEXT_PRICE is NaN; drop it.
df.dropna(inplace=True)


In [50]:
# Technical indicators, all derived from the CLOSE column of df.
close = df['CLOSE']

# Relative Strength Index, using pandas_ta's default settings.
df['rsi'] = ta.rsi(close)

# --- Bollinger Bands ---------------------------------------------------------
window = 20   # look-back length (rows) for the middle band
std_dev = 2   # band width, in rolling standard deviations

# Middle band = rolling mean; bb_std = rolling standard deviation of CLOSE.
rolling_close = close.rolling(window)
df['bb_middle'] = rolling_close.mean()
df['bb_std'] = rolling_close.std()

# Upper / lower bands sit std_dev deviations either side of the middle band.
band_offset = std_dev * df['bb_std']
df['bb_upper'] = df['bb_middle'] + band_offset
df['bb_lower'] = df['bb_middle'] - band_offset

# --- MACD --------------------------------------------------------------------
# The result is only bound to `macd`; it is not written into df and nothing in
# the visible cells reads it afterwards.
macd = ta.macd(close)

# --- Simple moving average ---------------------------------------------------
window_size = 10  # look-back length (rows) for the SMA
df['sma'] = close.rolling(window=window_size).mean()

# The rolling windows leave NaN in their warm-up rows; discard those rows.
df.dropna(inplace=True)

  macd = ta.macd(df['CLOSE'])


In [51]:
# Regression target and design matrix (columns taken in `features` order).
y = df["NEXT_PRICE"]
X = df[features]

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
# NOTE(review): a random_state is passed, so the rows are presumably shuffled
# before splitting. For date-ordered price rows that mixes earlier and later
# days between train and test - confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Ordinary least-squares fit on the training portion.
model = LinearRegression().fit(X_train, y_train)

# Predictions for the held-out rows.
y_pred = model.predict(X_test)

# Held-out error metrics; both are printed by the prediction cell.
mse = mean_squared_error(y_test, y_pred)

# R^2 on the held-out rows: the same quantity a regressor's .score() reports.
model_score = r2_score(y_test, y_pred)




In [55]:
# Predict the next price for one hand-entered sample row.

# Sample values keyed by feature name, so each number is self-describing.
# NOTE(review): 1018438 x 697.72 (VWAP) is about 7.106e8, which matches the
# figure labelled VOLUME - the VALUE / VOLUME labels look swapped. The fit is
# unaffected as long as training and prediction agree; confirm against the
# CSV header.
sample_row = {
    "Year": 2023,
    "Month": 3,
    "DayOfWeek": 0,
    "bb_middle": 710.06,
    "bb_std": 10.14,
    "bb_upper": 730.15,
    "bb_lower": 689.96,
    "sma": 703.19,
    "rsi": 46.10,
    "OPEN": 691,
    "HIGH": 706.75,
    "LOW": 691,
    "PREV CLOSE": 706.20,
    "LTP": 703.45,
    "VWAP": 697.72,
    "52W H": 861.15,
    "52W L": 686.60,
    "VALUE": 1018438,
    "VOLUME": 710588687.9,
    "NO OF TRADES": 38046,
    "CLOSE": 702.60,
}

# Lay the values out in exactly the column order the model was trained on.
test_f = [sample_row[name] for name in features]

# One-row frame with the training column names.
test_df = pd.DataFrame([test_f], columns=features)
T = test_df[features]

output = model.predict(T)

# Report the held-out metrics next to the single prediction.
print("Model Score: ", model_score)
print("Mean Squared Error:", mse)
# The actual next price for the Tata Consumer stock was 700.90.
print("[Actual Next PRICE   ] --> [700.90]")
print("[Predicted Next Price] --> [", round(output[0], 2), "]")

Model Score:  0.9991973408233907
Mean Squared Error: 56.314506723486325
[Actual Next PRICE   ] --> [700.90]
[Predicted Next Price] --> [ 700.46 ]
