In [None]:

import pandas as pd 
import numpy as np
from yahoo_fin.stock_info import get_data

# Download settings: ticker symbol, date window and bar interval.
comp_name = "RELIANCE.NS"
start_date = "01/01/2022"
end_date = "12/31/2022"
interval = "1d"

# Fetch the price history through yahoo_fin's get_data (index_as_date=True
# requests a date index).
historical_data = get_data(
    comp_name,
    start_date=start_date,
    end_date=end_date,
    index_as_date=True,
    interval=interval,
)

# Working frame: seed it with the 'open' column, then append the remaining
# price/volume columns in a fixed order.
useful_data_coll = pd.DataFrame(historical_data['open'])
for column_name in ('high', 'low', 'close', 'volume'):
    useful_data_coll[column_name] = historical_data[column_name]
print(useful_data_coll)

In [None]:
# Indicator helper functions

#********************************************************************************************for sma
def calculate_sma(prices, period):
    """Return the simple moving average of every full ``period``-long window.

    Args:
        prices: Sliceable sequence of numeric prices.
        period: Number of consecutive prices averaged per window.

    Returns:
        list: One mean per window, i.e. ``len(prices) - period + 1`` entries
        (an empty list when ``prices`` is shorter than ``period``).
    """
    window_count = len(prices) - period + 1
    return [
        sum(prices[start:start + period]) / period
        for start in range(window_count)
    ]
    
#********************************************************************************************for ema
def calculate_ema(prices, period):
    """Compute the exponential moving average (EMA) of a price sequence.

    The smoothing factor is ``alpha = 2 / (period + 1)``. The series is seeded
    with the first price, and every later value is
    ``price * alpha + previous_ema * (1 - alpha)``.

    Args:
        prices: Iterable of numeric prices (list, tuple, pandas Series, ...).
        period: Look-back period used to derive the smoothing factor.

    Returns:
        list: The full EMA series, one value per input price (not only the
        last value). An empty list is returned for empty input.
    """
    # Materialise to a plain list so every access is positional. Integer
    # indexing such as ``prices[0]`` on a pandas Series with a non-integer
    # (e.g. date) index relies on a deprecated positional fallback.
    values = list(prices)
    if not values:
        return []

    alpha = 2 / (period + 1)
    ema_values = [values[0]]  # seed with the first observation
    for price in values[1:]:
        ema_values.append((price * alpha) + (ema_values[-1] * (1 - alpha)))
    return ema_values

#********************************************************************************************for Moving average convergence divergence
def calculate_macd(prices, short_period=12, long_period=26, signal_period=9):
    """Compute the MACD line, its signal line and the MACD histogram.

    Args:
        prices: Price sequence forwarded to ``calculate_ema``.
        short_period: Period of the fast EMA.
        long_period: Period of the slow EMA.
        signal_period: Period of the EMA applied to the MACD line.

    Returns:
        tuple: ``(macd_line, signal_line, macd_histogram)`` as three lists,
        where the histogram is the MACD line minus the signal line.
    """
    fast_ema = calculate_ema(prices, short_period)
    slow_ema = calculate_ema(prices, long_period)

    # MACD line: fast EMA minus slow EMA, element by element.
    macd_line = []
    for fast_value, slow_value in zip(fast_ema, slow_ema):
        macd_line.append(fast_value - slow_value)

    # Signal line: EMA of the MACD line itself.
    signal_line = calculate_ema(macd_line, signal_period)

    # Histogram: distance between the MACD line and its signal line.
    macd_histogram = []
    for macd_value, signal_value in zip(macd_line, signal_line):
        macd_histogram.append(macd_value - signal_value)

    return list(macd_line), list(signal_line), macd_histogram
#********************************************************************************************************std_dev
def calculate_std_dev(closing_prices):
    """Return the population standard deviation of ``closing_prices``.

    The variance divides by the number of observations (not ``n - 1``).

    Args:
        closing_prices: Non-empty, re-iterable sequence of numbers.

    Returns:
        float: Square root of the mean squared deviation from the mean.
    """
    count = len(closing_prices)
    mean = sum(closing_prices) / count
    squared_deviations = [(price - mean) ** 2 for price in closing_prices]
    return (sum(squared_deviations) / count) ** 0.5
    
def calculate_std_dev_list(closing_prices, window_size):
    """Return the rolling standard deviation over every full window.

    Args:
        closing_prices: Sliceable sequence of prices.
        window_size: Number of consecutive prices per window.

    Returns:
        list: ``calculate_std_dev`` of each window, one entry per window
        start (``len(closing_prices) - window_size + 1`` entries).
    """
    last_start = len(closing_prices) - window_size
    return [
        calculate_std_dev(closing_prices[start:start + window_size])
        for start in range(last_start + 1)
    ]

#********************************************************************************************************Stochastic Oscillator:
def calculate_stochastic_oscillator(closing_prices, period):
    """Return a stochastic-oscillator reading for every full window.

    Each reading places the window's last price within the window's own
    min/max range, scaled to 0-100. Only closing prices are used.

    Args:
        closing_prices: Positionally indexable, sliceable price sequence.
        period: Window length.

    Returns:
        list: One reading per window; ``0`` is recorded when the window's
        minimum and maximum are equal.
    """
    readings = []
    for start in range(len(closing_prices) - period + 1):
        stop = start + period
        window = closing_prices[start:stop]
        lowest, highest = min(window), max(window)

        # Flat window: the range is zero, so record 0 instead of dividing.
        if lowest == highest:
            readings.append(0)
            continue

        latest = closing_prices[stop - 1]
        readings.append(((latest - lowest) / (highest - lowest)) * 100)
    return readings
#**************************************************************************************************On-Balance Volume (OBV)
def calculate_obv(prices, volumes):
    """Compute the On-Balance Volume (OBV) series.

    Starting from 0, each step adds the current volume when the price rose
    versus the previous price, subtracts it when the price fell, and carries
    the previous OBV forward when the price is unchanged.

    Args:
        prices: Iterable of prices (list, tuple, pandas Series, ...).
        volumes: Iterable of volumes aligned position-by-position with
            ``prices``.

    Returns:
        list: One OBV value per price, so the result can be stored next to
        the input. An empty list is returned for empty ``prices``.

    Raises:
        IndexError: If ``volumes`` is shorter than ``prices`` and a missing
            volume is needed.
    """
    # Plain lists guarantee positional access; integer indexing on a pandas
    # Series with a non-integer (e.g. date) index relies on a deprecated
    # positional fallback.
    price_values = list(prices)
    volume_values = list(volumes)
    if not price_values:
        return []

    obv = [0]
    for index in range(1, len(price_values)):
        current, previous = price_values[index], price_values[index - 1]
        if current > previous:
            obv.append(obv[-1] + volume_values[index])
        elif current < previous:
            obv.append(obv[-1] - volume_values[index])
        else:
            obv.append(obv[-1])
    return obv



#********************************************************* difference calculator between two lists, e.g. ema(9) - ema(21)
def average_diff(savg, lavg):
    """Return the element-wise difference ``savg[i] - lavg[i]``.

    Positions where either input holds ``None`` produce ``None``.

    Args:
        savg: Iterable of values (e.g. a short-period average); may contain
            ``None`` placeholders.
        lavg: Iterable of values (e.g. a long-period average), at least as
            long as ``savg``.

    Returns:
        list: One entry per element of ``savg``.

    Raises:
        IndexError: If ``lavg`` is shorter than ``savg``.
    """
    # Plain lists guarantee positional access; integer indexing on a pandas
    # Series with a non-integer (e.g. date) index relies on a deprecated
    # positional fallback.
    short_values = list(savg)
    long_values = list(lavg)

    differences = []
    for index, short_value in enumerate(short_values):
        long_value = long_values[index]
        # Identity check: ``== None`` is evaluated element-wise by array-like
        # values and cannot be used as a plain truth value.
        if short_value is None or long_value is None:
            differences.append(None)
        else:
            differences.append(short_value - long_value)
    return differences

In [None]:

# Compute each indicator and attach it to useful_data_coll as a new column.
# Windowed indicators return fewer values than there are rows, so the missing
# leading entries are filled with None to keep every column the same length.

# ---------------------------------------------------------------- SMA (9 / 21)
short_period = 9
long_period = 21

short_sma_values = [None] * (short_period - 1) + calculate_sma(historical_data["close"], short_period)
long_sma_values = [None] * (long_period - 1) + calculate_sma(historical_data["close"], long_period)

useful_data_coll['s_sma'] = short_sma_values
useful_data_coll['l_sma'] = long_sma_values

# ---------------------------------------------------------------- EMA (same 9 / 21 periods)
short_ema_values = calculate_ema(historical_data["close"], short_period)
long_ema_values = calculate_ema(historical_data["close"], long_period)

useful_data_coll['s_ema'] = short_ema_values
useful_data_coll['l_ema'] = long_ema_values

# ---------------------------------------------------------------- MACD (12 / 26 / 9)
short_period = 12
long_period = 26
signal_period = 9

macd_line, signal_line, macd_histogram = calculate_macd(
    historical_data["close"], short_period, long_period, signal_period
)
useful_data_coll['macd_h'] = macd_histogram

# ---------------------------------------------------------------- rolling standard deviation
window_size = 37
std_dev = [None] * (window_size - 1) + calculate_std_dev_list(historical_data["close"], window_size)
useful_data_coll['std_dev'] = std_dev

# ---------------------------------------------------------------- stochastic oscillator
period = 14  # default
stochastic_oscillator = [None] * (period - 1) + calculate_stochastic_oscillator(
    list(historical_data["close"]), period
)
useful_data_coll['stc_osc'] = stochastic_oscillator

# ---------------------------------------------------------------- on-balance volume
obv_values = calculate_obv(historical_data["close"], historical_data["volume"])
useful_data_coll['obv'] = obv_values

print(useful_data_coll)


In [None]:
def generate_features(df):
    """Build the rate-of-change feature and the SMA/EMA spread features.

    Only the rate of change is derived from ``df``. The two spreads are read
    from the module-level ``useful_data_coll`` frame (columns ``s_sma``,
    ``l_sma``, ``s_ema``, ``l_ema``), which must already be populated.

    Args:
        df: pandas object supporting ``shift`` and arithmetic (the caller
            passes the ``close`` column).

    Returns:
        tuple: ``(roc, sma_diff_values, ema_diff_values)`` where ``roc`` is
        the percentage change versus the previous row and the two spreads
        are the lists returned by ``average_diff``.
    """
    # Price rate of change (ROC), in percent, relative to the previous row.
    previous = df.shift(1)
    roc = (df - previous) / previous * 100

    # Short-minus-long spreads of the precomputed moving averages.
    sma_diff_values = average_diff(useful_data_coll['s_sma'], useful_data_coll['l_sma'])
    ema_diff_values = average_diff(useful_data_coll['s_ema'], useful_data_coll['l_ema'])

    return roc, sma_diff_values, ema_diff_values



# Derive ROC and the SMA/EMA spreads from the close column and store them as
# new columns of the working frame.
roc_values, sma_spread, ema_spread = generate_features(useful_data_coll['close'])
useful_data_coll['roc'] = roc_values
useful_data_coll['sma_diff'] = sma_spread
useful_data_coll['ema_diff'] = ema_spread

# Keep only the rows in which every column has a value.
df = useful_data_coll.dropna()
print(df)

In [None]:
# Function to generate trading decisions based on columns of df
def generate_trading_decisions(df):
    """Label every row as buy (1), sell (-1) or hold (0) in a 'td_ds' column.

    A row is a *buy* when either
      * OBV rose versus the previous row and ``ema_diff``, ``sma_diff`` and
        ``macd_h`` are all positive, or
      * ``macd_h`` is negative now and in the previous row but positive in
        the next row.
    A row is a *sell* under the mirrored conditions. Sell labels are written
    last, so a row matching both conditions ends up as sell.

    Note: the ``shift(-1)`` terms read the *next* row, so the labels depend
    on future data relative to the row being labelled.

    Args:
        df: DataFrame with columns ``obv``, ``ema_diff``, ``sma_diff`` and
            ``macd_h``.

    Returns:
        pandas.DataFrame: A labelled copy of ``df``; the input frame itself
        is left untouched.
    """
    # Label a copy so the caller's frame is not mutated. This also avoids
    # SettingWithCopyWarning when ``df`` was derived from another frame
    # (e.g. the result of ``dropna()``).
    labeled = df.copy()

    # Every row starts as hold (0).
    labeled['td_ds'] = 0

    obv = labeled['obv']
    previous_obv = obv.shift(1)
    macd_h = labeled['macd_h']
    previous_macd_h = macd_h.shift(1)
    next_macd_h = macd_h.shift(-1)

    buy_condition = (
        # OBV is increasing and EMA/SMA differences and MACD histogram are positive
        ((obv > previous_obv)
         & ((labeled['ema_diff'] > 0) & (labeled['sma_diff'] > 0) & (macd_h > 0)))
        # MACD histogram negative now and before, positive in the next period
        | ((macd_h < 0) & (previous_macd_h < 0) & (next_macd_h > 0))
    )

    sell_condition = (
        # OBV is decreasing and EMA/SMA differences and MACD histogram are negative
        ((obv < previous_obv)
         & ((labeled['ema_diff'] < 0) & (labeled['sma_diff'] < 0) & (macd_h < 0)))
        # MACD histogram positive now and before, negative in the next period
        | ((macd_h > 0) & (previous_macd_h > 0) & (next_macd_h < 0))
    )

    # Apply buy first, then sell (sell wins when both conditions hold).
    labeled.loc[buy_condition, 'td_ds'] = 1
    labeled.loc[sell_condition, 'td_ds'] = -1

    return labeled



df = generate_trading_decisions(df)

# Tally the labels once. ``.get(label, 0)`` reports 0 for a label that never
# occurs instead of raising KeyError as ``value_counts()[label]`` would.
decision_counts = df['td_ds'].value_counts()
count_buy = decision_counts.get(1, 0)
count_sell = decision_counts.get(-1, 0)
count_hold = decision_counts.get(0, 0)
print("Number of occurrences of 'Buy' , sell , hold : ", count_buy,count_sell,count_hold)

# Replace the date index with a plain 0..n-1 index.
df = df.reset_index(drop=True)
print(df)

In [None]:
# Shuffle the dataset (optional but recommended)
df_shuffled = df.sample(frac=1, random_state=42).reset_index(drop=True)
train_ratio = 0.8  # share of rows used for training; the rest is the test set

# Row position at which the shuffled frame is cut into train and test parts.
num_train_samples = int(len(df_shuffled) * train_ratio)

# Separate the label column from the remaining (feature) columns once.
feature_frame = df_shuffled.drop(columns=['td_ds'])
label_series = df_shuffled['td_ds']

X_train = feature_frame.iloc[:num_train_samples]
X_test = feature_frame.iloc[num_train_samples:]

# Labels are kept as numpy arrays.
y_train = np.array(label_series.iloc[:num_train_samples])
y_test = np.array(label_series.iloc[num_train_samples:])


In [None]:
from sklearn.linear_model import LogisticRegression


# Multinomial logistic regression fitted with the newton-cg solver.
# NOTE(review): `multi_class` appears to be deprecated in recent scikit-learn
# releases; confirm the installed version still accepts it.
model = LogisticRegression(
    solver='newton-cg',
    multi_class='multinomial',
    random_state=0,
)
H = model.fit(X_train, y_train)

print('Logistic Regression Model Coeff (m) =\n' , model.coef_)
print('\nLogistic Regression Model Coeff (b) =\n' , model.intercept_)

# Score the held-out rows: per-class probabilities and hard class labels.
y_predict_pro = model.predict_proba(X_test)
y_predict = model.predict(X_test)

print("\nPrediction Probability : \n",y_predict_pro)

print("\nPrediction : \n",y_predict)


In [None]:
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score,confusion_matrix

def calculate_metrics(y_true, y_pred_proba, y_pred):
    """Score multiclass predictions.

    Args:
        y_true: True class labels.
        y_pred_proba: Predicted class probabilities, passed to
            ``roc_auc_score`` with ``multi_class='ovr'``.
        y_pred: Predicted class labels.

    Returns:
        tuple: ``(auc_roc, accuracy, f1)`` where ``f1`` uses the
        ``'weighted'`` average.
    """
    return (
        roc_auc_score(y_true, y_pred_proba, multi_class='ovr'),  # one-vs-rest AUC
        accuracy_score(y_true, y_pred),
        f1_score(y_true, y_pred, average='weighted'),  # weighted average for multiclass
    )


# Score the test-set predictions and report each metric on its own line.
auc_roc, accuracy, f1 = calculate_metrics(y_test, y_predict_pro, y_predict)
for metric_label, metric_value in (
    ("AUC-ROC:", auc_roc),
    ("Accuracy:", accuracy),
    ("F1 Score:", f1),
):
    print(metric_label, metric_value)

# Confusion matrix of true labels against fresh predictions on the test set.
test_confusion = confusion_matrix(y_test, model.predict(X_test))
print("Confusion Matrix : \n", test_confusion)