In [None]:
#import libraries
import pandas as pd
import numpy as np
import random as rd
import matplotlib.pyplot as plt
import warnings
from scipy.stats import lognorm
from sklearn.cluster import KMeans
import os
import joblib
warnings.filterwarnings('ignore')

In [None]:
# Location of the NIFTY50 price CSV. The original hard-coded path stays the default;
# set the NIFTY50_CSV environment variable to run the notebook on another machine.
data_path = os.environ.get('NIFTY50_CSV', '/Users/uddashyakumar/Desktop/multyfi/NIFTY50.csv')
data = pd.read_csv(data_path)

# Parse the 'datetime' column into pandas timestamps
data['datetime'] = pd.to_datetime(data['datetime'])

# Keep the years 2017 to 2019. The upper bound is "before the day after end_date",
# so rows time-stamped during 2019-12-31 (after midnight) are kept as well.
start_date = pd.to_datetime('2017-01-01')
end_date = pd.to_datetime('2019-12-31')
in_range = (data['datetime'] >= start_date) & (data['datetime'] < end_date + pd.Timedelta(days=1))

# .copy() makes filtered_data an independent frame, so modifying it in later cells
# does not act on a slice of `data`.
filtered_data = data[in_range].copy()

filtered_data.head(5)


In [None]:

# Narrow the filtered frame to a one-column DataFrame holding the closing price
data = filtered_data.loc[:, ['close']]
data.head(5)  # not the last expression of the cell, so nothing is rendered for it
data.shape

In [None]:
# Move 'datetime' into the index. The guard keeps this cell re-runnable: once the
# column has become the index, set_index('datetime') would raise KeyError.
if 'datetime' in filtered_data.columns:
    filtered_data = filtered_data.set_index('datetime')

# Make sure the index is in DatetimeIndex format, which resample() needs
filtered_data.index = pd.DatetimeIndex(filtered_data.index)

# Resample the data on a monthly basis and calculate OHLCV values:
# first open, highest high, lowest low, last close and total volume per month.
# NOTE: pandas 2.2+ prefers the alias 'ME' for month-end; 'M' is kept here so the
# cell still runs on older pandas versions.
monthly_resampled_data = filtered_data.resample('M').agg({
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last',
    'volume': 'sum'
})

# Display the resampled data
print(monthly_resampled_data.head())
monthly_resampled_data.shape

In [None]:
# How each column is collapsed when rows are grouped into one weekly bar
weekly_ohlcv_rules = {
    'open': 'first',
    'high': 'max',
    'low': 'min',
    'close': 'last',
    'volume': 'sum',
}

# Group the datetime-indexed frame by week and build the OHLCV bars
weekly_resampled_data = filtered_data.resample('W').agg(weekly_ohlcv_rules)

# Show the first weekly bars, then the frame's dimensions
print(weekly_resampled_data.head())
weekly_resampled_data.shape

In [None]:
# Fit a lognormal distribution to the weekly 'close' data:
# mu/sigma are the mean and standard deviation of log(close).
log_close_weekly = np.log(weekly_resampled_data['close'])
mu_weekly, sigma_weekly = log_close_weekly.mean(), log_close_weekly.std()

# Draw one synthetic value per weekly bar from the fitted lognormal. Note that
# s_weekly is a random sample, not the observed closes. A seeded generator is used
# so that Restart & Run All reproduces the same sample.
rng_weekly = np.random.default_rng(42)
s_weekly = rng_weekly.lognormal(mu_weekly, sigma_weekly, len(weekly_resampled_data))

In [None]:
# Fit a lognormal distribution to the 'close' data of the un-resampled frame:
# mu/sigma are the mean and standard deviation of log(close).
log_close = np.log(data['close'])
mu, sigma = log_close.mean(), log_close.std()

# Draw one synthetic value per row from the fitted lognormal. Note that `s` is a
# random sample, not the observed closes. A seeded generator is used so that
# Restart & Run All reproduces the same sample.
rng_daily = np.random.default_rng(42)
s = rng_daily.lognormal(mu, sigma, len(data))

In [None]:
# Fit a lognormal distribution to the monthly 'close' data:
# mu/sigma are the mean and standard deviation of log(close).
log_close_monthly = np.log(monthly_resampled_data['close'])
mu_monthly, sigma_monthly = log_close_monthly.mean(), log_close_monthly.std()

# Draw one synthetic value per monthly bar from the fitted lognormal. Note that
# s_monthly is a random sample, not the observed closes. A seeded generator is used
# so that Restart & Run All reproduces the same sample.
rng_monthly = np.random.default_rng(42)
s_monthly = rng_monthly.lognormal(mu_monthly, sigma_monthly, len(monthly_resampled_data))


In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the sample `s` as a single feature column.
# `scaler` stays at notebook level because predict_cluster calls scaler.transform.
scaler = StandardScaler()
data_for_scaling = s.reshape(-1, 1)
data_scaled = scaler.fit_transform(data_for_scaling)

# Wrap the scaled values in a frame; later cells address the column as 'log close'
data_scaled_df = pd.DataFrame(data_scaled, columns=['log close'])

# Line plot of the scaled series, followed by the frame's dimensions
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(data_scaled_df['log close'])
data_scaled_df.shape

In [None]:
# Reshape the weekly sample into a single feature column
data_for_scaling_weekly = s_weekly.reshape(-1, 1)

# Use a dedicated scaler for the weekly series. Re-fitting the shared `scaler` here
# would overwrite the statistics that predict_cluster applies via scaler.transform.
scaler_weekly = StandardScaler()
data_scaled_weekly = scaler_weekly.fit_transform(data_for_scaling_weekly)

# Build the frame from the SCALED values (it was previously built from the unscaled
# array, so the scaling step had no effect on the clustering input).
# If kmeans_weekly_model.joblib was saved by an earlier run, delete it so the
# weekly model is refitted on the scaled values.
data_scaled_weekly_df = pd.DataFrame(data_scaled_weekly, columns=['log close'])

plt.figure(figsize=(12,6))

plt.plot(data_scaled_weekly_df['log close'])
data_scaled_weekly.shape

In [None]:
# Reshape the monthly sample into a single feature column
data_for_scaling_monthly = s_monthly.reshape(-1, 1)

# Use a dedicated scaler for the monthly series. Re-fitting the shared `scaler` here
# would overwrite the statistics that predict_cluster applies via scaler.transform.
scaler_monthly = StandardScaler()
data_scaled_monthly = scaler_monthly.fit_transform(data_for_scaling_monthly)

# Build the frame from the scaled MONTHLY values (it was previously built from the
# unscaled weekly array, so the "monthly" frame actually held weekly data).
# If kmeans_monthly_model.joblib was saved by an earlier run, delete it so the
# monthly model is refitted on the corrected data.
data_scaled_monthly_df = pd.DataFrame(data_scaled_monthly, columns=['log close'])

plt.figure(figsize=(12,6))

plt.plot(data_scaled_monthly_df['log close'])

In [None]:
# Fit the KMeans model once and cache it on disk; later runs reuse the saved model
if not os.path.exists('kmeans_model.joblib'):
    # random_state pins the centroid initialisation so a refit is reproducible
    model = KMeans(n_clusters=3, init='k-means++', random_state=42)
    # Fit on the feature column only, so a 'Cluster' column added by a later cell
    # can never leak into the training data when this cell is re-run
    model.fit(data_scaled_df[['log close']])
    # Save the trained model to a file
    joblib.dump(model, 'kmeans_model.joblib')
else:
    # Load the trained model from the file.
    # NOTE: the cached model is used as-is, without checking that it was fitted on
    # the current data; delete kmeans_model.joblib to force a refit.
    model = joblib.load('kmeans_model.joblib')

In [None]:
# Label every row with its cluster. Only the feature column is passed to predict(),
# so the cell can be re-run after 'Cluster' has been added to the frame.
data_scaled_df['Cluster'] = model.predict(data_scaled_df[['log close']])
data_scaled_df.head(5)

In [None]:
# Fit the weekly KMeans model once and cache it on disk; later runs reuse the saved model
if not os.path.exists('kmeans_weekly_model.joblib'):
    # random_state pins the centroid initialisation so a refit is reproducible
    model_weekly = KMeans(n_clusters=3, init='k-means++', random_state=42)
    # Fit on the feature column only, so a 'Cluster' column added by a later cell
    # can never leak into the training data when this cell is re-run
    model_weekly.fit(data_scaled_weekly_df[['log close']])
    # Save the trained model to a file
    joblib.dump(model_weekly, 'kmeans_weekly_model.joblib')
else:
    # Load the trained model from the file.
    # NOTE: the cached model is used as-is, without checking that it was fitted on
    # the current data; delete kmeans_weekly_model.joblib to force a refit.
    model_weekly = joblib.load('kmeans_weekly_model.joblib')

In [None]:
# Label every weekly row with its cluster. Only the feature column is passed to
# predict(), so the cell can be re-run after 'Cluster' has been added to the frame.
data_scaled_weekly_df['Cluster'] = model_weekly.predict(data_scaled_weekly_df[['log close']])
data_scaled_weekly_df.head(5)

In [None]:
# Fit the monthly KMeans model once and cache it on disk; later runs reuse the saved model
if not os.path.exists('kmeans_monthly_model.joblib'):
    # random_state pins the centroid initialisation so a refit is reproducible
    model_monthly = KMeans(n_clusters=3, init='k-means++', random_state=42)
    # Fit on the feature column only, so a 'Cluster' column added by a later cell
    # can never leak into the training data when this cell is re-run
    model_monthly.fit(data_scaled_monthly_df[['log close']])
    # Save the trained model to a file
    joblib.dump(model_monthly, 'kmeans_monthly_model.joblib')
else:
    # Load the trained model from the file.
    # NOTE: the cached model is used as-is, without checking that it was fitted on
    # the current data; delete kmeans_monthly_model.joblib to force a refit.
    model_monthly = joblib.load('kmeans_monthly_model.joblib')

In [None]:
# Label every monthly row with its cluster. Only the feature column is passed to
# predict(), so the cell can be re-run after 'Cluster' has been added to the frame.
data_scaled_monthly_df['Cluster'] = model_monthly.predict(data_scaled_monthly_df[['log close']])
data_scaled_monthly_df.head(5)
data_scaled_monthly_df.shape

In [None]:
# Plot the 'log close' values of each cluster as its own line.
# Only the feature column is drawn: passing the whole frame to plt.plot would also
# draw the constant 'Cluster' label column and duplicate every legend entry.
plt.figure(figsize=(12, 6))
for cluster in sorted(data_scaled_df['Cluster'].unique()):
    members = data_scaled_df.loc[data_scaled_df['Cluster'] == cluster, 'log close']
    plt.plot(members.index, members.values, label=f'Cluster {cluster}')
plt.xlabel('row index')
plt.ylabel('log close')
plt.legend()
plt.show()


In [None]:
# Plot the weekly 'log close' values of each cluster as its own line.
# Only the feature column is drawn: passing the whole frame to plt.plot would also
# draw the constant 'Cluster' label column and duplicate every legend entry.
plt.figure(figsize=(12, 6))
for cluster in sorted(data_scaled_weekly_df['Cluster'].unique()):
    members = data_scaled_weekly_df.loc[data_scaled_weekly_df['Cluster'] == cluster, 'log close']
    plt.plot(members.index, members.values, label=f'Cluster {cluster}')
plt.xlabel('row index')
plt.ylabel('log close')
plt.legend()
plt.show()


In [None]:
# Plot the monthly 'log close' values of each cluster as its own line.
# Only the feature column is drawn: passing the whole frame to plt.plot would also
# draw the constant 'Cluster' label column and duplicate every legend entry.
plt.figure(figsize=(12, 6))
for cluster in sorted(data_scaled_monthly_df['Cluster'].unique()):
    members = data_scaled_monthly_df.loc[data_scaled_monthly_df['Cluster'] == cluster, 'log close']
    plt.plot(members.index, members.values, label=f'Cluster {cluster}')
plt.xlabel('row index')
plt.ylabel('log close')
plt.legend()
plt.show()


In [None]:
# Function to predict the market regime for a closing price
def predict_cluster(closing_price, kmeans=None, price_scaler=None):
    """Classify a closing price as 'Bearish', 'Consolidated' or 'Bullish'.

    The price is scaled, assigned to a KMeans cluster, and the cluster is named by
    where its centre sits among all cluster centres: the lowest centre is
    'Bearish', the highest is 'Bullish', and any centre in between is
    'Consolidated'. Ranking the centres is used instead of fixed label numbers
    because the numbering of KMeans clusters is arbitrary and can change between
    fits.

    Parameters
    ----------
    closing_price : float
        Price to classify, in the units the scaler was fitted on.
    kmeans : fitted clustering model, optional
        Must provide ``predict`` and ``cluster_centers_``. Defaults to the
        notebook-level ``model``.
    price_scaler : fitted scaler, optional
        Must provide ``transform``. Defaults to the notebook-level ``scaler``.

    Returns
    -------
    str
        'Bearish', 'Consolidated' or 'Bullish'.
    """
    # Fall back to the notebook globals only when no explicit object is passed
    kmeans = model if kmeans is None else kmeans
    price_scaler = scaler if price_scaler is None else price_scaler

    # One sample with one feature, matching the shape used when fitting
    scaled_data = price_scaler.transform([[closing_price]])
    label = int(kmeans.predict(scaled_data)[0])

    # Rank of the predicted cluster = number of centres lying strictly below it
    centers = np.asarray(kmeans.cluster_centers_, dtype=float)[:, 0]
    rank = int((centers < centers[label]).sum())

    if rank == 0:
        return 'Bearish'
    if rank == len(centers) - 1:
        return 'Bullish'
    return 'Consolidated'


In [None]:
# Ask the user for a daily close and parse it as a float
raw_daily_close = input("Enter Daily closing price: ")
closing_price_daily = float(raw_daily_close)

# Map the price to its regime label
cluster_daily = predict_cluster(closing_price_daily)

# Report the price together with the predicted label
print(f"Closing price {closing_price_daily} belongs to cluster:", cluster_daily)

In [None]:
# Take input (this cell classifies a weekly close, so the prompt now says so)
closing_price_weekly = float(input("Enter Weekly closing price: "))

# Predict cluster
# NOTE(review): predict_cluster is called with the price only, so it reads the
# notebook-level `model` and `scaler`; `model_weekly` is not used here. Confirm
# whether weekly prices are meant to be scored by the weekly model.
cluster_weekly = predict_cluster(closing_price_weekly)

# Print result
print(f"Closing price {closing_price_weekly} belongs to cluster:", cluster_weekly)

In [None]:
# Take input (this cell classifies a monthly close, so the prompt now says so)
closing_price_monthly = float(input("Enter Monthly closing price: "))

# Predict cluster
# NOTE(review): predict_cluster is called with the price only, so it reads the
# notebook-level `model` and `scaler`; `model_monthly` is not used here. Confirm
# whether monthly prices are meant to be scored by the monthly model.
cluster_monthly = predict_cluster(closing_price_monthly)

# Print result
print(f"Closing price {closing_price_monthly} belongs to cluster:", cluster_monthly)