In [11]:
# Mount Google Drive inside the Colab runtime so that files stored on Drive are
# reachable under the local path /content/drive.
# NOTE: `google.colab` is provided by the Colab runtime; this cell is not expected
# to work in a plain local Jupyter kernel.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [57]:
import pandas as pd

from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# --- Configuration ------------------------------------------------------------
# Everything a reader may want to tune lives here instead of being buried in calls.
DATA_PATH = '/content/drive/MyDrive/Colab Notebooks/quantvision_financial_dataset_200.csv'
TARGET = 'future_trend'   # column the models learn to predict
TEST_SIZE = 0.3           # fraction of rows held out for evaluation
SPLIT_SEED = 42           # fixes the train/test partition across re-runs

# --- Load ---------------------------------------------------------------------
df = pd.read_csv(DATA_PATH)

# Columns that are standardised (zero mean / unit variance) before modelling.
num_features = ['lookback_days', 'high_volatility', 'trend_continuation', 'technical_score',
                'edge_density', 'slope_strength', 'candlestick_variance', 'pattern_symmetry']
# Columns that are one-hot encoded before modelling.
cat_features = ['asset_type', 'market_regime']

# Fail early, with a readable message, if the CSV does not have the expected schema;
# otherwise the problem only surfaces later inside ColumnTransformer.fit.
missing_columns = sorted(set(num_features + cat_features + [TARGET]) - set(df.columns))
if missing_columns:
    raise KeyError(f"Dataset is missing expected column(s): {missing_columns}")

# --- Features / target --------------------------------------------------------
# `x` keeps every non-target column; any column not listed in num_features or
# cat_features is discarded by the ColumnTransformer below (default remainder='drop').
x = df.drop(TARGET, axis=1)
y = df[TARGET]

# --- Preprocessing ------------------------------------------------------------
preprocessing = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), num_features),
        # drop="first" removes one redundant dummy column per categorical feature.
        # handle_unknown="ignore" keeps predict() from raising when the test split
        # contains a category that never appeared in the training split: such rows
        # are encoded as all zeros (same as the dropped category) and scikit-learn
        # emits a warning. Combining it with `drop` needs scikit-learn >= 1.0.
        ('cat', OneHotEncoder(drop="first", handle_unknown="ignore"), cat_features),
    ]
)

# --- Split --------------------------------------------------------------------
# stratify=y keeps the class proportions of the target the same in both splits.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=TEST_SIZE, random_state=SPLIT_SEED, stratify=y
)

def fit_and_report(name, classifier):
    """Fit `classifier` behind the shared preprocessing and print its test metrics.

    Reads the notebook-level names `preprocessing`, `x_train`, `y_train`,
    `x_test` and `y_test`.

    Parameters
    ----------
    name : str
        Label inserted into the printed report header.
    classifier : estimator
        Unfitted scikit-learn classifier used as the final pipeline step.

    Returns
    -------
    (Pipeline, array-like)
        The fitted pipeline and its predictions for `x_test`.
    """
    model = Pipeline(steps=[
        # Each pipeline gets its own copy of the preprocessor. Without caching,
        # Pipeline.fit fits the step object it was given in place, so handing the
        # same ColumnTransformer instance to two pipelines would make them share
        # (and overwrite) one fitted state.
        ("preprocessor", clone(preprocessing)),
        ("classifier", classifier),
    ])
    model.fit(x_train, y_train)
    predictions = model.predict(x_test)

    # Printed text is kept exactly as before (including the "mstrix" spelling) so
    # the output matches what is already recorded under this cell.
    print(f"\n======= Classification Report for {name} =======\n",
          classification_report(y_test, predictions))
    print("\nConfusion mstrix :\n", confusion_matrix(y_test, predictions))
    return model, predictions


# Baseline: linear classifier. max_iter is raised so the solver has room to converge.
logmodel, y_pred_logmodel = fit_and_report(
    "logmodel",
    LogisticRegression(random_state=7, max_iter=1000),
)
# The original cell bound the same pipeline to both names via a chained assignment;
# the second name is kept in case another cell refers to it.
log_model = logmodel

# Non-linear model: multi-layer perceptron with two hidden layers (64 and 32 units).
nn_model, y_pred_nn_model = fit_and_report(
    "nn_model",
    MLPClassifier(
        hidden_layer_sizes=(64, 32),
        activation="relu",
        solver="adam",
        max_iter=1000,
        random_state=7,
    ),
)


               precision    recall  f1-score   support

           0       0.67      0.50      0.57         4
           1       0.96      0.98      0.97        56

    accuracy                           0.95        60
   macro avg       0.82      0.74      0.77        60
weighted avg       0.95      0.95      0.95        60


Confusion mstrix :
 [[ 2  2]
 [ 1 55]]

               precision    recall  f1-score   support

           0       0.75      0.75      0.75         4
           1       0.98      0.98      0.98        56

    accuracy                           0.97        60
   macro avg       0.87      0.87      0.87        60
weighted avg       0.97      0.97      0.97        60


Confusion mstrix :
 [[ 3  1]
 [ 1 55]]


In [58]:
# Why Logistic Regression performs reasonably well or badly
# Because logistic regression is linear, it separates the classes with a straight-line (linear) decision boundary, so it may misclassify data points that such a boundary cannot separate.

# Why Neural Network performs better or worse
# Neural networks work in a non-linear manner. They can identify trends that depend on several factors at once, loosely comparable to the way a decision tree combines conditions.

# The effect of volatility on predictions
# High volatility means high energy in the market, which can either cause prices to soar or cause chaos in the price trends.
# Logistic regression struggles with this because there will be many outliers in its pattern, which results in bad predictions.

# The role of trend continuation
# It helps increase accuracy for both models because the classification of the data points becomes more predictable.

# Situations where the model fails and why
# The models fail in situations of high volatility, which causes an unstable market price; this makes it difficult for the models to identify clear trends.
# The logistic regression model might not be able to fit a straight line through the messy data points,
# whereas the NN might overfit (memorize) certain patterns in the data (like high volatility + high technical score)
# that will not recur in the future, leading to poor predictions.