# In [3]:
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import make_pipeline
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.utils import class_weight

# Load the data
# NOTE(review): the rolling windows, diff() and the chronological split below
# all assume the CSV rows are sorted oldest-to-newest -- confirm against the file.
df = pd.read_csv('3-2018_2020Data.csv')

# Trailing 3-day moving averages for ClosePrice and Volume.
# The window for row t covers rows t-2..t only. A shift(-1) here would pull
# row t+1 into the feature, i.e. the very close price the target below is
# derived from (look-ahead leakage), so the averages are left unshifted.
df['3day_MA_Close'] = df['ClosePrice'].rolling(window=3).mean()
df['3day_MA_Volume'] = df['Volume'].rolling(window=3).mean()

# Drop rows with NaN values that result from the rolling mean calculation
# (dropna() with default arguments removes a row if ANY column is NaN).
df.dropna(inplace=True)

# Row t of X is paired with the direction of the move from t to t+1:
# X drops the last row (no next day), y drops the first (no previous day).
X = df[['3day_MA_Close', '3day_MA_Volume']].values[:-1]  # Features: 3-day MAs
y_binary = np.where(df['ClosePrice'].diff() > 0, 1, 0)[1:]  # Target: 1 if price increased, 0 otherwise

# Split the data chronologically: the first 80% of rows train, the last 20%
# test. Shuffling would interleave future rows into the training set.
X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(
    X, y_binary, test_size=0.2, shuffle=False
)

# Pipeline for Scaling + SMOTE + SVC
# Scaling comes first so SMOTE's nearest-neighbour search is not dominated by
# whichever raw feature has the larger magnitude. The sampler is only applied
# during fit; predict() passes data through the scaler and the SVC.
pipeline = make_pipeline(
    StandardScaler(),
    SMOTE(random_state=42),
    SVC(random_state=42)
)

# GridSearchCV for hyperparameter tuning
# Step names are the lower-cased class names generated by make_pipeline.
# gamma is ignored by the linear kernel, so those combinations are redundant
# but harmless.
param_grid = {
    'svc__C': [0.1, 1, 10],
    'svc__gamma': ['scale', 'auto'],
    'svc__kernel': ['rbf', 'linear']
}

# Forward-chaining folds: every validation fold lies strictly after the data
# it was trained on, matching how the model would be used.
grid_search = GridSearchCV(
    pipeline, param_grid, cv=TimeSeriesSplit(n_splits=5), scoring='accuracy'
)
grid_search.fit(X_train_clf, y_train_clf)

# Display the best parameters found and the best cross-validated score
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validated score: ", grid_search.best_score_)

# Predictions and Evaluation (uses the best estimator refit on the full training set)
y_pred_clf = grid_search.predict(X_test_clf)
print(classification_report(y_test_clf, y_pred_clf))
