# Forex Trading: MACD Strategy Enhancement with Machine Learning

This notebook demonstrates a complete hybrid workflow:
1. **Genetic Algorithm (GA)**: Optimizes MACD parameters (`Fast`, `Slow`, `Signal`) to find a profitable base strategy.
2. **Machine Learning (Random Forest)**: Filters signals based on a snapshot of indicators at the entry point.
3. **Deep Learning (LSTM)**: Filters signals by analyzing the *sequence* of price action leading up to the entry point.

---

## 1. Setup & Load Data

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ta
import random
from deap import base, creator, tools, algorithms
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import tensorflow as tf

# Reproducibility: pin every random number generator this notebook uses
# (Python's `random`, NumPy and TensorFlow) to the same fixed seed.
random.seed(42)       # Python stdlib RNG
np.random.seed(42)    # NumPy global RNG
tf.random.set_seed(42)  # TensorFlow global seed

# Reload custom modules if edited
%load_ext autoreload
%autoreload 2

from data_loader import load_data
from strategy import MACDStrategy

# Fetch the EURUSD price history for 2010-2024 through the project loader
df = load_data('EURUSD=X', '2010-01-01', '2024-12-31')

# Quick visual check of the closing prices
close_prices = df['Close']
close_prices.plot(title='EURUSD Price', figsize=(12, 6))
plt.show()

## 2. Genetic Algorithm Optimization
We use **DEAP** (Distributed Evolutionary Algorithms in Python) to evolve the MACD parameters.
- **Objective**: Maximize Total Return.
- **Parameters**: Fast Period, Slow Period, Signal Period.
- **Constraint**: Fast < Slow (individuals that violate this receive a penalty fitness of -9999 instead of being evaluated).

In [None]:
# Bind the MACD strategy to the loaded price data
strategy = MACDStrategy(df)

# --- GA Setup ---
# Remove DEAP classes left over from a previous run of this cell so the
# creator.create() calls below always start from a clean slate.
for stale_name in ("FitnessMax", "Individual"):
    if hasattr(creator, stale_name):
        delattr(creator, stale_name)

# Single-objective fitness that is maximised (weight +1.0);
# an individual is a plain list carrying that fitness.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()

# Gene generators (inclusive ranges): fast 5-50, slow 20-100, signal 5-50
toolbox.register("attr_fast", random.randint, 5, 50)
toolbox.register("attr_slow", random.randint, 20, 100)
toolbox.register("attr_signal", random.randint, 5, 50)

# One individual = one cycle through the three generators -> [fast, slow, signal]
gene_generators = (toolbox.attr_fast, toolbox.attr_slow, toolbox.attr_signal)
toolbox.register("individual", tools.initCycle, creator.Individual, gene_generators, n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

def evaluate(individual):
    """Fitness function for the GA.

    Args:
        individual: sequence of three values ``[fast, slow, signal]``.

    Returns:
        A one-element tuple (DEAP expects fitness values as a tuple):
        the value of ``strategy.evaluate(fast, slow, signal)`` when
        ``fast < slow``, otherwise the penalty ``-9999``.
    """
    fast, slow, sig = individual

    # Only parameter sets with fast < slow are handed to the strategy
    if fast < slow:
        return (strategy.evaluate(fast, slow, sig),)

    # Invalid combination: fixed penalty so selection discards it
    return (-9999,)

# Genetic operators
toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
# Per-gene bounds mirror the ranges of the attribute generators
toolbox.register("mutate", tools.mutUniformInt, low=[5, 20, 5], up=[50, 100, 50], indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)

# Run GA
print("Running GA Optimization...")
pop = toolbox.population(n=50)

# eaSimple has no elitism: crossover/mutation can destroy the best individual
# before the last generation. A hall of fame keeps the best individual ever
# evaluated, so the reported optimum is not limited to the final population.
hall_of_fame = tools.HallOfFame(1)
result_pop, log = algorithms.eaSimple(
    pop, toolbox, cxpb=0.5, mutpb=0.2, ngen=10,
    halloffame=hall_of_fame, verbose=False,
)

best_ind = hall_of_fame[0]
print(f"Best Params Found: Fast={best_ind[0]}, Slow={best_ind[1]}, Signal={best_ind[2]}")
print(f"Best Return: {best_ind.fitness.values[0]:.4f}")

# Expose the winning parameters for the following cells
BEST_FAST = best_ind[0]
BEST_SLOW = best_ind[1]
BEST_SIGNAL = best_ind[2]

## 3. Generate Signals & Features
With the optimized parameters, we generate the trade signals and compute technical indicators to be used as features for ML models.

In [None]:
# Run the strategy once more with the GA-selected parameters
df_signals = strategy.generate_signals(BEST_FAST, BEST_SLOW, BEST_SIGNAL)

# Work on a copy so the strategy output stays untouched
temp_df = df_signals.copy()

# General-purpose indicators (RSI uses a 14-bar window; ATR/ADX use the
# `ta` library defaults)
temp_df = temp_df.assign(
    rsi=ta.momentum.rsi(temp_df['Close'], window=14),
    atr=ta.volatility.average_true_range(temp_df['High'], temp_df['Low'], temp_df['Close']),
    adx=ta.trend.adx(temp_df['High'], temp_df['Low'], temp_df['Close']),
)

# MACD-derived features: both lines expressed relative to price, plus the
# one-bar change of the MACD line
temp_df = temp_df.assign(
    macd_norm=temp_df['MACD_Line'] / temp_df['Close'],
    signal_norm=temp_df['Signal_Line'] / temp_df['Close'],
    macd_slope=temp_df['MACD_Line'].diff(),
)

# Discard rows where any column is still NaN
temp_df.dropna(inplace=True)

# A trade entry is any bar whose signal differs from the previous bar's signal.
# NOTE(review): the first remaining row has Prev_Signal = NaN, which compares
# as "different", so that row is always included as an entry - confirm intended.
temp_df['Prev_Signal'] = temp_df['Signal'].shift(1)
signal_changed = temp_df['Signal'].ne(temp_df['Prev_Signal'])
trades_df = temp_df.loc[signal_changed].copy()

# Label Logic: Check if the trade resulted in profit
def get_trade_result(idx, signal, full_df):
    """Return the fractional return of the trade opened at ``idx``.

    The trade is closed at the ``Close`` of the first row, at or after
    ``idx``, whose ``Signal`` differs from ``signal``.

    Args:
        idx: index label of the entry row in ``full_df``.
        signal: signal value held by the trade; ``1`` is treated as long,
            anything else as short.
        full_df: DataFrame with ``Close`` and ``Signal`` columns.

    Returns:
        ``(exit - entry) / entry`` for a long trade,
        ``(entry - exit) / entry`` otherwise, or ``0`` when fewer than two
        rows remain from ``idx`` onward or no exit row exists.
    """
    entry_price = full_df.loc[idx, 'Close']

    # Everything from the entry bar to the end of the data
    remaining = full_df.loc[idx:]
    if len(remaining) < 2:
        return 0

    # Rows where the position is no longer the one we entered with
    exit_rows = remaining[remaining['Signal'] != signal]
    if exit_rows.empty:
        return 0

    exit_price = exit_rows.iloc[0]['Close']

    # Long trades profit from rising prices, all others from falling prices
    if signal == 1:
        price_move = exit_price - entry_price
    else:
        price_move = entry_price - exit_price
    return price_move / entry_price

# Compute the realised return of every trade and derive a binary label:
# 1 = trade closed with a positive return, 0 = otherwise.
labels = []
returns = []
# Only the Signal column is needed, so iterate over it directly instead of
# materialising a full row per trade with iterrows().
for idx, signal in trades_df['Signal'].items():
    ret = get_trade_result(idx, signal, temp_df)
    returns.append(ret)
    labels.append(1 if ret > 0 else 0)

trades_df['Trade_Return'] = returns
trades_df['Label'] = labels

# Guard against an empty trade list so the summary never divides by zero
win_rate = (sum(labels) / len(labels)) * 100 if labels else 0.0
print(f"Total Trades: {len(trades_df)}")
print(f"Profitable: {sum(labels)} ({win_rate:.2f}%)")

## 4. Machine Learning Filter (Random Forest)
**Concept**: Use a Random Forest Classifier to analyze the state of the indicators *at the moment of entry*.
- **Input**: RSI, ATR, ADX, MACD Values (Scalar values).
- **Output**: Predicted class for the trade: profitable (1) or not profitable (0).

In [None]:
# Indicator snapshot used as model input, taken at each trade entry
features = ['rsi', 'atr', 'adx', 'macd_norm', 'signal_norm', 'macd_slope']
X, y = trades_df[features], trades_df['Label']

# Chronological split: the first 70% of trades train the model, the last 30%
# test it (shuffle=False keeps the time order intact)
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.3)

# Shallow forest with a fixed random_state for repeatable results
rf = RandomForestClassifier(random_state=42, max_depth=5, n_estimators=100)
rf.fit(X_train, y_train)

# Evaluate on the held-out trades
preds = rf.predict(X_test)
print("Random Forest Results:")
print(classification_report(y_test, preds))

# Rank the features by their importance in the fitted forest
importances = pd.Series(rf.feature_importances_, index=features)
importances = importances.sort_values(ascending=False)
importances.plot.bar(title='RF Feature Importance')
plt.show()

## 5. Deep Learning Filter (LSTM)
**Concept**: LSTM (Long Short-Term Memory) networks are designed for time-series.
Instead of looking at a single snapshot, we look at the **sequence of data** leading up to the trade.
- **Input**: A sequence of the last 10 candles, each described by the same indicator features used above (RSI, ATR, ADX, and the MACD-derived values).
- **Output**: Probability of Profit.

In [None]:
# 1. Prepare Sequences
SEQ_LEN = 10

# Map each index label of temp_df to its integer row position for slicing
idx_map = {idx: i for i, idx in enumerate(temp_df.index)}

# Collect the row position and label of every trade that has enough history
trade_positions = []
trade_labels = []
for time_idx, label in trades_df['Label'].items():
    i = idx_map.get(time_idx)
    if i is None or i < SEQ_LEN:
        continue
    trade_positions.append(i)
    trade_labels.append(label)

trade_positions = np.array(trade_positions)
y_lstm = np.array(trade_labels)

# 2. Split Data
# Chronological split on the trade positions; the feature windows are built
# afterwards so that scaling can be restricted to the training period.
pos_train, pos_test, y_train_lstm, y_test_lstm = train_test_split(
    trade_positions, y_lstm, test_size=0.3, shuffle=False
)

# Fit the scaler ONLY on rows up to (and including) the last training trade.
# Fitting on the full series would leak test-period statistics into training.
scaler = StandardScaler()
train_end = pos_train[-1] + 1
scaler.fit(temp_df[features].iloc[:train_end])

# Apply the training-period scaling to the whole dataframe
df_scaled = temp_df.copy()
df_scaled[features] = scaler.transform(temp_df[features])
feature_data = df_scaled[features].values

# Each sample is the SEQ_LEN rows ending at the entry bar (entry bar included):
# rows [i - SEQ_LEN + 1, i]
X_lstm = np.array([feature_data[pos - SEQ_LEN + 1 : pos + 1] for pos in trade_positions])

print(f"LSTM Data Shape: {X_lstm.shape}")

# Slice the windows with the same chronological boundary as the labels
n_train = len(pos_train)
X_train_lstm, X_test_lstm = X_lstm[:n_train], X_lstm[n_train:]

# 3. Build Model
# A single LSTM layer summarises the window, followed by dropout and a small
# dense head ending in a sigmoid for the binary profitable/unprofitable label.
lstm_layers = [
    tf.keras.layers.LSTM(32, input_shape=(SEQ_LEN, len(features)), return_sequences=False),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
]
model = tf.keras.Sequential(lstm_layers)

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 4. Train
# validation_split=0.2 holds out part of the training data for validation
history = model.fit(
    X_train_lstm,
    y_train_lstm,
    epochs=50,
    batch_size=8,
    validation_split=0.2,
    verbose=0,
)

# Plot Training History
for history_key, curve_label in (('loss', 'Train Loss'), ('val_loss', 'Val Loss')):
    plt.plot(history.history[history_key], label=curve_label)
plt.legend()
plt.title('LSTM Training Loss')
plt.show()

# 5. Evaluate
# Convert the sigmoid outputs to class labels with a 0.5 threshold
preds_prob = model.predict(X_test_lstm)
preds_lstm = (preds_prob > 0.5).astype(int)

print("LSTM Results:")
print(classification_report(y_test_lstm, preds_lstm, zero_division=0))