In [1]:
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from collections import Counter
from imblearn.under_sampling import RandomUnderSampler

def compute_EMA(closePs, backW):
    # Compute Exponential Moving Average (EMA)
    return pd.Series(closePs).ewm(span=backW, min_periods=backW).mean()

def compute_thresholds(percentage):
    # Compute thresholds a and b
    a = np.percentile(percentage, 85)
    b = np.percentile(percentage, 99.7)
    return a, b

def labeling_algorithm(closePs, backW, forW, a, b, f):
    # Compute EMA using backW

    closePs= compute_EMA(closePs, backW)

    # Initialize labels array to store buy/sell/hold signals
    labels = []
    
    for i in range(len(closePs) - forW):
        # Compute return of close prices
        R = ((1 - f) * closePs[i + forW] - (1 + f) * closePs[i]) / closePs[i]
        
        # Check if return falls within thresholds a and b
        if a < abs(R) < b:
            if R > 0:
                labels.append('Buy')
            else:
                labels.append('Sell')
        else:
            labels.append('Hold')
    
    return labels


def profitability_evaluation(labels_predicted):
    # Simulate a profitability metric based on predicted labels
    # For example, let's assume a simple metric based on consecutive 'Buy' signals
    consecutive_buys = 0
    total_trades = 0
    
    for label in labels_predicted:
        if label == 'Buy':
            consecutive_buys += 1
            total_trades += 1
        else:
            total_trades += 1
    
    # Calculate profitability metric based on consecutive 'Buy' signals
    if total_trades > 0:
        profitability_metric = consecutive_buys / total_trades
    else:
        profitability_metric = 0.0  # Default to 0 if no trades
    
    return profitability_metric

def grid_search_for_best_scheme(closePs, available_backW, available_forW, a, b):
    best_profitability = -float('inf')
    best_backW = None
    best_forW = None
    best_scheme = None
    
    for backW in available_backW:
        for forW in available_forW:
            b += forW * b * 0.1  # Increment b based on forward window size
            
            # Perform labeling using the current backW, forW, a, b
            labels = labeling_algorithm(closePs, backW, forW, a, b, f=0.005)
            
            # Calculate trade counts based on the labels
            buy_trades = labels.count('Buy')
            sell_trades = labels.count('Sell')
            hold_trades = labels.count('Hold')
            total_trades = len(labels)
            
            # Print trade counts
            print(f"Number of Buy Trades: {buy_trades}")
            print(f"Number of Sell Trades: {sell_trades}")
            print(f"Number of Hold Trades: {hold_trades}")
            print(f"Total Number of Trades: {total_trades}")
            
            # Convert labels to numerical format (e.g., 'Buy' -> 1, 'Sell' -> -1, 'Hold' -> 0)
            labels_numeric = np.array([1 if label == 'Buy' else -1 if label == 'Sell' else 0 for label in labels])
            
            # Reshape data for MLP training
            features = labels_numeric[:].reshape(-1, 1)  # Reshaping the feature array
           
            # Perform random undersampling to balance the dataset
            counter = Counter(labels_numeric)
            majority_class = max(counter, key=counter.get)
            minority_classes = [cls for cls in counter if cls != majority_class]
            # Ensure at least two minority classes for sampling strategy
            if len(minority_classes) >= 2:

                sampling_strategy = {
                    majority_class: counter[minority_classes[0]],
                    minority_classes[0]: counter[minority_classes[0]],
                    minority_classes[1]: counter[minority_classes[1]]
                }  # Set sampling strategy
       
                sampler = RandomUnderSampler(sampling_strategy=sampling_strategy, random_state=42)
                features_resampled, labels_resampled = sampler.fit_resample(features, labels_numeric)
                
                # Create an MLP classifier
                mlp = MLPClassifier(hidden_layer_sizes=(128, 64, 32), activation='relu', solver='adam', random_state=42)
                
                # Train the MLP classifier
                mlp.fit(features_resampled[:-1], labels_resampled[1:])  # Predict the next label based on the current label
                
                # Predict labels using the trained MLP model
                labels_predicted = mlp.predict(features_resampled)
                
                # Evaluate profitability based on predicted labels
                profitability_metric = profitability_evaluation(labels_predicted)
                
                # Update best scheme if the current scheme is more profitable
                if profitability_metric > best_profitability:
                    best_profitability = profitability_metric
                    best_backW = backW
                    best_forW = forW
                    best_scheme = labels

    return best_backW, best_forW, best_scheme



currency_data = pd.read_csv('backtest.csv')
# Example usage
close_prices = currency_data['close']
open_close_percentage_change=(currency_data['close']-currency_data['open'])/currency_data['open']
a, b = compute_thresholds(open_close_percentage_change)
print(a,b)
available_backW = [1, 2, 3, 4, 5]
available_forW = [1, 2, 3, 4, 5]

best_backW, best_forW, best_scheme = grid_search_for_best_scheme(close_prices, available_backW, available_forW,a,b)
print(f"Best Backward Window: {best_backW}, Best Forward Window: {best_forW}")
print(f"Best Scheme Labels: {best_scheme}")


0.007276067709273086 0.03564730643611374
Number of Buy Trades: 251
Number of Sell Trades: 5733
Number of Hold Trades: 2758
Total Number of Trades: 8742
Number of Buy Trades: 575
Number of Sell Trades: 5219
Number of Hold Trades: 2947
Total Number of Trades: 8741
Number of Buy Trades: 849
Number of Sell Trades: 5042
Number of Hold Trades: 2849
Total Number of Trades: 8740
Number of Buy Trades: 1129
Number of Sell Trades: 4964
Number of Hold Trades: 2646
Total Number of Trades: 8739
Number of Buy Trades: 1349
Number of Sell Trades: 4898
Number of Hold Trades: 2491
Total Number of Trades: 8738
Number of Buy Trades: 95
Number of Sell Trades: 6344
Number of Hold Trades: 2303
Total Number of Trades: 8742
Number of Buy Trades: 403
Number of Sell Trades: 5553
Number of Hold Trades: 2785
Total Number of Trades: 8741
Number of Buy Trades: 695
Number of Sell Trades: 5236
Number of Hold Trades: 2809
Total Number of Trades: 8740
Number of Buy Trades: 976
Number of Sell Trades: 5064
Number of Hold T

In [3]:
# create a new csv file with signals as additional column

new_data = pd.DataFrame()
new_data['date'] = currency_data['date']
new_data['open'] = currency_data['open']
new_data['high'] = currency_data['high']    
new_data['low'] = currency_data['low']
new_data['close'] = currency_data['close']
new_data['volume'] = currency_data['volume']

# signals column contains only 1,-1 and 0 for buy, sell and hold respectively , in between 1 and -1 marks with zero

# ... (your existing code)

# Add the following code after the existing code
new_data['signals'] = np.zeros(len(new_data))  # Initialize 'signals' column with zeros

# Convert 'Buy' signals to 1, 'Sell' signals to -1, and leave 'Hold' signals as 0
buy_indices = np.where(np.array(best_scheme) == 'Buy')[0]
sell_indices = np.where(np.array(best_scheme) == 'Sell')[0]

new_data.loc[buy_indices, 'signals'] = 1
new_data.loc[sell_indices, 'signals'] = -1

# Save the new DataFrame to a new CSV file
new_data.to_csv('new_backtest.csv', index=False)

print("New CSV file with signals column created successfully.")




New CSV file with signals column created successfully.


In [9]:
# Load your DataFrame
# For example: new_data = pd.read_csv('new_backtest.csv')

# Function to perform the required transformation
def transform_signals(signals):
    result = signals.copy()
    
    for i in range(1, len(signals)-1):
        if signals[i-1] == 1 and signals[i] == -1:
            # If the sequence is misaligned (1 followed by -1), replace with 0
            result[i] = 0
        elif signals[i] == 1 and signals[i+1] not in [0, -1]:
            # If after 1, a value other than -1 or 0 is present, replace with 0
            result[i+1] = 0
        elif signals[i] == -1 and signals[i+1] not in [0, 1]:
            # If after -1, a value other than 1 or 0 is present, replace with 0
            result[i+1] = 0
    
    return result

# Apply the transformation to the 'signals' column
new_data['signals'] = transform_signals(new_data['signals'])

new_data.head(50)


Unnamed: 0,date,open,high,low,close,volume,signals
0,11-10-20 0:00,11293.22,11321.05,11270.7,11300.69,1273.872963,0.0
1,11-10-20 1:00,11300.7,11395.8,11298.07,11365.34,1613.472173,-1.0
2,11-10-20 2:00,11365.18,11373.86,11345.82,11352.84,580.746781,0.0
3,11-10-20 3:00,11352.84,11352.84,11307.71,11332.0,730.86349,0.0
4,11-10-20 4:00,11332.01,11341.11,11301.0,11322.31,693.252077,0.0
5,11-10-20 5:00,11322.23,11343.3,11303.77,11337.94,979.835871,0.0
6,11-10-20 6:00,11337.95,11373.97,11335.72,11359.25,821.711579,0.0
7,11-10-20 7:00,11359.0,11369.51,11342.23,11350.01,800.969694,0.0
8,11-10-20 8:00,11350.01,11367.75,11337.89,11346.1,1137.049017,0.0
9,11-10-20 9:00,11346.1,11355.92,11327.05,11354.05,1026.07995,0.0


In [10]:
# Load your DataFrame
# For example: new_data = pd.read_csv('new_backtest.csv')

# Function to ensure balance in the signals column
def ensure_balance(signals):
    result = signals.copy()
    
    for i in range(1, len(signals) - 1):
        if signals[i - 1] == 1 and signals[i] == 0 and signals[i + 1] != -1:
            result[i] = -1
        elif signals[i - 1] == -1 and signals[i] == 0 and signals[i + 1] != 1:
            result[i] = 1
    
    return result

# Apply the transformation to ensure balance in the 'signals' column
new_data['signals'] = ensure_balance(new_data['signals'])

# Print the modified DataFrame
new_data.head(50)


Unnamed: 0,date,open,high,low,close,volume,signals
0,11-10-20 0:00,11293.22,11321.05,11270.7,11300.69,1273.872963,0.0
1,11-10-20 1:00,11300.7,11395.8,11298.07,11365.34,1613.472173,-1.0
2,11-10-20 2:00,11365.18,11373.86,11345.82,11352.84,580.746781,1.0
3,11-10-20 3:00,11352.84,11352.84,11307.71,11332.0,730.86349,0.0
4,11-10-20 4:00,11332.01,11341.11,11301.0,11322.31,693.252077,0.0
5,11-10-20 5:00,11322.23,11343.3,11303.77,11337.94,979.835871,0.0
6,11-10-20 6:00,11337.95,11373.97,11335.72,11359.25,821.711579,0.0
7,11-10-20 7:00,11359.0,11369.51,11342.23,11350.01,800.969694,0.0
8,11-10-20 8:00,11350.01,11367.75,11337.89,11346.1,1137.049017,0.0
9,11-10-20 9:00,11346.1,11355.92,11327.05,11354.05,1026.07995,0.0


In [11]:
new_data.to_csv('results.csv', index=False)