In [1]:
import yfinance as yf
import pandas as pd
from indicator import TechnicalIndicators
from trade import TradingSimulator
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import random
import matplotlib.pyplot as plt
from itertools import combinations
from functools import reduce
from operator import mul

In [2]:
# Download price history from Yahoo Finance and hand it back as a DataFrame
def fetch_ohlcv_data(ticker, start_date, end_date):
    """Download price history for one ticker through ``yf.download``.

    Args:
        ticker: Symbol forwarded unchanged to ``yf.download``.
        start_date: Forwarded as the ``start`` keyword.
        end_date: Forwarded as the ``end`` keyword.

    Returns:
        The downloaded frame after ``reset_index`` has moved its index into
        an ordinary column, leaving a fresh 0..n-1 row index.
    """
    return yf.download(ticker, start=start_date, end=end_date).reset_index()

In [3]:
# Instrument and date range to download
ticker = 'IOO.AX'  # iShares Global 100 ETF (IOO), as listed on the Australian market
start_date = '2021-01-01'
end_date = '2023-05-06'

# Pull the price history for that range into a DataFrame
data = fetch_ohlcv_data(ticker=ticker, start_date=start_date, end_date=end_date)

[*********************100%***********************]  1 of 1 completed


In [4]:
# Wrap the downloaded price frame; the add_* calls below attach indicator
# columns that are later read back through `indicators.data`.
indicators = TechnicalIndicators(data)

# Add a simple and an exponential moving average of the close price for each window
window_list = [5, 10, 20, 50]
for window_size in window_list:
    indicators.add_sma('Close', window_size)
    indicators.add_ema('Close', window_size)
    
# Add the remaining technical indicators with fixed parameters.
# NOTE(review): the numeric arguments are presumably look-back periods — confirm
# against the TechnicalIndicators implementation. Call order matters: it is the
# order in which the new columns appear in the displayed frame.
indicators.add_rsi('Close', 14)
indicators.add_stoch('High', 'Low', 'Close', 9, 6)
# indicators.add_stochrsi('Close', 14, 3, 3)
indicators.add_macd('Close', 12, 26)
indicators.add_adx('High', 'Low', 'Close', 14)
# indicators.add_williamsr('High', 'Low', 'Close', 14)
indicators.add_cci('High', 'Low', 'Close', 14)
# indicators.add_atr('High', 'Low', 'Close', 14)
indicators.add_roc('Close', 14)
indicators.add_bull_bear_power('High', 'Low', 'Close', 13)
# Show the last five rows as a quick check that the indicator columns were added
indicators.data.tail(5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,sma_5,ema_5,sma_10,...,ema_20,sma_50,ema_50,rsi_(14),"stoch_(9,6)","macd_(12,26)",adx_(14),cci_(14),roc_(14),bull/bear power_(13)
585,2023-05-01,109.360001,109.360001,108.940002,109.010002,109.010002,31654,107.992001,108.105126,106.981001,...,105.604097,101.7336,102.644587,73.954355,90.048991,1.962883,31.627849,164.376624,4.525843,5.019914
586,2023-05-02,108.980003,108.980003,107.389999,107.389999,107.389999,31082,108.178001,107.86675,107.098,...,105.774183,101.8896,102.830681,62.62095,84.472893,1.864388,31.350915,75.991719,2.00418,2.875641
587,2023-05-03,107.389999,107.68,107.32,107.68,107.68,35506,108.104001,107.8045,107.302,...,105.955689,102.0674,103.020851,63.693576,78.470627,1.789107,31.024728,45.808239,2.591465,1.239118
588,2023-05-04,107.68,107.68,106.760002,106.900002,106.900002,35800,108.028001,107.503001,107.417001,...,106.045624,102.238,103.172974,58.805714,67.681166,1.647516,30.167701,10.368336,2.247727,0.673532
589,2023-05-05,106.800003,106.800003,106.080002,106.25,106.25,34866,107.446001,107.085334,107.422001,...,106.065088,102.4044,103.293642,55.016726,55.863839,1.465955,28.741989,-48.291144,1.113436,-0.705541


In [42]:
def create_population(indicator_list, population_size, min_indicators=1, max_indicators=10):
    """Build the initial population of random indicator combinations.

    Each individual is a list of distinct names drawn from ``indicator_list``.
    Its length is chosen uniformly between ``min_indicators`` and
    ``max_indicators``; the upper bound is capped at ``len(indicator_list)``
    because ``random.sample`` cannot draw more items than the list holds.

    Args:
        indicator_list: Candidate indicator names to sample from.
        population_size: Number of individuals to generate.
        min_indicators: Smallest allowed individual length.
        max_indicators: Largest allowed individual length (capped as above).

    Returns:
        A list of ``population_size`` individuals.

    Raises:
        ValueError: If individuals are requested but ``min_indicators`` exceeds
            the number of indicators that can actually be drawn.
    """
    # Cap the size range so a short indicator list no longer makes
    # random.sample fail at random depending on the drawn size.
    upper_bound = min(max_indicators, len(indicator_list))
    if population_size > 0 and min_indicators > upper_bound:
        raise ValueError(
            f"Cannot draw at least {min_indicators} indicators: only "
            f"{upper_bound} can be selected per individual."
        )

    population = []
    # generate population by random indicators
    print("Init the population:")
    for _ in range(population_size):
        n_indicators = random.randint(min_indicators, upper_bound)
        individual = random.sample(indicator_list, n_indicators)
        population.append(individual)
        print(individual)
    print()
    return population

In [26]:
# Score one indicator combination over a time window
def calculate_fitness(individual, indicators, start_time, end_time):
    """Return the non-negative fitness score of one individual.

    A ``TradingSimulator`` is built from ``(indicators, start_time, end_time,
    individual)`` and its ``calculate_return()`` value is subtracted from 1000.

    NOTE(review): with this formula a larger simulated return gives a lower
    score — confirm that matches what ``calculate_return`` measures.

    Args:
        individual: The indicator combination to evaluate.
        indicators: Indicator container handed to the simulator.
        start_time: Start of the evaluation window, passed to the simulator.
        end_time: End of the evaluation window, passed to the simulator.

    Returns:
        ``1000 - calculate_return()``, clamped so it is never below 0.
    """
    simulated_return = TradingSimulator(indicators, start_time, end_time, individual).calculate_return()
    # Clamp at zero so downstream selection never sees a negative weight
    return max(1000 - simulated_return, 0)

In [27]:
# Pick two distinct parents with probability proportional to fitness
def roulette_wheel_selection(population, fitness):
    """Select two distinct parents by fitness-proportional sampling.

    Args:
        population: List of individuals.
        fitness: Fitness value for each individual, in the same order.

    Returns:
        A list holding the two selected individuals.

    Raises:
        ValueError: If the total fitness is 0, if ``population`` and
            ``fitness`` differ in length, or if fewer than two individuals
            are available.
    """
    total_fitness = sum(fitness)
    if total_fitness == 0:
        raise ValueError("The total fitness is 0, please try to run the program again.")
    if len(population) != len(fitness):
        raise ValueError("population and fitness must have the same length.")
    if len(population) < 2:
        raise ValueError("At least two individuals are required to select two parents.")

    nonzero_indices = [i for i, f in enumerate(fitness) if f != 0]
    if len(nonzero_indices) >= 2:
        probabilities = [f / total_fitness for f in fitness]
        selected_indices = np.random.choice(range(len(population)), size=2, replace=False, p=probabilities)
    else:
        # Sampling two items without replacement needs at least two non-zero
        # weights. When a single individual carries all the fitness, keep it
        # and pair it with a uniformly chosen partner instead of failing.
        first = nonzero_indices[0]
        others = [i for i in range(len(population)) if i != first]
        selected_indices = [first, np.random.choice(others)]
    return [population[i] for i in selected_indices]

In [28]:
# crossover the parents to generate children
def crossover(parents):
    """Combine two parents into two children.

    Each child receives every indicator shared by both parents plus a random
    half (rounded down) of the indicators unique to one parent: child 1 draws
    from parent 1's unique indicators, child 2 from parent 2's. Indicators are
    handled in parent order rather than set order, so results are reproducible
    under a fixed ``random`` seed.

    Args:
        parents: A pair ``(parent1, parent2)`` of indicator-name lists.

    Returns:
        A tuple ``(child1, child2)`` of new lists; the parents are not modified.
    """
    parent1, parent2 = parents
    # De-duplicate while keeping first-seen order
    genes1 = list(dict.fromkeys(parent1))
    genes2 = list(dict.fromkeys(parent2))
    lookup1, lookup2 = set(genes1), set(genes2)

    common_elements = [g for g in genes1 if g in lookup2]
    unique_elements1 = [g for g in genes1 if g not in lookup2]
    unique_elements2 = [g for g in genes2 if g not in lookup1]

    child1 = common_elements + random.sample(unique_elements1, len(unique_elements1) // 2)
    child2 = common_elements + random.sample(unique_elements2, len(unique_elements2) // 2)

    # If a child came out empty, give it one random indicator from each
    # parent's unique set. A child can only be empty when the parents share
    # nothing, so there is no common element to fall back on.
    for child in (child1, child2):
        if not child:
            for pool in (unique_elements1, unique_elements2):
                if pool:
                    child.append(random.choice(pool))

    return child1, child2

In [29]:
# mutation
def mutation(individual, indicator_list, mutation_rate=0.2):
    """Randomly replace indicators of an individual, in place.

    Each position is replaced with probability ``mutation_rate`` by an
    indicator from ``indicator_list`` that the individual does not currently
    contain. If the individual already holds every available indicator there
    is nothing to swap in, so that position is left unchanged.

    Args:
        individual: List of indicator names; modified in place.
        indicator_list: All candidate indicator names.
        mutation_rate: Per-position replacement probability.

    Returns:
        The same ``individual`` list object, after mutation.
    """
    for i in range(len(individual)):
        if random.random() < mutation_rate:
            present = set(individual)
            # Keep indicator_list order (de-duplicated) so a seeded run is
            # reproducible; set iteration order of strings varies across runs.
            candidates = [name for name in dict.fromkeys(indicator_list) if name not in present]
            if candidates:
                individual[i] = random.choice(candidates)
    return individual

In [58]:
# Parameters
# Candidate indicator names the genetic algorithm may combine
indicator_list = [
    'sma_5', 'ema_5', 'sma_10', 'ema_10', 'sma_20', 'ema_20', 'sma_50', 'ema_50',
    'rsi_(14)', 'stoch_(9,6)', 'macd_(12,26)', 'adx_(14)', 'cci_(14)', 'roc_(14)',
    'bull/bear power_(13)',
]

# Random training window: start in [0, 275], length in [100, 300].
# NOTE(review): the offsets are presumably row positions in the price frame and
# 575 looks tied to its length — confirm; no random seed is set, so the window
# changes on every run.
train_start_time = random.randint(0, 575 - 300)
train_end_time = train_start_time + random.randint(100, 300)

# Test window: starts right after training and ends 14 steps later
test_start_time = train_end_time + 1
test_end_time = test_start_time + 14

print("Start_time:", train_start_time)
print("End_time:", train_end_time)

Start_time: 259
End_time: 521


In [59]:
def genetic_algorithm(indicators, indicator_list, start_time, end_time, population_size=100, 
                      generations=5, mutation_rate=0.2):
    """Evolve indicator combinations and return the best of the last evaluated generation.

    Every generation is scored with ``calculate_fitness`` and its ten best
    individuals are printed. The next generation is then bred through
    roulette-wheel selection, crossover and mutation.

    Args:
        indicators: Indicator container forwarded to ``calculate_fitness``.
        indicator_list: Candidate indicator names.
        start_time: Start of the evaluation window.
        end_time: End of the evaluation window.
        population_size: Number of individuals per generation.
        generations: Number of generations to evaluate.
        mutation_rate: Per-position mutation probability.

    Returns:
        Up to ten ``(individual, fitness)`` tuples from the last evaluated
        generation, sorted by fitness in descending order. Empty when
        ``generations`` is 0.
    """
    # initialize the population
    population = create_population(indicator_list, population_size)
    # Defined up front so generations == 0 returns an empty ranking
    top_individuals = []

    # run the algorithm for several generations
    for generation in range(generations):
        # calculate the fitness of each individual
        fitness = [calculate_fitness(ind, indicators, start_time, end_time) for ind in population]

        # print the top 10 individuals and their fitness
        print(f"Generation {generation + 1}: Top 10 individuals and their fitness:")
        top_individuals = sorted(zip(population, fitness), key=lambda x: x[1], reverse=True)[:10]
        for individual, fit in top_individuals:
            print(f"Individual {individual} - Fitness: {fit:.2f}")

        # Offspring of the final generation would never be evaluated, so skip breeding them
        if generation == generations - 1:
            break

        # generate new population by crossover and mutation
        new_population = []
        # Use the population_size parameter; the module-level POPULATION_SIZE
        # referenced before is not defined anywhere in the notebook.
        while len(new_population) < population_size:
            parents = roulette_wheel_selection(population, fitness)
            child1, child2 = crossover(parents)
            child1 = mutation(child1, indicator_list, mutation_rate)
            child2 = mutation(child2, indicator_list, mutation_rate)
            new_population.extend([child1, child2])

        # Children arrive in pairs; trim so an odd population_size is respected
        population = new_population[:population_size]

    # return the top 10 individuals
    return top_individuals

In [60]:
# Run the genetic algorithm on the training window and keep its final top-10 ranking
top_individuals = genetic_algorithm(
    indicators,
    indicator_list,
    start_time=train_start_time,
    end_time=train_end_time,
)

Init the population:
['ema_50', 'sma_20', 'sma_10', 'adx_(14)', 'cci_(14)', 'rsi_(14)', 'ema_10', 'bull/bear power_(13)', 'sma_5']
['ema_50', 'macd_(12,26)', 'sma_20', 'roc_(14)']
['sma_20', 'rsi_(14)', 'sma_5', 'ema_20', 'ema_10', 'bull/bear power_(13)', 'stoch_(9,6)', 'sma_50', 'ema_50']
['sma_20', 'roc_(14)', 'rsi_(14)']
['stoch_(9,6)', 'macd_(12,26)']
['ema_20', 'cci_(14)', 'bull/bear power_(13)', 'sma_10']
['sma_50', 'ema_5', 'roc_(14)', 'stoch_(9,6)', 'macd_(12,26)', 'sma_20', 'ema_20']
['sma_10', 'roc_(14)', 'macd_(12,26)', 'ema_5', 'ema_10', 'sma_20', 'cci_(14)', 'sma_50']
['sma_50', 'roc_(14)', 'sma_5', 'adx_(14)']
['cci_(14)', 'sma_10', 'stoch_(9,6)', 'sma_5', 'ema_50', 'bull/bear power_(13)', 'roc_(14)', 'ema_5']
['ema_50', 'sma_5', 'macd_(12,26)', 'sma_20', 'stoch_(9,6)', 'ema_10', 'adx_(14)', 'ema_20', 'cci_(14)']
['cci_(14)', 'sma_5', 'macd_(12,26)', 'bull/bear power_(13)', 'ema_20', 'stoch_(9,6)', 'adx_(14)', 'roc_(14)']
['bull/bear power_(13)', 'sma_20', 'sma_10', 'sma_

Generation 3: Top 10 individuals and their fitness:
Individual ['stoch_(9,6)', 'ema_5'] - Fitness: 393.44
Individual ['stoch_(9,6)', 'rsi_(14)', 'ema_5'] - Fitness: 393.44
Individual ['ema_20', 'cci_(14)', 'ema_10', 'roc_(14)'] - Fitness: 392.39
Individual ['roc_(14)'] - Fitness: 382.83
Individual ['stoch_(9,6)', 'sma_20'] - Fitness: 377.98
Individual ['stoch_(9,6)', 'rsi_(14)', 'sma_20'] - Fitness: 377.98
Individual ['sma_5'] - Fitness: 377.87
Individual ['sma_20', 'ema_20', 'bull/bear power_(13)', 'roc_(14)'] - Fitness: 374.94
Individual ['rsi_(14)', 'stoch_(9,6)', 'ema_10', 'ema_20'] - Fitness: 365.18
Individual ['stoch_(9,6)', 'sma_10', 'rsi_(14)', 'sma_20', 'sma_50'] - Fitness: 346.96
Generation 4: Top 10 individuals and their fitness:
Individual ['ema_20'] - Fitness: 411.77
Individual ['ema_20'] - Fitness: 411.77
Individual ['bull/bear power_(13)', 'ema_10'] - Fitness: 384.45
Individual ['roc_(14)'] - Fitness: 382.83
Individual ['roc_(14)'] - Fitness: 382.83
Individual ['sma_20',

In [61]:
def test_top_individuals(top_individuals, indicators, start_time, end_time):
    fitness_values = [calculate_fitness(ind, indicators, start_time, end_time) for ind,fit in top_individuals]
    return fitness_values

In [62]:
# Re-score the top individuals on the test window that follows training
test_results = test_top_individuals(
    top_individuals,
    indicators,
    start_time=test_start_time,
    end_time=test_end_time,
)
# Print each (indicator combination, training fitness) pair, then its test-window score
for i, individual in enumerate(top_individuals):
    print(f"参数组合 {i + 1}: {individual}")
    print(f"测试集评分: {test_results[i]}")

参数组合 1: (['ema_20'], 411.7699432373047)
测试集评分: 0
参数组合 2: (['ema_5'], 393.44002532958984)
测试集评分: 0
参数组合 3: (['ema_5'], 393.44002532958984)
测试集评分: 0
参数组合 4: (['cci_(14)', 'ema_5'], 393.44002532958984)
测试集评分: 0
参数组合 5: (['bull/bear power_(13)'], 387.47984313964844)
测试集评分: 0
参数组合 6: (['sma_20'], 377.9800338745117)
测试集评分: 0
参数组合 7: (['sma_20'], 377.9800338745117)
测试集评分: 0
参数组合 8: (['sma_5'], 377.87000274658203)
测试集评分: 0
参数组合 9: (['bull/bear power_(13)', 'sma_10'], 368.24991607666016)
测试集评分: 0
参数组合 10: (['ema_20', 'sma_10'], 365.75)
测试集评分: 0
