In [1]:
import pandas as pd
import time
import os
import threading
import shutil

from tqdm import tqdm
from binance.client import Client

client = Client()

In [None]:
# Strip the "top_performers_" prefix from every parquet snapshot in the folder.
# Source and destination are the same directory, so the move is effectively a rename.
source_directory = "/home/tsereir/tsereir/projets/algorithmic_trading/data/worstPerformers"
destination_directory = "/home/tsereir/tsereir/projets/algorithmic_trading/data/worstPerformers"

for filename in os.listdir(source_directory):
    is_snapshot = filename.startswith("top_performers_") and filename.endswith('.parquet')
    if not is_snapshot:
        continue

    # Only the leading occurrence of the prefix is removed.
    date_time_part = filename.replace("top_performers_", "", 1)
    source_file = os.path.join(source_directory, filename)
    destination_file = os.path.join(destination_directory, date_time_part)

    shutil.move(source_file, destination_file)
    print(f"Fichier {filename} déplacé et renommé en {date_time_part}")

### All tradable assets in this dictionary

In [3]:
info = client.get_exchange_info()

In [9]:
symbols = [x['symbol'] for x in info['symbols']]

In [16]:
len(symbols)

2766

In [15]:
# Drop leveraged tokens: any symbol containing one of these tags is removed.
# NOTE(review): this is a plain substring test, so an ordinary symbol that merely
# contains "UP", "DOWN", "BEAR" or "BULL" somewhere in its name is dropped as well.
exclude = ['UP', 'DOWN', 'BEAR', 'BULL']
non_lev = [symbol for symbol in symbols if not any(tag in symbol for tag in exclude)]

In [14]:
len(non_lev)

2697

### Keep only USDT pairs

In [17]:
relevant = [symbol for symbol in non_lev if symbol.endswith('USDT')]

In [19]:
len(relevant)

471

In [20]:
klines = {}

### Get 1 minute candles for each symbol

In [21]:
# Request 1-minute candles starting '1 hour ago UTC' for every symbol in `relevant`,
# one sequential API call per symbol, and store the raw response in `klines`
# under the symbol name. tqdm only wraps the iteration to show progress.
for symbol in tqdm(relevant) : 
    klines[symbol] = client.get_historical_klines(symbol, '1m', '1 hour ago UTC')

100%|██████████| 471/471 [04:22<00:00,  1.80it/s]


In [25]:
(pd.DataFrame(klines['BTCUSDT'])[4].astype(float).pct_change() + 1 ).prod() -1 #4 column is the close

0.00021125056342108373

In [26]:
# Cumulative return over the fetched window for every symbol that has candles.
# NOTE: `symbols` is rebound here and no longer holds the full exchange listing.
returns, symbols = [], []

for symbol in relevant:
    candles = klines[symbol]
    if len(candles) == 0:
        continue  # nothing was returned for this symbol

    # Column 4 of a kline row is the close price.
    closes = pd.DataFrame(candles)[4].astype(float)
    cumret = (closes.pct_change() + 1).prod() - 1
    returns.append(cumret)
    symbols.append(symbol)

In [27]:
retdf = pd.DataFrame(returns, index=symbols, columns=['ret'])

In [28]:
retdf.ret.nlargest(10)

API3USDT     0.043275
OAXUSDT      0.034763
ZECUSDT      0.024750
DYDXUSDT     0.016505
MOVRUSDT     0.015326
FTTUSDT      0.014781
RAYUSDT      0.012847
BTTCUSDT     0.011628
MAGICUSDT    0.010071
AGLDUSDT     0.008056
Name: ret, dtype: float64

### Function that returns the top performers

In [34]:
import pandas as pd

def get_top_performer(client, interval, duration, n=10):
    """Return the ``n`` USDT pairs with the highest cumulative return.

    Args:
        client: object exposing ``get_exchange_info()`` and
            ``get_historical_klines(symbol=..., interval=..., start_str=...)``.
        interval: candle interval forwarded to the client (e.g. ``'1m'``).
        duration: start string forwarded to the client (e.g. ``'10 minutes ago UTC'``).
        n: number of rows to keep.

    Returns:
        A Series named ``ret`` indexed by symbol, holding the ``n`` largest
        cumulative returns computed from the close prices.
    """
    leveraged_tags = ['UP', 'DOWN', 'BEAR', 'BULL']
    listing = client.get_exchange_info()['symbols']

    # Keep USDT-quoted symbols whose name contains none of the leveraged-token tags
    # (substring match, exactly like the exploratory cells of the notebook).
    relevant_symbols = [
        entry['symbol']
        for entry in listing
        if not any(tag in entry['symbol'] for tag in leveraged_tags)
        and entry['symbol'].endswith('USDT')
    ]

    # One sequential request per symbol; symbols without candles are skipped.
    returns, selected_symbols = [], []
    for name in tqdm(relevant_symbols):
        candles = client.get_historical_klines(symbol=name, interval=interval, start_str=duration)
        if len(candles) == 0:
            continue

        # Column 4 of a kline row is the close price.
        closes = pd.DataFrame(candles)[4].astype(float)
        growth = closes.pct_change() + 1
        returns.append(growth.prod() - 1)
        selected_symbols.append(name)

    retdf = pd.DataFrame(returns, index=selected_symbols, columns=['ret'])
    return retdf.ret.nlargest(n)


In [35]:
top_performers = get_top_performer(client, '1m',  '10 minutes ago UTC', n=10)


100%|██████████| 471/471 [04:34<00:00,  1.72it/s]


In [36]:
top_performers

KP3RUSDT     0.026362
TROYUSDT     0.025681
APEUSDT      0.018535
RAYUSDT      0.008809
TURBOUSDT    0.008788
FTMUSDT      0.007834
RIFUSDT      0.006674
MEMEUSDT     0.005793
SKLUSDT      0.005366
OOKIUSDT     0.004673
Name: ret, dtype: float64

### Same with multithreading, 20 times faster

In [25]:
from datetime import datetime

def get_history(client, symbol, interval, duration):
    """Fetch historical klines for one symbol after a 10 ms pause.

    Args:
        client: object exposing ``get_historical_klines``.
        symbol: trading pair to query.
        interval: candle interval forwarded to the client.
        duration: start string forwarded as ``start_str``.

    Returns:
        Whatever ``client.get_historical_klines`` returns for the query.
    """
    # Short delay before each request, meant to stay under the API rate limit.
    time.sleep(0.01)
    query = {'symbol': symbol, 'interval': interval, 'start_str': duration}
    return client.get_historical_klines(**query)

def get_top_performer_multithreading(client, interval, duration, n=10,
                                     output_dir="C:/Users/tariq/algorithmic_trading/data/topPerformers"):
    """Rank USDT pairs by cumulative return, fetching candles with one thread per symbol.

    The ranked result is also written to
    ``{output_dir}/top_performers_{end date of the best row}.parquet``.

    Args:
        client: object exposing ``get_exchange_info()``; it is also handed to
            ``get_history`` for the candle requests.
        interval: candle interval forwarded to ``get_history``.
        duration: start string forwarded to ``get_history``.
        n: number of top rows to keep.
        output_dir: existing directory receiving the parquet snapshot. Defaults to
            the path that was previously hard-coded.

    Returns:
        DataFrame indexed by symbol with columns ``ret``, ``start_date`` and
        ``end_date`` holding the ``n`` best performers. When no symbol produced
        candles (or ``n`` selects nothing) an empty frame is returned and no file
        is written.

    Note:
        This relies on the four-argument ``get_history(client, symbol, interval,
        duration)``; later cells of the notebook redefine ``get_history`` with a
        different signature.
    """
    info = client.get_exchange_info()
    symbols = [x['symbol'] for x in info['symbols']]

    # Drop leveraged tokens (substring match) and keep USDT-quoted pairs only.
    exclude = ['UP', 'DOWN', 'BEAR', 'BULL']
    relevant_symbols = [
        symbol for symbol in symbols
        if symbol.endswith('USDT') and not any(tag in symbol for tag in exclude)
    ]

    # One slot per symbol; a slot stays None when its worker thread did not
    # complete the assignment (for instance because the request raised).
    klines_data = [None] * len(relevant_symbols)

    def _helper(i):
        klines_data[i] = get_history(client, relevant_symbols[i], interval, duration)

    threads = [threading.Thread(target=_helper, args=(i,)) for i in range(len(relevant_symbols))]
    for thread in threads:
        thread.start()
    for thread in tqdm(threads):
        thread.join()

    # Cumulative return and covered time span for every symbol that has candles.
    returns, selected_symbols, start_dates, end_dates = [], [], [], []
    for symbol, klines in zip(relevant_symbols, klines_data):
        if not klines:
            continue
        df = pd.DataFrame(klines)
        df_close_prices = df[4].astype(float)  # column 4 is the close price
        returns.append((df_close_prices.pct_change() + 1).prod() - 1)
        selected_symbols.append(symbol)
        # Column 0 is the candle open time in milliseconds.
        start_dates.append(pd.to_datetime(df.iloc[0, 0], unit='ms'))
        end_dates.append(pd.to_datetime(df.iloc[-1, 0], unit='ms'))

    if not returns:
        # Nothing to rank: return an empty frame instead of failing further down.
        return pd.DataFrame(columns=['ret', 'start_date', 'end_date'])

    retdf = pd.DataFrame({
        'ret': returns,
        'start_date': start_dates,
        'end_date': end_dates
    }, index=selected_symbols)

    # Rank once and reuse the result for both the snapshot and the return value.
    result = retdf.nlargest(n, 'ret')
    if result.empty:
        return result

    # The snapshot is named after the end date of the best performer.
    top_end_date = result['end_date'].iloc[0]
    date_str = top_end_date.strftime("%Y-%m-%d_%H-%M-%S")
    file_path = f"{output_dir}/top_performers_{date_str}.parquet"

    result.to_parquet(file_path)
    print("ajout effectué pour la date : ", date_str)
    return result

In [35]:
top_performers = get_top_performer_multithreading(client, '1m',  '6 minutes ago UTC', n=1)

100%|██████████| 472/472 [00:06<00:00, 77.09it/s] 


In [36]:
top_performers

Unnamed: 0,ret,start_date,end_date
VIBUSDT,0.018115,2024-11-02 14:03:00,2024-11-02 14:08:00


In [27]:
top_perf = pd.read_parquet("C:\\Users\\tariq\\algorithmic_trading\\data\\topPerformers\\top_performers_2024-11-02_14-04-00.parquet")

In [34]:
top_perf

Unnamed: 0,ret,start_date,end_date
KP3RUSDT,0.03483,2024-11-02 14:00:00,2024-11-02 14:04:00


In [38]:
topp = pd.read_parquet("C:\\Users\\tariq\\algorithmic_trading\\data\\topPerformers\\top_performers_2024-11-02_14-59-00.parquet")
topp

Unnamed: 0,ret,start_date,end_date
UNFIUSDT,0.012172,2024-11-02 14:56:00,2024-11-02 14:59:00


In [57]:
top_performers_path = "C:\\Users\\tariq\\algorithmic_trading\\data\\topPerformers"

# Backtest

In [3]:
import os
import pandas as pd
top_performers_path = "C:\\Users\\tariq\\algorithmic_trading\\data\\topPerformers"

worst_performer_path = "/home/tsereir/tsereir/projets/algorithmic_trading/data/worstPerformers"
worst_file_list = os.listdir(worst_performer_path)

# Read every worst-performer snapshot; only the first one is printed as a sample.
count = 0
for count, file in enumerate(worst_file_list, start=1):
    path = os.path.join(worst_performer_path, file)
    df = pd.read_parquet(path)
    if count != 1:
        continue
    print(df)
        

               ret          start_date            end_date
ARKMUSDT -0.013889 2024-11-20 16:14:00 2024-11-20 16:17:00


In [230]:
import sys
import os
from os.path import join, getsize

top_performers_path = "C:\\Users\\tariq\\algorithmic_trading\\data\\topPerformers"
top_performers_dates = {}
files_list = os.listdir(top_performers_path)

# Number of snapshot files whose first row is a given ticker.
ticker_counter = {}

count = 0
bttc_count = 0
for file_name in files_list:
    count += 1
    file_path = os.path.join(top_performers_path, file_name)
    df = pd.read_parquet(file_path)

    top_ticker = df.index[0]
    ticker_counter[top_ticker] = ticker_counter.get(top_ticker, 0) + 1

    # Compare the scalar label: comparing the whole index yields an array whose
    # truth value is ambiguous as soon as a file holds more than one row.
    if top_ticker == "BTTCUSDT":
        bttc_count += 1

# Each file contributes exactly one ticker, so the number of files whose ticker
# appears once equals the number of tickers counted once; no second pass over
# the parquet files is needed.
unique = sum(1 for occurrences in ticker_counter.values() if occurrences == 1)

print("nb de fichier :", count)
print("nombre de ticker presents une seule fois dans les top performers : ", unique)
# print("nb de fois que bttc est apparu", bttc_count)
print(dict(sorted(ticker_counter.items(), key=lambda item: item[1])))

nb de fichier : 121
nombre de ticker presents une seule fois dans les top performers :  22
{'OMNIUSDT': 1, 'REIUSDT': 1, 'FLUXUSDT': 1, 'WINGUSDT': 1, 'GUSDT': 1, 'VOXELUSDT': 1, 'DEXEUSDT': 1, 'BICOUSDT': 1, 'FTTUSDT': 1, 'EDUUSDT': 1, 'SUNUSDT': 1, 'ARKMUSDT': 1, 'SNTUSDT': 1, 'ALPHAUSDT': 1, 'OGUSDT': 1, 'CVXUSDT': 1, 'NEIROUSDT': 1, 'SANTOSUSDT': 1, 'SCUSDT': 1, 'AVAXUSDT': 1, 'LTOUSDT': 1, 'BARUSDT': 1, 'LUMIAUSDT': 2, 'RADUSDT': 2, 'AUDIOUSDT': 2, 'GLMRUSDT': 2, 'PYRUSDT': 2, 'HARDUSDT': 2, 'STRAXUSDT': 2, 'OAXUSDT': 2, 'WUSDT': 3, 'TROYUSDT': 3, 'RAREUSDT': 3, 'VIBUSDT': 4, 'SXPUSDT': 4, 'OGNUSDT': 5, 'VIDTUSDT': 5, 'PROSUSDT': 5, 'KP3RUSDT': 5, 'CVCUSDT': 5, 'BTTCUSDT': 11, 'OOKIUSDT': 13, 'UNFIUSDT': 17}


### Fetch data from Binance

In [2]:
import time as tm
def get_history(client, symbol, interval, start_str, end_str):
    """Fetch klines for ``symbol`` between ``start_str`` and ``end_str``.

    A 10 ms pause precedes the request. All arguments except ``client`` are
    forwarded by keyword to ``client.get_historical_klines``, whose result is
    returned unchanged.
    """
    tm.sleep(0.01)
    query = {
        'symbol': symbol,
        'interval': interval,
        'start_str': start_str,
        'end_str': end_str,
    }
    return client.get_historical_klines(**query)


### Download top performers data

In [61]:
top_performers_path = "C:\\Users\\tariq\\algorithmic_trading\\data\\topPerformers"
top_performers_output_path = "C:\\Users\\tariq\\algorithmic_trading\\data\\topPerformersData"

worst_performer_path = "/home/tsereir/tsereir/projets/algorithmic_trading/data/worstPerformers"
worst_performer_output_path = "/home/tsereir/tsereir/projets/algorithmic_trading/data/worstPerformersData"

In [None]:

from datetime import timedelta

import time as tm
def get_history(client, symbol, interval, start_str, end_str):
    """Return the klines of ``symbol`` for the ``[start_str, end_str]`` window.

    Sleeps 10 ms, then forwards ``symbol``, ``interval``, ``start_str`` and
    ``end_str`` by keyword to ``client.get_historical_klines`` and returns its
    result as is.
    """
    tm.sleep(0.01)
    window = dict(start_str=start_str, end_str=end_str)
    return client.get_historical_klines(symbol=symbol, interval=interval, **window)



def download_top_performers_data(client, performer_path, output_path, horizon_hours=10):
    """Download the 1-minute closes that follow each performer snapshot.

    For every row of every parquet file in ``performer_path``, candles are
    requested from the row's ``end_date`` up to ``horizon_hours`` later and the
    close prices are saved, indexed by date, to
    ``{output_path}/{ticker}_{start date with ':' replaced by '-'}.parquet``.

    Args:
        client: client handed to ``get_history``.
        performer_path: directory holding the performer snapshots (parquet files
            indexed by ticker, with an ``end_date`` column).
        output_path: directory receiving one parquet file per (ticker, start date);
            it is created when missing.
        horizon_hours: length of the window downloaded after ``end_date``; this is
            the maximum lifetime of a trade in the backtest. Defaults to 10, the
            value that was previously hard-coded.

    Returns:
        None. Files are written as a side effect.
    """
    # A file counts as complete once it holds more rows than the horizon has
    # minutes (one candle per minute); shorter files are downloaded again.
    min_rows = horizon_hours * 60
    os.makedirs(output_path, exist_ok=True)

    files_list = os.listdir(performer_path)
    print("Nombre de fichiers à traiter : ", len(files_list))
    for file_name in tqdm(files_list):
        file_path = os.path.join(performer_path, file_name)
        df = pd.read_parquet(file_path)
        for ticker, row in df.iterrows():
            end_date = pd.to_datetime(row['end_date'])
            start_str = end_date.strftime("%Y-%m-%d %H:%M:%S")

            end_ts = end_date + timedelta(hours=horizon_hours)
            end_str = end_ts.strftime("%Y-%m-%d %H:%M:%S")

            # ':' is replaced so the timestamp can be used in a file name.
            output_file = os.path.join(output_path, f"{ticker}_{start_str.replace(':', '-')}.parquet")
            if os.path.exists(output_file) and len(pd.read_parquet(output_file)) > min_rows:
                continue  # already fully downloaded

            data = get_history(client, ticker, '1m', start_str, end_str)
            df_data = pd.DataFrame(data)
            if not df_data.empty:
                # Column 4 is the close price, column 0 the open time in milliseconds.
                df_data['close'] = df_data[4].astype(float)
                df_data["date"] = pd.to_datetime(df_data[0], unit='ms')
                df_data = df_data[['date', 'close']].set_index('date')
                df_data.to_parquet(output_file)

In [68]:
import warnings
warnings.filterwarnings('ignore')
download_top_performers_data(client, worst_performer_path, worst_performer_output_path) 

Nombre de fichiers à traiter :  1345


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1345/1345 [09:10<00:00,  2.44it/s]


### Add return to files

In [70]:
def add_cumret(top_performers_data_path):
    """Add a cumulative ``return`` column to every parquet file of a directory.

    Files that already have a ``return`` column, or that lack a ``close``
    column, are reported and left untouched. The other files are rewritten in
    place with ``return`` set to the cumulative return of ``close`` relative
    to the first row.

    Args:
        top_performers_data_path: directory containing the parquet files.
    """
    for file_name in os.listdir(top_performers_data_path):
        file_path = os.path.join(top_performers_data_path, file_name)
        df = pd.read_parquet(file_path)
        existing_columns = df.columns

        if 'return' in existing_columns:
            print(f"La colonne 'return' existe déjà dans {file_name}.")
            continue

        if 'close' not in existing_columns:
            print(f"Fichier {file_name} ne contient pas la colonne 'close'.")
            continue

        # Compound the per-candle growth factors; the first row stays NaN.
        growth = df['close'].pct_change() + 1
        df['return'] = growth.cumprod() - 1
        df.to_parquet(file_path)
        print(f"Colonne 'return' ajoutée dans {file_name}.")

In [None]:
add_cumret(worst_performer_output_path)

### Perform backtest

In [72]:
def perform_backtest(top_performers_data_path, tp, sl):
    """Count take-profit and stop-loss exits over every file of a directory.

    Each parquet file is treated as one trade opened on its first row. The
    trade is closed on the first row whose ``return`` is above ``tp`` (win) or
    below ``-sl`` (loss); a file where neither happens counts for nothing.
    Files whose name contains ``BTTCUSDT`` and files without a ``return``
    column are skipped with a message.

    Args:
        top_performers_data_path: directory of parquet files with ``close``
            and ``return`` columns.
        tp: take-profit level, as a fractional return (0.02 means 2 %).
        sl: stop-loss level, as a positive fractional return.

    Returns:
        Tuple ``(winning_trade, losing_trade)``.
    """
    tally = {'win': 0, 'loss': 0}
    results = []

    for file_name in os.listdir(top_performers_data_path):
        if 'BTTCUSDT' in file_name:
            print(f"Le fichier {file_name} concerne le ticker BTTCUSDT, skipping...")
            continue

        df = pd.read_parquet(os.path.join(top_performers_data_path, file_name))

        if 'return' not in df.columns:
            print(f"Le fichier {file_name} ne contient pas la colonne 'return'.")
            continue

        entry_price = df['close'].iloc[0]

        # Walk the candles in order and stop at the first threshold breach.
        for current_return, current_price in zip(df['return'], df['close']):
            if current_return > tp:
                outcome = 'win'
            elif current_return < -sl:
                outcome = 'loss'
            else:
                continue

            tally[outcome] += 1
            results.append({
                'file': file_name,
                'result': outcome,
                'entry_price': entry_price,
                'exit_price': current_price
            })
            break  # trade closed, move on to the next file

    return tally['win'], tally['loss']

In [45]:
print(perform_backtest(worst_performer_output_path, 0.02, 0.02) )

(404, 399)


In [267]:
result_2 = perform_backtest(output_path, tp=0.01, sl=0.01)

Total des trades gagnants: 36, Total des trades perdants: 87


### Vérification des données

In [46]:
# Preview the head of the first three downloaded files as a sanity check.
# Slicing the listing stops the loop after three files instead of walking
# the whole directory with a counter.
for file in os.listdir(worst_performer_output_path)[:3]:
    path = os.path.join(worst_performer_output_path, file)
    df = pd.read_parquet(path)
    display(df.head())

Unnamed: 0_level_0,close,return
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-11-21 15:44:00,0.006065,
2024-11-21 15:45:00,0.006133,0.011212
2024-11-21 15:46:00,0.006116,0.008409
2024-11-21 15:47:00,0.006157,0.015169
2024-11-21 15:48:00,0.00616,0.015664


Unnamed: 0_level_0,close,return
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-11-21 20:36:00,3.266,
2024-11-21 20:37:00,3.262,-0.001225
2024-11-21 20:38:00,3.257,-0.002756
2024-11-21 20:39:00,3.261,-0.001531
2024-11-21 20:40:00,3.261,-0.001531


Unnamed: 0_level_0,close,return
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-11-21 16:32:00,0.1052,
2024-11-21 16:33:00,0.1051,-0.000951
2024-11-21 16:34:00,0.1049,-0.002852
2024-11-21 16:35:00,0.1052,0.0
2024-11-21 16:36:00,0.1053,0.000951


### We calculate return for each dataframe

### Quelques métriques

In [47]:
def backtest_strategy(candle_data, take_profit_threshold, stop_loss_threshold):
    """Simulate one take-profit / stop-loss trade per symbol.

    For each symbol a position is opened at the first close. It is closed on
    the first later candle whose close reaches the take-profit price (win) or
    the stop-loss price (loss); the exit is booked at the threshold price
    itself, not at the candle close. A symbol that hits neither threshold, or
    whose frame is empty, is recorded with zero trades and a zero return.

    The input frames are not modified.

    Args:
        candle_data: mapping ``symbol -> DataFrame`` with a ``close`` column.
        take_profit_threshold: take-profit distance in percent (2 means +2 %).
        stop_loss_threshold: stop-loss distance in percent (2 means -2 %).

    Returns:
        Tuple ``(results, total_metrics)`` where ``results`` is a dict of
        parallel lists (``symbol``, ``total_return``, ``num_trades``,
        ``winning_trades``, ``losing_trades``, ``win_rate``) and
        ``total_metrics`` counts ``"winning trade"`` and ``"losing trade"``
        over all symbols.
    """
    results = {
        'symbol': [],
        'total_return': [],
        'num_trades': [],
        'winning_trades': [],
        'losing_trades': [],
        'win_rate': [],
    }
    total_metrics = {"winning trade": 0, "losing trade": 0}

    for symbol, df in candle_data.items():
        closes = df['close']
        trades = wins = losses = 0
        total_return = 0

        # An empty frame has no entry price: it is recorded as "no trade".
        if len(closes) > 0:
            initial_price = closes.iloc[0]
            take_profit_price = initial_price * (1 + take_profit_threshold / 100)
            stop_loss_price = initial_price * (1 - stop_loss_threshold / 100)

            # Start at the second candle: the first one is the entry.
            for current_price in closes.iloc[1:]:
                if current_price >= take_profit_price:
                    exit_price = take_profit_price
                    wins += 1
                    total_metrics["winning trade"] += 1
                elif current_price <= stop_loss_price:
                    exit_price = stop_loss_price
                    losses += 1
                    total_metrics["losing trade"] += 1
                else:
                    continue

                trades += 1
                total_return = (exit_price - initial_price) / initial_price
                break

        win_rate = wins / trades if trades > 0 else 0

        results['symbol'].append(symbol)
        results['total_return'].append(total_return)
        results['num_trades'].append(trades)
        results['winning_trades'].append(wins)
        results['losing_trades'].append(losses)
        results['win_rate'].append(win_rate)

    return results, total_metrics

# take_profit_threshold = 2 
# stop_loss_threshold = 2  
# strategy_results, total_metrics = backtest_strategy(candle_data, take_profit_threshold, stop_loss_threshold)

# for i in range(len(strategy_results['symbol'])):
#     print(f"Symbol: {strategy_results['symbol'][i]}, Total Return: {strategy_results['total_return'][i]:.2%}, "
#           f"Number of Trades: {strategy_results['num_trades'][i]}, Winning Trades: {strategy_results['winning_trades'][i]}, "
#           f"Losing Trades: {strategy_results['losing_trades'][i]}, Win Rate: {strategy_results['win_rate'][i]:.2%}")


In [48]:
# numpy is imported here because the notebook's only other numpy import sits in
# a later cell, which made this cell fail on a fresh kernel.
import numpy as np

# 21 evenly spaced thresholds from 0 to 0.1, used as take-profit / stop-loss levels.
values = np.linspace(0, 0.1, 21)
print(values)

[0.    0.005 0.01  0.015 0.02  0.025 0.03  0.035 0.04  0.045 0.05  0.055
 0.06  0.065 0.07  0.075 0.08  0.085 0.09  0.095 0.1  ]


In [73]:
import numpy as np
take_profit_threshold = 2 
stop_loss_threshold = 2  
from itertools import product

# Win ratio obtained for each symmetric take-profit / stop-loss level, keyed by the level.
winning_ratios = {}

for value in values:
    tp = value
    sl = value
    print(f"Take Profit: {tp}, Stop Loss: {sl}")
    winning, losing = perform_backtest(worst_performer_output_path, tp, sl)

    # With wide thresholds it is possible that no trade closes at all; report
    # NaN in that case instead of dividing by zero.
    closed_trades = winning + losing
    ratio = winning / closed_trades if closed_trades else float("nan")
    winning_ratios[value] = ratio
    print(f"Winning Trades: {winning}, Losing Trades: {losing}, Winning Ratio: {ratio:.2%}")

Take Profit: 0.0, Stop Loss: 0.0
Winning Trades: 680, Losing Trades: 665, Winning Ratio: 50.56%
Take Profit: 0.005, Stop Loss: 0.005
Winning Trades: 656, Losing Trades: 688, Winning Ratio: 48.81%
Take Profit: 0.01, Stop Loss: 0.01
Winning Trades: 677, Losing Trades: 663, Winning Ratio: 50.52%
Take Profit: 0.015, Stop Loss: 0.015
Winning Trades: 644, Losing Trades: 683, Winning Ratio: 48.53%
Take Profit: 0.02, Stop Loss: 0.02
Winning Trades: 648, Losing Trades: 657, Winning Ratio: 49.66%
Take Profit: 0.025, Stop Loss: 0.025
Winning Trades: 641, Losing Trades: 623, Winning Ratio: 50.71%
Take Profit: 0.03, Stop Loss: 0.03
Winning Trades: 629, Losing Trades: 570, Winning Ratio: 52.46%
Take Profit: 0.035, Stop Loss: 0.035
Winning Trades: 590, Losing Trades: 533, Winning Ratio: 52.54%
Take Profit: 0.04, Stop Loss: 0.04
Winning Trades: 570, Losing Trades: 471, Winning Ratio: 54.76%
Take Profit: 0.045, Stop Loss: 0.045
Winning Trades: 545, Losing Trades: 408, Winning Ratio: 57.19%
Take Profit: