In [12]:
import time as tm
from datetime import time, timedelta, datetime
import schedule
import pandas as pd
import threading
from tqdm import tqdm
from binance.client import Client

from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import pandas as pd
from joblib import Parallel, delayed
import os
import threading
from tqdm import tqdm
from binance.enums import HistoricalKlinesType

client = Client()

def get_history(client, symbol, interval, start_str, end_str=None):
    """
    Récupère les données historiques pour un ticker donné.
    """
    tm.sleep(0.01)
    return client.get_historical_klines(symbol=symbol, interval=interval, start_str=start_str, end_str=end_str, klines_type=HistoricalKlinesType.FUTURES)


In [63]:
info = client.futures_exchange_info()

symbols = [x['symbol'] for x in info['symbols']]
exclude = ['UP', 'DOWN', 'BEAR', 'BULL']
non_lev = [symbol for symbol in symbols if all(excludes not in symbol for excludes in exclude)]
relevant_symbols = [symbol for symbol in non_lev if symbol.endswith('USDT')]

In [14]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from requests.exceptions import ReadTimeout
from pytz import timezone

def get_performers_past(client, interval, start_date, end_date, n=1, worst=True, future=True):
    """
    Identifie les meilleurs ou pires performeurs dans chaque plage horaire.
    """
    paris_tz = timezone('Europe/Paris')
    if future : 
        info = client.futures_exchange_info()
    else: 
        info = client.get_exchange_info()
        
    symbols = [x['symbol'] for x in info['symbols']]
    exclude = ['UP', 'DOWN', 'BEAR', 'BULL']
    non_lev = [symbol for symbol in symbols if all(excludes not in symbol for excludes in exclude)]
    relevant_symbols = [symbol for symbol in non_lev if symbol.endswith('USDT')]

    all_results = []
    date_range = []
    current_time = start_date
    while current_time < end_date:
        date_range.append(current_time)
        current_time += timedelta(hours=1)
        
    print(f"start : {start_date}, end : {end_date}")
    print("Nombre de plage d'heure", len(date_range))

    for current_time in date_range : 
        print("Processing time range:", current_time, "to", current_time + timedelta(hours=1))
        next_time = current_time + timedelta(hours=1)

        # Initialisation des listes pour cette période
        returns, selected_symbols, dates = [], [], []
        klines_data = [None] * len(relevant_symbols)

        def _helper(klines_data, i):
            try:
                klines_data[i] = get_history(
                    client, relevant_symbols[i], interval,
                    start_str=current_time.strftime("%Y-%m-%d %H:%M:%S"),
                    end_str=next_time.strftime("%Y-%m-%d %H:%M:%S")
                )
            except ReadTimeout:
                print(f"Timeout for {relevant_symbols[i]} during {current_time} -> {next_time}. Retrying...")
            except Exception:
                pass

        threads = [threading.Thread(target=_helper, args=(klines_data, i)) for i in range(len(relevant_symbols))]
        for thread in threads:
            thread.start()
        for thread in tqdm(threads, desc=f"Processing {current_time} -> {next_time}"):
            thread.join()

        # Calculer les rendements
        for i, klines in enumerate(klines_data):
                # print(f"{relevant_symbols[i]}: {len(klines) if klines else 0} klines retrieved.")
            if relevant_symbols[i] == 'BTTCUSDT':  
                continue
            if klines and len(klines) > 0:
                df = pd.DataFrame(klines)
                df_close_prices = df[4].astype(float)  # Prix de clôture
                cumret = (df_close_prices.pct_change() + 1).prod() - 1

                # start_date = pd.to_datetime(df.iloc[0, 0], unit='ms')
                # end_date = pd.to_datetime(df.iloc[-1, 0], unit='ms')

                #to have paris timezone
                start_date = pd.to_datetime(df.iloc[0, 0], unit='ms').tz_localize('UTC').tz_convert(paris_tz)
                end_date = pd.to_datetime(df.iloc[-1, 0], unit='ms').tz_localize('UTC').tz_convert(paris_tz)



                returns.append(cumret)
                selected_symbols.append(relevant_symbols[i])
                dates.append((start_date, end_date))

        retdf = pd.DataFrame({
            'ret': returns,
            'start_date': [d[0] for d in dates],
            'end_date': [d[1] for d in dates]
        }, index=selected_symbols)

        if not retdf.empty:
            if worst:
                result = retdf.nsmallest(n, 'ret')
            else:
                result = retdf.nlargest(n, 'ret')

            result['start_period'] = current_time
            result['end_period'] = next_time
            all_results.append(result)
        else:
            print("WARNING -- df EMPTY")

        current_time = next_time
        # print("Finished processing:", current_time)

    return pd.concat(all_results).reset_index()


In [15]:
from datetime import datetime
import time as tm
from dateutil.relativedelta import relativedelta

end_date = datetime(2024, 12, 21, 11)
start_date = end_date - relativedelta(hours=3)


print(start_date)
print(end_date)

diff_in_seconds = int((end_date - start_date).total_seconds())
diff_in_hours = diff_in_seconds // 3600
print(diff_in_hours)

2024-12-21 08:00:00
2024-12-21 11:00:00
3


In [20]:
start_time = tm.time()
df_1h = get_performers_past(
    client=client,
    interval='1h', 
    start_date=start_date,
    end_date=end_date,
    n=1,  
    worst=True,  
    future=True  
)
print("Temps d'execution : ", start_time - tm.time())

start : 2024-12-21 08:00:00, end : 2024-12-21 11:00:00
Nombre de plage d'heure 3
Processing time range: 2024-12-21 08:00:00 to 2024-12-21 09:00:00


Processing 2024-12-21 08:00:00 -> 2024-12-21 09:00:00: 100%|██████████| 364/364 [00:12<00:00, 28.58it/s]

Timeout for POWRUSDT during 2024-12-21 08:00:00 -> 2024-12-21 09:00:00. Retrying...





Processing time range: 2024-12-21 09:00:00 to 2024-12-21 10:00:00


Processing 2024-12-21 09:00:00 -> 2024-12-21 10:00:00: 100%|██████████| 364/364 [00:12<00:00, 29.97it/s]


Processing time range: 2024-12-21 10:00:00 to 2024-12-21 11:00:00


Processing 2024-12-21 10:00:00 -> 2024-12-21 11:00:00:   9%|▉         | 34/364 [00:10<01:04,  5.13it/s]

Timeout for PHBUSDT during 2024-12-21 10:00:00 -> 2024-12-21 11:00:00. Retrying...
Timeout for FXSUSDT during 2024-12-21 10:00:00 -> 2024-12-21 11:00:00. Retrying...


Processing 2024-12-21 10:00:00 -> 2024-12-21 11:00:00:  31%|███       | 113/364 [00:12<00:12, 19.62it/s]

Timeout for CELOUSDT during 2024-12-21 10:00:00 -> 2024-12-21 11:00:00. Retrying...
Timeout for STEEMUSDT during 2024-12-21 10:00:00 -> 2024-12-21 11:00:00. Retrying...


Processing 2024-12-21 10:00:00 -> 2024-12-21 11:00:00: 100%|██████████| 364/364 [00:16<00:00, 22.29it/s]

Timeout for POWRUSDT during 2024-12-21 10:00:00 -> 2024-12-21 11:00:00. Retrying...





Temps d'execution :  -45.36068344116211


In [18]:

df_5min = df_normal 

In [21]:
df_1h

Unnamed: 0,index,ret,start_date,end_date,start_period,end_period
0,USUALUSDT,-0.05397,2024-12-21 09:00:00+01:00,2024-12-21 10:00:00+01:00,2024-12-21 08:00:00,2024-12-21 09:00:00
1,MOVEUSDT,-0.025351,2024-12-21 10:00:00+01:00,2024-12-21 11:00:00+01:00,2024-12-21 09:00:00,2024-12-21 10:00:00
2,USUALUSDT,-0.059198,2024-12-21 11:00:00+01:00,2024-12-21 12:00:00+01:00,2024-12-21 10:00:00,2024-12-21 11:00:00


In [19]:
df_5min

Unnamed: 0,index,ret,start_date,end_date,start_period,end_period
0,AVAUSDT,-0.047434,2024-12-21 09:00:00+01:00,2024-12-21 10:00:00+01:00,2024-12-21 08:00:00,2024-12-21 09:00:00
1,USUALUSDT,-0.044468,2024-12-21 10:00:00+01:00,2024-12-21 11:00:00+01:00,2024-12-21 09:00:00,2024-12-21 10:00:00
2,MOVEUSDT,-0.03049,2024-12-21 11:00:00+01:00,2024-12-21 12:00:00+01:00,2024-12-21 10:00:00,2024-12-21 11:00:00


In [None]:
move= get_history(client, "MOVEUSDT", '5m', start_str, end_str=None):


In [7]:
if len(df_normal) == diff_in_hours : 
    print("bon nombre de performers récuperer")
else :
    print(diff_in_hours - len(df_normal) , "manquantes" )
df_normal

bon nombre de performers récuperer


Unnamed: 0,index,ret,start_date,end_date,start_period,end_period
0,VELODROMEUSDT,-0.050465,2024-12-21 09:00:00+01:00,2024-12-21 10:00:00+01:00,2024-12-21 08:00:00,2024-12-21 09:00:00
1,USUALUSDT,-0.044299,2024-12-21 10:00:00+01:00,2024-12-21 11:00:00+01:00,2024-12-21 09:00:00,2024-12-21 10:00:00
2,MOVEUSDT,-0.030609,2024-12-21 11:00:00+01:00,2024-12-21 12:00:00+01:00,2024-12-21 10:00:00,2024-12-21 11:00:00


### Debug

In [65]:
relevant_symbols_test = relevant_symbols
print("Nombre de symbols de test", len(relevant_symbols_test) ) 
time_st = datetime(2024, 12, 21, 9)
time_end = time_st + timedelta(hours=3)

Nombre de symbols de test 364


### Multithreading Method

In [66]:
import time
start=  time.time()

def _helper(klines_data, symbols, i, time_start, time_end, interval):
    try:
        klines_data[i] = get_history(
            client, symbols[i], interval,
            start_str=time_start.strftime("%Y-%m-%d %H:%M:%S"),
            end_str=time_end.strftime("%Y-%m-%d %H:%M:%S")
        )
    except ReadTimeout:
        print(f"Timeout for {symbols[i]} during {time_start} -> {time_end}. Retrying...")
    except Exception:
        pass

klines_data_test = [None] * len(relevant_symbols_test)
threads = [threading.Thread(target=_helper, args=(klines_data_test, relevant_symbols_test, i, time_st, time_end, '5m' )) for i in range(len(relevant_symbols_test))]

for thread in threads:
    thread.start()
for thread in tqdm(threads, desc=f"Processing {time_st} -> {time_end}"):
    thread.join()
print("Temps", time.time() - start)


Processing 2024-12-21 09:00:00 -> 2024-12-21 12:00:00: 100%|██████████| 364/364 [00:12<00:00, 29.50it/s]

Temps 13.304525375366211





### joblib method faster

In [70]:
start = time.time()

def fetch_data(symbol, time_start, time_end, interval):
    try:
        data =  get_history(
            client, symbol, interval,
            start_str=time_start.strftime("%Y-%m-%d %H:%M:%S"),
            end_str=time_end.strftime("%Y-%m-%d %H:%M:%S")
        )
        if not data:
            return symbol, None
        df = pd.DataFrame(
            data, columns=["open time", "open", "high", "low", "close", "volume", "closing time", "quote asset volume", "nb trades", "tbav", "tbqav", "ignore"] )
        
        df["open time"] = pd.to_datetime(df["open time"], unit="ms").dt.round("min")
        df["closing time"] = pd.to_datetime(df["closing time"], unit="ms").dt.round("min")
        df["close"] = df["close"].astype(float)
        df["ret"] = df["close"].pct_change()
        df["cumret"] = (1 + df["ret"]).cumprod() - 1
        return symbol, df

    except ReadTimeout:
        print(f"Timeout for {symbol} during {time_start} -> {time_end}. Retrying...")
        return None
    
    except Exception as e:
        print(f"Error for {symbol}: {e}")
        return symbol, None

results = Parallel(n_jobs=-1, backend="threading")(
    delayed(fetch_data)(symbol, time_st, time_end, '5m') for symbol in tqdm(relevant_symbols_test, desc="Downloading data")
)

print(f"Time taken: {time.time() - start:.2f} seconds")

Downloading data:  35%|███▌      | 128/364 [00:02<00:04, 55.55it/s]

Error for BTCSTUSDT: APIError(code=-1122): Invalid symbol status.


Downloading data: 100%|██████████| 364/364 [00:06<00:00, 57.80it/s]


Time taken: 8.43 seconds


In [79]:
results[0][0]

'BTCUSDT'

### En passant en paramètre 10h, 13h sur des bougies 5 min 
Le return cumulatif va etre calculé entre le close de 10h5 et le close de 13h5

In [55]:
df_btc = pd.DataFrame(klines_data_test[0], columns=["open time", "open", "high", "low", "close", 'volume', 'closing time', 'quote asset volume', 'nb trades', 'tbav', 'tbqav', 'ignore'])
df_btc["open time"] = pd.to_datetime(df_btc['open time'], unit='ms').dt.round("min")
df_btc["closing time"]  = pd.to_datetime(df_btc['closing time'], unit='ms').dt.round("min")

df_btc["close"] = df_btc["close"].astype(float)
df_btc["ret"] = df_btc["close"].pct_change()
df_btc["cumret"] = (1 + df_btc["ret"]).cumprod() - 1

# df_btc["cumret"] = (df_btc["close"].astype(float).pct_change() + 1).prod() - 1

df_btc[["open time", "closing time", "open", "close", "ret", "cumret"]]

Unnamed: 0,open time,closing time,open,close,ret,cumret
0,2024-12-21 09:00:00,2024-12-21 09:05:00,98601.7,98485.2,,
1,2024-12-21 09:05:00,2024-12-21 09:10:00,98485.1,98316.6,-0.001712,-0.001712
2,2024-12-21 09:10:00,2024-12-21 09:15:00,98316.5,98443.2,0.001288,-0.000426
3,2024-12-21 09:15:00,2024-12-21 09:20:00,98443.4,98300.0,-0.001455,-0.00188
4,2024-12-21 09:20:00,2024-12-21 09:25:00,98300.0,98300.1,1e-06,-0.001879
5,2024-12-21 09:25:00,2024-12-21 09:30:00,98300.1,98412.7,0.001145,-0.000736
6,2024-12-21 09:30:00,2024-12-21 09:35:00,98412.7,98471.7,0.0006,-0.000137
7,2024-12-21 09:35:00,2024-12-21 09:40:00,98471.6,98486.0,0.000145,8e-06
8,2024-12-21 09:40:00,2024-12-21 09:45:00,98486.0,98353.4,-0.001346,-0.001338
9,2024-12-21 09:45:00,2024-12-21 09:50:00,98353.5,98432.2,0.000801,-0.000538


In [27]:
for i, klines in enumerate(klines_data_test):
            if klines and len(klines) > 0:
                df = pd.DataFrame(klines)
                df_close_prices = df[4].astype(float)  # Prix de clôture
                cumret = (df_close_prices.pct_change() + 1).prod() - 1

                # start_date = pd.to_datetime(df.iloc[0, 0], unit='ms')
                # end_date = pd.to_datetime(df.iloc[-1, 0], unit='ms')

                #to have paris timezone
                start_date = pd.to_datetime(df.iloc[0, 0], unit='ms').tz_localize('UTC').tz_convert(paris_tz)
                end_date = pd.to_datetime(df.iloc[-1, 0], unit='ms').tz_localize('UTC').tz_convert(paris_tz)



                returns.append(cumret)
                selected_symbols.append(relevant_symbols[i])
                dates.append((start_date, end_date))

        retdf = pd.DataFrame({
            'ret': returns,
            'start_date': [d[0] for d in dates],
            'end_date': [d[1] for d in dates]
        }, index=selected_symbols)

        if not retdf.empty:
            if worst:
                result = retdf.nsmallest(n, 'ret')
            else:
                result = retdf.nlargest(n, 'ret')

            result['start_period'] = current_time
            result['end_period'] = next_time
            all_results.append(result)
        else:
            print("WARNING -- df EMPTY")

[[[1734771600000,
   '98601.70',
   '98644.00',
   '98360.70',
   '98485.20',
   '559.464',
   1734771899999,
   '55093800.50740',
   12304,
   '222.268',
   '21887921.73800',
   '0'],
  [1734771900000,
   '98485.10',
   '98485.20',
   '98298.40',
   '98316.60',
   '701.628',
   1734772199999,
   '69010949.81480',
   13061,
   '248.000',
   '24392712.55000',
   '0'],
  [1734772200000,
   '98316.50',
   '98507.20',
   '98300.00',
   '98443.20',
   '626.721',
   1734772499999,
   '61674055.65220',
   11413,
   '376.242',
   '37026943.91760',
   '0'],
  [1734772500000,
   '98443.40',
   '98507.30',
   '98300.00',
   '98300.00',
   '481.330',
   1734772799999,
   '47361397.65770',
   9375,
   '154.222',
   '15182875.45380',
   '0'],
  [1734772800000,
   '98300.00',
   '98400.00',
   '98300.00',
   '98300.10',
   '353.462',
   1734773099999,
   '34753829.70930',
   7269,
   '116.428',
   '11448623.37230',
   '0'],
  [1734773100000,
   '98300.10',
   '98452.50',
   '98300.00',
   '98412.70',