In [1]:
import time as tm
from datetime import time, timedelta, datetime
import schedule
import pandas as pd
import threading
from tqdm import tqdm
from binance.client import Client

from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import pandas as pd
from joblib import Parallel, delayed
import os
import threading
from tqdm import tqdm

client = Client()

def get_history(client, symbol, interval, start_str, end_str=None):
    """
    Récupère les données historiques pour un ticker donné.
    """
    tm.sleep(0.01)
    return client.get_historical_klines(symbol=symbol, interval=interval, start_str=start_str, end_str=end_str)


In [2]:
info = client.futures_exchange_info()

symbols = [x['symbol'] for x in info['symbols']]
exclude = ['UP', 'DOWN', 'BEAR', 'BULL']
non_lev = [symbol for symbol in symbols if all(excludes not in symbol for excludes in exclude)]
relevant_symbols = [symbol for symbol in non_lev if symbol.endswith('USDT')]

In [18]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from requests.exceptions import ReadTimeout
from pytz import timezone

def get_performers_past(client, interval, start_date, end_date, n=1, worst=True, future=True):
    """
    Identifie les meilleurs ou pires performeurs dans chaque plage horaire.
    """
    paris_tz = timezone('Europe/Paris')

    if future : 
        info = client.futures_exchange_info()
    else: 
        info = client.get_exchange_info()
        
    symbols = [x['symbol'] for x in info['symbols']]
    exclude = ['UP', 'DOWN', 'BEAR', 'BULL']
    non_lev = [symbol for symbol in symbols if all(excludes not in symbol for excludes in exclude)]
    relevant_symbols = [symbol for symbol in non_lev if symbol.endswith('USDT')]

    all_results = []

    date_range = []
    current_time = start_date
    while current_time < end_date:
        date_range.append(current_time)
        current_time += timedelta(hours=1)
        
    print(f"start : {start_date}, end : {end_date}")
    print("Nombre de plage d'heure", len(date_range))
    for current_time in date_range : 
        print("Processing time range:", current_time, "to", current_time + timedelta(hours=1))
        next_time = current_time + timedelta(hours=1)

        # Initialisation des listes pour cette période
        returns, selected_symbols, dates = [], [], []
        klines_data = [None] * len(relevant_symbols)

        def _helper(klines_data, i):
            try:
                klines_data[i] = get_history(
                    client, relevant_symbols[i], interval,
                    start_str=current_time.strftime("%Y-%m-%d %H:%M:%S"),
                    end_str=next_time.strftime("%Y-%m-%d %H:%M:%S")
                )
            except ReadTimeout:
                print(f"Timeout for {relevant_symbols[i]} during {current_time} -> {next_time}. Retrying...")
            except Exception:
                pass

        threads = [threading.Thread(target=_helper, args=(klines_data, i)) for i in range(len(relevant_symbols))]
        for thread in threads:
            thread.start()
        for thread in tqdm(threads, desc=f"Processing {current_time} -> {next_time}"):
            thread.join()

        # Calculer les rendements
        for i, klines in enumerate(klines_data):
                # print(f"{relevant_symbols[i]}: {len(klines) if klines else 0} klines retrieved.")

            if relevant_symbols[i] == 'BTTCUSDT':  
                continue
            if klines and len(klines) > 0:
                df = pd.DataFrame(klines)
                df_close_prices = df[4].astype(float)  # Prix de clôture
                cumret = (df_close_prices.pct_change() + 1).prod() - 1

                # start_date = pd.to_datetime(df.iloc[0, 0], unit='ms')
                # end_date = pd.to_datetime(df.iloc[-1, 0], unit='ms')

                #to have paris timezone
                start_date = pd.to_datetime(df.iloc[0, 0], unit='ms').tz_localize('UTC').tz_convert(paris_tz)
                end_date = pd.to_datetime(df.iloc[-1, 0], unit='ms').tz_localize('UTC').tz_convert(paris_tz)



                returns.append(cumret)
                selected_symbols.append(relevant_symbols[i])
                dates.append((start_date, end_date))

        retdf = pd.DataFrame({
            'ret': returns,
            'start_date': [d[0] for d in dates],
            'end_date': [d[1] for d in dates]
        }, index=selected_symbols)

        if not retdf.empty:
            if worst:
                result = retdf.nsmallest(n, 'ret')
            else:
                result = retdf.nlargest(n, 'ret')

            result['start_period'] = current_time
            result['end_period'] = next_time
            all_results.append(result)
        else:
            print("WARNING -- df EMPTY")

        current_time = next_time
        # print("Finished processing:", current_time)

    return pd.concat(all_results).reset_index()


In [26]:
from datetime import datetime
import time as tm
from dateutil.relativedelta import relativedelta

end_date = datetime(2024, 12, 9, 13)
start_date = end_date - relativedelta(hours=24)


print(start_date)
print(end_date)

diff_in_seconds = int((end_date - start_date).total_seconds())
diff_in_hours = diff_in_seconds // 3600
print(diff_in_hours)

2024-12-08 13:00:00
2024-12-09 13:00:00
24


In [30]:
start_time = tm.time()
df_normal = get_performers_past(
    client=client,
    interval='5m', 
    start_date=start_date,
    end_date=end_date,
    n=1,  
    worst=True,  
    future=True  
)
print("Temps d'execution : ", start_time - tm.time())

start : 2024-12-08 13:00:00, end : 2024-12-09 13:00:00
Nombre de plage d'heure 24
Processing time range: 2024-12-08 13:00:00 to 2024-12-08 14:00:00


Processing 2024-12-08 13:00:00 -> 2024-12-08 14:00:00: 100%|██████████| 355/355 [00:08<00:00, 43.67it/s]


Processing time range: 2024-12-08 14:00:00 to 2024-12-08 15:00:00


Processing 2024-12-08 14:00:00 -> 2024-12-08 15:00:00: 100%|██████████| 355/355 [00:09<00:00, 37.98it/s]


Processing time range: 2024-12-08 15:00:00 to 2024-12-08 16:00:00


Processing 2024-12-08 15:00:00 -> 2024-12-08 16:00:00: 100%|██████████| 355/355 [00:09<00:00, 35.82it/s] 


Processing time range: 2024-12-08 16:00:00 to 2024-12-08 17:00:00


Processing 2024-12-08 16:00:00 -> 2024-12-08 17:00:00: 100%|██████████| 355/355 [00:10<00:00, 34.70it/s]


Processing time range: 2024-12-08 17:00:00 to 2024-12-08 18:00:00


Processing 2024-12-08 17:00:00 -> 2024-12-08 18:00:00: 100%|██████████| 355/355 [00:09<00:00, 36.32it/s] 


Processing time range: 2024-12-08 18:00:00 to 2024-12-08 19:00:00


Processing 2024-12-08 18:00:00 -> 2024-12-08 19:00:00: 100%|██████████| 355/355 [00:10<00:00, 33.80it/s] 


Processing time range: 2024-12-08 19:00:00 to 2024-12-08 20:00:00


Processing 2024-12-08 19:00:00 -> 2024-12-08 20:00:00: 100%|██████████| 355/355 [00:10<00:00, 35.32it/s]


Processing time range: 2024-12-08 20:00:00 to 2024-12-08 21:00:00


Processing 2024-12-08 20:00:00 -> 2024-12-08 21:00:00: 100%|██████████| 355/355 [00:10<00:00, 34.83it/s] 


Processing time range: 2024-12-08 21:00:00 to 2024-12-08 22:00:00


Processing 2024-12-08 21:00:00 -> 2024-12-08 22:00:00: 100%|██████████| 355/355 [00:10<00:00, 35.09it/s] 


Processing time range: 2024-12-08 22:00:00 to 2024-12-08 23:00:00


Processing 2024-12-08 22:00:00 -> 2024-12-08 23:00:00: 100%|██████████| 355/355 [00:09<00:00, 38.24it/s] 


Processing time range: 2024-12-08 23:00:00 to 2024-12-09 00:00:00


Processing 2024-12-08 23:00:00 -> 2024-12-09 00:00:00: 100%|██████████| 355/355 [00:09<00:00, 37.01it/s] 


Processing time range: 2024-12-09 00:00:00 to 2024-12-09 01:00:00


Processing 2024-12-09 00:00:00 -> 2024-12-09 01:00:00: 100%|██████████| 355/355 [00:09<00:00, 36.36it/s] 


Processing time range: 2024-12-09 01:00:00 to 2024-12-09 02:00:00


Processing 2024-12-09 01:00:00 -> 2024-12-09 02:00:00: 100%|██████████| 355/355 [00:05<00:00, 65.32it/s]


Processing time range: 2024-12-09 02:00:00 to 2024-12-09 03:00:00


Processing 2024-12-09 02:00:00 -> 2024-12-09 03:00:00: 100%|██████████| 355/355 [00:09<00:00, 35.57it/s] 


Processing time range: 2024-12-09 03:00:00 to 2024-12-09 04:00:00


Processing 2024-12-09 03:00:00 -> 2024-12-09 04:00:00: 100%|██████████| 355/355 [00:10<00:00, 35.48it/s] 


Processing time range: 2024-12-09 04:00:00 to 2024-12-09 05:00:00


Processing 2024-12-09 04:00:00 -> 2024-12-09 05:00:00: 100%|██████████| 355/355 [00:02<00:00, 129.62it/s]


Processing time range: 2024-12-09 05:00:00 to 2024-12-09 06:00:00


Processing 2024-12-09 05:00:00 -> 2024-12-09 06:00:00: 100%|██████████| 355/355 [00:01<00:00, 242.99it/s]


Processing time range: 2024-12-09 06:00:00 to 2024-12-09 07:00:00


Processing 2024-12-09 06:00:00 -> 2024-12-09 07:00:00: 100%|██████████| 355/355 [00:01<00:00, 229.95it/s]


Processing time range: 2024-12-09 07:00:00 to 2024-12-09 08:00:00


Processing 2024-12-09 07:00:00 -> 2024-12-09 08:00:00: 100%|██████████| 355/355 [00:00<00:00, 1123.26it/s]


Processing time range: 2024-12-09 08:00:00 to 2024-12-09 09:00:00


Processing 2024-12-09 08:00:00 -> 2024-12-09 09:00:00: 100%|██████████| 355/355 [00:01<00:00, 220.75it/s]


Processing time range: 2024-12-09 09:00:00 to 2024-12-09 10:00:00


Processing 2024-12-09 09:00:00 -> 2024-12-09 10:00:00: 100%|██████████| 355/355 [00:01<00:00, 315.59it/s]


Processing time range: 2024-12-09 10:00:00 to 2024-12-09 11:00:00


Processing 2024-12-09 10:00:00 -> 2024-12-09 11:00:00: 100%|██████████| 355/355 [00:01<00:00, 200.80it/s]


Processing time range: 2024-12-09 11:00:00 to 2024-12-09 12:00:00


Processing 2024-12-09 11:00:00 -> 2024-12-09 12:00:00: 100%|██████████| 355/355 [00:00<00:00, 1166.45it/s]


Processing time range: 2024-12-09 12:00:00 to 2024-12-09 13:00:00


Processing 2024-12-09 12:00:00 -> 2024-12-09 13:00:00: 100%|██████████| 355/355 [00:03<00:00, 117.88it/s]

Temps d'execution :  -210.73862314224243





In [31]:
if len(df_normal) == diff_in_hours : 
    print("bon nombre de performers récuperer")
else :
    print(diff_in_hours - len(df_normal) , "manquantes" )
df_normal

7 manquantes


Unnamed: 0,index,ret,start_date,end_date,start_period,end_period
0,QNTUSDT,-0.021277,2024-12-08 14:00:00+01:00,2024-12-08 15:00:00+01:00,2024-12-08 13:00:00,2024-12-08 14:00:00
1,ACXUSDT,-0.061161,2024-12-08 15:00:00+01:00,2024-12-08 16:00:00+01:00,2024-12-08 14:00:00,2024-12-08 15:00:00
2,ONEUSDT,-0.023169,2024-12-08 16:00:00+01:00,2024-12-08 17:00:00+01:00,2024-12-08 15:00:00,2024-12-08 16:00:00
3,UMAUSDT,-0.093864,2024-12-08 17:00:00+01:00,2024-12-08 18:00:00+01:00,2024-12-08 16:00:00,2024-12-08 17:00:00
4,TRUUSDT,-0.025292,2024-12-08 18:00:00+01:00,2024-12-08 19:00:00+01:00,2024-12-08 17:00:00,2024-12-08 18:00:00
5,CELRUSDT,-0.026407,2024-12-08 19:00:00+01:00,2024-12-08 20:00:00+01:00,2024-12-08 18:00:00,2024-12-08 19:00:00
6,IDEXUSDT,-0.05962,2024-12-08 20:00:00+01:00,2024-12-08 21:00:00+01:00,2024-12-08 19:00:00,2024-12-08 20:00:00
7,AMBUSDT,-0.023488,2024-12-08 21:00:00+01:00,2024-12-08 22:00:00+01:00,2024-12-08 20:00:00,2024-12-08 21:00:00
8,1MBABYDOGEUSDT,-0.031001,2024-12-08 22:00:00+01:00,2024-12-08 23:00:00+01:00,2024-12-08 21:00:00,2024-12-08 22:00:00
9,IDEXUSDT,-0.07047,2024-12-08 23:00:00+01:00,2024-12-09 00:00:00+01:00,2024-12-08 22:00:00,2024-12-08 23:00:00


In [93]:
worst_performers.to_parquet("C:\\Users\\tariq\\algorithmic_trading\\data\\futures\\1hour\\test.parquet")
