In [2]:
%run ./creating_arrays.ipynb

D:\KISHORE\Binance-Data-Downloader\data\downloaded_data
D:\KISHORE\Binance-Data-Downloader\data\extracted_data
['1000BONKUSDC', '1000BONKUSDT', '1000BTTCUSDT', '1000FLOKIUSDT', '1000LUNCBUSD', '1000LUNCUSDT', '1000PEPEUSDC', '1000PEPEUSDT', '1000RATSUSDT', '1000SATSUSDT', '1000SHIBBUSD', '1000SHIBUSDC', '1000SHIBUSDT', '1000XECUSDT', '1INCHUSDT', 'AAVEUSDT', 'ACEUSDT', 'ACHUSDT', 'ADABUSD', 'ADAUSDT', 'AEVOUSDT', 'AGIXBUSD', 'AGIXUSDT', 'AGLDUSDT', 'AIUSDT', 'AKROUSDT', 'ALGOUSDT', 'ALICEUSDT', 'ALPACAUSDT', 'ALPHAUSDT', 'ALTUSDT', 'AMBBUSD', 'AMBUSDT', 'ANCBUSD', 'ANCUSDT', 'ANKRUSDT', 'ANTUSDT', 'APEBUSD', 'APEUSDT', 'API3USDT', 'APTBUSD', 'APTUSDT', 'ARBUSDC', 'ARBUSDT', 'ARKMUSDT', 'ARKUSDT', 'ARPAUSDT', 'ARUSDT', 'ASTRUSDT', 'ATAUSDT', 'ATOMUSDT', 'AUCTIONBUSD', 'AUCTIONUSDT', 'AUDIOUSDT', 'AVAXBUSD', 'AVAXUSDC', 'AVAXUSDT', 'AXLUSDT', 'AXSUSDT', 'BADGERUSDT', 'BAKEUSDT', 'BALUSDT', 'BANANAUSDT', 'BANDUSDT', 'BATUSDT', 'BBUSDT', 'BCHUSDC', 'BCHUSDT', 'BEAMXUSDT', 'BELUSDT', 'BICOU

In [2]:
from pathlib import Path
import os
import re
import shutil
import talib
import datetime
import glob
import zipfile
import pandas as pd
from pandas.api.types import is_numeric_dtype
import inspect
import talib
import time
import numpy as np
import requests
import urllib.request
from numba import njit, prange
from tqdm import tqdm


# all tiny modular functions

In [8]:
def download_monthly_data(month_array, symbol, chart_time):
    """Download the monthly ZIP archives of one symbol / chart interval.

    For every entry of ``month_array`` the archive
    ``{symbol}-{chart_time}-{month}.zip`` is fetched from
    ``BINANCE_MONTHLY_URL`` and stored in
    ``download_dir/{symbol}-{chart_time}-monthly_data``. Archives that are
    already on disk are skipped, and a failed download is skipped silently
    (best effort).

    Relies on the notebook-level globals ``download_dir`` and
    ``BINANCE_MONTHLY_URL``, which are defined outside this cell.

    Args:
        month_array: Iterable of month identifiers interpolated into the
            archive name (presumably ``'YYYY-MM'`` strings -- confirm
            against the caller).
        symbol: Trading-pair symbol used in the URL and the file names.
        chart_time: Chart interval used in the URL and the file names.

    Returns:
        None. A one-line summary is printed.
    """
    # Create the target folder for this symbol / interval
    folder_path = Path(download_dir) / f"{symbol}-{chart_time}-monthly_data"
    folder_path.mkdir(parents=True, exist_ok=True)
    count = 0
    for month in month_array:
        symbol_object = f"{symbol}-{chart_time}-{month}.zip"
        link = f"{BINANCE_MONTHLY_URL}{symbol}/{chart_time}/{symbol_object}"
        file_path = folder_path / symbol_object
        if file_path.exists():
            continue
        try:
            urllib.request.urlretrieve(link, file_path)
        except Exception:
            # Best effort: an archive that cannot be fetched is skipped.
            # ``Exception`` (instead of a bare ``except``) lets Ctrl-C and
            # SystemExit propagate.
            # An interrupted transfer can leave a truncated file behind; it
            # must be removed, otherwise the ``exists()`` check above would
            # skip it forever on later runs.
            try:
                if file_path.exists():
                    file_path.unlink()
            except OSError:
                pass
            continue
        count += 1
    if count > 0:
        print(f"Monthly Data Downloaded for {symbol},{chart_time}")
    else:
        print(
            f"you're already up to date for monthly data for {symbol},{chart_time}"
        )


def download_daily_data(day_array, symbol, chart_time):
    """Download the daily ZIP archives of one symbol / chart interval.

    For every entry of ``day_array`` the archive
    ``{symbol}-{chart_time}-{day}.zip`` is fetched from
    ``BINANCE_DAILY_URL`` and stored in
    ``download_dir/{symbol}-{chart_time}-daily_data``. Archives that are
    already on disk are skipped, and a failed download is skipped silently
    (best effort).

    Relies on the notebook-level globals ``download_dir`` and
    ``BINANCE_DAILY_URL``, which are defined outside this cell.

    Args:
        day_array: Iterable of day identifiers interpolated into the
            archive name (presumably ``'YYYY-MM-DD'`` strings -- confirm
            against the caller).
        symbol: Trading-pair symbol used in the URL and the file names.
        chart_time: Chart interval used in the URL and the file names.

    Returns:
        None. A one-line summary is printed.
    """
    # Create the target folder for this symbol / interval
    folder_path = Path(download_dir) / f"{symbol}-{chart_time}-daily_data"
    folder_path.mkdir(parents=True, exist_ok=True)
    count = 0
    for day in day_array:
        symbol_object = f"{symbol}-{chart_time}-{day}.zip"
        link = f"{BINANCE_DAILY_URL}{symbol}/{chart_time}/{symbol_object}"
        file_path = folder_path / symbol_object
        if file_path.exists():
            continue
        try:
            urllib.request.urlretrieve(link, file_path)
        except Exception:
            # Best effort: an archive that cannot be fetched is skipped.
            # ``Exception`` (instead of a bare ``except``) lets Ctrl-C and
            # SystemExit propagate.
            # An interrupted transfer can leave a truncated file behind; it
            # must be removed, otherwise the ``exists()`` check above would
            # skip it forever on later runs.
            try:
                if file_path.exists():
                    file_path.unlink()
            except OSError:
                pass
            continue
        count += 1
    if count > 0:
        print(f"Daily Data Downloaded for {symbol},{chart_time}")
    else:
        print(
            f"you're already up to date for daily data for {symbol},{chart_time}"
        )


def construct_csv_file_path(folder_path,
                            symbol,
                            chart_time,
                            file,
                            is_daily=False):
    """Build the path of the CSV that belongs to an archive file name.

    Args:
        folder_path: Directory the CSV lives in.
        symbol: Symbol placed at the start of the CSV name.
        chart_time: Chart interval placed after the symbol.
        file: Archive file name the date part is taken from; its last four
            characters (the extension) are discarded.
        is_daily: When true the date is rebuilt from the last three
            dash-separated fields of ``file``; otherwise the eight
            characters in front of the extension are reused verbatim.

    Returns:
        ``folder_path`` joined with the derived CSV file name.
    """
    if not is_daily:
        # Monthly naming: reuse the 8 characters preceding the extension
        # (for example "-2024-01") directly after the interval.
        date_suffix = file[-12:-4]
        return os.path.join(folder_path,
                            f"{symbol}-{chart_time}{date_suffix}.csv")

    # Daily naming: year, month and day are the last three "-" fields.
    pieces = file.split('-')
    year = pieces[-3]
    month = pieces[-2]
    day = pieces[-1][:-4]  # strip the 4-character extension
    csv_name = f"{symbol}-{chart_time}-{year}-{month}-{day}.csv"
    return os.path.join(folder_path, csv_name)


def _first_row_is_header(df):
    """Return True when row 0 of a frame read with ``header=None`` is text.

    A header line makes the first cell a non-numeric string, while a data
    row starts with a numeric value.
    """
    if df.empty:
        return False
    try:
        float(df.iloc[0, 0])
    except (TypeError, ValueError):
        return True
    return False


def process_zip_folder(folder_path,
                       pattern,
                       new_csv_folder_path,
                       symbol,
                       chart_time,
                       df_list,
                       daily_array=None,
                       is_daily=False):
    """Extract the matching ZIP archives of a folder and load their CSVs.

    Every file in ``folder_path`` whose name matches ``pattern`` is
    extracted into ``new_csv_folder_path``; the CSV whose path is given by
    ``construct_csv_file_path`` is then read and appended to ``df_list``.
    Archives that fail to extract or CSVs that fail to load are reported
    with ``print`` and skipped.

    Args:
        folder_path: Directory containing the downloaded ZIP archives.
        pattern: Compiled regular expression; only file names for which
            ``pattern.match`` succeeds are processed.
        new_csv_folder_path: Directory the archives are extracted into.
        symbol: Symbol forwarded to ``construct_csv_file_path``.
        chart_time: Chart interval forwarded to ``construct_csv_file_path``.
        df_list: List that receives one data frame per loaded CSV. It is
            modified in place and also returned.
        daily_array: Optional collection of date strings. When ``is_daily``
            is true and this is non-empty, only archives whose name
            contains one of the dates are processed.
        is_daily: Selects the daily naming scheme and enables the
            ``daily_array`` filter.

    Returns:
        ``df_list`` (the same list object that was passed in).
    """
    if not os.path.exists(folder_path):
        print(f"Folder not found: {folder_path}")
        return df_list

    # Iterate over files in the directory
    for file_name in os.listdir(folder_path):
        # Check if the file matches the pattern
        if not pattern.match(file_name):
            continue

        if is_daily and daily_array:
            # Keep only archives that mention one of the requested dates
            if not any(date in file_name for date in daily_array):
                continue

        # Construct the file path
        file_path = os.path.join(folder_path, file_name)

        # Extract the ZIP file
        try:
            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                zip_ref.extractall(new_csv_folder_path)
        except Exception as e:
            print(f"Error extracting file {file_name}: {e}")
            continue

        # Construct the CSV file path using the helper function
        csv_file_path = construct_csv_file_path(new_csv_folder_path,
                                                symbol, chart_time,
                                                file_name, is_daily)

        # Read the CSV without interpreting any row as a header
        try:
            df = pd.read_csv(csv_file_path, header=None)
            # Drop row 0 only when it really is a header line. Some CSVs
            # carry no header at all, and removing their first row
            # unconditionally would silently discard one record per file.
            if _first_row_is_header(df):
                df = df.iloc[1:]
            # Add it to the list
            df_list.append(df)
        except Exception as e:
            print(f"Error reading file {csv_file_path}: {e}")
            continue

    return df_list


def get_correct_headers(new_csv_folder_path):
    """Find a CSV in the folder whose first line looks like a kline header.

    Each directory entry is opened with pandas; the first one whose header
    row contains at least two of the known column names wins. Entries that
    cannot be read are reported with ``print`` and skipped.

    Args:
        new_csv_folder_path: Directory to scan.

    Returns:
        The complete list of column names of the first matching file.

    Raises:
        ValueError: If no entry yields at least two known column names.
    """
    known_columns = (
        'open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time',
        'quote_volume', 'count', 'taker_buy_volume', 'taker_buy_quote_volume',
        'ignore',
    )

    for entry in os.listdir(new_csv_folder_path):
        candidate = os.path.join(new_csv_folder_path, entry)
        try:
            # Only the header line is of interest, so a single row suffices
            header_row = list(pd.read_csv(candidate, nrows=1).columns)
            recognised = [col for col in header_row if col in known_columns]
            # Two or more known names are required to accept the file
            if len(recognised) < 2:
                continue
            print(f"Found matching headers in {entry}: {recognised}")
            return header_row
        except Exception as e:
            print(f"Error processing file {entry}: {e}")

    raise ValueError("Could not find matching headers in any CSV files.")


def concatenate_data_frames(df_list, new_csv_folder_path, symbol, chart_time):
    """Merge the per-period frames into one CSV and remove the pieces.

    Steps:
      1. Obtain the column names via ``get_correct_headers``.
      2. Concatenate ``df_list`` and apply the column names.
      3. Convert ``open_time`` / ``close_time`` from epoch milliseconds to
         timezone-aware timestamps in ``Asia/Kolkata`` and order the rows
         chronologically.
      4. Drop the ``ignore`` column (if present) and add ``entry``.
      5. Write ``{symbol}-{chart_time}.csv`` into ``new_csv_folder_path``
         and delete the individual ``{symbol}-{chart_time}-*.csv`` files.

    Args:
        df_list: Data frames to concatenate (one per extracted CSV).
        new_csv_folder_path: Folder holding the extracted CSVs; the merged
            file is written here as well.
        symbol: Symbol used in the file names.
        chart_time: Chart interval used in the file names.

    Returns:
        A status string: ``"Data concatenated, individual CSVs deleted"``
        on success, otherwise ``"Error finding headers"``,
        ``"No data to concatenate"`` or ``"Error in date conversion"``.

    Raises:
        ValueError: Propagated from pandas when the number of columns does
            not match the number of headers, or when a timestamp value is
            not numeric.
    """
    # Get correct headers from a CSV file
    try:
        correct_headers = get_correct_headers(new_csv_folder_path)
    except ValueError as e:
        print(e)
        return "Error finding headers"

    # pd.concat raises on an empty list; report it like the other failures
    if not df_list:
        print(f"No data frames to concatenate for {symbol},{chart_time}")
        return "No data to concatenate"

    # Concatenate the data frames in the list
    df_final = pd.concat(df_list, ignore_index=True)

    concatenated_file_name = f"{symbol}-{chart_time}.csv"
    individual_prefix = f"{symbol}-{chart_time}-"

    def is_individual_csv(name):
        # True for the per-period CSVs of this symbol / interval only
        return (name.startswith(individual_prefix) and name.endswith('.csv')
                and name != concatenated_file_name)

    # Check if df_final has headers or not
    if df_final.columns[0] not in correct_headers:
        print("Updating older CSVs with correct headers.")
        header_as_text = [str(header) for header in correct_headers]
        # Only this symbol's individual CSVs are touched, and only when they
        # really lack a header line. Rewriting every file in the folder
        # would add a second header row to files that already have one and
        # would modify CSVs that belong to other symbols.
        for file_name in os.listdir(new_csv_folder_path):
            if not is_individual_csv(file_name):
                continue
            file_path = os.path.join(new_csv_folder_path, file_name)
            try:
                df_old = pd.read_csv(file_path, header=None)
                # Ensure we only update files with correct structure
                if len(df_old.columns) != len(correct_headers):
                    continue
                if df_old.iloc[0].astype(str).tolist() == header_as_text:
                    continue  # header line already present
                df_old.columns = correct_headers
                df_old.to_csv(file_path, index=False)
            except Exception as e:
                print(f"Error updating file {file_name}: {e}")

    # Set the headers as the column names of the final dataframe
    df_final.columns = correct_headers

    # Convert 'open_time' and 'close_time' columns to datetime
    try:
        for time_column in ('open_time', 'close_time'):
            # Values can arrive as strings (frames read with header=None from
            # CSVs that contain a header line); make them numeric first so
            # unit='ms' is applied to numbers rather than to text.
            epoch_ms = pd.to_numeric(df_final[time_column])
            df_final[time_column] = pd.to_datetime(
                epoch_ms,
                unit='ms').dt.tz_localize('UTC').dt.tz_convert('Asia/Kolkata')
    except KeyError as e:
        print(f"Column not found for conversion: {e}")
        return "Error in date conversion"

    # The frames arrive in directory-listing order, which is arbitrary;
    # a stable sort puts the rows in chronological order.
    df_final = df_final.sort_values('open_time', kind='mergesort')

    # Delete the 'ignore' column if it exists
    if 'ignore' in df_final.columns:
        df_final = df_final.drop(['ignore'], axis=1)

    # 'entry' is a plain copy of 'open'.
    # NOTE(review): an earlier comment described it as the previous close;
    # confirm which of the two is intended.
    df_final['entry'] = df_final['open']

    # Construct the file path
    concatenated_file_path = os.path.join(new_csv_folder_path,
                                          concatenated_file_name)

    # Write the data frame to the CSV file
    df_final.to_csv(concatenated_file_path, index=False)

    directory_final = Path(
        concatenated_file_path).parent  # Get the parent directory

    # Deleting all the other CSVs except the final concatenated file
    for file_path in directory_final.iterdir():
        if file_path.is_file() and is_individual_csv(file_path.name):
            file_path.unlink()

    return "Data concatenated, individual CSVs deleted"


In [None]:
# Candlestick-pattern functions in output-column order. Each one is called
# with (open, high, low, close) and produces a column named after itself.
_CDL_PATTERN_NAMES = (
    'CDL2CROWS', 'CDL3BLACKCROWS', 'CDL3INSIDE', 'CDL3LINESTRIKE',
    'CDL3OUTSIDE', 'CDL3STARSINSOUTH', 'CDL3WHITESOLDIERS',
    'CDLABANDONEDBABY', 'CDLADVANCEBLOCK', 'CDLBELTHOLD', 'CDLBREAKAWAY',
    'CDLCLOSINGMARUBOZU', 'CDLCONCEALBABYSWALL', 'CDLCOUNTERATTACK',
    'CDLDARKCLOUDCOVER', 'CDLDOJI', 'CDLDOJISTAR', 'CDLDRAGONFLYDOJI',
    'CDLENGULFING', 'CDLEVENINGDOJISTAR', 'CDLEVENINGSTAR',
    'CDLGAPSIDESIDEWHITE', 'CDLGRAVESTONEDOJI', 'CDLHAMMER',
    'CDLHANGINGMAN', 'CDLHARAMI', 'CDLHARAMICROSS', 'CDLHIGHWAVE',
    'CDLHIKKAKE', 'CDLHIKKAKEMOD', 'CDLHOMINGPIGEON', 'CDLIDENTICAL3CROWS',
    'CDLINNECK', 'CDLINVERTEDHAMMER', 'CDLKICKING', 'CDLKICKINGBYLENGTH',
    'CDLLADDERBOTTOM', 'CDLLONGLEGGEDDOJI', 'CDLLONGLINE', 'CDLMARUBOZU',
    'CDLMATCHINGLOW', 'CDLMATHOLD', 'CDLMORNINGDOJISTAR', 'CDLMORNINGSTAR',
    'CDLONNECK', 'CDLPIERCING', 'CDLRICKSHAWMAN', 'CDLRISEFALL3METHODS',
    'CDLSEPARATINGLINES', 'CDLSHOOTINGSTAR', 'CDLSHORTLINE',
    'CDLSPINNINGTOP', 'CDLSTALLEDPATTERN', 'CDLSTICKSANDWICH', 'CDLTAKURI',
    'CDLTASUKIGAP', 'CDLTHRUSTING', 'CDLTRISTAR', 'CDLUNIQUE3RIVER',
    'CDLUPSIDEGAP2CROWS', 'CDLXSIDEGAP3METHODS',
)

# Patterns that are called with an explicit ``penetration=0``.
# NOTE(review): this overrides the library's default penetration; kept
# as-is, confirm that zero is intended.
_CDL_ZERO_PENETRATION = frozenset((
    'CDLABANDONEDBABY', 'CDLDARKCLOUDCOVER', 'CDLEVENINGDOJISTAR',
    'CDLEVENINGSTAR', 'CDLMATHOLD',
))

# Indicators computed once per time period, in output-column order:
# (TA-Lib function name, names of the df columns passed positionally).
_PER_PERIOD_INDICATORS = (
    ('DEMA', ('close',)),
    ('EMA', ('close',)),
    ('KAMA', ('close',)),
    ('MA', ('close',)),
    ('MIDPOINT', ('close',)),
    ('MIDPRICE', ('high', 'low')),
    ('SMA', ('close',)),
    ('TEMA', ('close',)),
    ('TRIMA', ('close',)),
    ('WMA', ('close',)),
    ('ADX', ('high', 'low', 'close')),
    ('ADXR', ('high', 'low', 'close')),
    ('AROON', ('high', 'low')),
    ('AROONOSC', ('high', 'low')),
    ('CCI', ('high', 'low', 'close')),
    ('CMO', ('close',)),
    ('DX', ('high', 'low', 'close')),
    ('MFI', ('high', 'low', 'close', 'volume')),
    ('MINUS_DI', ('high', 'low', 'close')),
    ('MINUS_DM', ('high', 'low')),
    ('MOM', ('close',)),
    ('PLUS_DI', ('high', 'low', 'close')),
    ('PLUS_DM', ('high', 'low')),
    ('ROC', ('close',)),
    ('ROCP', ('close',)),
    ('ROCR', ('close',)),
    ('ROCR100', ('close',)),
    ('RSI', ('close',)),
    ('ATR', ('high', 'low', 'close')),
    ('NATR', ('high', 'low', 'close')),
    ('BETA', ('high', 'low')),
    ('CORREL', ('high', 'low')),
)


def calculate_indicators_using_talib(timeperiods, df):
    """Compute a wide table of TA-Lib indicators for an OHLCV frame.

    The result contains, in this order: a fixed set of overlap, momentum,
    volume, cycle, price-transform and volatility indicators; every
    candlestick pattern in ``_CDL_PATTERN_NAMES``; a fixed set of statistic
    functions; and, for each entry of ``timeperiods``, the Bollinger bands
    followed by the indicators in ``_PER_PERIOD_INDICATORS`` with the
    column name suffixed by ``_{timeperiod}``.

    Args:
        timeperiods: Iterable of look-back periods passed to TA-Lib as
            ``timeperiod``.
        df: Frame providing the columns ``open``, ``high``, ``low``,
            ``close`` and ``volume``.

    Returns:
        A new DataFrame with one column per indicator output.
    """
    open_, high, low, close, volume = (df['open'], df['high'], df['low'],
                                       df['close'], df['volume'])

    indicator_columns = [
        ('HT_TRENDLINE', talib.HT_TRENDLINE(close)),
        # NOTE(review): acceleration=0 / maximum=0 and vfactor=0 below are
        # kept exactly as before; they differ from the library defaults --
        # confirm they are intended.
        ('SAR', talib.SAR(high, low, acceleration=0, maximum=0)),
        ('SAREXT', talib.SAREXT(high, low)),
        ('T3', talib.T3(close, timeperiod=5, vfactor=0)),
        # Momentum indicators
        ('APO', talib.APO(close, fastperiod=12, slowperiod=26)),
        ('BOP', talib.BOP(open_, high, low, close)),
    ]

    macd, macd_signal, macd_hist = talib.MACD(close,
                                              fastperiod=12,
                                              slowperiod=26,
                                              signalperiod=9)
    phasor_inphase, phasor_quadrature = talib.HT_PHASOR(close)

    indicator_columns += [
        ('MACD', macd),
        ('MACD_signal', macd_signal),
        ('MACD_hist', macd_hist),
        ('PPO', talib.PPO(close, fastperiod=12, slowperiod=26, matype=0)),
        ('TRIX', talib.TRIX(close)),
        ('ULTOSC', talib.ULTOSC(high, low, close)),
        ('WILLR', talib.WILLR(high, low, close)),
        # Volume indicators
        ('AD', talib.AD(high, low, close, volume)),
        ('ADOSC',
         talib.ADOSC(high, low, close, volume, fastperiod=3, slowperiod=10)),
        ('OBV', talib.OBV(close, volume)),
        # Cycle indicators
        ('HT_DCPERIOD', talib.HT_DCPERIOD(close)),
        ('HT_DCPHASE', talib.HT_DCPHASE(close)),
        ('HT_PHASOR_inphase', phasor_inphase),
        ('HT_PHASOR_quadrature', phasor_quadrature),
        ('HT_TRENDMODE', talib.HT_TRENDMODE(close)),
        # Price transform
        ('AVGPRICE', talib.AVGPRICE(open_, high, low, close)),
        ('MEDPRICE', talib.MEDPRICE(high, low)),
        ('TYPPRICE', talib.TYPPRICE(high, low, close)),
        ('WCLPRICE', talib.WCLPRICE(high, low, close)),
        # Volatility indicators
        ('TRANGE', talib.TRANGE(high, low, close)),
    ]

    # Pattern recognition
    for pattern_name in _CDL_PATTERN_NAMES:
        extra = ({'penetration': 0}
                 if pattern_name in _CDL_ZERO_PENETRATION else {})
        pattern_func = getattr(talib, pattern_name)
        indicator_columns.append(
            (pattern_name, pattern_func(open_, high, low, close, **extra)))

    # Statistic functions that use the library's default period
    for stat_name in ('LINEARREG', 'LINEARREG_ANGLE', 'LINEARREG_INTERCEPT',
                      'LINEARREG_SLOPE', 'TSF', 'VAR'):
        indicator_columns.append((stat_name, getattr(talib, stat_name)(close)))

    # Indicators that depend on the look-back period
    for timeperiod in timeperiods:
        # BBANDS returns the three bands as one tuple; each band gets its
        # own column (storing the whole tuple under every name produced
        # unusable columns).
        bb_upper, bb_middle, bb_lower = talib.BBANDS(close,
                                                     timeperiod=timeperiod)
        indicator_columns.append((f'BB_upper_{timeperiod}', bb_upper))
        indicator_columns.append((f'BB_middle_{timeperiod}', bb_middle))
        indicator_columns.append((f'BB_lower_{timeperiod}', bb_lower))

        for func_name, input_names in _PER_PERIOD_INDICATORS:
            inputs = [df[column] for column in input_names]
            result = getattr(talib, func_name)(*inputs, timeperiod=timeperiod)
            if func_name == 'AROON':
                # TA-Lib returns (aroondown, aroonup), in that order.
                aroon_down, aroon_up = result
                indicator_columns.append(
                    (f'AROON_up_{timeperiod}', aroon_up))
                indicator_columns.append(
                    (f'AROON_down_{timeperiod}', aroon_down))
            else:
                indicator_columns.append(
                    (f'{func_name}_{timeperiod}', result))

    return pd.concat(
        [pd.DataFrame(data, columns=[name]) for name, data in indicator_columns],
        axis=1)

In [None]:
def calculate_indicators_using_talib_new(timeperiods, df):
    """Compute TA-Lib indicators for an OHLCV frame and return them as columns.

    Four groups of indicators are calculated:

    1. A fixed list of studies called once with the parameters written below.
    2. Every ``CDL*`` candlestick-pattern function exposed by ``talib``.
    3. Six statistic functions applied to ``close`` (no ``timeperiod`` is
       passed, so TA-Lib's own default applies).
    4. A list of period-dependent studies, each evaluated once per value in
       ``timeperiods``.

    Args:
        timeperiods: Iterable of look-back periods used for group 4.
        df: DataFrame with 'open', 'high', 'low', 'close' and 'volume' columns.

    Returns:
        pandas.DataFrame with one column per indicator output. Group 4 columns
        are named ``<FUNC>_<period>``; functions that return several arrays
        (BBANDS, AROON) get one column per output, named
        ``<FUNC>_<output>_<period>``.
    """
    open_, high, low = df['open'], df['high'], df['low']
    close, volume = df['close'], df['volume']

    # NOTE(review): acceleration=0 / maximum=0 (SAR) and vfactor=0 (T3) are kept
    # exactly as before; they look like placeholder values rather than tuned
    # settings - confirm they are intended.
    indicator_columns = [
        ('HT_TRENDLINE', talib.HT_TRENDLINE(close)),
        ('SAR', talib.SAR(high, low, acceleration=0, maximum=0)),
        ('SAREXT', talib.SAREXT(high, low)),
        ('T3', talib.T3(close, timeperiod=5, vfactor=0)),
        ('APO', talib.APO(close, fastperiod=12, slowperiod=26)),
        ('BOP', talib.BOP(open_, high, low, close)),
        *zip(['MACD', 'MACD_signal', 'MACD_hist'],
             talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)),
        ('PPO', talib.PPO(close, fastperiod=12, slowperiod=26, matype=0)),
        ('TRIX', talib.TRIX(close)),
        ('ULTOSC', talib.ULTOSC(high, low, close)),
        ('WILLR', talib.WILLR(high, low, close)),
        ('AD', talib.AD(high, low, close, volume)),
        ('ADOSC', talib.ADOSC(high, low, close, volume, fastperiod=3, slowperiod=10)),
        ('OBV', talib.OBV(close, volume)),
        ('HT_DCPERIOD', talib.HT_DCPERIOD(close)),
        ('HT_DCPHASE', talib.HT_DCPHASE(close)),
        *zip(['HT_PHASOR_inphase', 'HT_PHASOR_quadrature'], talib.HT_PHASOR(close)),
        ('HT_TRENDMODE', talib.HT_TRENDMODE(close)),
        ('AVGPRICE', talib.AVGPRICE(open_, high, low, close)),
        ('MEDPRICE', talib.MEDPRICE(high, low)),
        ('TYPPRICE', talib.TYPPRICE(high, low, close)),
        ('WCLPRICE', talib.WCLPRICE(high, low, close)),
        ('TRANGE', talib.TRANGE(high, low, close)),
    ]

    # Candlestick pattern recognition: every talib attribute starting with CDL
    pattern_funcs = [getattr(talib, attr) for attr in dir(talib) if attr.startswith('CDL')]
    indicator_columns.extend(
        (pattern_func.__name__, pattern_func(open_, high, low, close))
        for pattern_func in pattern_funcs
    )

    # Statistic functions on the close price
    stat_funcs = [talib.LINEARREG, talib.LINEARREG_ANGLE, talib.LINEARREG_INTERCEPT,
                  talib.LINEARREG_SLOPE, talib.TSF, talib.VAR]
    indicator_columns.extend((stat_func.__name__, stat_func(close)) for stat_func in stat_funcs)

    # Period-dependent studies and the df columns each one takes as input.
    # BBANDS takes a single price series; it is fed the close price here.
    timeperiod_funcs = [
        (talib.BBANDS, ['close']),
        (talib.DEMA, ['close']),
        (talib.EMA, ['close']),
        (talib.KAMA, ['close']),
        (talib.MA, ['close']),
        (talib.MIDPOINT, ['close']),
        (talib.MIDPRICE, ['high', 'low']),
        (talib.SMA, ['close']),
        (talib.TEMA, ['close']),
        (talib.TRIMA, ['close']),
        (talib.WMA, ['close']),
        (talib.ADX, ['high', 'low', 'close']),
        (talib.ADXR, ['high', 'low', 'close']),
        (talib.AROON, ['high', 'low']),
        (talib.AROONOSC, ['high', 'low']),
        (talib.CCI, ['high', 'low', 'close']),
        (talib.CMO, ['close']),
        (talib.DX, ['high', 'low', 'close']),
        (talib.MFI, ['high', 'low', 'close', 'volume']),
        (talib.MINUS_DI, ['high', 'low', 'close']),
        (talib.MINUS_DM, ['high', 'low']),
        (talib.MOM, ['close']),
        (talib.PLUS_DI, ['high', 'low', 'close']),
        (talib.PLUS_DM, ['high', 'low']),
        (talib.ROC, ['close']),
        (talib.ROCP, ['close']),
        (talib.ROCR, ['close']),
        (talib.ROCR100, ['close']),
        (talib.RSI, ['close']),
        (talib.ATR, ['high', 'low', 'close']),
        (talib.NATR, ['high', 'low', 'close']),
        (talib.BETA, ['high', 'low']),
        (talib.CORREL, ['high', 'low']),
    ]

    # Functions in the list above that return several arrays, with the name of
    # each returned array in order.
    multi_output_names = {
        'BBANDS': ('upperband', 'middleband', 'lowerband'),
        'AROON': ('aroondown', 'aroonup'),
    }

    for func, input_cols in timeperiod_funcs:
        inputs = [df[col] for col in input_cols]
        output_names = multi_output_names.get(func.__name__)
        for timeperiod in timeperiods:
            result = func(*inputs, timeperiod=timeperiod)
            if output_names is None:
                indicator_columns.append((f"{func.__name__}_{timeperiod}", result))
            else:
                # One column per returned array instead of a tuple in one cell
                indicator_columns.extend(
                    (f"{func.__name__}_{output_name}_{timeperiod}", series)
                    for output_name, series in zip(output_names, result)
                )

    # Name every series explicitly so the column label never depends on
    # whatever name the indicator result happens to carry.
    return pd.concat(
        [pd.Series(values, name=label) for label, values in indicator_columns],
        axis=1,
    )

# Download and concatenate data (new, modular version)

In [9]:
def download_data_and_concatenate(master_dictionary, month_array, day_array):
    """Download, unpack and merge archives for every symbol / chart-time pair.

    For each combination of ``master_dictionary["symbols"]`` and
    ``master_dictionary["chart_times"]`` this calls, in order:
    ``download_monthly_data``, ``download_daily_data``, ``process_zip_folder``
    for the monthly folder, ``process_zip_folder`` for the daily folder
    (``is_daily=True``) and finally ``concatenate_data_frames``, whose return
    value is printed. Folder locations are derived from the module-level
    ``download_dir`` and ``output_dir``.

    Args:
        master_dictionary: Mapping with "symbols" and "chart_times" iterables.
        month_array: Months passed through to ``download_monthly_data``.
        day_array: Days passed through to ``download_daily_data`` and
            ``process_zip_folder``.

    Returns:
        The string "Data downloaded and concatenated".
    """
    for symbol in master_dictionary["symbols"]:
        for chart_time in master_dictionary["chart_times"]:
            print(f"Setting up things for {symbol}, {chart_time}")

            # Accumulates whatever process_zip_folder returns for this pair
            df_list = []

            # Escape both parts so regex metacharacters in a symbol or chart
            # time are matched literally instead of acting as wildcards.
            name_prefix = f"{re.escape(symbol)}-{re.escape(chart_time)}"

            # Monthly archives: <symbol>-<chart_time>-YYYY-MM.zip
            pattern = re.compile(rf"^{name_prefix}-\d{{4}}-\d{{2}}\.zip$")

            # Daily archives: <symbol>-<chart_time>-YYYY-MM-DD.zip
            pattern_daily = re.compile(rf"^{name_prefix}-\d{{4}}-\d{{2}}-\d{{2}}\.zip$")

            # Folder holding the daily ZIP files
            new_daily_zip_folder_path = os.path.join(
                download_dir, f"{symbol}-{chart_time}-daily_data")

            # Folder holding the monthly ZIP files
            new_monthly_zip_folder_path = os.path.join(
                download_dir, f"{symbol}-{chart_time}-monthly_data")

            # Folder that receives the CSV files
            new_csv_folder_path = os.path.join(output_dir,
                                               f"{symbol}-{chart_time}")

            download_monthly_data(month_array, symbol, chart_time)
            download_daily_data(day_array, symbol, chart_time)

            # Process the monthly ZIP folder and add to df_list
            df_list = process_zip_folder(
                new_monthly_zip_folder_path,
                pattern,
                new_csv_folder_path,
                symbol,
                chart_time,
                df_list,
                day_array,
            )

            # Process the daily ZIP folder and add to df_list
            df_list = process_zip_folder(
                new_daily_zip_folder_path,
                pattern_daily,
                new_csv_folder_path,
                symbol,
                chart_time,
                df_list,
                day_array,
                is_daily=True
            )

            # Merge the collected frames and report the helper's status message
            print(concatenate_data_frames(df_list, new_csv_folder_path, symbol, chart_time))
    return "Data downloaded and concatenated"


In [None]:
def _walk_forward_outcome(highs, lows, start, target, stop, is_long):
    """Scan candles from ``start`` onwards and resolve one trade.

    For a long trade the target is hit when ``high >= target`` and the stop
    when ``low <= stop``; for a short trade the target is hit when
    ``low <= target`` and the stop when ``high >= stop``.

    Returns:
        ``(outcome, candles_to_win)``: ``(1.0, offset)`` when the target is hit
        first, ``(-1.0, nan)`` when the stop is hit first or in the same candle
        as the target, ``(nan, nan)`` when neither level is reached.
    """
    for j in range(start, len(highs)):
        if is_long:
            target_hit, stop_hit = highs[j] >= target, lows[j] <= stop
        else:
            target_hit, stop_hit = lows[j] <= target, highs[j] >= stop
        if target_hit:
            # Target and stop inside the same candle is counted as a loss
            return (-1.0, np.nan) if stop_hit else (1.0, float(j - start))
        if stop_hit:
            return -1.0, np.nan
    return np.nan, np.nan


def calculate_wins_losses_old(master_dictionary, win_perc=0.73, loss_perc=0.4):
    """Label each row with the outcome of a long and a short trade (legacy scan).

    For every symbol / chart-time pair the CSV
    ``<output_dir>/<symbol>-<chart_time>/<symbol>-<chart_time>.csv`` is read and,
    for each row with a non-null ``entry``, later candles are scanned without a
    look-ahead limit until the target or the stop loss is touched. The result is
    written to ``processed_data/<symbol>-<chart_time>_W<win>_L<loss>.csv``.

    Added columns: ``if_short``, ``if_long`` (1 win, -1 loss, NaN unresolved,
    0 for rows without an entry), the four target / stop-loss prices,
    ``shorts_win_after`` / ``longs_win_after`` (candles until the win) and the
    placeholder columns ``dual_loss`` / ``entered_before``, which are kept only
    so the output layout stays the same.

    Outcome columns are built as float arrays up front, so storing NaN never
    has to upcast an integer column.

    Args:
        master_dictionary: Mapping with "symbols" and "chart_times" iterables.
        win_perc: Take-profit distance from the entry, in percent.
        loss_perc: Stop-loss distance from the entry, in percent.

    Returns:
        None. Errors for a pair are printed and the loop moves on.
    """
    for symbol in master_dictionary["symbols"]:
        for chart_time in master_dictionary["chart_times"]:
            try:
                print(f"Calculating for {symbol} {chart_time}")

                # Directory for processed data (created when missing)
                processed_data_dir = Path(output_dir) / f"{symbol}-{chart_time}/processed_data"
                processed_data_dir.mkdir(parents=True, exist_ok=True)

                # Input and output file locations
                og_file_name = f"{symbol}-{chart_time}.csv"
                og_file_path = Path(output_dir) / f"{symbol}-{chart_time}/{og_file_name}"
                new_file_name = f"{symbol}-{chart_time}_W{win_perc}_L{loss_perc}.csv"
                new_file_path = processed_data_dir / new_file_name

                df = pd.read_csv(og_file_path)

                # Work on plain arrays; scalar df.loc access dominated run time
                entries = df["entry"].to_numpy(dtype=float)
                highs = df["high"].to_numpy()
                lows = df["low"].to_numpy()
                has_entry = ~np.isnan(entries)
                n_rows = len(df)

                # NaN entries propagate to NaN targets / stops
                long_targets = entries * (1 + win_perc / 100)
                long_stop_losses = entries * (1 - loss_perc / 100)
                short_targets = entries * (1 - win_perc / 100)
                short_stop_losses = entries * (1 + loss_perc / 100)

                # Rows without an entry keep 0; rows with one start unresolved
                if_long = np.where(has_entry, np.nan, 0.0)
                if_short = np.where(has_entry, np.nan, 0.0)
                longs_win_after = np.full(n_rows, np.nan)
                shorts_win_after = np.full(n_rows, np.nan)

                for i in tqdm(range(n_rows), desc=f"Processing {symbol}-{chart_time}", unit='row'):
                    if not has_entry[i]:
                        continue
                    if_long[i], longs_win_after[i] = _walk_forward_outcome(
                        highs, lows, i, long_targets[i], long_stop_losses[i], is_long=True)
                    if_short[i], shorts_win_after[i] = _walk_forward_outcome(
                        highs, lows, i, short_targets[i], short_stop_losses[i], is_long=False)

                # Attach the results in the established column order
                df["if_short"] = if_short
                df["if_long"] = if_long
                df["long_target"] = long_targets
                df["short_target"] = short_targets
                df["long_stop_loss"] = long_stop_losses
                df["short_stop_loss"] = short_stop_losses
                df["shorts_win_after"] = shorts_win_after
                df["longs_win_after"] = longs_win_after
                # Placeholders: 0 for rows with an entry, empty otherwise
                df["dual_loss"] = np.where(has_entry, 0.0, np.nan)
                df["entered_before"] = np.nan

                # Save the processed data
                df.to_csv(new_file_path, index=False)
                print(f"Processed file saved as {new_file_name}")

            except Exception as e:
                # Best effort: report the failure and continue with the next pair
                print(f"Error processing {symbol} {chart_time}: {e}")



In [None]:
def _first_true_or_nan(mask):
    """Return the position of the first True in ``mask``, or NaN if there is none."""
    if mask.size == 0:
        return np.nan
    first = mask.argmax()
    return first if mask[first] else np.nan


def calculate_wins_losses(master_dictionary, win_perc=0.73, loss_perc=0.4, lookahead_window=10000):
    """Label each row with the outcome of a long and a short trade.

    For every symbol / chart-time pair the columns entry/high/low/open/close of
    ``<output_dir>/<symbol>-<chart_time>/<symbol>-<chart_time>.csv`` are read.
    For each row, the ``lookahead_window`` candles starting at that row are
    searched for the first touch of the target and of the stop loss. A trade is
    a win (1) only when the target is touched strictly before the stop, a loss
    (-1) when the stop is touched first or in the same candle, and left NaN
    when neither level is reached inside the window. The frame is saved to
    ``processed_data/<symbol>-<chart_time>_W<win>_L<loss>_<window>cdls.csv``.

    Args:
        master_dictionary: Mapping with "symbols" and "chart_times" iterables.
        win_perc: Take-profit distance from the entry, in percent.
        loss_perc: Stop-loss distance from the entry, in percent.
        lookahead_window: Number of candles (current one included) to inspect.

    Returns:
        None. Errors for a pair are printed and the loop moves on.
    """
    for symbol in master_dictionary["symbols"]:
        for chart_time in master_dictionary["chart_times"]:
            try:
                print(f"Calculating for {symbol} {chart_time}")

                # Directory for processed data
                processed_data_dir = Path(output_dir) / f"{symbol}-{chart_time}/processed_data"
                processed_data_dir.mkdir(parents=True, exist_ok=True)

                # Input and output file locations
                og_file_name = f"{symbol}-{chart_time}.csv"
                og_file_path = Path(output_dir) / f"{symbol}-{chart_time}/{og_file_name}"
                new_file_name = f"{symbol}-{chart_time}_W{win_perc}_L{loss_perc}_{lookahead_window}cdls.csv"
                new_file_path = processed_data_dir / new_file_name

                df = pd.read_csv(og_file_path, usecols=['entry', 'high', 'low', 'open', 'close'])

                # Create every output column now so the column order is fixed
                df["if_short"] = np.nan
                df["if_long"] = np.nan
                df["long_target"] = df["entry"] * (1 + win_perc / 100)
                df["short_target"] = df["entry"] * (1 - win_perc / 100)
                df["long_stop_loss"] = df["entry"] * (1 - loss_perc / 100)
                df["short_stop_loss"] = df["entry"] * (1 + loss_perc / 100)
                df["shorts_win_after"] = np.nan
                df["longs_win_after"] = np.nan

                # Plain arrays for the hot loop
                highs = df['high'].values
                lows = df['low'].values
                long_targets = df['long_target'].values
                short_targets = df['short_target'].values
                long_stop_losses = df['long_stop_loss'].values
                short_stop_losses = df['short_stop_loss'].values

                # Results are buffered and written back once after the loop
                n_rows = len(df)
                if_long_results = np.full(n_rows, np.nan)
                if_short_results = np.full(n_rows, np.nan)
                longs_win_after = np.full(n_rows, np.nan)
                shorts_win_after = np.full(n_rows, np.nan)

                for i in tqdm(range(n_rows), desc="Processing Rows", unit='row'):
                    # Candles i .. i + lookahead_window - 1, clipped to the data
                    lookahead_end = min(i + lookahead_window, n_rows)
                    future_highs = highs[i:lookahead_end]
                    future_lows = lows[i:lookahead_end]

                    # Offset of the first candle touching each level (NaN = never)
                    long_hit_idx = _first_true_or_nan(future_highs >= long_targets[i])
                    long_stop_idx = _first_true_or_nan(future_lows <= long_stop_losses[i])
                    short_hit_idx = _first_true_or_nan(future_lows <= short_targets[i])
                    short_stop_idx = _first_true_or_nan(future_highs >= short_stop_losses[i])

                    # Long trade logic
                    if not np.isnan(long_hit_idx) and (np.isnan(long_stop_idx) or long_hit_idx < long_stop_idx):
                        if_long_results[i] = 1
                        longs_win_after[i] = long_hit_idx
                    elif not np.isnan(long_stop_idx):
                        if_long_results[i] = -1

                    # Short trade logic
                    if not np.isnan(short_hit_idx) and (np.isnan(short_stop_idx) or short_hit_idx < short_stop_idx):
                        if_short_results[i] = 1
                        shorts_win_after[i] = short_hit_idx
                    elif not np.isnan(short_stop_idx):
                        if_short_results[i] = -1

                df['if_short'] = if_short_results
                df['if_long'] = if_long_results
                df['shorts_win_after'] = shorts_win_after
                df['longs_win_after'] = longs_win_after

                # Save the processed data
                df.to_csv(new_file_path, index=False)
                print(f"Processed file saved as {new_file_name}")

            except Exception as e:
                # Best effort: report the failure and continue with the next pair
                print(f"Error processing {symbol} {chart_time}: {e}")



In [1]:
def calculate_wins_losses_optimized(master_dictionary, win_perc=0.73, loss_perc=0.4, lookahead_window=10000):
    """Label each row with the outcome of a long and a short trade.

    Reads entry/high/low/open/close from
    ``<output_dir>/<symbol>-<chart_time>/<symbol>-<chart_time>.csv`` for every
    symbol / chart-time pair, looks at the ``lookahead_window`` candles that
    start at each row, and records whether the target or the stop loss is
    touched first. Outcomes are 1 (target strictly first), -1 (stop first or
    same candle) or NaN (neither touched). Results go to
    ``processed_data/<symbol>-<chart_time>_W<win>_L<loss>_<window>cdls.csv``.

    Args:
        master_dictionary: Mapping with "symbols" and "chart_times" iterables.
        win_perc: Take-profit distance from the entry, in percent.
        loss_perc: Stop-loss distance from the entry, in percent.
        lookahead_window: Number of candles (current one included) to inspect.

    Returns:
        None. Errors for a pair are printed and the loop moves on.
    """

    def first_touch(mask):
        # Offset of the first True in the mask, NaN when the level is never touched
        return np.argmax(mask) if mask.any() else np.nan

    def settle(target_at, stop_at):
        # Translate the two touch offsets into (outcome, candles_until_win)
        stop_touched = not np.isnan(stop_at)
        if not np.isnan(target_at) and (not stop_touched or target_at < stop_at):
            return 1, target_at
        if stop_touched:
            return -1, np.nan
        return np.nan, np.nan

    for symbol in master_dictionary["symbols"]:
        for chart_time in master_dictionary["chart_times"]:
            try:
                print(f"Calculating for {symbol} {chart_time}")

                # Where the pair's data lives and where the labels are written
                pair = f"{symbol}-{chart_time}"
                pair_dir = Path(output_dir) / pair
                processed_data_dir = pair_dir / "processed_data"
                processed_data_dir.mkdir(parents=True, exist_ok=True)

                source_csv = pair_dir / f"{pair}.csv"
                new_file_name = f"{pair}_W{win_perc}_L{loss_perc}_{lookahead_window}cdls.csv"

                df = pd.read_csv(source_csv, usecols=['entry', 'high', 'low', 'open', 'close'])
                n_rows = len(df)

                # Price levels derived from the entry price
                long_target_col = df["entry"] * (1 + win_perc / 100)
                short_target_col = df["entry"] * (1 - win_perc / 100)
                long_stop_col = df["entry"] * (1 - loss_perc / 100)
                short_stop_col = df["entry"] * (1 + loss_perc / 100)

                # Raw arrays for the per-row scan
                highs = df['high'].to_numpy()
                lows = df['low'].to_numpy()
                long_tp = long_target_col.to_numpy()
                short_tp = short_target_col.to_numpy()
                long_sl = long_stop_col.to_numpy()
                short_sl = short_stop_col.to_numpy()

                # Output buffers, NaN until a trade is resolved
                long_outcome = np.full(n_rows, np.nan)
                short_outcome = np.full(n_rows, np.nan)
                long_bars = np.full(n_rows, np.nan)
                short_bars = np.full(n_rows, np.nan)

                for row in tqdm(range(n_rows), desc="Processing Rows", unit='row'):
                    # Candles row .. row + lookahead_window - 1, clipped to the data
                    window = slice(row, min(row + lookahead_window, n_rows))
                    window_highs = highs[window]
                    window_lows = lows[window]

                    long_outcome[row], long_bars[row] = settle(
                        first_touch(window_highs >= long_tp[row]),
                        first_touch(window_lows <= long_sl[row]),
                    )
                    short_outcome[row], short_bars[row] = settle(
                        first_touch(window_lows <= short_tp[row]),
                        first_touch(window_highs >= short_sl[row]),
                    )

                # Attach the columns in the established output order
                df["if_short"] = short_outcome
                df["if_long"] = long_outcome
                df["long_target"] = long_target_col
                df["short_target"] = short_target_col
                df["long_stop_loss"] = long_stop_col
                df["short_stop_loss"] = short_stop_col
                df["shorts_win_after"] = short_bars
                df["longs_win_after"] = long_bars

                # Save the processed data
                df.to_csv(processed_data_dir / new_file_name, index=False)
                print(f"Processed file saved as {new_file_name}")

            except Exception as e:
                print(f"Error processing {symbol} {chart_time}: {e}")


In [2]:
def calculate_indicator_values(master_dictionary, win_perc=0.73, loss_perc=0.4):
    """Append TA-Lib indicator columns to each pair's CSV file, in place.

    For every symbol / chart-time pair the file
    ``<output_dir>/<symbol>-<chart_time>/<symbol>-<chart_time>-yo.csv`` is read,
    ``calculate_indicators_using_talib_new`` is called with
    ``master_dictionary["timeperiods"]``, and the file is overwritten with the
    original columns plus the indicator columns. Pairs whose file does not
    exist are reported and skipped.

    Because the file is rewritten in place, columns left by an earlier run are
    replaced by the freshly computed ones rather than duplicated.

    Args:
        master_dictionary: Mapping with "symbols", "chart_times" and
            "timeperiods" entries.
        win_perc: Not used by this function; kept so existing calls still work.
        loss_perc: Not used by this function; kept so existing calls still work.

    Returns:
        The string "Indicators are added to the CSV".
    """
    for symbol in master_dictionary["symbols"]:
        for chart_time in master_dictionary["chart_times"]:
            # Directory holding this pair's data
            data_dir = Path(output_dir) / f"{symbol}-{chart_time}"

            # NOTE(review): the "-yo" suffix differs from the
            # "<symbol>-<chart_time>.csv" name read by the win/loss functions
            # in this notebook - confirm this is the intended input file.
            file_name = f"{symbol}-{chart_time}-yo.csv"
            file_path = data_dir / file_name

            if not file_path.exists():
                print(f"File path for {file_name} doesn't exist. Skipping.")
                continue

            # Read the CSV file into a dataframe
            df = pd.read_csv(file_path)
            print(df.dtypes)

            # Calculate indicators using TA-Lib
            new_columns = calculate_indicators_using_talib_new(master_dictionary["timeperiods"], df)

            # Drop copies written by a previous run so a re-run replaces the
            # indicator columns instead of adding same-named duplicates.
            stale_columns = [col for col in new_columns.columns if col in df.columns]
            df = pd.concat([df.drop(columns=stale_columns), new_columns], axis=1)
            df.to_csv(file_path, index=False)

    return "Indicators are added to the CSV"

In [None]:
# Confirmation message printed when this cell runs.
print("data creation utilities successfully initialized")
