In [1]:
import pandas as pd

# Load the CSV file using the full path
file_path_1 = r"C:\Users\Pranav Darekar\Documents\all_coins_ohlcv_filtered"
df_ohlcv = pd.read_csv(file_path_1)

file_path_2 = r"C:\Users\Pranav Darekar\Documents\crypto_listings_latest_sorted"
df_crypto_listings_sorted = pd.read_csv(file_path_2)


In [2]:
# Set the slug column as the index for both DataFrames
df_ohlcv.set_index('symbol', inplace=True)
df_crypto_listings_sorted.set_index('symbol', inplace=True)

# Perform an inner join on cmc_rank
df = df_crypto_listings_sorted[['cmc_rank']].join(df_ohlcv, how='inner')

# Filter rows where 'cmc_rank' is between 1 and 100 inclusive
df_raw = df[(df['cmc_rank'] >= 1) & (df['cmc_rank'] <= 100)]


In [3]:
df = df_raw
# Ensure the timestamp column is in datetime format
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Sort the DataFrame by 'slug' and 'timestamp' columns
df.sort_values(by=['slug', 'timestamp'], inplace=True)

# Perform time-series calculations within each group (each cryptocurrency)
grouped = df.groupby('slug')
# Calculate percentage change for each cryptocurrency
df['m_pct_1d'] = grouped['close'].pct_change()

# Calculate cumulative returns for each cryptocurrency
df['d_pct_cum_ret'] = (1 + df['m_pct_1d']).groupby(df['slug']).cumprod() - 1

In [4]:
import pandas as pd

# Define the function to calculate MACD
def calculate_macd(group):
    # Calculate the 12-day EMA
    group['EMA_12'] = group['close'].ewm(span=12, adjust=False).mean()

    # Calculate the 26-day EMA
    group['EMA_26'] = group['close'].ewm(span=26, adjust=False).mean()

    # Calculate the MACD line
    group['MACD'] = group['EMA_12'] - group['EMA_26']

    # Calculate the Signal line (9-day EMA of the MACD line)
    group['Signal'] = group['MACD'].ewm(span=9, adjust=False).mean()

    return group

# Apply this function to each group (coin)
df = df.groupby('slug').apply(calculate_macd).reset_index(level=0, drop=True)


In [5]:
df.head()

Unnamed: 0_level_0,cmc_rank,id,slug,name,timestamp,ref_cur_id,ref_cur_name,time_open,time_close,time_high,...,low,close,volume,market_cap,m_pct_1d,d_pct_cum_ret,EMA_12,EMA_26,MACD,Signal
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAVE,58,7278.0,aave,Aave,2020-10-03 23:59:59,2781.0,USD,2020-10-03,2020-10-03 23:59:59,2020-10-03 18:11:38,...,0.523801,53.151488,0.0,0.0,,,53.151488,53.151488,0.0,0.0
AAVE,58,7278.0,aave,Aave,2020-10-04 23:59:59,2781.0,USD,2020-10-04,2020-10-04 23:59:59,2020-10-04 13:07:13,...,50.68899,52.675035,0.0,0.0,-0.008964,-0.008964,53.078187,53.116195,-0.038008,-0.007602
AAVE,58,7278.0,aave,Aave,2020-10-05 23:59:59,2781.0,USD,2020-10-05,2020-10-05 23:59:59,2020-10-05 20:02:38,...,49.7879,53.219243,0.0,89128130.0,0.010331,0.001275,53.099888,53.123828,-0.02394,-0.010869
AAVE,58,7278.0,aave,Aave,2020-10-06 23:59:59,2781.0,USD,2020-10-06,2020-10-06 23:59:59,2020-10-06 00:10:42,...,40.734578,42.401599,583091.46,71011440.0,-0.203266,-0.20225,51.453997,52.329589,-0.875591,-0.183814
AAVE,58,7278.0,aave,Aave,2020-10-07 23:59:59,2781.0,USD,2020-10-07,2020-10-07 23:59:59,2020-10-07 00:02:45,...,35.97069,40.083976,682834.19,67130040.0,-0.054659,-0.245854,49.704763,51.422506,-1.717743,-0.4906


In [6]:
def calculate_cci(group, period=20):
    # Calculate Typical Price
    group['TP'] = (group['high'] + group['low'] + group['close']) / 3

    # Calculate SMA of Typical Price
    group['SMA_TP'] = group['TP'].rolling(window=period).mean()

    # Calculate Mean Absolute Deviation manually
    def mean_absolute_deviation(series):
        return (series - series.mean()).abs().mean()

    group['MAD'] = group['TP'].rolling(window=period).apply(mean_absolute_deviation, raw=False)

    # Calculate CCI
    group['CCI'] = (group['TP'] - group['SMA_TP']) / (0.015 * group['MAD'])

    return group

df = df.groupby('slug').apply(calculate_cci).reset_index(level=0, drop=True)


In [7]:
import pandas as pd

def calculate_adx(group, period=14):
    # Ensure 'timestamp' is sorted
    group = group.sort_values('timestamp')

    # Calculate True Range (TR)
    group['TR'] = pd.concat([
        group['high'] - group['low'],
        (group['high'] - group['close'].shift()).abs(),
        (group['low'] - group['close'].shift()).abs()
    ], axis=1).max(axis=1)

    # Calculate Directional Movement (+DM and -DM)
    group['+DM'] = ((group['high'] - group['high'].shift()) > (group['low'].shift() - group['low'])) & (group['high'] - group['high'].shift() > 0) * (group['high'] - group['high'].shift())
    group['-DM'] = ((group['low'].shift() - group['low']) > (group['high'] - group['high'].shift())) & (group['low'].shift() - group['low'] > 0) * (group['low'].shift() - group['low'])

    # Calculate Smoothed Averages for +DM, -DM, and TR
    group['Smoothed_TR'] = group['TR'].rolling(window=period).sum()
    group['Smoothed_+DM'] = group['+DM'].rolling(window=period).sum()
    group['Smoothed_-DM'] = group['-DM'].rolling(window=period).sum()

    # Calculate +DI and -DI
    group['+DI'] = 100 * group['Smoothed_+DM'] / group['Smoothed_TR']
    group['-DI'] = 100 * group['Smoothed_-DM'] / group['Smoothed_TR']

    # Calculate DX
    group['DX'] = 100 * abs(group['+DI'] - group['-DI']) / (group['+DI'] + group['-DI'])

    # Calculate ADX
    group['ADX'] = group['DX'].rolling(window=period).mean()

    return group

# Apply the ADX calculation function to each cryptocurrency
df = df.groupby('slug').apply(calculate_adx).reset_index(level=0, drop=True)


In [8]:
import pandas as pd

def calculate_ultimate_oscillator(group, short_period=3, intermediate_period=6, long_period=9):
    # Ensure 'timestamp' is sorted
    group = group.sort_values('timestamp')

    # Calculate True Range (TR)
    group['prev_close'] = group['close'].shift(1)
    group['TR'] = pd.concat([
        group['high'] - group['low'],
        (group['high'] - group['prev_close']).abs(),
        (group['low'] - group['prev_close']).abs()
    ], axis=1).max(axis=1)

    # Calculate Buying Pressure (BP)
    group['BP'] = group['close'] - group[['low', 'prev_close']].min(axis=1)

    # Calculate Smoothed BP and TR for different periods
    group['Avg_BP_short'] = group['BP'].rolling(window=short_period).sum()
    group['Avg_TR_short'] = group['TR'].rolling(window=short_period).sum()

    group['Avg_BP_intermediate'] = group['BP'].rolling(window=intermediate_period).sum()
    group['Avg_TR_intermediate'] = group['TR'].rolling(window=intermediate_period).sum()

    group['Avg_BP_long'] = group['BP'].rolling(window=long_period).sum()
    group['Avg_TR_long'] = group['TR'].rolling(window=long_period).sum()

    # Calculate Ultimate Oscillator (UO)
    group['UO'] = 100 * (
        (4 * group['Avg_BP_short'] + 2 * group['Avg_BP_intermediate'] + group['Avg_BP_long']) /
        (4 * group['Avg_TR_short'] + 2 * group['Avg_TR_intermediate'] + group['Avg_TR_long'])
    )

    return group

# Apply the Ultimate Oscillator calculation function to each cryptocurrency
df = df.groupby('slug').apply(calculate_ultimate_oscillator).reset_index(level=0, drop=True)


In [9]:
import pandas as pd

def calculate_awesome_oscillator(group):
    # Ensure 'timestamp' is sorted
    group = group.sort_values('timestamp')

    # Calculate Median Price (MP)
    group['MP'] = (group['high'] + group['low']) / 2

    # Calculate the 5-period and 34-period SMA of the Median Price
    group['SMA_5'] = group['MP'].rolling(window=5).mean()
    group['SMA_34'] = group['MP'].rolling(window=34).mean()

    # df['SMA_5'] = grouped['close'].transform(lambda x: x.rolling(window=9).mean())

    # Calculate the Awesome Oscillator (AO)
    group['AO'] = group['SMA_5'] - group['SMA_34']

    return group

# Apply the Awesome Oscillator calculation function to each cryptocurrency
df = df.groupby('slug').apply(calculate_awesome_oscillator).reset_index(level=0, drop=True)


In [10]:
import pandas as pd

def calculate_trix(group, period=15):
    # Ensure 'timestamp' is sorted
    group = group.sort_values('timestamp')

    # Calculate the Triple Exponential Moving Average (TEMA)
    group['EMA1'] = group['close'].ewm(span=period, adjust=False).mean()
    group['EMA2'] = group['EMA1'].ewm(span=period, adjust=False).mean()
    group['EMA3'] = group['EMA2'].ewm(span=period, adjust=False).mean()

    # Calculate TRIX Oscillator
    group['TRIX'] = group['EMA3'].pct_change() * 100

    return group

# Apply the TRIX Oscillator calculation function to each cryptocurrency
df = df.groupby('slug').apply(calculate_trix).reset_index(level=0, drop=True)


In [11]:
## BINARY SIGNALS

In [12]:
# prompt: name,timestamp,ref_cur_id,ref_cur_name,time_open,time_close,time_high,time_low,open,high,low,close,volume,market_cap... drop these colums from df

# List of columns to drop
columns_to_drop = ['name', 'ref_cur_id', 'ref_cur_name', 'time_open',
                   'time_close', 'time_high', 'time_low', 'open', 'high', 'low',
                   'close', 'volume', 'market_cap']

# Drop the specified columns
df_bin = df.drop(columns=columns_to_drop, errors='ignore')


In [13]:
# prompt: can you help me cal ... where macd is greater than signal ... mark it as 1 in a col name called macd_crossover and vice a vesa for -1

# Create a new column 'macd_crossover' and initialize it with 0
df_bin['m_osc_macd_crossover'] = 0

# Set 'macd_crossover' to 1 where MACD is greater than Signal
df_bin.loc[df_bin['MACD'] > df_bin['Signal'], 'm_osc_macd_crossover'] = 1

# Set 'macd_crossover' to -1 where MACD is less than Signal
df_bin.loc[df_bin['MACD'] < df_bin['Signal'], 'm_osc_macd_crossover'] = -1

In [14]:
# prompt: can you help me cal ... where cci is greater than 200 1 and and where it is less than 200 -1 and between that 0 neutral

# Create a new column 'cci_signal' and initialize it with 0
df_bin['m_osc_cci'] = 0

# Set 'cci_signal' to 1 where CCI is greater than 200
df_bin.loc[df_bin['CCI'] > 108, 'm_osc_cci'] = 1

# Set 'cci_signal' to -1 where CCI is less than -200
df_bin.loc[df_bin['CCI'] < -108, 'm_osc_cci'] = -1


In [15]:
# prompt: can you help me cal ... where +DI is greater than -DI and adx is greater or equals to 20 then 1 ... and when  where -DI is greater than -DI and adx is greater or equals to 20 then -1 else 0
#  23  ADX

# Create a new column 'adx_signal' and initialize it with 0
df_bin['m_osc_adx'] = 0

# Set 'adx_signal' to 1 where +DI is greater than -DI and ADX is greater than or equal to 20
df_bin.loc[(df_bin['+DI'] > df_bin['-DI']) & (df_bin['ADX'] >= 20), 'm_osc_adx'] = 1

# Set 'adx_signal' to -1 where -DI is greater than +DI and ADX is greater than or equal to 20
df_bin.loc[(df_bin['-DI'] > df_bin['+DI']) & (df_bin['ADX'] >= 20), 'm_osc_adx'] = -1


In [16]:
# prompt: can you help me cal ... where uo is less than 33 = 1 and uo is more then 67 = -1 or else 0

# Create a new column 'uo_signal' and initialize it with 0
df_bin['m_osc_uo'] = 0

# Set 'uo_signal' to 1 where UO is less than 33
df_bin.loc[df_bin['UO'] < 33, 'm_osc_uo'] = 1

# Set 'uo_signal' to -1 where UO is greater than 67
df_bin.loc[df_bin['UO'] > 67, 'm_osc_uo'] = -1


In [17]:
df_bin.info()

<class 'pandas.core.frame.DataFrame'>
Index: 148093 entries, AAVE to XRP
Data columns (total 45 columns):
 #   Column                Non-Null Count   Dtype         
---  ------                --------------   -----         
 0   cmc_rank              148093 non-null  int64         
 1   id                    148093 non-null  float64       
 2   slug                  148093 non-null  object        
 3   timestamp             148093 non-null  datetime64[ns]
 4   m_pct_1d              147991 non-null  float64       
 5   d_pct_cum_ret         147991 non-null  float64       
 6   EMA_12                148093 non-null  float64       
 7   EMA_26                148093 non-null  float64       
 8   MACD                  148093 non-null  float64       
 9   Signal                148093 non-null  float64       
 10  TP                    148093 non-null  float64       
 11  SMA_TP                146188 non-null  float64       
 12  MAD                   146188 non-null  float64       
 13  CCI 

In [18]:
# prompt: can you map the values of AO where if AO is grater than 0 = 1 and if AO less then 0 = -1

# Create a new column 'ao_signal' and initialize it with 0
df_bin['m_osc_ao'] = 0

# Set 'ao_signal' to 1 where AO is greater than 0
df_bin.loc[df_bin['AO'] > 0, 'm_osc_ao'] = 1

# Set 'ao_signal' to -1 where AO is less than 0
df_bin.loc[df_bin['AO'] < 0, 'm_osc_ao'] = -1


In [19]:
# prompt: do the same logic as AO just for TRIX

# Create a new column 'trix_signal' and initialize it with 0
df_bin['m_osc_trix'] = 0

# Set 'trix_signal' to 1 where TRIX is greater than 0
df_bin.loc[df_bin['TRIX'] > 0, 'm_osc_trix'] = 1

# Set 'trix_signal' to -1 where TRIX is less than 0
df_bin.loc[df_bin['TRIX'] < 0, 'm_osc_trix'] = -1


In [20]:
df_bin.info()

<class 'pandas.core.frame.DataFrame'>
Index: 148093 entries, AAVE to XRP
Data columns (total 47 columns):
 #   Column                Non-Null Count   Dtype         
---  ------                --------------   -----         
 0   cmc_rank              148093 non-null  int64         
 1   id                    148093 non-null  float64       
 2   slug                  148093 non-null  object        
 3   timestamp             148093 non-null  datetime64[ns]
 4   m_pct_1d              147991 non-null  float64       
 5   d_pct_cum_ret         147991 non-null  float64       
 6   EMA_12                148093 non-null  float64       
 7   EMA_26                148093 non-null  float64       
 8   MACD                  148093 non-null  float64       
 9   Signal                148093 non-null  float64       
 10  TP                    148093 non-null  float64       
 11  SMA_TP                146188 non-null  float64       
 12  MAD                   146188 non-null  float64       
 13  CCI 

In [21]:
# prompt: ,m_pct_1d,d_pct_cum_ret,EMA_12,EMA_26,MACD,Signal,TP,SMA_TP,MAD,CCI,TR,+DM,-DM,Smoothed_TR,Smoothed_+DM,Smoothed_-DM,+DI,DI,DX,ADX,prev_close,BP,Avg_BP_short,Avg_TR_short,Avg_BP_intermediate,Avg_TR_intermediate,Avg_BP_long,Avg_TR_long,UO,MP,SMA_5,SMA_34,AO,EMA1,EMA2,EMA3,TRIX
# drop these col form df_bin

# List of columns to drop
columns_to_drop = ['m_pct_1d', 'd_pct_cum_ret', 'EMA_12', 'EMA_26', 'MACD', 'Signal',
                   'TP', 'SMA_TP', 'MAD', 'CCI', 'TR', '+DM', '-DM', 'Smoothed_TR',
                   'Smoothed_+DM', 'Smoothed_-DM', '+DI', 'DI', 'DX', 'ADX', 'prev_close',
                   'BP', 'Avg_BP_short', 'Avg_TR_short', 'Avg_BP_intermediate',
                   'Avg_TR_intermediate', 'Avg_BP_long', 'Avg_TR_long', 'UO', 'MP',
                   'SMA_5', 'SMA_34', 'AO', 'EMA1', 'EMA2', 'EMA3', 'TRIX']

# Drop the specified columns from df_bin
df_bin = df_bin.drop(columns=columns_to_drop, errors='ignore')

df_bin.info()


<class 'pandas.core.frame.DataFrame'>
Index: 148093 entries, AAVE to XRP
Data columns (total 11 columns):
 #   Column                Non-Null Count   Dtype         
---  ------                --------------   -----         
 0   cmc_rank              148093 non-null  int64         
 1   id                    148093 non-null  float64       
 2   slug                  148093 non-null  object        
 3   timestamp             148093 non-null  datetime64[ns]
 4   -DI                   146468 non-null  float64       
 5   m_osc_macd_crossover  148093 non-null  int64         
 6   m_osc_cci             148093 non-null  int64         
 7   m_osc_adx             148093 non-null  int64         
 8   m_osc_uo              148093 non-null  int64         
 9   m_osc_ao              148093 non-null  int64         
 10  m_osc_trix            148093 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(7), object(1)
memory usage: 13.6+ MB


In [22]:
import pandas as pd

# Convert 'timestamp' to datetime if it's not already
df_bin['timestamp'] = pd.to_datetime(df_bin['timestamp'])

# Find the latest date in the 'timestamp' column
latest_date = df_bin['timestamp'].dt.date.max()

# Filter the DataFrame for the latest date
df_filtered = df_bin[df_bin['timestamp'].dt.date == latest_date]


Unnamed: 0_level_0,cmc_rank,id,slug,timestamp,-DI,m_osc_macd_crossover,m_osc_cci,m_osc_adx,m_osc_uo,m_osc_ao,m_osc_trix
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AAVE,58,7278.0,aave,2024-07-25 23:59:59,10.311204,-1,0,0,0,1,1
AIOZ,99,9104.0,aioz-network,2024-07-25 23:59:59,674.703061,1,0,1,1,1,-1
AKT,86,7431.0,akash-network,2024-07-25 23:59:59,187.262353,-1,-1,0,0,1,-1
ALGO,66,4030.0,algorand,2024-07-25 23:59:59,6425.420975,-1,0,0,1,1,1
APT,27,21794.0,aptos,2024-07-25 23:59:59,113.915085,1,0,1,0,1,-1
ARB,42,11841.0,arbitrum,2024-07-25 23:59:59,637.068793,1,0,1,0,1,-1
FET,28,3773.0,artificial-superintelligence-alliance,2024-07-25 23:59:59,,-1,0,0,0,0,-1
AR,46,5632.0,arweave,2024-07-25 23:59:59,14.544452,1,0,1,0,1,1
AVAX,12,5805.0,avalanche,2024-07-25 23:59:59,18.99571,1,0,1,0,1,1
AXS,76,6783.0,axie-infinity,2024-07-25 23:59:59,141.061262,1,0,1,1,1,-1


In [23]:
df_oscillator_bin = df_filtered
df_oscillator = df

In [None]:
df_oscillator_bin.to_csv('df_oscillator_bin.csv', index=False)
df_oscillator.to_csv('df_oscillator.csv', index=False)