In [1]:
import pandas as pd

# Load the two input tables from hard-coded absolute paths.
# NOTE(review): neither path carries a file extension and both are specific to one
# machine — confirm the files exist under exactly these names before running.

# Per-coin OHLCV history; later cells read its 'symbol', 'slug', 'timestamp',
# 'close', 'high' and 'low' columns.
file_path_1 = r"C:\Users\Pranav Darekar\Documents\all_coins_ohlcv_filtered"
df_ohlcv = pd.read_csv(file_path_1)

# Listings table; later cells use only its 'symbol' and 'cmc_rank' columns.
file_path_2 = r"C:\Users\Pranav Darekar\Documents\crypto_listings_latest_sorted"
df_crypto_listings_sorted = pd.read_csv(file_path_2)


In [2]:
# Index both tables by 'symbol' so they can be joined on it.
# The guards make this cell safe to re-run: once 'symbol' is the index it is no
# longer a column, and a second set_index('symbol') would raise KeyError.
if df_ohlcv.index.name != 'symbol':
    df_ohlcv.set_index('symbol', inplace=True)
if df_crypto_listings_sorted.index.name != 'symbol':
    df_crypto_listings_sorted.set_index('symbol', inplace=True)

# Inner join on the shared 'symbol' index, taking only 'cmc_rank' from the listings
df = df_crypto_listings_sorted[['cmc_rank']].join(df_ohlcv, how='inner')

# Keep rows whose 'cmc_rank' is between 1 and 1001 inclusive.
# NOTE(review): the earlier comment said 1-100 while the code uses 1001 — confirm the intended cutoff.
df_raw = df[(df['cmc_rank'] >= 1) & (df['cmc_rank'] <= 1001)]




In [3]:
# Work on an independent copy: df_raw is a filtered slice of the joined frame, so
# writing a column through an alias of it raises SettingWithCopyWarning and would
# also change df_raw itself.
df = df_raw.copy()

# Ensure the timestamp column is in datetime format
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Order rows by coin ('slug') and then chronologically
df = df.sort_values(by=['slug', 'timestamp'])

# Group by coin for the per-cryptocurrency time-series calculations
grouped = df.groupby('slug')




In [4]:
# Row-over-row fractional change of 'close', computed separately for every coin
df['m_pct_1d'] = df.groupby('slug')['close'].pct_change()

In [5]:
import pandas as pd
import numpy as np

def calculate_rsi(df, period=1):
    """Return a timestamp-sorted copy of ``df`` with an added 'rsi' column.

    Gains and losses are the positive and negative parts of the row-to-row
    change in 'close'. Each is averaged with a simple rolling mean over
    ``period`` rows (``min_periods=1``, so early rows use a partial window),
    and RSI is ``100 - 100 / (1 + avg_gain / avg_loss)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'timestamp' and 'close' columns.
    period : int, default 1
        Rolling window length, in rows.

    Returns
    -------
    pandas.DataFrame
        The sorted frame plus 'rsi'. The scratch columns used along the way
        ('delta', 'gain', 'loss', 'avg_gain', 'avg_loss', 'rs') are removed.
    """
    scratch_columns = ['delta', 'gain', 'loss', 'avg_gain', 'avg_loss', 'rs']

    enriched = (
        df.sort_values('timestamp')
        # row-to-row change in the closing price
        .assign(delta=lambda f: f['close'].diff())
        # split the change into its positive and negative parts
        .assign(
            gain=lambda f: np.where(f['delta'] > 0, f['delta'], 0),
            loss=lambda f: np.where(f['delta'] < 0, -f['delta'], 0),
        )
        # simple rolling averages of each part
        .assign(
            avg_gain=lambda f: f['gain'].rolling(window=period, min_periods=1).mean(),
            avg_loss=lambda f: f['loss'].rolling(window=period, min_periods=1).mean(),
        )
        # relative strength, then the index itself
        .assign(rs=lambda f: f['avg_gain'] / f['avg_loss'])
        .assign(rsi=lambda f: 100 - (100 / (1 + f['rs'])))
    )

    return enriched.drop(columns=scratch_columns)



# Add one RSI column per look-back window, each computed coin by coin
for rsi_column, rsi_window in (
    ('m_mom_rsi_9', 9),
    ('m_mom_rsi_18', 18),
    ('m_mom_rsi_27', 27),
    ('m_mom_rsi_54', 54),
    ('m_mom_rsi_108', 108),
):
    df[rsi_column] = (
        df.groupby('slug')
        .apply(calculate_rsi, period=rsi_window)
        .reset_index(level=0, drop=True)['rsi']
    )




In [6]:
import pandas as pd

def calculate_sma(df, column, sma_length=14):
    """Add a simple moving average of ``column`` and a 30-70 rescaled version of it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'timestamp' and ``column``.
    column : str
        Name of the column to average.
    sma_length : int, default 14
        Rolling window length in rows (``min_periods=1``, so early rows use a
        partial window).

    Returns
    -------
    pandas.DataFrame
        A timestamp-sorted copy of ``df`` with two extra columns:
        'sma_14' (the rolling mean; the name is fixed regardless of
        ``sma_length``) and 'sma_14_normalized' (min-max rescaling of 'sma_14'
        onto [30, 70] using the min and max of the whole frame passed in).
        When every SMA value is the same, the rescaled value is the band
        midpoint 50 rather than the NaN a 0/0 division would give.
    """
    band_low, band_high = 30, 70

    # Ensure the DataFrame is sorted by timestamp (sort_values returns a new frame)
    df = df.sort_values('timestamp')

    # Simple moving average of the requested column
    df['sma_14'] = df[column].rolling(window=sma_length, min_periods=1).mean()

    min_sma = df['sma_14'].min()
    max_sma = df['sma_14'].max()
    sma_range = max_sma - min_sma

    if sma_range == 0:
        # Flat series (single row or constant values): min-max scaling is 0/0,
        # so place valid rows at the middle of the band and keep NaN rows NaN.
        midpoint = (band_low + band_high) / 2
        df['sma_14_normalized'] = df['sma_14'].where(df['sma_14'].isna(), midpoint)
    else:
        # An all-NaN SMA gives a NaN range and falls through here, staying NaN.
        df['sma_14_normalized'] = band_low + (df['sma_14'] - min_sma) * (band_high - band_low) / sma_range

    return df

# Attach the 14-row SMA of 'close' and its 30-70 rescaled version, coin by coin
df = (
    df.groupby('slug')
    .apply(calculate_sma, 'close', sma_length=14)
    .reset_index(level=0, drop=True)
)


In [7]:
import pandas as pd

def calculate_roc(df, period=9):
    """Return a timestamp-sorted copy of ``df`` with a 'm_mom_roc' column.

    'm_mom_roc' is the percentage change of 'close' relative to its value
    ``period`` rows earlier; the first ``period`` rows are NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'timestamp' and 'close' columns.
    period : int, default 9
        Look-back distance in rows.
    """
    ordered = df.sort_values('timestamp')

    # closing price `period` rows back, aligned to the current row
    earlier_close = ordered['close'].shift(period)

    ordered['m_mom_roc'] = ((ordered['close'] - earlier_close) / earlier_close) * 100
    return ordered

# Per-coin 9-row rate of change, stored as 'm_mom_roc'
df['m_mom_roc'] = (
    df.groupby('slug')
    .apply(calculate_roc, period=9)
    .reset_index(level=0, drop=True)['m_mom_roc']
)


In [8]:
import pandas as pd

def calculate_williams_r(df, period=14):
    """Return a timestamp-sorted copy of ``df`` with a 'm_mom_williams_%' column.

    The value is ``(highest_high - close) / (highest_high - lowest_low) * -100``,
    where the highest 'high' and lowest 'low' are taken over a rolling window of
    ``period`` rows (``min_periods=1``). A window whose high equals its low
    divides by zero.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'timestamp', 'high', 'low' and 'close' columns.
    period : int, default 14
        Rolling window length in rows.
    """
    frame = df.sort_values('timestamp').assign(
        highest_high=lambda f: f['high'].rolling(window=period, min_periods=1).max(),
        lowest_low=lambda f: f['low'].rolling(window=period, min_periods=1).min(),
    )

    # width of the high/low channel for each window
    channel = frame['highest_high'] - frame['lowest_low']
    frame['m_mom_williams_%'] = ((frame['highest_high'] - frame['close']) / channel) * -100

    # the rolling extremes were only scratch values
    return frame.drop(columns=['highest_high', 'lowest_low'])

# Per-coin 14-row Williams %R, stored as 'm_mom_williams_%'
df['m_mom_williams_%'] = (
    df.groupby('slug')
    .apply(calculate_williams_r, period=14)
    .reset_index(level=0, drop=True)['m_mom_williams_%']
)


In [9]:
import pandas as pd

def calculate_smi(df, period=14, smooth_k=3, smooth_d=3):
    """Return a timestamp-sorted copy of ``df`` with a 'm_mom_smi' column.

    Steps, all with ``min_periods=1`` rolling windows:

    1. %K = ``(close - lowest_low) / (highest_high - lowest_low) * 100`` over
       ``period`` rows.
    2. smoothed %K = rolling mean of %K over ``smooth_k`` rows.
    3. %D = rolling mean of smoothed %K over ``smooth_d`` rows.
    4. 'm_mom_smi' = smoothed %K - %D.

    NOTE(review): as written this is the gap between a smoothed stochastic %K
    and its moving average; confirm that this is the intended definition of
    "SMI" for this project.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'timestamp', 'high', 'low' and 'close' columns.
    period, smooth_k, smooth_d : int
        Window lengths in rows for the three rolling steps above.
    """
    scratch_columns = ['highest_high', 'lowest_low', 'percent_k', 'smoothed_k', 'percent_d']

    enriched = (
        df.sort_values('timestamp')
        # rolling channel extremes
        .assign(
            highest_high=lambda f: f['high'].rolling(window=period, min_periods=1).max(),
            lowest_low=lambda f: f['low'].rolling(window=period, min_periods=1).min(),
        )
        # position of the close inside the channel, in percent
        .assign(percent_k=lambda f: ((f['close'] - f['lowest_low']) / (f['highest_high'] - f['lowest_low'])) * 100)
        # two successive moving-average smoothings
        .assign(smoothed_k=lambda f: f['percent_k'].rolling(window=smooth_k, min_periods=1).mean())
        .assign(percent_d=lambda f: f['smoothed_k'].rolling(window=smooth_d, min_periods=1).mean())
        # final indicator: smoothed %K relative to its own average
        .assign(m_mom_smi=lambda f: f['smoothed_k'] - f['percent_d'])
    )

    return enriched.drop(columns=scratch_columns)

# Per-coin SMI (14-row channel, 3-row smoothing for both %K and %D)
df['m_mom_smi'] = (
    df.groupby('slug')
    .apply(calculate_smi, period=14, smooth_k=3, smooth_d=3)
    .reset_index(level=0, drop=True)['m_mom_smi']
)



In [10]:
import pandas as pd

def calculate_cmo(df, period=14):
    """Return a timestamp-sorted copy of ``df`` with a 'm_mom_cmo' column.

    The Chande Momentum Oscillator is computed as
    ``(sum_gain - sum_loss) / (sum_gain + sum_loss) * 100``, where gains and
    losses are the positive and negative parts of the row-to-row change in
    'close', summed over a rolling window of ``period`` rows
    (``min_periods=1``). Rows where both sums are zero come out as NaN.

    Intermediate values are kept in local Series rather than written to the
    frame, so input columns that happen to be called 'delta', 'gain', 'loss',
    'sum_gain' or 'sum_loss' are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'timestamp' and 'close' columns.
    period : int, default 14
        Rolling window length in rows.
    """
    # Ensure the DataFrame is sorted by timestamp (sort_values returns a new frame)
    df = df.sort_values('timestamp')

    delta = df['close'].diff()

    # Vectorised split into gains and losses; NaN changes (first row) count as 0
    # on both sides, as the comparisons are False for NaN.
    gain = delta.where(delta > 0, 0)
    loss = (-delta).where(delta < 0, 0)

    sum_gain = gain.rolling(window=period, min_periods=1).sum()
    sum_loss = loss.rolling(window=period, min_periods=1).sum()

    df['m_mom_cmo'] = (sum_gain - sum_loss) / (sum_gain + sum_loss) * 100

    return df

# Per-coin 14-row Chande Momentum Oscillator, stored as 'm_mom_cmo'
df['m_mom_cmo'] = (
    df.groupby('slug')
    .apply(calculate_cmo, period=14)
    .reset_index(level=0, drop=True)['m_mom_cmo']
)


In [11]:
import pandas as pd

def calculate_mom(df, period=10):
    """Return a timestamp-sorted copy of ``df`` with a 'm_mom_mom' column.

    'm_mom_mom' is the absolute difference between 'close' and the 'close'
    ``period`` rows earlier; the first ``period`` rows are NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'timestamp' and 'close' columns.
    period : int, default 10
        Look-back distance in rows.
    """
    ordered = df.sort_values('timestamp')
    closes = ordered['close']

    # current close minus the close `period` rows back
    ordered['m_mom_mom'] = closes.sub(closes.shift(period))
    return ordered

# Per-coin 10-row momentum (price difference), stored as 'm_mom_mom'
df['m_mom_mom'] = (
    df.groupby('slug')
    .apply(calculate_mom, period=10)
    .reset_index(level=0, drop=True)['m_mom_mom']
)


In [12]:
import pandas as pd
import numpy as np

def calculate_tsi(df, short_period=13, long_period=25):
    """Return a timestamp-sorted copy of ``df`` with a 'm_mom_tsi' column.

    True Strength Index:

        TSI = 100 * EMA_short(EMA_long(change)) / EMA_short(EMA_long(|change|))

    where ``change`` is the row-to-row difference in 'close'. Numerator and
    denominator use the same two smoothing passes (``long_period`` first, then
    ``short_period``), and the denominator smooths the absolute change, so the
    result stays within [-100, 100]. The previous version divided a
    short/short-smoothed signed change by a long/long-smoothed signed change,
    which is unbounded and flips sign when the denominator goes negative.

    The first row is NaN (no previous close), as are rows where the smoothed
    absolute change is zero.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'timestamp' and 'close' columns.
    short_period : int, default 13
        Span of the second exponential smoothing pass.
    long_period : int, default 25
        Span of the first exponential smoothing pass.
    """
    # Ensure the DataFrame is sorted by timestamp (sort_values returns a new frame)
    df = df.sort_values('timestamp')

    price_change = df['close'].diff()

    def _double_smooth(series):
        # long-span EMA followed by a short-span EMA of the result
        first_pass = series.ewm(span=long_period, adjust=False).mean()
        return first_pass.ewm(span=short_period, adjust=False).mean()

    smoothed_change = _double_smooth(price_change)
    smoothed_abs_change = _double_smooth(price_change.abs())

    df['m_mom_tsi'] = 100 * (smoothed_change / smoothed_abs_change)

    return df

# Per-coin True Strength Index (spans 13 and 25), stored as 'm_mom_tsi'
df['m_mom_tsi'] = (
    df.groupby('slug')
    .apply(calculate_tsi, short_period=13, long_period=25)
    .reset_index(level=0, drop=True)['m_mom_tsi']
)


In [13]:
import pandas as pd

# Ensure 'timestamp' column is in datetime format
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Start of the reporting window: 32 days before the most recent timestamp.
# NOTE(review): the earlier comments said 108 days while the code uses 32 — confirm the intended length.
start_date = df['timestamp'].max() - pd.Timedelta(days=32)

# Keep only rows inside the window. .copy() makes the result an independent
# frame, so the column assignments in later cells do not raise SettingWithCopyWarning.
df = df[df['timestamp'] >= start_date].copy()



In [14]:
# Binary data: turn each momentum indicator into a -1 / 0 / 1 signal

In [15]:
import pandas as pd

def map_rsi_values(row, rsi_column):
    """Turn one row's RSI reading into a -1 / 0 / 1 signal.

    Returns -1 when the RSI in ``rsi_column`` is below 30 and
    'sma_14_normalized' is below 50, 1 when the RSI is above 70 and
    'sma_14_normalized' is above 50, and 0 in every other case (including
    NaN inputs, for which all comparisons are False).
    """
    rsi = row[rsi_column]

    if rsi < 30:
        # low RSI only counts when the rescaled SMA agrees
        return -1 if row['sma_14_normalized'] < 50 else 0
    if rsi > 70:
        # high RSI only counts when the rescaled SMA agrees
        return 1 if row['sma_14_normalized'] > 50 else 0
    return 0

# Binarise the three shorter RSI windows row by row
for rsi_source, rsi_signal in (
    ('m_mom_rsi_9', 'm_mom_rsi_9_bin'),
    ('m_mom_rsi_18', 'm_mom_rsi_18_bin'),
    ('m_mom_rsi_27', 'm_mom_rsi_27_bin'),
):
    df[rsi_signal] = df.apply(map_rsi_values, axis=1, args=(rsi_source,))


In [16]:
import pandas as pd

def map_rsi_values_54_108(row, rsi_column):
    """Turn one row's RSI reading into a -1 / 0 / 1 signal using the SMA as a baseline.

    Returns 1 when the RSI in ``rsi_column`` is above both 'sma_14_normalized'
    and 50, -1 when it is below both, and 0 otherwise (including NaN inputs).
    """
    rsi = row[rsi_column]
    baseline = row['sma_14_normalized']

    is_bullish = rsi > baseline and rsi > 50
    is_bearish = rsi < baseline and rsi < 50

    if is_bullish:
        return 1
    return -1 if is_bearish else 0

# Binarise the two longer RSI windows row by row
for rsi_source, rsi_signal in (
    ('m_mom_rsi_54', 'm_mom_rsi_54_bin'),
    ('m_mom_rsi_108', 'm_mom_rsi_108_bin'),
):
    df[rsi_signal] = df.apply(map_rsi_values_54_108, axis=1, args=(rsi_source,))


In [17]:
import pandas as pd

def map_roc_values(row):
    """Return the sign of the row's 'm_mom_roc' as 1, -1 or 0 (0 also for NaN)."""
    roc = row['m_mom_roc']
    # each comparison contributes 0 or 1, so the difference is the sign
    return int(roc > 0) - int(roc < 0)

# Sign of the rate of change for every row
df['m_mom_roc_bin'] = df.apply(map_roc_values, axis='columns')


In [18]:
import pandas as pd

def map_williams_r(row):
    """Map the row's 'm_mom_williams_%' to 1 above -50, -1 below -50, else 0.

    A value of exactly -50, or NaN, yields 0.
    """
    williams = row['m_mom_williams_%']

    if williams > -50:
        return 1
    if williams < -50:
        return -1
    return 0

# Williams %R signal for every row (split at -50)
df['m_mom_williams_%_bin'] = df.apply(map_williams_r, axis='columns')


In [19]:
import pandas as pd

# Define a function to map SMI values
def map_smi(value):
    """Map an SMI value to 1 at or above 25, -1 at or below -25, else 0 (0 also for NaN)."""
    if value >= 25:
        return 1
    return -1 if value <= -25 else 0

# SMI signal for every row (thresholds at +/-25)
df['m_mom_smi_bin'] = df['m_mom_smi'].map(map_smi)


In [34]:
import pandas as pd

# Define a function to map CMO values
def map_cmo(value):
    """Map a CMO value to 1 above 40, -1 below -40, else 0 (0 also for NaN)."""
    # each comparison contributes 0 or 1, so the difference is the signal
    return int(value > 40) - int(value < -40)

# CMO signal for every row (thresholds at +/-40)
df['m_mom_cmo_bin'] = df['m_mom_cmo'].map(map_cmo)


In [41]:
import pandas as pd

# Define a function to map Momentum values
def map_momentum(value, threshold=4000):
    """Map a momentum value to a -1 / 0 / 1 signal.

    Parameters
    ----------
    value : float
        Momentum reading (a price difference).
    threshold : float, default 4000
        Size a move must exceed to count. The default keeps the previous
        hard-coded behaviour. Because momentum is an absolute price difference,
        a single threshold is scale-dependent: pass a value suited to the price
        level of the asset being mapped.

    Returns
    -------
    int
        1 if ``value > threshold``, -1 if ``value < -threshold``, otherwise 0
        (also 0 for NaN, for which both comparisons are False).
    """
    if value > threshold:
        return 1
    elif value < -threshold:
        return -1
    else:
        return 0

# Momentum signal for every row (default threshold of map_momentum)
df['m_mom_mom_bin'] = df['m_mom_mom'].map(map_momentum)


In [46]:
import pandas as pd

# Export layout: identifier columns first, then every binarised momentum signal
columns_to_keep = (
    ['cmc_rank', 'id', 'slug', 'name', 'timestamp']
    + ['m_mom_rsi_9_bin', 'm_mom_rsi_18_bin', 'm_mom_rsi_27_bin', 'm_mom_rsi_54_bin', 'm_mom_rsi_108_bin']
    + ['m_mom_roc_bin', 'm_mom_williams_%_bin', 'm_mom_smi_bin', 'm_mom_cmo_bin', 'm_mom_mom_bin']
)

# Narrow frame containing only those columns, in that order
df_momentum_filtered = df.loc[:, columns_to_keep]


In [48]:
# Export names: df_momentum is the full indicator frame, df_momentum_bin the
# signals-only frame. Both are plain aliases (no copy is made here).
df_momentum = df
df_momentum_bin = df_momentum_filtered

In [None]:
# Write both frames as CSV files to the current working directory.
# index=False leaves the frame index out of the files.
# NOTE(review): if the frames are still indexed by 'symbol' at this point, the
# symbol will not appear in the CSVs — confirm that is intended.
df_momentum_bin.to_csv('df_momentum_bin.csv', index=False)
df_momentum.to_csv('df_momentum.csv', index=False)


In [24]:
"""import numpy as np
import pandas as pd

def calculate_kama(df, short_period=10, long_period=30):
    # Ensure the DataFrame is sorted by timestamp
    df = df.sort_values('timestamp').drop_duplicates().reset_index(drop=True)
    
    # Calculate the absolute price changes
    df['abs_change'] = df['close'].diff().abs()
    df['price_change'] = df['close'].diff()
    
    # Calculate the sum of absolute price changes and absolute price change over the long period
    df['sum_abs_change'] = df['abs_change'].rolling(window=long_period, min_periods=1).sum()
    df['sum_price_change'] = df['price_change'].rolling(window=long_period, min_periods=1).apply(lambda x: np.abs(x).sum(), raw=True)
    
    # Calculate the Efficiency Ratio (ER)
    df['efficiency_ratio'] = df['sum_price_change'] / df['sum_abs_change']
    
    # Calculate the Smoothing Constant (SC)
    sc_max = 2 / (short_period + 1)
    sc_min = 2 / (long_period + 1)
    df['smoothing_constant'] = sc_max - (sc_max - sc_min) * df['efficiency_ratio']
    
    # Initialize KAMA with the first value
    df['kama'] = df['close'].ewm(span=short_period, adjust=False).mean()
    
    # Calculate KAMA for the rest of the DataFrame
    for i in range(short_period, len(df)):
        smoothing_constant = df['smoothing_constant'].iloc[i]
        kama_prev = df['kama'].iloc[i - 1]
        close = df['close'].iloc[i]
        df.at[i, 'kama'] = kama_prev + smoothing_constant * (close - kama_prev)
    
    # Drop intermediate columns
    df = df.drop(columns=['abs_change', 'price_change', 'sum_abs_change', 'sum_price_change', 'efficiency_ratio', 'smoothing_constant'])
    
    return df

# Apply the function to your DataFrame for each slug
df['m_mom_kama'] = df.groupby('slug').apply(lambda x: calculate_kama(x, short_period=10, long_period=30)).reset_index(level=0, drop=True)['kama']

Now write code that maps m_mom_rsi_9 to -1 when its value is less than 30 and my SMA is less than 0.05, and to 1 when its value
is greater than 70 and the SMA is greater than 0.05; map everything else to 0.

"""


"import numpy as np\nimport pandas as pd\n\ndef calculate_kama(df, short_period=10, long_period=30):\n    # Ensure the DataFrame is sorted by timestamp\n    df = df.sort_values('timestamp').drop_duplicates().reset_index(drop=True)\n    \n    # Calculate the absolute price changes\n    df['abs_change'] = df['close'].diff().abs()\n    df['price_change'] = df['close'].diff()\n    \n    # Calculate the sum of absolute price changes and absolute price change over the long period\n    df['sum_abs_change'] = df['abs_change'].rolling(window=long_period, min_periods=1).sum()\n    df['sum_price_change'] = df['price_change'].rolling(window=long_period, min_periods=1).apply(lambda x: np.abs(x).sum(), raw=True)\n    \n    # Calculate the Efficiency Ratio (ER)\n    df['efficiency_ratio'] = df['sum_price_change'] / df['sum_abs_change']\n    \n    # Calculate the Smoothing Constant (SC)\n    sc_max = 2 / (short_period + 1)\n    sc_min = 2 / (long_period + 1)\n    df['smoothing_constant'] = sc_max - (s