In [1]:
import pandas as pd

# Absolute locations of the two input files (raw strings keep the backslashes literal)
file_path_1 = r"C:\Users\Pranav Darekar\Documents\all_coins_ohlcv_filtered"
file_path_2 = r"C:\Users\Pranav Darekar\Documents\crypto_listings_latest_sorted"

# Read each path with pandas' CSV parser, first file into df_ohlcv, second into
# df_crypto_listings_sorted
df_ohlcv, df_crypto_listings_sorted = map(pd.read_csv, (file_path_1, file_path_2))


In [2]:
# Index both frames by 'symbol' so they can be joined on it. The guard keeps
# this cell re-runnable: once 'symbol' is the index it is no longer a column,
# and an unconditional set_index would raise KeyError on a second run.
for frame in (df_ohlcv, df_crypto_listings_sorted):
    if frame.index.name != 'symbol':
        frame.set_index('symbol', inplace=True)

# Attach each listing's cmc_rank to its OHLCV rows with an inner join on the
# shared 'symbol' index (symbols missing from either frame are dropped).
# NOTE(review): if a symbol occurs more than once in the listings frame, this
# join repeats the matching OHLCV rows once per listing -- confirm symbols are
# unique, or join on 'slug' instead.
df = df_crypto_listings_sorted[['cmc_rank']].join(df_ohlcv, how='inner')

# Keep rows whose cmc_rank lies in [1, 1001], both bounds inclusive.
# NOTE(review): 1001 may have been meant as 1000 -- confirm the intended cutoff.
# .copy() makes df an independent frame, so later column assignments do not
# trigger SettingWithCopyWarning.
df = df[df['cmc_rank'].between(1, 1001)].copy()




In [3]:
# Parse the 'timestamp' column into pandas datetimes
df['timestamp'] = df['timestamp'].pipe(pd.to_datetime)

# Order rows by coin ('slug') and, within each coin, by 'timestamp'
sort_keys = ['slug', 'timestamp']
df.sort_values(by=sort_keys, inplace=True)

# Per-coin group handle used for the time-series calculations on each cryptocurrency
grouped = df.groupby(by='slug')




In [4]:
# Per-coin change of 'close' relative to the previous row of the same coin.
# pct_change returns a fraction (0.05 means +5%), not a value scaled by 100.
close_by_coin = grouped['close']
df['m_pct_1d'] = close_by_coin.pct_change()

In [5]:
# Compound the row-to-row changes within each coin: running product of (1 + r), minus 1
growth_factor = 1 + df['m_pct_1d']
df['d_pct_cum_ret'] = growth_factor.groupby(df['slug']).cumprod().sub(1)

In [6]:
# Distinct values found in the 'slug' column
unique_slugs = df['slug'].unique()

# Number of distinct slugs
unique_count = unique_slugs.size

print(unique_count)


999


In [7]:

# Step 1: Per-coin all-time high (max of 'high') and all-time low (min of 'low')
all_time_high = df.groupby('slug')['high'].max().reset_index()
all_time_high.columns = ['slug', 'v_met_ath']

all_time_low = df.groupby('slug')['low'].min().reset_index()
all_time_low.columns = ['slug', 'v_met_atl']

# Broadcast the per-coin values onto every row of that coin by looking up 'slug'.
# A lookup (instead of pd.merge) overwrites the target column on re-run rather
# than producing '_x'/'_y' suffixed duplicates, and it keeps df's index intact.
df['v_met_ath'] = df['slug'].map(all_time_high.set_index('slug')['v_met_ath'])
df['v_met_atl'] = df['slug'].map(all_time_low.set_index('slug')['v_met_atl'])

# Step 2: Date of the ATH / ATL for each coin. When the extreme value occurs on
# several rows, .max() picks the most recent of those timestamps.
ath_dates = df[df['high'] == df['v_met_ath']].groupby('slug')['timestamp'].max().reset_index()
ath_dates.columns = ['slug', 'ath_date']

atl_dates = df[df['low'] == df['v_met_atl']].groupby('slug')['timestamp'].max().reset_index()
atl_dates.columns = ['slug', 'atl_date']

# Broadcast the dates back onto every row of the coin (same lookup approach as above)
df['ath_date'] = df['slug'].map(ath_dates.set_index('slug')['ath_date'])
df['atl_date'] = df['slug'].map(atl_dates.set_index('slug')['atl_date'])

# Step 3: Whole days elapsed between each date and the moment this cell runs.
# NOTE(review): pd.Timestamp.now() is timezone-naive; if 'timestamp' was parsed
# as timezone-aware the subtraction raises TypeError -- confirm the column is naive.
current_date = pd.Timestamp.now()
df['d_met_ath_days'] = (current_date - df['ath_date']).dt.days
df['d_met_atl_days'] = (current_date - df['atl_date']).dt.days

# Approximate weeks (7 days) and months (30 days) via floor division.
# NOTE(review): these column names start with '_' while the day columns start
# with 'd_' -- confirm whether that is intentional before relying on them.
df['_met_ath_week'] = df['d_met_ath_days'] // 7
df['_met_ath_month'] = df['d_met_ath_days'] // 30

df['_met_atl_week'] = df['d_met_atl_days'] // 7
df['_met_atl_month'] = df['d_met_atl_days'] // 30


In [8]:
# Switch the working frame to the listings table. A copy is taken instead of
# binding a second name to the same object, so that column assignments made
# through `df` do not also modify df_crypto_listings_sorted.
# NOTE: this rebinds `df`; whatever frame the name referred to before is no
# longer reachable through it.
df = df_crypto_listings_sorted.copy()

In [9]:
# Print a summary of the working frame: index, column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10000 entries, BTC to WNEON
Data columns (total 31 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   row_names                         10000 non-null  int64  
 1   id                                10000 non-null  int64  
 2   name                              10000 non-null  object 
 3   slug                              10000 non-null  object 
 4   cmc_rank                          10000 non-null  int64  
 5   market_pair_count                 10000 non-null  int64  
 6   circulating_supply                10000 non-null  float64
 7   self_reported_circulating_supply  10000 non-null  float64
 8   total_supply                      10000 non-null  float64
 9   max_supply                        6552 non-null   float64
 10  is_active                         10000 non-null  int64  
 11  last_updated                      10000 non-null  object 
 12  date_ad

In [10]:

# Parse both date columns; unparseable entries become NaT (errors='coerce') and
# any timezone information is stripped so the values are timezone-naive
for date_col in ('date_added', 'last_updated'):
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce').dt.tz_localize(None)

# Discard rows where either date is missing (NaT)
df = df.dropna(subset=['date_added', 'last_updated'])

# Step 1: Coin age in days, months and years, measured from 'date_added'
# to midnight of the day this cell runs (normalize() zeroes the time part)
current_date = pd.Timestamp.now().normalize()
age_days = (current_date - df['date_added']).dt.days
df['d_met_coin_age_d'] = age_days
df['d_met_coin_age_m'] = age_days // 30  # approximate months (30-day blocks)
df['d_met_coin_age_y'] = age_days // 365  # approximate years (365-day blocks)

# Step 2: Categorize Market Capitalization
def categorize_market_cap(market_cap):
    """Return the market-capitalisation bucket label for a single value.

    Each label names a range as 'upper-lower'; a value is placed in the first
    bucket whose lower bound it reaches, so the bounds below are the lower ends
    of the labels they return. The top bucket is open-ended: anything at or
    above 100B, including values above 1T, is labelled '1T-100B'.

    Parameters
    ----------
    market_cap : int or float
        Market capitalisation to classify.

    Returns
    -------
    str
        One of '1T-100B', '100B-10B', '10B-1B', '1B-100M', '100M-1M', 'Under1M'.
        NaN compares false against every bound and therefore yields 'Under1M'.

    Raises
    ------
    TypeError
        If ``market_cap`` cannot be compared with a float (for example ``None``).
    """
    # (inclusive lower bound, label), ordered from the largest bucket down.
    # '100M-1M' deliberately spans two decades, matching its label.
    buckets = (
        (1e11, '1T-100B'),
        (1e10, '100B-10B'),
        (1e9, '10B-1B'),
        (1e8, '1B-100M'),
        (1e6, '100M-1M'),
    )
    for lower_bound, label in buckets:
        if market_cap >= lower_bound:
            return label
    return 'Under1M'

# Label every row with the bucket that categorize_market_cap returns for its 'market_cap'
df['m_cap_cat'] = df['market_cap'].map(categorize_market_cap)


In [11]:
# Disabled code: everything below is a bare string literal, so none of the
# statements inside it run; as the cell's last expression the string is simply
# echoed as the cell output.
# NOTE(review): the "Issue:" is not described. max_supply has missing values in
# this frame (6552 non-null of 10000 rows per df.info()), so any difference
# involving max_supply would be NaN on those rows -- presumably that is the
# problem; confirm, then fix or delete this cell.
"""
Issue:

# Calculate differences
df['TotalSupply_MaxSupply'] = df['total_supply'] - df['max_supply']
df['MaxSupply_CirculatingSupply'] = df['max_supply'] - df['circulating_supply']
df['TotalSupply_CirculatingSupply'] = df['total_supply'] - df['circulating_supply']

# Display the updated DataFrame
print(df[['slug', 'total_supply', 'max_supply', 'circulating_supply', 'TotalSupply_MaxSupply', 'MaxSupply_CirculatingSupply', 'TotalSupply_CirculatingSupply']].head())"""

"\nIssue:\n\n# Calculate differences\ndf['TotalSupply_MaxSupply'] = df['total_supply'] - df['max_supply']\ndf['MaxSupply_CirculatingSupply'] = df['max_supply'] - df['circulating_supply']\ndf['TotalSupply_CirculatingSupply'] = df['total_supply'] - df['circulating_supply']\n\n# Display the updated DataFrame\nprint(df[['slug', 'total_supply', 'max_supply', 'circulating_supply', 'TotalSupply_MaxSupply', 'MaxSupply_CirculatingSupply', 'TotalSupply_CirculatingSupply']].head())"

In [12]:
# Print the frame summary again, now that the coin-age and market-cap-category columns exist
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10000 entries, BTC to WNEON
Data columns (total 35 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   row_names                         10000 non-null  int64         
 1   id                                10000 non-null  int64         
 2   name                              10000 non-null  object        
 3   slug                              10000 non-null  object        
 4   cmc_rank                          10000 non-null  int64         
 5   market_pair_count                 10000 non-null  int64         
 6   circulating_supply                10000 non-null  float64       
 7   self_reported_circulating_supply  10000 non-null  float64       
 8   total_supply                      10000 non-null  float64       
 9   max_supply                        6552 non-null   float64       
 10  is_active                         10000 non-null 

In [13]:
# @title Binary DF