<font size=6>Read STEPN data from DB and analyze --> output to CSV</font>

In [None]:
import datetime, os  # mitosheet
import modin.experimental.pandas as pd
from sqlalchemy import create_engine
import numpy as np

# Connection URL for the 'crypto' MySQL database (B150M). It is read from the
# environment so that the password is not stored in the notebook.
# Expected form: mysql+pymysql://<user>:<password>@<host>/crypto
db_connection_str = os.environ.get('CRYPTO_DB_URL')
if not db_connection_str:
    raise RuntimeError(
        "Set the CRYPTO_DB_URL environment variable, e.g. "
        "mysql+pymysql://<user>:<password>@<host>/crypto"
    )
db_connection = create_engine(db_connection_str)
conn = db_connection.connect()

# Output locations. Both are used as plain string prefixes (path + file name), so
# the trailing slash is required.
interim_savepath = '/mlflow/crypto_data/stepn/'  # per-token intermediate files
end_savepath = '/home/lstm/Github/jumpthepull/data/'  # final combined table

In [None]:
# set pandas display options
# None means "no limit"; the legacy value -1 is deprecated and rejected by current pandas.
pd.set_option('display.max_colwidth', None)  # show full column contents
pd.set_option('display.max_rows', 5)  # max number of rows
pd.set_option('display.max_columns', 500)  # max number of columns
pd.set_option('display.width', 1000)  # max display width in characters

# Pull data from DB, analyze data, and save.

## GST

### GST out & receivers

In [None]:
# Load all GST outflow rows, indexed by transaction timestamp, and preview the tail.
df_gst_out = pd.read_sql(
    "SELECT datetime, amount, receiver FROM stepn_solana_gst_out_bitquery",
    con=db_connection_str,
    index_col='datetime',
)
df_gst_out.tail()

In [None]:
# Show GST outflow rows whose receiver field is an empty string.
df_gst_out[df_gst_out['receiver'] == '']

In [None]:
# Label receiver-less transfers dated on or before the cutoff (2022-05-31) as 'burn'.
done_through = datetime.date(2022, 5, 31)
print("BURN MARKING DONE THROUGH: ", done_through)
df_gst_out.loc[
    (df_gst_out['receiver'] == '') & (df_gst_out.index.date <= done_through),
    'receiver',
] = 'burn'

In [None]:
# by frequency: number of transfers per receiver, most frequent first.
# Group the frame by its 'receiver' column. Calling Series.groupby('receiver') on the
# column itself makes plain pandas look for an index level of that name (KeyError).
gst_receivers_by_frequency = (
    df_gst_out.groupby('receiver').size().sort_values(ascending=False).to_frame()
)
gst_receivers_by_frequency.columns = ['frequency']

In [None]:
# by amount: total amount per receiver, largest first.
# Only 'amount' is selected after grouping so the string 'receiver' key is never
# aggregated next to it (depending on the pandas version, sum() would otherwise
# concatenate the address strings into an extra column).
gst_receivers_by_amount = (
    df_gst_out.groupby('receiver')[['amount']].sum().sort_values(by='amount', ascending=False)
)

In [None]:
# Split burn rows into their own frame and keep only non-burn rows in df_gst_out.
gst_burn = df_gst_out[df_gst_out['receiver'] == 'burn']
df_gst_out = df_gst_out[df_gst_out['receiver'] != 'burn']

In [None]:
# Transfers per calendar day (proxy for active users) -- burn rows excluded.
# count() tallies rows per day, not distinct receiver addresses.
gst_receivers_by_date = df_gst_out[['receiver']].groupby(df_gst_out.index.date).count()

In [None]:
# new receivers by date (proxy for new users) - exclude burn
# An address is "new" only on the first date it appears. The set of already-seen
# addresses must grow after every date; without that update each date would simply
# report all of its unique receivers.
existing_receivers = set()
new_receivers = {}
tx_dates = df_gst_out.index.date  # computed once instead of on every iteration
for dtt in sorted(set(tx_dates)):
    print(dtt)
    users = df_gst_out.loc[tx_dates == dtt, 'receiver'].to_list()
    # sorted, de-duplicated addresses of this date that were not seen before
    new_users = np.setdiff1d(users, list(existing_receivers))
    new_receivers[dtt] = new_users
    existing_receivers.update(new_users.tolist())
# one row per date; the single column (label 0) holds that date's address array
gst_new_receivers_by_date_raw = pd.DataFrame([new_receivers]).T
gst_new_receivers_by_date = gst_new_receivers_by_date_raw.apply(lambda x: len(x[0]), axis=1).to_frame()
gst_new_receivers_by_date.columns = ['new_receivers']

In [None]:
# Total GST outflow amount per calendar day.
gst_out_amount_by_date = df_gst_out[['amount']].groupby(df_gst_out.index.date).sum()

In [None]:
# save data
# ---- raw data: per-date receiver address arrays (feather)
gst_new_receivers_by_date_raw.columns = ['receivers']
gst_new_receivers_by_date_raw.index.name = 'date'
gst_new_receivers_by_date_raw.reset_index().to_feather(
    os.path.join(interim_savepath, 'gst_new_receivers_by_date_raw.feather')
)

# ---- processed data (CSV)
gst_receivers_by_frequency.to_csv(os.path.join(interim_savepath, 'gst_receivers_by_frequency.csv'))
gst_receivers_by_amount.to_csv(os.path.join(interim_savepath, 'gst_receivers_by_amount.csv'))
gst_receivers_by_date.to_csv(os.path.join(interim_savepath, 'gst_receivers_by_date.csv'))
gst_out_amount_by_date.to_csv(os.path.join(interim_savepath, 'gst_out_amount_by_date.csv'))
gst_burn.to_csv(os.path.join(interim_savepath, 'gst_burn.csv'))

### GST in & senders

In [None]:
# Load all GST inflow rows, indexed by transaction timestamp, and preview the tail.
df_gst_in = pd.read_sql(
    "SELECT datetime, amount, sender FROM stepn_solana_gst_in_bitquery",
    con=conn,
    index_col='datetime',
)
df_gst_in.tail()

In [None]:
# Show GST inflow rows whose sender field is an empty string.
df_gst_in[df_gst_in['sender'] == '']

In [None]:
# Label sender-less transfers dated on or before the cutoff (2022-05-31) as 'mint'.
done_through = datetime.date(2022, 5, 31)
print("MINT MARKING DONE THROUGH: ", done_through)
df_gst_in.loc[
    (df_gst_in['sender'] == '') & (df_gst_in.index.date <= done_through),
    'sender',
] = 'mint'

In [None]:
# by frequency: number of transfers per sender, most frequent first.
# Group the frame by its 'sender' column. Calling Series.groupby('sender') on the
# column itself makes plain pandas look for an index level of that name (KeyError).
gst_senders_by_frequency = (
    df_gst_in.groupby('sender').size().sort_values(ascending=False).to_frame()
)
gst_senders_by_frequency.columns = ['frequency']

In [None]:
# by amount: total amount per sender, largest first.
# Only 'amount' is selected after grouping so the string 'sender' key is never
# aggregated next to it (depending on the pandas version, sum() would otherwise
# concatenate the address strings into an extra column).
gst_senders_by_amount = (
    df_gst_in.groupby('sender')[['amount']].sum().sort_values(by='amount', ascending=False)
)

In [None]:
# Split mint rows into their own frame and keep only non-mint rows in df_gst_in.
gst_mint = df_gst_in[df_gst_in['sender'] == 'mint']
df_gst_in = df_gst_in[df_gst_in['sender'] != 'mint']

In [None]:
# Transfers per calendar day (proxy for active users) - mint rows excluded.
# count() tallies rows per day, not distinct sender addresses.
gst_senders_by_date = df_gst_in[['sender']].groupby(df_gst_in.index.date).count()

In [None]:
# new senders by date (proxy for new users)
# An address is "new" only on the first date it appears. The set of already-seen
# addresses must grow after every date; without that update each date would simply
# report all of its unique senders.
existing_senders = set()
new_senders = {}
tx_dates = df_gst_in.index.date  # computed once instead of on every iteration
for dtt in sorted(set(tx_dates)):
    print(dtt)
    users = df_gst_in.loc[tx_dates == dtt, 'sender'].to_list()
    # sorted, de-duplicated addresses of this date that were not seen before
    new_users = np.setdiff1d(users, list(existing_senders))
    new_senders[dtt] = new_users
    existing_senders.update(new_users.tolist())
# one row per date; the single column (label 0) holds that date's address array
gst_new_senders_by_date_raw = pd.DataFrame([new_senders]).T
gst_new_senders_by_date = gst_new_senders_by_date_raw.apply(lambda x: len(x[0]), axis=1).to_frame()
gst_new_senders_by_date.columns = ['new_senders']

In [None]:
# Total GST inflow amount per calendar day.
gst_in_amount_by_date = df_gst_in[['amount']].groupby(df_gst_in.index.date).sum()

In [None]:
# save data
# ---- raw data: per-date sender address arrays (feather)
gst_new_senders_by_date_raw.columns = ['senders']
gst_new_senders_by_date_raw.index.name = 'date'
gst_new_senders_by_date_raw.reset_index().to_feather(
    os.path.join(interim_savepath, 'gst_new_senders_by_date_raw.feather')
)

# ---- processed data (CSV)
gst_senders_by_frequency.to_csv(os.path.join(interim_savepath, 'gst_senders_by_frequency.csv'))
gst_senders_by_amount.to_csv(os.path.join(interim_savepath, 'gst_senders_by_amount.csv'))
gst_senders_by_date.to_csv(os.path.join(interim_savepath, 'gst_senders_by_date.csv'))
gst_in_amount_by_date.to_csv(os.path.join(interim_savepath, 'gst_in_amount_by_date.csv'))
gst_mint.to_csv(os.path.join(interim_savepath, 'gst_mint.csv'))

## GMT

### GMT out & receivers

In [None]:
# Load all GMT outflow rows, indexed by transaction timestamp, and preview the tail.
df_gmt_out = pd.read_sql(
    "SELECT datetime, amount, receiver FROM stepn_solana_gmt_out_bitquery",
    con=db_connection_str,
    index_col='datetime',
)
df_gmt_out.tail()

In [None]:
# Show GMT outflow rows whose receiver field is an empty string.
df_gmt_out[df_gmt_out['receiver'] == '']

In [None]:
# Label receiver-less transfers dated on or before the cutoff (2022-05-31) as 'burn'.
done_through = datetime.date(2022, 5, 31)
print("BURN MARKING DONE THROUGH: ", done_through)
df_gmt_out.loc[
    (df_gmt_out['receiver'] == '') & (df_gmt_out.index.date <= done_through),
    'receiver',
] = 'burn'

In [None]:
# by frequency: number of transfers per receiver, most frequent first.
# Group the frame by its 'receiver' column. Calling Series.groupby('receiver') on the
# column itself makes plain pandas look for an index level of that name (KeyError).
gmt_receivers_by_frequency = (
    df_gmt_out.groupby('receiver').size().sort_values(ascending=False).to_frame()
)
gmt_receivers_by_frequency.columns = ['frequency']

In [None]:
# by amount: total amount per receiver, largest first.
# Only 'amount' is selected after grouping so the string 'receiver' key is never
# aggregated next to it (depending on the pandas version, sum() would otherwise
# concatenate the address strings into an extra column).
gmt_receivers_by_amount = (
    df_gmt_out.groupby('receiver')[['amount']].sum().sort_values(by='amount', ascending=False)
)

In [None]:
# Split burn rows into their own frame and keep only non-burn rows in df_gmt_out.
gmt_burn = df_gmt_out[df_gmt_out['receiver'] == 'burn']
df_gmt_out = df_gmt_out[df_gmt_out['receiver'] != 'burn']

In [None]:
# Transfers per calendar day (proxy for active users) - burn rows excluded.
# NOTE(review): no treasury filter is applied in this statement, so transfers to the
# treasury address are counted too -- confirm whether they should be excluded.
gmt_receivers_by_date = df_gmt_out[['receiver']].groupby(df_gmt_out.index.date).count()

In [None]:
# new receivers by date (proxy for new users) - exclude burn
# NOTE(review): no treasury filter is applied here -- confirm whether it should be.
# An address is "new" only on the first date it appears. The set of already-seen
# addresses must grow after every date; without that update each date would simply
# report all of its unique receivers.
existing_receivers = set()
new_receivers = {}
tx_dates = df_gmt_out.index.date  # computed once instead of on every iteration
for dtt in sorted(set(tx_dates)):
    print(dtt)
    users = df_gmt_out.loc[tx_dates == dtt, 'receiver'].to_list()
    # sorted, de-duplicated addresses of this date that were not seen before
    new_users = np.setdiff1d(users, list(existing_receivers))
    new_receivers[dtt] = new_users
    existing_receivers.update(new_users.tolist())
# one row per date; the single column (label 0) holds that date's address array
gmt_new_receivers_by_date_raw = pd.DataFrame([new_receivers]).T
gmt_new_receivers_by_date = gmt_new_receivers_by_date_raw.apply(lambda x: len(x[0]), axis=1).to_frame()
gmt_new_receivers_by_date.columns = ['new_receivers']

In [None]:
# Total GMT outflow amount per calendar day (treasury transfers are still included here).
gmt_out_amount_by_date = df_gmt_out[['amount']].groupby(df_gmt_out.index.date).sum()

# Same daily total, but without transfers to the treasury address (=supply).
df_gmt_out_ex_treas = df_gmt_out.loc[
    df_gmt_out['receiver'] != 'stepnnmTZcdZQ6HDqUVoU6KcYP8kBAHKzyj2jpiiwaH',
    ['amount'],
]
df_gmt_out_by_date_ex_treas = df_gmt_out_ex_treas.groupby(df_gmt_out_ex_treas.index.date).sum()

In [None]:
# save data
# ---- raw data: per-date receiver address arrays (feather)
gmt_new_receivers_by_date_raw.columns = ['receivers']
gmt_new_receivers_by_date_raw.index.name = 'date'
gmt_new_receivers_by_date_raw.reset_index().to_feather(
    os.path.join(interim_savepath, 'gmt_new_receivers_by_date_raw.feather')
)

# ---- processed data (CSV)
gmt_receivers_by_frequency.to_csv(os.path.join(interim_savepath, 'gmt_receivers_by_frequency.csv'))
gmt_receivers_by_amount.to_csv(os.path.join(interim_savepath, 'gmt_receivers_by_amount.csv'))
gmt_receivers_by_date.to_csv(os.path.join(interim_savepath, 'gmt_receivers_by_date.csv'))
gmt_out_amount_by_date.to_csv(os.path.join(interim_savepath, 'gmt_out_amount_by_date.csv'))
df_gmt_out_by_date_ex_treas.to_csv(os.path.join(interim_savepath, 'gmt_out_amount_by_date_ex_treas.csv'))
gmt_burn.to_csv(os.path.join(interim_savepath, 'gmt_burn.csv'))

### GMT in & senders

In [None]:
# Load all GMT inflow rows, indexed by transaction timestamp, and preview the tail.
df_gmt_in = pd.read_sql(
    "SELECT datetime, amount, sender FROM stepn_solana_gmt_in_bitquery",
    con=conn,
    index_col='datetime',
)
df_gmt_in.tail()

In [None]:
# Show GMT inflow rows whose sender field is an empty string.
df_gmt_in[df_gmt_in['sender'] == '']

In [None]:
# Label sender-less transfers dated on or before the cutoff (2022-05-31) as 'mint'.
done_through = datetime.date(2022, 5, 31)
print("MINT MARKING DONE THROUGH: ", done_through)
df_gmt_in.loc[
    (df_gmt_in['sender'] == '') & (df_gmt_in.index.date <= done_through),
    'sender',
] = 'mint'

In [None]:
# by frequency: number of transfers per sender, most frequent first.
# Group the frame by its 'sender' column. Calling Series.groupby('sender') on the
# column itself makes plain pandas look for an index level of that name (KeyError).
gmt_senders_by_frequency = (
    df_gmt_in.groupby('sender').size().sort_values(ascending=False).to_frame()
)
gmt_senders_by_frequency.columns = ['frequency']

In [None]:
# by amount: total amount per sender, largest first.
# Only 'amount' is selected after grouping so the string 'sender' key is never
# aggregated next to it (depending on the pandas version, sum() would otherwise
# concatenate the address strings into an extra column).
gmt_senders_by_amount = (
    df_gmt_in.groupby('sender')[['amount']].sum().sort_values(by='amount', ascending=False)
)

In [None]:
# Split mint rows into their own frame and keep only non-mint rows in df_gmt_in.
gmt_mint = df_gmt_in[df_gmt_in['sender'] == 'mint']
df_gmt_in = df_gmt_in[df_gmt_in['sender'] != 'mint']

In [None]:
# Transfers per calendar day (proxy for active users).
# count() tallies rows per day, not distinct sender addresses.
gmt_senders_by_date = df_gmt_in[['sender']].groupby(df_gmt_in.index.date).count()

In [None]:
# new senders by date (proxy for new users)
# An address is "new" only on the first date it appears. The set of already-seen
# addresses must grow after every date; without that update each date would simply
# report all of its unique senders.
existing_senders = set()
new_senders = {}
tx_dates = df_gmt_in.index.date  # computed once instead of on every iteration
for dtt in sorted(set(tx_dates)):
    print(dtt)
    users = df_gmt_in.loc[tx_dates == dtt, 'sender'].to_list()
    # sorted, de-duplicated addresses of this date that were not seen before
    new_users = np.setdiff1d(users, list(existing_senders))
    new_senders[dtt] = new_users
    existing_senders.update(new_users.tolist())
# one row per date; the single column (label 0) holds that date's address array
gmt_new_senders_by_date_raw = pd.DataFrame([new_senders]).T
gmt_new_senders_by_date = gmt_new_senders_by_date_raw.apply(lambda x: len(x[0]), axis=1).to_frame()
gmt_new_senders_by_date.columns = ['new_senders']

In [None]:
# Total GMT inflow amount per calendar day.
gmt_in_amount_by_date = df_gmt_in[['amount']].groupby(df_gmt_in.index.date).sum()

In [None]:
# save data
# ---- raw data: per-date sender address arrays (feather)
gmt_new_senders_by_date_raw.columns = ['senders']
gmt_new_senders_by_date_raw.index.name = 'date'
gmt_new_senders_by_date_raw.reset_index().to_feather(
    os.path.join(interim_savepath, 'gmt_new_senders_by_date_raw.feather')
)

# ---- processed data (CSV)
gmt_senders_by_frequency.to_csv(os.path.join(interim_savepath, 'gmt_senders_by_frequency.csv'))
gmt_senders_by_amount.to_csv(os.path.join(interim_savepath, 'gmt_senders_by_amount.csv'))
gmt_senders_by_date.to_csv(os.path.join(interim_savepath, 'gmt_senders_by_date.csv'))
gmt_in_amount_by_date.to_csv(os.path.join(interim_savepath, 'gmt_in_amount_by_date.csv'))
gmt_mint.to_csv(os.path.join(interim_savepath, 'gmt_mint.csv'))

## SOL

### SOL out & receivers

In [None]:
# Load token outflow rows, then keep only SOL transfers (amount and receiver columns).
df_sol_out = pd.read_sql(
    "SELECT datetime, amount, symbol, receiver FROM stepn_solana_token_out_bitquery",
    con=db_connection_str,
    index_col='datetime',
)
df_sol_out = df_sol_out.loc[
    df_sol_out['symbol'] == 'SOL',
    ['amount', 'receiver'],
]
df_sol_out.tail()

In [None]:
# Show SOL outflow rows whose receiver field is an empty string.
df_sol_out[df_sol_out['receiver'] == '']

In [None]:
# by frequency: number of transfers per receiver, most frequent first.
# Group the frame by its 'receiver' column. Calling Series.groupby('receiver') on the
# column itself makes plain pandas look for an index level of that name (KeyError).
sol_receivers_by_frequency = (
    df_sol_out.groupby('receiver').size().sort_values(ascending=False).to_frame()
)
sol_receivers_by_frequency.columns = ['frequency']

In [None]:
# by amount: total amount per receiver, largest first.
# Only 'amount' is selected after grouping so the string 'receiver' key is never
# aggregated next to it (depending on the pandas version, sum() would otherwise
# concatenate the address strings into an extra column).
sol_receivers_by_amount = (
    df_sol_out.groupby('receiver')[['amount']].sum().sort_values(by='amount', ascending=False)
)

In [None]:
# Transfers per calendar day (proxy for active users).
# count() tallies rows per day, not distinct receiver addresses.
sol_receivers_by_date = df_sol_out[['receiver']].groupby(df_sol_out.index.date).count()

In [None]:
# new receivers by date (proxy for new users)
# An address is "new" only on the first date it appears. The set of already-seen
# addresses must grow after every date; without that update each date would simply
# report all of its unique receivers.
existing_receivers = set()
new_receivers = {}
tx_dates = df_sol_out.index.date  # computed once instead of on every iteration
for dtt in sorted(set(tx_dates)):
    print(dtt)
    users = df_sol_out.loc[tx_dates == dtt, 'receiver'].to_list()
    # sorted, de-duplicated addresses of this date that were not seen before
    new_users = np.setdiff1d(users, list(existing_receivers))
    new_receivers[dtt] = new_users
    existing_receivers.update(new_users.tolist())
# one row per date; the single column (label 0) holds that date's address array
sol_new_receivers_by_date_raw = pd.DataFrame([new_receivers]).T
sol_new_receivers_by_date = sol_new_receivers_by_date_raw.apply(lambda x: len(x[0]), axis=1).to_frame()
sol_new_receivers_by_date.columns = ['new_receivers']

In [None]:
# Total SOL outflow amount per calendar day.
sol_out_amount_by_date = df_sol_out[['amount']].groupby(df_sol_out.index.date).sum()

# Same daily total, but without transfers to the treasury address (=supply).
# NOTE(review): df_sol_out_by_date_ex_treas is not written by the SOL save cell in
# this notebook -- confirm whether it should be saved like the GMT equivalent.
df_sol_out_ex_treas = df_sol_out.loc[
    df_sol_out['receiver'] != 'stepnnmTZcdZQ6HDqUVoU6KcYP8kBAHKzyj2jpiiwaH',
    ['amount'],
]
df_sol_out_by_date_ex_treas = df_sol_out_ex_treas.groupby(df_sol_out_ex_treas.index.date).sum()

In [None]:
# save data
# ---- raw data: per-date receiver address arrays (feather)
sol_new_receivers_by_date_raw.columns = ['receivers']
sol_new_receivers_by_date_raw.index.name = 'date'
sol_new_receivers_by_date_raw.reset_index().to_feather(
    os.path.join(interim_savepath, 'sol_new_receivers_by_date_raw.feather')
)

# ---- processed data (CSV)
sol_receivers_by_frequency.to_csv(os.path.join(interim_savepath, 'sol_receivers_by_frequency.csv'))
sol_receivers_by_amount.to_csv(os.path.join(interim_savepath, 'sol_receivers_by_amount.csv'))
sol_receivers_by_date.to_csv(os.path.join(interim_savepath, 'sol_receivers_by_date.csv'))
sol_out_amount_by_date.to_csv(os.path.join(interim_savepath, 'sol_out_amount_by_date.csv'))

### SOL in & senders

In [None]:
# Load token inflow rows, then keep only SOL transfers (amount and sender columns).
df_sol_in = pd.read_sql(
    "SELECT datetime, amount, sender, symbol FROM stepn_solana_token_in_bitquery",
    con=conn,
    index_col='datetime',
)
df_sol_in = df_sol_in.loc[
    df_sol_in['symbol'] == 'SOL',
    ['amount', 'sender'],
]
df_sol_in.tail()

In [None]:
# Show SOL inflow rows whose sender field is an empty string.
df_sol_in[df_sol_in['sender'] == '']

In [None]:
# by frequency: number of transfers per sender, most frequent first.
# Group the frame by its 'sender' column. Calling Series.groupby('sender') on the
# column itself makes plain pandas look for an index level of that name (KeyError).
sol_senders_by_frequency = (
    df_sol_in.groupby('sender').size().sort_values(ascending=False).to_frame()
)
sol_senders_by_frequency.columns = ['frequency']

In [None]:
# by amount: total amount per sender, largest first.
# Only 'amount' is selected after grouping so the string 'sender' key is never
# aggregated next to it (depending on the pandas version, sum() would otherwise
# concatenate the address strings into an extra column).
sol_senders_by_amount = (
    df_sol_in.groupby('sender')[['amount']].sum().sort_values(by='amount', ascending=False)
)

In [None]:
# Transfers per calendar day (proxy for active users).
# count() tallies rows per day, not distinct sender addresses.
sol_senders_by_date = df_sol_in[['sender']].groupby(df_sol_in.index.date).count()

In [None]:
# new senders by date (proxy for new users)
# An address is "new" only on the first date it appears. The set of already-seen
# addresses must grow after every date; without that update each date would simply
# report all of its unique senders.
existing_senders = set()
new_senders = {}
tx_dates = df_sol_in.index.date  # computed once instead of on every iteration
for dtt in sorted(set(tx_dates)):
    print(dtt)
    users = df_sol_in.loc[tx_dates == dtt, 'sender'].to_list()
    # sorted, de-duplicated addresses of this date that were not seen before
    new_users = np.setdiff1d(users, list(existing_senders))
    new_senders[dtt] = new_users
    existing_senders.update(new_users.tolist())
# one row per date; the single column (label 0) holds that date's address array
sol_new_senders_by_date_raw = pd.DataFrame([new_senders]).T
sol_new_senders_by_date = sol_new_senders_by_date_raw.apply(lambda x: len(x[0]), axis=1).to_frame()
sol_new_senders_by_date.columns = ['new_senders']

In [None]:
# Total SOL inflow amount per calendar day.
sol_in_amount_by_date = df_sol_in[['amount']].groupby(df_sol_in.index.date).sum()

In [None]:
# save data
# ---- raw data: per-date sender address arrays (feather)
sol_new_senders_by_date_raw.columns = ['senders']
sol_new_senders_by_date_raw.index.name = 'date'
sol_new_senders_by_date_raw.reset_index().to_feather(
    os.path.join(interim_savepath, 'sol_new_senders_by_date_raw.feather')
)

# ---- processed data (CSV)
sol_senders_by_frequency.to_csv(os.path.join(interim_savepath, 'sol_senders_by_frequency.csv'))
sol_senders_by_amount.to_csv(os.path.join(interim_savepath, 'sol_senders_by_amount.csv'))
sol_senders_by_date.to_csv(os.path.join(interim_savepath, 'sol_senders_by_date.csv'))
sol_in_amount_by_date.to_csv(os.path.join(interim_savepath, 'sol_in_amount_by_date.csv'))

## NFT

### NFT out & receivers

In [None]:
# Load token outflow rows, then keep non-SOL tokens with zero decimals (treated as NFTs).
df_nft_out = pd.read_sql(
    "SELECT datetime, amount, symbol, receiver, decimals FROM stepn_solana_token_out_bitquery",
    con=db_connection_str,
    index_col='datetime',
)
df_nft_out = df_nft_out.loc[
    (df_nft_out['symbol'] != 'SOL') & (df_nft_out['decimals'] == 0),
    ['amount', 'receiver'],
]
df_nft_out.tail()

In [None]:
# Show NFT outflow rows whose receiver field is an empty string.
df_nft_out[df_nft_out['receiver'] == '']

In [None]:
# by frequency: number of transfers per receiver, most frequent first.
# Group the frame by its 'receiver' column. Calling Series.groupby('receiver') on the
# column itself makes plain pandas look for an index level of that name (KeyError).
nft_receivers_by_frequency = (
    df_nft_out.groupby('receiver').size().sort_values(ascending=False).to_frame()
)
nft_receivers_by_frequency.columns = ['frequency']

In [None]:
# by amount: total amount per receiver, largest first.
# Only 'amount' is selected after grouping so the string 'receiver' key is never
# aggregated next to it (depending on the pandas version, sum() would otherwise
# concatenate the address strings into an extra column).
nft_receivers_by_amount = (
    df_nft_out.groupby('receiver')[['amount']].sum().sort_values(by='amount', ascending=False)
)

In [None]:
# Transfers per calendar day (proxy for active users).
# count() tallies rows per day, not distinct receiver addresses.
nft_receivers_by_date = df_nft_out[['receiver']].groupby(df_nft_out.index.date).count()

In [None]:
# new receivers by date (proxy for new users)
# An address is "new" only on the first date it appears. The set of already-seen
# addresses must grow after every date; without that update each date would simply
# report all of its unique receivers.
existing_receivers = set()
new_receivers = {}
tx_dates = df_nft_out.index.date  # computed once instead of on every iteration
for dtt in sorted(set(tx_dates)):
    print(dtt)
    users = df_nft_out.loc[tx_dates == dtt, 'receiver'].to_list()
    # sorted, de-duplicated addresses of this date that were not seen before
    new_users = np.setdiff1d(users, list(existing_receivers))
    new_receivers[dtt] = new_users
    existing_receivers.update(new_users.tolist())
# one row per date; the single column (label 0) holds that date's address array
nft_new_receivers_by_date_raw = pd.DataFrame([new_receivers]).T
nft_new_receivers_by_date = nft_new_receivers_by_date_raw.apply(lambda x: len(x[0]), axis=1).to_frame()
nft_new_receivers_by_date.columns = ['new_receivers']

In [None]:
# Total NFT outflow amount per calendar day.
nft_out_amount_by_date = df_nft_out[['amount']].groupby(df_nft_out.index.date).sum()

In [None]:
# save data
# ---- raw data: per-date receiver address arrays (feather)
nft_new_receivers_by_date_raw.columns = ['receivers']
nft_new_receivers_by_date_raw.index.name = 'date'
nft_new_receivers_by_date_raw.reset_index().to_feather(
    os.path.join(interim_savepath, 'nft_new_receivers_by_date_raw.feather')
)

# ---- processed data (CSV)
nft_receivers_by_frequency.to_csv(os.path.join(interim_savepath, 'nft_receivers_by_frequency.csv'))
nft_receivers_by_amount.to_csv(os.path.join(interim_savepath, 'nft_receivers_by_amount.csv'))
nft_receivers_by_date.to_csv(os.path.join(interim_savepath, 'nft_receivers_by_date.csv'))
nft_out_amount_by_date.to_csv(os.path.join(interim_savepath, 'nft_out_amount_by_date.csv'))

### NFT in & senders

In [None]:
# Load token inflow rows, then keep non-SOL tokens with zero decimals (treated as NFTs).
df_nft_in = pd.read_sql(
    "SELECT datetime, amount, symbol, sender, decimals FROM stepn_solana_token_in_bitquery",
    con=db_connection_str,
    index_col='datetime',
)
df_nft_in = df_nft_in.loc[
    (df_nft_in['symbol'] != 'SOL') & (df_nft_in['decimals'] == 0),
    ['amount', 'sender'],
]
df_nft_in.tail()

In [None]:
# Show NFT inflow rows whose sender field is an empty string.
df_nft_in[df_nft_in['sender'] == '']

In [None]:
# Label sender-less transfers dated on or before the cutoff (2022-05-31) as 'mint'.
done_through = datetime.date(2022, 5, 31)
print("MINT MARKING DONE THROUGH: ", done_through)
df_nft_in.loc[
    (df_nft_in['sender'] == '') & (df_nft_in.index.date <= done_through),
    'sender',
] = 'mint'

In [None]:
# by frequency: number of transfers per sender, most frequent first.
# Group the frame by its 'sender' column. Calling Series.groupby('sender') on the
# column itself makes plain pandas look for an index level of that name (KeyError).
nft_senders_by_frequency = (
    df_nft_in.groupby('sender').size().sort_values(ascending=False).to_frame()
)
nft_senders_by_frequency.columns = ['frequency']

In [None]:
# by amount: total amount per sender, largest first.
# Only 'amount' is selected after grouping so the string 'sender' key is never
# aggregated next to it (depending on the pandas version, sum() would otherwise
# concatenate the address strings into an extra column).
nft_senders_by_amount = (
    df_nft_in.groupby('sender')[['amount']].sum().sort_values(by='amount', ascending=False)
)

In [None]:
# Split mint rows into their own frame and keep only non-mint rows in df_nft_in.
nft_mint = df_nft_in[df_nft_in['sender'] == 'mint']
df_nft_in = df_nft_in[df_nft_in['sender'] != 'mint']

In [None]:
# Transfers per calendar day (proxy for active users).
# count() tallies rows per day, not distinct sender addresses.
nft_senders_by_date = df_nft_in[['sender']].groupby(df_nft_in.index.date).count()

In [None]:
# new senders by date (proxy for new users)
# An address is "new" only on the first date it appears. The set of already-seen
# addresses must grow after every date; without that update each date would simply
# report all of its unique senders.
existing_senders = set()
new_senders = {}
tx_dates = df_nft_in.index.date  # computed once instead of on every iteration
for dtt in sorted(set(tx_dates)):
    print(dtt)
    users = df_nft_in.loc[tx_dates == dtt, 'sender'].to_list()
    # sorted, de-duplicated addresses of this date that were not seen before
    new_users = np.setdiff1d(users, list(existing_senders))
    new_senders[dtt] = new_users
    existing_senders.update(new_users.tolist())
# one row per date; the single column (label 0) holds that date's address array
nft_new_senders_by_date_raw = pd.DataFrame([new_senders]).T
nft_new_senders_by_date = nft_new_senders_by_date_raw.apply(lambda x: len(x[0]), axis=1).to_frame()
nft_new_senders_by_date.columns = ['new_senders']

In [None]:
# Total NFT inflow amount per calendar day.
nft_in_amount_by_date = df_nft_in[['amount']].groupby(df_nft_in.index.date).sum()

In [None]:
# save data
# ---- raw data: per-date sender address arrays (feather)
nft_new_senders_by_date_raw.columns = ['senders']
nft_new_senders_by_date_raw.index.name = 'date'
nft_new_senders_by_date_raw.reset_index().to_feather(
    os.path.join(interim_savepath, 'nft_new_senders_by_date_raw.feather')
)

# ---- processed data (CSV)
nft_senders_by_frequency.to_csv(os.path.join(interim_savepath, 'nft_senders_by_frequency.csv'))
nft_senders_by_amount.to_csv(os.path.join(interim_savepath, 'nft_senders_by_amount.csv'))
nft_senders_by_date.to_csv(os.path.join(interim_savepath, 'nft_senders_by_date.csv'))
nft_in_amount_by_date.to_csv(os.path.join(interim_savepath, 'nft_in_amount_by_date.csv'))
nft_mint.to_csv(os.path.join(interim_savepath, 'nft_mint.csv'))

## Other tokens

In [None]:
# Outflows of other tokens: everything that is neither SOL nor a zero-decimals token.
# The result is written to CSV ordered by amount, largest first.
df_tokens_out = pd.read_sql(
    "SELECT datetime, mint, symbol, amount, sender, receiver, decimals, signature FROM stepn_solana_token_out_bitquery", 
    index_col='datetime',
    con=db_connection_str,
)
df_tokens_out = df_tokens_out[
    (df_tokens_out['symbol'] != 'SOL') & (df_tokens_out['decimals'] != 0)
]
df_tokens_out.sort_values('amount', ascending=False).to_csv(
    os.path.join(interim_savepath, 'tokens_out.csv')
)

In [None]:
# Inflows of other tokens: everything that is neither SOL nor a zero-decimals token.
# The result is written to CSV ordered by amount, largest first.
df_tokens_in = pd.read_sql(
    "SELECT datetime, mint, symbol, amount, sender, receiver, decimals, signature FROM stepn_solana_token_in_bitquery", 
    index_col='datetime',
    con=db_connection_str,
)
df_tokens_in = df_tokens_in[
    (df_tokens_in['symbol'] != 'SOL') & (df_tokens_in['decimals'] != 0)
]
df_tokens_in.sort_values('amount', ascending=False).to_csv(
    os.path.join(interim_savepath, 'tokens_in.csv')
)

# Load saved data, clean up, and combine into one table

In [None]:
# Alphabetical listing of every entry in the interim save folder.
sorted(entry.name for entry in os.scandir(interim_savepath))

In [None]:
# define file names to process

# new receivers & senders feather data (one file per token and direction).
# 'gmt_new_receivers_by_date_raw.feather' is written by the GMT-out save cell and was
# missing from this list, which left GMT new receivers out of the combined table.
raw_feather_files = [
    'gmt_new_receivers_by_date_raw.feather',
    'gmt_new_senders_by_date_raw.feather',
    'gst_new_receivers_by_date_raw.feather',
    'gst_new_senders_by_date_raw.feather',
    'sol_new_receivers_by_date_raw.feather',
    'sol_new_senders_by_date_raw.feather',
    'nft_new_receivers_by_date_raw.feather',
    'nft_new_senders_by_date_raw.feather'
]

# data by date (single-column CSVs indexed by date)
dataname_by_date = sorted([
    'gmt_in_amount_by_date.csv',
    'gmt_out_amount_by_date.csv',
    'gmt_receivers_by_date.csv',
    'gmt_senders_by_date.csv',
    'gst_in_amount_by_date.csv',
    'gst_out_amount_by_date.csv',
    'gst_receivers_by_date.csv',
    'gst_senders_by_date.csv',
    'nft_in_amount_by_date.csv',
    'nft_out_amount_by_date.csv',
    'nft_receivers_by_date.csv',
    'nft_senders_by_date.csv',
    'sol_in_amount_by_date.csv',
    'sol_out_amount_by_date.csv',
    'sol_receivers_by_date.csv',
    'sol_senders_by_date.csv'
])

# extra by-date file whose name does not follow the '<name>_by_date.csv' pattern
dataname_by_date_add = 'gmt_out_amount_by_date_ex_treas.csv'

# per-address tables, paired as [..._by_amount, ..._by_frequency]
dataname_by_order = ([
    ['gmt_receivers_by_amount.csv', 'gmt_receivers_by_frequency.csv'],
    ['gmt_senders_by_amount.csv', 'gmt_senders_by_frequency.csv'],
    ['gst_receivers_by_amount.csv', 'gst_receivers_by_frequency.csv'],
    ['gst_senders_by_amount.csv', 'gst_senders_by_frequency.csv'],
    ['nft_receivers_by_amount.csv', 'nft_receivers_by_frequency.csv'],
    ['nft_senders_by_amount.csv', 'nft_senders_by_frequency.csv'],
    ['sol_receivers_by_amount.csv', 'sol_receivers_by_frequency.csv'],
    ['sol_senders_by_amount.csv', 'sol_senders_by_frequency.csv']
])

In [None]:
# process data

# new senders and receivers, stored as per-date address arrays in feather files
new_by_date_parts = []
for file in raw_feather_files:
    # str.replace removes the exact suffix. str.rstrip strips a *set of characters*
    # and only gave the right name because every current prefix ends in 's'.
    colname = file.replace('_by_date_raw.feather', '')
    tmp = pd.read_feather(interim_savepath + file).set_index('date')
    # Normalise the index to datetimes so it can align with the CSV-derived frames,
    # which are read with parse_dates=True.
    # NOTE(review): assumes the stored 'date' values are dates/datetimes -- confirm.
    tmp.index = pd.to_datetime(tmp.index)
    # number of addresses in each date's array
    tmp[colname] = tmp[tmp.columns[-1]].apply(len)
    new_by_date_parts.append(tmp.loc[:, [colname]])
# concatenate once instead of growing the frame inside the loop
new_by_date = pd.concat(new_by_date_parts, axis=1)

# data by date, saved in CSV: one column per file, named after the file
data_by_date_parts = []
for file in dataname_by_date:
    tmp = pd.read_csv(interim_savepath + file, parse_dates=True, index_col=0)
    tmp.columns = [file.replace('_by_date.csv', '')]
    data_by_date_parts.append(tmp)
data_by_date = pd.concat(data_by_date_parts, axis=1)
data_by_date.index.name = 'date'

# additional data_by_date
print(dataname_by_date_add)  # = 'gmt_out_amount_by_date_ex_treas.csv'
gmt_out_ex_treas = pd.read_csv(interim_savepath + dataname_by_date_add, parse_dates=True, index_col=0)
gmt_out_ex_treas.columns = ['gmt_out_amount_ex_treas']
data_by_date = pd.concat([data_by_date, gmt_out_ex_treas], axis=1)

# data by order, saved in CSV --> combine amount & frequency in one df
for file in dataname_by_order:
    f1 = pd.read_csv(interim_savepath + file[0], index_col=0)
    f2 = pd.read_csv(interim_savepath + file[1], index_col=0)
    # Keep exactly the two intended columns. The original assigned the names to a
    # misspelled attribute ('colums'), which had no effect on the frame.
    ff = pd.concat([f1, f2], axis=1).loc[:, ['amount', 'frequency']]
    ff.index.name = file[0].replace('_by_amount.csv', '')
    ff = ff.sort_values('amount', ascending=False)
    print(ff.index.name)
    # Publish the frame as a notebook variable named e.g. 'gst_receivers'
    # (used by the final save cell) without resorting to exec().
    globals()[ff.index.name] = ff

In [None]:
# final clean up and save
# Join the new-address counts with the per-date aggregates, order the columns
# alphabetically, and write the combined table to the end save path.
jtp_data_by_date = pd.concat([new_by_date, data_by_date], axis=1)
jtp_data_by_date = jtp_data_by_date.reindex(columns=sorted(jtp_data_by_date.columns))
jtp_data_by_date.to_csv(os.path.join(end_savepath, 'jtp_data_by_date.csv'))

# Per-token receiver / sender tables (amount + frequency) go to the interim folder.
gst_receivers.to_csv(os.path.join(interim_savepath, 'gst_receivers_final.csv'))
gst_senders.to_csv(os.path.join(interim_savepath, 'gst_senders_final.csv'))
gmt_receivers.to_csv(os.path.join(interim_savepath, 'gmt_receivers_final.csv'))
gmt_senders.to_csv(os.path.join(interim_savepath, 'gmt_senders_final.csv'))
sol_receivers.to_csv(os.path.join(interim_savepath, 'sol_receivers_final.csv'))
sol_senders.to_csv(os.path.join(interim_savepath, 'sol_senders_final.csv'))
nft_receivers.to_csv(os.path.join(interim_savepath, 'nft_receivers_final.csv'))
nft_senders.to_csv(os.path.join(interim_savepath, 'nft_senders_final.csv'))

# scratch

In [None]:
# save data to S3 using s3fs
# NOTE(review): scratch cell. `csv_savepath` is not defined in any visible cell of this
# notebook, so the fs.put call raises NameError unless it is defined elsewhere.
# Likewise, no visible cell writes "stepn_solana_combined_table.csv" -- confirm the
# intended source file before running.

import s3fs
fs = s3fs.S3FileSystem(anon=False)
fs.ls('jumpthepull')
# upload the local CSV to the 'jumpthepull' location under the same file name
fs.put(csv_savepath + "stepn_solana_combined_table.csv", "jumpthepull/stepn_solana_combined_table.csv")