In [215]:
from dotenv import load_dotenv
import os
import requests
from bs4 import BeautifulSoup
import random
from decimal import Decimal
from re import sub
import pandas as pd
from tabulate import tabulate
import dateparser
import numpy as np

In [216]:
# Load variables from the .env file in the current working directory into the
# process environment, then read the CryptoRank API key from it.
# cr_api is None when CRYPTORANK_API is not set.
dotenv_path = os.path.join(os.getcwd(), ".env")
load_dotenv(dotenv_path)
cr_api = os.environ.get("CRYPTORANK_API")

In [217]:
# Requests never time out unless a timeout is passed explicitly, so give every
# call an upper bound (seconds) instead of letting the notebook hang.
REQUEST_TIMEOUT = 30

# get a list of user agents for randomization
USER_AGENTS_URL = "https://raw.githubusercontent.com/DavidWittman/requests-random-user-agent/master/requests_random_user_agent/useragents.txt"
ua_response = requests.get(USER_AGENTS_URL, timeout=REQUEST_TIMEOUT)
# fail here rather than treating an HTTP error body as a list of user agents
ua_response.raise_for_status()
# splitlines() + filtering drops blank entries (e.g. the one a trailing newline
# would leave behind), so random.choice can never pick an empty User-Agent
userAgents = [line.strip() for line in ua_response.text.splitlines() if line.strip()]

# get a list of cryptocurrencies data
url = "https://api.cryptorank.io/v1/currencies"
p = {
    'api_key': cr_api,
    'limit': 500,
}
currencies_response = requests.get(url, params=p, timeout=REQUEST_TIMEOUT)
# surface a rejected request as an HTTP error instead of a KeyError on "data"
currencies_response.raise_for_status()
currencies = currencies_response.json()["data"]

In [218]:
# pull the slug identifier out of every currency record
currencySlugs = [currency["slug"] for currency in currencies]
currencySlugs

['bitcoin',
 'ethereum',
 'tether',
 'bnb',
 'usdcoin',
 'ripple',
 'solana',
 'cardano',
 'luna',
 'terra-usd',
 'binance-usd',
 'dogecoin',
 'polkadot',
 'avalanche',
 'lido-staked-ether',
 'wrapped-bitcoin',
 'shiba-inu',
 'tron',
 'multicollateraldai',
 'near-protocol',
 'matic-network',
 'crypto-com-cro',
 'litecoin',
 'leobitfinex',
 'bitcoin-cash',
 'algorand',
 'chainlink',
 'cosmos',
 'ftx-token',
 'okb',
 'stellar',
 'monero',
 'ethereum-classic',
 'uniswap',
 'ape-coin',
 'bit-dao',
 'vechainthor',
 'magic-internet-money',
 'internet-computer',
 'frax_uniswap',
 'hedera-hashgraph',
 'filecoin',
 'elrond-gold',
 'sandbox',
 'axie-infinity',
 'tezos',
 'defichain',
 'theta-token',
 'the-graph',
 'pancakeswap',
 'decentraland',
 'eos',
 'bittorrent-new',
 'aave',
 'thor-chain',
 'klaytn',
 'fantom',
 'kucoin-shares',
 'stepn',
 'flow',
 'chain-new',
 'zcash',
 'frax-share',
 'helium-hnt',
 'huobi-token',
 'bitcoin-sv',
 'convex-finance',
 'iota',
 'true-usd',
 'waves',
 'ecash'

In [219]:
tokenSaleLink = "https://cryptorank.io/ico/"
master_dict = []

# Upper bound (seconds) for each page request; requests has no default timeout.
SCRAPE_TIMEOUT = 30

# Class attribute values used to locate the page components.
TOKENOMICS_CLASS = "styled__TokenomicsValue-sc-3338go-2 dCnyxZ"
ROUND_CLASS = "styled__IcoRoundContainer-sc-19xalay-0 gcmMjD"
ROUND_COLUMN_CLASS = "styled__IcoColumn-sc-19xalay-4 kElqSx"
COLUMN_HEADER_CLASS = "styled__HeaderColumn-sc-19xalay-3 iOiuVh"
# Tried in order: the lock-up period column uses a different div class than the rest.
COLUMN_CONTENT_CLASSES = (
    "styled__ContentColumn-sc-1g6ocb2-1 cltaKE",
    "styled__ContentColumn-sc-1g6ocb2-1 styled__LongDescriptionColumn-sc-1g6ocb2-2 cltaKE bGlSHw",
)


def _to_decimal(text):
    """Strip everything but digits and dots from ``text`` and return it as a Decimal.

    Returns None when nothing numeric is left (or the remainder is malformed), so
    one odd field cannot abort the whole scrape.
    """
    try:
        return Decimal(sub(r'[^\d.]', '', text))
    except ArithmeticError:
        # decimal.InvalidOperation is a subclass of ArithmeticError
        return None


def _round_end_month(round_tag):
    """Return the end month of a round as "YYYY-MM", or np.nan when unavailable."""
    date_tag = round_tag.find("span", {"class": "info-text__date"})
    if date_tag is None:
        return np.nan

    round_date = date_tag.text
    if "—" in round_date:
        # if range, use end month
        candidate = round_date.split("—")[-1]
    elif "in the Past" in round_date:
        # if useless, replace with nan
        return np.nan
    else:
        # if no range, keep as is
        candidate = round_date

    try:
        parsed = dateparser.parse(candidate)
        # dateparser.parse returns None when it cannot interpret the text
        return np.nan if parsed is None else parsed.strftime("%Y-%m")
    except (ValueError, OverflowError):
        return np.nan


def _parse_allocation(assetSoup):
    """Collect the tokenomics values of a page as a ``{column name: Decimal}`` dict."""
    fields = {}
    for value_tag in assetSoup.find_all("p", {"class": TOKENOMICS_CLASS}):
        text = value_tag.text
        if ":" in text:
            # "<name>: <value> (<extra>)" -> keep the part before the parenthesis.
            # No character is sliced off here: the regex in _to_decimal already
            # discards whitespace, and slicing would eat a digit whenever the
            # value is not followed by " (".
            element = text.split("(")[0].split(":")
            if len(element) < 2:
                # the only colon sits inside the parenthesis; nothing to parse
                continue
            colName, raw_value = element[0], element[1]
        else:
            colName, raw_value = "Total Raised", text

        colValue = _to_decimal(raw_value)
        if colValue is not None:
            fields[colName] = colValue
    return fields


def _parse_rounds(assetSoup):
    """Collect the per-round fields of a page, numbering rounds R1, R2, ... by end month."""
    fields = {}
    rounds_component = assetSoup.find_all("div", {"class": ROUND_CLASS})

    # One entry per round (np.nan included) so positions always line up with
    # rounds_component when the rounds are reordered below.
    round_months = [_round_end_month(round_tag) for round_tag in rounds_component]

    # chronological order; rounds without a usable date go last (stable sort)
    roundOrder = sorted(
        range(len(rounds_component)),
        key=lambda idx: (not isinstance(round_months[idx], str), str(round_months[idx])),
    )

    for roundCount, idx in enumerate(roundOrder, start=1):
        round_tag = rounds_component[idx]

        type_tag = round_tag.find("span", {"class": "info-text__type"})
        round_type = type_tag.text.split("is ended")[0] if type_tag is not None else np.nan
        fields[f"R{roundCount} type"] = round_type
        fields[f"R{roundCount} date"] = round_months[idx]

        for column in round_tag.find_all("div", {"class": ROUND_COLUMN_CLASS}):
            header = column.find("h4", {"class": COLUMN_HEADER_CLASS})
            if header is None:
                continue
            # take the first content div variant that actually holds a <p>
            for content_class in COLUMN_CONTENT_CLASSES:
                content = column.find("div", {"class": content_class})
                paragraph = content.find("p") if content is not None else None
                if paragraph is not None:
                    fields[f"R{roundCount} {header.text}"] = paragraph.text
                    break
    return fields


for asset in currencySlugs:

    # get asset tokenomics page with randomized user agent
    scrapeLink = tokenSaleLink + asset
    try:
        r = requests.get(
            scrapeLink,
            headers={"User-Agent": random.choice(userAgents)},
            timeout=SCRAPE_TIMEOUT,
        )
    except requests.RequestException as exc:
        # a network hiccup on one asset should not discard everything scraped so far
        print(f"{asset}: REQUEST FAILED ({exc})")
        continue

    # if page doesn't exist
    if r.status_code != 200:
        print(asset + ": NO DATA")
        continue

    # if page exists
    print(asset + ": DATA GET SUCCESS")
    assetSoup = BeautifulSoup(r.text, "html.parser")

    # key order matters: it becomes the column order of raw_df
    d = {"asset": asset}
    d.update(_parse_allocation(assetSoup))
    d.update(_parse_rounds(assetSoup))
    master_dict.append(d)

raw_df = pd.DataFrame(master_dict)

bitcoin: NO DATA
ethereum: NO DATA
tether: NO DATA
bnb: NO DATA
usdcoin: NO DATA
ripple: NO DATA
solana: DATA GET SUCCESS


  date_obj = stz.localize(date_obj)


cardano: DATA GET SUCCESS
luna: DATA GET SUCCESS
terra-usd: NO DATA
binance-usd: NO DATA
dogecoin: NO DATA
polkadot: NO DATA
avalanche: DATA GET SUCCESS
lido-staked-ether: NO DATA
wrapped-bitcoin: NO DATA
shiba-inu: NO DATA
tron: DATA GET SUCCESS
multicollateraldai: NO DATA
near-protocol: DATA GET SUCCESS
matic-network: DATA GET SUCCESS
crypto-com-cro: NO DATA
litecoin: NO DATA
leobitfinex: DATA GET SUCCESS
bitcoin-cash: NO DATA
algorand: DATA GET SUCCESS
chainlink: DATA GET SUCCESS
cosmos: DATA GET SUCCESS
ftx-token: NO DATA
okb: NO DATA
stellar: NO DATA
monero: NO DATA
ethereum-classic: NO DATA
uniswap: NO DATA
ape-coin: NO DATA
bit-dao: DATA GET SUCCESS
vechainthor: NO DATA
magic-internet-money: NO DATA
internet-computer: NO DATA
frax_uniswap: NO DATA
hedera-hashgraph: DATA GET SUCCESS
filecoin: DATA GET SUCCESS
elrond-gold: DATA GET SUCCESS
sandbox: DATA GET SUCCESS
axie-infinity: DATA GET SUCCESS
tezos: DATA GET SUCCESS
defichain: NO DATA
theta-token: DATA GET SUCCESS
the-graph: D

In [220]:
# Per-round fields to keep, for rounds R1..R5.
colCategories = ["type", "date", "Price", "ROI", "ATH ROI", "Raise", "Tokens For Sale", "Lock-up", "Min/Max personal Cap"]
roundcolumns = [f"R{round_no} {category}" for round_no in range(1, 6) for category in colCategories]

# Keep the first five scraped columns, followed by the round columns.
# NOTE(review): this assumes the first five columns are "asset" plus the
# tokenomics fields of the first scraped page; confirm against raw_df.columns.
# dict.fromkeys removes duplicates while preserving order, in case one of the
# first five columns is itself a round column.
columns = list(dict.fromkeys(raw_df.columns.tolist()[:5] + roundcolumns))

# reindex instead of raw_df[columns]: a round column that was never scraped
# (e.g. no asset had five rounds) becomes an all-NaN column rather than a KeyError.
df = raw_df.reindex(columns=columns)

In [221]:
# write to csv as well as plaintext
df.to_csv("assets_fundraise.csv", index=False)
# to_csv writes UTF-8 by default; open the text table with the same encoding
# explicitly so the output does not depend on the platform's locale encoding
with open('table_assets_fundraise.txt', 'w', encoding='utf-8') as f:
    f.write(tabulate(df, headers='keys', tablefmt='grid'))