# CoinMarketCap: Data Mining

We will download data from **CryptoCurrency Market Capitalization**, and filter for `Ethereum` data with a Market Cap.

Data will be accessed from [https://coinmarketcap.com/assets/views/all](https://coinmarketcap.com/assets/views/all).

In [1]:
# The usual suspects ...
import pandas as pd
import tabulate

# And their accomplices ...
from datetime import datetime

In [2]:
# The url:
url = 'https://coinmarketcap.com/tokens/views/all'

# The dataframe (shows first table on page):
df = pd.read_html(url, attrs={'id': 'assets-all'})[0]
# Column list
df.columns

Index(['#', 'Name', 'Platform', 'Market Cap', 'Price', 'Circulating Supply',
       'Volume (24h)', '% 1h', '% 24h', '% 7d', 'Unnamed: 10'],
      dtype='object')

In [3]:
# New column names:
df.columns = ['#', 'Name', 'Platform', 'MarketCap',
              'Price', 'CirculatingSupply', 'VolumeDay',
              'pctHour', 'pctDay', 'pctWeek', 'NewCol']

# Build an upper case name column so we can sort on it more easily
df['NameUpper'] = map(lambda x: x.upper(), df['Name'])

In [4]:
# Cleaning numeric data:
df['Price'] = df['Price'].str.replace('$', '')
df['MarketCap'] = df['MarketCap'].str.replace('$', '')
df['MarketCap'] = df['MarketCap'].str.replace(',', '')
df['VolumeDay'] = df['VolumeDay'].str.replace('$', '')
df['VolumeDay'] = df['VolumeDay'].str.replace(',', '')
df['VolumeDay'] = df['VolumeDay'].str.replace('Low Vol', '0')
df['pctHour'] = df['pctHour'].str.replace('%', '')
df['pctDay'] = df['pctDay'].str.replace('%', '')
df['pctWeek'] = df['pctWeek'].str.replace('%', '')

In [5]:
# Filter for rows only containing Ethereum and a MarketCap value
df = df.loc[(df['Platform'] == 'Ethereum') & (df['MarketCap'] != '?')]

In [6]:
# Convert numeric columns to numeric type
def coerce_df_columns_to_numeric(df, column_list):
    df[column_list] = df[column_list].apply(pd.to_numeric, errors='coerce')
    
coerce_df_columns_to_numeric(df, ['MarketCap', 'Price', 'CirculatingSupply', 
                                  'VolumeDay', 'pctHour', 'pctDay', 'pctWeek'])

#### Dataframe Sorting Functions:

In [8]:
# To sort dataframe values:
def sort_dataframe(df, col, ascending=False):
    '''Returns sorted dataframe values.'''
    return df.sort_values([col], ascending=ascending)

# To sort names:
def sort_name(df):
    '''Returns sorted dataframe value names.'''
    return sort_dataframe(df, 'NameUpper', True).ix[:, [1, 3, 5, 6]]

# To sort marketcap values:
def sort_marketcap(df):
    '''Returns sorted MarketCap values.'''
    return sort_dataframe(df, 'MarketCap', False).ix[:, [1, 3]]

# To sort the price:
def sort_price(df):
    '''Returns sorted price values.'''
    return sort_dataframe(df, 'Price', False).ix[:, [1, 4]]

# To sort the volume:
def sort_volume(df):
    '''Returns sorted volume values.'''
    return sort_dataframe(df, 'VolumeDay', False).ix[:, [1, 6]]

# To sort hourly percentages:
def sort_hour(df):
    '''Returns sorted hourly percentages.'''
    return sort_dataframe(df, 'pctHour', False).ix[:, [1, 7]]

# To sort daily percentages:
def sort_day(df):
    '''Returns sorted daily percentages.'''
    return sort_dataframe(df, 'pctDay', False).ix[:, [1, 8]]

# To sort weekly percentages:
def sort_week(df):
    '''Returns sorted weekly percentages.'''
    return sort_dataframe(df, 'pctWeek', False).ix[:, [1, 9]]

# Printing sorted dataframe in a tabulated format:
def print_tabulated(df):
    '''Prints sorted dataframe in a tabular format.'''
    print tabulate.tabulate(df, headers='keys', showindex='false', numalign='right')
    
def report():
    print('Title   :')

SyntaxError: unexpected EOF while parsing (<ipython-input-8-bd506e658e97>, line 32)

In [None]:
''