In [1]:
# !conda install -y beautifulsoup4 lxml requests folium

In [2]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.colors import rgb2hex
from matplotlib import cm
import branca.colormap as cmap

import matplotlib as mpl
import mplleaflet
import folium
import pandas as pd
import numpy as np

from bs4 import BeautifulSoup
import requests

%matplotlib inline

In [3]:
def scrape_table(url):
    data = requests.get(url).text
    soup = BeautifulSoup(data)
    
    rows = soup.find_all('table')[1].find_all('tr')[4:]
    nrows = len(rows)
    ncols = len(rows[0].find_all('td'))
    colnames = [
        'Year', 'Month', 'Day', 'Hr', 'Mn', 'Sec', 'Tsu', 'Vol', 'Addl EQ Info', 
        'Name', 'Lat', 'Lon', 'Focal Depth', 'Mag', 'MMI int', 
        'Deaths', 'Deaths Num Cat', 'Injuries', 'Injuries Num Cat', 
        'Dmg $Mill', 'Dmg $Mill Cat', 'Houses Destroyed', 'Houses Destroyed Cat', 
        'Houses Damaged', 'Houses Damaged Cat', 'Photos'
    ]
    df = np.zeros((nrows, ncols))
    df = pd.DataFrame(df, columns=colnames)

    for row in range(nrows):

        for col in range(ncols):
            value_lst = rows[row].find_all('td')[col].contents

            try:
                df.iloc[row, col] = value_lst[0]
                
            except:
                continue
                
    return df

In [4]:
def make_data_frame(url_list):
    
    for i, url in enumerate(url_list):
        
        if i == 0:
            DF = scrape_table(url)
            DF['Country'] = i
            
        else:
            df = scrape_table(url)
            df['Country'] = i
            DF = pd.concat([DF, df], axis=0)
            
    return DF

In [5]:
def preprocess_raw_data(raw_data):
    df = raw_data[['Year', 'Month', 'Name', 'Lat', 'Lon', 'Mag', 'Country']]
    df[['Year', 'Month']] = df[['Year', 'Month']].astype('int')
    df[['Lat', 'Lon']] = df[['Lat', 'Lon']].astype('float')
    df['Mag'] = df['Mag'].astype('float')

    mask = (df['Lat'] < 51) & (df['Lat'] > -10)

    return df[mask]

In [6]:
url_1 = 'https://www.ngdc.noaa.gov/nndc/struts/results?bt_0=1900&st_0=&type_17=EXACT&query_17=40&op_12=eq&v_12=&type_12=Or&query_14=None+Selected&type_3=Like&query_3=&st_1=&bt_2=&st_2=&bt_1=&bt_4=0.0&st_4=9.9&bt_5=&st_5=&bt_6=&st_6=&bt_7=&st_7=&bt_8=&st_8=&bt_9=&st_9=&bt_10=&st_10=&type_11=Exact&query_11=&type_16=Exact&query_16=&bt_18=&st_18=&ge_19=&le_19=&type_20=Like&query_20=&display_look=1&t=101650&s=1&submit_all=Search+Database'
url_2 = 'https://www.ngdc.noaa.gov/nndc/struts/results?bt_0=1900&st_0=&type_17=EXACT&query_17=30&op_12=eq&v_12=&type_12=Or&query_14=None+Selected&type_3=Like&query_3=&st_1=&bt_2=&st_2=&bt_1=&bt_4=0.0&st_4=9.9&bt_5=&st_5=&bt_6=&st_6=&bt_7=&st_7=&bt_8=&st_8=&bt_9=&st_9=&bt_10=&st_10=&type_11=Exact&query_11=&type_16=Exact&query_16=&bt_18=&st_18=&ge_19=&le_19=&type_20=Like&query_20=&display_look=1&t=101650&s=1&submit_all=Search+Database'
url_3 = 'https://www.ngdc.noaa.gov/nndc/struts/results?bt_0=1900&st_0=&type_17=EXACT&query_17=60&op_12=eq&v_12=&type_12=Or&query_14=None+Selected&type_3=Like&query_3=&st_1=&bt_2=&st_2=&bt_1=&bt_4=0.0&st_4=9.9&bt_5=&st_5=&bt_6=&st_6=&bt_7=&st_7=&bt_8=&st_8=&bt_9=&st_9=&bt_10=&st_10=&type_11=Exact&query_11=&type_16=Exact&query_16=&bt_18=&st_18=&ge_19=&le_19=&type_20=Like&query_20=&display_look=1&t=101650&s=1&submit_all=Search+Database'
url_list = [url_1, url_2, url_3]

raw_data = make_data_frame(url_list)

data = preprocess_raw_data(raw_data)



 BeautifulSoup(YOUR_MARKUP})

to this:

 BeautifulSoup(YOUR_MARKUP, "html.parser")

  markup_type=markup_type))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [7]:
def draw_map(data):
    MAP = folium.Map(location=[25, 100], min_zoom=3, zoom_start=3, 
                     max_zoom=10, max_bounds=False)
    
    colormap = cmap.linear.OrRd.scale(data['Mag'].min(), data['Mag'].max())
    colormap.caption = 'Magnitude'
    MAP.add_child(colormap)
    
    data['For Popup'] = data.apply(lambda row: '{} ({}/{}); Mag: {}'.format(row['Name'], 
                                                                            row['Month'], 
                                                                            row['Year'],
                                                                            row['Mag']), axis=1)
    data['Intensity'] = (data['Mag']-data['Mag'].min()) / (data['Mag'].max()-data['Mag'].min())
    data['Radius'] = 0.1*(1.9**data['Mag'])
    
    data.apply(lambda row: folium.CircleMarker(location=[row['Lat'], row['Lon']], 
                                               radius=row['Radius'],
                                               fill_color=rgb2hex(cm.Reds(row['Intensity'])),
                                               color=rgb2hex(cm.Reds(row['Intensity'])),
                                               popup=row['For Popup']).add_to(MAP), axis=1)
    
    return MAP
        
draw_map(data)