In [1]:
import locale
import numpy as np
import pandas as pd
import re
import requests

from bs4 import BeautifulSoup
from datetime import datetime

In [2]:
def scrape_osrs_world_select(timeout=10):
    """Request the Old School RuneScape world-select page.

    Parameters
    ----------
    timeout : float or tuple, optional
        Passed straight to ``requests.get``. Without a timeout ``requests``
        waits indefinitely, so a stalled connection would hang the notebook.

    Returns
    -------
    requests.Response
        The unmodified response. No status check is done here; the caller
        inspects ``response.ok`` / ``response.status_code`` before parsing.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems, including ``Timeout`` when the server does
        not answer within ``timeout`` seconds.
    """
    world_select_url = 'https://oldschool.runescape.com/slu'
    return requests.get(world_select_url, timeout=timeout)


def extract_world_data(response):
    """Collect the server-list rows of the world-select page into a DataFrame.

    Parameters
    ----------
    response : requests.Response-like
        Only ``response.text`` (the page HTML) is read.

    Returns
    -------
    pandas.DataFrame
        Built from an object array of every ``<tr class="server-list__row">``
        element found in the page.
    """
    soup = BeautifulSoup(response.text, 'lxml')
    # `find_all` is the supported spelling; `findAll` is a legacy BS3 alias.
    rows = soup.find_all('tr', {'class': 'server-list__row'})
    # NOTE(review): numpy presumably expands every row tag into its child
    # nodes (one column per child); drop_even_columns appears to rely on that
    # layout -- confirm against a live page.
    return pd.DataFrame(np.array(rows, dtype=object))


def extract_total_player_data(response):
    """Wrap the first ``<p>`` element of the page in a one-column DataFrame.

    Parameters
    ----------
    response : requests.Response-like
        Only ``response.text`` (the page HTML) is read.

    Returns
    -------
    pandas.DataFrame
        A frame with the single column ``'player_count'``.
    """
    # NOTE(review): the frame is built by iterating the tag, so presumably it
    # holds one row per child node of the paragraph; get_total_player_count
    # reads row 0 -- confirm the first <p> is the player-count sentence.
    first_paragraph = BeautifulSoup(response.text, 'lxml').find('p')
    return pd.DataFrame(first_paragraph, columns=['player_count'])

In [3]:
def drop_even_columns(data):
    """Return only the columns sitting at odd positions (1, 3, 5, ...).

    Columns at positions 0, 2, 4, ... are discarded; the input frame itself
    is left as it was.
    """
    odd_position_labels = data.columns[1::2]
    return data[odd_position_labels]


def select(data, columns):
    """Index ``data`` with ``columns`` and return the result.

    This is a thin wrapper around ``data[columns]`` so the selection can be
    used as a ``.pipe`` step.
    """
    return data[columns]


def rename_columns(data):
    """Rename the leading columns positionally to the world-table names.

    The existing labels are paired, in order, with
    ``world, players, location, type, activity``. Pairing stops at the
    shorter of the two sequences, so any columns beyond the fifth keep their
    current label. A renamed copy is returned.
    """
    target_names = ('world', 'players', 'location', 'type', 'activity')
    label_map = {old: new for old, new in zip(data.columns, target_names)}
    return data.rename(columns=label_map)


def add_datetime_column(data, dt):
    """Return a copy of ``data`` with a ``'datetime'`` column set to ``dt``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to stamp. It is not modified; the column is written to a copy
        so the caller's frame stays unchanged.
    dt : object
        Value broadcast to every row (``None`` is accepted and stored as-is).

    Returns
    -------
    pandas.DataFrame
        The stamped copy. An existing ``'datetime'`` column is overwritten,
        otherwise the column is appended last.
    """
    stamped = data.copy()
    stamped['datetime'] = dt
    return stamped


def get_content(tag, index=0):
    """Return ``tag.contents[index]``, or NaN when that lookup is impossible.

    Parameters
    ----------
    tag : object
        Anything exposing a ``contents`` sequence (e.g. a BeautifulSoup tag).
    index : int, optional
        Position inside ``tag.contents`` to return (default ``0``).

    Returns
    -------
    object
        The requested child, or ``numpy.nan`` when ``tag`` has no
        ``contents`` attribute, the index is out of range, or ``contents``
        cannot be indexed that way.
    """
    try:
        return tag.contents[index]
    # Only the lookup failures are treated as "missing"; a bare ``except``
    # would also swallow KeyboardInterrupt / SystemExit.
    except (AttributeError, IndexError, KeyError, TypeError):
        return np.nan


def get_contents(data):
    """Unwrap every cell of ``data`` to its first child via ``get_content``.

    The ``'world'`` column is unwrapped twice: first to the child at index 1
    and then, together with all other columns, to the child at index 0.
    Cells that cannot be unwrapped become whatever ``get_content`` returns
    for them.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'world'`` column. It is not modified; the work is
        done on a copy so the caller's frame stays unchanged.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns holding the unwrapped values.
    """
    data = data.copy()
    # NOTE(review): index 1 presumably skips a leading text node in the world
    # cell to reach its nested element -- confirm against the page markup.
    data['world'] = data['world'].apply(get_content, index=1)

    for column in data.columns:
        data[column] = data[column].apply(get_content)

    return data


def get_match(pattern, string):
    """Return the first match of ``pattern`` in ``string``, or NaN.

    Parameters
    ----------
    pattern : str or compiled pattern
        Regular expression handed to ``re.search``.
    string : object
        Text to search. Non-string values (NaN, ``None``, numbers, ...) are
        tolerated and yield NaN.

    Returns
    -------
    str or float
        The full matched text (group 0), or ``numpy.nan`` when nothing
        matches or ``string`` is not searchable.

    Raises
    ------
    re.error
        If ``pattern`` is not a valid regular expression. This is a
        programming error and is no longer hidden behind a NaN.
    """
    try:
        found = re.search(pattern, string)
    except TypeError:
        # ``string`` is not text, e.g. NaN left by an earlier failed step.
        return np.nan
    return found.group(0) if found is not None else np.nan


def get_total_player_count(data):
    """Replace the text in row 0 of ``'player_count'`` with the count as int.

    The first comma-grouped number in the text (e.g. ``'72,345'``) is
    extracted and converted by stripping the commas. The conversion does not
    consult or change the process locale: the pattern already fixes ``,`` as
    the grouping character, and locale-aware parsing fails on it whenever the
    environment's locale uses a different (or no) thousands separator.

    Parameters
    ----------
    data : pandas.DataFrame
        Must have a ``'player_count'`` column and a row labelled ``0``. It is
        not modified; the result is written to a copy.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` whose ``.loc[0, 'player_count']`` is an ``int``.

    Raises
    ------
    ValueError
        If row 0 holds no text or the text contains no number.
    """
    # pattern adapted from https://stackoverflow.com/questions/5917082/
    pattern = r'\d{1,3}(,\d{3})*'
    data = data.copy()
    raw_text = data.loc[0, 'player_count']
    found = re.search(pattern, raw_text) if isinstance(raw_text, str) else None
    if found is None:
        raise ValueError(
            'no player count found in {!r}'.format(raw_text)
        )
    data.loc[0, 'player_count'] = int(found.group(0).replace(',', ''))
    return data


def get_numbers(data):
    """Reduce the ``'world'`` and ``'players'`` columns to their first number.

    For each of the two columns the first run of digits is extracted from
    every cell with ``get_match``. The column is then cast to ``int16``; when
    that cast is impossible, it is cast to ``float`` instead.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``'world'`` and ``'players'`` columns. It is not
        modified; the work is done on a copy.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with the two columns converted to numbers.
    """
    data = data.copy()
    pattern = re.compile(r'\d+')
    for column in ('world', 'players'):
        extracted = data[column].apply(lambda s: get_match(pattern, s))
        try:
            data[column] = extracted.astype(np.int16)
        except (ValueError, TypeError, OverflowError):
            # Builtin ``float`` replaces the ``np.float`` alias, which was
            # removed in NumPy 1.24 and made this fallback itself crash.
            data[column] = extracted.astype(float)

    return data


def transform_world_data(data, dt, columns):
    """Run the per-world cleaning steps on a copy of ``data``.

    Steps, in order: drop_even_columns, rename_columns, get_contents,
    get_numbers, add_datetime_column (with ``dt``) and finally select
    (with ``columns``).

    Parameters
    ----------
    data : pandas.DataFrame
        Raw frame to clean; a copy is taken before any step runs.
    dt : object
        Value for the ``'datetime'`` column added by add_datetime_column.
    columns : list
        Columns to keep in the returned frame.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame restricted to ``columns``.
    """
    frame = data.copy()
    frame = drop_even_columns(frame)
    frame = rename_columns(frame)
    frame = get_contents(frame)
    frame = get_numbers(frame)
    frame = add_datetime_column(frame, dt=dt)
    return select(frame, columns=columns)


def transform_total_player_data(data, dt):
    """Run the total-player-count cleaning steps on a copy of ``data``.

    Steps, in order: get_total_player_count, add_datetime_column (with
    ``dt``) and select of ``['datetime', 'player_count']``.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw frame with a ``'player_count'`` column; a copy is taken first.
    dt : object
        Value for the ``'datetime'`` column added by add_datetime_column.

    Returns
    -------
    pandas.DataFrame
        Frame with the columns ``datetime`` and ``player_count``, in that
        order.
    """
    frame = data.copy()
    frame = get_total_player_count(frame)
    frame = add_datetime_column(frame, dt=dt)
    return select(frame, columns=['datetime', 'player_count'])

In [None]:
#with open('osrs_worlds.html', 'rb') as page:
#    page_content = page.read()
#    soup = BeautifulSoup(page_content, 'lxml')

In [None]:
# create world information, initialize logs

# create MySQL database

In [None]:
# check logs, if last request was successful, get datetime and make new request

In [4]:
# Take the timestamp once, just before the request: the same `dt` is later
# passed to both transform_* calls so the two tables share one datetime value.
dt = datetime.now()
response = scrape_osrs_world_select()

In [5]:
# Sanity-check the response: the ETL cells below should only be run when
# response.ok is True.
print(response.ok)
print(response.status_code)

True
200


In [6]:
# Extract: parse the same response twice, once for the per-world rows and
# once for the page-wide player total.
world_data = extract_world_data(response)
total_player_data = extract_total_player_data(response)

# Transform: both calls start from a copy of their input, so the raw
# `world_data` / `total_player_data` frames stay available for later cells.
transformed_world_data = transform_world_data(world_data, dt=dt, columns=['datetime', 'world', 'players'])
transformed_total_player_data = transform_total_player_data(total_player_data, dt)

# TODO: load world_data
# TODO: load total_player_data

In [None]:
# endif
# update_logs

In [7]:
# Static per-world attributes, built from the same raw `world_data`.
# dt=None only fills the intermediate 'datetime' column, which the `columns`
# selection below leaves out of the result.
world_info = transform_world_data(world_data, columns=['world', 'location', 'type', 'activity'], dt=None)
world_info

Unnamed: 0,world,location,type,activity
0,168,United States,Free,500 skill total
1,117,United States,Free,-
2,170,United States,Free,-
3,183,United States,Free,-
4,94,United States,Free,Clan Wars - Free
...,...,...,...,...
258,88,Australia,Members,Theatre of Blood
259,87,Australia,Members,Blast Furnace
260,234,Australia,Members,-
261,89,Australia,Members,Wintertodt
