# 0.0. Info Table

Monster Hunter 2 Dos Database App

    1. Lista de Quests separadas por Ranks.

In [1]:
import re
import sqlite3
import requests
import pandas as pd

from bs4 import BeautifulSoup
from sqlalchemy import create_engine

# 1.0. Quest List Data

## 1.1. Jp Hunter Basics Quest List

### 1.1.0. Jp Data Collect

In [2]:
# Load the locally saved quest page and collect the row texts of its last
# 'cnt-tb2' table.
path = '../data/quest.html'

# Read through a context manager so the file handle is closed immediately
# instead of lingering until garbage collection.
with open( path ) as html_file:
    soup = BeautifulSoup( html_file.read(), 'html.parser' )

# One list per <tr>: the row text split on newlines, empty fragments removed.
train_table = soup.find_all( 'table', class_='cnt-tb2' )[-1]
list_train_quests = [
    [fragment for fragment in row.get_text().split('\n') if fragment]
    for row in train_table.find_all('tr')
]

### 1.1.1. Jp Data Cleaning

In [109]:
# Build the training-quest table from the scraped rows and normalise it into
# the columns: id, rank, name, reward, time, area, main, sub_a, sub_b.
df_train = pd.DataFrame( list_train_quests )

# Rename Columns and Filter Rows
# The first scraped row carries the header labels; the first two columns get
# fixed names and the remaining ones are snake_cased from that header row.
header_labels = [label.replace(' ', '_').lower() for label in df_train.iloc[0, :].tolist()]
df_train.columns = ['id', 'rank'] + header_labels[2:]

# Header rows are repeated inside the table body; drop every one of them.
df_train = df_train[~df_train['quest_name'].str.contains('Quest name')]
df_train = df_train.drop( columns=['special_conditions', 'season', 'contract_money', 'hrp'] )
df_train = df_train.reset_index( drop=True )

# Individual Columns
# The rank and quest-name cells contain mis-decoded marker characters; the
# rank markers are mapped to integers and the name markers are stripped.
rank_codes = {'¡ù£±': 1, '¡ù£²': 2, '¡ù£³': 3, '¡ù£´': 4, '¡ù£µ': 5}
df_train['rank']        = df_train['rank'].replace( rank_codes )
df_train['quest_name']  = ( df_train['quest_name']
                            .str.replace( '¢ã', '', regex=False )
                            .str.replace( '¢ä', '', regex=False ) )
df_train['time']        = df_train['time'].str.extract( r'(\d+)', expand=False )
df_train['destination'] = df_train['destination'].apply( lambda x: x.title() )
df_train['quest_name']  = [name.lower()
                               .replace('<< basics of hunter >> ', '')
                               .replace('«basic hunter» ', '')
                               .strip()
                               .title() for name in df_train['quest_name'].tolist()]

# Total reward = sum of the first three numbers found in the remuneration text.
reward_numbers = [re.findall( r'\d+', text ) for text in df_train['remuneration'].tolist()]
df_train['remuneration'] = [str( int(k[0]) + int(k[1]) + int(k[2]) ) + 'z' for k in reward_numbers]


def _mark_objectives( text ):
    """Put a '/' separator in front of each objective keyword and trim the text."""
    for keyword in ('None', 'Delivery', 'Hunting', 'Subjugation'):
        text = text.replace( keyword, '/ ' + keyword )
    text = text.strip()
    # A leading separator would produce an empty first column when splitting.
    return text[2:] if text.startswith('/') else text


df_train['request_details'] = [_mark_objectives( p ) for p in df_train['request_details'].tolist()]

# Generate New DataFrame
df_aux = df_train['request_details'].str.split('/', expand=True).reset_index( drop=True )

# Clean New DataFrame
for i in range( 3 ):
    df_aux[i] = ( df_aux[i].str.lower()
                  .str.replace( 'subjugation', 'hunt',    regex=False )
                  .str.replace( 'hunting',     'hunt',    regex=False )
                  .str.replace( 'delivery',    'deliver', regex=False )
                  # Keep the trailing space so the verb is not glued to the next word.
                  .str.replace( 'defeat ',     'hunt ',   regex=False )
                  .str.strip()
                  # Text is lower-case at this point, so match 'none' (the
                  # previous 'None' pattern could never match).
                  .str.replace( 'none', 'no sub', regex=False ) )

# Index Verify
# Hand-written corrections for two rows, applied once after the automatic
# cleaning (previously re-applied on every loop pass, which left some cells
# title-cased and others not). 'Dunt' was a typo for 'Hunt'.
manual_objectives = {
    20: ['Defeat 25 Langosta', 'Deliver 10 Spice Worms', 'Deliver 8 Monster Body Fluids'],
    26: ['Hunt of 40 White Lampos', 'Deliver 7 Kerubi Horns', 'Hunt of 4 Blanco'],
}
for row_pos, objectives in manual_objectives.items():
    for col_pos, text in enumerate( objectives ):
        df_aux.iloc[row_pos, col_pos] = text

for i in range( 3 ):
    df_aux[i] = df_aux[i].str.title()

# Rename and Drop Columns
df_aux.columns = ['main', 'sub_a', 'sub_b']
df_aux['main'] = df_aux['main'].apply( lambda x: x.replace(' Of', '') if x.endswith('Of') else x )

# Drop the raw objective text BEFORE renaming, and rename by column name
# rather than by position: assigning six names while 'request_details' was
# still present raised a length-mismatch error.
df_train = df_train.drop( columns=['request_details'] )
df_train = df_train.rename( columns={'quest_name': 'name', 'remuneration': 'reward', 'destination': 'area'} )

# Concat Columns
df_train = pd.concat( [df_train, df_aux], axis=1 )

## 1.2. En Quest List 

In [387]:
# Download the English quest list page and parse it.
url = 'https://monsterhunter.fandom.com/wiki/MH2:_Quests'
hdr = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5),AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}

# The second positional argument of requests.get is `params`, so the header
# dict must be passed by keyword or it ends up in the query string.
# A timeout keeps the cell from hanging forever, and raise_for_status stops
# an HTTP error page from being parsed as if it were the quest list.
response = requests.get( url, headers=hdr, timeout=30 )
response.raise_for_status()

soup = BeautifulSoup( response.text, 'html.parser' )

# Every table after the first, divided by three
# (presumably three tables per quest - TODO confirm against the page layout).
total_quests = len( soup.find_all('table')[1:] ) // 3
# Colour tokens (trailing ';' included) that get spliced into inline-style strings.
color_list   = ['#007d11;', '#a57564;', '#53305C;', '#8C8F83;', '#762622;', '#93C72C;', '#8a7518;']

### 1.2.0. One Quest

In [177]:
# Quest tables are located by their exact inline style string.
quest_table_style = 'border: 3px solid #007d11; background-color:#AC9A7E; color:#371c01; width:100%;'
list_quests = soup.find_all('table', {'style': quest_table_style})

# Fields: name, main, reward, time, season, sub A, sub A reward, sub B, sub B reward

# Look up the shared pieces of the first table once and index into them.
first_quest = list_quests[0]
paragraphs  = first_quest.find_all('p')
wide_cells  = first_quest.find_all('td', attrs={'colspan': '2'})
# The third <p> holds the time limit and the three reward lines, one per line.
info_lines  = paragraphs[2].get_text().split('\n')

q_name      = first_quest.find('span', class_='mw-headline').get_text()
q_main      = paragraphs[1].get_text()
q_season    = first_quest.find_all('img')[0]['alt']
q_sub_a     = wide_cells[1].get_text()
q_sub_b     = wide_cells[2].get_text()
q_time      = info_lines[0]
q_main_rwd  = info_lines[1]
q_sub_a_rwd = info_lines[2]
q_sub_b_rwd = info_lines[3]

### 1.2.1. Multiple Index

In [498]:
# Parse every quest table carrying the '#007d11' border style into one DataFrame.
quest_columns = ['name', 'main', 'reward', 'season', 'time', 'sub_a', 'sub_b', 'sub_a_reward', 'sub_b_reward']

# Query the page once, before the loop: the lookup does not change between
# iterations, and the loop no longer depends on a `list_quests` left over
# from an earlier cell.
list_quests = soup.find_all('table', {'style': 'border: 3px solid #007d11; background-color:#AC9A7E; color:#371c01; width:100%;'})

quest_rows = []
for quest in list_quests:
    paragraphs = quest.find_all('p')
    wide_cells = quest.find_all('td', attrs={'colspan': '2'})
    # The third <p> holds the time limit and the three reward lines.
    info_lines = paragraphs[2].get_text().split('\n')

    # Name, Main, Reward, Time, Season, Sub A, Sub_a_reward, Sub B, sub_b_reward
    q_name      = quest.find('span', class_='mw-headline').get_text()
    q_main      = paragraphs[1].get_text()
    q_main_rwd  = info_lines[1]
    q_season    = quest.find_all('img')[0]['alt']
    q_time      = info_lines[0]
    q_sub_a     = wide_cells[1].get_text()
    q_sub_b     = wide_cells[2].get_text()
    q_sub_a_rwd = info_lines[2]
    q_sub_b_rwd = info_lines[3]

    quest_rows.append( [q_name, q_main, q_main_rwd, q_season, q_time, q_sub_a, q_sub_b, q_sub_a_rwd, q_sub_b_rwd] )

# Build the frame in one go: concatenating inside the loop copied the whole
# frame on every iteration and left every row with index label 0.
df = pd.DataFrame( quest_rows, columns=quest_columns )

In [535]:
# Show the first row of the scraped quest frame as a quick visual check.
df.head(1)

Unnamed: 0,name,main,reward,season,time,sub_a,sub_b,sub_a_reward,sub_b_reward
0,Familiarize with the environments of the jungle!,Deliver Special Mushroom (特産キノコ): X5\n,Reward: 300ｚ,Warm Season,Time Limit: 50 min.,Deliver Royal Beetle (ロイヤルカブト): X2\n,Deliver Raw Meat (生肉): X4\n,Subquest A Reward: 450ｚ,Subquest B Reward 100ｚ


### 1.2.2. Multiple Quests

In [418]:
# Parse the quest tables of every colour group into a single DataFrame.
# Colour tokens (trailing ';' included) that get spliced into inline-style strings.
color_list   = ['#007d11;', '#a57564;', '#53305C;', '#8C8F83;', '#762622;', '#93C72C;', '#8a7518;']

quest_columns = ['name', 'main', 'reward', 'area', 'season', 'time', 'sub_a', 'sub_b', 'sub_a_reward', 'sub_b_reward']
quest_rows    = []

for p in color_list:
    # Quest tables and the area header cells are both matched by their exact
    # inline style, which embeds the colour token.
    list_quests = soup.find_all('table', {'style': "border: 3px solid "+ p +" background-color:#AC9A7E; color:#371c01; width:100%;"})
    list_colors = soup.find_all('td', {'style': "background-color:" + p +" color:#000000; font-weight:bold; font-size:9pt; text-align:left;" })

    for i, quest in enumerate( list_quests ):
        paragraphs = quest.find_all('p')
        wide_cells = quest.find_all('td', attrs={'colspan': '2'})
        # The third <p> holds the time limit and the three reward lines.
        info_lines = paragraphs[2].get_text().split('\n')

        # Name, Main, Reward, Area, Time, Season, Sub A, Sub_a_reward, Sub B, sub_b_reward
        q_name      = quest.find('span', class_='mw-headline').get_text()
        q_main      = paragraphs[1].get_text()
        q_main_rwd  = info_lines[1]
        # The i-th area cell is paired with the i-th quest table of the same
        # colour; indexing keeps the IndexError if the page has fewer cells.
        q_area      = list_colors[i].get_text().replace('\n', '')
        q_season    = quest.find_all('img')[0]['alt']
        q_time      = info_lines[0]
        q_sub_a     = wide_cells[1].get_text()
        q_sub_b     = wide_cells[2].get_text()
        q_sub_a_rwd = info_lines[2]
        q_sub_b_rwd = info_lines[3]

        quest_rows.append( [q_name, q_main, q_main_rwd, q_area, q_season, q_time, q_sub_a, q_sub_b, q_sub_a_rwd, q_sub_b_rwd] )

# Build the frame once instead of concatenating inside the loop, which
# re-copied all accumulated rows on every iteration.
df = pd.DataFrame( quest_rows, columns=quest_columns )

### 1.2.3 En Data Cleaning

In [428]:
# Clean the scraped village quests and attach total rewards and ranks.
df1 = df.copy().reset_index( drop=True )

# Individual Columns
df1['main']   = df1['main'].apply( lambda x: x.replace( ' (特産キノコ)', '' ).replace( '\n', '' ) )
df1['season'] = df1['season'].apply( lambda x: x.lower().replace(' season', '') )
# Raw strings keep the regex escapes from being read as (invalid) string escapes.
df1['area']   = df1['area'].str.extract( r'([a-zA-Z ]+)', expand=False )
df1['time']   = df1['time'].str.extract( r'(\d+)', expand=False )
df1['sub_a']  = df1['sub_a'].apply( lambda x: x.replace('\n', '').replace('N/A', 'No Sub') )
df1['sub_b']  = df1['sub_b'].apply( lambda x: x.replace('\n', '').replace('N/A', 'No Sub') )
# An empty sub-quest cell also means there is no sub-quest.
df1['sub_b']  = df1['sub_b'].apply( lambda x: x if x else 'No Sub' )

# Generate Full Reward
# Keep only the first number of each reward text; full_reward is their sum.
reward = df1['reward'].str.extract( r'(\d+)', expand=False ).tolist()
sub_a  = df1['sub_a_reward'].str.extract( r'(\d+)', expand=False ).tolist()
sub_b  = df1['sub_b_reward'].str.extract( r'(\d+)', expand=False ).tolist()
df1['full_reward'] = [str( int(main_rwd) + int(a_rwd) + int(b_rwd) ) + 'z'
                      for main_rwd, a_rwd, b_rwd in zip( reward, sub_a, sub_b )]

df1['reward']       = [p + 'z' for p in reward]
df1['sub_a_reward'] = [p + 'z' for p in sub_a]
df1['sub_b_reward'] = [p + 'z' for p in sub_b]

df1 = df1[['name', 'main', 'reward', 'area', 'full_reward', 'season', 'time', 'sub_a', 'sub_b', 'sub_a_reward', 'sub_b_reward']]
df1 = df1.reset_index( drop=True )

# Generate Individual Monster Ranks
# Ranks are hand-assigned by row position, so they depend on the row order
# produced by the scrape. Every row not listed here keeps rank 1.
df1['rank'] = 1
rank_col = df1.columns.get_loc( 'rank' )
rank_by_position = [
    (slice(10, 16),   2),
    (16,              3),
    (17,              4),
    (slice(18, 23),   2),
    (slice(23, 29),   3),
    (slice(36, 40),   2),
    (slice(40, 43),   3),
    (slice(43, 46),   4),
    (slice(47, 49),   2),
    (slice(49, 51),   3),
    (slice(51, 53),   4),
    (slice(56, 60),   3),
    (slice(60, 62),   4),
    (slice(62, 65),   2),
    (65,              4),
    (66,              3),
    (slice(67, None), 4),
]
for rows, rank in rank_by_position:
    df1.iloc[rows, rank_col] = rank

## 1.3. List Quests Storage

In [None]:
# Write both quest tables to CSV with pandas' default options.
csv_exports = (
    (df_train, '../data/quest_train.csv'),
    (df1,      '../data/quest_village.csv'),
)
for frame, csv_path in csv_exports:
    frame.to_csv( csv_path )

In [437]:
# Create the 'village' table in the quests database if it is not there yet.
con = sqlite3.connect( '../sql_databases/quests.sqlite' )

# IF NOT EXISTS lets the cell be re-run without an
# "table village already exists" OperationalError.
query_create_table = '''
    CREATE TABLE IF NOT EXISTS village (
        name            TEXT,
        main            TEXT,
        reward          TEXT,
        area            TEXT,
        full_reward     TEXT,
        season          TEXT,
        time            TEXT,
        sub_a           TEXT,
        sub_b           TEXT,
        sub_a_reward    TEXT,
        sub_b_reward    TEXT,
        rank            INTEGER );'''

try:
    c = con.cursor()
    c.execute( query_create_table )
    con.commit()
finally:
    # Always release the database file, even when the statement fails;
    # the connection was previously left open.
    con.close()

In [441]:
# Append both quest tables to the SQLite database through SQLAlchemy.
db = create_engine( 'sqlite:///../sql_databases/quests.sqlite', echo=False )

# engine.begin() opens a connection inside a transaction, commits when the
# block succeeds, rolls back on error, and closes the connection afterwards
# (the connection was previously never closed).
with db.begin() as con:
    df_train.to_sql( 'train', con=con, if_exists='append', index=False )
    df1.to_sql( 'village', con=con, if_exists='append', index=False )