# 0.0. Info Table

Monster Hunter 2 Dos Database App

    1. Lista de Quests separadas por Ranks.
    2. Lista de Monstros e suas características.
    3. Lista de Kitchen Combination separadas por season.

In [1]:
import re
import sqlite3
import requests
import pandas as pd

from bs4 import BeautifulSoup
from sqlalchemy import create_engine

# 1.0. List Monsters.

## 1.1. List Monsters Collection

In [2]:
# Index page listing the monsters of Monster Hunter 2 (Dos).
url = 'https://monsterhunter.fandom.com/wiki/MH2:_Monsters'
# Browser-like User-Agent sent with every request made by this notebook.
hdr = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5),AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}

# The headers must be passed by keyword: the second positional argument of
# requests.get() is `params`, so passing `hdr` positionally would put the
# dict in the query string and never send the User-Agent header.
response = requests.get( url, headers=hdr, timeout=30 )
# Fail loudly on HTTP errors instead of parsing an error page.
response.raise_for_status()

soup = BeautifulSoup( response.text, 'html.parser' )

## 1.2. Monster Details

In [3]:
# Take the text of every <b> element on the index page except the last 12
# (presumably trailing non-monster entries -- TODO confirm), and swap spaces
# for underscores so each name can be appended to the wiki base URL.
monster_names = [
    bold.get_text().replace(' ', '_')
    for bold in soup.find_all('b')[:-12]
]

In [5]:
# Scrape the detail page of every monster and collect one row per monster.
# Rows are accumulated in a plain list and converted to a DataFrame once,
# which avoids re-copying the whole frame on every iteration.
rows = []

for name in monster_names:
    url = 'https://monsterhunter.fandom.com/wiki/' + name
    # `headers=` must be a keyword argument; the second positional argument
    # of requests.get() is `params`, not the HTTP headers.
    response = requests.get( url, headers=hdr, timeout=30 )
    response.raise_for_status()
    soup = BeautifulSoup( response.text, 'html.parser' )

    # Get Data
    # Second <h2> inside the <aside> infobox; non-breaking spaces are
    # normalised and only the leading run of word characters is kept.
    title = soup.find('aside').find_all( 'h2' )[1].get_text().replace('\xa0', ' ')
    jp_name = re.match( r'\w+', title ).group(0)

    # All value cells of the second <section>, looked up once and indexed
    # by position below.
    info = soup.find_all('section')[1].find_all('div', class_='pi-data-value pi-font')

    m_class = info[0].get_text()

    m_element = info[1].get_text()

    # At most the first two <small> entries of the fourth value cell.
    weaknesses = [p.get_text().replace(' ', '') for p in info[3].find_all('small')][:2]

    # No entry -> the literal 'None'. Previously the string 'None' was then
    # indexed with [0], which stored just 'N'.
    m_weakness = ', '.join( weaknesses ) if weaknesses else 'None'

    m_generation = info[-1].get_text()

    rows.append( [jp_name, m_class, m_element, m_weakness, m_generation] )

# Build the DataFrame
df = pd.DataFrame( rows, columns=['jp_name', 'class', 'element', 'weakness', 'generation'] )

## 1.3. Monster Data Cleaning

In [7]:
# Work on a re-indexed copy so rows line up positionally with the name list.
df1 = df.reset_index( drop=True )

# Combine with Names
# Turn the URL-style names back into display names and attach them as the
# 'name' column (aligned on the 0..n-1 index).
monster_names = [p.replace('_', ' ') for p in monster_names]
monster_names = pd.Series( monster_names, name='name' )

# Select Columns
# .copy() gives an independent frame, so the column assignments below do not
# operate on a slice of the concatenated frame.
df1 = pd.concat( [df1, monster_names], axis=1 )
df1 = df1[['name', 'jp_name', 'class', 'element', 'weakness', 'generation']].copy()

# Individual Columns
# Keep only the leading run of letters/spaces of the class text.
df1['class'] = df1['class'].apply( lambda x: re.match('[a-zA-Z ]+',
                                                       x).group(0).strip())

df1['element'] = df1['element'].replace('N/A', 'None')
# .str.strip() (instead of apply + x.strip()) tolerates values where the
# extract finds no letters and yields NaN.
df1['element'] = df1['element'].str.extract('([a-zA-Z ]+)')[0].str.strip()
df1['element'] = df1['element'].replace('Earth', 'None')

# Normalise multi-word element texts; na=False keeps missing values out of
# the boolean masks.
df1.loc[df1['element'].str.contains('Dragon ', na=False), 'element'] = 'Dragon, Fire'
df1.loc[df1['element'].str.contains('Thunder ', na=False), 'element'] = 'Dragon, Thunder'
df1.loc[df1['element'].str.contains('Fire ', na=False), 'element'] = 'Fire'

# 'N' is what the scraping loop stores when a monster has no weakness entry.
df1['weakness'] = df1['weakness'].replace('N', 'None')
# Series.replace() only matches whole cell values, so it never removed the
# suffix; a literal substring replacement is what is needed here.
df1['weakness'] = df1['weakness'].str.replace(', (2ndGen)', '', regex=False)
# Manual correction for the row at index 34.
df1.loc[34, 'weakness'] = 'Ice'

df1['generation'] = df1['generation'].str.replace('*', '', regex=False)

# Drop duplicates first, then re-index, so the final index has no gaps.
df1 = df1.drop_duplicates()
df1 = df1.reset_index( drop=True )

## 1.4. List Monsters Storage

In [20]:
# Persist the cleaned table as CSV (the DataFrame index is written too).
df1.to_csv('../data/monsters.csv')

# IF NOT EXISTS lets this cell be re-run without failing because the table
# was already created by a previous run.
query_create_table = '''
    CREATE TABLE IF NOT EXISTS monsters (
        name          TEXT,
        jp_name       TEXT,
        class         TEXT,
        element       TEXT,
        weakness      TEXT,
        generation    TEXT );'''

# Create the table through a raw sqlite3 connection and close it afterwards;
# previously it was left open and its handle lost when `con` was rebound.
con = sqlite3.connect( '../sql_databases/monsters.sqlite' )
try:
    c = con.cursor()
    c.execute( query_create_table )
    con.commit()
finally:
    con.close()

# Load the rows through SQLAlchemy.
# NOTE: if_exists='append' adds the rows again on every run of this cell.
db = create_engine( 'sqlite:///../sql_databases/monsters.sqlite', echo=False )
con = db.connect()

df1.to_sql( 'monsters', con=con, if_exists='append', index=False )