# Import Dependencies

In [1]:
import time
from io import StringIO
from urllib.parse import quote

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser

# Init Splinter Browser

In [3]:
# Location of the ChromeDriver binary that is handed to Splinter.
#   macOS:   {'executable_path': '/usr/local/bin/chromedriver'}
#   Windows: {'executable_path': 'chromedriver.exe'}   (active below)
executable_path = dict(executable_path='chromedriver.exe')

# Start a headless Chrome session; the driver location is passed as a keyword argument
browser = Browser('chrome', headless=True, **executable_path)

# Scrape the List of Genres

In [9]:
# Page whose HTML contains the <select name="genre"> element listing every genre
url = 'https://www.vgchartz.com/gamedb/?page='

# Open the page and parse the rendered HTML with BeautifulSoup
browser.visit(url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Locate the genre drop-down; fail with a clear message instead of an
# AttributeError on None if the page did not contain it.
result_select = soup.find('select', {'name': 'genre'})
if result_select is None:
    raise RuntimeError(f"No <select name='genre'> element found at {url}")

# Keep every option that carries a non-empty value. Using .get() means an
# <option> without a value attribute is skipped instead of raising KeyError.
genre_list = [
    option['value']
    for option in result_select.find_all('option')
    if option.get('value')
]
genre_list

['Action',
 'Action-Adventure',
 'Adventure',
 'Board Game',
 'Education',
 'Fighting',
 'Misc',
 'MMO',
 'Music',
 'Party',
 'Platform',
 'Puzzle',
 'Racing',
 'Role-Playing',
 'Sandbox',
 'Shooter',
 'Simulation',
 'Sports',
 'Strategy',
 'Visual Novel']

# Scrape Games Data for Each Genre

In [19]:
# Scrape the games table for every genre in `genre_list`.
# Uses `browser` and `genre_list` created by the earlier cells.

# Rows requested per results page (the `results=` query parameter)
RESULTS_PER_PAGE = 200

# Upper bound on the number of result pages fetched per genre. The default
# of 1 fetches only the first page of each genre.
# NOTE(review): the pager check below is assumed to behave the same on
# later pages -- confirm before raising this cap.
MAX_PAGES_PER_GENRE = 1

# Seconds to wait after opening a page before its HTML is read
PAGE_LOAD_DELAY = 2

# Fixed parts of the listing URL; only page number and genre vary per request
url_base = "https://www.vgchartz.com/games/games.php?"
url_tail = (
    "&order=Sales&ownership=Both&direction=DESC"
    "&showtotalsales=1&shownasales=1&showpalsales=1"
    "&showjapansales=1&showothersales=1&showpublisher=1"
    "&showdeveloper=1&showreleasedate=1&showlastupdate=1"
    "&showvgchartzscore=1&showcriticscore=1&showuserscore=1"
    "&showshipped=1"
)


def build_games_url(genre, page_num):
    """Return the listing URL for one genre and one results page.

    The genre is percent-encoded with `quote`, so spaces become `%20`
    and other reserved characters are escaped as well.
    """
    url_dyn = f"page={page_num}&results={RESULTS_PER_PAGE}&genre={quote(genre)}"
    return url_base + url_dyn + url_tail


def page_is_listed(body_div, page_num):
    """Return True if an anchor in the pager cell mentions `page_num`.

    The pager is read from the second <th> of the first <tr> inside
    `body_div`. Returns False when any of those elements is missing.
    """
    if body_div is None:
        return False
    first_row = body_div.find("tr")
    if first_row is None:
        return False
    header_cells = first_row.find_all("th")
    if len(header_cells) < 2:
        return False
    label = str(page_num)
    # Substring test; unlike `str.find(...) > 0` it also accepts a match
    # that starts at index 0 of the anchor text.
    return any(label in anchor.text for anchor in header_cells[1].find_all("a"))


def parse_games_table(body_div):
    """Return the first table of `body_div` as a DataFrame.

    Everything between the opening <tbody> and the column-header row is
    cut out of the prettified HTML before it is handed to pandas.

    Raises:
        ValueError: if either marker is absent from the prettified HTML.
    """
    html_string = body_div.prettify()
    tbody_start_index = html_string.find("<tbody>\n   <tr>")
    tbody_end_index = html_string.find('<tr>\n    <th style="background-image:url(')
    # str.find returns -1 on a miss; slicing with that would silently
    # produce a mangled document, so stop here with a clear error.
    if tbody_start_index < 0 or tbody_end_index < 0:
        raise ValueError("Expected table markers not found in the page HTML")
    # 11 == len("<tbody>\n   "): keep the tag and its indentation, then
    # resume at the column-header row.
    new_html_string = html_string[:tbody_start_index + 11] + html_string[tbody_end_index:]
    # Wrap in StringIO: passing literal HTML to read_html is deprecated
    return pd.read_html(StringIO(new_html_string))[0]


def parse_consoles(body_div):
    """Return the `alt` text of the image in the 4th cell of each data row."""
    all_trs = body_div.find('table').find_all('tr')
    # The first 3 <tr> tags are skipped
    return [tr.find_all("td")[3].find('img').attrs['alt'] for tr in all_trs[3:]]


# One DataFrame per scraped page, kept so the results outlive the loop
scraped_frames = []

for genre in genre_list:
    page_num = 1

    while page_num <= MAX_PAGES_PER_GENRE:
        # Open the page once, give it time to settle, then parse it
        browser.visit(build_games_url(genre, page_num))
        time.sleep(PAGE_LOAD_DELAY)
        soup = BeautifulSoup(browser.html, 'html.parser')
        result = soup.find('div', id='generalBody')

        # Stop paging this genre when the pager does not list the page
        if not page_is_listed(result, page_num):
            break

        # Scrape the game info into a DataFrame and tag it with its genre
        df = parse_games_table(result)
        df['Genre'] = genre

        # Overwrite the Console column with the names taken from image alt text
        df['Console'] = parse_consoles(result)
        print(df)

        scraped_frames.append(df)
        page_num += 1

# All scraped pages combined; an empty frame when nothing was scraped
games_df = pd.concat(scraped_frames, ignore_index=True) if scraped_frames else pd.DataFrame()

     Pos  Game                                             Game.1 Console  \
0      1   NaN                                         God of War  Series   
1      2   NaN                                           Warriors  Series   
2      3   NaN                                      Devil May Cry  Series   
3      4   NaN                                   Dynasty Warriors  Series   
4      5   NaN                                 Grand Theft Auto V     PS3   
..   ...   ...                                                ...     ...   
195  196   NaN  2 Games in 1 Double Pack: The Incredibles / Fi...     GBA   
196  197   NaN                                   Kinect Star Wars    X360   
197  198   NaN                      Just Cause 2  Read the review     PS3   
198  199   NaN               Metal Gear Solid V: The Phantom Pain      PC   
199  200   NaN                                          For Honor     PS4   

                        Publisher                Developer  VGChartz Score 

    Pos  Game                                      Game.1 Console  \
0     1   NaN                           Momotaro Dentetsu  Series   
1     2   NaN                Monopoly for Nintendo Switch      NS   
2     3   NaN                      Mahjong * Dream C Club    X360   
3     4   NaN                                Billion Road      NS   
4     5   NaN                                 Carcassonne      NS   
5     6   NaN                              Catan Universe      NS   
6     7   NaN                                 Chess Ultra      NS   
7     8   NaN                                Conduct Exam    CD32   
8     9   NaN                               Fable Fortune      PC   
9    10   NaN                               Fable Fortune    XOne   
10   11   NaN                     Munchkin: Quacked Quest      NS   
11   12   NaN                                     Othello      NS   
12   13   NaN                                    Pandemic      NS   
13   14   NaN                     

     Pos  Game                                     Game.1 Console  \
0      1   NaN                                     WWE 2K  Series   
1      2   NaN                          Super Smash Bros.  Series   
2      3   NaN                              Mortal Kombat  Series   
3      4   NaN                                     Tekken  Series   
4      5   NaN                             Street Fighter  Series   
..   ...   ...                                        ...     ...   
195  196   NaN  Injustice: Gods Among Us Ultimate Edition     PS4   
196  197   NaN                      Mortal Kombat Trilogy     N64   
197  198   NaN             Mortal Kombat: Deadly Alliance      XB   
198  199   NaN                 WWE SmackDown vs. Raw 2009     PS3   
199  200   NaN                        Knockout Kings 2000     N64   

                                  Publisher                Developer  \
0                                       THQ          Visual Concepts   
1                          

     Pos  Game                     Game.1 Console          Publisher  \
0      1   NaN                 Just Dance  Series            Ubisoft   
1      2   NaN                Guitar Hero  Series         Activision   
2      3   NaN                   SingStar  Series            Unknown   
3      4   NaN     Dance Dance Revolution  Series             Konami   
4      5   NaN          Taiko no Tatsujin  Series              Namco   
..   ...   ...                        ...     ...                ...   
195  196   NaN  Wii Karaoke U by JOYSOUND    WiiU           Nintendo   
196  197   NaN                  ZillerNet      XB          Microsoft   
197  198   NaN            AVICII Invector    XOne  Wired Productions   
198  199   NaN            AVICII Invector      NS  Wired Productions   
199  200   NaN            AVICII Invector      PC  Wired Productions   

                Developer  VGChartz Score  Critic Score  User Score  \
0                 Ubisoft             NaN           NaN         

     Pos  Game                                        Game.1 Console  \
0      1   NaN                                        Tetris  Series   
1      2   NaN                                     Bejeweled  Series   
2      3   NaN                                     Brain Age  Series   
3      4   NaN                                        Tetris      GB   
4      5   NaN                                        Portal  Series   
..   ...   ...                                           ...     ...   
195  196   NaN                                       Pac-Pix      DS   
196  197   NaN                      Ratatouille: Food Frenzy      DS   
197  198   NaN                 Super Nazo Puyo: Ruruu no Ruu    SNES   
198  199   NaN                             Super Collapse! 3      DS   
199  200   NaN  Are You Smarter than a 5th Grader? Game Time     Wii   

        Publisher              Developer  VGChartz Score  Critic Score  \
0         Unknown        Alexey Pajitnov             NaN     

    Pos  Game                                             Game.1 Console  \
0     1   NaN                                          Minecraft  Series   
1     2   NaN                                         Saints Row  Series   
2     3   NaN                              Dragon Quest Builders  Series   
3     4   NaN                                          Minecraft      NS   
4     5   NaN                                      CastleMiner Z     XBL   
5     6   NaN                                   Planet Explorers      PC   
6     7   NaN                                           Windward      PC   
7     8   NaN                                             PixARK      PC   
8     9   NaN                                          Blockland      PC   
9    10   NaN                                            Avorion      PC   
10   11   NaN                                        Outer Wilds      PC   
11   12   NaN                                        Outer Wilds    XOne   
12   13   Na

     Pos  Game                                          Game.1 Console  \
0      1   NaN                                        The Sims  Series   
1      2   NaN                                 Animal Crossing  Series   
2      3   NaN                                      Nintendogs  Series   
3      4   NaN  Animal Crossing: New Horizons  Read the review      NS   
4      5   NaN                                            Petz  Series   
..   ...   ...                                             ...     ...   
195  196   NaN                                            Dogz     GBA   
196  197   NaN                  Cooking Mama 4: Kitchen Magic!     3DS   
197  198   NaN                      The Urbz: Sims in the City     GBA   
198  199   NaN                                    World of Zoo     Wii   
199  200   NaN                                 Petz: Dogz Pack      DS   

           Publisher          Developer  VGChartz Score  Critic Score  \
0    Electronic Arts              Maxi

     Pos  Game                                   Game.1 Console  \
0      1   NaN                          NEKOPARA Vol. 1      PC   
1      2   NaN                          NEKOPARA Vol. 0      PC   
2      3   NaN                          NEKOPARA Vol. 2      PC   
3      4   NaN                      Tricolour Lovestory      PC   
4      5   NaN                        Hatoful Boyfriend      PC   
..   ...   ...                                      ...     ...   
195  196   NaN              Memories Off: Innocent File     PS4   
196  197   NaN                              School Wars     PSP   
197  198   NaN             Flowers: Le volume sur hiver     PSV   
198  199   NaN                 Kuroyukihime: Snow Black     PSP   
199  200   NaN  Moujuutsukai to Oujisama: Flower & Snow     PSV   

            Publisher        Developer  VGChartz Score  Critic Score  \
0             Unknown       Neko Works             NaN           NaN   
1             Unknown       Neko Works             