# Web scraping
### This file was run in a local environment.

In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem; the CSV paths used in this
# notebook all live under this mount point.
mount_point = '/content/drive'
drive.mount(mount_point)

In [None]:
#!pip install requests
#!pip install beautifulsoup4
#!pip install pandas

import requests
from bs4 import BeautifulSoup
import pandas as pd

# Load the games table whose description/tags columns are still empty;
# the scraping cell fills them in.
games_csv_path = '/content/drive/MyDrive/BT4222/Intermediate data/Games without Description.csv'
games = pd.read_csv(games_csv_path)
games.head()


Unnamed: 0.1,Unnamed: 0,app_id,title,win,mac,linux,rating,positive_ratio,user_reviews,price_original,discount,steam_deck,release_year,release_month,description,tags
0,35496,730,Counter-Strike: Global Offensive,1,1,1,7,88,7297791,0.0,0,1,2012,August,,[]
1,36135,578080,PUBG: BATTLEGROUNDS,1,0,0,4,57,2187691,0.0,0,1,2017,December,,[]
2,35494,570,Dota 2,1,1,1,7,82,1998934,0.0,0,1,2013,July,,[]
3,35748,271590,Grand Theft Auto V,1,0,0,7,86,1431104,0.0,0,1,2015,April,,[]
4,35862,359550,Tom Clancy's Rainbow Six® Siege,1,0,0,7,86,966803,19.99,0,1,2015,December,,[]


In [None]:
base_url = 'https://store.steampowered.com/app/'
# Without a timeout, requests can block forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 10


def scrape_store_page(session, app_id):
    """Fetch one Steam store page and extract its short description and tags.

    Parameters
    ----------
    session : requests.Session
        Session used for the HTTP GET, so connections are reused across calls.
    app_id : int or str
        Steam application id; appended to ``base_url`` to form the page URL.

    Returns
    -------
    tuple
        ``(description, tags)``. ``description`` is the stripped text of the
        first ``div.game_description_snippet`` element, or ``None`` when the
        page has no such element. ``tags`` is the list of non-empty
        ``a.app_tag`` labels with ``\\r``, ``\\n`` and ``\\t`` removed.

    Raises
    ------
    requests.RequestException
        On connection errors, timeouts, or a non-2xx HTTP status.
    """
    url = base_url + str(app_id) + '/'
    response = session.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    snippet = soup.find('div', {'class': 'game_description_snippet'})
    description = snippet.text.strip() if snippet is not None else None

    tags = []
    for anchor in soup.find_all('a', {'class': 'app_tag'}):
        label = anchor.text.strip()
        if not label:
            continue
        # Tag labels are padded with tabs/newlines in the page markup.
        tags.append(label.replace('\r', '').replace('\n', '').replace('\t', '').strip())
    return description, tags


# The description column is entirely empty in the input CSV, so it is loaded
# as NaN (normally float64). Cast it to object up front so that storing
# strings does not rely on silent dtype upcasting during item assignment.
games['description'] = games['description'].astype(object)

# App ids whose page could not be fetched; their rows are left untouched so a
# single network failure does not abort the whole run.
failed_app_ids = []

with requests.Session() as session:
    for index, row in games.iterrows():
        app_id = row['app_id']
        try:
            description, page_tags = scrape_store_page(session, app_id)
        except requests.RequestException:
            failed_app_ids.append(app_id)
            continue

        if description is not None:
            games.at[index, 'description'] = description
        if page_tags:
            games.at[index, 'tags'] = ', '.join(page_tags)

if failed_app_ids:
    print(f'{len(failed_app_ids)} store pages could not be fetched; see failed_app_ids')

games.tail()


Unnamed: 0.1,Unnamed: 0,app_id,title,win,mac,linux,rating,positive_ratio,user_reviews,price_original,discount,steam_deck,release_year,release_month,description,tags
7141,26742,1633730,HOT WHEELS™ - Bone Shaker™ Unleashed Edition,1,0,0,6,89,19,0.99,60,1,2021,September,,Racing
7142,26720,702950,FSX Steam Edition: McDonnell Douglas F-4 Phant...,1,0,0,4,52,19,34.99,0,1,2018,March,,Simulation
7143,35133,1011950,CHAOS;CHILD - LIMITED EDITION OST,1,0,0,6,100,19,0.0,0,1,2019,January,,Adventure
7144,30614,1068400,Skirmish Line - United Front,1,1,1,6,100,19,4.99,0,1,2019,May,,"Strategy, Action, Indie, Casual, Simulation"
7145,24109,246256,Pac-Man Championship Edition DX+: Pac Steps BGM,1,0,0,6,100,19,0.99,0,1,2013,September,,Action


In [None]:
# Number of rows that still have no description after scraping.
games['description'].isnull().sum()

6471

In [None]:
# Number of rows whose tags are still the '[]' placeholder from the input CSV.
(games['tags'] == '[]').sum()

83

In [None]:
# Persist the scraped table and read it straight back as a round-trip check.
# The file is written to the same Drive location it is read from: writing to
# the working directory while reading the Drive copy would verify (and
# possibly load) a stale file rather than the one just produced.
scraped_csv_path = '/content/drive/MyDrive/BT4222/Intermediate data/Games_scraped.csv'
games.to_csv(scraped_csv_path, index=False)
new_games = pd.read_csv(scraped_csv_path)
new_games.head()

Unnamed: 0.1,Unnamed: 0,app_id,title,win,mac,linux,rating,positive_ratio,user_reviews,price_original,discount,steam_deck,release_year,release_month,description,tags
0,35496,730,Counter-Strike: Global Offensive,1,1,1,7,88,7297791,0.0,0,1,2012,August,"For over two decades, Counter-Strike has offer...","FPS, Shooter, Multiplayer, Competitive, Action..."
1,36135,578080,PUBG: BATTLEGROUNDS,1,0,0,4,57,2187691,0.0,0,1,2017,December,Play PUBG: BATTLEGROUNDS for free. Land on str...,"Survival, Shooter, Battle Royale, Multiplayer,..."
2,35494,570,Dota 2,1,1,1,7,82,1998934,0.0,0,1,2013,July,"Every day, millions of players worldwide enter...","Free to Play, MOBA, Multiplayer, Strategy, eSp..."
3,35748,271590,Grand Theft Auto V,1,0,0,7,86,1431104,0.0,0,1,2015,April,Grand Theft Auto V for PC offers players the o...,"Open World, Action, Multiplayer, Crime, Automo..."
4,35862,359550,Tom Clancy's Rainbow Six® Siege,1,0,0,7,86,966803,19.99,0,1,2015,December,"Tom Clancy's Rainbow Six® Siege is an elite, t...","FPS, PvP, eSports, Shooter, Multiplayer, Tacti..."


In [None]:
# test scrape of a single store page
# NOTE(review): this was labelled as a CS2 test, but the games table in this
# notebook lists Counter-Strike under app_id 730, not 662350 -- confirm which
# app is intended.
url = 'https://store.steampowered.com/app/662350/'
# Timeout so a stalled connection cannot hang the cell indefinitely.
response = requests.get(url, timeout=10)
soup = BeautifulSoup(response.text, "html.parser")

# Keep the raw elements (not just their text) so the markup can be inspected.
des = list(soup.find_all('div', {'class': 'game_description_snippet'}))
tags = list(soup.find_all('a', {'class': 'app_tag'}))

print(des)
print(tags)

[]
[<a class="app_tag" href="https://store.steampowered.com/tags/en/Action/?snr=1_5_9__409" style="display: none;">
												Action												</a>, <a class="app_tag" href="https://store.steampowered.com/tags/en/Adventure/?snr=1_5_9__409" style="display: none;">
												Adventure												</a>, <a class="app_tag" href="https://store.steampowered.com/tags/en/RPG/?snr=1_5_9__409" style="display: none;">
												RPG												</a>, <a class="app_tag" href="https://store.steampowered.com/tags/en/Open%20World/?snr=1_5_9__409" style="display: none;">
												Open World												</a>]
