<a href="https://colab.research.google.com/github/COMM599-Fall2023/fall_2023/blob/yueqihua-final-project/student%20folders/yueqihua/final/data_collection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Collection

---

*Using twitch_data_full.csv to create a cleaned Twitch dataset and a games dataset with game information scraped from Steam*

*   Input: twitch_data_full.csv
*   Output: twitch_data.csv, game_data.csv

## Install dependencies and import modules

In [None]:
!pip install pandas



In [None]:
from getpass import getpass
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import re
import time
from tqdm import tqdm
import unicodedata

## Twitch Streaming Data

Using a [dataset from Kaggle](https://www.kaggle.com/datasets/rankirsh/evolution-of-top-games-on-twitch)

In [None]:
# loading and cleaning dataset

# read with the first CSV column as index, then move it straight back into a regular column
twitch_df = (
    pd.read_csv("datasets/twitch_data_full.csv", encoding='latin-1', index_col=0)
    .reset_index()
    .rename(columns=str.lower) # lower-case every column name
)

# game names: force to str and bring them to the NFC (composed) unicode form
twitch_df['game'] = twitch_df['game'].map(lambda name: unicodedata.normalize('NFC', str(name)))
# months: zero-pad to two digits, e.g. 3 -> "03"
twitch_df['month'] = twitch_df['month'].map("{:02d}".format)
# years: keep as text
twitch_df['year'] = twitch_df['year'].astype(str)

twitch_df.head()

Unnamed: 0,rank,game,month,year,hours_watched,hours_streamed,peak_viewers,peak_channels,streamers,avg_viewers,avg_channels,avg_viewer_ratio
0,1,League of Legends,1,2016,94377226,1362044,530270,2903,129172,127021,1833,69.29
1,2,Counter-Strike: Global Offensive,1,2016,47832863,830105,372654,2197,120849,64378,1117,57.62
2,3,Dota 2,1,2016,45185893,433397,315083,1100,44074,60815,583,104.26
3,4,Hearthstone,1,2016,39936159,235903,131357,517,36170,53749,317,169.29
4,5,Call of Duty: Black Ops III,1,2016,16153057,1151578,71639,3620,214054,21740,1549,14.03


In [None]:
# checking column dtypes after cleaning (month and year were converted to strings above)
twitch_df.dtypes

rank                  int64
game                 object
month                object
year                 object
hours_watched         int64
hours_streamed        int64
peak_viewers          int64
peak_channels         int64
streamers             int64
avg_viewers           int64
avg_channels          int64
avg_viewer_ratio    float64
dtype: object

In [None]:
# checking how many rows of data each game has

# rows per game, largest first
game_count = (
    twitch_df.groupby('game')['game']
    .count()
    .sort_values(ascending=False)
)

# two-column frame: game name and its row count
game_count_df = game_count.to_frame(name='count').reset_index()

game_count_df # 2150 x 2

Unnamed: 0,game,count
0,Dungeons & Dragons,94
1,Retro,94
2,Magic: The Gathering,93
3,Warcraft III,93
4,Super Mario 64,93
...,...,...
2145,Left 4 Dead,1
2146,Last Tide,1
2147,LEGO Worlds,1
2148,LEGO The Lord of the Rings,1


## Steam Games Data

Using Twitch dataset to get Steam game data

##### Filtering out games from Twitch dataset that are not available on Steam

In [None]:
# defining functions

def get_steam_game_list(): # gets all available apps on steam (includes dlcs, skin packs, etc)
    """Download the list of all apps from the Steam web API.

    Returns
    -------
    list of dict
        The entries of ``response['applist']['apps']``, each passed through
        ``remove_registered_symbol``. An empty list is returned (after printing
        a message) when the request fails or the response cannot be parsed.
    """
    url = "https://api.steampowered.com/ISteamApps/GetAppList/v2"

    try:
        response = requests.get(url, timeout=15)
        response.raise_for_status() # HTTP error statuses count as failures instead of being parsed
        all_steam_games = response.json()['applist']['apps']
        return [remove_registered_symbol(game.copy()) for game in all_steam_games]
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # only network / parsing problems are handled here, so Ctrl+C and unrelated bugs still surface
        print("Error fetching the Steam game list.")
        return []

def remove_registered_symbol(game): # removes unnecessary symbols
    """Strip every '®' from ``game['name']`` in place and return the same dict."""
    cleaned_name = game['name'].replace('®', '')
    game['name'] = cleaned_name
    return game

def check_games_on_steam(twitch_games, steam_games): # compares games in the twitch dataset and the steam website
    """Match Twitch game names against Steam app names (exact string match).

    Parameters
    ----------
    twitch_games : iterable of str
        Game names from the Twitch dataset.
    steam_games : iterable of dict
        Steam apps, each with a 'name' and an 'appid' key.

    Returns
    -------
    dict
        ``{str(appid): game name}`` for every Twitch game found on Steam,
        except the names listed in ``to_omit``.
    """
    to_omit = ['Retro', 'Dofus', 'Just Chatting', 'Live'] # these categories are not game-related but are names of games on steam

    # name -> appid lookup; when Steam lists a name more than once the last entry wins
    appid_by_name = {}
    for steam_game in steam_games:
        appid_by_name[steam_game['name']] = steam_game['appid']

    return {
        str(appid_by_name[title]): title
        for title in twitch_games
        if title in appid_by_name and title not in to_omit
    }

In [None]:
# getting list of unique game names in the twitch dataset (order of first appearance)

twitch_games = twitch_df['game'].drop_duplicates().tolist()
print(len(twitch_games)) # 2150 in the recorded run

2150


In [None]:
# getting list of all apps on steam (the helper returns an empty list if the request fails)

all_steam_games = get_steam_game_list()
print(len(all_steam_games))

181671


In [None]:
# finding common games in twitch_games and all_steam_games; game_list maps appid (as a string) -> game name

game_list = check_games_on_steam(twitch_games, all_steam_games)
print(len(game_list)) # 1120 in the recorded run; the number can differ between runs depending on what the Steam API returns

1120


##### Scraping Steam game information from game_list

In [None]:
### optional: uncomment and load dataset instead of scraping, then you can skip the scraping part
# game_data_df = pd.read_csv("datasets/game_data.csv", encoding='utf-8-sig', index_col=0)

In [None]:
# defining functions

def get_game_data(appid, game) : # returns game data for an appid
    """Fetch one game's details from the Steam store API and flatten them into a dict.

    Parameters
    ----------
    appid : str or int
        Steam app id. It is converted to ``str`` because the JSON response is
        keyed by the id as a string.
    game : str
        Game name stored in the result under 'game' and used in the printed messages.

    Returns
    -------
    dict or None
        Dict with the keys 'appid', 'game', 'release_date' (YYYY-MM-DD),
        'required_age', 'is_free', 'price' (dollars, 0 for free games),
        'description' (plain text), 'developers', 'publishers', 'platforms',
        'score' (Metacritic score or None) and 'genres'; multi-valued fields are
        comma-separated strings. None is returned when the request fails, when
        Steam has no data for the app, or when the data lacks an expected field
        or has a release date that is not in the "%b %d, %Y" format.
    """
    url = "https://store.steampowered.com/api/appdetails"
    appid = str(appid)
    params = {
        'appids' : appid,
        'cc' : 'US',
        'l' : 'en'
    }

    # step 1: the request itself. Only genuine request problems trigger the 30 second back-off.
    try :
        response = requests.get(url, params=params, timeout=15)
        response.raise_for_status()

        result = response.json()
        if not isinstance(result, dict) :
            raise ValueError("unexpected response body")
    except (requests.RequestException, ValueError) :
        print("API request failed for game: ", game)
        time.sleep(30)
        return None

    # step 2: Steam answered, but has nothing for this app
    entry = result.get(appid)
    if not isinstance(entry, dict) or "data" not in entry :
        print("Data not found for game: ", game)
        return None

    # step 3: flatten the payload. A missing field (e.g. no 'price_overview') or a release date
    # that is not an actual date is a data problem, not an API failure, so there is no sleep here.
    try :
        return _parse_game_details(entry['data'], game)
    except (KeyError, ValueError, TypeError, AttributeError) :
        print("Could not parse data for game: ", game)
        return None


def _parse_game_details(data, game) :
    """Turn the 'data' object of an appdetails response into the flat game_data dict."""
    input_format = "%b %d, %Y"
    output_format = "%Y-%m-%d"
    date_string = data['release_date']['date']
    date_input = datetime.strptime(date_string, input_format)
    release_date = date_input.strftime(output_format)

    # the description arrives as HTML; keep only its text
    description = BeautifulSoup(data['detailed_description'], 'html.parser')
    description = description.get_text()

    if data['is_free'] :
        price = 0
    else :
        price = data['price_overview']['final'] / 100 # changing units from cents to dollars

    # 'platforms' maps platform name -> bool; keep the names flagged as true
    platforms_list = [ key.capitalize() for key, value in data['platforms'].items() if value ]
    platforms = ", ".join(platforms_list)

    genres_list = [ genre["description"] for genre in data['genres'] ]
    genres = ", ".join(genres_list)

    developers = ", ".join(data['developers'])
    publishers =  ", ".join(data['publishers'])

    # the Metacritic score is optional
    if 'metacritic' in data and 'score' in data['metacritic']:
        score = data['metacritic']['score']
    else:
        score = None

    return {
        'appid' : data['steam_appid'],
        'game' : game,
        'release_date' : release_date,
        'required_age' : data['required_age'],
        'is_free' : data['is_free'],
        'price' : price,
        'description' : description,
        'developers' : developers,
        'publishers' : publishers,
        'platforms' : platforms,
        'score' : score,
        'genres' : genres
    }

In [None]:
# testing the scraper on a single appid (this sends a live request to the Steam store API)

get_game_data('322330', 'Don\'t Starve Together')

{'appid': 322330,
 'game': "Don't Starve Together",
 'release_date': '2016-04-21',
 'required_age': 0,
 'is_free': False,
 'price': 5.09,
 'description': 'Explore TogetherDiscover and explore a massive procedurally generated and biome-rich world with countless resources and threats. Whether you stick to the surface world, go spelunking in the caves, dive deeper into the Ancient Archive, or set sail for the Lunar islands, it will be a long time before you run out of things to do.Fight TogetherSeasonal bosses, wandering menaces, lurking shadow creatures, and plenty of flora and fauna ready to turn you into a spooky ghost.Farm TogetherPlow fields and sow seeds to grow the farm of your dreams. Tend to your crops to help your fellow survivors stay fed and ready for the challenges to come.Build TogetherProtect yourself, your friends, and everything you have managed to gather, because you can be sure, somebody or something is going to want it back.About the GameFight, Farm, Build and Explore 

In [None]:
# this takes around 1 hour

game_data_list = []

count = 0
# walk through every appid -> game pair and collect whatever the scraper returns
for appid, game in tqdm(game_list.items(), desc="Processing games") :
    game_data = get_game_data(appid, game)

    if game_data is not None : # games without usable data are simply skipped
        game_data_list.append(game_data)
        count = len(game_data_list)

        if count % 50 == 0 :
            print(f"Scraped {count} games. Sleeping for 60 seconds...")
            time.sleep(60) # sleeping because if not the API blocks the requests

game_data_df = pd.DataFrame(game_data_list)

game_data_df # one row per successfully scraped game

Processing games:   0%|          | 1/1120 [00:00<05:33,  3.36it/s]

Data not found for game:  League of Legends


Processing games:   1%|          | 7/1120 [00:02<07:52,  2.36it/s]

API request failed for game:  Grand Theft Auto V


Processing games:   1%|          | 12/1120 [01:04<1:27:59,  4.77s/it]

API request failed for game:  NBA 2K16


Processing games:   1%|▏         | 15/1120 [02:05<3:17:15, 10.71s/it]

API request failed for game:  Rocket League


Processing games:   2%|▏         | 18/1120 [03:06<3:53:49, 12.73s/it]

API request failed for game:  The Binding of Isaac: Repentance


Processing games:   5%|▍         | 52/1120 [04:19<06:18,  2.82it/s]

API request failed for game:  Rayman Legends


Processing games:   5%|▍         | 55/1120 [05:21<2:43:16,  9.20s/it]

Scraped 50 games. Sleeping for 60 seconds...


Processing games:   5%|▌         | 59/1120 [06:22<2:33:22,  8.67s/it]

API request failed for game:  Ori and the Blind Forest


Processing games:   6%|▌         | 64/1120 [07:24<1:46:47,  6.07s/it]

API request failed for game:  Darksiders II


Processing games:   8%|▊         | 84/1120 [08:32<08:03,  2.14it/s]

API request failed for game:  The Culling


Processing games:   9%|▉         | 100/1120 [09:38<07:20,  2.32it/s]

API request failed for game:  WWE 2K16


Processing games:   9%|▉         | 104/1120 [10:39<1:51:44,  6.60s/it]

API request failed for game:  Far Cry 4


Processing games:  10%|▉         | 110/1120 [11:42<1:09:17,  4.12s/it]

Scraped 100 games. Sleeping for 60 seconds...


Processing games:  10%|█         | 112/1120 [12:42<4:09:06, 14.83s/it]

API request failed for game:  Atlas Reactor


Processing games:  11%|█         | 122/1120 [13:47<25:34,  1.54s/it]

Data not found for game:  TERA


Processing games:  13%|█▎        | 148/1120 [13:56<05:51,  2.76it/s]

API request failed for game:  ARK: Survival Of The Fittest


Processing games:  14%|█▎        | 153/1120 [14:58<1:15:41,  4.70s/it]

API request failed for game:  Orcs Must Die! Unchained


Processing games:  14%|█▍        | 160/1120 [16:00<45:01,  2.81s/it]  

Data not found for game:  Call of Duty: Modern Warfare 2


Processing games:  15%|█▍        | 163/1120 [16:01<18:54,  1.19s/it]

Data not found for game:  Call of Duty: Black Ops


Processing games:  15%|█▍        | 166/1120 [16:03<10:18,  1.54it/s]

Scraped 150 games. Sleeping for 60 seconds...


Processing games:  15%|█▌        | 169/1120 [17:04<2:27:23,  9.30s/it]

Data not found for game:  Call of Duty 4: Modern Warfare


Processing games:  15%|█▌        | 171/1120 [17:04<1:14:56,  4.74s/it]

Data not found for game:  NBA 2K15


Processing games:  15%|█▌        | 173/1120 [17:05<39:34,  2.51s/it]

Data not found for game:  Pro Evolution Soccer 2016


Processing games:  16%|█▌        | 176/1120 [17:06<17:06,  1.09s/it]

Data not found for game:  Call of Duty: Modern Warfare 3


Processing games:  16%|█▌        | 178/1120 [17:07<10:59,  1.43it/s]

Data not found for game:  Shattered Skies


Processing games:  17%|█▋        | 186/1120 [17:10<05:12,  2.99it/s]

Data not found for game:  Chronicle: RuneScape Legends


Processing games:  17%|█▋        | 187/1120 [17:10<04:55,  3.15it/s]

Data not found for game:  LawBreakers


Processing games:  17%|█▋        | 195/1120 [17:13<05:43,  2.69it/s]

API request failed for game:  Necropolis


Processing games:  18%|█▊        | 202/1120 [18:16<38:20,  2.51s/it]

Data not found for game:  Mafia II


Processing games:  18%|█▊        | 203/1120 [18:16<28:32,  1.87s/it]

Data not found for game:  BioShock


Processing games:  18%|█▊        | 206/1120 [18:17<13:38,  1.12it/s]

API request failed for game:  Mark of the Ninja


Processing games:  18%|█▊        | 207/1120 [19:18<4:45:25, 18.76s/it]

API request failed for game:  Ghostbusters


Processing games:  20%|█▉        | 222/1120 [20:23<07:59,  1.87it/s]

Data not found for game:  Resident Evil 5


Processing games:  20%|██        | 229/1120 [20:26<06:16,  2.37it/s]

Scraped 200 games. Sleeping for 60 seconds...


Processing games:  21%|██        | 231/1120 [21:27<3:12:48, 13.01s/it]

Data not found for game:  Fallout: New Vegas


Processing games:  23%|██▎       | 255/1120 [21:35<04:54,  2.94it/s]

API request failed for game:  Revelation


Processing games:  23%|██▎       | 263/1120 [22:38<26:43,  1.87s/it]

Data not found for game:  Mass Effect 2


Processing games:  24%|██▎       | 265/1120 [22:39<16:25,  1.15s/it]

API request failed for game:  The Political Machine 2016


Processing games:  25%|██▌       | 283/1120 [23:46<05:33,  2.51it/s]

Scraped 250 games. Sleeping for 60 seconds...


Processing games:  26%|██▌       | 286/1120 [24:47<2:08:07,  9.22s/it]

API request failed for game:  Space Hulk: Deathwing


Processing games:  28%|██▊       | 309/1120 [25:55<04:41,  2.88it/s]

API request failed for game:  Brawl of Ages


Processing games:  30%|██▉       | 335/1120 [27:06<04:39,  2.81it/s]

Scraped 300 games. Sleeping for 60 seconds...


Processing games:  30%|███       | 338/1120 [28:07<1:59:38,  9.18s/it]

API request failed for game:  Worlds Adrift


Processing games:  31%|███       | 343/1120 [29:09<1:20:30,  6.22s/it]

API request failed for game:  NBA Playgrounds


Processing games:  31%|███▏      | 352/1120 [30:13<20:51,  1.63s/it]

API request failed for game:  SOS


Processing games:  32%|███▏      | 359/1120 [31:15<32:28,  2.56s/it]

Data not found for game:  Metro 2033


Processing games:  32%|███▏      | 362/1120 [31:16<14:33,  1.15s/it]

API request failed for game:  Gigantic


Processing games:  35%|███▍      | 390/1120 [32:27<04:17,  2.84it/s]

Scraped 350 games. Sleeping for 60 seconds...


Processing games:  37%|███▋      | 416/1120 [33:37<03:54,  3.00it/s]

Data not found for game:  Football Manager 2018


Processing games:  37%|███▋      | 417/1120 [33:37<03:43,  3.14it/s]

Data not found for game:  Fallout 3


Processing games:  39%|███▉      | 435/1120 [33:43<04:35,  2.48it/s]

API request failed for game:  Grand Theft Auto IV


Processing games:  40%|███▉      | 443/1120 [34:46<20:38,  1.83s/it]

Scraped 400 games. Sleeping for 60 seconds...


Processing games:  42%|████▏     | 472/1120 [35:57<03:37,  2.97it/s]

API request failed for game:  Radical Heights


Processing games:  43%|████▎     | 485/1120 [37:02<06:20,  1.67it/s]

API request failed for game:  Bless Online


Processing games:  44%|████▍     | 496/1120 [38:07<09:26,  1.10it/s]

Data not found for game:  Deathgarden: BLOODHARVEST
Scraped 450 games. Sleeping for 60 seconds...


Processing games:  48%|████▊     | 532/1120 [39:21<03:39,  2.68it/s]

API request failed for game:  Shadow of the Tomb Raider


Processing games:  48%|████▊     | 536/1120 [40:22<1:03:37,  6.54s/it]

API request failed for game:  The Bard's Tale IV


Processing games:  49%|████▉     | 548/1120 [41:27<07:58,  1.19it/s]

Scraped 500 games. Sleeping for 60 seconds...


Processing games:  50%|████▉     | 556/1120 [42:29<17:23,  1.85s/it]

Data not found for game:  OVERKILL's The Walking Dead


Processing games:  50%|████▉     | 557/1120 [42:30<13:07,  1.40s/it]

API request failed for game:  Football Manager 2019


Processing games:  50%|█████     | 562/1120 [43:31<44:49,  4.82s/it]  

Data not found for game:  S.T.A.L.K.E.R.: Clear Sky


Processing games:  51%|█████     | 572/1120 [43:35<04:29,  2.03it/s]

API request failed for game:  Just Cause 4


Processing games:  52%|█████▎    | 588/1120 [44:42<04:18,  2.06it/s]

API request failed for game:  Anno 1800


Processing games:  54%|█████▍    | 603/1120 [45:47<04:21,  1.97it/s]

Scraped 550 games. Sleeping for 60 seconds...


Processing games:  55%|█████▍    | 611/1120 [46:50<15:30,  1.83s/it]

Data not found for game:  Borderlands


Processing games:  57%|█████▋    | 636/1120 [46:59<03:07,  2.58it/s]

API request failed for game:  SAMURAI SHODOWN


Processing games:  57%|█████▋    | 639/1120 [48:00<1:13:37,  9.18s/it]

Data not found for game:  Conqueror's Blade


Processing games:  57%|█████▋    | 643/1120 [48:02<19:35,  2.46s/it]

Data not found for game:  F1 2019


Processing games:  59%|█████▊    | 657/1120 [48:07<02:50,  2.71it/s]

Scraped 600 games. Sleeping for 60 seconds...


Processing games:  61%|██████    | 682/1120 [49:16<02:37,  2.78it/s]

API request failed for game:  Football Manager 2020


Processing games:  63%|██████▎   | 708/1120 [50:26<03:00,  2.28it/s]

Scraped 650 games. Sleeping for 60 seconds...


Processing games:  68%|██████▊   | 758/1120 [51:44<02:08,  2.82it/s]

Scraped 700 games. Sleeping for 60 seconds...


Processing games:  68%|██████▊   | 761/1120 [52:45<54:53,  9.17s/it]  

API request failed for game:  Madden NFL 21


Processing games:  71%|███████   | 797/1120 [53:59<01:55,  2.78it/s]

API request failed for game:  Football Manager 2021


Processing games:  72%|███████▏  | 807/1120 [55:02<05:39,  1.08s/it]

API request failed for game:  The Medium


Processing games:  72%|███████▏  | 811/1120 [56:04<34:29,  6.70s/it]

Scraped 750 games. Sleeping for 60 seconds...


Processing games:  73%|███████▎  | 822/1120 [57:08<04:55,  1.01it/s]

API request failed for game:  The Movies


Processing games:  76%|███████▌  | 849/1120 [58:17<01:31,  2.97it/s]

Data not found for game:  Aion


Processing games:  76%|███████▌  | 851/1120 [58:18<01:27,  3.09it/s]

Data not found for game:  Grand Chase


Processing games:  77%|███████▋  | 858/1120 [58:21<01:41,  2.57it/s]

API request failed for game:  Madden NFL 22


Processing games:  77%|███████▋  | 865/1120 [59:23<10:28,  2.47s/it]

Scraped 800 games. Sleeping for 60 seconds...


Processing games:  78%|███████▊  | 869/1120 [1:00:24<29:25,  7.03s/it]

API request failed for game:  Movie World


Processing games:  78%|███████▊  | 875/1120 [1:01:26<16:57,  4.15s/it]

API request failed for game:  FIFA 22


Processing games:  81%|████████  | 903/1120 [1:02:36<01:12,  2.99it/s]

API request failed for game:  Arcane


Processing games:  81%|████████  | 908/1120 [1:03:38<16:39,  4.71s/it]

API request failed for game:  Myth of Empires


Processing games:  81%|████████  | 909/1120 [1:04:39<1:15:22, 21.44s/it]

API request failed for game:  Football Manager 2022


Processing games:  82%|████████▏ | 920/1120 [1:05:43<04:16,  1.28s/it]

Scraped 850 games. Sleeping for 60 seconds...


Processing games:  83%|████████▎ | 927/1120 [1:06:46<08:20,  2.59s/it]

API request failed for game:  Fall Guys


Processing games:  87%|████████▋ | 971/1120 [1:08:02<00:53,  2.76it/s]

Scraped 900 games. Sleeping for 60 seconds...


Processing games:  90%|████████▉ | 1003/1120 [1:09:14<00:43,  2.69it/s]

API request failed for game:  The First Descendant


Processing games:  90%|█████████ | 1010/1120 [1:10:17<04:32,  2.48s/it]

API request failed for game:  Football Manager 2023


Processing games:  91%|█████████ | 1015/1120 [1:11:19<08:50,  5.05s/it]

API request failed for game:  Goat Simulator 3


Processing games:  91%|█████████▏| 1024/1120 [1:12:22<02:32,  1.59s/it]

Scraped 950 games. Sleeping for 60 seconds...


Processing games:  93%|█████████▎| 1044/1120 [1:13:30<00:28,  2.65it/s]

Data not found for game:  Lineage II


Processing games:  94%|█████████▍| 1050/1120 [1:13:33<00:25,  2.77it/s]

API request failed for game:  THE FINALS


Processing games:  95%|█████████▍| 1059/1120 [1:14:36<01:25,  1.39s/it]

API request failed for game:  The Godfather


Processing games:  95%|█████████▌| 1066/1120 [1:15:39<02:18,  2.57s/it]

Data not found for game:  Winning Post 10


Processing games:  96%|█████████▋| 1078/1120 [1:15:43<00:16,  2.52it/s]

Scraped 1000 games. Sleeping for 60 seconds...


Processing games:  97%|█████████▋| 1083/1120 [1:16:45<02:53,  4.68s/it]

API request failed for game:  Blue Protocol


Processing games: 100%|██████████| 1120/1120 [1:17:59<00:00,  4.18s/it]


Unnamed: 0,appid,game,release_date,required_age,is_free,price,description,developers,publishers,platforms,score,genres
0,570,Dota 2,2013-07-09,0,True,0.00,"The most-played game on Steam.Every day, milli...",Valve,Valve,"Windows, Mac, Linux",90.0,"Action, Strategy, Free to Play"
1,311210,Call of Duty: Black Ops III,2015-11-05,17,False,19.79,Zombies Chronicles Deluxe EditionNow with more...,"Treyarch, Aspyr (Mac)","Activision, Aspyr (Mac)","Windows, Mac",73.0,"Action, Adventure"
2,1407200,World of Tanks,2021-04-28,0,True,0.00,Command over 600 machines from World War II th...,Wargaming Group Limited,Wargaming Group Limited,Windows,80.0,"Action, Massively Multiplayer, Simulation, Fre..."
3,359550,Tom Clancy's Rainbow Six Siege,2015-12-01,17,False,19.99,Edition ComparisonDeluxe EditionThe Tom Clancy...,Ubisoft Montreal,Ubisoft,Windows,,Action
4,1343400,RuneScape,2020-10-14,0,True,0.00,Journey into the Sixth Age of Gielinor and dis...,Jagex Ltd,Jagex Ltd,"Windows, Mac",,"Free to Play, Massively Multiplayer, RPG"
...,...,...,...,...,...,...,...,...,...,...,...,...
1035,2203070,Dragonheir: Silent Gods,2023-10-26,0,True,0.00,Join Our DiscordBug Report1. Join the official...,SGRA Studio,Nuverse Pte. Ltd.,Windows,,"Adventure, Massively Multiplayer, RPG, Strateg..."
1036,2107670,Warhaven,2023-09-20,0,True,0.00,JOIN OUR DISCORD!About the GameENTER THE FRAYW...,NEXON,NEXON,Windows,,"Action, Free to Play, Early Access"
1037,2140510,Town of Salem 2,2023-08-25,0,True,0.00,The Town of Salem is in turmoil as the Coven h...,BlankMediaGames LLC,BlankMediaGames LLC,"Windows, Mac",,"Casual, Indie, RPG, Strategy, Free to Play"
1038,2137700,I'm on Observation Duty 6,2023-09-01,0,False,11.99,The most ambitious I'm on Observation Duty gam...,"Notovia, Dreamloop Games",Notovia,"Windows, Mac, Linux",,Indie


In [None]:
# basic statistics

print("Size of the dataset:", game_data_df.shape)
print('----------')

# rows holding the highest / lowest score (games without a score never match)
scores = game_data_df['score']
max_score = game_data_df.loc[scores.eq(scores.max())]
min_score = game_data_df.loc[scores.eq(scores.min())]
print("*** Game Ratings:",
      f"\n\tAverage score: {game_data_df['score'].mean()}"
      f"\n\tHighest scored game: {max_score['game'].values[0]} [{max_score['score'].values[0]}]",
      f"\n\tLowest scored game: {min_score['game'].values[0]} [{min_score['score'].values[0]}]")
print('----------')

print("*** Free or Paid:")
is_free = game_data_df['is_free']
print(f"\tFree: {len(game_data_df[is_free == True])}")
print(f"\tPaid: {len(game_data_df[is_free == False])}")
print('----------')

print("*** Top:")
# most frequent developer(s) and publisher(s); the frequency table is built once per column
developer_counts = game_data_df['developers'].value_counts()
for mode in game_data_df['developers'].mode() :
    mode_count = developer_counts[mode]
    print(f"\tDeveloper: {mode} [{mode_count}]")
publisher_counts = game_data_df['publishers'].value_counts()
for mode in game_data_df['publishers'].mode() :
    mode_count = publisher_counts[mode]
    print(f"\tPublisher: {mode} [{mode_count}]")

Size of the dataset: (1040, 12)
----------
*** Game Ratings: 
	Average score: 78.87550200803213
	Highest scored game: Disco Elysium [97.0] 
	Lowest scored game: Hello Neighbor [38.0]
----------
*** Free or Paid:
	Free: 118
	Paid: 922
----------
*** Top:
	Developer: CAPCOM Co., Ltd. [15]
	Publisher: Bethesda Softworks [25]


In [None]:
# save to csv
# relative path: same "datasets/" folder the input is read from and the optional reload cell expects,
# so the notebook is not tied to Colab's /content directory

game_data_df.to_csv("datasets/game_data.csv", index=True, encoding='utf-8-sig')

## Back to Twitch Streaming Data
Filtering Twitch dataset so it only contains games in Steam dataset

In [None]:
# names of the games that were successfully scraped from steam
# (kept under its own name so the appid -> game dict in `game_list` is not overwritten;
#  overwriting it with a list would break re-running the scraping cell, which calls game_list.items())
steam_game_names = game_data_df['game'].tolist()

# keep only the twitch rows for those games and renumber the rows from 0
twitch_df = twitch_df[twitch_df['game'].isin(steam_game_names)].reset_index(drop=True)

twitch_df # 8071 x 12

Unnamed: 0,rank,game,month,year,hours_watched,hours_streamed,peak_viewers,peak_channels,streamers,avg_viewers,avg_channels,avg_viewer_ratio
0,3,Dota 2,01,2016,45185893,433397,315083,1100,44074,60815,583,104.26
1,5,Call of Duty: Black Ops III,01,2016,16153057,1151578,71639,3620,214054,21740,1549,14.03
2,15,World of Tanks,01,2016,4993627,128356,39358,368,16121,6720,172,38.90
3,16,Tom Clancy's Rainbow Six Siege,01,2016,4866039,242134,25742,694,42244,6549,325,20.10
4,17,RuneScape,01,2016,4632857,74628,18809,167,7628,6235,100,62.08
...,...,...,...,...,...,...,...,...,...,...,...,...
8066,190,I'm on Observation Duty 6,09,2023,581068,4730,24948,30,1249,808,6,122.85
8067,191,Deceit 2,09,2023,575900,3089,43348,55,987,800,4,186.44
8068,194,Valheim,09,2023,568622,75546,8861,203,7975,790,105,7.53
8069,196,PlateUp!,09,2023,560064,18617,16476,71,4034,778,25,30.08


In [None]:
# games with the most months in the top 200

# rows (one per month in the ranking) per game, largest first
game_count = (
    twitch_df.groupby('game')['game']
    .count()
    .sort_values(ascending=False)
)

# two-column frame: game name and its row count
game_count_df = game_count.to_frame(name='count').reset_index()

game_count_df

Unnamed: 0,game,count
0,Tom Clancy's Rainbow Six Siege,93
1,Dota 2,93
2,Rust,93
3,Path of Exile,93
4,World of Warships,93
...,...,...
1035,LEGO Worlds,1
1036,Last Tide,1
1037,Left 4 Dead,1
1038,Life is Strange: True Colors,1


In [None]:
# save to csv
# relative path: same "datasets/" folder the input file is read from,
# so the notebook is not tied to Colab's /content directory

twitch_df.to_csv("datasets/twitch_data.csv", index=True, encoding='utf-8-sig')