In [1]:
# Bibliotecas a utilizar
import pandas as pd
import numpy as np
import json

In [4]:
# Path to the gzip-compressed JSON file with the raw Steam games data
sg_path = '../data/raw/steam_games.json.gz'

In [5]:
# Read the gzip-compressed, line-delimited JSON (one record per line) into a DataFrame
df_steam_games = pd.read_json(
    path_or_buf=sg_path,
    lines=True,
    compression='gzip',
)

# Overview of columns, non-null counts and dtypes right after loading
df_steam_games.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120445 entries, 0 to 120444
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   publisher     24083 non-null  object 
 1   genres        28852 non-null  object 
 2   app_name      32133 non-null  object 
 3   title         30085 non-null  object 
 4   url           32135 non-null  object 
 5   release_date  30068 non-null  object 
 6   tags          31972 non-null  object 
 7   reviews_url   32133 non-null  object 
 8   specs         31465 non-null  object 
 9   price         30758 non-null  object 
 10  early_access  32135 non-null  float64
 11  id            32133 non-null  float64
 12  developer     28836 non-null  object 
dtypes: float64(2), object(11)
memory usage: 11.9+ MB


In [6]:
# Discard every row whose columns are all null; the original index labels are kept
df_steam_games = df_steam_games.dropna(how='all')

# Overview again to confirm how many rows remain
df_steam_games.info()

<class 'pandas.core.frame.DataFrame'>
Index: 32135 entries, 88310 to 120444
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   publisher     24083 non-null  object 
 1   genres        28852 non-null  object 
 2   app_name      32133 non-null  object 
 3   title         30085 non-null  object 
 4   url           32135 non-null  object 
 5   release_date  30068 non-null  object 
 6   tags          31972 non-null  object 
 7   reviews_url   32133 non-null  object 
 8   specs         31465 non-null  object 
 9   price         30758 non-null  object 
 10  early_access  32135 non-null  float64
 11  id            32133 non-null  float64
 12  developer     28836 non-null  object 
dtypes: float64(2), object(11)
memory usage: 3.4+ MB


In [7]:
# Find out which Python types are actually stored in the object column 'publisher'
publisher_types = df_steam_games['publisher'].map(type)
publisher_types.value_counts()

publisher
<class 'str'>         24083
<class 'NoneType'>     8052
Name: count, dtype: int64

In [8]:
# Walk over every column and print its name followed by the count of each Python type it holds
for column_name in df_steam_games.columns:
    type_counts = df_steam_games[column_name].map(type).value_counts()
    print(column_name, type_counts)

publisher publisher
<class 'str'>         24083
<class 'NoneType'>     8052
Name: count, dtype: int64
genres genres
<class 'list'>        28852
<class 'NoneType'>     3283
Name: count, dtype: int64
app_name app_name
<class 'str'>         32133
<class 'NoneType'>        2
Name: count, dtype: int64
title title
<class 'str'>         30085
<class 'NoneType'>     2050
Name: count, dtype: int64
url url
<class 'str'>    32135
Name: count, dtype: int64
release_date release_date
<class 'str'>         30068
<class 'NoneType'>     2067
Name: count, dtype: int64


tags tags
<class 'list'>        31972
<class 'NoneType'>      163
Name: count, dtype: int64
reviews_url reviews_url
<class 'str'>         32133
<class 'NoneType'>        2
Name: count, dtype: int64
specs specs
<class 'list'>        31465
<class 'NoneType'>      670
Name: count, dtype: int64
price price
<class 'float'>       28848
<class 'str'>          1910
<class 'NoneType'>     1377
Name: count, dtype: int64
early_access early_access
<class 'float'>    32135
Name: count, dtype: int64
id id
<class 'float'>    32135
Name: count, dtype: int64
developer developer
<class 'str'>         28836
<class 'NoneType'>     3299
Name: count, dtype: int64


In [9]:
# Preview a few random rows of the DataFrame.
# A fixed random_state makes the preview reproducible on Restart & Run All.
df_steam_games.sample(n=5, random_state=42)

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,reviews_url,specs,price,early_access,id,developer
118889,Might and Delight,"[Adventure, Indie]",Pid,Pid,http://store.steampowered.com/app/218740/Pid/,2012-10-31,"[Indie, Platformer, Adventure, Puzzle, Singlep...",http://steamcommunity.com/app/218740/reviews/?...,"[Single-player, Shared/Split Screen, Steam Ach...",9.99,0.0,218740.0,Might and Delight
107195,,"[Indie, RPG, Strategy]",Fantasy Grounds - P2 - The Dark City of the Qu...,Fantasy Grounds - P2 - The Dark City of the Qu...,http://store.steampowered.com/app/602820/Fanta...,2017-03-06,"[Strategy, RPG, Indie]",http://steamcommunity.com/app/602820/reviews/?...,"[Multi-player, Co-op, Cross-Platform Multiplay...",3.99,0.0,602820.0,"SmiteWorks USA, LLC"
116637,Atari,"[Simulation, Strategy]",911: First Responders®,911: First Responders®,http://store.steampowered.com/app/323610/911_F...,2006-04-13,"[Simulation, Strategy, Moddable, Multiplayer, ...",http://steamcommunity.com/app/323610/reviews/?...,"[Single-player, Multi-player, Co-op, Shared/Sp...",4.99,0.0,323610.0,Sixteen Tons Entertainment
93803,Flox Studios Ltd.,"[Indie, Simulation, Strategy]",Life in Bunker,Life in Bunker,http://store.steampowered.com/app/401690/Life_...,2016-02-25,"[Strategy, Simulation, Survival, Indie, Sandbo...",http://steamcommunity.com/app/401690/reviews/?...,"[Single-player, Steam Achievements, Stats]",15.99,0.0,401690.0,Flox Studios Ltd.
119938,Viva Media,[Adventure],Mata Hari,Mata Hari,http://store.steampowered.com/app/18480/Mata_H...,2008-11-21,"[Adventure, Point & Click, Female Protagonist,...",http://steamcommunity.com/app/18480/reviews/?b...,[Single-player],9.99,0.0,18480.0,4Head Studios


In [10]:
# 'genres' and 'tags' are very similar and both contain nulls that could complement each other.
genres_null_count = df_steam_games['genres'].isna().sum()
tags_null_count = df_steam_games['tags'].isna().sum()
print('Cantidad de valores núlos en la columna "genres" ', genres_null_count)
print('\nCantidad de valores núlos en la columna "tags" ', tags_null_count)

Cantidad de valores núlos en la columna "genres"  3283

Cantidad de valores núlos en la columna "tags"  163


In [12]:
# 'genres' and 'tags' carry very similar information, so each column is used to fill
# the gaps of the other one.
# The filled Series is assigned back to the column instead of calling
# fillna(..., inplace=True) on df[col]: that form operates on an intermediate object,
# is deprecated since pandas 2.2 and leaves the DataFrame unchanged under Copy-on-Write.
df_steam_games['genres'] = df_steam_games['genres'].fillna(df_steam_games['tags'])
df_steam_games['tags'] = df_steam_games['tags'].fillna(df_steam_games['genres'])

In [13]:
# Out of the 3283 nulls in 'genres' and the 163 in 'tags', only 139 rows remain null in both columns
genres_null_count = df_steam_games['genres'].isna().sum()
tags_null_count = df_steam_games['tags'].isna().sum()
print('Cantidad de valores núlos en la columna "genres" ', genres_null_count)
print('\nCantidad de valores núlos en la columna "tags" ', tags_null_count)


Cantidad de valores núlos en la columna "genres"  139

Cantidad de valores núlos en la columna "tags"  139


In [14]:
# Show the rows where 'genres' or 'tags' is still null
missing_genres_or_tags = df_steam_games['genres'].isna() | df_steam_games['tags'].isna()
df_steam_games.loc[missing_genres_or_tags]

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,reviews_url,specs,price,early_access,id,developer
88384,,,,,http://store.steampowered.com/,,,,,19.99,0.0,,
88668,Paradox Interactive,,Europa Universalis III: Heir to the Throne,Europa Universalis III: Heir to the Throne,http://store.steampowered.com/app/25806/Europa...,2009-12-15,,http://steamcommunity.com/app/25806/reviews/?b...,"[Single-player, Multi-player, Downloadable Con...",9.99,0.0,25806.0,Paradox Interactive
88779,DnS Development,,Booster Trooper Demo,Booster Trooper Demo,http://store.steampowered.com/app/27930/Booste...,2010-08-31,,http://steamcommunity.com/app/27930/reviews/?b...,[Game demo],,0.0,27930.0,DnS Development
88922,"SEGA, Feral Interactive (Mac/Linux)",,"Warhammer 40,000: Dawn of War II - Retribution...","Warhammer 40,000: Dawn of War II - Retribution...",http://store.steampowered.com/app/56436/Warham...,2011-02-28,,http://steamcommunity.com/app/56436/reviews/?b...,[Downloadable Content],0.99,0.0,56436.0,"Relic Entertainment,Feral Interactive (Mac/Linux)"
89089,Trendy Entertainment,,Dungeon Defenders Halloween Costume Pack,Dungeon Defenders Halloween Costume Pack,http://store.steampowered.com/app/202520/Dunge...,2011-11-11,,http://steamcommunity.com/app/202520/reviews/?...,[Downloadable Content],0.99,0.0,202520.0,Trendy Entertainment
...,...,...,...,...,...,...,...,...,...,...,...,...,...
119491,"SEGA, Feral Interactive (Mac), Feral Interacti...",,Total War: SHOGUN 2 - Sengoku Jidai Unit Pack,Total War: SHOGUN 2 - Sengoku Jidai Unit Pack,http://store.steampowered.com/app/34342/Total_...,2011-07-28,,http://steamcommunity.com/app/34342/reviews/?b...,[Downloadable Content],2.99,0.0,34342.0,"The Creative Assembly,Feral Interactive (Mac),..."
119572,Team17 Digital Ltd,,"Worms Reloaded: The ""Pre-order Forts and Hats""...","Worms Reloaded: The ""Pre-order Forts and Hats""...",http://store.steampowered.com/app/22630/Worms_...,2011-05-17,,http://steamcommunity.com/app/22630/reviews/?b...,[Downloadable Content],1.99,0.0,22630.0,Team17 Digital Ltd
119583,"SEGA, Feral Interactive (Mac), Feral Interacti...",,Total War: SHOGUN 2 - The Ikko Ikki Clan Pack,Total War: SHOGUN 2 - The Ikko Ikki Clan Pack,http://store.steampowered.com/app/34348/Total_...,2011-05-26,,http://steamcommunity.com/app/34348/reviews/?b...,[Downloadable Content],4.99,0.0,34348.0,"The Creative Assembly,Feral Interactive (Mac),..."
119634,Tripwire Interactive,,"Killing Floor ""London's Finest"" Character Pack","Killing Floor ""London's Finest"" Character Pack",http://store.steampowered.com/app/35419/Killin...,2010-12-14,,http://steamcommunity.com/app/35419/reviews/?b...,[Downloadable Content],7.99,0.0,35419.0,Tripwire Interactive


In [15]:
# 'title' and 'app_name' also appear to share the same information
app_name_null_count = df_steam_games['app_name'].isna().sum()
title_null_count = df_steam_games['title'].isna().sum()
print('Cantidad de valores núlos en la columna "app_name" ', app_name_null_count)
print('\nCantidad de valores núlos en la columna "title" ', title_null_count)

Cantidad de valores núlos en la columna "app_name"  2

Cantidad de valores núlos en la columna "title"  2050


In [16]:
# Fill 'title' from 'app_name' and then 'app_name' from 'title'.
# The filled Series is assigned back to the column instead of calling
# fillna(..., inplace=True) on df[col]: that form operates on an intermediate object,
# is deprecated since pandas 2.2 and leaves the DataFrame unchanged under Copy-on-Write.
df_steam_games['title'] = df_steam_games['title'].fillna(df_steam_games['app_name'])
df_steam_games['app_name'] = df_steam_games['app_name'].fillna(df_steam_games['title'])

In [14]:
# The number of nulls in both name columns is now reduced
app_name_null_count = df_steam_games['app_name'].isna().sum()
title_null_count = df_steam_games['title'].isna().sum()
print('Cantidad de valores núlos en la columna "app_name" ', app_name_null_count)
print('\nCantidad de valores núlos en la columna "title" ', title_null_count)

Cantidad de valores núlos en la columna "app_name"  2

Cantidad de valores núlos en la columna "title"  2


In [17]:
# 'publisher' and 'developer' also share information
publisher_null_count = df_steam_games['publisher'].isna().sum()
developer_null_count = df_steam_games['developer'].isna().sum()
print('Cantidad de valores núlos en la columna "publisher" ', publisher_null_count)
print('\nCantidad de valores núlos en la columna "developer" ', developer_null_count)

Cantidad de valores núlos en la columna "publisher"  8052

Cantidad de valores núlos en la columna "developer"  3299


In [18]:
# Fill 'publisher' from 'developer' and then 'developer' from 'publisher'.
# The filled Series is assigned back to the column instead of calling
# fillna(..., inplace=True) on df[col]: that form operates on an intermediate object,
# is deprecated since pandas 2.2 and leaves the DataFrame unchanged under Copy-on-Write.
df_steam_games['publisher'] = df_steam_games['publisher'].fillna(df_steam_games['developer'])
df_steam_games['developer'] = df_steam_games['developer'].fillna(df_steam_games['publisher'])

# Report the remaining nulls in both columns after the cross-fill
print('Cantidad de valores núlos en la columna "pubisher" ',df_steam_games['publisher'].isna().sum())
print('\nCantidad de valores núlos en la columna "developer" ',df_steam_games['developer'].isna().sum())

Cantidad de valores núlos en la columna "pubisher"  3234

Cantidad de valores núlos en la columna "developer"  3234


In [19]:
# Check whether 'id' contains null values
id_null_count = df_steam_games['id'].isna().sum()
print('Cantidad de valores núlos en la columna "id" ', id_null_count)

Cantidad de valores núlos en la columna "id"  2


In [20]:
# Show the rows where 'id' is null
id_is_missing = df_steam_games['id'].isna()
df_steam_games.loc[id_is_missing]

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,reviews_url,specs,price,early_access,id,developer
88384,,,,,http://store.steampowered.com/,,,,,19.99,0.0,,
119271,"Warner Bros. Interactive Entertainment, Feral ...","[Action, Adventure]",Batman: Arkham City - Game of the Year Edition,Batman: Arkham City - Game of the Year Edition,http://store.steampowered.com/app/200260,2012-09-07,"[Action, Open World, Batman, Adventure, Stealt...",,"[Single-player, Steam Achievements, Steam Trad...",19.99,0.0,,"Rocksteady Studios,Feral Interactive (Mac)"


In [21]:
# Remove the record with index label 88384: its 'id' is null, its url is incomplete
# and the only value it carries is 'price', which is not informative on its own
df_steam_games = df_steam_games.drop(index=88384)

In [22]:
# List the rows that still have a null 'id'
id_is_missing = df_steam_games['id'].isna()
df_steam_games.loc[id_is_missing]

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,reviews_url,specs,price,early_access,id,developer
119271,"Warner Bros. Interactive Entertainment, Feral ...","[Action, Adventure]",Batman: Arkham City - Game of the Year Edition,Batman: Arkham City - Game of the Year Edition,http://store.steampowered.com/app/200260,2012-09-07,"[Action, Open World, Batman, Adventure, Stealt...",,"[Single-player, Steam Achievements, Steam Trad...",19.99,0.0,,"Rocksteady Studios,Feral Interactive (Mac)"


In [23]:
# The app id is embedded in the store URL (".../app/<id>..."), so each missing 'id' is
# recovered from its own 'url' instead of filling every null with one hard-coded constant
# (for the remaining null row the URL yields 200260).
# The filled Series is assigned back to the column instead of calling
# fillna(..., inplace=True) on df[col]: that form is deprecated since pandas 2.2 and
# leaves the DataFrame unchanged under Copy-on-Write.
id_from_url = pd.to_numeric(
    df_steam_games['url'].str.extract(r'/app/(\d+)', expand=False),
    errors='coerce',  # URLs without an "/app/<digits>" segment simply yield NaN
)
df_steam_games['id'] = df_steam_games['id'].fillna(id_from_url)


In [24]:
# 'id' is stored as float, but it can be converted to an integer type
df_steam_games = df_steam_games.astype({'id': 'int32'})

In [25]:
# Re-check non-null counts and dtypes after the null handling and the 'id' conversion
df_steam_games.info()

<class 'pandas.core.frame.DataFrame'>
Index: 32134 entries, 88310 to 120444
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   publisher     28901 non-null  object 
 1   genres        31996 non-null  object 
 2   app_name      32133 non-null  object 
 3   title         32133 non-null  object 
 4   url           32134 non-null  object 
 5   release_date  30068 non-null  object 
 6   tags          31996 non-null  object 
 7   reviews_url   32133 non-null  object 
 8   specs         31465 non-null  object 
 9   price         30757 non-null  object 
 10  early_access  32134 non-null  float64
 11  id            32134 non-null  int32  
 12  developer     28901 non-null  object 
dtypes: float64(1), int32(1), object(11)
memory usage: 3.3+ MB


#### Revisar a partir de aquí qué columnas me son útiles

In [28]:
# 'price' contains str values; collect the distinct text labels in order of first appearance
price_column = df_steam_games['price']
is_text_price = price_column.map(lambda value: isinstance(value, str))
str_values = price_column[is_text_price].unique().tolist()

str_values

['Free To Play',
 'Free to Play',
 'Free',
 'Free Demo',
 'Play for Free!',
 'Install Now',
 'Play WARMACHINE: Tactics Demo',
 'Free Mod',
 'Install Theme',
 'Third-party',
 'Play Now',
 'Free HITMAN™ Holiday Pack',
 'Play the Demo',
 'Starting at $499.00',
 'Starting at $449.00',
 'Free to Try',
 'Free Movie',
 'Free to Use']

In [29]:
# Build a dictionary whose keys are the text labels found in 'price' and whose values
# are the numeric price each label is replaced with: the two "Starting at" labels map
# to their dollar amount, every other label maps to 0.
_paid_labels = {
    'Starting at $499.00': 499,
    'Starting at $449.00': 449,
}
price_dicc = {
    label: _paid_labels.get(label, 0)
    for label in (
        'Free To Play',
        'Free to Play',
        'Free',
        'Free Demo',
        'Play for Free!',
        'Install Now',
        'Play WARMACHINE: Tactics Demo',
        'Free Mod',
        'Install Theme',
        'Third-party',
        'Play Now',
        'Free HITMAN™ Holiday Pack',
        'Play the Demo',
        'Starting at $499.00',
        'Starting at $449.00',
        'Free to Try',
        'Free Movie',
        'Free to Use',
    )
}

In [30]:
# Replace every text label in 'price' with its numeric value from price_dicc and make
# the column numeric explicitly.
# - replace() accepts the whole mapping, so no Python loop over the keys is needed.
# - The result is assigned back to the column instead of calling
#   replace(..., inplace=True) on df[col]: that form is deprecated since pandas 2.2
#   and leaves the DataFrame unchanged under Copy-on-Write.
# - pd.to_numeric performs the conversion to a float column explicitly rather than
#   relying on replace() implicitly downcasting the object column.
df_steam_games['price'] = pd.to_numeric(df_steam_games['price'].replace(price_dicc))

In [31]:
# Verify that 'price' no longer holds str values
price_types = df_steam_games['price'].map(type)
price_types.value_counts()

price
<class 'float'>    32134
Name: count, dtype: int64

In [43]:
# Count the null values in 'price'
price_is_null = df_steam_games['price'].isna()
price_is_null.sum()

1377

In [32]:
# Show the rows whose 'price' is not a non-negative float.
# NaN counts as invalid here because NaN >= 0 evaluates to False.
def _is_non_negative_float(value):
    """Return True when value is a float greater than or equal to zero."""
    return isinstance(value, float) and value >= 0

has_valid_price = df_steam_games['price'].map(_is_non_negative_float)
df_steam_games[~has_valid_price]

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,reviews_url,specs,price,early_access,id,developer
88319,RewindApp,"[Casual, Indie, Racing, Simulation]",Race,Race,http://store.steampowered.com/app/768800/Race/,2018-01-04,"[Indie, Casual, Simulation, Racing]",http://steamcommunity.com/app/768800/reviews/?...,"[Single-player, Multi-player, Partial Controll...",,0.0,768800,RewindApp
88320,Qucheza,"[Action, Indie, Simulation, Early Access]",Uncanny Islands,Uncanny Islands,http://store.steampowered.com/app/768570/Uncan...,Soon..,"[Early Access, Action, Indie, Simulation, Surv...",http://steamcommunity.com/app/768570/reviews/?...,[Single-player],,1.0,768570,Qucheza
88341,BlueLine Games,"[Casual, Indie, Strategy]",Lost Cities,Lost Cities,http://store.steampowered.com/app/520680/Lost_...,2018-01-01,"[Casual, Indie, Strategy, Card Game, Board Gam...",http://steamcommunity.com/app/520680/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",,0.0,520680,BlueLine Games
88342,Games by Brundle,[Action],Twisted Enhanced Edition,Twisted Enhanced Edition,http://store.steampowered.com/app/690410/Twist...,2018-01-01,"[Action, Platformer, Side Scroller]",http://steamcommunity.com/app/690410/reviews/?...,"[Single-player, Full controller support]",,0.0,690410,Games by Brundle
88344,ProjectorGames,"[Action, Casual, Indie, Massively Multiplayer,...",Tactics Forever,Tactics Forever,http://store.steampowered.com/app/413120/Tacti...,2018-01-01,"[Casual, Action, Massively Multiplayer, Indie,...",http://steamcommunity.com/app/413120/reviews/?...,"[Online Multi-Player, MMO, Cross-Platform Mult...",,0.0,413120,ProjectorGames
...,...,...,...,...,...,...,...,...,...,...,...,...,...
120409,Picorinne Soft,"[Action, Indie]",Infinos Gaiden,Infinos Gaiden,http://store.steampowered.com/app/771070/Infin...,2018-01-19,"[Action, Indie, Shoot 'Em Up]",http://steamcommunity.com/app/771070/reviews/?...,"[Single-player, Partial Controller Support]",,0.0,771070,Picorinne Soft
120421,I.O.S. Team,[Sports],International Online Soccer,International Online Soccer,http://store.steampowered.com/app/90007/Intern...,2002-01-01,"[Sports, Mod]",http://steamcommunity.com/app/90007/reviews/?b...,"[Multi-player, Mods (require HL2)]",,0.0,90007,I.O.S. Team
120433,VersoVR,"[Casual, Indie, Simulation, Sports, Early Access]",Cricket Club,Cricket Club,http://store.steampowered.com/app/772180/Crick...,January 2018,"[Early Access, Indie, Casual, Simulation, Sports]",http://steamcommunity.com/app/772180/reviews/?...,[Single-player],,1.0,772180,VersoVR
120435,Retro Army Limited,"[Action, Adventure, Indie]",The spy who shot me™,The spy who shot me™,http://store.steampowered.com/app/771810/The_s...,2018-10-01,"[Action, Adventure, Indie]",http://steamcommunity.com/app/771810/reviews/?...,"[Single-player, Captions available]",,0.0,771810,Retro Army Limited


In [52]:
# Same check as before: rows whose 'price' is not a float, or is not >= 0 (NaN included)
has_invalid_price = df_steam_games['price'].map(
    lambda value: not isinstance(value, float) or not (value >= 0)
)
df_steam_games.loc[has_invalid_price]

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,reviews_url,specs,price,early_access,id,developer
88319,RewindApp,"[Casual, Indie, Racing, Simulation]",Race,Race,http://store.steampowered.com/app/768800/Race/,2018-01-04,"[Indie, Casual, Simulation, Racing]",http://steamcommunity.com/app/768800/reviews/?...,"[Single-player, Multi-player, Partial Controll...",,0.0,768800,RewindApp
88320,Qucheza,"[Action, Indie, Simulation, Early Access]",Uncanny Islands,Uncanny Islands,http://store.steampowered.com/app/768570/Uncan...,Soon..,"[Early Access, Action, Indie, Simulation, Surv...",http://steamcommunity.com/app/768570/reviews/?...,[Single-player],,1.0,768570,Qucheza
88341,BlueLine Games,"[Casual, Indie, Strategy]",Lost Cities,Lost Cities,http://store.steampowered.com/app/520680/Lost_...,2018-01-01,"[Casual, Indie, Strategy, Card Game, Board Gam...",http://steamcommunity.com/app/520680/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",,0.0,520680,BlueLine Games
88342,Games by Brundle,[Action],Twisted Enhanced Edition,Twisted Enhanced Edition,http://store.steampowered.com/app/690410/Twist...,2018-01-01,"[Action, Platformer, Side Scroller]",http://steamcommunity.com/app/690410/reviews/?...,"[Single-player, Full controller support]",,0.0,690410,Games by Brundle
88344,ProjectorGames,"[Action, Casual, Indie, Massively Multiplayer,...",Tactics Forever,Tactics Forever,http://store.steampowered.com/app/413120/Tacti...,2018-01-01,"[Casual, Action, Massively Multiplayer, Indie,...",http://steamcommunity.com/app/413120/reviews/?...,"[Online Multi-Player, MMO, Cross-Platform Mult...",,0.0,413120,ProjectorGames
...,...,...,...,...,...,...,...,...,...,...,...,...,...
120409,Picorinne Soft,"[Action, Indie]",Infinos Gaiden,Infinos Gaiden,http://store.steampowered.com/app/771070/Infin...,2018-01-19,"[Action, Indie, Shoot 'Em Up]",http://steamcommunity.com/app/771070/reviews/?...,"[Single-player, Partial Controller Support]",,0.0,771070,Picorinne Soft
120421,I.O.S. Team,[Sports],International Online Soccer,International Online Soccer,http://store.steampowered.com/app/90007/Intern...,2002-01-01,"[Sports, Mod]",http://steamcommunity.com/app/90007/reviews/?b...,"[Multi-player, Mods (require HL2)]",,0.0,90007,I.O.S. Team
120433,VersoVR,"[Casual, Indie, Simulation, Sports, Early Access]",Cricket Club,Cricket Club,http://store.steampowered.com/app/772180/Crick...,January 2018,"[Early Access, Indie, Casual, Simulation, Sports]",http://steamcommunity.com/app/772180/reviews/?...,[Single-player],,1.0,772180,VersoVR
120435,Retro Army Limited,"[Action, Adventure, Indie]",The spy who shot me™,The spy who shot me™,http://store.steampowered.com/app/771810/The_s...,2018-10-01,"[Action, Adventure, Indie]",http://steamcommunity.com/app/771810/reviews/?...,"[Single-player, Captions available]",,0.0,771810,Retro Army Limited


In [47]:
# Preview 20 random rows after the price clean-up.
# A fixed random_state makes the preview reproducible on Restart & Run All.
df_steam_games.sample(n=20, random_state=42)

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,reviews_url,specs,price,early_access,id,developer
109886,,"[Sports, VR]",JUST BAT (VR CRICKET),JUST BAT (VR CRICKET),http://store.steampowered.com/app/525950/JUST_...,,"[Sports, VR]",http://steamcommunity.com/app/525950/reviews/?...,"[Single-player, HTC Vive, Tracked Motion Contr...",3.99,0.0,525950,
119276,Kalypso Media Digital,"[Action, Simulation, Strategy]",Port Royale 3,Port Royale 3,http://store.steampowered.com/app/205610/Port_...,2012-05-04,"[Strategy, Simulation, Trading, Pirates, Econo...",http://steamcommunity.com/app/205610/reviews/?...,"[Single-player, Multi-player, Steam Achievemen...",14.99,0.0,205610,Gaming Minds
92642,Marvelous,[Action],Exile's End - Original Soundtrack by Keiji Yam...,Exile's End - Original Soundtrack by Keiji Yam...,http://store.steampowered.com/app/397000/Exile...,2015-09-01,[Action],http://steamcommunity.com/app/397000/reviews/?...,[Downloadable Content],9.99,0.0,397000,"Magnetic Realms,Bravewave"
95471,"SmiteWorks USA, LLC","[Indie, RPG, Strategy]",Fantasy Grounds - New Gods of Mankind - Anoint...,Fantasy Grounds - New Gods of Mankind - Anoint...,http://store.steampowered.com/app/525750/Fanta...,2016-09-02,"[Strategy, RPG, Indie]",http://steamcommunity.com/app/525750/reviews/?...,"[Multi-player, Co-op, Cross-Platform Multiplay...",5.99,0.0,525750,"SmiteWorks USA, LLC"
118175,Activision,[Action],Call of Duty®: Ghosts - Digital Hardened Edition,Call of Duty®: Ghosts - Digital Hardened Edition,http://store.steampowered.com/app/255163/Call_...,2013-11-04,"[Action, FPS, Shooter, Multiplayer, Casual]",http://steamcommunity.com/app/255163/reviews/?...,"[Single-player, Multi-player, Steam Achievemen...",99.99,0.0,255163,Infinity Ward
105148,ArsLogica LP,"[Action, Indie, RPG]",Devoid of Shadows,Devoid of Shadows,http://store.steampowered.com/app/671900/Devoi...,2017-07-13,"[Action, Indie, RPG, Vampire]",http://steamcommunity.com/app/671900/reviews/?...,[Single-player],8.99,0.0,671900,N-Game Studios
102709,,"[Indie, Casual, VR, Ninja]",Ninja in Training,Ninja in Training,http://store.steampowered.com/app/730010/Ninja...,,"[Indie, Casual, VR, Ninja]",http://steamcommunity.com/app/730010/reviews/?...,"[Single-player, HTC Vive, Tracked Motion Contr...",0.99,0.0,730010,
115349,"SmiteWorks USA, LLC","[Indie, RPG, Strategy]",Fantasy Grounds - D&D Fighter Class Pack,Fantasy Grounds - D&amp;D Fighter Class Pack,http://store.steampowered.com/app/360096/Fanta...,2015-04-07,"[Indie, RPG, Strategy]",http://steamcommunity.com/app/360096/reviews/?...,"[Multi-player, Co-op, Cross-Platform Multiplay...",2.99,0.0,360096,"SmiteWorks USA, LLC"
93322,"Ubisoft Quebec, in collaboration with Ubisoft ...","[Action, Adventure]",Assassin's Creed Syndicate - Jack The Ripper,Assassin's Creed Syndicate - Jack The Ripper,http://store.steampowered.com/app/405080/Assas...,2015-12-22,"[Action, Adventure, Horror]",http://steamcommunity.com/app/405080/reviews/?...,"[Single-player, Downloadable Content, Partial ...",14.99,0.0,405080,"Ubisoft Quebec, in collaboration with Ubisoft ..."
89363,Warner Bros. Interactive Entertainment,[Action],Gotham City Impostors Free to Play: Premium Ca...,Gotham City Impostors Free to Play: Premium Ca...,http://store.steampowered.com/app/216458/Gotha...,2012-08-30,[Action],http://steamcommunity.com/app/216458/reviews/?...,"[Multi-player, Downloadable Content, Steam Ach...",0.99,0.0,216458,"Monolith Productions, Inc."


In [38]:
# Re-check non-null counts and dtypes after replacing the text labels in 'price'
df_steam_games.info()

<class 'pandas.core.frame.DataFrame'>
Index: 32134 entries, 88310 to 120444
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   publisher     28901 non-null  object 
 1   genres        31996 non-null  object 
 2   app_name      32133 non-null  object 
 3   title         32133 non-null  object 
 4   url           32134 non-null  object 
 5   release_date  30068 non-null  object 
 6   tags          31996 non-null  object 
 7   reviews_url   32133 non-null  object 
 8   specs         31465 non-null  object 
 9   price         30757 non-null  float64
 10  early_access  32134 non-null  float64
 11  id            32134 non-null  int32  
 12  developer     28901 non-null  object 
dtypes: float64(2), int32(1), object(10)
memory usage: 3.3+ MB
