In [1]:
# -- Positive Ratings, Cleaning and CSV Export 

# Import dependencies

import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import math

In [2]:
# Relative path to the Kaggle Steam dataset (steam.csv) in the main resources folder
file_to_load = "../../resources/steam.csv"

In [3]:
# Read the Steam CSV into the `games_features` DataFrame, then preview the first five rows
games_features = pd.read_csv(filepath_or_buffer=file_to_load)

games_features.head(n=5)

Unnamed: 0,appid,name,release_date,english,developer,publisher,platforms,required_age,categories,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price
0,10,Counter-Strike,2000-11-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,124534,3339,17612,317,10000000-20000000,7.19
1,20,Team Fortress Classic,1999-04-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,3318,633,277,62,5000000-10000000,3.99
2,30,Day of Defeat,2003-05-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Valve Anti-Cheat enabled,Action,FPS;World War II;Multiplayer,0,3416,398,187,34,5000000-10000000,3.99
3,40,Deathmatch Classic,2001-06-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,1273,267,258,184,5000000-10000000,3.99
4,50,Half-Life: Opposing Force,1999-11-01,1,Gearbox Software,Valve,windows;mac;linux,0,Single-player;Multi-player;Valve Anti-Cheat en...,Action,FPS;Action;Sci-fi,0,5250,288,624,415,5000000-10000000,3.99


In [4]:
# Add a column holding the share of positive reviews for each game:
# positive_ratings / (positive_ratings + negative_ratings), i.e. a fraction between 0 and 1.
games_features['percent_positive_reviews'] = games_features['positive_ratings'].div(
    games_features['positive_ratings'].add(games_features['negative_ratings'])
)

In [5]:
# Keep only the games that have more than 100 positive ratings.
games_features = games_features[games_features['positive_ratings'].gt(100)]

In [6]:
# Sanity check: display (rows, columns) after removing games whose positive rating count is 100 or lower.
# The recorded run shows 7492 rows and 19 columns.
games_features.shape

(7492, 19)

In [7]:
# Reduce the dataframe to the game name and its review column.
# An explicit .copy() is taken because this subset is modified by column
# assignment further down; working on an independent frame (rather than a
# selection of the filtered data) avoids pandas' SettingWithCopyWarning and
# any ambiguity about which object receives the update.
games_features = games_features[["name", "percent_positive_reviews"]].copy()

In [8]:
# Preview the first five rows of the reduced (name + review) dataframe
games_features.head(n=5)

Unnamed: 0,name,percent_positive_reviews
0,Counter-Strike,0.973888
1,Team Fortress Classic,0.839787
2,Day of Defeat,0.895648
3,Deathmatch Classic,0.826623
4,Half-Life: Opposing Force,0.947996


In [9]:
# Confirm column data types; the recorded run shows `name` as object and
# `percent_positive_reviews` as float64.
games_features.dtypes

name                         object
percent_positive_reviews    float64
dtype: object

In [10]:
# Convert the positive-review fraction into a score out of 100, rounded to the nearest whole number.
# NOTE: this overwrites the column it reads from, so running the cell a second time
# would multiply the values by 100 again.
games_features['percent_positive_reviews'] = (
    games_features['percent_positive_reviews'].mul(100).round()
)

In [11]:
# Preview the first ten rows to check the rescaled scores
games_features.head(n=10)

Unnamed: 0,name,percent_positive_reviews
0,Counter-Strike,97.0
1,Team Fortress Classic,84.0
2,Day of Defeat,90.0
3,Deathmatch Classic,83.0
4,Half-Life: Opposing Force,95.0
5,Ricochet,80.0
6,Half-Life,96.0
7,Counter-Strike: Condition Zero,89.0
8,Half-Life: Blue Shift,90.0
9,Half-Life 2,97.0


In [12]:
# Write the cleaned name/score table to user_reviews.csv in the main resources folder,
# where it is used by the Popular Tags and Genre CSV cleaning.
# The row index is written as the first column (pandas default index=True).
games_features.to_csv(path_or_buf=r'../../resources/user_reviews.csv')