In [27]:
import requests
import json
import urllib.parse
import time

def getApiSteam(repeat, num_per_page=20, url="https://store.steampowered.com/appreviews/892970?json=1&language=english", cursor='*', data=[]):
    time.sleep(num_per_page/10)
    
    if repeat == 0:
        # Flatten the list of reviews
        data = [x for xs in data for x in xs]
        # Extract only the review text
        reviewData = [i["review"] for i in data]
        
        # Save the review data to a file
        with open('reviewdata.json', 'w', encoding='utf-8') as f:
            json.dump(reviewData, f, ensure_ascii=False, indent=4)
            
        return 1
    else:
        repeat -= 1

        # Fetch the next page of reviews
        response = requests.get(f'{url}&cursor={cursor}&num_per_page={num_per_page}')
        temp = json.loads(response.text)
        # Append the new reviews to the data list
        data.append(temp["reviews"])
        # Update the cursor for the next request
        new_cursor = urllib.parse.quote(temp["cursor"])
        print(f"Fetched reviews. New cursor: {new_cursor}")
        
        # Recursively call the function with the updated cursor and data
        return getApiSteam(repeat, num_per_page, cursor=new_cursor, data=data)

def main():
    getApiSteam(20, 50)
    # Load the review data from the file
    with open("reviewdata.json", "r", encoding='utf-8') as file:
        reviewData = json.load(file)
    print("Number of reviews: ", len(reviewData))

if __name__ == "__main__":
    main()


Fetched reviews. New cursor: AoIIPwYYanDj7egE
Fetched reviews. New cursor: AoIIPwFUIn744ekE
Fetched reviews. New cursor: AoIIPwAAAHb2gOcE
Fetched reviews. New cursor: AoIIPwwLVnSJ6egE
Fetched reviews. New cursor: AoIIPwVCon%2Bo3uwE
Fetched reviews. New cursor: AoIIPwAAAHb2gOcE
Fetched reviews. New cursor: AoIIPwwLVnSJ6egE
Fetched reviews. New cursor: AoIIPwVCon%2Bo3uwE
Fetched reviews. New cursor: AoIIPwAAAHb2gOcE
Fetched reviews. New cursor: AoIIPwwLVnSJ6egE
Fetched reviews. New cursor: AoIIPwVCon%2Bo3uwE
Fetched reviews. New cursor: AoIIPwAAAHb2gOcE
Fetched reviews. New cursor: AoIIPwwLVnSJ6egE
Fetched reviews. New cursor: AoIIPwVCon%2Bo3uwE
Fetched reviews. New cursor: AoIIPwAAAHb2gOcE


KeyboardInterrupt: 

In [57]:
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import json

# Download necessary NLTK resources
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

# Read Game Review Data JSON
with open("reviewdata.json","r", encoding='utf-8') as file:
        reviewData = json.load(file)
    
def preprocess(text):
    # Convert text to lowercase
    text_lower = text.lower()
    
    # Tokenization
    tokens = word_tokenize(text_lower)
    
    # Removing punctuation and special characters
    tokens = [word for word in tokens if word.isalnum()]
    
    # Removing stop words
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]
    
    # Lemmatization
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    
    return tokens
    
# Joining tokens back into a string
preprocessed_text = ' '.join(preprocess(reviewData[22]))

print("\nProcessed:\n"+preprocessed_text)
print("\n\nNot Processed:\n"+reviewData[22])


Processed:
took three try get game felt unnatural first sat put two three hour game immediately hooked like basically game ever writing 383 hour later even dove playing game friend yet absolute best part game according take space drive cost le aaa game find absolutely delightful exploration amazing genuinely love world natural difficulty make actually care preparing cooking lose stats adjusted game definitely within top ten game time


Not Processed:
It took me three tries to get into this game - it felt unnatural to me at first, but after I sat down and put about two to three hours into the game it immediately hooked me like basically no other game ever has. 

I am writing this 383 hours later, and I haven't even dove into playing the game with my friends yet (The absolute best part of the game according to most). It takes no space on your drive, it costs less than all AAA games, and I find it absolutely delightful.

Exploration is amazing - I genuinely love this world.
Natural diffi

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\reapy\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\reapy\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\reapy\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


### Get Steam Reviews json

In [5]:
import steamreviews

request_params = dict()
# Reference: https://partner.steamgames.com/doc/store/getreviews
request_params['filter'] = 'all'  # reviews are sorted by helpfulness instead of chronology
request_params['language'] = 'english'
request_params['day_range'] = '84'  # focus on reviews which were published during the past four weeks

steamreviews.download_reviews_for_app_id_batch(chosen_request_params=request_params)

Loading idlist.txt
Loading idprocessed_on_20240422.txt
Creating idprocessed_on_20240422.txt
Downloading reviews for appID = 1623730
[appID = 1623730] expected #reviews = 124655
[appID = 1623730] num_reviews = 5349 (expected: 124655)
Downloading reviews for appID = 892970
[appID = 892970] expected #reviews = 262477
[appID = 892970] num_reviews = 600 (expected: 262477)
Downloading reviews for appID = 105600
[appID = 105600] expected #reviews = 636028
[appID = 105600] num_reviews = 1077 (expected: 636028)
Downloading reviews for appID = 346110
[appID = 346110] expected #reviews = 304609
[appID = 346110] num_reviews = 398 (expected: 304609)
Downloading reviews for appID = 252490
[appID = 252490] expected #reviews = 532677
[appID = 252490] num_reviews = 1369 (expected: 532677)
Downloading reviews for appID = 275850
[appID = 275850] expected #reviews = 176517
[appID = 275850] num_reviews = 694 (expected: 176517)
Downloading reviews for appID = 322330
[appID = 322330] expected #reviews = 1165

True

### Merge multiple json to one csv

In [7]:
import os
import json
import csv

# Directory containing the JSON files
json_directory = os.getcwd()+"/data/"

# Output CSV file path
csv_file_path = 'output.csv'

# Initialize an empty list to hold the extracted data
data = []

# Iterate over each file in the directory
for filename in os.listdir(json_directory):
    if filename.endswith('.json'):
        file_path = os.path.join(json_directory, filename)
        with open(file_path, 'r', encoding='utf-8') as file:
            # Load the JSON file
            json_data = json.load(file)
            # Extract reviews
            for review_id, review_data in json_data['reviews'].items():
                # Determine sentiment
                sentiment = 'positive' if review_data['voted_up'] else 'negative'
                # Extract the review text
                review_text = review_data['review']
                # Append the data to the list
                data.append([sentiment, review_text])

# Write the data to a CSV file
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    # Write the header
    writer.writerow(['sentiment', 'review'])
    # Write the data
    writer.writerows(data)

print(f"Data has been written to {csv_file_path}")

Data has been written to output.csv
