## User Reviews Scraping
We fetch user reviews from the Play Store and store them in `OtherData/UserReviewsData`, as a separate CSV file for every app identifier.

### The Scraping Logic

Play Store reviews are paginated in the backend, so a single request can return at most 200 reviews. We therefore fetch reviews in batches of 200, passing the continuation token from each response to the next request.

In [None]:
from google_play_scraper import Sort, reviews
import simplejson
import pandas as pd
from tqdm import tqdm

def scrape_reviews(app_id, review_count=0):
    """
    Scrape reviews for a given app in batches of at most 200 reviews per HTTP request

    :param app_id the identifier of the app (e.g. com.foobar.app)
    :param review_count the maximum number of reviews to scrape; 0 or a negative
                        value (the default) scrapes everything that is available

    :return a list of the scraped reviews (never more than review_count when it is positive)
    """
    batch_size = 200
    # None means "no limit"; tqdm also treats total=None as an unknown total.
    limit = review_count if review_count > 0 else None

    results = []
    # continuation_token carries the pagination state between requests;
    # None asks the scraper to start from the first page.
    continuation_token = None

    with tqdm(total=limit, desc=f"Scraping reviews for {app_id}") as pbar:
        while limit is None or len(results) < limit:
            remaining = None if limit is None else limit - len(results)
            count = batch_size if remaining is None else min(batch_size, remaining)

            result, continuation_token = reviews(
                app_id,
                lang='en',
                country='us',
                sort=Sort.NEWEST,
                count=count,
                continuation_token=continuation_token,
            )

            # An empty batch means there is nothing left to fetch.
            if not result:
                break

            # NOTE(review): when a continuation token is supplied the scraper
            # appears to reuse the batch size stored in the token rather than
            # `count`, so the final batch can overshoot; trim it to the limit.
            if remaining is not None:
                result = result[:remaining]

            results.extend(result)
            pbar.update(len(result))

            if continuation_token is None:
                break

    return results


### Scraping Every App

We give a list of app identifiers to scrape and start working!

In [None]:
from pathlib import Path

app_ids = ['com.nianticlabs.pokemongo']

# Make sure the output directory exists before writing any CSV into it.
output_dir = Path('OtherData/UserReviewsData')
output_dir.mkdir(parents=True, exist_ok=True)

for app_id in app_ids:
    # Do not name this variable `reviews`: that would shadow the
    # google_play_scraper function that scrape_reviews calls, breaking
    # every iteration after the first.
    app_reviews = scrape_reviews(app_id, 15000)
    reviews_df = pd.DataFrame(app_reviews)
    print(reviews_df.head(50))
    # Drop user information as an attempt to anonymize.
    # errors='ignore' keeps this from failing when no reviews were returned
    # and the frame therefore has no columns.
    reviews_df = reviews_df.drop(columns=['userName', 'userImage'], errors='ignore')
    # Save the dataframe to csv, one file per app identifier
    reviews_df.to_csv(output_dir / f'{app_id}.csv', index=False)
