In [1]:
import json
import pandas as pd
import sqlalchemy

from database_credentials import get_database_url
from google_play_scraper import app, reviews, Sort
from google_play_scraper.features.reviews import ContinuationToken

In [2]:
def insert_review_data(data, connection):
    """Append every row of ``data`` to the ``reviews`` table.

    Args:
        data: Object exposing ``to_sql`` (a pandas DataFrame in this notebook).
        connection: Database connection handed straight to ``to_sql``.
    """
    write_options = {
        'con': connection,
        'if_exists': 'append',  # add to the table instead of replacing it
        'index': False,         # do not write the DataFrame index as a column
        'method': 'multi',      # pass multiple rows per INSERT statement
    }
    data.to_sql('reviews', **write_options)

# Writing JSON data to a file: https://stackoverflow.com/questions/12309269/
def save_continuation_token(continuation_token):
    """Persist a continuation token to ``token_data.json`` in the working directory.

    The token's ``unpack()`` result is serialized as indented JSON with
    non-ASCII characters written as-is, overwriting any existing file.

    Args:
        continuation_token: Object exposing ``unpack()``, whose return value
            must be JSON-serializable.
    """
    fields = continuation_token.unpack()
    with open('token_data.json', mode='w', encoding='utf-8') as token_file:
        token_file.write(json.dumps(fields, ensure_ascii=False, indent=4))

def load_continuation_token():
    """Rebuild a ``ContinuationToken`` from ``token_data.json``.

    The JSON content of the file is converted to a tuple and spread
    positionally into the ``ContinuationToken`` constructor. Errors from
    opening or parsing the file are not handled here; they propagate to
    the caller.

    Returns:
        The reconstructed ``ContinuationToken``.
    """
    with open('token_data.json', encoding='utf-8') as token_file:
        stored_fields = json.load(token_file)

    return ContinuationToken(*tuple(stored_fields))

def preprocess(data):
    """Turn raw review records into a DataFrame with the database column names.

    Only the source fields listed in the mapping below are kept, in that
    order, and each is renamed to its target column name.

    Args:
        data: Anything ``pd.DataFrame`` accepts; in this notebook, the
            records returned by ``reviews``.

    Returns:
        A new DataFrame with columns ``user_id``, ``review``, ``rating``,
        ``version`` and ``datetime_created``. When ``data`` holds no records
        at all, the result has those columns and zero rows.

    Raises:
        KeyError: If ``data`` is non-empty but lacks one of the source fields.
    """
    column_map = {
        'userName': 'user_id',
        'content': 'review',
        'score': 'rating',
        'reviewCreatedVersion': 'version',
        'at': 'datetime_created',
    }

    raw_frame = pd.DataFrame(data)

    # An empty batch produces a frame with no columns, so selecting the source
    # columns would raise KeyError. Return an empty frame of the right shape.
    if raw_frame.shape == (0, 0):
        return pd.DataFrame(columns=list(column_map.values()))

    # Subset first, then rename; both steps return new frames, so nothing is
    # modified in place.
    return raw_frame[list(column_map)].rename(columns=column_map)

def get_data(num_reviews):
    """Fetch one batch of reviews and append it to the database.

    The batch resumes from the continuation token stored by
    ``save_continuation_token`` when one can be loaded; otherwise ``None`` is
    passed as the token. The token returned with the batch is saved only
    after the insert call has returned without raising.

    Relies on the module-level ``connection`` being defined before the call.

    Args:
        num_reviews: Passed as ``count`` to ``reviews``.

    Returns:
        None. Insert/save failures are reported on stdout rather than raised.
    """
    try:
        continuation_token = load_continuation_token()
    except (OSError, ValueError, TypeError):
        # No usable saved token: the file is missing or unreadable (OSError),
        # holds invalid JSON (ValueError), or its content does not fit the
        # token constructor (TypeError). Other exceptions, including
        # KeyboardInterrupt, are deliberately not swallowed.
        continuation_token = None

    data, continuation_token = reviews(
        'com.tgc.sky.android',
        count=num_reviews,
        continuation_token=continuation_token
    )

    # An empty batch has nothing to insert, and preprocessing it would fail on
    # the missing columns. The saved token is left untouched.
    if len(data) == 0:
        print('No reviews returned')
        return

    review_data = preprocess(data)

    try:
        insert_review_data(review_data, connection)
        save_continuation_token(continuation_token)
    except Exception as error:
        # Best effort, as before, but report what actually went wrong and let
        # KeyboardInterrupt/SystemExit propagate.
        print(f'Error: {error!r}')

In [3]:
# Build the SQLAlchemy engine from the URL returned by the credentials helper
# and open one connection. get_data() reads this module-level `connection`,
# so this cell must run before get_data() is called.
database_url = get_database_url()
engine = sqlalchemy.create_engine(database_url)
connection = engine.connect()

In [4]:
# Request a batch of 2000 reviews (forwarded as `count` to the scraper) and
# append the preprocessed rows to the `reviews` table.
get_data(2000)

In [5]:
# Close the connection opened in the setup cell, then dispose of the engine.
# get_data() cannot be called again until the setup cell is re-run.
connection.close()
engine.dispose()