In [None]:
# To extract the following app details from Google Play Store

# name
# app_url
# appstore_url
# description
# operating_system
# application_category
# image
# content_rating
# author_name
# author_url
# rating_value
# rating_count
# price
# price_currency
# per-star rating breakdown (e.g. 5-star, 4-star, 3-star, 2-star, 1-star)

In [1]:
# Import dependencies
import requests
import json
from bs4 import BeautifulSoup

In [2]:
# To convert "app search" url to "app info" url
def get_app_link(query):
    """Return the ``href`` of the first app result for a Play Store search.

    Args:
        query: Free-text search string (e.g. ``"minecraft"``).

    Returns:
        The ``href`` attribute of the first ``<a class="Qfxief">`` element on
        the search results page. Callers in this notebook prepend
        ``https://play.google.com`` to it.

    Raises:
        requests.HTTPError: If the search page responds with an error status.
        requests.RequestException: On connection problems or timeout.
        LookupError: If the results page contains no matching anchor.
    """
    # Pass the query through `params` so characters such as '&', '#' or '+'
    # are percent-encoded instead of corrupting the URL.
    response = requests.get(
        "https://play.google.com/store/search",
        params={"q": query, "c": "apps"},
        timeout=30,  # without a timeout a stalled connection blocks forever
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    applink = soup.find('a', attrs={'class': 'Qfxief'})

    # `find` returns None when nothing matches; fail with a clear message
    # rather than "'NoneType' object is not subscriptable".
    if applink is None or not applink.get('href'):
        raise LookupError(f"No Play Store app link found for query {query!r}")
    return applink['href']

# To extract relevant info from the respective "app info" url
def _load_json_ld_boxes(soup):
    """Return the JSON objects found in ``application/ld+json`` script tags."""
    boxes = []
    for script_tag in soup.find_all('script', {'type': 'application/ld+json'}):
        try:
            payload = json.loads(script_tag.text.strip())
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            # One malformed script should not discard the rest of the page.
            continue
        # Only dict payloads can be read with .get() further down.
        if isinstance(payload, dict):
            boxes.append(payload)
    return boxes


def _first_offer(info_box):
    """Return the first offer dict of ``info_box``, or ``{}`` if there is none."""
    offers = info_box.get('offers') or [{}]  # covers a missing key, None and []
    if isinstance(offers, dict):
        # Tolerate a single offer given as an object instead of a list.
        return offers
    first = offers[0]
    return first if isinstance(first, dict) else {}


def _parse_star_ratings(bar_elements):
    """Convert rating-histogram bar elements into per-star dictionaries."""
    star_ratings = []
    # Reversing and numbering from 1 assumes the page lists the bars from the
    # highest star rating down to the lowest -- TODO confirm against the markup.
    for stars, bar in enumerate(reversed(bar_elements), 1):
        style_attribute = bar.get('style', '')
        title_attribute = bar.get('title', '')
        try:
            # e.g. "width: 10.65%" -> 10.65 (a trailing ';' is tolerated)
            raw_width = style_attribute.split(':', 1)[1].split(';')[0]
            width_percentage = float(raw_width.replace('%', '').strip())
            # e.g. "1,639,210" -> 1639210
            count = int(title_attribute.replace(',', '').strip())
        except (IndexError, ValueError):
            # Skip a bar whose attributes are missing or unparseable instead
            # of aborting the whole scrape; the star number stays positional.
            continue
        star_ratings.append(
            {'star_rating': stars, 'percentage': width_percentage, 'count': count}
        )
    return star_ratings


def get_app_info(app_name):
    """Scrape the details of the first Play Store search hit for ``app_name``.

    Args:
        app_name: Search string handed to ``get_app_link``.

    Returns:
        A dict. When the details page carries a JSON-LD object it holds the
        keys ``name``, ``app_url``, ``appstore_url``, ``description``,
        ``operating_system``, ``application_category``,
        ``application_category_split``, ``application_subcategory_split``,
        ``image``, ``content_rating``, ``author_name``, ``author_url``,
        ``rating_value``, ``rating_count``, ``price`` and ``price_currency``;
        fields absent from the page are ``''``. The key ``star_ratings`` is
        always present: a list of ``{'star_rating', 'percentage', 'count'}``
        dicts, empty when no histogram bars could be parsed.

    Raises:
        requests.HTTPError: If the details page responds with an error status.
        requests.RequestException: On connection problems or timeout.
        Any exception raised by ``get_app_link``.
    """
    base_url = 'https://play.google.com'
    app_url = base_url + get_app_link(app_name)

    response = requests.get(app_url, timeout=30)  # never hang indefinitely
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    extracted_info = {}

    info_boxes = _load_json_ld_boxes(soup)
    if info_boxes:
        info_box = info_boxes[0]  # the first JSON-LD object describes the app

        # Split on the FIRST underscore only, so that a category such as
        # "GAME_ROLE_PLAYING" yields ("GAME", "ROLE_PLAYING") rather than
        # losing everything after the second word. Without an underscore the
        # whole value is the category and the subcategory is ''.
        application_category = info_box.get('applicationCategory', '') or ''
        category, _, subcategory = application_category.partition('_')

        # `or {}` also covers keys that are present but null.
        author = info_box.get('author') or {}
        aggregate_rating = info_box.get('aggregateRating') or {}
        offer = _first_offer(info_box)

        extracted_info = {
            'name': info_box.get('name', ''),
            'app_url': app_url,
            'appstore_url': info_box.get('url', ''),
            'description': info_box.get('description', ''),
            'operating_system': info_box.get('operatingSystem', ''),
            'application_category': application_category,
            'application_category_split': category,
            'application_subcategory_split': subcategory,
            'image': info_box.get('image', ''),
            'content_rating': info_box.get('contentRating', ''),
            'author_name': author.get('name', ''),
            'author_url': author.get('url', ''),
            'rating_value': aggregate_rating.get('ratingValue', ''),
            'rating_count': aggregate_rating.get('ratingCount', ''),
            'price': offer.get('price', ''),
            'price_currency': offer.get('priceCurrency', ''),
        }

    # Divs carrying the per-star histogram bars (width in `style`, count in
    # `title`). The class string is site-generated and may change over time.
    rating_bars = soup.find_all('div', class_='RutFAf wcB8se')
    extracted_info['star_ratings'] = _parse_star_ratings(rating_bars)

    return extracted_info



In [3]:
# Alternative query set, kept for reference:
# query_list = ["whatsapp", "fairprice", "grab", "gpay", "honkai star rail", "genshin impact", "byd"]
query_list = [
    "call of duty",
    "minecraft",
    "clash of clan",
    "pokemon unite",
    "twinkle unicorn cat princess",
    "OCBC Digital",
]

# Scrape each query and dump the raw dictionary. A single print call with a
# newline separator emits the heading, the dict and the blank spacer lines.
for query in query_list:
    app_info = get_app_info(query)
    print(f"App info for {query}:", app_info, "\n", sep="\n")

App info for call of duty:
{'name': 'Call of Duty: Mobile Season 2', 'app_url': 'https://play.google.com/store/apps/details?id=com.activision.callofduty.shooter', 'appstore_url': 'https://play.google.com/store/apps/details/Call_of_Duty_Mobile_Season_2?id=com.activision.callofduty.shooter&hl=en_US', 'description': 'Season 2 is out now! Survive and Dominate in Multiplayer FPS', 'operating_system': 'ANDROID', 'application_category': 'GAME_ACTION', 'application_category_split': 'GAME', 'application_subcategory_split': 'ACTION', 'image': 'https://play-lh.googleusercontent.com/zX7jmUbnCkH1LlhGFIffDv76OgJjIy3zZvzC6DPO-Cl-BPXfNVluTCDHTX6YSpvxKUrd', 'content_rating': 'Mature 17+', 'author_name': 'Activision Publishing, Inc.', 'author_url': 'http://www.activision.com', 'rating_value': '4.309465408325195', 'rating_count': '15905469', 'price': '0', 'price_currency': 'USD', 'star_ratings': [{'star_rating': 1, 'percentage': 10.650381402437201, 'count': 1639210}, {'star_rating': 2, 'percentage': 2.73

In [5]:
import pandas as pd

# Scrape every query once and keep the plain dictionaries. Collecting records
# first avoids creating one single-row DataFrame per app.
app_records = []
for query in query_list:
    app_info = get_app_info(query)
    app_records.append(app_info)

# Build the table in one constructor call, labelling each row with the query
# that produced it. Unlike pd.concat on an empty list, this also works when
# query_list is empty (it yields an empty DataFrame instead of raising).
result_df = pd.DataFrame(app_records, index=query_list)
result_df.to_csv('Google Play App Store Scrape.csv')

# Print the DataFrame
print(result_df)

                                                        name  \
call of duty                   Call of Duty: Mobile Season 2   
minecraft                                          Minecraft   
clash of clan                                 Clash of Clans   
pokemon unite                                  Pokémon UNITE   
twinkle unicorn cat princess  Twinkle - Unicorn Cat Princess   
OCBC Digital                   OCBC Digital - Mobile Banking   

                                                                        app_url  \
call of duty                  https://play.google.com/store/apps/details?id=...   
minecraft                     https://play.google.com/store/apps/details?id=...   
clash of clan                 https://play.google.com/store/apps/details?id=...   
pokemon unite                 https://play.google.com/store/apps/details?id=...   
twinkle unicorn cat princess  https://play.google.com/store/apps/details?id=...   
OCBC Digital                  https://play.google.com