In [None]:
# To extract the following app details from Apple App Store

# name
# description
# applicationCategory
# datePublished
# operatingSystem
# authorname
# authorurl
# ratingValue
# reviewCount
# price
# priceCurrency
# star_ratings (respective star rating (eg. 5star, 4star, 3star, 2star, 1 star) %, can calculate count from reviewCount)



In [2]:
# Import dependencies
import requests
import json
from bs4 import BeautifulSoup

In [16]:
# Test

def get_app_link(query):
    html_page = requests.get("https://www.apple.com/sg/search/"+query+"?src=serp")
    soup = BeautifulSoup(html_page.text, 'html.parser')
    app_link = soup.find('a', class_='icon icon-after icon-chevronright')['href']
    app_id = app_link.split('/')[-1]
    return app_id

def get_app_info(appname):
    html_page = requests.get("https://apps.apple.com/sg/app/"+get_app_link(appname))
    soup2 = BeautifulSoup(html_page.text, 'html.parser')

    info_boxes = [json.loads(info_box.text.strip()) for info_box in soup2.find_all('script', {'type': 'application/ld+json'})]
    return info_boxes

get_app_info("minecraft")

[{'@context': 'http://schema.org',
  '@type': 'SoftwareApplication',
  'name': 'Minecraft',
  'description': "Explore infinite worlds and build everything from the simplest of homes to the grandest of castles. Play in creative mode with unlimited resources or mine deep into the world in survival mode, crafting weapons and armor to fend off dangerous mobs. \n\nCreate, explore, and survive along or play with friends on all different devices. Scale craggy mountains, unearth elaborate caves, and mine large ore veins. Discover lush cave and dripstone cave biomes. Light up your world with candles to show what a savvy spelunker and master mountaineer you are! \n\nEXPAND YOUR GAME:\nMarketplace - Discover the latest community creations in the marketplace! Get unique maps, skins, and texture packs from your favorite creators.\n\nSlash commands - Tweak how the game plays: you can give items away, summon mobs, change the time of day, and more. \n\nAdd-Ons - Customize your experience even further 

In [17]:
# To extract relevant info from respecitve "app info" url
def get_app_link(query):
    html_page = requests.get("https://www.apple.com/sg/search/"+query+"?src=serp")
    soup1 = BeautifulSoup(html_page.text, 'html.parser')
    app_link = soup1.find('a', class_='icon icon-after icon-chevronright')['href']
    app_id = app_link.split('/')[-1]
    return app_id

def get_app_info(appname):
    html_page = requests.get("https://apps.apple.com/sg/app/"+get_app_link(appname))
    soup2 = BeautifulSoup(html_page.text, 'html.parser')

    info_boxes = [json.loads(info_box.text.strip()) for info_box in soup2.find_all('script', {'type': 'application/ld+json'})]

    extracted_info = {}
    
    if info_boxes:
        info_box = info_boxes[0]  # Assuming you want information from the first box
        extracted_info = {
            'name': info_box['name'],
            'description': info_box['description'],
            'applicationCategory': info_box['applicationCategory'],
            'datePublished': info_box['datePublished'],
            'operatingSystem': info_box['operatingSystem'],
            'authorname': info_box['author']['name'],
            'authorurl': info_box['author']['url'],
            'ratingValue': info_box['aggregateRating']['ratingValue'],
            'reviewCount': info_box['aggregateRating']['reviewCount'],
            'price': info_box['offers']['price'],
            'priceCurrency': info_box['offers']['priceCurrency'],
            # 'Category': info_box['offers']['category']
        }

    # Add star rating information to the extracted_info dictionary
    soup = BeautifulSoup(html_page.text, 'html.parser')
    width_styles = soup.find_all('div', class_='we-star-bar-graph__bar__foreground-bar')
    percentages = [style['style'].split(': ')[1].rstrip(';') for style in width_styles]
    star_ratings = ['5 Star', '4 Star', '3 Star', '2 Star', '1 Star']

    result = list(zip(star_ratings, percentages))   

    extracted_info['star_ratings'] = result

    return extracted_info



In [21]:
query_list = ["whatsapp", "telegram", "nonogram", "minecraft", "mobile legends"]

for query in query_list:
    app_info = get_app_info(query)
    print(f"App info for {query}:")
    print(app_info)
    print("\n")



App info for whatsapp:
{'name': 'WhatsApp Messenger', 'description': 'With WhatsApp for Mac, you can conveniently sync all your chats to your computer. Message privately, make calls and share files with your friends, family and colleagues.', 'applicationCategory': 'Social Networking', 'datePublished': '4 May 2009', 'operatingSystem': 'macOS 11.0 or later. Requires iOSÂ\xa012.0 or later. Compatible with iPhone.', 'authorname': 'WhatsApp Inc.', 'authorurl': 'https://apps.apple.com/sg/developer/whatsapp-inc/id310634000', 'ratingValue': 1.8, 'reviewCount': 83, 'price': 0, 'priceCurrency': 'SGD', 'star_ratings': [('5 Star', '12%'), ('4 Star', '6%'), ('3 Star', '2%'), ('2 Star', '7%'), ('1 Star', '72%')]}


App info for telegram:
{'name': 'Telegram Messenger', 'description': "Pure instant messaging â\x80\x94 simple, fast, secure, and synced across all your devices. One of the world's top 10 most downloaded apps with over 800 million active users.\n\nFAST: Telegram is the fastest messaging ap

In [23]:
import pandas as pd

# Create an empty list to store DataFrames
dfs = []

# Loop through each query in the list
for query in query_list:
    app_info = get_app_info(query)
    
    # Convert the app_info dictionary to a DataFrame and append it to the list
    df = pd.DataFrame([app_info], index=[query])
    dfs.append(df)

# Concatenate the list of DataFrames into a single DataFrame
result_df = pd.concat(dfs)
result_df.to_csv('Apple App Store Scrape.csv')

# Print the DataFrame
print(result_df)

                                       name  \
whatsapp                 WhatsApp Messenger   
telegram                 Telegram Messenger   
nonogram        Nonogram.com - Number Games   
minecraft                         Minecraft   
mobile legends    Mobile Legends: Bang Bang   

                                                      description  \
whatsapp        With WhatsApp for Mac, you can conveniently sy...   
telegram        Pure instant messaging â simple, fast, secur...   
nonogram        Nonogram.com is an addictive logic game with a...   
minecraft       Explore infinite worlds and build everything f...   
mobile legends  Join your friends in Mobile Legends: Bang Bang...   

               applicationCategory datePublished  \
whatsapp         Social Networking    4 May 2009   
telegram         Social Networking   14 Aug 2013   
nonogram                     Games   28 Apr 2019   
minecraft                    Games   17 Nov 2011   
mobile legends               Games    9 No