In [1]:
import pandas as pd
import os
from pathlib import Path
import numpy as np
import cv2
import math
import json
import shutil
import re

from collections import defaultdict

In [2]:
# Root directory of the RICO dataset. Set the RICO_DATASET_DIR environment
# variable to run against a copy stored elsewhere; the previously hard-coded
# location remains the default so existing setups keep working.
dataset_folder = os.environ.get("RICO_DATASET_DIR", "/home/pandian/Dataset/rico")
dataset_path = Path(dataset_folder)

In [3]:
# Inputs read by this notebook, all resolved relative to the dataset root.
annotations_path = dataset_path / "annotations"
screenshots_path = dataset_path / "screenshots"
app_details_path = dataset_path / "app_details.csv"
ui_details_path = dataset_path / "ui_details.csv"

In [4]:
# Load the per-app and per-UI metadata tables.
app_details = pd.read_csv(app_details_path)
ui_details = pd.read_csv(ui_details_path)

# Discard the row whose index label is 8404.
# NOTE(review): the reason for excluding this row is not recorded here —
# presumably a malformed record; confirm against the raw CSV.
app_details = app_details.drop(index=[8404])

In [5]:
# Cast both columns in one pass: rating counts become 32-bit integers and
# categories become plain strings.
app_details = app_details.astype({
    "Number of Ratings": "int32",
    "Category": "str",
})

In [6]:
# Rank apps from most to least rated; ties fall back to average rating and then
# category, all in descending order.
app_details = app_details.sort_values(by=["Number of Ratings", "Average Rating", "Category"], ascending=False)

# Keep only apps whose Play Store name is pure ASCII. str() guards against
# non-string cells before isascii() is called. (This filter used to be applied
# twice in a row; the second pass was a no-op and has been removed.)
mask = app_details["Play Store Name"].apply(lambda x: str(x).isascii())
app_details = app_details[mask]

In [7]:
def dir_case(text):
    """Return ``text`` rewritten as a directory-friendly name.

    Surrounding whitespace is stripped first; every remaining run of
    non-word characters is then collapsed into a single underscore.
    Punctuation left at either end after stripping therefore still
    produces a leading or trailing underscore.
    """
    trimmed = text.strip()
    return re.compile(r"\W+").sub("_", trimmed)



# Spot-check dir_case on a few raw Play Store names that carry stray tabs and
# punctuation; the tabs are written as \t so they are visible in the source.
for sample_name in (
    "\tT20 League App 2017 - Live K+\t",
    "Events High - Meet Your City!\t",
    "UC Browser: - Fast Download\t",
):
    print(dir_case(sample_name))

T20_League_App_2017_Live_K_
Events_High_Meet_Your_City_
UC_Browser_Fast_Download


In [8]:
# Rewrite app names and categories into directory-safe form with dir_case.
# app_details is the result of boolean-mask filtering, so the columns are
# replaced through .assign (which returns a new frame) rather than written in
# place; this avoids pandas' SettingWithCopyWarning. Column order is unchanged.
app_details = app_details.assign(**{
    "Play Store Name": app_details["Play Store Name"].apply(dir_case),
    "Category": app_details["Category"].apply(dir_case),
})

In [9]:
# Apps left out of the selection, matched on their normalised Play Store names.
# NOTE(review): the reason these five are excluded is not recorded here.
excluded_names = ["Path", "JW_Library", "Oxford_Dictionary_of_English", "Zigbang", "Domain_Real_Estate_Property"]
is_excluded = app_details["Play Store Name"].isin(excluded_names)
app_details = app_details[~is_excluded]

In [10]:
# Display the cleaned and ranked app table.
app_details

Unnamed: 0,App Package Name,Play Store Name,Category,Average Rating,Number of Ratings,Number of Downloads,Date Updated,Icon URL
5518,com.facebook.katana,Facebook,Social,4.0,65923961,"1,000,000,000 - 5,000,000,000","May 10, 2017",https://lh3.googleusercontent.com/ZZPdzvlpK9r_...
2287,com.whatsapp,WhatsApp_Messenger,Communication,4.4,55264718,"1,000,000,000 - 5,000,000,000","May 12, 2017",https://lh6.ggpht.com/mp86vbELnqLi2FzvhiKdPX31...
4950,com.instagram.android,Instagram,Social,4.5,48792032,"1,000,000,000 - 5,000,000,000","May 12, 2017",https://lh3.googleusercontent.com/aYbdIM1abwyV...
6984,com.google.android.youtube,YouTube,Video_Players_Editors,4.2,14364672,"1,000,000,000 - 5,000,000,000","December 13, 2016",https://lh5.ggpht.com/jZ8XCjpCQWWZ5GLhbjRAufsw...
6221,com.UCMobile.intl,UC_Browser_Fast_Download,Communication,4.5,14079441,"100,000,000 - 500,000,000","April 26, 2017",https://lh3.googleusercontent.com/dH4iXrVFkT-6...
...,...,...,...,...,...,...,...,...
5524,com.mangauniversity101,Manga_University_How_to_Draw,Comics,4.0,25,"1,000 - 5,000","June 11, 2016",https://lh3.googleusercontent.com/cC3VKmKtFIB7...
5747,cards.priceguide.pgc,Sports_Card_Price_Guide,Sports,3.4,25,"1,000 - 5,000","August 22, 2016",https://lh3.googleusercontent.com/3-f-dVnL1Xi8...
1865,com.paccar.paclink.ui,PACLink_Service_Application,Auto_Vehicles,3.4,25,"1,000 - 5,000","November 14, 2016",https://lh3.googleusercontent.com/iy6RuXtTCy5c...
96,com.olo.tindrum,Tin_Drum_Rewards,Food_Drink,3.3,25,"1,000 - 5,000","July 6, 2016",https://lh3.googleusercontent.com/YKKvcrRA2qrW...


In [11]:
# Take the first 15 rows of every category. app_details was sorted by rating
# count in descending order earlier, so these are each category's most-rated apps.
chosen_apps = app_details.groupby(by="Category").head(n=15)
chosen_apps

Unnamed: 0,App Package Name,Play Store Name,Category,Average Rating,Number of Ratings,Number of Downloads,Date Updated,Icon URL
5518,com.facebook.katana,Facebook,Social,4.0,65923961,"1,000,000,000 - 5,000,000,000","May 10, 2017",https://lh3.googleusercontent.com/ZZPdzvlpK9r_...
2287,com.whatsapp,WhatsApp_Messenger,Communication,4.4,55264718,"1,000,000,000 - 5,000,000,000","May 12, 2017",https://lh6.ggpht.com/mp86vbELnqLi2FzvhiKdPX31...
4950,com.instagram.android,Instagram,Social,4.5,48792032,"1,000,000,000 - 5,000,000,000","May 12, 2017",https://lh3.googleusercontent.com/aYbdIM1abwyV...
6984,com.google.android.youtube,YouTube,Video_Players_Editors,4.2,14364672,"1,000,000,000 - 5,000,000,000","December 13, 2016",https://lh5.ggpht.com/jZ8XCjpCQWWZ5GLhbjRAufsw...
6221,com.UCMobile.intl,UC_Browser_Fast_Download,Communication,4.5,14079441,"100,000,000 - 500,000,000","April 26, 2017",https://lh3.googleusercontent.com/dH4iXrVFkT-6...
...,...,...,...,...,...,...,...,...
6247,sk.ipndata.meninyamenafree,Name_days,Events,4.1,2882,"100,000 - 500,000","March 30, 2017",https://lh3.googleusercontent.com/74AECgnx3psY...
2242,com.nearify.android,Nearify_Discover_Events,Events,4.4,2763,"100,000 - 500,000","June 6, 2016",https://lh3.ggpht.com/CYx3IJANEy179hrkSj4qfCA7...
6198,com.eventshigh.nearme.app,Events_High_Meet_Your_City_,Events,4.2,2472,"100,000 - 500,000","May 11, 2017",https://lh3.googleusercontent.com/kSOoODI0T2kS...
5038,tamer.android.prayertimes,Muezzin_New,Events,4.5,2018,"100,000 - 500,000","March 30, 2017",https://lh5.ggpht.com/LABsOEAzu3ZOYWGu2uQqIOJx...


In [12]:
# Package identifiers of the selected apps, copied out into a NumPy array.
app_packages = chosen_apps["App Package Name"].to_numpy(copy=True)

In [13]:
# Restrict the UI table to screens that belong to one of the selected apps.
belongs_to_chosen_app = ui_details["App Package Name"].isin(app_packages)
chosen_ui = ui_details[belongs_to_chosen_app]

In [14]:
# Cap every app at its first 15 UI screens, in the order they appear in the table.
chosen_ui = chosen_ui.groupby(by="App Package Name").head(n=15)

In [15]:
# Display the UI screens selected for the chosen apps.
chosen_ui

Unnamed: 0,UI Number,App Package Name,Interaction Trace Number,UI Number in Trace
169,172,de.wetteronline.wetterapp,0,142
170,173,de.wetteronline.wetterapp,0,82
171,174,de.wetteronline.wetterapp,0,226
172,175,de.wetteronline.wetterapp,0,182
173,176,de.wetteronline.wetterapp,0,52
...,...,...,...,...
66041,71919,com.joelapenna.foursquared,0,208
66042,71920,com.joelapenna.foursquared,0,155
66043,71921,com.joelapenna.foursquared,0,173
66044,71922,com.joelapenna.foursquared,0,79


In [16]:
# Output root for the derived dataset: a "blu" directory next to the current
# working directory. It is created on first run and reused afterwards.
blu_path = Path("..") / "blu"
blu_path.mkdir(exist_ok=True)
blu_path

PosixPath('../blu')

In [17]:
# Directory that receives the per-category / per-app screenshot folders.
blu_dataset = blu_path / "dataset"
blu_dataset.mkdir(exist_ok=True)

In [18]:
# Save the selected apps' metadata alongside the derived dataset (no index column).
chosen_apps_csv = blu_path / "app_details.csv"
chosen_apps.to_csv(chosen_apps_csv, index=False)

In [19]:
# Save the selected UI screens' metadata alongside the derived dataset (no index column).
chosen_ui_csv = blu_path / "ui_details.csv"
chosen_ui.to_csv(chosen_ui_csv, index=False)

In [20]:
# Manifest of every copied screenshot/annotation pair: one entry per file pair,
# stored as parallel lists so it can be passed straight to pd.DataFrame.
data = {
    "category": [],
    "name": [],
    "filename": []
}

# Group the UI numbers by package once, instead of re-filtering chosen_ui for
# every app inside the loop. Row order within each package is preserved.
ui_numbers_by_pkg = {
    pkg: ui_numbers.tolist()
    for pkg, ui_numbers in chosen_ui.groupby("App Package Name", sort=False)["UI Number"]
}

# Walk the selected apps in table order. Each app gets a
# <category>/<name> directory, created even when it has no UI screens.
app_columns = zip(
    chosen_apps["Category"],
    chosen_apps["Play Store Name"],
    chosen_apps["App Package Name"],
)
for category, name, pkg in app_columns:
    root = blu_dataset.joinpath(category, name)
    root.mkdir(parents=True, exist_ok=True)

    # Files are renumbered 1..N per app; the original UI number is only used
    # to locate the source screenshot and annotation.
    for i, image_name in enumerate(ui_numbers_by_pkg.get(pkg, []), 1):
        image_path = screenshots_path.joinpath(f"{image_name}.jpg")
        json_path = annotations_path.joinpath(f"{image_name}.json")

        file_name = f"{i}"
        new_image_path = root.joinpath(f"{file_name}.jpg")
        new_json_path = root.joinpath(f"{file_name}.json")

        # Copy first and record afterwards, so the manifest never lists a
        # pair whose copy raised (a missing source file still raises).
        shutil.copy(image_path, new_image_path)
        shutil.copy(json_path, new_json_path)

        data["category"].append(category)
        data["name"].append(name)
        data["filename"].append(file_name)

In [21]:
# Persist the copy manifest (category, name, filename) without an index column.
dataset_manifest = pd.DataFrame(data)
dataset_manifest.to_csv(blu_path / "dataset.csv", index=False)

In [22]:
# Read the manifest back from disk, as a consumer of the derived dataset would.
dataset_csv = blu_path / "dataset.csv"
dataset_details = pd.read_csv(dataset_csv)

In [23]:
# List the distinct categories present in the manifest, in order of first appearance.
dataset_details["category"].unique()

array(['Social', 'Communication', 'Video_Players_Editors',
       'News_Magazines', 'Travel_Local', 'Maps_Navigation', 'Education',
       'Entertainment', 'Health_Fitness', 'Shopping', 'Music_Audio',
       'Books_Reference', 'Weather', 'Lifestyle', 'Business', 'Finance',
       'Food_Drink', 'Beauty', 'Sports', 'Comics', 'Parenting',
       'House_Home', 'Art_Design', 'Dating', 'Auto_Vehicles', 'Medical',
       'Events'], dtype=object)

In [24]:
# Select the manifest rows of the "Social" category and display them.
is_social = dataset_details["category"].eq("Social")
data = dataset_details.loc[is_social]
data

Unnamed: 0,category,name,filename
0,Social,Facebook,1
1,Social,Facebook,2
2,Social,Facebook,3
3,Social,Facebook,4
4,Social,Facebook,5
...,...,...,...
656,Social,LinkedIn,11
657,Social,LinkedIn,12
658,Social,LinkedIn,13
659,Social,LinkedIn,14


In [25]:
# One dict per remaining row, with the category column removed.
# Despite its name, data_dict is a list of such dicts.
without_category = data.drop(labels="category", axis="columns")
data_dict = without_category.to_dict("records")

In [26]:
# Map each app name to the list of its file names, in row order.
app_names = defaultdict(list)
for record in data_dict:
    files_for_app = app_names[record["name"]]
    files_for_app.append(record["filename"])

In [27]:
# Display the app-name -> file-names mapping built above.
app_names

defaultdict(list,
            {'Facebook': [1, 2, 3, 4, 5, 6, 7, 8, 9],
             'Instagram': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
             'Snapchat': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
             'VK': [1, 2, 3, 4, 5, 6],
             'Google_': [1, 2, 3, 4, 5, 6, 7, 8],
             'Pinterest': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
             'Tumblr': [1, 2, 3, 4],
             'hike_messenger': [1, 2],
             'musical_ly': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
             'OK': [1],
             'ASKfm': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
             'BIGO_LIVE_Live_Stream': [1],
             'ooVoo_Video_Call_Text_Voice': [1, 2, 3, 4, 5, 6, 7],
             'MeetMe_Chat_Meet_New_People': [1],
             'LinkedIn': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]})