Here we test the implementation of an apk image search functionality with similarity matching. 

It requires the installation of the following python packages:

  - [parsel](https://github.com/scrapy/parsel) for html parsing
  - [requests](https://github.com/psf/requests), for http requests
  - [fuzzywuzzy](https://github.com/seatgeek/fuzzywuzzy) for title matching
  
  You can install them (assuming you have a python environment with pip) by running this next block:

In [None]:
import sys
!{sys.executable} -m pip install parsel requests fuzzywuzzy

In [290]:
import os
import re
import shutil
import sys
import time

import requests
from fuzzywuzzy import fuzz
from parsel import Selector

# path to mounted drive
MOUNT_PATH = "/tmp/mnt/"
# non apk files to ignore
FOLDER_BLOCKLIST = [
    '.Trash-1000',
    'APK_packagenames.txt',
    'badgelist.txt'
]
# if the titles in the search results have a lower similarity (0-100) than this threshold ignore them
MIN_TITLE_THRESHOLD = 70 

In [91]:
mount_folders = os.listdir(MOUNT_PATH)
mount_folders[:100]

['.Trash-1000',
 '2MD VR Football Unleashed v1.0.30 -Q2-90Hz-CustomRes -QuestUnderground -steam-1230860 -versionCode-30 -packageName-com.truantpixel.twomdquest',
 'A Fishermans Tale v1.056 -QuestUnderground -steam-559330 -versionCode-14 -packageName-com.innerspacevr.afishermanstale',
 'A Lullaby of Colors v1.2.2 -steam-992960 -versionCode-1 -packageName-com.idumpling.a_lullaby_of_colors',
 'A Moment in Cannes v0.3.22 -steam-252110 -versionCode-1 -packageName-com.azadux.Cannes_Fort',
 'AFFECTED - The Manor v1.26.1.75 -QuestUnderground -steam-707580 -versionCode-75 -packageName-com.fallen.manorquest',
 'APK_packagenames.txt',
 'AUDICA v1.0.3.2.128543601 -QuestUnderground -steam-1020340 -versionCode-128543601 -packageName-com.harmonixmusic.kata',
 'Accounting+ v1.24.35-Q2Patched -QuestUnderground -steam-927270 -versionCode-35 -packageName-com.crowscrowscrows.AccountingPlus',
 'Acron - Attack of the Squirrels! v1.14.94392.114094392 -QuestUnderground -steam-1094870 -MP- -versionCode-1140943

In [243]:
def parse_folder(folder):
    """Removes extra info from the folder name to make search more accurate"""
    replacement_patterns = [
        '-steam.*',
        '-oculus.*',
        '-versionCode.*',
        '-packageName.*',
        '-MP-.*',
        '-NA-.*',
        '-QuestUnderground.*',
        '-Q2.*',
        r'v(?:(\d+)\.?)?(?:(\d+)\.?)?(?:(\d+)\.?\d+)\S*', # version pattern        
    ]

    for pattern in replacement_patterns:
        folder = re.sub(pattern,'',folder)
    return folder.strip()

In [233]:
parsed_folders = [parse_folder(folder) for folder in mount_folders if folder not in FOLDER_BLOCKLIST]
parsed_folders[:23]

['2MD VR Football Unleashed',
 'A Fishermans Tale',
 'A Lullaby of Colors',
 'A Moment in Cannes',
 'AFFECTED - The Manor',
 'AUDICA',
 'Accounting+',
 'Acron - Attack of the Squirrels!',
 'Air Brigade 2',
 'Angry Birds VR Isle of Pigs',
 'Apex Construct  -VRP',
 'Apollo 11',
 'Arizona Sunshine',
 'ArtPlunge',
 'Artifact',
 'Ashtar UFO',
 'Attack on Quest',
 'Audio Trip',
 'Audioshield',
 'B-Team',
 'Bait!',
 'Ballista',
 'BanditSix']

**SEARCH FUNCTION**

We implement the function to perform a steam store search to get the steam appid

In [292]:
def steam_search(search_term, sim_threshold=MIN_TITLE_THRESHOLD):
    """
    Finds the most similar steamp appid from a search term
    """
    resp = requests.get("https://store.steampowered.com/search/",
                        params={'term':search_term, 'vrsupport':1}
                       )
    sel = Selector(resp.text)
    results_titles = sel.xpath("//span[contains(@class, 'title')]/text()").extract()
    if not results_titles:
        print(f'NO SEARCH RESULTS FOR TERM {search_term}')
        return
    results_imgs = [img.extract() for img in sel.xpath("//div[contains(@class, 'search_capsule')]/img/@src")]
    results_titles_similarity = [fuzz.partial_ratio(folder, title) for title in results_titles]
    most_similar_id = results_titles_similarity.index(max(results_titles_similarity))
    most_similar_title = results_titles[most_similar_id]    
    most_similar_title_similarity = results_titles_similarity[most_similar_id]
    if most_similar_title_similarity < sim_threshold:
        print(f"NO VALID MATCH FOR TERM '{search_term}'")
        return
    most_similar_image = results_imgs[most_similar_id] 
    appid = re.match(
        '.*/steam/\w+/(\d+)/.*jpg', 
        most_similar_image).groups(0
    )[0]
    print(f"FOUND MATCH FOR TERM '{search_term}'->'{most_similar_title}', SIMILARITY:{most_similar_title_similarity} APPID:{appid}")
    return appid

Now we can test the search function, we can get a random folder and test a bunch of times

In [293]:
import random

In [295]:
for _ in range(10):
    time.sleep(1)
    folder = random.choice(parsed_folders)
    steam_search(folder)

FOUND MATCH FOR TERM 'Keep Talking and Nobody Explodes'->'Keep Talking and Nobody Explodes', SIMILARITY:100 APPID:341800
FOUND MATCH FOR TERM 'The Wizards'->'The Wizards - Dark Times', SIMILARITY:100 APPID:1103860
NO VALID MATCH FOR TERM 'Together VR  (manual install)'
FOUND MATCH FOR TERM 'Pteranodons Flight - The Flying Dinosaur Game'->'Pteranodon's Flight: The Flying Dinosaur Game', SIMILARITY:96 APPID:1259480
NO SEARCH RESULTS FOR TERM Void Racer - Extreme
FOUND MATCH FOR TERM 'Cave Digger Riches'->'Cave Digger VR', SIMILARITY:93 APPID:844380
FOUND MATCH FOR TERM 'The Line'->'Spec Ops: The Line', SIMILARITY:100 APPID:50300
FOUND MATCH FOR TERM 'Bait!'->'The Legend of Heroes: Trails of Cold Steel - Shining Pom Bait Value Pack 1', SIMILARITY:80 APPID:605360
FOUND MATCH FOR TERM 'AFFECTED - The Manor'->'AFFECTED: The Manor', SIMILARITY:89 APPID:707580
NO VALID MATCH FOR TERM 'Lets Go Chopping'


**RENAMING FOLDERS**

The final step would be to rename the folders and add the steam id. Since i dont have permissions i will create a bunch of mock folders

In [321]:
!mkdir /tmp/mnt_mock

mkdir: cannot create directory ‘/tmp/mnt_mock’: File exists


In [337]:
mock_mount_path = '/tmp/mnt_mock/'

In [338]:
for folder in mount_folders[:1000]:
    if os.path.exists(f'{mock_mount_path}{folder}'):
        continue
    os.mkdir(f'{mock_mount_path}{folder}')

In [339]:
mount_folders = os.listdir(mock_mount_path)
mount_folders[:10]

['Journey Of The Gods v1.0.479431-Q2Patched-90Hz -QuestUnderground -oculus-1853479764707533 -versionCode-479431 -packageName-gg.trs.grappa',
 'Bonfire v1.0.466-Q2Patched -QuestUnderground -steam-1155880 -versionCode-466 -packageName-com.baobab.bonfire',
 'Void Racer - Extreme v1.08.108 -QuestUnderground -oculus-2874244485968712 -versionCode-108 -MP- -packageName-com.coplanar.vre',
 'Escape Legacy v1.20 -steam-940300 -versionCode-20 -packageName-com.StormingTech.EscapeLegacySideQuest',
 'Zombie World VR v1.0 -steam-1206080 -versionCode-4 -packageName-com.Appalga.ZombieWorldVR',
 'Thumper v1.00 -steam-356400 -versionCode-21 -packageName-com.Drool.Thumper.quest',
 'Accounting+ v1.24.35-Q2Patched -QuestUnderground -steam-927270 -versionCode-35 -packageName-com.crowscrowscrows.AccountingPlus',
 'Toy Clash v1.4.0 -steam-620360 -versionCode-42 -packageName-com.fiveminlab.toyclash',
 'Jigsaw 360 v3.0 -steam-836610 -versionCode-54 -packageName-com.JumbliVR.Jigsaw360',
 'Phantom - Covert Ops v1.

In [332]:
folder = mount_folders[1]
folder

'Bonfire v1.0.466-Q2Patched -QuestUnderground -steam-1155880 -versionCode-466 -packageName-com.baobab.bonfire'

In [346]:
def rename_folder(folder, mount_path, appid):
    renamed_folder = re.sub('-steam*','',folder) + f' -steam-{appid}'
    print(f'RENAMING {folder}-->{renamed_folder}')
    os.rename(f'{mount_path}{folder}', f'{mount_path}{renamed_folder}')

In [347]:
appid = '12312'
rename_folder(folder, mock_mount_path, appid)

RENAMING SculptrVR v2.79 -QuestUnderground -steam-418520 -versionCode-162 -packageName-com.sculptrvrinc.sculptrvrapp-->SculptrVR v2.79 -QuestUnderground -418520 -versionCode-162 -packageName-com.sculptrvrinc.sculptrvrapp -steam-12312


In [None]:
def main(mount_path):
    mount_folders = os.listdir(mount_path)[:10]
    parsed_folders = [parse_folder(folder) for folder in mount_folders if folder not in FOLDER_BLOCKLIST]

    for folder in parsed_folders:
        time.sleep(1)
        folder = random.choice(parsed_folders)
        appid = steam_search(folder)
        rename_folder(folder, mount_path, appid)
