-
Notifications
You must be signed in to change notification settings - Fork 1
/
simple_example1.py
53 lines (45 loc) · 3.6 KB
/
simple_example1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#Simplest Example
from resources.lib.argdb_scraper import *
# Parsing settings
parsing_settings = {
    # Log verbosity: 'debug' or 'info', based on how much progress info you want.
    "logging": "info",
    # For debugging purposes: also log to a file if necessary.
    "log_to_file": False,
    # Not used yet.
    "concurrent_processes": 1,
    # Whether to overwrite local variables when running the script, or (for
    # efficiency) reuse the local variables already available in memory.
    "overwrite_locals": False,
    # Parsed file conversions can be saved and are usually not overwritten;
    # this setting overrides that.
    "overwrite_conversions": False,
    # How to respond to match decisions: 'best' (highest ratio, the default)
    # or 'query' (it will ask you to choose).
    "match_response": "best",
    # If no match is found, return the same game dict with no merged data.
    # If False, nothing is added to the merged dict (i.e. the game is thrown out).
    "keep_no_matches": True,
    # Only consider fuzzy matches with at least this score. In testing,
    # anything higher than 90 has been a pretty close match.
    "fuzzy_match_ratio": 90.9,
    # Scoring ratio to use; see the fuzzywuzzy manual for the scoring methods.
    "fuzzy_scoring_type": "token_set_ratio",
    # Max number of matches for a fuzzy match. For query matching, this is
    # how many choices you will be given to look at.
    "max_fuzzy_matches": 5,
    # Use the converted version of the file if it already exists.
    "use_converted_files": True,
    # A common name for the platform you're scraping.
    "common_platforms": ["Nintendo Entertainment System PD Games"],
}
# Output settings
output_settings = {
    # Dat type to output. This is currently the only option; ARGDB will be added later.
    "type": "IAGL",
    # Filename to output.
    "output_filename": "NES_PD_GAMES.xml",
    # The IAGL header will be populated with this name.
    "header_name": "Nintendo Entertainment System PD Games",
    # The IAGL header will be populated with these categories.
    "categories": "Nintendo,PD Games",
    # Simple trigger to turn file saving after parsing on and off.
    "save_output": True,
    # Author for the IAGL header.
    "author": "Zach Morris",
    # Base URL for the IAGL header.
    "base_url": "https://drive.google.com/",
}
# The three ROM files for this example, each given as a path/query string
# (the base URL is set separately in output_settings['base_url']).
files = [
    "uc?export=download&id=11-S0be1FQqV_Lvn0e3st9AUtvimStZDg&name=/2048.nes",
    "uc?export=download&id=1lvwzcqBCrm4ODPEY9P6ih03uN7SyAiDe&name=/BladeBuster.nes",
    "uc?export=download&id=13_QczPj-5faspIj_kB2nNW8BUtE7yBP7&name=/dpadhero2.nes",
]
# Define your argdb scraper object. The instance is bound to `scraper` so the
# imported `argdb_scraper` class is not shadowed by its own instance.
scraper = argdb_scraper(parsing_settings=parsing_settings, output_settings=output_settings)

# Initialize your dat file
dat_file_out = {'datafile': {}}

# Bookkeeping can be empty in this example because we're not merging anything
dat_file_out['datafile']['bookkeeping'] = scraper.get_empty_datafile_bookkeeping_dict()

# Generate the header for the xml file
dat_file_out['datafile']['header'] = scraper.get_new_IAGL_header_dict(
    emu_name=output_settings['header_name'],
    emu_description=output_settings['header_name'],
    emu_category=output_settings['categories'],
    emu_version='010101',
    emu_date='040820',
    emu_author=output_settings['author'],
    emu_baseurl=output_settings['base_url'],
)

# Initialize a list of games
dat_file_out['datafile']['game'] = []

# Populate the list of games by name/description and rom url
for rom_url in files:
    # The game name is the ROM's file name without its directory part and
    # without its extension. Splitting from the right keeps any other dots
    # that are part of the name itself (e.g. 'Mr. Do.nes' -> 'Mr. Do').
    game_name = rom_url.rsplit('/', 1)[-1].rsplit('.', 1)[0]
    dat_file_out['datafile']['game'].append(
        scraper.get_new_IAGL_game_dict(
            name=game_name,
            description=game_name,
            rom_in={'@name': rom_url, '@size': '0'},
        )
    )

# Save the xml file to ...resources/output/
if output_settings['save_output']:
    # NOTE(review): 'bookkeeping' is presumably dropped from the written file via
    # pop_these_keys_in, and `success` presumably reports whether the write
    # worked -- confirm against argdb_scraper.output_dat_file.
    success = scraper.output_dat_file(
        dat_file_out,
        filename_in=output_settings['output_filename'],
        pop_these_keys_in=['bookkeeping'],
    )