-
Notifications
You must be signed in to change notification settings - Fork 0
/
create_folders.py
72 lines (62 loc) · 2.51 KB
/
create_folders.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import os
import json
from scraper import download_images
from folder_utils import list_folders
from util import list_folders, sanitize_names_for_folders, get_names_from_csv, remove_html_tags, write_csv_header, write_to_csv, add_row_to_csv
from bs4 import BeautifulSoup
import shutil
def _extmetadata_text(extmetadata, key):
    """Return the tag-stripped 'value' of an extmetadata field, or the result for '' if absent."""
    # Fields may be absent or carry an empty value; both are treated as
    # empty text instead of raising KeyError.
    value = (extmetadata.get(key) or {}).get('value') or ''
    return remove_html_tags(value)


def find_names_in_wikimedia_response(filename, names):
    """
    Finds names in wikimedia commons image api response, creates folders,
    then moves the image into the folder

    For every page in the response, each name that occurs in the page's
    image description or object name gets a folder (spaces replaced by
    underscores) created relative to the current working directory. If a
    file called ``<folder_name><extension of the page title>`` exists
    there, it is moved into that folder. Move errors are printed and the
    remaining names/pages are still processed.

    ===Parameters===
    filename: filepath and name of the JSON file holding the api response
    names: list of names (strings) to look for
    """
    # The with-statement closes the file; no explicit close() is required.
    with open(filename, "r") as read_file:
        response = json.load(read_file)
    pages = response['query']['pages']

    for page in pages.values():
        extmetadata = page['imageinfo'][0]['extmetadata']
        object_name = _extmetadata_text(extmetadata, 'ObjectName')
        description = _extmetadata_text(extmetadata, 'ImageDescription')
        file_ext = os.path.splitext(page['title'])[1]

        # Both operands need their own membership test: the previous
        # `s in description or object_name` was true for every name as soon
        # as object_name was non-empty. Empty names are skipped because ''
        # is a substring of every string.
        matches = [
            name for name in names
            if name and (name in description or name in object_name)
        ]

        for name in matches:
            folder_name = name.replace(' ', '_')
            # NOTE(review): the image is looked up as <folder_name><ext>,
            # not under the page title - confirm this matches how the
            # downloader names the files.
            image_name = folder_name + file_ext

            if not os.path.exists(folder_name):
                os.mkdir(folder_name)

            if not os.path.exists(image_name):
                continue

            try:
                shutil.move(image_name, os.path.join(folder_name, image_name))
            except OSError as e:
                # Best effort: report the failure and carry on.
                print(e)
def create_folders(parent_dir, names):
    """
    Create one folder per name inside parent_dir.

    The names are passed through sanitize_names_for_folders first. The
    parent folder is created when it does not exist yet (including any
    missing intermediate folders). The process's working directory is
    changed to parent_dir and is left there when the function returns.
    Finally list_folders is called on that directory.

    ===Parameters===
    parent_dir: path of the folder that will hold the per-name folders
    names: names to create folders for
    """
    folder_names = sanitize_names_for_folders(names)

    # Create the parent folder if needed, then move into it. The earlier
    # `os.mkdir(...) and os.chdir(...)` never ran its right-hand side
    # because os.mkdir returns None.
    os.makedirs(parent_dir, exist_ok=True)
    os.chdir(parent_dir)

    for folder in folder_names:
        if folder == 'Name':
            # 'Name' is never turned into a folder (presumably the CSV
            # header cell - confirm). It is skipped silently instead of
            # being reported as an already existing folder.
            continue
        if os.path.isdir(folder):
            print(f'A folder called {folder} already exists')
        else:
            print(f'Creating folder {folder}')
            os.mkdir(folder)

    list_folders(os.getcwd())