/
directory_crawler.py
40 lines (28 loc) · 957 Bytes
/
directory_crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#import Image
import os
import glob
from subprocess import check_output
class crawler:
    """Recursively crawl a directory tree, collecting files that match a glob pattern.

    Attributes:
        extension: Glob pattern applied to the entries of every visited
            directory. Defaults to ``'*.pdf'``.
        root: Directory used as the starting point when ``crawl`` is called
            without an explicit path.
        matches: Paths found so far by this instance. Results accumulate
            across successive ``crawl`` calls on the same instance.
    """

    # Class-level default pattern; an instance may override it through the
    # constructor or by assigning ``instance.extension``.
    extension = '*.pdf'

    def __init__(self, root, extension=None):
        """Create a crawler rooted at *root*.

        Args:
            root: Directory to start crawling from.
            extension: Optional glob pattern (e.g. ``'*.txt'``). When omitted,
                the class-level default ``extension`` is used.
        """
        self.root = root
        # Per-instance list: a class-level list would be shared by (and leak
        # results between) every crawler object.
        self.matches = []
        if extension is not None:
            self.extension = extension

    def crawl(self, path=None):
        """Collect matching files under *path* and all of its subdirectories.

        Args:
            path: Directory to crawl. Defaults to ``self.root``.

        Returns:
            ``self.matches``, the list holding every match found so far. Each
            entry is *path* (or one of its subdirectories) joined with the
            matching file name, so entries are absolute exactly when the
            starting path is absolute.

        Raises:
            OSError: Propagated from ``os.listdir`` when a directory cannot
                be read.
        """
        if path is None:
            path = self.root

        # Descend first so that results from subdirectories are recorded
        # before the matches found directly inside ``path``.
        for item in os.listdir(path):
            child = os.path.join(path, item)
            if os.path.isdir(child):
                self.crawl(child)

        # Glob with a full pattern instead of changing the process working
        # directory. The directory part is escaped so that characters such as
        # '[' or '*' in a folder name are taken literally.
        pattern = os.path.join(glob.escape(path), self.extension)
        self.matches.extend(glob.glob(pattern))
        return self.matches