From 4ddfcc55f6a7f88c11e9d13e06f3450f046c4fd4 Mon Sep 17 00:00:00 2001 From: Zhuoyun Wei Date: Sun, 30 Jul 2023 01:14:05 -0700 Subject: [PATCH] fixup! fix(extract_media): search all js files in data directory for URLs --- contrib/extract_media/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/extract_media/main.py b/contrib/extract_media/main.py index e06f36f..972f8f7 100755 --- a/contrib/extract_media/main.py +++ b/contrib/extract_media/main.py @@ -23,7 +23,7 @@ class TwimgExtractor(scrapy.Spider): name = 'TwimgExtractor' def __init__(self, archive_dir: Path, output_dir: Path, **kwargs): - self.js_files = archive_dir.glob('data/*.js') + self.js_files = (archive_dir / 'data').rglob('*.js') self.media_dir = archive_dir / 'data/tweets_media' self.output_dir = output_dir super().__init__(**kwargs)