From ea0b4ca5591519c6c03075f0535040a7f2dc91cb Mon Sep 17 00:00:00 2001
From: Yoonbae Cho
Date: Sat, 17 Aug 2019 20:39:10 +0900
Subject: [PATCH] provide concurrency

---
 day.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/day.py b/day.py
index 57074dc..d943741 100644
--- a/day.py
+++ b/day.py
@@ -7,7 +7,6 @@
 import argparse
 import scrapy
-import os.path
 
 from scrapy.exporters import CsvItemExporter
 from scrapy.crawler import CrawlerProcess
 
@@ -22,6 +21,7 @@ class Spider(scrapy.Spider):
 
     custom_settings = {
         'FEED_EXPORT_ENCODING': 'utf-8',
+        'CONCURRENT_REQUESTS': 20,
         'RETRY_ENABLED': True,
         'RETRY_TIMES': 5
     }
@@ -51,17 +51,23 @@ def parse(this, response):
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('symbol', help='symbol to fetch')
-    parser.add_argument('--dir', help='output directory', default='./')
+    parser.add_argument('--symbol', help='a symbol to fetch')
+    parser.add_argument('-s', '--source', help='list of symbols')
+    parser.add_argument('-o', '--output', help='output file')
 
     args = parser.parse_args()
 
     process = CrawlerProcess(settings={
-        'FEED_URI': 'stdout:' if args.dir is None else os.path.join(args.dir, args.symbol + '.csv'),
+        'FEED_URI': 'stdout:' if args.output is None else args.output,
         'FEED_FORMAT': 'csv',
         'LOG_ENABLED': False
     })
 
-    process.crawl(Spider, args.symbol)
+    if args.symbol:
+        process.crawl(Spider, args.symbol)
+    else:
+        with open(args.source) as f:
+            [process.crawl(Spider, symbol) for symbol in f.read().splitlines()]
+
     process.start()
 
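
A minimal usage sketch of the new interface (the file and symbol names here are illustrative, not part of the patch):

    # single symbol, CSV written to stdout
    python day.py --symbol AAPL

    # batch mode: one crawl per line of a hypothetical symbols.txt,
    # all rows written to quotes.csv
    python day.py --source symbols.txt --output quotes.csv

The list comprehension in the batch branch is evaluated only for its side effect; an equivalent, more conventional spelling is an explicit loop:

    # schedule one crawler per symbol on the shared CrawlerProcess;
    # nothing runs until process.start() spins up the reactor
    with open(args.source) as f:
        for symbol in f.read().splitlines():
            process.crawl(Spider, symbol)

Since every process.crawl() call registers its crawler before process.start() blocks, the spiders run concurrently in a single reactor, and CONCURRENT_REQUESTS = 20 raises each crawler's request parallelism above Scrapy's default of 16.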