In [1]:
from scrapy import signals
from scrapy.conf import settings
from scrapy.crawler import CrawlerProcess
from scrapy.xlib.pydispatch import dispatcher
from multiprocessing.queues import Queue
import scrapy
import multiprocessing
import datetime

  from ipykernel import kernelapp as app


In [2]:
class CrawlerWorker(multiprocessing.Process):
    """Run one spider in a child process and report the items it scraped.

    Items announced through the item signal are accumulated in ``self.items``.
    When ``run`` finishes, that list is put on ``result_queue`` as a single
    object, so the parent retrieves everything with one ``get()``.
    """

    def __init__(self, spider, result_queue):
        """Build the crawler and subscribe to the item signal.

        Args:
            spider: the spider passed to ``CrawlerProcess.crawl`` in ``run``.
            result_queue: queue that receives the list of collected items
                (exactly one ``put`` per worker).
        """
        multiprocessing.Process.__init__(self)
        self.result_queue = result_queue

        self.crawler = CrawlerProcess(settings)

        self.items = []
        self.spider = spider
        # NOTE(review): ``item_passed`` looks like a legacy signal name --
        # confirm it still exists in the installed Scrapy version.
        dispatcher.connect(self._item_passed, signals.item_passed)

    def _item_passed(self, item):
        """Signal handler: remember each item reported by the crawler."""
        self.items.append(item)

    def run(self):
        """Crawl, then hand the collected items to the parent.

        The ``put`` lives in a ``finally`` clause so that a parent blocked in
        ``result_queue.get()`` is released even when the crawl raises; in that
        case it receives whatever was collected so far (possibly an empty
        list). The exception itself is not swallowed and still propagates.
        """
        try:
            self.crawler.crawl(self.spider)
            self.crawler.start()
            self.crawler.stop()
        finally:
            self.result_queue.put(self.items)
        

In [3]:
class CanberraWealtherSpider(scrapy.Spider):
    """Spider for the bom.gov.au Canberra forecast page.

    Requests the single URL in ``start_urls`` and emits one single-key dict
    per matching text node. Values are not grouped per forecast entry: all
    maxima come first, then all minima, then all summaries.
    """
    name = "CanberraWealtherSpider"
    allowed_domains = ["www.bom.gov.au"]
    start_urls = ['http://www.bom.gov.au/act/forecasts/canberra.shtml']

    def parse(self, response):
        """Yield ``{key: utf-8 encoded text}`` for every matched text node.

        Args:
            response: the downloaded page; only its ``xpath`` method is used.
        """
        # (item key, XPath) pairs, listed in the order they are emitted.
        field_queries = (
            ("Max_Temperature", '//em[@class="max"]/text()'),
            ("Min_Temperature", '//em[@class="min"]/text()'),
            ("summary", '//dd[@class="summary"]/text()'),
        )
        for field, query in field_queries:
            for text in response.xpath(query).extract():
                yield {field: text.encode('utf-8')}

In [4]:
def main():
    """Crawl the forecast page in a worker process and print each item.

    Starts a ``CrawlerWorker``, blocks until it delivers its list of items,
    waits for the worker to exit, then prints every item prefixed with the
    current timestamp.
    """
    result_queue = Queue()
    worker = CrawlerWorker(CanberraWealtherSpider(), result_queue)
    worker.start()

    # Fetch the result *before* joining: a child that has put data on a
    # queue may not exit until that data is consumed, so join-then-get can
    # deadlock.
    scraped_items = result_queue.get()
    # Reap the child so it does not linger after the crawl is done.
    worker.join()

    for item in scraped_items:
        # A single pre-formatted string prints identically whether ``print``
        # is a statement or a function.
        print("%s %s" % (datetime.datetime.now(), item))

In [5]:
# Entry point: run the crawl only when this code is executed as the main
# module (which is the case inside a notebook kernel).
if __name__ == "__main__":
    main()

2017-01-12 19:42:04 [scrapy] INFO: Scrapy 1.1.1 started (bot: scrapybot)
2017-01-12 19:42:04 [scrapy] INFO: Overridden settings: {}
2017-01-12 19:42:04 [scrapy] INFO: Enabled extensions:
['scrapy.extensions.logstats.LogStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.corestats.CoreStats']
2017-01-12 19:42:04 [scrapy] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
 'scrapy.downloadermiddlewares.chun

2017-01-12 19:42:12.532775 {'Max_Temperature': '37'}
2017-01-12 19:42:12.533253 {'Max_Temperature': '31'}
2017-01-12 19:42:12.533380 {'Max_Temperature': '29'}
2017-01-12 19:42:12.533488 {'Max_Temperature': '33'}
2017-01-12 19:42:12.533909 {'Max_Temperature': '36'}
2017-01-12 19:42:12.534026 {'Max_Temperature': '34'}
2017-01-12 19:42:12.534102 {'Max_Temperature': '31'}
2017-01-12 19:42:12.534223 {'Min_Temperature': '17'}
2017-01-12 19:42:12.534314 {'Min_Temperature': '19'}
2017-01-12 19:42:12.534405 {'Min_Temperature': '15'}
2017-01-12 19:42:12.534499 {'Min_Temperature': '14'}
2017-01-12 19:42:12.534588 {'Min_Temperature': '16'}
2017-01-12 19:42:12.534695 {'Min_Temperature': '22'}
2017-01-12 19:42:12.534783 {'Min_Temperature': '16'}
2017-01-12 19:42:12.534870 {'summary': 'Partly cloudy.'}
2017-01-12 19:42:12.534960 {'summary': 'A little late rain.'}
2017-01-12 19:42:12.535048 {'summary': 'Shower or two.'}
2017-01-12 19:42:12.535127 {'summary': 'Partly cloudy.'}
2017-01-12 19:42:12.53522