Skip to content

Commit

Permalink
Fork of code from ScraperWiki at https://classic.scraperwiki.com/scra…
Browse files Browse the repository at this point in the history
  • Loading branch information
vlakos committed Jul 29, 2018
0 parents commit 4a19cbf
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
# Ignore output of scraper
data.sqlite
35 changes: 35 additions & 0 deletions scraper.py
@@ -0,0 +1,35 @@
import scraperwiki
import simplejson
import urllib2
import datetime

# --- Scraper settings -------------------------------------------------------
# QUERY is the Twitter search term; edit it to taste.
# For instance: 'newsnight', 'from:bbcnewsnight', 'to:bbcnewsnight'
QUERY = 'to:oneworldnl'
# Sent as the `lang` URL parameter; left empty here.
LANGUAGE = ''
# Sent as the `rpp` URL parameter (kept as a string, it is only interpolated).
RESULTS_PER_PAGE = '100'
# Pages 1..NUM_PAGES (inclusive) are requested.
NUM_PAGES = 15

# Walk the search result pages 1..NUM_PAGES, flatten each tweet into a row and
# store it through scraperwiki.sqlite.save with "id" as the unique key.
# Scraping stops at the first page that fails to download, parse or save.
# NOTE(review): search.twitter.com/search.json looks like the legacy search
# endpoint -- confirm it is still reachable before relying on this scraper.
for page in range(1, NUM_PAGES + 1):
    base_url = 'http://search.twitter.com/search.json?q=%s&rpp=%s&lang=%s&page=%s' \
        % (urllib2.quote(QUERY), RESULTS_PER_PAGE, LANGUAGE, page)
    try:
        results_json = simplejson.loads(scraperwiki.scrape(base_url))
        for result in results_json['results']:
            data = {}
            data['id'] = result['id']
            # The text arrives with HTML-escaped double quotes; store them as
            # plain apostrophes. (The literal had been entity-decoded into an
            # unterminated string, which broke the whole script.)
            data['text'] = result['text'].replace("&quot;", "'")
            data['from_user'] = result['from_user']
            data['profile_image_url'] = result['profile_image_url']
            data['geo'] = result['geo']
            data['source'] = result['source']
            data['iso_language_code'] = result['iso_language_code']
            data['from_user_name'] = result['from_user_name']
            # Time the row was scraped, not the time the tweet was posted.
            data['date'] = datetime.datetime.today()
            # Single-argument parenthesised print: same output as
            # `print a, b` on Python 2, and valid syntax on Python 3 as well.
            print('%s %s' % (data['from_user'], data['text']))
            scraperwiki.sqlite.save(["id"], data)
    except Exception:
        # Best effort: report the failing URL and stop paging. Catching
        # Exception (not a bare except) lets Ctrl-C and SystemExit through.
        print('Oh dear, failed to scrape %s' % base_url)
        break


0 comments on commit 4a19cbf

Please sign in to comment.