diff --git a/recipe__analyze_users_in_search_results.py b/recipe__analyze_users_in_search_results.py
index bb44f76..c839715 100644
--- a/recipe__analyze_users_in_search_results.py
+++ b/recipe__analyze_users_in_search_results.py
@@ -12,7 +12,8 @@ def analyze_users_in_search_results(t, q, max_pages=15, results_per_page=100):
     search_api = twitter.Twitter(domain="search.twitter.com")
     search_results = []
     for page in range(1,max_pages+1):
-        search_results += search_api.search(q=q, rpp=results_per_page, page=page)['results']
+        search_results += \
+            search_api.search(q=q, rpp=results_per_page, page=page)['results']

     # Extract the screen names (the "from_user" field) from the results
     # and optionally map them to a useful field like the tweet id
@@ -30,18 +31,21 @@ def analyze_users_in_search_results(t, q, max_pages=15, results_per_page=100):

         screen_name_to_tweet_ids[screen_name] += [ result['id'] ]

-    # Use the /users/lookup resource to resolve profile information for these screen names
+    # Use the /users/lookup resource to resolve profile information for
+    # these screen names

     screen_name_to_info = get_info_by_screen_name(t, screen_name_to_tweet_ids.keys())

-    # Extract the home location for each user. Note that the "location" field can be anything
-    # a user has typed in, and may be something like "Everywhere", "United States" or something else
-    # that won't geocode to a specific coordinate on a map.
+    # Extract the home location for each user. Note that the "location" field can
+    # be anything a user has typed in, and may be something like "Everywhere",
+    # "United States" or something else that won't geocode to a specific coordinate
+    # on a map.

-    screen_name_to_location = dict([(sn, info['location']) for sn, info in screen_name_to_info.items()])
+    screen_name_to_location = dict([(sn, info['location'])
+                                    for sn, info in screen_name_to_info.items()])

-    # Use the various screen_name_to{tweet_ids, info, location} maps to determine interesting things about
-    # the people who appear in the search results.
+    # Use the various screen_name_to{tweet_ids, info, location} maps to determine
+    # interesting things about the people who appear in the search results.

     return screen_name_to_info, screen_name_to_location, screen_name_to_tweet_ids
diff --git a/recipe__crawl.py b/recipe__crawl.py
index da9e14e..0b04452 100644
--- a/recipe__crawl.py
+++ b/recipe__crawl.py
@@ -17,7 +17,8 @@ def get_all_followers_ids(user_id, limit):
        ids = []
        while cursor != 0:

-            response = make_twitter_request(t, t.followers.ids, user_id=user_id, cursor=cursor)
+            response = make_twitter_request(t, t.followers.ids,
+                                            user_id=user_id, cursor=cursor)

            if response is not None:
                ids += response['ids']
@@ -50,8 +51,8 @@ def get_all_followers_ids(user_id, limit):
        for _fid in queue:
            _follower_ids = get_all_followers_ids(user_id=_fid, limit=limit)

-            # Store a fid => _follower_ids mapping in Redis or other database of choice
-            # In Redis, it might look something like this:
+            # Store a fid => _follower_ids mapping in Redis or other
+            # database of choice. In Redis, it might look something like this:

            rid = get_redis_id('follower_ids', user_id=fid)
            [ r.sadd(rid, _id) for _id in _follower_ids ]
diff --git a/recipe__dorling_cartogram.py b/recipe__dorling_cartogram.py
index abc84cc..a42d76a 100644
--- a/recipe__dorling_cartogram.py
+++ b/recipe__dorling_cartogram.py
@@ -126,8 +126,11 @@ def get_state_frequencies(locations):

 shutil.copytree('etc/protovis/protovis-3.2', 'out/protovis-3.2')

-html = open('etc/protovis/dorling_cartogram/dorling_cartogram.html').read() % (json.dumps(json_data),)
-f = open(os.path.join(os.getcwd(), 'out', 'dorling_cartogram', 'dorling_cartogram.html'), 'w')
+html = open('etc/protovis/dorling_cartogram/dorling_cartogram.html').read() % \
+    (json.dumps(json_data),)
+
+f = open(os.path.join(os.getcwd(), 'out', 'dorling_cartogram',
+                      'dorling_cartogram.html'), 'w')
 f.write(html)
 f.close()
diff --git a/recipe__geocode_profile_locations.py b/recipe__geocode_profile_locations.py
index 94a7a22..206afdd 100644
--- a/recipe__geocode_profile_locations.py
+++ b/recipe__geocode_profile_locations.py
@@ -8,9 +8,9 @@ def geocode_locations(geocoder, locations):

-    # Some basic replacement transforms may be necessary for geocoding services to function properly
-    # You may probably need to add your own as you encounter rough edges in the data or with the geocoding
-    # service you settle on. For example, ...
+    # Some basic replacement transforms may be necessary for geocoding services to
+    # function properly. You will probably need to add your own as you encounter rough
+    # edges in the data or with the geocoding service you settle on. For example, ...

     replacement_transforms = [('San Francisco Bay', 'San Francisco')]
@@ -24,11 +24,11 @@ def geocode_locations(geocoder, locations):

         if location_to_coords.has_key(location):
             continue

-        transformed_location = location
+        xformed_location = location

         for transform in replacement_transforms:
-            transformed_location = transformed_location.replace(*transform)
+            xformed_location = xformed_location.replace(*transform)

         while True:
@@ -37,39 +37,41 @@ def geocode_locations(geocoder, locations):

             try:

                 # This call returns a generator

-                results = geocoder.geocode(transformed_location, exactly_one=False)
+                results = geocoder.geocode(xformed_location, exactly_one=False)
                 break
             except HTTPError, e:
                 num_errors += 1
                 if num_errors >= MAX_HTTP_ERRORS:
                     sys.exit()
                 print >> sys.stderr, e.message
-                print >> sys.stderr, 'Encountered an urllib2 error. Trying again...'
+                print >> sys.stderr, 'A urllib2 error. Retrying...'
             except UnicodeEncodeError, e:
                 print >> sys.stderr, e
-                print >> sys.stderr, 'Encountered a UnicodeEncodeError...', e.message
+                print >> sys.stderr, 'A UnicodeEncodeError...', e.message
                 break
             except geopy.geocoders.google.GQueryError, e:
                 print >> sys.stderr, e
-                print >> sys.stderr, 'Encountered a ...GQueryError', e.message
+                print >> sys.stderr, 'A GQueryError', e.message
                 break

         for result in results:

             # Each result is of the form ("Description", (X,Y))

-            # Unless you have a some special logic for picking the best of many possible
-            # results, choose the first one returned in results and move along
+            # Unless you have some special logic for picking the best of many
+            # possible results, choose the first one returned in results and move
+            # along

             location_to_coords[location] = result[1]
             location_to_description[location] = result[0]
             break

-    # Use location_to_coords and other information of interest to populate a visualization.
-    # Depending on your particular needs, it is highly likely that you'll want to further
-    # post process the geocoded locations to filter out locations such as "U.S.A." which will
-    # plot a placemarker in the geographic center of the United States yet make the visualization
-    # look skewed in favor of Oklahoma, for example.
+    # Use location_to_coords and other information of interest to populate a
+    # visualization. Depending on your particular needs, it is highly likely that
+    # you'll want to further post process the geocoded locations to filter out
+    # locations such as "U.S.A." which will plot a placemarker in the geographic
+    # center of the United States yet make the visualization look skewed in favor
+    # of places like Oklahoma, for example.

     return location_to_coords, location_to_description
@@ -91,8 +93,8 @@ def geocode_locations(geocoder, locations):

     t = oauth_login()

-    # This function returns a few useful maps. Let's use the screen_name => location map and
-    # geocode the locations
+    # This function returns a few useful maps. Let's use the
+    # screen_name => location map and geocode the locations

     _, screen_name_to_location, _ = analyze_users_in_search_results(t, Q, 2)
diff --git a/recipe__get_rt_origins.py b/recipe__get_rt_origins.py
index eb44ea2..e71dff8 100644
--- a/recipe__get_rt_origins.py
+++ b/recipe__get_rt_origins.py
@@ -17,10 +17,14 @@ def get_rt_origins(tweet):

     if tweet['retweet_count'] > 0:
         rt_origins += [ tweet['user']['name'].lower() ]

-    # Also, inspect the tweet for the presence of "legacy" retweet patterns such as "RT" and "via"
+    # Also, inspect the tweet for the presence of "legacy" retweet
+    # patterns such as "RT" and "via"

     try:
-        rt_origins += [ mention.strip() for mention in rt_patterns.findall(tweet['text'])[0][1].split() ]
+        rt_origins += [
+            mention.strip()
+            for mention in rt_patterns.findall(tweet['text'])[0][1].split()
+        ]
     except IndexError, e:
         pass
@@ -34,30 +38,30 @@ def get_rt_origins(tweet):

 # Assume tweets have been fetched from the /search resource or elsewhere.

 tweets = \
-    [
-        {
-            'text' : 'RT @ptowbrussell Get @SocialWebMining example code at http://bit.ly/biais2 #w00t'
+    [
+        {
+            'text' : 'RT @ptowbrussell Get @SocialWebMining at http://bit.ly/biais2 #w00t'

-            # ... more tweet fields ...
+            # ... more tweet fields ...

-        },
+        },

-        {
-            'text' : 'Get @SocialWebMining example code at http://bit.ly/biais2 #w00t',
-            'retweet_count' : 1,
-            'user' : {
-                'name' : 'ptwobrussell'
+        {
+            'text' : 'Get @SocialWebMining example code at http://bit.ly/biais2 #w00t',
+            'retweet_count' : 1,
+            'user' : {
+                'name' : 'ptwobrussell'

-                # ... more user fields ...
-            }
+                # ... more user fields ...
+            }

-            # ... more tweet fields ...
+            # ... more tweet fields ...

-        },
-
-    # ... more tweets ...
+        },

-    ]
+    # ... more tweets ...
+
+    ]

 for tweet in tweets:
     print get_rt_origins(tweet)
diff --git a/recipe__get_search_results_for_trending_topic.py b/recipe__get_search_results_for_trending_topic.py
index 3358253..ffc2bc9 100644
--- a/recipe__get_search_results_for_trending_topic.py
+++ b/recipe__get_search_results_for_trending_topic.py
@@ -37,7 +37,8 @@

 search_results = []
 for page in range(1,MAX_PAGES+1):
-    search_results += twitter_search.search(q=q, rpp=RESULTS_PER_PAGE, page=page)['results']
+    search_results += \
+        twitter_search.search(q=q, rpp=RESULTS_PER_PAGE, page=page)['results']

 # Exract tweet entities and embed them into search results
diff --git a/recipe__harvest_timeline.py b/recipe__harvest_timeline.py
index 5ce4624..231e463 100644
--- a/recipe__harvest_timeline.py
+++ b/recipe__harvest_timeline.py
@@ -102,10 +102,10 @@ def max_finding_reducer(keys, values, rereduce):
     KW['since_id'] = 1

 # Harvest tweets for the given timeline.
-# For friend and home timelines, the unofficial limitation is about 800 statuses although
-# other documentation may state otherwise. The public timeline only returns 20 statuses
-# and gets updated every 60 seconds, so consider using the streaming API for public statuses.
-# See http://groups.google.com/group/twitter-development-talk/browse_thread/thread/4678df70c301be43
+# For friend and home timelines, the unofficial limitation is about 800 statuses
+# although other documentation may state otherwise. The public timeline only returns
+# 20 statuses and gets updated every 60 seconds, so consider using the streaming API
+# for public statuses. See http://bit.ly/fgJrAx
 # Note that the count and since_id params have no effect for the public timeline

 page_num = 1
@@ -114,7 +114,8 @@ def max_finding_reducer(keys, values, rereduce):
     api_call = getattr(t.statuses, TIMELINE_NAME + '_timeline')

     tweets = make_twitter_request(t, api_call, **KW)

-    # Actually storing tweets in CouchDB is as simple as passing them into a call to db.update
+    # Actually storing tweets in CouchDB is as simple as passing them
+    # into a call to db.update

     db.update(tweets, all_or_nothing=True)
diff --git a/recipe__setwise_operations.py b/recipe__setwise_operations.py
index 10dc871..5e68d68 100644
--- a/recipe__setwise_operations.py
+++ b/recipe__setwise_operations.py
@@ -55,7 +55,8 @@ def get_redis_id(key_name, screen_name=None, user_id=None):

         cursor = response['next_cursor']

-        print >> sys.stderr, 'Fetched %i total friend ids for %s' % (r.scard(rid), SCREEN_NAME)
+        print >> sys.stderr, \
+            'Fetched %i total friend ids for %s' % (r.scard(rid), SCREEN_NAME)

         if r.scard(rid) >= MAX_IDS:
             break
@@ -80,7 +81,8 @@ def get_redis_id(key_name, screen_name=None, user_id=None):

         cursor = response['next_cursor']

-        print >> sys.stderr, 'Fetched %i total follower ids for %s' % (r.scard(rid), SCREEN_NAME)
+        print >> sys.stderr, \
+            'Fetched %i total follower ids for %s' % (r.scard(rid), SCREEN_NAME)

         if r.scard(rid) >= MAX_IDS:
             break
diff --git a/recipe__tweet_entities_tagcloud.py b/recipe__tweet_entities_tagcloud.py
index 0c27160..1bea6a7 100644
--- a/recipe__tweet_entities_tagcloud.py
+++ b/recipe__tweet_entities_tagcloud.py
@@ -120,7 +120,7 @@ def weightTermByFreq(f):

 if not os.path.isdir('out'):
     os.mkdir('out')

-f = open(os.path.join('out', os.path.basename(HTML_TEMPLATE)), 'w')
+f = open(os.path.join(os.getcwd(), 'out', os.path.basename(HTML_TEMPLATE)), 'w')
 f.write(html_page)
 f.close()
@@ -128,4 +128,4 @@ def weightTermByFreq(f):

 # Open up the web page in your browser

-webbrowser.open("file://" + os.path.join(os.getcwd(), 'out', os.path.basename(HTML_TEMPLATE)))
+webbrowser.open("file://" + f.name)
diff --git a/recipe__visualize_rt_graph_protovis.py b/recipe__visualize_rt_graph_protovis.py
index 03d15e4..f75f093 100644
--- a/recipe__visualize_rt_graph_protovis.py
+++ b/recipe__visualize_rt_graph_protovis.py
@@ -29,7 +29,8 @@ def write_protovis_output(g, out_file, html_template):

         links.append({'source' : indexed_nodes[n2], 'target' : indexed_nodes[n1]})

-    json_data = json.dumps({"nodes" : [{"nodeName" : n} for n in nodes], "links" : links}, indent=4)
+    json_data = json.dumps({"nodes" : [{"nodeName" : n} for n in nodes], \
+                            "links" : links}, indent=4)

     html = open(html_template).read() % (json_data,)
@@ -63,7 +64,9 @@ def write_protovis_output(g, out_file, html_template):

     search_results = []
     for page in range(1,MAX_PAGES+1):
-        search_results.append(twitter_search.search(q=Q, rpp=RESULTS_PER_PAGE, page=page))
+        search_results.append(twitter_search.search(q=Q,
+                                                    rpp=RESULTS_PER_PAGE,
+                                                    page=page))

     all_tweets = [ tweet for page in search_results