Skip to content

Commit

Permalink
Improvements to the parsers
Browse files Browse the repository at this point in the history
  • Loading branch information
zemanel committed Jun 14, 2011
1 parent cd7b9fd commit 496f8a9
Show file tree
Hide file tree
Showing 1,008 changed files with 267 additions and 242 deletions.
54 changes: 28 additions & 26 deletions app/mood/handlers.py
Expand Up @@ -11,33 +11,35 @@
logger = logging.getLogger(__name__)

class HomePage(RequestHandler, Jinja2Mixin):
    """Handler that renders the ``home.html`` template."""

    def get(self):
        """Render ``home.html``.

        The template receives two URLs, both built with
        ``url_for(..., _full=True)``:

        * ``baseurl`` -- URL of the ``home`` route.
        * ``bookmarklet_src`` -- URL of the ``bookmarklet-js`` route.
        """
        return self.render_response(
            'home.html',
            baseurl=self.url_for('home', _full=True),
            bookmarklet_src=self.url_for('bookmarklet-js', _full=True)
        )

class BookmarkletPage(RequestHandler, Jinja2Mixin):
    """Handler that renders the ``bookmarklet.js`` template."""

    def get(self):
        """Render ``bookmarklet.js``.

        The template receives ``baseurl``, the URL of the ``home`` route
        built with ``url_for(..., _full=True)``.
        """
        return self.render_response(
            'bookmarklet.js',
            baseurl=self.url_for('home', _full=True)
        )

class NewsItemDetail(RequestHandler, Jinja2Mixin):
    """Handler returning the sentiment fields of one ``NewsItem`` as JSON/JSONP."""

    def get(self, itemid):
        """Return the JSON for a news item comment.

        ``itemid`` is converted to ``str`` and used as the datastore key name
        of the ``NewsItem`` to load.

        When the request carries a ``jsoncallback`` query argument the JSON
        document is wrapped as ``<jsoncallback>(<json>)`` (JSONP); otherwise
        the bare JSON document is returned.

        Aborts with 400 when ``jsoncallback`` is not a plain (optionally
        dotted) JavaScript identifier, and with 404 when no item exists for
        ``itemid``.
        """
        # Function-scope import so this block does not depend on the module's
        # import section.
        import re

        jsoncallback = self.request.args.get('jsoncallback', None)
        # The callback name is echoed verbatim into the response body, so it
        # must be restricted to identifier characters; anything else would
        # let the caller inject arbitrary script into the reply.
        if jsoncallback is not None and not re.match(
                r'\A[A-Za-z_$][\w$]*(?:\.[A-Za-z_$][\w$]*)*\Z', jsoncallback):
            self.abort(400)

        itemid = str(itemid)
        newsitem = NewsItem.get_by_key_name(itemid)
        if newsitem is None:
            self.abort(404)

        json_response = json.dumps({
            'itemid': itemid,
            'is_sentiment_processed': newsitem.is_sentiment_processed,
            'sentiment_type': newsitem.sentiment_type,
            'sentiment_score': newsitem.sentiment_score,
            'sentiment_status': newsitem.sentiment_status,
            'sentiment_status_info': newsitem.sentiment_status_info,
            # NOTE(review): empty placeholder key kept only so the payload
            # stays identical for existing clients -- confirm it can go.
            '': '',
        }, indent=2)

        if jsoncallback is None:
            return Response(json_response, mimetype='application/json')
        return Response("%s(%s)" % (jsoncallback, json_response),
                        mimetype='application/javascript')
38 changes: 19 additions & 19 deletions app/mood/hnsearchapi.py
Expand Up @@ -7,22 +7,22 @@
logger = logging.getLogger(__name__)

class HNSearchAPI(object):
    """Minimal client for the HNSearch items ``_search`` endpoint."""

    # Endpoint prefix; the url-encoded query string is appended directly.
    baseurl = "http://api.thriftdb.com/api.hnsearch.com/items/_search?"

    def search(self, created_from, created_to, start=0, limit=100,
               pretty_print=True, sort_by='create_ts desc'):
        """Perform a comment search request against HNSearch.

        Parameters:
            created_from, created_to: bounds interpolated into the
                ``[<from> TO <to>]`` range filter on ``create_ts``.
            start: value sent as the ``start`` parameter.
            limit: value sent as the ``limit`` parameter.
            pretty_print: value sent as the ``pretty_print`` parameter.
            sort_by: value sent as the ``sortby`` parameter.

        Returns:
            The raw response body, exactly as read from the HTTP response.

        Errors raised by ``urllib2.urlopen`` propagate to the caller.
        """
        params = {
            'filter[fields][type]': 'comment',
            'filter[fields][create_ts]': '[%s TO %s]' % (created_from, created_to),
            'sortby': sort_by,
            'start': start,
            'limit': limit,
            'pretty_print': pretty_print
        }
        url = self.baseurl + urllib.urlencode(params)
        logger.info("Fetching url %s", url)
        result = urllib2.urlopen(url)
        # Always release the connection, even if reading the body fails.
        try:
            return result.read()
        finally:
            result.close()
105 changes: 55 additions & 50 deletions app/mood/jobs.py
@@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
import logging
import rfc3339
import datetime
from django.utils import simplejson as json
from tipfy.app import Response
from tipfy.handler import RequestHandler
Expand All @@ -11,56 +13,59 @@
logger = logging.getLogger(__name__)

class QueueHNSearchJob(RequestHandler):
    """Handler that fans an HNSearch poll out into ``hnsearchapi`` queue tasks."""

    # Number of results each queued poll task is asked to fetch.
    PAGE_SIZE = 100
    # Upper bound on the number of hits that get paged through per run.
    MAX_HITS = 1000
    # Look-back window appended to the current timestamp.
    # NOTE(review): presumably slightly longer than the schedule interval so
    # consecutive runs overlap -- confirm against the cron configuration.
    WINDOW = '31MINUTES'

    def get(self):
        """Poll HNSearch API for news comments.

        Issues one ``limit=0`` search to learn how many comments fall inside
        the look-back window, then enqueues one ``/tasks/poll_hnsearch`` task
        per page of ``PAGE_SIZE`` results (capped at ``MAX_HITS`` hits).

        Returns an ``OK`` response with status 200. Any exception raised while
        polling or enqueueing is logged with its traceback and turned into
        ``abort(500)``.
        """
        queue = taskqueue.Queue(name='hnsearchapi')
        api = HNSearchAPI()
        limit = self.PAGE_SIZE
        try:
            now_rfc = rfc3339.rfc3339(datetime.datetime.now(), utc=True)
            created_from = "%s-%s" % (now_rfc, self.WINDOW)
            created_to = now_rfc
            logger.info("Polling HNSearchAPI from %s to %s ",
                        created_from, created_to)

            # limit=0: only the hit count is needed here, not the items.
            result = api.search(created_from=created_from,
                                created_to=created_to, start=0, limit=0)
            content = json.loads(result, encoding="utf-8")
            hits = int(content['hits'])
            logger.info("Got %s hnsearch hits", content['hits'])
            if hits > self.MAX_HITS:
                hits = self.MAX_HITS
                logger.warning("Number of hits is over limit. Trimming to %s",
                               self.MAX_HITS)

            for start in xrange(0, hits, limit):
                params = {
                    'created_from': created_from,
                    'created_to': created_to,
                    'start': start,
                    'limit': limit,
                }
                task = taskqueue.Task(params=params, method="GET",
                                      url="/tasks/poll_hnsearch")
                queue.add(task)
                logger.info("Created task %s", task.name)
        except Exception:
            logger.exception("Failed to queue HNSearch poll tasks")
            self.abort(500)
        return Response('OK', status=200)


class QueueAlchemyTasksJob(RequestHandler):
    """Handler that queues sentiment-analysis tasks for unprocessed items."""

    def get(self):
        """Fill the ``alchemyapi`` task queue with sentiment-analysis tasks.

        Selects up to 100 ``NewsItem`` keys that are neither processed nor
        queued (newest ``create_ts`` first), enqueues one named
        ``/tasks/poll_alchemyapi`` task per item, and sets the item's
        ``is_sentiment_queued`` flag.

        Returns an ``OK`` response with status 200.
        """
        queue = taskqueue.Queue(name='alchemyapi')
        query = NewsItem.all(keys_only=True)
        query = query.filter("is_sentiment_processed", False)
        query = query.filter("is_sentiment_queued", False)
        query = query.order('-create_ts')

        for key in query.fetch(limit=100):
            keyname = key.name()
            taskname = "sentimental-analysis-%s" % keyname
            task = taskqueue.Task(params={'itemid': keyname}, name=taskname,
                                  method="GET", url="/tasks/poll_alchemyapi")
            try:
                queue.add(task)
            except (taskqueue.TaskAlreadyExistsError,
                    taskqueue.TombstonedTaskError):
                # A task with this name was queued by an earlier run whose
                # flag update did not complete. Fall through and set the flag
                # so this item stops blocking every subsequent run.
                logger.warning("Task %s already exists; marking item as queued",
                               taskname)
            else:
                logger.info("Created task %s", taskname)

            # The query was keys-only, so load the entity to update its flag.
            newsitem = NewsItem.get_by_key_name(keyname)
            if newsitem is None:
                # Entity disappeared between the query and this lookup.
                continue
            newsitem.is_sentiment_queued = True
            newsitem.put()
        return Response('OK', status=200)
43 changes: 22 additions & 21 deletions app/mood/models.py
Expand Up @@ -21,24 +21,25 @@
"""
class NewsItem(db.Model):
    """Datastore model for a news item and its sentiment-analysis state."""
    # id: will use key of db model

    itemid = db.IntegerProperty()  # migrate
    text = db.TextProperty()
    create_ts = db.DateTimeProperty()
    type = db.StringProperty()
    username = db.StringProperty()
    parent_id = db.IntegerProperty()  # parent_id - The parent item's id

    # --- sentiment analysis ---
    # True once a sentiment result has been recorded for the item.
    is_sentiment_processed = db.BooleanProperty(default=False)
    # True once a sentiment-analysis task has been queued for the item.
    is_sentiment_queued = db.BooleanProperty(default=False)  # migrate
    sentiment_type = db.StringProperty()
    sentiment_score = db.FloatProperty()
    sentiment_status = db.StringProperty()  # None | OK | ERROR ;migrate
    # NOTE(review): StringProperty values are length-limited; if this holds
    # free-form error text a TextProperty may be needed -- confirm.
    sentiment_status_info = db.StringProperty()  # migrate

    # Fields currently not stored:
    #points = db.IntegerProperty()
    #parent_id: will use parent of db model instance
    #url = db.LinkProperty()
    #domain = db.StringProperty()
    #title = db.StringProperty()
    #num_comments = db.IntegerProperty()
    #created_on = db.DateTimeProperty()

0 comments on commit 496f8a9

Please sign in to comment.