Skip to content

Commit

Permalink
New Query functionality, bug fixes, cleaned up tests
Browse files Browse the repository at this point in the history
  • Loading branch information
bobheadxi committed Nov 11, 2017
1 parent 01492ac commit 7171aee
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 109 deletions.
61 changes: 40 additions & 21 deletions sleuth_backend/solr/query.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
#import nltk
'''
Solr query assembling
'''

"""
Solr queries
"""
#import nltk

class Query(object):
"""
This object allows component-based building and manipulation of Solr query strings.
See class for available query manipulations.
Params:
query_str (str): the desired query
as_phrase (str): should this query be formatted as a phrase (default=True)
fields (dict): the Solr fields to apply this query to (default=None)
proximity (int): proximity for parts of the search phrase (default=None)
only works if as_phrase=True
query_str (str): the desired query
as_phrase (bool): should this query be formatted as a phrase (default=True)
escape (bool): should special characters be escaped from the phrase (default=False)
Example Usage:
my_query = Query(query_str)
Expand All @@ -23,20 +21,18 @@ class Query(object):
return str(my_query) # return query string
"""

def __init__(self, query_str, as_phrase=True, fields=None, proximity=None):
def __init__(self, query_str, as_phrase=True, escape=False):
"""
Initialize a query
"""
self.query_str = query_str
self._sanitize()

if escape:
self._escape_special_chars()

if as_phrase:
self._as_phrase(proximity)

if fields:
if type(fields) is not dict:
raise ValueError('Fields must be a dict of field names and boost factors')
self._for_fields(fields)
self._as_phrase()

def __str__(self):
"""
Expand Down Expand Up @@ -74,11 +70,26 @@ def for_single_field(self, field):
Apply given field to query
'''
self.query_str = '{}:{}'.format(field, self.query_str)

def _for_fields(self, fields):

def fuzz(self, factor):
    '''
    "Fuzzes" the query by a given factor between 0 and 2, inclusive.
    Acts differently depending on whether the query is a phrase or not.
    For phrases, this factor determines how far apart the words of a
    phrase can be found.
    For terms, this factor determines how many insertions/deletions will
    still return a match.
    Params:
        factor (number): value appended to the query string after '~'
    Raises:
        ValueError: if factor is not within 0 to 2 inclusive (NaN included)
    '''
    # A chained comparison is used instead of `factor < 0 or factor > 2`
    # because NaN compares False against everything and would otherwise
    # slip through and produce an unparseable '~nan' suffix.
    if not 0 <= factor <= 2:
        raise ValueError('Factor must be between 0 and 2.')
    self.query_str = '{}~{}'.format(self.query_str, factor)

def for_fields(self, fields):
    """
    Apply given fields to query
    Params:
        fields (dict): field names mapped to boost factors
    Raises:
        ValueError: if fields is not a dict
    """
    # isinstance rather than an exact type() match, so that dict
    # subclasses (e.g. OrderedDict) are accepted as well.
    if not isinstance(fields, dict):
        raise ValueError('Fields must be a dict of field names and boost factors')
    # The helper receives the current query string and the fields as a
    # list of (name, boost) pairs.
    self._for_fields_helper(self.query_str, list(fields.items()))

def _for_fields_helper(self, query_str, fields):
Expand All @@ -92,14 +103,12 @@ def _for_fields_helper(self, query_str, fields):
self.select_or(query)
self._for_fields_helper(query_str, fields[1:])

def _as_phrase(self, proximity):
def _as_phrase(self):
"""
Format query as entire phrase, and optionally set proximity for
words within the phrase.
"""
self.query_str = '"{}"'.format(self.query_str)
if proximity:
self.query_str = '{}~{}'.format(self.query_str, proximity)

def _sanitize(self):
"""
Expand All @@ -108,3 +117,13 @@ def _sanitize(self):
# TODO: trim useless words like 'and', 'or', 'for'
# from query if as_phrase is false using NLTK POS tagger
self.query_str = ' '.join(self.query_str.split())

def _escape_special_chars(self):
'''
Escape special characters that interfere with Solr's query parser.
Ideally only use on queries where as_phrase=False, since special
characters in phrases do not upset Solr.
'''
special_chars = ['!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '|', '&']
for c in special_chars:
self.query_str = self.query_str.replace(c, '\\'+c)
28 changes: 18 additions & 10 deletions sleuth_backend/tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,34 @@ class TestQuery(TestCase):
Test the Solr query object
"""

def test_basic_init(self):
def test_init(self):
"""
Test initializing a Query
as_phrase and not as_phrase
with and without proximity parameter
"""
query_str = "hello"
query = Query(query_str)
self.assertEqual('"hello"', str(query))

query = Query(query_str, proximity=5)
self.assertEqual('"hello"~5', str(query))

query = Query(query_str, as_phrase=False)
self.assertEqual('hello', str(query))

def test_init_fields(self):
query = Query('wow:wow()', escape=True)
self.assertEqual('"wow\:wow\(\)"', str(query))

def test_for_fields(self):
"""
Test initializing a Query with fields applied
Test applying fields to a Query
"""
query_str = "hello bruno"
fields = {'id':1, 'name':10}
query = Query(query_str, fields=fields)
query = Query("hello bruno")
query.for_fields(fields)
self.assertEqual(
'"hello bruno" OR id:("hello bruno")^1 OR name:("hello bruno")^10',
str(query)
)
not_dict = "not clean"
self.failUnlessRaises(ValueError, Query, query_str, fields=not_dict)
self.failUnlessRaises(ValueError, query.for_fields, not_dict)

def test_boost_importance(self):
"""
Expand Down Expand Up @@ -74,3 +73,12 @@ def test_for_single_field(self):
query = Query("hello bruno")
query.for_single_field('id')
self.assertEqual('id:"hello bruno"', str(query))

def test_fuzz(self):
    '''
    Test applying a fuzz factor to a query
    '''
    query = Query("hello bruno")
    query.fuzz(2)
    self.assertEqual('"hello bruno"~2', str(query))
    # assertRaises replaces the deprecated failUnlessRaises alias, which
    # was removed from unittest in Python 3.12.
    self.assertRaises(ValueError, query.fuzz, 7)
    self.assertRaises(ValueError, query.fuzz, -1)
    # A rejected factor must leave the query untouched.
    self.assertEqual('"hello bruno"~2', str(query))
114 changes: 46 additions & 68 deletions sleuth_backend/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ def get(self, param, default):
return self.params[param] if param in self.params else default

class MockRequest(object):

def __init__(self, method, get=None):
self.method = method
if get is not None:
Expand Down Expand Up @@ -47,16 +46,22 @@ def test_apis_without_get(self, mock_query):
self.assertEqual(result.status_code, 405)

@patch('sleuth_backend.solr.connection.SolrConnection.query')
def test_search_without_params(self, mock_query):
def test_apis_without_params(self, mock_query):
mock_query.return_value = {}
mock_request = MockRequest('GET', get=MockGet({}))
result = search(mock_request)
response_body = json.loads(result.content)
self.assertEqual(result.status_code, 400)
self.assertEqual(response_body['errorType'], 'INVALID_SEARCH_REQUEST')
result = getdocument(mock_request)
response_body = json.loads(result.content)
self.assertEqual(result.status_code, 400)
self.assertEqual(response_body['errorType'], 'INVALID_GETDOCUMENT_REQUEST')

@patch('sleuth_backend.solr.connection.SolrConnection.core_names')
@patch('sleuth_backend.solr.connection.SolrConnection.query')
def test_search_with_valid_request(self, mock_query):
def test_apis_with_valid_request(self, mock_query, mock_cores):
# search
mock_query.return_value = {
"type": "genericPage",
"response": {
Expand All @@ -83,13 +88,15 @@ def test_search_with_valid_request(self, mock_query):
mock_request = MockRequest('GET', get=MockGet(params))
result = search(mock_request)
self.assertEqual(result.status_code, 200)

mock_query['response']['docs']['id'] = mock_query['response']['docs']['id'][0]
mock_query['response']['docs']['description'] = mock_query['response']['docs']['description'][0]
mock_response = mock_query.return_value
mock_response['response']['docs'][0]['id'] = 'www.cool.com'
mock_response['response']['docs'][0]['updatedAt'] = ''
mock_response['response']['docs'][0]['name'] = ''
mock_response['response']['docs'][0]['description'] = 'Nice one dude'
self.assertEqual(
result.content.decode("utf-8"),
str({
'data':[mock_query.return_value],
'data':[mock_response],
'request':{
'query':'somequery','types':['genericPage'],
'return_fields':['id','updatedAt','name','description','content'],
Expand All @@ -98,49 +105,46 @@ def test_search_with_valid_request(self, mock_query):
})
)

@patch('sleuth_backend.solr.connection.SolrConnection.core_names')
@patch('sleuth_backend.solr.connection.SolrConnection.query')
def test_search_multicore(self, mock_query, mock_cores):
mock_query.return_value = {
"type": "courseItem",
"response": {
"numFound": 1,
"start": 0,
"docs": [
{
"id": ["www.cool.com"],
"description": ["Nice one dude"],
}
]
},
"highlighting": {
"www.cool.com": {
"content": ['Nice one dude']
}
}
}
# multicore search
mock_cores.return_value = ['courseItem', 'courseItem']
params = { 'q': 'somequery' }
mock_request = MockRequest('GET', get=MockGet(params))
result = search(mock_request)
self.assertEqual(result.status_code, 200)

mock_query['response']['docs']['id'] = mock_query['response']['docs']['id'][0]
mock_query['response']['docs']['description'] = mock_query['response']['docs']['description'][0]
self.assertEqual(
result.content.decode("utf-8"),
str({'data': [{'type': 'courseItem', 'response': {'numFound': 1, 'start': 0, 'docs': [{'id': 'www.cool.com', 'description': 'Nice one dude', 'updatedAt': '', 'name': '', 'content': ''}]}, 'highlighting': {'www.cool.com': {'content': ['Nice one dude']}}}, {'type': 'courseItem', 'response': {'numFound': 1, 'start': 0, 'docs': [
{'id': 'www.cool.com', 'description': 'Nice one dude', 'updatedAt': '', 'name': '', 'content': ''}]}, 'highlighting': {'www.cool.com': {'content': ['Nice one dude']}}}], 'request': {'query': 'somequery', 'types': ['courseItem', 'courseItem'], 'return_fields': ['id', 'updatedAt', 'name', 'description'], 'state': ''}})
)

# getdocument
params = {
'id': 'somequery',
'type': 'genericPage',
'return': 'content'
}
mock_request = MockRequest('GET', get=MockGet(params))
result = getdocument(mock_request)
self.assertEqual(result.status_code, 200)
self.assertEqual(
result.content.decode("utf-8"),
str({'data': {'type': 'genericPage', 'doc': {'id': 'www.cool.com', 'description': 'Nice one dude', 'updatedAt': '', 'name': '', 'content': ''}}, 'request': {
'query': 'somequery', 'types': ['genericPage'], 'return_fields': ['id', 'updatedAt', 'name', 'description', 'content'], 'state': ''}})
)

mock_query.return_value['response']['numFound'] = 0
mock_request = MockRequest('GET', get=MockGet(params))
result = getdocument(mock_request)
self.assertEqual(result.status_code, 200)
self.assertEqual(
result.content.decode("utf-8"),
str({
'data':[mock_query.return_value, mock_query.return_value],
'request':{
'query':'somequery','types':['courseItem','courseItem'],
'return_fields':['id','updatedAt','name','description'],
'state':''
}
'data': {'type': '', 'doc': {}}, 'request': {'query': 'somequery', 'types': ['genericPage'], 'return_fields': ['id', 'updatedAt', 'name', 'description', 'content'], 'state': ''}
})
)

@patch('sleuth_backend.solr.connection.SolrConnection.query')
def test_search_with_error_response(self, mock_query):
def test_apis_with_error_response(self, mock_query):
# Solr response error
mock_query.return_value = {
"error": {
Expand All @@ -160,6 +164,7 @@ def test_search_with_error_response(self, mock_query):
result = search(mock_request)
self.assertEqual(result.status_code, 400)
self.assertEqual(result.content.decode("utf-8"), expected_response)
mock_request = MockRequest('GET', get=MockGet({'id':'query', 'type': 'test'}))
result = getdocument(mock_request)
self.assertEqual(result.status_code, 400)
self.assertEqual(result.content.decode("utf-8"), expected_response)
Expand All @@ -169,6 +174,7 @@ def test_search_with_error_response(self, mock_query):
mock_request = MockRequest('GET', get=MockGet(params))
result = search(mock_request)
self.assertEqual(result.status_code, 400)
mock_request = MockRequest('GET', get=MockGet({'id':'query'}))
result = getdocument(mock_request)
self.assertEqual(result.status_code, 400)

Expand All @@ -177,6 +183,7 @@ def test_search_with_error_response(self, mock_query):
mock_request = MockRequest('GET', get=MockGet(params))
result = search(mock_request)
self.assertEqual(result.status_code, 500)
mock_request = MockRequest('GET', get=MockGet({'id':'query'}))
result = getdocument(mock_request)
self.assertEqual(result.status_code, 500)

Expand All @@ -185,35 +192,6 @@ def test_search_with_error_response(self, mock_query):
mock_request = MockRequest('GET', get=MockGet(params))
result = search(mock_request)
self.assertEqual(result.status_code, 500)
mock_request = MockRequest('GET', get=MockGet({'id':'query'}))
result = getdocument(mock_request)
self.assertEqual(result.status_code, 500)

@patch('sleuth_backend.solr.connection.SolrConnection.query')
def test_getdocument_with_valid_request(self, mock_query):
mock_query.return_value = {
"type": "genericPage",
"response": {
"numFound": 1,
"start": 0,
"docs": [{"id": ["www.cool.com"],"description": ["Nice one dude"],}]
},
"highlighting": {
"www.cool.com": { "content": ['Nice one dude'] }
}
}
params = {
'q': 'somequery',
'type': 'genericPage',
'return': 'content'
}
mock_request = MockRequest('GET', get=MockGet(params))
result = getdocument(mock_request)
self.assertEqual(result.status_code, 200)

self.assertEqual(
result.content.decode("utf-8"),
str({
'data': {'type': 'genericPage', 'doc': {'id': 'www.cool.com', 'description': 'Nice one dude'}},
'request': {'query': '', 'types': ['genericPage'], 'return_fields': ['id', 'updatedAt', 'name', 'description', 'content'], 'state': ''}
})
)
4 changes: 3 additions & 1 deletion sleuth_backend/views/error.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ class ErrorTypes(Enum):
SOLR_CONNECTION_ERROR = 1
# Occurs when Solr returns an error response to a search query.
SOLR_SEARCH_ERROR = 2
# Occurs when a search term or core name is missing from a search request.
# Occurs when a search term is missing from a search request.
INVALID_SEARCH_REQUEST = 3
# Occurs when an id is missing from a getdocument request.
INVALID_GETDOCUMENT_REQUEST = 4

class SleuthError(Exception):
'''
Expand Down
4 changes: 1 addition & 3 deletions sleuth_backend/views/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,6 @@ def search(request):
query_response['type'] = core_to_search
for doc in query_response['response']['docs']:
flatten_doc(doc, return_fields)
if 'description' not in doc:
doc['description'] = ''

responses['data'].append(query_response)

Expand Down Expand Up @@ -165,7 +163,7 @@ def getdocument(request):

kwargs = { 'return_fields': return_fields }

if id is '':
if doc_id is '':
sleuth_error = SleuthError(ErrorTypes.INVALID_GETDOCUMENT_REQUEST)
return HttpResponse(sleuth_error.json(), status=400)

Expand Down
Loading

0 comments on commit 7171aee

Please sign in to comment.