Skip to content

Commit

Permalink
added option to do phrase search or word search
Browse files Browse the repository at this point in the history
  • Loading branch information
verbalhanglider committed May 29, 2018
1 parent 3deffb6 commit b6009eb
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 39 deletions.
4 changes: 2 additions & 2 deletions marcextraction/interfaces.py
Expand Up @@ -60,7 +60,7 @@ def _set_query_creator(self, flag):
else:
raise ValueError("invalid index type '{}' for query creation".format(flag))

def search(self, query_term, field, subfields, rows=1000):
def search(self, query_term, field, subfields, rows=1000, phrase_search=False):
"""a method to run a search on the index for a particular value in a particular field
Args:
Expand All @@ -76,7 +76,7 @@ def search(self, query_term, field, subfields, rows=1000):
for subfield in subfields:
initial_string = field + subfield
field = self.field_creator(initial_string)
query_chain.append(self.query_creator(field, query_term))
query_chain.append(self.query_creator(field, query_term, phrase_term=phrase_search))
if query_chain:
result = self.solr_index.search(q=' '.join(query_chain), fl='controlfield_001', rows=rows)
else:
Expand Down
75 changes: 39 additions & 36 deletions marcextraction/utils.py
@@ -1,36 +1,39 @@
"""utility functions for working with ole index data
"""

def create_ole_index_field(field_name):
    """a method to build the name under which a MARC field is stored in the OLE index

    Args:
        field_name (str): a MARC field number with a subfield code as a single string. Ex '245a'

    Returns:
        str. The field name with the 'mdf_' prefix added. Ex. 'mdf_245a'
    """
    index_template = "mdf_{}"
    indexed_name = index_template.format(field_name)
    return indexed_name

def create_ole_query(field_name, query_term):
    """a method to return the query string for making a phrase query on a field in OLE

    The term is always wrapped in double quotes. Backslashes and double
    quotes inside the term are backslash-escaped so that a term containing
    a quote cannot end the phrase early and produce a malformed query.

    Args:
        field_name (str): a MARC field combined with a subfield code with prefix 'mdf_. Ex. 'mdf_245a'.
        query_term (str): a word or phrase

    Returns:
        str. A full query string to be entered into a Solr index for searching on a particular field. Ex. 'mdf_245a:"banana"'
    """
    # Render through format() first so non-string terms are stringified the
    # same way the original single format() call did.
    term_text = "{}".format(query_term)
    # Escape the backslash first, otherwise the backslashes added for the
    # quotes would themselves be doubled.
    escaped_term = term_text.replace("\\", "\\\\").replace("\"", "\\\"")
    return "{}:\"{}\"".format(field_name, escaped_term)

def find_ole_bib_numbers(ole_data_list):
    """a method to find bib numbers from a set of OLE results

    Every value found under the "controlfield" key of each result is
    collected, in order, into one flat list. Results that lack the key (or
    hold an empty value) are skipped instead of raising a TypeError, and a
    bare string value is kept whole instead of being split into characters.

    Args:
        ole_data_list (list): a list of dictionaries containing output from a Solr search of an OLE index.

    Returns:
        list. an iterable containing strings that should represent bib numbers. Ex. ['1000435999', '10045334500']
    """
    # NOTE(review): the only key read here is "controlfield"; confirm that it
    # matches the field name actually returned by the Solr query.
    output = []
    for record in ole_data_list:
        values = record.get("controlfield")
        if not values:
            # key missing or empty: nothing to collect from this result
            continue
        if isinstance(values, str):
            # a single-valued field; extending would add one char at a time
            output.append(values)
        else:
            output.extend(values)
    return output
"""utility functions for working with ole index data
"""

def create_ole_index_field(field_name):
    """a method to build the name under which a MARC field is stored in the OLE index

    Args:
        field_name (str): a MARC field number with a subfield code as a single string. Ex '245a'

    Returns:
        str. The field name with the 'mdf_' prefix added. Ex. 'mdf_245a'
    """
    index_template = "mdf_{}"
    indexed_name = index_template.format(field_name)
    return indexed_name

def create_ole_query(field_name, query_term, phrase_term=False):
    """a method to return the query string for making a field query in OLE

    With phrase_term=True the term is wrapped in double quotes so it is
    matched as an exact phrase; backslashes and double quotes inside the
    term are backslash-escaped so the term cannot end the phrase early and
    produce a malformed query. With phrase_term=False the term is inserted
    unchanged.

    Args:
        field_name (str): a MARC field combined with a subfield code with prefix 'mdf_. Ex. 'mdf_245a'.
        query_term (str): a word or phrase
        phrase_term (bool): whether to build a quoted phrase query. Defaults to False.

    Returns:
        str. A full query string to be entered into a Solr index for searching on a particular field. Ex. 'mdf_245a:banana' or 'mdf_245a:"social scientist"'
    """
    if not phrase_term:
        # NOTE(review): an unquoted multi-word term is passed through as-is;
        # per the Solr query syntax only the first word is then qualified by
        # field_name -- confirm callers only pass single words here.
        return "{}:{}".format(field_name, query_term)
    # Render through format() first so non-string terms are stringified the
    # same way the original single format() call did.
    term_text = "{}".format(query_term)
    # Escape the backslash first, otherwise the backslashes added for the
    # quotes would themselves be doubled.
    escaped_term = term_text.replace("\\", "\\\\").replace("\"", "\\\"")
    return "{}:\"{}\"".format(field_name, escaped_term)

def find_ole_bib_numbers(ole_data_list):
    """a method to find bib numbers from a set of OLE results

    Every value found under the "controlfield" key of each result is
    collected, in order, into one flat list. Results that lack the key (or
    hold an empty value) are skipped instead of raising a TypeError, and a
    bare string value is kept whole instead of being split into characters.

    Args:
        ole_data_list (list): a list of dictionaries containing output from a Solr search of an OLE index.

    Returns:
        list. an iterable containing strings that should represent bib numbers. Ex. ['1000435999', '10045334500']
    """
    # NOTE(review): the only key read here is "controlfield"; confirm that it
    # matches the field name actually returned by the Solr query.
    output = []
    for record in ole_data_list:
        values = record.get("controlfield")
        if not values:
            # key missing or empty: nothing to collect from this result
            continue
        if isinstance(values, str):
            # a single-valued field; extending would add one char at a time
            output.append(values)
        else:
            output.extend(values)
    return output
9 changes: 8 additions & 1 deletion tests/test_spec.py
Expand Up @@ -110,7 +110,14 @@ def testSearchingVuFindWithTargetedField(self):
searcher = SolrIndexSearcher(
SOLR_INDEX, 'ole')
results = searcher.search('Banana', field='245', subfields=['a'])
self.assertEqual(len(results), 190)
self.assertEqual(len(results), 188)

def testSearchingVuFindWithPhraseSearch(self):
    # Phrase search: look for 'Social scientist' as an exact phrase in
    # field 245 subfield a and check the number of results returned.
    phrase_searcher = SolrIndexSearcher(SOLR_INDEX, 'ole')
    matches = phrase_searcher.search(
        'Social scientist',
        field='245',
        subfields=['a'],
        phrase_search=True
    )
    self.assertEqual(len(matches), 120)


def testSearchingOleIndex(self):
url_object = urlparse(OLE_INDEX)
Expand Down

0 comments on commit b6009eb

Please sign in to comment.