Skip to content

Commit

Permalink
Implement keyword ranking
Browse files Browse the repository at this point in the history
  • Loading branch information
sigaloid committed Mar 24, 2023
1 parent edd45ae commit 31ac015
Showing 1 changed file with 24 additions and 3 deletions.
27 changes: 24 additions & 3 deletions backend/scopeBackend/keyword_ranker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
# so that sources with higher primary & secondary keyword count
# and matches are prioritized

#takes in a primary keyword, array of secondary keywords, and the unranked queryset

def rank(primary_kw, secondary_kws, queryset):
    """Rank text sources by how often the search keywords occur in them.

    Matching is case-insensitive (both sides are upper-cased) and counts
    non-overlapping substring occurrences, as ``str.count`` does.

    Args:
        primary_kw: The primary keyword (a string). Its occurrence count is
            the most important ordering criterion.
        secondary_kws: An iterable of secondary keyword strings. A single
            bare string is treated as ONE keyword rather than being iterated
            character by character, which would inflate the score.
        queryset: An iterable of sources to rank. Each item must be a string,
            because ``.upper()`` and ``.count()`` are called on it.

    Returns:
        A list of ``(source, primary_count, secondary_count)`` tuples, where
        ``secondary_count`` is the cumulative count over all secondary
        keywords. The list is ordered by ``primary_count`` descending, then by
        ``secondary_count`` descending; ties keep their original relative
        order.
    """
    # Guard against the easy mistake of passing "peace" instead of ["peace"]:
    # iterating a string yields single characters, each of which would be
    # counted as its own keyword.
    if isinstance(secondary_kws, str):
        secondary_kws = [secondary_kws]

    # Normalise the keywords once instead of once per source. Building a list
    # also lets one-shot iterables (e.g. generators) be reused for every
    # source.
    primary_upper = primary_kw.upper()
    secondary_upper = [keyword.upper() for keyword in secondary_kws]

    def score(source):
        # Upper-case the source a single time and reuse it for every keyword.
        text = source.upper()
        primary_count = text.count(primary_upper)
        secondary_count = sum(text.count(keyword) for keyword in secondary_upper)
        return (source, primary_count, secondary_count)

    ranked = [score(source) for source in queryset]
    # One stable sort on a composite key gives the same order as sorting by
    # secondary count first and then by primary count.
    ranked.sort(key=lambda entry: (-entry[1], -entry[2]))
    return ranked


# docs = ["Ukraine war", "Syrian war war", "No more w*r", "Peace", "Coronavirus"]

# search = rank("war", ["coronavirus", "peace"], docs)
# print(search)

0 comments on commit 31ac015

Please sign in to comment.