From 31ac0159f910ae4e59f6aa0fe536fc2f3a82dc75 Mon Sep 17 00:00:00 2001
From: Matthew Esposito <matt@matthew.science>
Date: Fri, 24 Mar 2023 15:14:17 -0400
Subject: [PATCH] Implement keyword ranking

---
 backend/scopeBackend/keyword_ranker.py | 27 +++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/backend/scopeBackend/keyword_ranker.py b/backend/scopeBackend/keyword_ranker.py
index e3615d5..0770770 100644
--- a/backend/scopeBackend/keyword_ranker.py
+++ b/backend/scopeBackend/keyword_ranker.py
@@ -2,7 +2,10 @@
 # so that sources with higher primary & secondary keyword count
 # and matches are prioritized
 
-#takes in a primary keyword, array of secondary keywords, and the unranked queryset
+
+# Takes a primary keyword, a list of secondary keywords, and the unranked
+# queryset. secondary_kws MUST be a list of strings: a bare string would be
+# iterated character by character, inflating every secondary match count.
 def rank(primary_kw, secondary_kws, queryset):
     # 1. Reorder sources based on how many keywords were matched
     #   - the primary keyword match is most important so reorder on that basis first
@@ -13,5 +16,23 @@ def rank(primary_kw, secondary_kws, queryset):
     #       of times the primary keyword appears in a given source
     #   - then for every source, store the counts for each secondary kw in a dictionary
     #       and reorder based on the cumulative counts
-    #   - return the final reordered set 
-    pass
\ No newline at end of file
+    #   - return the reordered list of (source, primary_count, secondary_count)
+    def prim(string):
+        return string.upper().count(primary_kw.upper())
+
+    def sec(string):
+        return sum([
+            string.upper().count(secondary.upper())
+            for secondary in secondary_kws
+        ])
+
+    sorted_list = [(query, prim(query), sec(query)) for query in queryset]
+    sorted_list.sort(key=lambda a: -a[2])
+    sorted_list.sort(key=lambda a: -a[1])
+    return sorted_list
+
+
+# docs = ["Ukraine war", "Syrian war war", "No more w*r", "Peace", "Coronavirus"]
+
+# search = rank("war", ["coronavirus", "peace"], docs)
+# print(search)