In [15]:
import logging
import sys

import requests
from requests import RequestException, HTTPError
import pandas as pd

In [16]:
# Show full cell contents in displayed DataFrames (None disables truncation).
pd.set_option("display.max_colwidth", None)

In [17]:
# Calling getLogger() without a name returns the root logger.
logger = logging.getLogger()

In [18]:
# Endpoints of the locally running API (port 8088).
# NOTE(review): MATCH_FAQS_API_URL is presumably used by the search helper — confirm against its definition.
MATCH_FAQS_API_URL = "http://localhost:8088/match_faqs"
# The scraper endpoint is called with update_index=true and return_faqs=false.
RUN_SCRAPERS_API_URL = "http://localhost:8088/run_scrapers?update_index=true&return_faqs=false"

In [None]:
# Run the scrapers via the API (not needed on every notebook run).
# No timeout is set on purpose: presumably a scraper run can take a long time.
response = requests.get(RUN_SCRAPERS_API_URL)
# Fail fast with an HTTPError on a 4xx/5xx reply instead of a confusing
# JSON-decoding error or KeyError on the next line.
response.raise_for_status()
response.json()['scraper_status']

In [None]:
# Queries used for the evaluation: two nationwide questions followed by the
# same four location-specific questions for Berlin and for Greifswald.
TEST_SEARCHES = [
    # Nationwide questions (no location given).
    *(
        {"search_string": question, "nationwide_only": True}
        for question in (
            "Was ist Corona?",
            "Kann man sich durch Gegenstände anstecken?",
        )
    ),
    # Location-specific questions.
    *(
        {"search_string": question, "location_string": city}
        for city, question in (
            ("Berlin", "Darf ich in Berlin noch rausgehen?"),
            ("Berlin", "Wo kann ich mich in Berlin testen lassen?"),
            ("Berlin", "Ist draußen Alkohol trinken in Berlin erlaubt?"),
            ("Berlin", "Gibt es in Berlin eine Sperrstunde?"),
            ("Greifswald", "Darf ich in Greifswald noch rausgehen?"),
            ("Greifswald", "Wo kann ich mich in Greifswald testen lassen?"),
            ("Greifswald", "Ist draußen Alkohol trinken in Greifswald erlaubt?"),
            ("Greifswald", "Gibt es in Greifswald eine Sperrstunde?"),
        )
    ),
]

In [None]:
# Search configurations to compare. Key order inside each dict matters:
# str(config) is used as the column label of the evaluation table.
SEARCH_CONFIGS = [
    # Pure lexical search.
    {"search_mode": "lexical_search"},
    # Pure semantic search, one entry per embedding model.
    *(
        {"search_mode": "semantic_search", "model_name": model_name}
        for model_name in (
            "distiluse-base-multi",
            "quora-distilbert-multi",
            "paraphrase-xlm-r-multi",
        )
    ),
    # Lexical search followed by a semantic rerank with different weightings.
    *(
        {
            "search_mode": "lexical_search_semantic_rerank",
            "model_name": model_name,
            "rerank_weights": {
                "query_weight": query_weight,
                "rescore_query_weight": rescore_query_weight,
            },
        }
        for model_name, query_weight, rescore_query_weight in (
            ("distiluse-base-multi", 0.5, 0.5),
            ("distiluse-base-multi", 0.8, 0.2),
            ("distiluse-base-multi", 0.2, 0.8),
            ("quora-distilbert-multi", 0.5, 0.5),
        )
    ),
]

In [None]:
# Run every test search with every search configuration and tabulate the
# results: one row per test search, one column per configuration.
# NOTE(review): execute_search is not defined in the visible cells; it
# presumably appends one result per call to eval_data[search_name] — confirm.
eval_data = {}

for test_search in TEST_SEARCHES:
    # Row label: the query plus either its location or "nationwide".
    search_name = (
        f"{test_search['search_string']} ({test_search['location_string']})"
        if test_search.get('location_string')
        else f"{test_search['search_string']} (nationwide)"
    )
    eval_data[search_name] = []

    for search_config in SEARCH_CONFIGS:
        # Merge into a fresh dict so the shared test/config dicts stay untouched.
        search_params = {**test_search, **search_config}
        execute_search(search_params, search_name, eval_data)

df_eval = pd.DataFrame.from_dict(eval_data, orient='index')
# Label each column with the string form of its search configuration.
df_eval.columns = list(map(str, SEARCH_CONFIGS))
display(df_eval)