## Imports

In [None]:
from opensearchpy import OpenSearch
from IPython.display import display, HTML
import json
import pandas as pd
import imgkit
import shutil
import os

## Setup

In [None]:
# Connection settings for the local OpenSearch node.
host = "localhost"
port = 9200
base_url = f"http://{host}:{port}/"

# NOTE: "__file__" is a quoted literal, not the module attribute, so this
# resolves relative to the current working directory.
ROOT_DIR = os.path.dirname(os.path.abspath("__file__"))
IMAGE_OUTPUT_DIR = os.path.join(ROOT_DIR, "out")

# Click log, one directory above ROOT_DIR; both timestamp columns are parsed
# as dates, and the rows are grouped by their "query" column for later lookup.
_train_csv_path = os.path.abspath(os.path.join(ROOT_DIR, "..", "datasets/train.csv"))
all_clicks_df = pd.read_csv(
    _train_csv_path,
    parse_dates=["click_time", "query_time"],
)
query_gb = all_clicks_df.groupby("query")

# Plain-HTTP client; http_compress enables gzip compression for request bodies.
client = OpenSearch(
    hosts=[dict(host=host, port=port)],
    http_compress=True,
    use_ssl=False,
)

## Prepare templates

In [None]:
# Mustache placeholder that is replaced by the raw query text at render time.
_USER_QUERY = "{{user_query}}"
# minimum_should_match expression shared by the partial-match clauses.
_PARTIAL_MATCH = "2<75%"

# Fuzzy match on the product name, weighted very low (boost 0.01).
_fuzzy_name_clause = {
    "match": {
        "name": {
            "query": _USER_QUERY,
            "fuzziness": "1",
            "prefix_length": 2,
            "boost": 0.01,
        }
    }
}

# Near-exact phrase match on the name.hyphens sub-field, weighted high.
_hyphen_phrase_clause = {
    "match_phrase": {
        "name.hyphens": {
            "query": _USER_QUERY,
            "slop": 1,
            "boost": 50,
        }
    }
}

# Phrase match across the descriptive fields with per-field boosts.
_multi_field_phrase_clause = {
    "multi_match": {
        "query": _USER_QUERY,
        "type": "phrase",
        "slop": "6",
        "minimum_should_match": _PARTIAL_MATCH,
        "fields": [
            "name^10",
            "name.hyphens^10",
            "shortDescription^5",
            "longDescription^5",
            "department^0.5",
            "sku",
            "manufacturer",
            "features",
            "categoryPath",
        ],
    }
}

# Terms lookup on sku driven by the user_query_split list parameter.
# NOTE(review): the mustache section body is the literal `","` with no
# per-item placeholder -- confirm this renders the intended terms list.
_sku_terms_clause = {
    "terms": {
        "sku": ['"{{#user_query_split}}","{{/user_query_split}}"'],
        "boost": 50.0,
    }
}

# OR-match on name.hyphens, constrained by the shared partial-match rule.
_hyphen_term_clause = {
    "match": {
        "name.hyphens": {
            "query": _USER_QUERY,
            "operator": "OR",
            "minimum_should_match": _PARTIAL_MATCH,
        }
    }
}

# Core retrieval query embedded by every search template: no mandatory
# clauses, and at least one of the optional clauses has to match.
base_query = {
    "bool": {
        "must": [],
        "should": [
            _fuzzy_name_clause,
            _hyphen_phrase_clause,
            _multi_field_phrase_clause,
            _sku_terms_clause,
            _hyphen_term_clause,
        ],
        "minimum_should_match": 1,
    }
}
# Stored mustache template that runs base_query unmodified; the "params"
# section carries the default value for each placeholder.
base_query_template = dict(
    script=dict(
        lang="mustache",
        source=dict(size="{{size}}", query=base_query),
    ),
    params=dict(user_query="", user_query_split=[], size=10),
)
# (sales-rank field, gauss decay scale) pairs; each becomes one scoring
# function that only applies to documents where the field exists.
_SALES_RANK_SCALES = (
    ("salesRankShortTerm", "100"),
    ("salesRankMediumTerm", "1000"),
    ("salesRankLongTerm", "1000"),
)

# Stored mustache template that wraps base_query in a function_score query:
# the function scores are summed and then multiplied into the query score.
hello_world_template = {
    "script": {
        "lang": "mustache",
        "source": {
            "size": "{{size}}",
            "query": {
                "function_score": {
                    "query": base_query,
                    "boost_mode": "multiply",
                    "score_mode": "sum",
                    "functions": [
                        {
                            "filter": {"exists": {"field": rank_field}},
                            "gauss": {
                                rank_field: {"origin": "1.0", "scale": decay_scale}
                            },
                        }
                        for rank_field, decay_scale in _SALES_RANK_SCALES
                    ]
                    # Unfiltered constant-score function, applied to every hit.
                    + [{"script_score": {"script": "0.0001"}}],
                }
            },
        },
    },
    "params": {"user_query": "", "user_query_split": [], "size": 10},
}
# sltr query: scores hits with model "ltr_model" from feature store "week1",
# forwarding the query text and the click-prior string as feature parameters.
_sltr_rescore_query = {
    "sltr": {
        "params": {
            "keywords": "{{user_query}}",
            "click_prior_query": "{{click_prior_query}}",
        },
        "model": "ltr_model",
        "store": "week1",
    }
}

# Rescore section: re-ranks the top window of hits, combining the original
# and the model score ("total") using the two weight placeholders.
_ltr_rescore = {
    "window_size": "{{rescore_size}}",
    "query": {
        "rescore_query": _sltr_rescore_query,
        "score_mode": "total",
        "query_weight": "{{main_query_weight}}",
        "rescore_query_weight": "{{rescore_query_weight}}",
    },
}

# Default value for every placeholder used by the template.
_ltr_default_params = {
    "user_query": "",
    "user_query_split": [],
    "main_query_weight": 0,
    "rescore_query_weight": 2,
    "rescore_size": 500,
    "size": 10,
    "click_prior_query": "",
}

# Stored mustache template: base_query for retrieval, then an LTR rescore.
simple_ltr_template = {
    "script": {
        "lang": "mustache",
        "source": {
            "size": "{{size}}",
            "query": base_query,
            "rescore": _ltr_rescore,
        },
    },
    "params": _ltr_default_params,
}

## Register templates

In [None]:
# Store each template in the cluster as a script. The ids used here are the
# ones the search_template requests refer to, so they must stay in sync.
client.put_script(id="base_query_template", body=base_query_template)
client.put_script(id="hello_world_template", body=hello_world_template)
client.put_script(id="simple_ltr_template", body=simple_ltr_template)

## Output

In [None]:
def create_prior_queries(doc_ids, doc_id_weights, query_times_seen):
    """Build a boosted query string from the click history of one query.

    Each document contributes a ``<doc>^<share>  `` term, where ``share`` is
    the document's weight divided by ``query_times_seen`` formatted to three
    decimals. Documents whose share is not above 0.001, or that have no
    weight entry, are left out.

    Args:
        doc_ids: Iterable of document ids, or ``None``.
        doc_id_weights: Mapping-like object indexed by document id (a dict or
            a pandas Series both work), or ``None``.
        query_times_seen: Total number of clicks recorded for the query.

    Returns:
        The concatenated terms, each followed by two spaces, or ``""`` when
        either input is ``None``, nothing qualifies, or ``query_times_seen``
        is zero.
    """
    if doc_ids is None or doc_id_weights is None:
        return ""
    if not query_times_seen:
        # Nothing was seen for this query, so no share can be computed.
        return ""

    terms = []
    for doc in doc_ids:
        try:
            weight = doc_id_weights[doc]
        except KeyError:
            # No weight recorded for this document: skip it.
            continue
        share = weight / query_times_seen
        if share > 0.001:
            terms.append("%s^%.3f  " % (doc, share))
    return "".join(terms)


def render_comparision(user_query, render_to_img):
    """Render the hits of the three stored templates for one query side by side.

    Runs ``base_query_template``, ``hello_world_template`` and
    ``simple_ltr_template`` against the ``bbuy_products`` index and lays the
    hits out in three columns (thumbnail plus product name per hit).

    The module-level settings ``size``, ``rescore_size``,
    ``main_query_weight`` and ``rescore_query_weight`` are read at call time,
    as are ``client``, ``query_gb`` and ``IMAGE_OUTPUT_DIR``.

    Args:
        user_query: Query text. Also used as the image file name, so it must
            be usable as a file name when ``render_to_img`` is true.
        render_to_img: When true, write the page to
            ``IMAGE_OUTPUT_DIR/<user_query>.jpg`` through imgkit; otherwise
            display it inline.

    Returns:
        None.
    """
    # Imported locally under its function name because the module name
    # ``html`` is not otherwise in scope here.
    from html import escape

    # Click history for this exact query string; a query that never occurred
    # in the click log simply gets an empty click prior.
    try:
        prior_clicks_for_query = query_gb.get_group(user_query)
    except KeyError:
        click_prior_query = ""
    else:
        clicked_skus = prior_clicks_for_query.sku
        click_prior_query = create_prior_queries(
            clicked_skus.drop_duplicates(),
            clicked_skus.value_counts(),
            clicked_skus.count(),
        )

    # Parameters that every template receives.
    shared_params = {
        "user_query": user_query,
        "user_query_split": user_query.split(),
        "size": size,
    }

    def run_template(template_id, extra_params=None):
        # Execute one stored template with the shared plus template-specific params.
        params = dict(shared_params)
        if extra_params:
            params.update(extra_params)
        return client.search_template(
            body={"id": template_id, "params": params},
            index="bbuy_products",
        )

    def hit_to_html(hit):
        # One result row: first image and first name of the hit, HTML-escaped.
        source = hit["_source"]
        image_url = escape(str(source["image"][0]))
        name = escape(str(source["name"][0]))
        return f"""<div style="display: flex; align-items:center; max-width:100%; overflow:hidden; padding: 10px;">
                        <img style="max-width: 200px; max-height: 80px; margin-right:10px;" src="{image_url}"/>
                        <span style="font-size: 16px; color: black;">{name}</span>
                    </div>"""

    def hits_to_html(response):
        # All result rows of one response, one per line.
        return "\n".join(hit_to_html(hit) for hit in response["hits"]["hits"])

    base_query_html = hits_to_html(run_template("base_query_template"))
    hello_world_html = hits_to_html(run_template("hello_world_template"))
    ltr_html = hits_to_html(
        run_template(
            "simple_ltr_template",
            {
                "main_query_weight": main_query_weight,
                "rescore_query_weight": rescore_query_weight,
                "rescore_size": rescore_size,
                "click_prior_query": click_prior_query,
            },
        )
    )

    page_html = f"""<head>
        <style>
        .container {{background: white;}}
        h4 {{font-size: 18px; color: black; background: white;}}
        h3 {{font-size: 20px; color: black; text-align:center; border-bottom:1px solid;}}
        section {{display: inline-block; width:33%; border-right:1px dashed;}}
        </style>
    </head>
    <body>
        <div class="container">
            <h4>Search results for: "{escape(user_query)}", rescore_size: {rescore_size}, main_query_weight: {main_query_weight}, rescore_query_weight: {rescore_query_weight}</h4>
            <div>
                <section>
                    <h3>Base query</h3>
                    {base_query_html}
                </section>
                <section>
                    <h3>Hello world</h3>
                    {hello_world_html}
                </section>
                <section>
                    <h3>LTR Results</h3>
                    {ltr_html}
                </section>
            </div>
        </div>
    </body>
    """
    if render_to_img:
        # Write straight into the output directory, creating it on first use.
        os.makedirs(IMAGE_OUTPUT_DIR, exist_ok=True)
        imgkit.from_string(
            page_html,
            os.path.join(IMAGE_OUTPUT_DIR, f"{user_query}.jpg"),
            options={
                "format": "jpeg",
            },
        )
    else:
        display(HTML(page_html))


# config
# render_comparision reads these module-level values when it is called.
size = 10  # number of hits requested from every template
rescore_size = 500  # rescore window size passed to the LTR template
main_query_weight = 0  # weight of the original query score during rescoring
rescore_query_weight = 2  # weight of the LTR model score during rescoring
test_queries = ["lcd tv", "Ipad", "Touchpad", "Beats"]

# The rendered images end up in IMAGE_OUTPUT_DIR, which has to exist first.
os.makedirs(IMAGE_OUTPUT_DIR, exist_ok=True)

for user_query in test_queries:
    # The call is made for its side effect only (one image per query).
    render_comparision(user_query=user_query, render_to_img=True)