# In [None]:
import os
import json
import asyncio
import pandas as pd
from model_utils.model_logger import ModelLogger
from model_utils.vss_utils import get_vss_client
from azure.cosmos import CosmosClient
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
import utils_patent as utils


# --- Cosmos DB settings -----------------------------------------------------
# Account endpoint, database name and container name used to build the
# container client in init().
COSMOS_DB_URL = "https://rcdata-ussc-t101-dev-cvx.documents.azure.com:443/" # Set the Cosmos DB endpoint here
COSMOS_DATABASE = "lens"
COSMOS_COLLECTION = "patent_metadata"
# Account key, read from the environment at import time; a missing
# "cosmosdb_len_secret" variable raises KeyError when the module is loaded.
PRIMARY_KEY = os.environ["cosmosdb_len_secret"]

# --- Cognitive Search settings ----------------------------------------------
# Service endpoint and index name used to build the search client in init().
SEARCH_ENDPOINT = "https://rcdata-dev-cvx.search.windows.net/" # Set the Cognitive Search endpoint here
index_name = "cosmosdb-index-lens"
# API key, read from the environment at import time; a missing
# "cog_search_secret" variable raises KeyError when the module is loaded.
SEARCH_API_KEY = os.environ["cog_search_secret"]


def init():
    """Build the module-level logger, Cosmos DB container client and search client.

    The objects are published as module globals (``logger``,
    ``cosmos_database``, ``cosmos_container``, ``cog_search_client_metadata``)
    so that ``run`` can use them on every request.

    Returns:
        bool: Always ``True`` once every client has been constructed.
    """
    global logger, cosmos_database, cosmos_container, cog_search_client_metadata

    logger = ModelLogger()

    # Both handles hold None until the corresponding client is built below.
    cosmos_container, cog_search_client_metadata = None, None

    # Cosmos DB: account client -> database client -> container client.
    logger.info("setting up cosmos_container")
    account_client = CosmosClient(COSMOS_DB_URL, credential=PRIMARY_KEY)
    cosmos_database = account_client.get_database_client(COSMOS_DATABASE)
    cosmos_container = cosmos_database.get_container_client(COSMOS_COLLECTION)
    logger.info("cosmos_container created")

    # Cognitive Search: key credential first, then the index-bound client.
    search_credential = AzureKeyCredential(SEARCH_API_KEY)
    logger.info("setting up cog_search_client_metadata")
    cog_search_client_metadata = SearchClient(
        endpoint=SEARCH_ENDPOINT,
        index_name=index_name,
        credential=search_credential,
    )
    logger.info("cog_search_client_metadata created")

    logger.info("User init() finished for PatentSearch.")
    return True



def _parse_lens_ids(user_query):
    """Return the Lens IDs named by an explicit ``lens_id:`` query.

    A query such as ``"lens_id: a, b"`` yields ``["a", "b"]``. Spaces are
    removed and blank entries (from stray or trailing commas) are dropped.
    Any query that does not start with the literal ``lens_id:`` prefix yields
    an empty list, which tells the caller to fall back to text search.
    """
    prefix = "lens_id:"
    # Test for the same token that is stripped below; checking only for
    # "lens_id" would turn free-text queries that merely begin with those
    # characters into a bogus ID lookup.
    if not user_query.startswith(prefix):
        return []
    id_list = user_query.replace(prefix, "").replace(" ", "")
    return [lens_id for lens_id in id_list.split(",") if lens_id]


def run(request_payload, request_headers=None):
    """Answer one patent-search request.

    The first element of the JSON payload supplies ``id`` and
    ``modelInput.query``. The query is either an explicit ID list
    (``lens_id:<id>,<id>,...``) or free text, in which case the top 15 hits
    of the Cognitive Search index provide the Lens IDs. The matching documents
    are then read through ``utils.CosmosReader`` and their file URLs resolved
    through ``utils.urlRunner``.

    Args:
        request_payload: JSON string holding a list whose first element is a
            dict with the keys ``id`` and ``modelInput`` (``{"query": str}``).
        request_headers: Accepted for interface compatibility; not used.

    Returns:
        list[dict]: A single-element list ``[{"id": ..., "modelOutput": [...]}]``.
        ``modelOutput`` holds one record per patent, sorted ascending by
        ``"Date of Patent"``, or a single placeholder record titled
        ``"some error happened"`` when no document was retrieved.
    """
    # Create objects
    cosmos_reader = utils.CosmosReader(cosmos_container)
    urlrunner = utils.urlRunner()

    # Parse input
    data = json.loads(request_payload)[0]
    top_n = 15
    user_query = data["modelInput"]["query"]
    request_id = data["id"]

    # Get patent seeds: explicit IDs win; otherwise ask the search index.
    lens_ids = _parse_lens_ids(user_query)
    if not lens_ids:
        hits = cog_search_client_metadata.search(search_text=user_query, top=top_n)
        lens_ids = [hit["lens_id"] for hit in hits]

    # Recover patent info
    cosmos_reader.items = lens_ids
    loop = asyncio.get_event_loop()
    loop.run_until_complete(cosmos_reader.get_items())

    results = []
    for item in cosmos_reader.result:
        results.append({"id": item["id"],
                        "Date of Patent": item["date"],
                        "Title": utils.get_title(item),
                        "Inventors": utils.get_inventors(item),
                        "Patent No.": utils.get_doc_number(item),
                        "fileUrl": ""})
        urlrunner.items.append(item)

    logger.info("Read cosmos items")

    # URLs are paired with records by position.
    # NOTE(review): this assumes urlrunner.results keeps the order of
    # urlrunner.items - confirm in utils_patent.
    loop.run_until_complete(urlrunner.get_url_async())
    for patent, url in zip(results, urlrunner.results):
        patent["fileUrl"] = url

    logger.info("Got URL")

    # Return output
    if not results:
        # NOTE(review): the message mentions VSS although the IDs come from
        # Cognitive Search here; the text is kept unchanged in case log
        # consumers match on it.
        logger.info("some error calling vss. Probably VSS return id that are not in cosmosdb")
        return [
            {"id": request_id,
             "modelOutput": [{"Date of Patent": None, "Title": "some error happened", "Inventors": None, "Patent No.": None, "fileUrl": None}]
             },
        ]

    frame = pd.DataFrame(results)
    frame_sorted = frame.sort_values(by=["Date of Patent"], ascending=True)
    frame_renamed = frame_sorted.rename(columns={"id": "Lens Id"})
    return [
        {"id": request_id,
         "modelOutput": frame_renamed.to_dict('records')},
    ]



"""
if __name__ == "__main__":
    init()
    payload = {}
    payload["id"] = "00"
    payload["modelInput"] = {"query" :"carbon sequestration"}

    print(payload)
    r = run(json.dumps([payload]))
    for item in r[0]["modelOutput"]:
        print(item)
    print("done")
"""


