### This notebook covers the expected workflow of the search summarization


In [1]:
import pandas as pd
import numpy as np
from snowflake import connector as sf
import os
import requests
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter
from datetime import datetime, date, timedelta
from dateutil import parser
import os
import re
import time

import openai
from dotenv import find_dotenv, load_dotenv

In [2]:
#CONSTANTS
SEARCH_API_RESPONSE_FOLDER = '../result_output'

In [3]:
#result extraction functions
def get_prod_universal_search_result(
    query,
    limit=10,
    offset=0,
    category="TOP",
    sortBy="SCORE",
    returnFields="ALL",
    x_moneylion_user_id="60b64a474ccc2754df20e65f",
    enrich_response={},
):
    """
    Get Universal Search result from MoneyLion API

    returnFields: ALL, OFFERS, VIDEOS
    category: TOP, OFFERS, VIDEOS
    sortBy: SCORE, RELEVANCE, NEWEST, OLDEST

    returns json response and dataframe
    """
    API_URL = "https://universal-search-api.moneylion.com/search/universal-search"

    params = {
        "limit": limit,
        "offset": offset,
        "query": query,
        "category": category,
        "sortBy": sortBy,
        "returnFields": returnFields
    }

    headers = {
        "x-moneylion-user-id": x_moneylion_user_id,
        "accept": "application/json"
    }

    response = requests.get(API_URL, params=params, headers=headers).json()

    # enrich response
    enrich_param_keys = {"query", "category", "sortBy"}
    enrich_response_params = {k: params[k] for k in params.keys() & enrich_param_keys}
    enrich_response = enrich_response_params | enrich_response
    enrich_response["query_category"] = enrich_response.pop("category", "")

    response = response | enrich_response

    _description = response.get("description")
    # print(f"{_description} - {query}")

    return response

def parse_json_response(response, add_result_columns=[]):
    common_columns = [
        "productId",
        "displayTitle",
        "description",
        "category",
        "createdAt",
        "keywords",
        "queryTerms",
    ]

    _query = response.get("query")

    # if there is no offers, return _df_offers with empty columns
    if "OFFERS" in response.get("data", {}):
        _df_offers = (
            pd.json_normalize(response.get("data", {}).get("OFFERS", []))
            .loc[:, common_columns]
            .assign(data="OFFERS")
            .reset_index()
        )
    else:
        _df_offers = pd.DataFrame(columns=common_columns)

    if "VIDEOS" in response.get("data", {}):
        _df_videos = (
            pd.json_normalize(response.get("data", {}).get("VIDEOS", []))
            .loc[:, common_columns]
            .assign(data="VIDEOS")
            .reset_index()
        )
    else:
        _df_videos = pd.DataFrame(columns=common_columns)
        
    _set_enrich_column = {
        "limit",
        "offset",
        "query",
        "query_category",
        "sortBy",
    }
    _set_enrich_column.update(add_result_columns)
    _dct_enrich = {k: response.get(k) for k in response.keys() & _set_enrich_column}

    _df_query = pd.DataFrame.from_records(_dct_enrich, index=[0])

    _df_result = (
        pd.concat([_df_offers, _df_videos], ignore_index=True)
        .rename({"index": "position"}, axis=1)
        .merge(_df_query, how="cross")
    )

    # print(f"Parse {_query}")
    _df_result.head()
    return _df_result



def get_universal_search_result(
    query, 
    fetch_result_limit=10, 
    output_folder_path=SEARCH_API_RESPONSE_FOLDER
):
    """Generate DataFrame with Universal Search result for each query
    Takes in query in a list

    Parameters
    ----------
    query_list : str
        List of query strings, to pass to Universal Search

    fetch_result_limit : int, optional
        Number of result from each category (offers, videos) to be fetched
        default value = 10
        maximum value = 100

    Returns
    -------
    df_result : pd.DataFrame
        DataFrame parsed from Universal Search response
        Contains query, search results and result details
    """
    df_result = pd.DataFrame()

    if len(str(query)) <= 50:
        file_name = query.lower().replace(" ", "_")
        file_path = os.path.join(output_folder_path, f"{file_name}.json")

        response = get_prod_universal_search_result(query, limit=fetch_result_limit)

        if response.get("status") == "PASSED":
            _df_result_temp = parse_json_response(response)
            # check _df_result_temp is not empty
            if not _df_result_temp.empty:
                df_result = pd.concat(
                    [df_result, _df_result_temp], axis=0, ignore_index=True
                )
            else :
                print(f"Empty result - {query}")

    else:
        print(f"Skip - {query}")

    return df_result


In [5]:
#snowflake connector

def get_sf_connection(email, username):
    """
    Connect to Snowflake using Snowflake Connector
    """
    connection = sf.connect(
        user= f'{email}',
        account='moneylion.us-east-1',
        authenticator='externalbrowser',
        database='ANALYTICS_DB',
        warehouse='PROD_ANALYST_READ_WH',
        role=f'PROD_A_USR_{username.upper()}_WRITER' 
    ) 
    
    connection.autocommit = True
    
    return connection


In [6]:
connection_sf = get_sf_connection("xlim@moneylion.com", "XLIM")

Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...
Going to open: https://sso.jumpcloud.com/saml2/snowflakeai?SAMLRequest=lZLdctowEIVfxaNe25IhZUCDyRBoJk75KzhpmjvFFkRBllythMnbV5i4k14kM73TSOfsfquzw8tjKYMDNyC0SlAcERRwletCqF2C7rLrsI8CsEwVTGrFE%2FTKAV2OhsBKWdGxs89qzX87DjbwhRTQ5iFBziiqGQigipUcqM3pZjyf0U5EaGW01bmW6J3lcwcD4MZ6wtZSgPB4z9ZWFOO6rqO6G2mzwx1CCCYD7FUnyZdWf%2FQzfaCPMbk46b3Cy1dvbFdCnb%2FgM6ynswjoTZatwtVyk6Fg3KJOtAJXcrPh5iByfreenQHAE8yXi2%2B%2FZulyETkIOQMbxhEoXW8l2%2FNcl5Wzvm7kT3jLCyz1TvjR02mCqr0obvJ5Pdk%2FHK5M32YXx4f779OyJ3s%2F5M%2Fykd3CfnO7ntinl8F00c9RcN9m2zllmwI4nqpTotZfkU43jEkYDzISU9KnX0nU7fceUTD1iQrFbONssQF09OLKKpfaFQ3dKboO%2FovOBDqvBm36mNH%2FDzzE7%2F1ve7bwX59OV1qK%2FDW41qZk9uNk4ihubkQRbhsp5SUTclwUhgP4hKTU9cRwZv06W%2BM4wqNz138XevQH&RelayState=50933 to authenticate...


In [None]:
# video extraction functions
def extract_video_metadata_query(video_id):
    """
    Extract video metadata from Snowflake
    """
    VIDEO_METADATA_QUERY = """
        select distinct
            _id,
            key,
            video_spec__asset_id,
            generated_title,
            transcript,
            unique_detected_texts,
            ranked_keywords
        from
            analytics_db.todaytab.todaytabcontentspec
        left join analytics_db.todayfeed.video_content_metadata on(
            video_spec__asset_id = asset_id
        )
        where _id = '{video_id}'
    """
    return VIDEO_METADATA_QUERY.format(video_id=video_id)

def extract_video_metadata (df_result):
    """
    Extract video metadata from Snowflake and merge with df_result
    """

    df_result = df_result.drop_duplicates(subset=['position','productId', 'query', 'data'])
    videos_df = df_result[df_result['category'] == 'VIDEOS']
    videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
    videos_df = videos_df.drop_duplicates(subset=['productId'])
    video_metadata_df = pd.DataFrame()
    for video_id in videos_df['productId']:
        video_query = extract_video_metadata_query(video_id)
        video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)
        video_metadata_df = pd.concat([video_metadata_df, video_metadata_df_1])
    df_result_enriched = pd.merge(df_result, video_metadata_df, left_on='productId', right_on='_ID', how='left')
    return df_result_enriched

In [181]:
# GENAI_SUMMARY_PROMPT_OVERALL = """
# Context:
# - MoneyLion is a financial technology company that offers a range of financial products and services aimed at helping individuals manage their personal finances more effectively
# - MoneyLion operates as consumer mobile app that provides digital financial services and lifestyle financial contents
# - MoneyLion is the “Go-To Money Destination” for American consumers, staying relevant to consumer's financial life at all stages 
# - MoneyLion's mission is to improve financial inclusivity for underserved Americans
# - MoneyLion's main product are: Instacash, RoarMoney, Credit Builder Loan, Investment Account, Reward, Financial Content, Crypto, Credit Monitoring, Peer to Peer Payments, Round Ups, AI Powered PFM Insights
# - Instacash is a short-term cash advance product, designed to cover immediate financial needs, and repaid automatically on next payday
# - RoarMoney is mobile banking and cash management account
# - Credit Builder Loans is designed to help users to build and improve their credit scores over time
# - Majority of consumers join MoneyLion to get either Instacash, RoarMoney or Credit Builder Loans, with a large majority signing up for Instacash
# - As a financial platform, MoneyLion offers 3rd party financial products, ie: various types of financial product, insurance and loans in the market
# - 3rd party financial product includes: Auto Insurance, Life Insurance, Home Insurance, Travel Insurance, High Yield Savings Account, Student Loans, Personal Loans, Auto Loans, Home Loan, Student Loan Refinance, Credit Card offers, Certificate of Deposit, Bill Payments, etc.
# - MoneyLion users have the following profile: underserved individuals, those in need of short-term funds, credit builders, digitally-oriented users, budget-conscious individuals, financially educated consumers, investment explorers, rewards seekers, those actively comparing financial products.
# In the MoneyLion Search Engine, we categorized our results into 2 categories:
# 1. OFFERS - MoneyLion offers that are relevant to your search query
# 2. VIDEOS - Financial Videos that are relevant to your search query
# The following is the search query and a list of results that surfaces from the search query on the MoneyLion domain. 
# SEARCH QUERY: {search_query}
# RESULTS:
#     LIST OF TOP 5 OFFERS:
#     {offer_list}
#     LIST OF TOP 5 VIDEOS AND THEIR TRANSCRIPTS:
#     {video_list}
# INSTRUCTIONS:
# Provide a one-liner to summarize the keypoints of the results based on the intention of the search query.
# The description should be within 50 words, and should be in a form of a sentence.
# """

In [192]:
# GENAI_SUMMARY_PROMPT_VIDEOS = """
# In the MoneyLion Search Engine, we categorized our results into 2 categories:
# 1. OFFERS - MoneyLion offers that are relevant to your search query
# 2. VIDEOS - Financial Videos that are relevant to your search query
# The following is the search query and a list of results that surfaces from the search query on the MoneyLion domain. 
# SEARCH QUERY: {search_query}
# RESULTS:
#     LIST OF TOP 5 VIDEOS AND THEIR TRANSCRIPTS:
#     {video_list}
# INSTRUCTIONS:
# Provide a one-liner that summarizes the context of the videos based on their transcripts.
# The description should be within 50 words, and should be in a form of a sentence.
# """

In [198]:
# GENAI_SUMMARY_PROMPT_ACTION = """
# Context:
# - MoneyLion is a financial technology company that offers a range of financial products and services aimed at helping individuals manage their personal finances more effectively
# - MoneyLion operates as consumer mobile app that provides digital financial services and lifestyle financial contents
# - MoneyLion is the “Go-To Money Destination” for American consumers, staying relevant to consumer's financial life at all stages 
# - MoneyLion's mission is to improve financial inclusivity for underserved Americans
# - MoneyLion's main product are: Instacash, RoarMoney, Credit Builder Loan, Investment Account, Reward, Financial Content, Crypto, Credit Monitoring, Peer to Peer Payments, Round Ups, AI Powered PFM Insights
# - Instacash is a short-term cash advance product, designed to cover immediate financial needs, and repaid automatically on next payday
# - RoarMoney is mobile banking and cash management account
# - Credit Builder Loans is designed to help users to build and improve their credit scores over time
# - Majority of consumers join MoneyLion to get either Instacash, RoarMoney or Credit Builder Loans, with a large majority signing up for Instacash
# - As a financial platform, MoneyLion offers 3rd party financial products, ie: various types of financial product, insurance and loans in the market
# - 3rd party financial product includes: Auto Insurance, Life Insurance, Home Insurance, Travel Insurance, High Yield Savings Account, Student Loans, Personal Loans, Auto Loans, Home Loan, Student Loan Refinance, Credit Card offers, Certificate of Deposit, Bill Payments, etc.
# - MoneyLion users have the following profile: underserved individuals, those in need of short-term funds, credit builders, digitally-oriented users, budget-conscious individuals, financially educated consumers, investment explorers, rewards seekers, those actively comparing financial products.
# In the MoneyLion Search Engine, we categorized our results into 2 categories:
# 1. OFFERS - MoneyLion offers that are relevant to your search query
# 2. VIDEOS - Financial Videos that are relevant to your search query
# The following is a list of results that surfaces from the search query on the MoneyLion domain. 
# SEARCH QUERY: {search_query}
# RESULTS:
#     LIST OF TOP 5 OFFERS:
#     {offer_list}
#     LIST OF TOP 5 VIDEOS AND THEIR TRANSCRIPTS:
#     {video_list}
# INSTRUCTIONS:
# Provide a one-liner that describes the benefits, features or actions based on the search results.
# The description should be within 50 words, and should be in a form of a sentence.
# """

In [236]:
GENAI_SUMMARY_PROMPT_VIDEO = """
Context:
- MoneyLion is a financial technology company that offers a range of financial products and services aimed at helping individuals manage their personal finances more effectively
- MoneyLion operates as consumer mobile app that provides digital financial services and lifestyle financial contents
- MoneyLion is the “Go-To Money Destination” for American consumers, staying relevant to consumer's financial life at all stages 
- MoneyLion's mission is to improve financial inclusivity for underserved Americans
- MoneyLion's main product are: Instacash, RoarMoney, Credit Builder Loan, Investment Account, Reward, Financial Content, Crypto, Credit Monitoring, Peer to Peer Payments, Round Ups, AI Powered PFM Insights
- Instacash is a short-term cash advance product, designed to cover immediate financial needs, and repaid automatically on next payday
- RoarMoney is mobile banking and cash management account
- Credit Builder Loans is designed to help users to build and improve their credit scores over time
- Majority of consumers join MoneyLion to get either Instacash, RoarMoney or Credit Builder Loans, with a large majority signing up for Instacash
- As a financial platform, MoneyLion offers 3rd party financial products, ie: various types of financial product, insurance and loans in the market
- 3rd party financial product includes: Auto Insurance, Life Insurance, Home Insurance, Travel Insurance, High Yield Savings Account, Student Loans, Personal Loans, Auto Loans, Home Loan, Student Loan Refinance, Credit Card offers, Certificate of Deposit, Bill Payments, etc.
- MoneyLion users have the following profile: underserved individuals, those in need of short-term funds, credit builders, digitally-oriented users, budget-conscious individuals, financially educated consumers, investment explorers, rewards seekers, those actively comparing financial products.
In the MoneyLion Search Engine, we categorized our results into 2 categories:
1. OFFERS - MoneyLion offers that are relevant to your search query
2. VIDEOS - Financial Videos that are relevant to your search query
The following is a list of results that surfaces from the search query on the MoneyLion domain. 
SEARCH QUERY: {search_query}
RESULTS:
    LIST OF TOP 5 VIDEOS AND THEIR TRANSCRIPTS:
    {video_list}
INSTRUCTIONS:
Choose one video and provide a one-liner that describes the key takeaway from the video in a casual and funny tone.
The description should be within 50 words, and should be in a form of a sentence.

SAMPLE:
Did you know you should use your credit card every month? Start building your credit with a credit card today!
Source video : How to Build Credit with a Credit Card
"""

In [197]:
EVALUATION_PROMPT = """
Context:
- MoneyLion is a financial technology company that offers a range of financial products and services aimed at helping individuals manage their personal finances more effectively
- MoneyLion operates as consumer mobile app that provides digital financial services and lifestyle financial contents
- MoneyLion is the “Go-To Money Destination” for American consumers, staying relevant to consumer's financial life at all stages 
- MoneyLion's mission is to improve financial inclusivity for underserved Americans
- MoneyLion's main product are: Instacash, RoarMoney, Credit Builder Loan, Investment Account, Reward, Financial Content, Crypto, Credit Monitoring, Peer to Peer Payments, Round Ups, AI Powered PFM Insights
- Instacash is a short-term cash advance product, designed to cover immediate financial needs, and repaid automatically on next payday
- RoarMoney is mobile banking and cash management account
- Credit Builder Loans is designed to help users to build and improve their credit scores over time
- Majority of consumers join MoneyLion to get either Instacash, RoarMoney or Credit Builder Loans, with a large majority signing up for Instacash
- As a financial platform, MoneyLion offers 3rd party financial products, ie: various types of financial product, insurance and loans in the market
- 3rd party financial product includes: Auto Insurance, Life Insurance, Home Insurance, Travel Insurance, High Yield Savings Account, Student Loans, Personal Loans, Auto Loans, Home Loan, Student Loan Refinance, Credit Card offers, Certificate of Deposit, Bill Payments, etc.
- MoneyLion users have the following profile: underserved individuals, those in need of short-term funds, credit builders, digitally-oriented users, budget-conscious individuals, financially educated consumers, investment explorers, rewards seekers, those actively comparing financial products.

Instuctions:
- Read the following summary and find the most relevant video from the list of videos
- Evaluate summary based on the transcript of the video using the following criterias

Output criteria:
    - return final answer using 3 variables "result_rating", "result_relevance" and "rating_reason"
    - compute "result_rating" as: Rate relevance of summary on the scale of 0 to 10, with 0 being not relevant, and 10 being very relevant
    - compute "result_relevance" as: If summary is relevant, return 1. Else return 0
    - compute "rating_reason" as: Short summary of reason for "result_rating" provided in a few words
    - output final answer in the following format: result_rating=(input value of result_rating here); result_relevance=(input value of result_relevance here); rating_reason=(input value of rating_reason here)

Summary : 
{summary}
List of Videos and their Transcripts: 
{video_list}
let's think step by step
"""

In [221]:
def get_gpt_response(prompt, temperature=1.0, model = "gpt-3.5-turbo"):
    CHAT_COMPLETION_MODEL = model

    CHAT_COMPLETION_API_PARAMS = {
        # use temperature of 0.0 to give the most predictable, factual answer.
        # tweak temperature based on your use case
        "temperature": temperature,
        "model": CHAT_COMPLETION_MODEL,
    }

    messages = [
        {"role": "user", "content": prompt},
    ]

    response = openai.ChatCompletion.create(
        messages=messages, **CHAT_COMPLETION_API_PARAMS
    )

    response_content = response["choices"][0]["message"]["content"]

    return response, response_content

def extract_top_results (df_result_enriched, top_n=5):
    """
    Extract top n results from df_result_enriched
    """

def combine_elements(data):
    # if data is a float, return empty string
    if isinstance(data, float):
        return ''
    combined_list = []
    for sublist in data:
        combined = f"'{sublist[0]}' - '{sublist[1]}'"
        combined_list.append(combined)
    return '\n'.join(combined_list)
  
def get_gpt_summary(df_result_enriched):
    """
    Generate summary using GPT-3.5 turbo
    """
    df_result_enriched.columns = map(str.lower, df_result_enriched.columns)
    # extract for data = OFFERS
    df_offers = df_result_enriched[df_result_enriched['data'] == 'OFFERS']
    df_offers = df_offers[['query', 'data', 'position', 'displaytitle', 'description']]
    df_offers = df_offers.groupby('query').head(5)
    df_offers['offer_list'] = df_offers[['displaytitle', 'description']].values.tolist()
    df_offers.drop(['position', 'displaytitle', 'description'], axis=1, inplace=True)
    df_offers = df_offers.groupby('query')['offer_list'].apply(list).reset_index(name='offer_list')
    
    # extract for data = VIDEOS
    df_videos = df_result_enriched[df_result_enriched['data'] == 'VIDEOS']
    df_videos = df_videos[['query', 'data', 'position', 'displaytitle', 'transcript', 'unique_detected_texts']]
    df_videos = df_videos.groupby('query').head(5)
    df_videos['transcript'] = df_videos['transcript'].fillna(df_videos['unique_detected_texts'])
    df_videos['video_list'] = df_videos[['displaytitle', 'transcript']].values.tolist()
    df_videos.drop(['position', 'displaytitle', 'transcript', 'unique_detected_texts'], axis=1, inplace=True)
    df_videos = df_videos.groupby('query')['video_list'].apply(list).reset_index(name='video_list')

    df = pd.merge(df_offers, df_videos, on='query', how='outer')

    # combine into offer list and video list
    df['combined_offers'] = df['offer_list'].apply(combine_elements)
    df['combined_videos'] = df['video_list'].apply(combine_elements)
    df.drop(['offer_list', 'video_list'], axis=1, inplace=True)
    prompt = GENAI_SUMMARY_PROMPT_VIDEO.format(search_query = df['query'][0], offer_list=df['combined_offers'][0], video_list=df['combined_videos'][0])
    response, summary = get_gpt_response(prompt)
    df['summary'] = summary
    df['summary_length'] = df['summary'].str.len()

    return df['summary'][0], df['summary_length'][0], df['combined_offers'][0], df['combined_videos'][0]

def is_subset(word1, word_list):
    if isinstance(word1, str):
        for word in word_list:
            if set(word1.lower()).issubset(set(word.lower())):
                return True


In [None]:
#get list of query from csv
df_query = pd.read_csv('../../top_300_inputs.csv')
TOP_300_QUERY_LIST = df_query['INPUT'].tolist()
data = {'Query': [],'Offer List':[], 'Video List':[] , 'Summary': [], 'Summary_length': [], 'Rating': [], 'Relevance': [], 'Reason': []}
df = pd.DataFrame(data)

In [None]:
for query in TOP_300_QUERY_LIST:
    # if the query exists in the df, skip
    if query in df['Query'].tolist() or not isinstance(query, str):
        continue
    # get universal search result
    df_result = get_universal_search_result(query)
    if not df_result.empty:
        print("summarizing for " + query)
        # extract video metadata based on productId in videos
        df_result_enriched = extract_video_metadata(df_result)
        while True:
            summary, summary_length, offer_list, video_list = get_gpt_summary(df_result_enriched)
            if summary_length < 200:
                break
            else:
                print("retrying for " + query)
        #evaluation
        evaluation_prompt = EVALUATION_PROMPT.format(summary=summary, offer_list=offer_list, video_list=video_list)
        m1, m2, m3 = ("", "", "")
        while True:
            try:
                _response, evaluation_response_content = get_gpt_response(evaluation_prompt,temperature=0 , model = "gpt-4")

                m1 = re.search(r"result_rating=([0-9]+)", evaluation_response_content).groups()[0]
                m2 = re.search(r"result_relevance=([0-1])", evaluation_response_content).groups()[
                    0
                ]
                m3 = re.search(r"rating_reason=(.*)\b", evaluation_response_content).groups()[0]
                if m1 and m2 and m3:
                    print(m1, m2, m3)
                    break
            except:
                print(f"Retry evaluation for {query}")
                pass
        new_data = pd.DataFrame({'Query': [query], 'Offer List': [offer_list], 'Video List': [video_list], 'Summary': [summary], 'Summary_length': [summary_length], 'Rating': [m1], 'Relevance': [m2], 'Reason': [m3]})
        df = pd.concat([df, new_data], ignore_index=True)
        print("SUMMARY : \n"+ summary)

    else:
        print("No result found, fail silently")


In [None]:
df.to_csv('../../top_300_summary.csv', index=False)

In [204]:
SEARCH_SUGGESTIONS_LIST = [
    "Afford a Tesla",
    "Investing",
    "Tips for saving money on monthly bills",
    "How does Instacash work?",
    "Latest trends in the crypto market",
    "Maximizing the benefits of Rewards",
    "Tips for improving credit score",
    "How to earn rewards on purchases?",
    "Ways to save money and budget effectively",
    "Saving",
    "Maximize your savings with RoarMoney",
    "Investment opportunities",
    "Warren Buffett",
    "Ways to build credit with Credit Builder Plus",
    "Free Starbucks",
    "How to invest in cryptocurrency?",
    "Pay less taxes",
    "Improve credit score",
    "Understanding the benefits of life insurance",
    "Importance of credit monitoring",
    "Tips for managing credit card debt",
    "What are the benefits of rewards?",
    "How to monitor my credit score?",
    "Debt",
    "How to create a budget that fits my lifestyle?",
    "Inflation",
    "Sneaker heads"
]

ML_MAIN_PRODUCT = [
    "Instacash",
    "RoarMoney",
    "Credit Builder Loan",
    "Investment Account",
    "Reward",
    "Financial Content",
    "Crypto",
    "Credit Monitoring",
    "Peer to Peer Payments",
    "Round Ups",
    "AI Powered PFM Insights",
]

ML_3P_PRODUCT = [
    "Auto Insurance",
    "Life Insurance",
    "Home Insurance",
    "Travel Insurance",
    "High Yield Savings Account",
    "Student Loans",
    "Personal Loans",
    "Auto Loans",
    "Home Loan",
    "Student Loan Refinance",
    "Credit Card offers",
    "Certificate of Deposit",
    "Bill Payments",
]

RANDOM_SEARCH_TERM = [
    "Weather",
    "News",
    "Recipes",
    "Sports",
    "Travel",
    "Movies",
    "Music",
    "Dictionary",
    "Restaurants",
    "Stocks",
    "Health",
    "Fashion",
    "Technology",
    "History",
    "Fitness",
    "Games",
    "Maps",
    "Finance",
    "Art",
    "Education",
    "Jobs",
    "Shopping",
    "Events",
    "Books",
    "Cars",
    "Real Estate",
    "Science",
    "Politics",
    "Holidays",
    "Social Media",
    "Recipes",
    "DIY",
    "Reviews",
    "Home",
    "Beauty",
    "Pets",
    "Gardening",
    "Fitness",
    "Hobbies",
    "Travel",
    "Food",
    "Weather",
    "Movies",
    "Music",
    "Sports",
    "Technology",
    "Health",
    "News",
    "Science",
    "Business"
]

### Scope testing

Extract 30 random queries from different categories , which includes:
- First Party Products
- Third Party Products
- Suggestion List
- Random Search Terms

Run list of 10 random queries on the summarization model for the following iterations:
- Summarizing the overall context of the results ( title and description/transcript)
- Summarizing the videos, and instruct it to point out the key points of the video
- Provide benefits/perks related to the search query based on the context of the results


Note that execution time is only calculated on the summarization generation and not the evaluation, the execution time is ended when the loop breaks

In [213]:
#create a new list with random 10 search terms from SEARCH_SUGGESTIONS_LIST , ML_MAIN_PRODUCT, ML_3P_PRODUCT, RANDOM_SEARCH_TERM, randomize every time the code is run
import random
random.shuffle(SEARCH_SUGGESTIONS_LIST)
random.shuffle(ML_MAIN_PRODUCT)
random.shuffle(ML_3P_PRODUCT)
random.shuffle(RANDOM_SEARCH_TERM)
random_search_term_list = SEARCH_SUGGESTIONS_LIST[:4] + ML_MAIN_PRODUCT[:2] + ML_3P_PRODUCT[:2] + RANDOM_SEARCH_TERM[:2]
print(random_search_term_list)

['Saving', 'Tips for improving credit score', 'How to invest in cryptocurrency?', 'Maximize your savings with RoarMoney', 'AI Powered PFM Insights', 'Financial Content', 'Life Insurance', 'Travel Insurance', 'Holidays', 'Beauty']


In [214]:
# from the list , count how many search terms are from SEARCH_SUGGESTIONS_LIST , ML_MAIN_PRODUCT, ML_3P_PRODUCT, RANDOM_SEARCH_TERM
search_suggestion_count = 0
ml_main_product_count = 0
ml_3p_product_count = 0
random_search_term_count = 0

for search_term in random_search_term_list:
    if search_term in SEARCH_SUGGESTIONS_LIST:
        search_suggestion_count += 1
    elif search_term in ML_MAIN_PRODUCT:
        ml_main_product_count += 1
    elif search_term in ML_3P_PRODUCT:
        ml_3p_product_count += 1
    elif search_term in RANDOM_SEARCH_TERM:
        random_search_term_count += 1

print("search_suggestion_count: ", search_suggestion_count)
print("ml_main_product_count: ", ml_main_product_count)
print("ml_3p_product_count: ", ml_3p_product_count)
print("random_search_term_count: ", random_search_term_count)


search_suggestion_count:  4
ml_main_product_count:  2
ml_3p_product_count:  2
random_search_term_count:  2


In [247]:
# Create an empty DataFrame to store the data
data = {'Query': [], 'Top 5 Offers': [], 'Top 5 Videos': [] , 'Summary': [], 'Summary_Length': [], 'Rating': [], 'Relevance': [], 'Reason': [], 'Execution_Time': []}
df_scope = pd.DataFrame(data)

In [248]:
for query in random_search_term_list:
    # if the query exists in the df, skip
    if query in df['Query'].tolist() or not isinstance(query, str):
        continue
    # get universal search result
    df_result = get_universal_search_result(query)
    if not df_result.empty:
        print("summarizing for " + query)
        # extract video metadata based on productId in videos
        df_result_enriched = extract_video_metadata(df_result)
        start_time = time.time()
        while True:
            summary, summary_length, offer_list, video_list = get_gpt_summary(df_result_enriched)
            if len(summary.split()) <= 50:
                end_time = time.time()
                break
            else:
                print("retrying for " + query)
        execution_time = end_time - start_time
        #evaluation
        evaluation_prompt = EVALUATION_PROMPT.format(summary=summary, offer_list=offer_list, video_list=video_list)
        m1, m2, m3 = ("", "", "")
        while True:
            try:
                _response, evaluation_response_content = get_gpt_response(evaluation_prompt,temperature=0 , model = "gpt-4")

                m1 = re.search(r"result_rating=([0-9]+)", evaluation_response_content).groups()[0]
                m2 = re.search(r"result_relevance=([0-1])", evaluation_response_content).groups()[
                    0
                ]
                m3 = re.search(r"rating_reason=(.*)\b", evaluation_response_content).groups()[0]
                if m1 and m2 and m3:
                    print(m1, m2, m3)
                    break
            except:
                print(f"Retry evaluation for {query}")
                pass
        new_data = pd.DataFrame({'Query': [query], 'Top 5 Offers': [offer_list], 'Top 5 Videos': [video_list], 'Summary': [summary], 'Summary_length': [summary_length], 'Rating': [m1], 'Relevance': [m2], 'Reason': [m3], 'Execution_Time': [execution_time]})
        df_scope = pd.concat([df_scope, new_data], ignore_index=True)
        print("SUMMARY : \n"+ summary)

    else:
        print("No result found, fail silently")


summarizing for Saving


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


9 1 The summary is highly relevant as it discusses the benefits of a high yield savings account, which aligns with the offers and videos provided by MoneyLion. The summary could have included more specific details about MoneyLion's services to achieve a perfect score
SUMMARY : 
Stop using your regular savings account and switch to a high yield savings account. You'll have easy access to your money, but it won't be too tempting to spend. Plus, you'll actually earn some decent interest. Cha-ching! 💰
Source video: High Yield Savings: Unlocking Rewards
summarizing for Tips for improving credit score


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


8 1 Summary aligns with the theme of credit building and management, which is a key aspect of MoneyLion's offerings and content. However, it does not mention MoneyLion or its specific products directly
SUMMARY : 
Don't toss your friend's credit card! Become an authorized user to instantly boost your credit score, just make sure the card meets the requirements. Say goodbye to that tiny inquiry and focus on paying your bills on time and keeping your debt under 30%.
summarizing for How to invest in cryptocurrency?


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


7 1 'Summary and source video are relevant to the context of MoneyLion's financial services, specifically the crypto service. However, the summary does not cover the full range of services and offers provided by MoneyLion
SUMMARY : 
Investing in cryptocurrency? Just use the three-step system: read the white paper, check the supply and market cap, and gauge social media buzz. If people are talking about it, chances are it's worth buying - cha-ching! 
Source video: 'Investing in Crypto: A 3-Step System'
summarizing for Maximize your savings with RoarMoney


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


8 1 Summary is relevant to the source video and the 'Savings' offer. However, it does not touch upon other offers and videos
SUMMARY : 
Maximize your savings in your 20s, because you don't want to be stuck eating ramen noodles at 40. Start with your emergency fund, then tackle your short-term goals, and finally save for those big dreams. Adulting has never tasted so good!
Source video: Maximize Savings in Your 20s
summarizing for AI Powered PFM Insights


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


1 0 The summary is about AI taking over jobs and does not mention or relate to MoneyLion, its services, or its target users
SUMMARY : 
AI might take over your job, but don't worry, graphic designers have a low risk of being replaced. So if you're not a graphic designer, sorry, you might be in trouble!
Source video: 80% of Jobs Changed by AI?
summarizing for Financial Content


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


8 1 Summary is relevant to the source video and offers, but does not mention all the services provided by MoneyLion
SUMMARY : 
Ladies, don't wait for a man to be your financial plan! Take control of your money, budget wisely, and invest in yourself. You are a financially wise woman!
Source video: Become a Financially Wise Woman
summarizing for Life Insurance


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


8 1 Summary is relevant as it discusses life insurance, which is one of the 3rd party financial products offered by MoneyLion. However, it does not mention other key products and services of MoneyLion
SUMMARY : 
Life insurance isn't just for the rich! It's a way to create generational wealth and provide tax-free income to your loved ones. So don't brush off life insurance, find the right coverage and secure your family's financial future today!
Source video: Life Insurance: Don't Disregard
summarizing for Travel Insurance


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


8 1 Summary is relevant to the source video and the topic of life insurance, but does not cover all aspects of MoneyLion's services and offers
SUMMARY : 
Life insurance is actually important, not a scam! Don't disregard it, find the right coverage and provide tax-free income to your loved ones. Trust me, you don't want to leave them empty-handed. 
Source video: Life Insurance: Don't Disregard
summarizing for Holidays


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


7 1 Summary focuses on budgeting and avoiding debt, which aligns with MoneyLion's target audience and mission. However, it does not directly mention or relate to the specific products or services offered by MoneyLion
SUMMARY : 
Don't let the holiday spending frenzy put you in debt! Make a budget and stick to it, because spending time with loved ones is more important than material gifts. Start the new year without the headache of credit card debt.
summarizing for Beauty


  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


8 1 'Summary is relevant to the source video "Retiring at 30?" and touches on the theme of financial planning and budgeting, which aligns with MoneyLion's services and target audience
SUMMARY : 
"Retire early and still have a social life? Just create a strict budget and include your investments as an expense. Who said you can't have it all? #EarlyRetirementGoals"
Source video: Retiring at 30?


In [260]:
# categorize 'Query' back to its source list
df_scope['Query_Category'] = df_scope['Query'].apply(lambda x: 'SEARCH_SUGGESTIONS_LIST' if x in SEARCH_SUGGESTIONS_LIST else ('ML_MAIN_PRODUCT' if x in ML_MAIN_PRODUCT else ('ML_3P_PRODUCT' if x in ML_3P_PRODUCT else 'RANDOM_SEARCH_TERM')))

In [261]:
df_scope.to_csv('../../scope_summary_GENAI_SUMMARY_PROMPT_VIDEO.csv', index=False)

### Debounce input testing
Objective : To show that the model is capable of intepreting the full context from a non-complete search query
Example : If user searches 'ro' , it should generate a summary on a video based on the intended context 'roarmoney'

Generally, the debounce would not impact the summarization since it only reads the video metadata, but its for a checking point

In [250]:
# extract random queries from top_300_inputs.csv
df = pd.read_csv('../../top_300_inputs.csv')
df = df.sample(n=20)
# convert INPUT column to list
random_debounce_terms_list = df['INPUT'].tolist()
random_debounce_terms_list

['membership',
 'car',
 'ca',
 'money',
 'ch',
 'password',
 'peer boost',
 'instacash',
 'cr',
 'ze',
 'mobil',
 'apple',
 'investing',
 'deb',
 'ro',
 'change p',
 'che',
 'se',
 'credit b',
 'roar mon']

In [253]:
# Create an empty DataFrame to store the data
data = {'Query': [], 'Top 5 Offers': [], 'Top 5 Videos': [] , 'Summary': [], 'Summary_Length': [], 'Rating': [], 'Relevance': [], 'Reason': [], 'Execution_Time': []}
df_debounce = pd.DataFrame(data)

In [254]:
for query in random_debounce_terms_list:
    # if the query exists in the df, skip
    if query in df_debounce['Query'].tolist() or not isinstance(query, str):
        continue
    # get universal search result
    df_result = get_universal_search_result(query)
    if not df_result.empty:
        print("summarizing for " + query)
        # extract video metadata based on productId in videos
        df_result_enriched = extract_video_metadata(df_result)
        start_time = time.time()
        while True:
            summary, summary_length, offer_list, video_list = get_gpt_summary(df_result_enriched)
            if len(summary.split()) <= 50:
                end_time = time.time()
                break
            else:
                print("retrying for " + query)
        execution_time = end_time - start_time
        #evaluation
        evaluation_prompt = EVALUATION_PROMPT.format(summary=summary, offer_list=offer_list, video_list=video_list)
        m1, m2, m3 = ("", "", "")
        while True:
            try:
                _response, evaluation_response_content = get_gpt_response(evaluation_prompt,temperature=0 , model = "gpt-4")

                m1 = re.search(r"result_rating=([0-9]+)", evaluation_response_content).groups()[0]
                m2 = re.search(r"result_relevance=([0-1])", evaluation_response_content).groups()[
                    0
                ]
                m3 = re.search(r"rating_reason=(.*)\b", evaluation_response_content).groups()[0]
                if m1 and m2 and m3:
                    print(m1, m2, m3)
                    break
            except:
                print(f"Retry evaluation for {query}")
                pass
        new_data = pd.DataFrame({'Query': [query], 'Top 5 Offers': [offer_list], 'Top 5 Videos': [video_list], 'Summary': [summary], 'Summary_length': [summary_length], 'Rating': [m1], 'Relevance': [m2], 'Reason': [m3], 'Execution_Time': [execution_time]})
        df_debounce = pd.concat([df_debounce, new_data], ignore_index=True)
        print("SUMMARY : \n"+ summary)

    else:
        print("No result found, fail silently")


summarizing for membership


  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


8 1 Summary is relevant to the source video and offers, but does not cover all aspects of MoneyLion's services
SUMMARY : 
Don't want to pay for a Costco membership? Just get a Costco gift card for as little as $25 and shop to your heart's content without any membership fees! It's like having your cake and eating it too, all while saving some bucks!
Source video: Shop Costco for Free
summarizing for car


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


6 1 'Summary is relevant to some offers and videos, but not all
SUMMARY : 
Don't fall for those expensive car upgrades like nitrogen-filled tires and paddle shifters! Save yourself from headaches and wasted money by sticking to the essentials. Say no to useless upgrades and keep your wallet happy!
Source video: Unnecessary Car Upgrades
summarizing for ca


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


2 0 Summary does not match the context of MoneyLion's financial services and products
SUMMARY : 
Are you wondering if you can afford a fancy Tesla? Well, you better be making six figures, my friend! The key takeaway? Don't drool over that electric dream car unless you're rolling in the dough. 
Source video: Can You Afford a Tesla?
summarizing for money


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


7 1 Summary provides financial advice that aligns with MoneyLion's mission and services, but does not directly mention or promote specific products or offers
SUMMARY : 
Wanna retire early? Save and invest 20% of your income, minimize your housing and transportation expenses, and treat yourself with 15% of your paycheck. Because who says you can't enjoy life while building wealth?
Source video: Money Mastery: How to Do It
summarizing for ch


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


0 0 The summary is about a recipe for chili cheese fries, which is not relevant to the financial services and products offered by MoneyLion
SUMMARY : 
Get ready to satisfy your cheesy cravings with this delicious and wallet-friendly recipe for chili cheese fries! Say goodbye to expensive takeout and hello to a cheap and easy homemade meal that will leave you wanting more.
Empty result - password
No result found, fail silently
summarizing for peer boost


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


7 1 Summary is relevant to the source video and the theme of savings and wealth growth, but does not directly relate to the specific offers or other videos
SUMMARY : 
Looking to boost your savings? Learn how to save 50% or more of your income and watch your wealth grow. Trust me, you'll be living in a mansion in Detroit while your friends are stuck in tiny studio apartments in Boston. #SavingsGoals
Source video: "Boost Your Wealth: Save 50%+"
summarizing for instacash


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


retrying for instacash
9 1 Accurate and engaging description of Instacash product and its uses
SUMMARY : 
Whenever you're in a financial jam, MoneyLion's got your back with Instacash! Get up to $1,000 in cash advances to cover those unexpected expenses. No more stressing about school costs or waiting for your next payday. MoneyLion has your Instacash needs covered! #InstantCashFix
summarizing for cr


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


8 1 Summary is relevant as it discusses credit cards, which aligns with MoneyLion's Credit Builder product and the content of the source video and other videos. However, it does not mention other key products or services offered by MoneyLion
SUMMARY : 
So, credit cards are like borrowing money from a friend who charges you a fortune if you don't pay them back. Get one, use it wisely, and increase your chances of buying a car, getting student loans, and even buying a house!
Source video: Credit Card Basics
summarizing for ze


  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


8 1 Summary is relevant as it discusses budgeting, a key aspect of financial management which is a focus of MoneyLion. However, it does not directly mention or relate to any specific MoneyLion product or service
SUMMARY : 
Budgeting your finances can be fun and mindful with the zero-based budgeting method- every dollar you earn has a purpose, just like that jar of cookies you buy to satisfy your cravings! 🍪💰
Source video: Zero-Based Budgeting: A Mindful Way
summarizing for mobil


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


retrying for mobil
2 0 'Summary does not match the context of MoneyLion's financial services and products
SUMMARY : 
Did you know the cloud and cyber security are like peanut butter and jelly? They're both important, but watch out for those hackers! Stay protected and keep your data secure, because even governments can't escape the cyber threat.
Source video: What could the future of mobility look like?
summarizing for apple


  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


0 0 'Summary not related to MoneyLion's financial services
SUMMARY : 
Bill Gates must be kicking himself for selling his shares in Apple back in the day. If he held onto them, he could've been $75 billion richer. Talk about a missed opportunity! 
Source video: Apple: Bill Gates' $75 Billion Mistake
summarizing for investing


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


7 1 Summary is relevant to the source video and offer, but does not cover all aspects of the context provided
SUMMARY : 
Don't beat yourself up over not knowing about investing. It's okay to start now, even if you didn't know about the Roth IRA when you were a little fetus. It's all good, so just jump in and get started! 
Source video: Investing 101: It's Okay
summarizing for deb


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


retrying for deb
8 1 Summary discusses the concept of debt, which is relevant to the offers and videos about managing debt, loans, and credit building. However, it does not directly mention any of MoneyLion's specific products or services
SUMMARY : 
Stick it to the man and borrow all the money you can! Debt isn't always a bad thing if you use it wisely. Learn how the rich get rich and how the poor stay poor by understanding good debt versus bad debt.
Source video: Debt Secrets: Rich vs Poor Explained
summarizing for ro


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


5 1 The summary is somewhat relevant as it discusses financial decision-making and investment strategies, which aligns with MoneyLion's mission to improve financial inclusivity and its product offerings such as investment accounts. However, it does not directly mention or relate to any specific MoneyLion products or services, hence the average rating
SUMMARY : 
Make money moves like Roger Federer - he turned a $10 million sponsorship into $600 million by asking for equity in a big company instead! Time to negotiate like a tennis champ!
Source video: Roger's $600M Secret
summarizing for change p


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


3 0 'Summary does not directly relate to the specific offers and videos provided by MoneyLion
SUMMARY : 
Wanna beat inflation? Stop wishing things cost less and start wishing you made more money! Find out how to increase your income and kick inflation's butt in this hilarious video.
Source video: 'Beat Inflation: Make More'
summarizing for che


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


7 1 The summary is relevant as it provides a tip on saving money, which aligns with MoneyLion's mission of helping individuals manage their personal finances more effectively. However, it does not directly mention or relate to any of the specific MoneyLion products or services
SUMMARY : 
Don't break the bank at the grocery store! Use the self checkout lane to avoid impulse buying and save yourself $314 a month on all those tempting goodies.
Source video: Check yourself out when shopping
summarizing for se


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


6 1 'Summary is somewhat relevant as it discusses financial advice related to securing an apartment, which aligns with MoneyLion's focus on financial education and services. However, it does not directly mention or relate to the specific products or services offered by MoneyLion
SUMMARY : 
Securing the dream apartment? Don't forget to take photos before signing the lease, or you'll risk losing your security deposit. Plus, negotiate prorating move-in specials to save some serious cash! Cha-ching!
Source video: 'Securing the Dream Apartment'
summarizing for credit b


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


8 1 Summary is relevant as it discusses building credit, which aligns with MoneyLion's Credit Builder Loans product and the content of the source video and some offers. However, it does not mention other key aspects of MoneyLion's services
SUMMARY : 
Start building good credit early by piggybacking off someone else's credit card, or wait until you're 18 and start from scratch. Either way, you'll be on your way to a solid credit profile and getting that dream car! 
Source video: Boost Your Credit Before You're 18
summarizing for roar mon


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


retrying for roar mon
8 1 'Summary is relevant as it pertains to one of the financial products (Life Insurance) that MoneyLion offers. However, it does not cover the full range of services and products offered by MoneyLion
SUMMARY : 
"Single and ready to mingle? Don't let that stop you from securing your future! Life insurance is cheaper when you're young and healthy, so get covered now and be prepared for whatever life throws your way!"
Source video: Single? Secure Your Future Now


In [257]:
# categorize 'Query' back to its source list
df_debounce['Query_Category'] = df_debounce['Query'].apply(lambda x: 'SEARCH_SUGGESTIONS_LIST' if x in SEARCH_SUGGESTIONS_LIST else ('ML_MAIN_PRODUCT' if x in ML_MAIN_PRODUCT else ('ML_3P_PRODUCT' if x in ML_3P_PRODUCT else 'RANDOM_SEARCH_TERM')))

In [258]:
df_debounce.to_csv('../../debounce_summary_GENAI_SUMMARY_PROMPT_VIDEO.csv', index=False)

### Single input testing
Test out self inputs for summamrization module

In [242]:
query = input()
# get universal search result
df_result = get_universal_search_result(query)
if not df_result.empty:
    # extract video metadata based on productId in videos
    df_result_enriched = extract_video_metadata(df_result)
    summary, summary_length, offer_list, video_list = get_gpt_summary(df_result_enriched)
    # check if summary length is > 200 characters or 50 words
    print("FINAL SUMMARY : \n"+ summary)
    print("TOTAL CHARACTERS : " + str(summary_length))

else:
    print("No result found, fail silently")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  videos_df['productId'] = videos_df['productId'].str.split(' ').str[0]
  video_metadata_df_1 = pd.read_sql_query(video_query, connection_sf)


FINAL SUMMARY : 
Boost your money life with RoarMoney! Transfer cash instantly, pay friends for Taco Time, and add funds on a custom schedule. It's like having a powerful account that roars!
Source video: 3 NEW WAYS TO BOOST YOUR MONEY LIFE
TOTAL CHARACTERS : 223
