# Adidas Product Search

In [71]:
!pip install pandas --quiet

## Importing Required Libraries

In [96]:
import pandas as pd
import os

from IPython.display import Markdown

from langchain.vectorstores import FAISS
from langchain.document_loaders import CSVLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

In [97]:
# Locate a .env file with find_dotenv() and load its variables into the
# process environment (presumably the OpenAI API key used further down —
# confirm). The return value is bound to `_` so the cell renders no output.
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

## Loading Data

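We first load the full product table with pandas for inspection; further below, LangChain's `CSVLoader` loads the product descriptions from a separate CSV file as documents.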

In [98]:
# Read the full product table into a DataFrame and preview its first rows.
file = "../data/adidas_usa.csv"
df = pd.read_csv(filepath_or_buffer=file)
df.head(n=5)

Unnamed: 0,index,url,name,sku,selling_price,original_price,currency,availability,color,category,...,source_website,breadcrumbs,description,brand,images,country,language,average_rating,reviews_count,crawled_at
0,0,https://www.adidas.com/us/beach-shorts/FJ5089....,Beach Shorts,FJ5089,40,,USD,InStock,Black,Clothing,...,https://www.adidas.com,Women/Clothing,Splashing in the surf. Making memories with yo...,adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,4.5,35,2021-10-23 17:50:17.331255
1,1,https://www.adidas.com/us/five-ten-kestrel-lac...,Five Ten Kestrel Lace Mountain Bike Shoes,BC0770,150,,USD,InStock,Grey,Shoes,...,https://www.adidas.com,Women/Shoes,Lace up and get after it. The Five Ten Kestrel...,adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,4.8,4,2021-10-23 17:50:17.423830
2,2,https://www.adidas.com/us/mexico-away-jersey/G...,Mexico Away Jersey,GC7946,70,,USD,InStock,White,Clothing,...,https://www.adidas.com,Kids/Clothing,"Clean and crisp, this adidas Mexico Away Jerse...",adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,4.9,42,2021-10-23 17:50:17.530834
3,3,https://www.adidas.com/us/five-ten-hiangle-pro...,Five Ten Hiangle Pro Competition Climbing Shoes,FV4744,160,,USD,InStock,Black,Shoes,...,https://www.adidas.com,Five Ten/Shoes,The Hiangle Pro takes on the classic shape of ...,adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,3.7,7,2021-10-23 17:50:17.615054
4,4,https://www.adidas.com/us/mesh-broken-stripe-p...,Mesh Broken-Stripe Polo Shirt,GM0239,65,,USD,InStock,Blue,Clothing,...,https://www.adidas.com,Men/Clothing,Step up to the tee relaxed. This adidas golf p...,adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,4.7,11,2021-10-23 17:50:17.702680


In [99]:
# Show the column labels available in the product table.
df.columns

Index(['index', 'url', 'name', 'sku', 'selling_price', 'original_price',
       'currency', 'availability', 'color', 'category', 'source',
       'source_website', 'breadcrumbs', 'description', 'brand', 'images',
       'country', 'language', 'average_rating', 'reviews_count', 'crawled_at'],
      dtype='object')

In [100]:
# The product descriptions live in their own CSV file; CSVLoader reads it and
# the result of load() is kept in `docs` for the vector store built later.
description_file = "../data/adidas_usa_descriptions.csv"
loader = CSVLoader(file_path=description_file)
docs = loader.load()

## *Creating Product Info Dictionaries

This code is not needed for the notebook itself; it is used to display product information in the Streamlit application.

In [101]:
# Placeholder image used when a product has no usable "laydown" photo.
default_url = "https://st4.depositphotos.com/14953852/22772/v/450/depositphotos_227725020-stock-illustration-image-available-icon-flat-vector.jpg"


def _first_laydown_image(images, fallback=default_url):
    """Return the first "~"-separated URL ending in "laydown.jpg".

    Args:
        images: Raw value of the "images" column: a "~"-separated string of
            image URLs, or a non-string (e.g. NaN) when the cell is empty.
        fallback: URL returned when no laydown image can be found.

    Returns:
        The first matching URL, or `fallback` if `images` is not a string or
        contains no URL ending in "laydown.jpg".
    """
    # A missing cell is a float NaN, which cannot be split or iterated.
    if not isinstance(images, str):
        return fallback
    return next(
        (img for img in images.split("~") if img.endswith("laydown.jpg")),
        fallback,
    )


# One image URL per row, aligned with df's index.
image_urls = df["images"].apply(_first_laydown_image)

# Lookup tables keyed by product name. zip() pairs values positionally, so
# this does not depend on df having a default 0..n-1 index. If a name occurs
# more than once, the last row wins.
image_dict = dict(zip(df["name"], image_urls))
description_dict = dict(zip(df["name"], df["description"]))
price_dict = dict(zip(df["name"], df["selling_price"]))
url_dict = dict(zip(df["name"], df["url"]))


## Creating Embeddings

Create text embeddings using `OpenAIEmbeddings`. These embeddings represent each piece of text as a numeric vector.

In [102]:
# Embedding model with default settings (presumably reads the OpenAI API key
# from the environment loaded earlier — confirm).
embeddings = OpenAIEmbeddings()

## Setting Up FAISS Vector Store

Initialize a vector store using `FAISS`. This vector store will allow us to perform similarity searches over our data.

In [103]:
# Build the FAISS vector store from the loaded description documents, using
# `embeddings` to turn them into vectors.
db = FAISS.from_documents(docs, embeddings)

## Build LangChain for Question Answering

Configure a LangChain `RetrievalQA` chain for retrieval-based question answering (QA).

In [104]:
# Chat model with temperature 0.0 to keep answers as repeatable as possible.
llm = ChatOpenAI(temperature=0.0)

# Retrieval QA chain: documents come from the FAISS store's retriever and are
# answered over by `llm` using the "stuff" chain type. verbose=True is
# presumably what prints the "Entering new ... chain" / "Finished chain"
# lines when the chain runs.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(),
    verbose=True,
)


## Generating Responses

Provide a query and display the results generated.

In [107]:
# Free-text product query; the chain's answer is kept in `response` for the
# cells below.
query = "lightweight shorts"
response = qa_stuff.run(query)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [110]:
# Render the answer text as Markdown instead of a plain string repr.
display(Markdown(response))

The AEROREADY 3-Stripes 8-Inch Shorts and the Marathon 20 Shorts are both lightweight shorts.

## *Get Product Info 

This section is not needed for the notebook itself; it is used by the Streamlit application.

In [111]:
# A product counts as mentioned when its name occurs verbatim in the response.
# This is a plain substring test, so a name contained in a longer product name
# also matches.
product_names = [name for name in image_dict if name in response]

# One record per mentioned product, assembled from the lookup dictionaries.
product_info = [
    {
        "image_url": image_dict[name],
        "product": name,
        "price": price_dict[name],
        "description": description_dict[name],
        "url": url_dict[name],
    }
    for name in product_names
]

In [112]:
# Inspect the records assembled for the products named in the response.
product_info

[{'image_url': 'https://assets.adidas.com/images/w_600,f_auto,q_auto/fa21ec8826254716b2d9ad1a00a9a7c8_9366/Marathon_20_Shorts_Red_H31065_01_laydown.jpg',
  'product': 'Marathon 20 Shorts',
  'price': 27,
  'description': "Meet the workhorse of workout wear. Soft and breathable, these adidas running shorts are sporty and super comfortable. AEROREADY helps keep moisture off your skin and your mind on your run. Now you're ready for anything.",
  'url': 'https://www.adidas.com/us/marathon-20-shorts/H31065.html'},
 {'image_url': 'https://assets.adidas.com/images/w_600,f_auto,q_auto/16e9b06ae43947fab0c7ad02003a9aa7_9366/AEROREADY_3-Stripes_8-Inch_Shorts_White_H20107_01_laydown.jpg',
  'product': 'AEROREADY 3-Stripes 8-Inch Shorts',
  'price': 32,
  'description': "Don't be fooled by their minimalist looks. Although these training shorts have streetwear appeal, they can handle HIIT and boot camp workouts. The woven fabric is light and airy, so you feel free to move. The secure waistband sits 