In [71]:
!pip install pandas --quiet
!pip install langchain --quiet
!pip install docarray --quiet
!pip install python-dotenv==1.0.0 --quiet
!pip install openai --quiet
!pip install tiktoken --quiet
!pip install beautifulsoup4 --quiet
!pip install faiss-cpu --quiet


In [4]:
import pandas as pd
import os

In [14]:
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

In [5]:
file = "../data/adidas_usa.csv"
df = pd.read_csv(file)
df.head()

Unnamed: 0,index,url,name,sku,selling_price,original_price,currency,availability,color,category,...,source_website,breadcrumbs,description,brand,images,country,language,average_rating,reviews_count,crawled_at
0,0,https://www.adidas.com/us/beach-shorts/FJ5089....,Beach Shorts,FJ5089,40,,USD,InStock,Black,Clothing,...,https://www.adidas.com,Women/Clothing,Splashing in the surf. Making memories with yo...,adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,4.5,35,2021-10-23 17:50:17.331255
1,1,https://www.adidas.com/us/five-ten-kestrel-lac...,Five Ten Kestrel Lace Mountain Bike Shoes,BC0770,150,,USD,InStock,Grey,Shoes,...,https://www.adidas.com,Women/Shoes,Lace up and get after it. The Five Ten Kestrel...,adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,4.8,4,2021-10-23 17:50:17.423830
2,2,https://www.adidas.com/us/mexico-away-jersey/G...,Mexico Away Jersey,GC7946,70,,USD,InStock,White,Clothing,...,https://www.adidas.com,Kids/Clothing,"Clean and crisp, this adidas Mexico Away Jerse...",adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,4.9,42,2021-10-23 17:50:17.530834
3,3,https://www.adidas.com/us/five-ten-hiangle-pro...,Five Ten Hiangle Pro Competition Climbing Shoes,FV4744,160,,USD,InStock,Black,Shoes,...,https://www.adidas.com,Five Ten/Shoes,The Hiangle Pro takes on the classic shape of ...,adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,3.7,7,2021-10-23 17:50:17.615054
4,4,https://www.adidas.com/us/mesh-broken-stripe-p...,Mesh Broken-Stripe Polo Shirt,GM0239,65,,USD,InStock,Blue,Clothing,...,https://www.adidas.com,Men/Clothing,Step up to the tee relaxed. This adidas golf p...,adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,4.7,11,2021-10-23 17:50:17.702680


In [6]:
df.columns

Index(['index', 'url', 'name', 'sku', 'selling_price', 'original_price',
       'currency', 'availability', 'color', 'category', 'source',
       'source_website', 'breadcrumbs', 'description', 'brand', 'images',
       'country', 'language', 'average_rating', 'reviews_count', 'crawled_at'],
      dtype='object')

In [35]:
file = "../data/adidas_usa.csv"
df = pd.read_csv(file)

In [64]:
image_urls = df["images"].str.split("~").apply(lambda x: [img for img in x if img.endswith("laydown.jpg")]).to_list()
image_dict = {df['name'][i]: image_urls[i] for i in range(len(df))}


In [75]:
from langchain.vectorstores import FAISS
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

loader = CSVLoader(file_path = file)
docs = loader.load()

embeddings = OpenAIEmbeddings()

db = FAISS.from_documents(docs, embeddings)

llm = ChatOpenAI(temperature=0.0)

qa_stuff = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(),
    verbose=True,
)

query = "I am looking for a White T Shirt"
response = qa_stuff.run(query)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [78]:
db.save_local("db")

In [69]:
vector_db = DocArrayInMemorySearch.from_documents(docs, embeddings)
#vector_db.dump("db")

AttributeError: 'DocArrayInMemorySearch' object has no attribute 'dump'

In [76]:
response

'Sure! I can help you with that. Here are some white t-shirts available:\n\n1. Short Sleeve Graphic Tee\n   - Brand: adidas\n   - Price: $24\n   - Description: Honor your hard work. Reward your efforts. OR just start with the reward and let it fuel everything else. When you slip on this t-shirt in the morning, that\'s exactly what you\'re doing. Cotton jersey feels soft against the skin. The eye-catching graphic on the front leaves no doubt that you\'re part of the adidas team.\n   - [View Product](https://www.adidas.com/us/short-sleeve-graphic-tee/GV5159.html)\n\n2. Graphic Tee\n   - Brand: adidas\n   - Price: $24\n   - Description: You have enough to think about. Cross "decide what to wear" off the list. Grab this adidas t-shirt, and you\'re good to go. Level up your game when you\'re off the clock with a fresh take on a classic logo. Score bonus points with a silky soft feel. End of story.\n   - [View Product](https://www.adidas.com/us/graphic-tee/GU3637.html)\n\n3. Long Sleeve Grap

In [77]:
display(Markdown(response))

Sure! I can help you with that. Here are some white t-shirts available:

1. Short Sleeve Graphic Tee
   - Brand: adidas
   - Price: $24
   - Description: Honor your hard work. Reward your efforts. OR just start with the reward and let it fuel everything else. When you slip on this t-shirt in the morning, that's exactly what you're doing. Cotton jersey feels soft against the skin. The eye-catching graphic on the front leaves no doubt that you're part of the adidas team.
   - [View Product](https://www.adidas.com/us/short-sleeve-graphic-tee/GV5159.html)

2. Graphic Tee
   - Brand: adidas
   - Price: $24
   - Description: You have enough to think about. Cross "decide what to wear" off the list. Grab this adidas t-shirt, and you're good to go. Level up your game when you're off the clock with a fresh take on a classic logo. Score bonus points with a silky soft feel. End of story.
   - [View Product](https://www.adidas.com/us/graphic-tee/GU3637.html)

3. Long Sleeve Graphic Tee
   - Brand: adidas
   - Price: $28
   - Description: Need another reason to slip into the soft fabric, casual vibes and easy-to-move-in comfort of this adidas t-shirt? How about knowing that every time you do, you're part of a movement towards helping end plastic waste? Head out and show gratitude to the oceans with every step you take. Making a statement has never felt so good.
   - [View Product](https://www.adidas.com/us/long-sleeve-graphic-tee/GU1468.html)

4. Camo Print Graphic Tee (Kids)
   - Brand: adidas
   - Price: $19
   - Description: They've never been one to blend in. Your little one is all energy, all the time. This kids' adidas tee pairs camo print with pops of bright color to keep their spirits high. Soft cotton fabric means they stay comfy no matter where their adventures take them.
   - [View Product](https://www.adidas.com/us/camo-print-graphic-tee/H20304.html)

Please let me know if you need more information or if there's anything else I can assist you with!

In [54]:
import re

product_names = re.findall(r'\d+\. ([^\n]+)', response)
product_prices = re.findall(r'Price: \$([\d.]+)', response)
product_descriptions = re.findall(r'Description: ([^\n]+)', response)
product_urls = re.findall(r'\[View Product\]\(([^)]+)\)', response)

In [55]:
product_urls

['https://www.adidas.com/us/short-sleeve-graphic-tee/GV5159.html',
 'https://www.adidas.com/us/graphic-tee/GU3637.html',
 'https://www.adidas.com/us/long-sleeve-graphic-tee/GU1468.html',
 'https://www.adidas.com/us/camo-print-graphic-tee/H20304.html']

In [65]:
product_images = []

for product_name in product_names:
    product_images.append(image_dict.get(product_name))

product_images

[['https://assets.adidas.com/images/w_600,f_auto,q_auto/435ebc4133ec458e85b6ad3500e3a5ec_9366/Short_Sleeve_Graphic_Tee_White_GV5159_01_laydown.jpg'],
 ['https://assets.adidas.com/images/w_600,f_auto,q_auto/986bd0fce531401c9c9cad12010a8df6_9366/Graphic_Tee_White_GU3637_01_laydown.jpg'],
 ['https://assets.adidas.com/images/w_600,f_auto,q_auto/b3abdc62d2594c2b97a5ad1200d1b8a4_9366/Long_Sleeve_Graphic_Tee_White_GU1468_01_laydown.jpg'],
 None]