In [27]:
import ast
import json
import os
import re
from pathlib import Path

import numpy as np
import openai
import pandas as pd
import tiktoken
from dotenv import load_dotenv
from scipy import spatial

# Custom Functions
from fncs.utilities import (
    create_openai_client,
    response_generator,
    prompt_builder,
    calculate_total_cost
    )
from fncs.retrieval import (
    get_embedding,
    search_text,
    control_chunk_context
    )

# Load environment vars (python-dotenv pulls them into the process environment).
load_dotenv()
# Endpoint and key for the OpenAI-compatible service; os.getenv yields None when unset.
base_url_voc = os.getenv("OPENAI_BASE_VOC")
api_key_voc = os.getenv("OPENAI_API_VOC")

# Deployment model names
chat_name = 'gpt-4o-mini'
emb_name = 'text-embedding-3-small'

# Initialising OpenAI client
openai_client = create_openai_client(api_key=api_key_voc, base_url=base_url_voc)

# Tokenizer used for all prompt token counts below. It is derived from `chat_name`
# instead of a second hard-coded model string, so the counts keep matching the
# chat model if `chat_name` is changed.
# NOTE(review): encoding_for_model needs a model name tiktoken recognises; if
# `chat_name` ever becomes a custom deployment alias, map it to the base model here.
tokenizer = tiktoken.encoding_for_model(chat_name)

### Loading dataset

In [18]:
# The data folder is resolved relative to the directory the notebook runs from.
proj_dir = Path.cwd()
# Load the article chunks together with their precomputed embeddings.
# NOTE(review): the preview shows an "Unnamed: 0" column, which suggests the CSV was
# saved with its index; consider index_col=0 if no downstream helper relies on it.
df = pd.read_csv(proj_dir.joinpath("data", "2023_fashion_trends_embeddings.csv"))
# Preview only the first three rows.
df.head(3)

Unnamed: 0,text,embeddings
0,Title: 7 Fashion Trends That Will Take Over 20...,"[-0.0008604738395661116, 0.02634955383837223, ..."
1,Title: 7 Fashion Trends That Will Take Over 20...,"[0.01805400848388672, 0.049275610595941544, 0...."
2,Title: 7 Fashion Trends That Will Take Over 20...,"[0.0642574205994606, 0.023316336795687675, -0...."


The embeddings are stored as strings in the CSV, so after loading they must be parsed back into Python lists before they can be used for distance calculations.

In [19]:
# Parse the string representation of each embedding into a real Python list.
# `ast` is imported in the notebook's top import cell.
# The isinstance guard leaves already-parsed values untouched, so re-running this
# cell on the same DataFrame does not fail on values that are no longer strings.
df['embeddings'] = df['embeddings'].apply(
    lambda x: ast.literal_eval(x) if isinstance(x, str) else x
)

Check that the conversion worked: the first embedding should now be a `list`.

In [23]:
# Sanity check: the first stored embedding should be a Python list, not a string.
type(df['embeddings'].iloc[0])

list

### Calculating Cosine Distances based on query

In [24]:
# Natural-language question that the retrieved context should answer.
query = "What is the most popular fashion trend about pants in 2023?"
# Embed the question with the embedding model named by `emb_name`.
query_emb = get_embedding(
    text=query,
    client=openai_client,
    model=emb_name,
)

In [25]:
# Rank the chunks against the query embedding. The displayed result carries a
# `distance` column with the smallest values first.
df_sorted = search_text(
    df=df,
    embs_query=query_emb,
    cosine='distance',
)

In [26]:
# Show the ranked chunks (pandas truncates the middle rows of long frames on display).
df_sorted

Unnamed: 0,text,embeddings,distance
1,Title: 7 Fashion Trends That Will Take Over 20...,"[0.01805400848388672, 0.049275610595941544, 0....",0.328703
58,Title: Spring/Summer 2023 Fashion Trends: 21 E...,"[0.03040272183716297, 0.039167024195194244, 0....",0.386368
3,Title: 7 Fashion Trends That Will Take Over 20...,"[0.030550595372915268, 0.04003358259797096, -0...",0.406843
44,Title: Spring/Summer 2023 Fashion Trends: 21 E...,"[0.022972876206040382, 0.05934659391641617, 0....",0.409787
5,Title: 7 Fashion Trends That Will Take Over 20...,"[0.01975318044424057, 0.05273978039622307, 0.0...",0.427201
...,...,...,...
28,Title: These Are the Spring 2023 Trends Vogue ...,"[0.021780189126729965, 0.024652592837810516, 0...",0.637589
78,Title: Spring/Summer 2023 Fashion Trends: 21 E...,"[0.034109439700841904, -0.016433794051408768, ...",0.640354
34,Title: These Are the Spring 2023 Trends Vogue ...,"[0.05425485223531723, 0.03444666042923927, -0....",0.641840
27,Title: These Are the Spring 2023 Trends Vogue ...,"[0.031974636018276215, 0.0020109834149479866, ...",0.643273


### Prompt Template

In [30]:
# System message: fixes the assistant's role and restricts it to the supplied context.
system_prompt = (
    "You are an expert fashion trend analyser. Based only on the provided information you must analyse and summarise the trends and provide an accurate answer."
)

# Report how many tokens the system message consumes from the prompt budget.
print(
    f"System Prompt Tokens: {len(tokenizer.encode(system_prompt))}"
)

System Prompt Tokens: 28


In [41]:
# User message template with two positional placeholders:
# the first `{}` receives the question, the second the retrieved context.
user_prompt = """ Answer the question: {}
---Based only on the provided information---
{}
"""

# Token cost of the bare template, before the question and context are inserted.
print(
    f"User Prompt Tokens BEFORE context insertion: {len(tokenizer.encode(user_prompt))}"
)

User Prompt Tokens BEFORE context insertion: 14


#### Apply the token controller function (`control_chunk_context`)

In [42]:
# Token limit for the prompt; passed to control_chunk_context as the maximum count.
max_token_count: int = 1_000

In [43]:
# Tokens already taken by the two prompt pieces before any context is added.
# NOTE(review): run this while `user_prompt` still holds the unfilled template;
# a later cell overwrites `user_prompt` with the formatted text.
current_token_count = sum(
    len(tokenizer.encode(piece)) for piece in (user_prompt, system_prompt)
)

# Build the context from the sorted DataFrame, given the tokens already used and
# the maximum token limit.
context = control_chunk_context(df_sorted, current_token_count, max_token_count, tokenizer=tokenizer)

In [46]:
# Prompt template parameters.
# Concatenate the selected chunks into one string, with a "----" line between chunks.
context_inprompt = "\n----\n".join(context)

# Fill the two positional `{}` placeholders: the question first, then the context.
# NOTE(review): this overwrites the template held in `user_prompt`, so cells that
# measure the bare template only give the original numbers if they run before this
# one (or after re-running the cell that defines the template).
user_prompt = user_prompt.format(query, context_inprompt)
print(user_prompt)

 Answer the question: What is the most popular fashion trend about pants in 2023?
---Based only on the provided information---
Title: 7 Fashion Trends That Will Take Over 2023 — Shop Them Now

2023 Fashion Trend: Cargo Pants. Utilitarian wear is in for 2023, which sets the stage for the return of the cargo pant. But these aren't the shapeless, low-rise pants of the Y2K era. For spring, this trend is translated into tailored silhouettes, interesting pocket placements, elevated fabrics like silk and organza, and colors that go beyond khaki and olive.

Source URL: www.refinery29.com

----
Title: Spring/Summer 2023 Fashion Trends: 21 Expert-Approved Looks You Need to See

Every buyer I have spoken to has been most excited by the many pairs of perfectly cut trousers in the spring/summer 2023 collections, which actually should hardly come as a surprise. It's been the year of the trouser after all, and that looks set to continue as designers have become more and more playful with their pants.

In [47]:
# Token cost of the user message once the question and context have been inserted.
print(
    f"User Prompt Tokens AFTER context insertion: {len(tokenizer.encode(user_prompt))}"
)

User Prompt Tokens AFTER context insertion: 971


In [48]:
# Combine the system and user messages into the prompt passed to the chat model.
final_prompt = prompt_builder(
    system_content=system_prompt,
    user_content_prompt=user_prompt,
)

# Extra request options; temperature 0 asks for the least random sampling.
additional_options = {"temperature": 0}

# `response` is the answer text; `response_full` is the full completion object
# (see the displayed values in the following cells).
response, response_full = response_generator(
    openai_client,
    chat_model=chat_name,
    prompts=final_prompt,
    options=additional_options,
)

# Cost of this call derived from the usage stats of the completion.
# NOTE(review): presumably expressed in EUR given the variable name; confirm in
# calculate_total_cost.
cost_eur = calculate_total_cost(
    response_usage=response_full.usage,
    deployment_name=chat_name,
)

In [49]:
# Display the model's answer text.
response

'In 2023, the most popular fashion trend regarding pants is the resurgence of cargo pants, characterized by tailored silhouettes and innovative pocket placements, moving away from the traditional low-rise styles of the Y2K era. This trend is part of a broader utilitarian wear movement that emphasizes functionality and style. Additionally, there is a significant focus on various trouser styles, including wide-leg and slouchy fits, which reflect a shift towards more relaxed silhouettes in response to post-lockdown preferences. Denim also plays a crucial role, with baggy and loose-fitting styles gaining popularity, indicating a departure from skinny jeans. Overall, the trends highlight a blend of practicality and comfort, with an emphasis on versatile and stylish designs.'

In [50]:
# Display the cost computed for the single request above.
cost_eur

0.0002511431

In [51]:
# Display the raw completion object for inspection.
# NOTE(review): this prints the whole ChatCompletion repr; consider showing only the
# fields of interest (e.g. response_full.usage) to keep the notebook output short.
response_full

ChatCompletion(id='chatcmpl-B9Hi4a4PTrgaN5ibLOsOxz6Eq57fd', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='In 2023, the most popular fashion trend regarding pants is the resurgence of cargo pants, characterized by tailored silhouettes and innovative pocket placements, moving away from the traditional low-rise styles of the Y2K era. This trend is part of a broader utilitarian wear movement that emphasizes functionality and style. Additionally, there is a significant focus on various trouser styles, including wide-leg and slouchy fits, which reflect a shift towards more relaxed silhouettes in response to post-lockdown preferences. Denim also plays a crucial role, with baggy and loose-fitting styles gaining popularity, indicating a departure from skinny jeans. Overall, the trends highlight a blend of practicality and comfort, with an emphasis on versatile and stylish designs.', refusal=None, role='assistant', audio=None, function_call=

### Demonstrating Performance