In [1]:
from openai import OpenAI
import os
from dotenv import load_dotenv
import json
from sklearn.metrics.pairwise import cosine_similarity
load_dotenv()

True

# Get LLM response

In [27]:
client = OpenAI(
            api_key=os.getenv("RUNPOD_TOKEN"),
            base_url=os.getenv("RUNPOD_CHATBOT_URL"),
        )
model_name = os.getenv("MODEL_NAME")

In [11]:
response = client.chat.completions.create(
        model=model_name,
        messages=[{'role':'user','content':"Which is the closest earth like planet?"}],
        temperature=0.0,
        top_p=0.8,
        max_tokens=2000,
    )

In [12]:
response

ChatCompletion(id='chatcmpl-e6bb7472c7be4184a8a19d41304f58cb', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="The closest Earth-like planet to our solar system is Proxima b, which orbits Proxima Centauri, the closest star to the Sun. Proxima b is a terrestrial planet with a mass similar to that of Earth and orbits within the habitable zone of its star, where liquid water could potentially exist on its surface.\n\nProxima b was discovered in 2016 by the European Southern Observatory (ESO) using the radial velocity method. The planet's mass is estimated to be around 1.3 times that of Earth, and its surface temperature could be suitable for liquid water to exist.\n\nHowever, it's worth noting that Proxima b is a bit larger than Earth and has a more eccentric orbit, which could affect its climate and habitability. Additionally, the star Proxima Centauri is a red dwarf, which is a different type of star than the Sun, and it emits more ul

In [13]:
response.choices[0].message.content

"The closest Earth-like planet to our solar system is Proxima b, which orbits Proxima Centauri, the closest star to the Sun. Proxima b is a terrestrial planet with a mass similar to that of Earth and orbits within the habitable zone of its star, where liquid water could potentially exist on its surface.\n\nProxima b was discovered in 2016 by the European Southern Observatory (ESO) using the radial velocity method. The planet's mass is estimated to be around 1.3 times that of Earth, and its surface temperature could be suitable for liquid water to exist.\n\nHowever, it's worth noting that Proxima b is a bit larger than Earth and has a more eccentric orbit, which could affect its climate and habitability. Additionally, the star Proxima Centauri is a red dwarf, which is a different type of star than the Sun, and it emits more ultraviolet and X-ray radiation, which could have an impact on the planet's atmosphere.\n\nOther notable Earth-like planets that are close to our solar system includ

In [24]:

def get_chatbot_response(client,model_name,messages,temperature=0):
    input_messages = []
    for message in messages:
        input_messages.append({"role": message["role"], "content": message["content"]})

    response = client.chat.completions.create(
        model=model_name,
        messages=input_messages,
        temperature=temperature,
        top_p=0.8,
        max_tokens=2000,
    ).choices[0].message.content
    
    return response


In [15]:
messages = [{'role':'user','content':"What's the capital of Taiwan?"}]
response = get_chatbot_response(client,model_name,messages)
print(response)

The capital of Taiwan is Taipei.


# Prompt Engineering Experiments

## How to get structured output?

In [17]:
# Using system prompt to make LLMs/Chatbot behave a certain way. It can be used as a role.
system_prompt = """
You are a helpful assistant that answers questions about capitals of countries.
Your output should be in a structured JSON format exactly like the one below. You aren't allowed to write anything other than the JSON object:
[
    {
     "country": the country that you will get the capital of
     "capital": the capital of the country stated   
    }
]
"""
messages = [{"role":"system", "content":system_prompt}]
messages.append({"role":"user", "content": "What's the capital of Albania?"})
response = get_chatbot_response(client, model_name, messages)
print(response)

[
    {
     "country": "Albania",
     "capital": "Tirana"
    }
]


In [18]:
type(response)
# Since its a string, we can now cast it into a list or dictionary.

str

In [20]:
json_response = json.loads(response)
json_response

[{'country': 'Albania', 'capital': 'Tirana'}]

In [None]:
type(json_response)

list

In [22]:
type(json_response[0]), json_response[0]['capital']

(dict, 'Tirana')

## Structuring the Inputs

In [25]:
user_input = """
Get me the capitals of the following countries:
```
1. Russia 
2. Saudi Arabia
3. Colombia
```
"""

In [26]:
messages = [{"role":"system", "content":system_prompt}]
messages.append({"role":"user", "content": user_input})
response = get_chatbot_response(client, model_name, messages)
print(response)

[
    {
     "country": "Russia",
     "capital": "Moscow"
    },
    {
     "country": "Saudi Arabia",
     "capital": "Riyadh"
    },
    {
     "country": "Colombia",
     "capital": "Bogotá"
    }
]


In [27]:
json_response = json.loads(response)
json_response

[{'country': 'Russia', 'capital': 'Moscow'},
 {'country': 'Saudi Arabia', 'capital': 'Riyadh'},
 {'country': 'Colombia', 'capital': 'Bogotá'}]

## Giving model the time to think i.e Chain of Thought

In [None]:
# Refer this paper: https://arxiv.org/pdf/2205.11916
# Chain of thought increases accuracy and performance.

In [None]:
# A simple example without CoT
user_prompt = """
Calculate the result of this equation: 1+3
Your output should be in a structured JSON format exactly like the one below. You aren't allowed to write anything other than the JSON object:
{
 "result": The final number resulting from solving the equation above   
}
"""
messages = [{"role":"user", "content":user_prompt}]
response = get_chatbot_response(client, model_name, messages)
print(response)

{
  "result": 4
}


In [None]:
# Lets try a problem that is a little more complicated...
678/27 + 3876 - 2672*8 +97837/578*9

-15951.475394079202

In [32]:
user_prompt = """
Calculate the result of this equation: 678/27+3876-2672*8+97837/578*9
Your output should be in a structured JSON format exactly like the one below. You aren't allowed to write anything other than the JSON object:
{
 "result": The final number resulting from solving the equation above   
}
"""
messages = [{"role":"user", "content":user_prompt}]
response = get_chatbot_response(client, model_name, messages)
print(response)

{
  "result": 0.5
}


In [None]:
# So the model isn't able to solve this equation as of now and the margin of error is huge.
# Let's try adding the steps to the answering process.
user_prompt = """
Calculate the result of this equation: 678/27+3876-2672*8+97837/578*9
Your output should be in a structured JSON format exactly like the one below. You aren't allowed to write anything other than the JSON object:
{
 steps: This is where you solve the equation bit by bit following the PEMDAS rules. Show your strategy and calculate each step leading to the final answer. Feel free to write in free text.
 result: The final number resulting from solving the equation above   
}
"""
messages = [{"role":"user", "content":user_prompt}]
response = get_chatbot_response(client, model_name, messages)
print(response)

# Margin of error improves significantly, which can be improved further by manipulatin the model to think and rationalize in better ways and guiding its thought process.

{
  "steps": "First, we need to follow the PEMDAS rules: Parentheses, Exponents, Multiplication and Division (from left to right), Addition and Subtraction (from left to right). 
  1. Divide 678 by 27: 678 / 27 = 25.11
  2. Multiply 2672 by 8: 2672 * 8 = 21376
  3. Divide 97837 by 578: 97837 / 578 = 169.5
  4. Multiply 169.5 by 9: 169.5 * 9 = 1524.5
  5. Add 25.11 and 3876: 25.11 + 3876 = 3901.11
  6. Subtract 21376 from 3901.11: 3901.11 - 21376 = -17474.89
  7. Subtract 1524.5 from -17474.89: -17474.89 - 1524.5 = -19000.39",
  "result": -19000.39
}


# Retrieval Augmented Generation - RAGs

In [None]:
"""
RAG enhances LLMs by integrating external data sources, allowing the models to generate more accurate and reliable text. 
The working principle of RAG involves two main components: a retriever and a generator.

The retriever component acts as a search engine, sifting through vast amounts of external data to find information relevant to a user’s query. 
It retrieves the most pertinent documents or text passages that can serve as a factual foundation for the generative component.
The generator component then leverages the retrieved information alongside its own internal knowledge to craft a response. 
The retrieved text provides context and ensures the generated response is grounded in factual accuracy.

RAG addresses several challenges faced by traditional LLMs, such as the lack of provenance and the issue of hallucination, 
by providing a clear audit trail and ensuring more factual consistency.
This makes RAG particularly useful for applications where real-time, accurate, and traceable data is crucial, 
such as in enterprise use-cases, customer service, and legal documentation.

In terms of use cases, RAG can be applied in various scenarios, including creating customer service chatbots that can refer to manuals or FAQs, 
generating informative product descriptions, and creating social media marketing posts that utilize industry trends or customer sentiment analysis.
RAG can be implemented in two main types: simple (or naive) and complex. Simple RAG systems handle straightforward queries needing direct answers, 
while complex RAG systems are designed for intricate queries that require piecing together information from multiple sources.
"""

In [35]:
# Let's try something: Since Iphone 16 was launcehd after the llama 3.1 model was trained.....
user_prompt = """
What's new in the iphone 16?
"""
messages = [{"role":"user", "content":user_prompt}]
response = get_chatbot_response(client, model_name, messages)
print(response)

Since I'm an AI trained until December 2023, I don't have information on the iPhone 16 as it has not been officially announced by Apple yet. However, I can provide you with some general information on the latest iPhone models and their features.

The latest iPhone models available in the market are the iPhone 14 series, which includes the iPhone 14, iPhone 14 Plus, iPhone 14 Pro, and iPhone 14 Pro Max. These models come with various features such as:

1. Improved cameras with a new 48MP main camera and a 12MP front camera.
2. A faster A16 Bionic chip for enhanced performance.
3. Longer battery life and faster charging capabilities.
4. A new 6.7-inch display on the iPhone 14 Pro and iPhone 14 Pro Max.
5. A 120Hz refresh rate for smoother visuals.
6. Improved water resistance and durability.

As for the iPhone 16, it's likely to be announced in the future, and I'll be happy to provide you with more information once it's officially released.

However, based on rumors and leaks, here are s

In [2]:
# Let's save a basic Iphone 16 description:
iphone_16 = """
The iPhone 16 introduces several exciting updates, making it one of Apple's most advanced smartphones to date. It features a larger 6.1-inch display for the base model and a 6.7-inch screen for the iPhone 16 Plus, with thinner bezels and a more durable Ceramic Shield. The iPhone 16 Pro and Pro Max boast even larger displays, measuring 6.3 and 6.9 inches respectively, offering the thinnest bezels seen on any Apple product so far.

Powered by the new A18 chip (A18 Pro for the Pro models), these phones deliver significant performance improvements, with enhanced neural engine capabilities, faster GPU for gaming, and machine learning tasks. The camera systems are also upgraded, with the base iPhone 16 sporting a dual-camera setup with a 48MP main sensor. The Pro models offer a 48MP Ultra Wide and 5x telephoto camera, enhanced by Apple’s "Camera Control" button for more flexible photography options.

Apple also introduced advanced audio features like "Audio Mix," which uses machine learning to separate background sounds from speech, allowing for more refined audio capture during video recording. Battery life has been extended, especially in the iPhone 16 Pro Max, which is claimed to have the longest-lasting battery of any iPhone.

Additionally, Apple has switched to USB-C for faster charging and data transfer, and the Pro models now support up to 2x faster video encoding. The starting prices remain consistent with previous generations, with the iPhone 16 starting at $799, while the Pro models start at $999
"""

In [None]:
user_prompt = """
{iphone_16}
What's new in the iphone 16?
"""
messages = [{"role":"user", "content":user_prompt}]
response = get_chatbot_response(client, model_name, messages)
print(response)

# Providing context at the beginning of the prompt improves our answer a lot, 
# but we can't put information like this in all prompts as we have a token limit and the data we might need to put in exceeds this limit.
# And we also need to put the most relevant part of the information in the system for it to work efficiently.
# We use embeddings to determine the relevance and we will use the serverless endpoint for embeddings in the Runpod itself

I'm assuming you're referring to the iPhone 16, which is a hypothetical device as Apple has not officially announced any iPhone model by this name. However, based on rumors and leaks, here are some potential features that might be included in the iPhone 16:

1. **Improved Cameras**: The iPhone 16 might feature a quad-camera setup with a 48MP primary sensor, a 12MP front camera, and enhanced features like 8K video recording, improved low-light performance, and advanced portrait mode capabilities.
2. **Faster Processor**: The iPhone 16 could be powered by Apple's A17 Bionic chip, which would provide a significant boost in performance and efficiency.
3. **Longer Battery Life**: The iPhone 16 might feature a larger battery, which would provide all-day battery life and potentially even two-day battery life.
4. **Enhanced Display**: The iPhone 16 could feature a higher refresh rate display (up to 120Hz), improved color accuracy, and a higher screen resolution.
5. **New Design**: The iPhone 1

## Automatically extracting context data from a database.

In [3]:
samsung_s23 = """
The Samsung Galaxy S23 brings some incremental but notable upgrades to its predecessor, the Galaxy S22. It features the Snapdragon 8 Gen 2 processor, a powerful chip optimized for the S23 series, delivering enhanced performance, especially for gaming and multitasking. This chip ensures top-tier speed and efficiency across all models, from the base S23 to the larger S23+ and S23 Ultra​.

In terms of design, the S23's camera module has been streamlined by removing the raised metal contour around the cameras, creating a cleaner, sleeker look. It also sports the same 6.1-inch 120Hz AMOLED display, protected by tougher Gorilla Glass Victus 2, making it more resistant to scratches and drops​.

The S23 Ultra stands out with its 200MP main camera, offering impressive photo clarity, especially in low-light conditions. The selfie camera across the series has been updated to a 12MP sensor, resulting in sharper selfies. The Ultra model also includes productivity tools such as the S-Pen, which remains an essential feature for note-taking and creative tasks​.

Battery life is solid, with the S23 Ultra featuring a 5000mAh battery that lasts comfortably through a day of heavy use. However, charging speeds still lag behind some competitors, with 45W wired charging, which is slower than other brands offering up to 125W charging​.

Overall, the Galaxy S23 series enhances performance, durability, and camera quality, making it a strong contender for users seeking a high-performance flagship.
"""

In [4]:
data = [iphone_16, samsung_s23]

In [5]:
user_prompt = """
What's new in the iphone 16?
"""

In [6]:
embedding_client = OpenAI(
    api_key=os.getenv('RUNPOD_TOKEN'),
    base_url=os.getenv('RUNPOD_EMBEDDING_URL'),
    
)

In [7]:
output = embedding_client.embeddings.create(input=user_prompt, model='BAAI/bge-small-en-v1.5')

In [8]:
output

CreateEmbeddingResponse(data=[Embedding(embedding=[-0.052291225641965866, -0.029946113005280495, 0.060258541256189346, -0.006330351810902357, -0.031151894479990005, -0.04475127533078194, 0.018254613503813744, 0.03483029082417488, -0.017460934817790985, 0.046674422919750214, 0.05262701213359833, 0.016484098508954048, -0.06703533977270126, -0.00667377095669508, 0.09560777246952057, 0.022329850122332573, 0.1178307831287384, -0.15360738337039948, -0.07637632638216019, 0.01759830303490162, -0.040080782026052475, -0.022390902042388916, -0.03943973407149315, -0.032449252903461456, 0.03171662986278534, 0.05995327979326248, 0.0024668911937624216, -0.009959143586456776, -0.024039311334490776, -0.1387106478214264, -0.003941684029996395, 0.0004314197867643088, 0.019078819081187248, 0.02228406071662903, -0.0026862977538257837, -0.03663133084774017, -0.00576180312782526, 0.03138083964586258, -0.01083676889538765, 0.059678543359041214, 0.03101452812552452, 0.09286042302846909, -0.10281194001436234, -

In [9]:
def get_embedding(embedding_client,model_name,text_input):
    output = embedding_client.embeddings.create(input = text_input,model=model_name)
    
    embeddings = []
    for embedding_object in output.data:
        embeddings.append(embedding_object.embedding)

    return embeddings

In [10]:
output = get_embedding(embedding_client,'BAAI/bge-small-en-v1.5',user_prompt)
# model_name here doesn't matter as OpenAI only handles openai models and treats all other models the same way.

In [11]:
output

[[-0.052291225641965866,
  -0.029946113005280495,
  0.060258541256189346,
  -0.006330351810902357,
  -0.031151894479990005,
  -0.04475127533078194,
  0.018254613503813744,
  0.03483029082417488,
  -0.017460934817790985,
  0.046674422919750214,
  0.05262701213359833,
  0.016484098508954048,
  -0.06703533977270126,
  -0.00667377095669508,
  0.09560777246952057,
  0.022329850122332573,
  0.1178307831287384,
  -0.15360738337039948,
  -0.07637632638216019,
  0.01759830303490162,
  -0.040080782026052475,
  -0.022390902042388916,
  -0.03943973407149315,
  -0.032449252903461456,
  0.03171662986278534,
  0.05995327979326248,
  0.0024668911937624216,
  -0.009959143586456776,
  -0.024039311334490776,
  -0.1387106478214264,
  -0.003941684029996395,
  0.0004314197867643088,
  0.019078819081187248,
  0.02228406071662903,
  -0.0026862977538257837,
  -0.03663133084774017,
  -0.00576180312782526,
  0.03138083964586258,
  -0.01083676889538765,
  0.059678543359041214,
  0.03101452812552452,
  0.092860423

In [12]:
len(output[0])

384

In [13]:
user_prompt_embeddings = get_embedding(embedding_client,'BAAI/bge-small-en-v1.5',user_prompt)[0]

In [14]:
user_prompt_embeddings

[-0.052291225641965866,
 -0.029946113005280495,
 0.060258541256189346,
 -0.006330351810902357,
 -0.031151894479990005,
 -0.04475127533078194,
 0.018254613503813744,
 0.03483029082417488,
 -0.017460934817790985,
 0.046674422919750214,
 0.05262701213359833,
 0.016484098508954048,
 -0.06703533977270126,
 -0.00667377095669508,
 0.09560777246952057,
 0.022329850122332573,
 0.1178307831287384,
 -0.15360738337039948,
 -0.07637632638216019,
 0.01759830303490162,
 -0.040080782026052475,
 -0.022390902042388916,
 -0.03943973407149315,
 -0.032449252903461456,
 0.03171662986278534,
 0.05995327979326248,
 0.0024668911937624216,
 -0.009959143586456776,
 -0.024039311334490776,
 -0.1387106478214264,
 -0.003941684029996395,
 0.0004314197867643088,
 0.019078819081187248,
 0.02228406071662903,
 -0.0026862977538257837,
 -0.03663133084774017,
 -0.00576180312782526,
 0.03138083964586258,
 -0.01083676889538765,
 0.059678543359041214,
 0.03101452812552452,
 0.09286042302846909,
 -0.10281194001436234,
 -0.02255

In [15]:
data_embeddings = [get_embedding(embedding_client,'BAAI/bge-small-en-v1.5',x)[0] for x in data]

In [16]:
len(data_embeddings)

2

In [17]:
data_similarity =  cosine_similarity([user_prompt_embeddings],data_embeddings )

In [None]:
data_similarity
# Higher the score more the similarity.

array([[0.74291352, 0.53812339]])

In [19]:
closest_entry_index = data_similarity.argmax()
closest_entry_index

0

In [20]:
data[closest_entry_index]

'\nThe iPhone 16 introduces several exciting updates, making it one of Apple\'s most advanced smartphones to date. It features a larger 6.1-inch display for the base model and a 6.7-inch screen for the iPhone 16 Plus, with thinner bezels and a more durable Ceramic Shield. The iPhone 16 Pro and Pro Max boast even larger displays, measuring 6.3 and 6.9 inches respectively, offering the thinnest bezels seen on any Apple product so far.\n\nPowered by the new A18 chip (A18 Pro for the Pro models), these phones deliver significant performance improvements, with enhanced neural engine capabilities, faster GPU for gaming, and machine learning tasks. The camera systems are also upgraded, with the base iPhone 16 sporting a dual-camera setup with a 48MP main sensor. The Pro models offer a 48MP Ultra Wide and 5x telephoto camera, enhanced by Apple’s "Camera Control" button for more flexible photography options.\n\nApple also introduced advanced audio features like "Audio Mix," which uses machine l

In [21]:
user_prompt_with_data = f"""
{data[closest_entry_index]}
{user_prompt}
"""

In [22]:
user_prompt_with_data

'\n\nThe iPhone 16 introduces several exciting updates, making it one of Apple\'s most advanced smartphones to date. It features a larger 6.1-inch display for the base model and a 6.7-inch screen for the iPhone 16 Plus, with thinner bezels and a more durable Ceramic Shield. The iPhone 16 Pro and Pro Max boast even larger displays, measuring 6.3 and 6.9 inches respectively, offering the thinnest bezels seen on any Apple product so far.\n\nPowered by the new A18 chip (A18 Pro for the Pro models), these phones deliver significant performance improvements, with enhanced neural engine capabilities, faster GPU for gaming, and machine learning tasks. The camera systems are also upgraded, with the base iPhone 16 sporting a dual-camera setup with a 48MP main sensor. The Pro models offer a 48MP Ultra Wide and 5x telephoto camera, enhanced by Apple’s "Camera Control" button for more flexible photography options.\n\nApple also introduced advanced audio features like "Audio Mix," which uses machine

In [28]:

messages = [{"role":"user", "content":user_prompt_with_data}]
response = get_chatbot_response(client, model_name, messages)
print(response)

According to the information provided, the new features and updates in the iPhone 16 include:

1. Larger display sizes:
   - Base model: 6.1-inch display
   - iPhone 16 Plus: 6.7-inch display
   - iPhone 16 Pro: 6.3-inch display
   - iPhone 16 Pro Max: 6.9-inch display

2. Thinner bezels and a more durable Ceramic Shield

3. New A18 chip (A18 Pro for Pro models) for improved performance, including:
   - Enhanced neural engine capabilities
   - Faster GPU for gaming
   - Machine learning tasks

4. Upgraded camera systems:
   - Base iPhone 16: Dual-camera setup with a 48MP main sensor
   - Pro models: 48MP Ultra Wide and 5x telephoto camera, with Apple's "Camera Control" button

5. Advanced audio features:
   - "Audio Mix" for refined audio capture during video recording

6. Extended battery life, especially in the iPhone 16 Pro Max

7. Switch to USB-C for faster charging and data transfer

8. Support for up to 2x faster video encoding in Pro models

9. Starting prices remain consistent 

In [29]:
type(response)

str