In [4]:
!pip install --quiet openai

# ChatBot

In [39]:
import openai
from openai import AzureOpenAI

import os
import requests

from dotenv import load_dotenv

In [40]:
# Environment file that provides the AZURE_OPENAI_* variables read via os.getenv below.
dotenv_path = '/home/sagemaker-user/user-default-efs/CLONED_REPOS/LLM-World/.env'
load_dotenv(dotenv_path)

def my_gpt(model, ask_a_question, max_tokens=30000, temperature=0, timeout=30):
    """Ask one question to an Azure OpenAI chat model and return the exchange.

    The endpoint for ``model`` is looked up in the JSON document served at the
    URL stored in the ``AZURE_OPENAI_BASEURL`` environment variable; the API
    key is read from ``AZURE_OPENAI_KEY``.

    Args:
        model: Key under ``'nonprod'`` in the discovery document. The same
            value is passed as ``model`` to the chat-completions call.
        ask_a_question: Text sent as the single user message.
        max_tokens: Completion token cap forwarded to the API. Defaults to the
            previously hard-coded 30000.
        temperature: Sampling temperature forwarded to the API (default 0).
        timeout: Seconds to wait for the discovery request before giving up.

    Returns:
        A two-item list: the user message dict followed by the assistant
        reply dict, each with ``'role'`` and ``'content'`` keys.

    Raises:
        ValueError: ``AZURE_OPENAI_BASEURL`` is unset or empty.
        KeyError: ``model`` is not listed under ``'nonprod'``.
        requests.HTTPError: The discovery endpoint returned an error status.
    """
    base_url = os.getenv('AZURE_OPENAI_BASEURL')
    if not base_url:
        # Fail early with a readable message instead of an obscure URL error.
        raise ValueError('AZURE_OPENAI_BASEURL is not set; check the .env file.')

    # A timeout keeps the cell from hanging forever if the service is unreachable,
    # and raise_for_status surfaces HTTP errors before we try to parse the body.
    api_response = requests.get(base_url, timeout=timeout)
    api_response.raise_for_status()
    payload = api_response.json()

    deployments = payload['nonprod']
    if model not in deployments:
        raise KeyError(f'Unknown model {model!r}; available: {sorted(deployments)}')

    myClient = AzureOpenAI(
        api_key=os.getenv('AZURE_OPENAI_KEY'),
        api_version='2024-12-01-preview',
        # The first entry listed for the model is the one that gets used.
        azure_endpoint=deployments[model][0]['endpoint'],
    )

    convo = [{'role': 'user', 'content': ask_a_question}]
    response = myClient.chat.completions.create(
        model=model,
        max_tokens=max_tokens,
        messages=convo,
        temperature=temperature,
    )

    reply = response.choices[0].message.content
    convo.append({'role': 'assistant', 'content': reply})

    return convo

In [41]:
# Fetch the discovery document and list the model names available under 'nonprod'.
# The timeout prevents an indefinite hang; raise_for_status reports HTTP errors
# before the body is parsed as JSON.
response = requests.get(os.getenv('AZURE_OPENAI_BASEURL'), timeout=30)
response.raise_for_status()
returned_payload = response.json()

# (index, model_name) pairs; the index is used to pick a model in the next cell.
list_of_models = list(enumerate(returned_payload['nonprod']))
print(list_of_models)

[(0, 'dall-e-3'), (1, 'gpt-35-turbo'), (2, 'gpt-35-turbo-instruct'), (3, 'gpt-4-turbo'), (4, 'gpt-4.1'), (5, 'gpt-4.1-mini'), (6, 'gpt-4.1-nano'), (7, 'gpt-4.5'), (8, 'gpt-4o-audio'), (9, 'gpt-4o-global'), (10, 'gpt-4o-mini-global'), (11, 'gpt-4o-regional'), (12, 'gpt-image-1'), (13, 'o1'), (14, 'o1-mini'), (15, 'o3'), (16, 'o3-mini'), (17, 'o4-mini'), (18, 'text-embedding-3-large'), (19, 'text-embedding-3-small'), (20, 'text-embedding-ada-002')]


In [42]:
# Position of the chosen model in the (index, name) list printed above.
model_index = 4

# Unpack the pair and keep only the model name.
_, mymodel = list_of_models[model_index]
print(mymodel)

gpt-4.1


In [43]:
# Single-question smoke test; the returned conversation is shown as the cell output.
my_gpt(
    mymodel,
    'How far is saturn from the sun?',
)


[{'role': 'user', 'content': 'How far is saturn from the sun?'},
 {'role': 'assistant',
  'content': 'Saturn is, on average, about **1.43 billion kilometers** (or **886 million miles**) away from the Sun. This distance is also commonly expressed as **9.5 astronomical units (AU)**, where 1 AU is the average distance from the Earth to the Sun (about 150 million kilometers or 93 million miles).\n\nKeep in mind that Saturn’s distance from the Sun varies slightly because its orbit is not a perfect circle, but this average value is widely used.'}]

In [44]:
# Independent questions; each one is sent as its own single-turn conversation.
list_of_questions = [
    'Where are kangaroos mostly found?',
    'What countries are best for growing apples?',
    'Who let the dogs out?',
]

# One conversation (user message + assistant reply) per question, in input order.
[my_gpt(mymodel, prompt) for prompt in list_of_questions]

[[{'role': 'user', 'content': 'Where are kangaroos mostly found?'},
  {'role': 'assistant',
   'content': 'Kangaroos are **mostly found in Australia**. They are native to the Australian continent and are commonly seen in a variety of habitats, including grasslands, forests, savannas, and bushland. Some species of kangaroos can also be found in **Tasmania** and **New Guinea**. However, the vast majority of kangaroos live in Australia, where they are a well-known symbol of the country.'}],
 [{'role': 'user', 'content': 'What countries are best for growing apples?'},
  {'role': 'assistant',
   'content': 'The best countries for growing apples typically have **temperate climates** with cold winters and moderate summers, as apple trees require a period of winter chill to produce fruit. The following countries are renowned for their apple production, both in terms of quantity and quality:\n\n### 1. **China**\n- **World’s largest producer** (by far).\n- Major apple-growing regions: Shandong, 

# Fine-tuning with Direct Preference Optimization (DPO)

In [49]:
from openai import OpenAI
import json

In [50]:
# Preference-pair training data for DPO: each entry holds the prompt ("input"),
# the answer the model should favour ("preferred_output") and the answer it
# should move away from ("non_preferred_output").
# NOTE(review): the fine-tuning API may reject files with very few examples
# (the docs mention a minimum of 10) - confirm before submitting a real job.
training_data = [{
  "input": {
    "messages": [
      {
        "role": "user",
        "content": "Hello, can you tell me what is a vector space?"
      }
    ],
    "tools": [],
    # Must be a real boolean so json.dumps emits `true`; the string 'true'
    # would be serialised as the JSON string "true".
    "parallel_tool_calls": True
  },
  "preferred_output": [
    {
      "role": "assistant",
      "content": "A vector space in linear algebra is a set of objects called vectors, where two operations (vector addition and scalar multiplication) are defined and satisfy specific axioms. Essentially, it's a mathematical structure where vectors can be added and scaled while maintaining certain properties."
    }
  ],
  "non_preferred_output": [
    {
      "role": "assistant",
      "content": "A mathematical object that has both magnitude (or length) and direction."
    }
  ]
}]

file_path = '/home/sagemaker-user/user-default-efs/CLONED_REPOS/LLM-World/Files/fine_tune_data.jsonl'

# JSONL: one JSON object per line. The explicit encoding keeps the file
# independent of the platform's default encoding.
with open(file_path, 'w', encoding='utf-8') as f:
  for entry in training_data:
    f.write(json.dumps(entry) + '\n')

In [None]:
# OpenAI() takes its credentials from the environment (OPENAI_API_KEY by default).
# NOTE(review): the chatbot section above talks to Azure via AzureOpenAI; confirm
# that a plain OpenAI client is the intended target for fine-tuning.
client = OpenAI()

# Upload inside a context manager so the file handle is closed after the upload.
with open(file_path, 'rb') as training_fh:
    training_file = client.files.create(file=training_fh, purpose='fine-tune')
trainin_file = training_file  # original (misspelled) name, kept for any cell still using it

# NOTE(review): `mymodel` comes from the Azure discovery list (e.g. 'gpt-4.1');
# confirm the fine-tuning API accepts that identifier as a base model.
job = client.fine_tuning.jobs.create(
    training_file=training_file.id,
    model=mymodel,
    method={
        'type': 'dpo',
        'dpo': {
            'hyperparameters': {'beta': 0.1},
        },
    },
)

In [None]:
# Keep the listing in `jobs` (the next cell reads it) and show it as the cell output.
jobs = client.fine_tuning.jobs.list()
jobs

In [None]:
# Fetch the job listing here so this cell does not depend on a `jobs` variable
# left over from an earlier kernel session.
jobs = client.fine_tuning.jobs.list()

# NOTE(review): assumes the API returns the most recent job first - confirm.
latest_job = jobs.data[0]

# fine_tuned_model stays None until the job has finished successfully.
print(latest_job.fine_tuned_model)

In [None]:
# Eval fine-tuned model

# Use the model produced by the fine-tuning job instead of an empty model id,
# which the API cannot resolve.
fine_tuned_model = latest_job.fine_tuned_model
if not fine_tuned_model:
    # The job has not produced a model (still running, failed or cancelled).
    raise RuntimeError(
        f'Fine-tuning job {latest_job.id} has status {latest_job.status!r}; '
        'no fine-tuned model is available yet.'
    )

model_response = client.chat.completions.create(
    model=fine_tuned_model,
    messages=[
        {'role': 'user', 'content': 'Hi, can you describe what a vector space is?'}
    ],
)

print(model_response.choices[0])
