# Steps 3 & 4: Querying a Completion Model with a Custom Text Prompt

In [2]:
import tiktoken

In [4]:
# Load the 'cl100k_base' byte-pair encoding; it is used below to count prompt tokens.
# NOTE(review): tiktoken appears to map text-davinci-003 (the model queried later in
# this notebook) to 'p50k_base', so counts from this encoding are likely only an
# approximation for that model — confirm, e.g. via tiktoken.encoding_for_model(...).
tokenizer = tiktoken.get_encoding('cl100k_base')

In [5]:
# Display the encoding object to confirm which encoding was loaded.
tokenizer

<Encoding 'cl100k_base'>

In [6]:
# Quick demo: encode() turns a string into a list of integer token ids.
tokenizer.encode('This is a question')

[2028, 374, 264, 3488]

# Build the Custom Text Prompt:

In [9]:
# Tokenize the user question and show the token ids alongside their count.
question = 'When did Russia invade Ukraine?'
tokenized = tokenizer.encode(question)
tokenized, len(tokenized)

([4599, 1550, 8524, 75272, 19278, 30], 6)

# Composing a Prompt

In [10]:
# Prompt skeleton for the completion model, assembled line by line.
# It has two positional '{}' slots, filled via str.format(context, question):
# the first receives the context text, the second the user's question.
# The leading empty entry keeps the newline at the very start of the prompt.
prompt_template = '\n'.join([
    '',
    'Answer the question based on the context below, and if the question',
    'can\'t be answered based on the context, say "I don\'t know"',
    '',
    'Context: ',
    '',
    '{}',
    '',
    '---',
    '',
    'Question: {}',
    'Answer:',
])

In [23]:
# The user question that fills the second slot of prompt_template.
question = 'When did Russia invade Ukraine?'

In [24]:
# Sanity-check the template by filling the context slot with a placeholder string.
print(prompt_template.format('context', question))


Answer the question based on the context below, and if the question
can't be answered based on the context, say "I don't know"

Context: 

context

---

Question: When did Russia invade Ukraine?
Answer:


In [43]:
# Token budget for the prompt; the context-selection loop stops adding text once
# the running token count would exceed this value.
max_token_count = 1000

In [26]:
# Tokenize the fixed parts of the prompt: the question and the template text.
# The template is encoded as-is, i.e. with its '{}' placeholders still in place.
tokenized_question = tokenizer.encode(question)
tokenized_prompt = tokenizer.encode(prompt_template)

In [27]:
# Show how many tokens the question and the template each take.
len(tokenized_question), len(tokenized_prompt)

(6, 41)

In [28]:
# Baseline token count before any context is added: question plus template.
current_token_count = len(tokenized_question) + len(tokenized_prompt)
current_token_count

47

In [30]:
import pandas as pd

# Load the text rows together with their embeddings and distance values.
# NOTE(review): presumably the rows are already ordered by ascending distance
# (per the file name and the values displayed) — confirm against the step that
# produced this CSV.
df = pd.read_csv('distances_sorted.csv')
df

Unnamed: 0,text,embeddings,distances
0,March 2 – Russian invasion of Ukraine: Russia ...,"[-5.313744259183295e-05, -0.019540982320904732...",0.107204
1,April 3 – Russian invasion of Ukraine: As Russ...,"[-0.012207494117319584, -0.012519340962171555,...",0.111251
2,November 11 – Russian invasion of Ukraine: Ukr...,"[-0.012295315973460674, -0.014077062718570232,...",0.115467
3,September 21 – Russian invasion of Ukraine: Fo...,"[-0.025522246956825256, -0.022120986133813858,...",0.116897
4,October 29 – Russian invasion of Ukraine: In r...,"[-0.00995244737714529, -0.030325081199407578, ...",0.117591
...,...,...,...
174,2022 (MMXXII) was a common year starting on S...,"[-0.0029914826154708862, -0.019716661423444748...",0.291345
175,March 31 – Expo 2020 closes in Dubai after a 6...,"[-0.0032101301476359367, -0.04666922986507416,...",0.292565
176,"November 11 – The cryptocurrency exchange FTX,...","[0.002234421204775572, -0.025721479207277298, ...",0.293966
177,November 20 – 2022 Nepalese general election: ...,"[-0.00431521050632, -0.0008002328686416149, -0...",0.294986


In [45]:
# Greedily collect rows of df.text, in file order, until adding another one
# would push the prompt past max_token_count.
# NOTE(review): this assumes the rows are already ordered most-relevant-first
# (the CSV is named distances_sorted.csv) — confirm.

# The selected entries are later joined with this separator when the prompt is
# built, so its tokens have to be budgeted as well.
context_separator = '\n\n###\n\n'
separator_token_count = len(tokenizer.encode(context_separator))

context = []
current_token_count = len(tokenized_question) + len(tokenized_prompt)

for text in df.text.values:
    entry_token_count = len(tokenizer.encode(text))
    if context:
        # Every entry after the first is preceded by one separator.
        entry_token_count += separator_token_count

    # Check the budget before committing, so current_token_count always
    # reflects what is actually in `context`.
    if current_token_count + entry_token_count > max_token_count:
        break

    context.append(text)
    current_token_count += entry_token_count

In [46]:
# Inspect the list of text snippets selected as context.
context

['March 2 – Russian invasion of Ukraine: Russia seizes its first large city, the Black Sea port of Kherson, as shelling intensifies across many parts of Ukraine, including civilian areas.',
 "April 3 – Russian invasion of Ukraine: As Russia's forces retreat from areas near Kyiv, it is accused by Ukraine of war crimes, amid mounting evidence of indiscriminate civilian killings, including the Bucha massacre.",
 'November 11 – Russian invasion of Ukraine: Ukrainian forces recapture Kherson, the only regional capital to be taken by Russia since the start of the war.',
 'September 21 – Russian invasion of Ukraine: Following a major counteroffensive by Ukraine in the east of the country, Putin announces a partial mobilisation of Russia and threatens nuclear retaliation, saying "this is not a bluff".',
 'October 29 – Russian invasion of Ukraine: In response to an alleged Ukrainian drone attack against the Black Sea Fleet, Russia withdraw from a U.N.-brokered deal on the shipment of grain, whi

In [48]:
# Passing the list itself makes str.format embed its Python repr (brackets and
# quotes included), which is why the entries are joined into one string before
# the prompt is actually sent.
print(prompt_template.format(context, question))


Answer the question based on the context below, and if the question
can't be answered based on the context, say "I don't know"

Context: 

['March 2 – Russian invasion of Ukraine: Russia seizes its first large city, the Black Sea port of Kherson, as shelling intensifies across many parts of Ukraine, including civilian areas.', "April 3 – Russian invasion of Ukraine: As Russia's forces retreat from areas near Kyiv, it is accused by Ukraine of war crimes, amid mounting evidence of indiscriminate civilian killings, including the Bucha massacre.", 'November 11 – Russian invasion of Ukraine: Ukrainian forces recapture Kherson, the only regional capital to be taken by Russia since the start of the war.', 'September 21 – Russian invasion of Ukraine: Following a major counteroffensive by Ukraine in the east of the country, Putin announces a partial mobilisation of Russia and threatens nuclear retaliation, saying "this is not a bluff".', 'October 29 – Russian invasion of Ukraine: In response t

In [50]:
# Join the snippets with a '###' delimiter line so each one reads as a separate
# passage, then preview the complete prompt.
print(prompt_template.format('\n\n###\n\n'.join(context), question))


Answer the question based on the context below, and if the question
can't be answered based on the context, say "I don't know"

Context: 

March 2 – Russian invasion of Ukraine: Russia seizes its first large city, the Black Sea port of Kherson, as shelling intensifies across many parts of Ukraine, including civilian areas.

###

April 3 – Russian invasion of Ukraine: As Russia's forces retreat from areas near Kyiv, it is accused by Ukraine of war crimes, amid mounting evidence of indiscriminate civilian killings, including the Bucha massacre.

###

November 11 – Russian invasion of Ukraine: Ukrainian forces recapture Kherson, the only regional capital to be taken by Russia since the start of the war.

###

September 21 – Russian invasion of Ukraine: Following a major counteroffensive by Ukraine in the east of the country, Putin announces a partial mobilisation of Russia and threatens nuclear retaliation, saying "this is not a bluff".

###

October 29 – Russian invasion of Ukraine: In 

# Send Custom Text Prompt to Completion Model

Using the prompt string we created, we will query an OpenAI Completion model to get an answer. Specify a `max_tokens` of 150.

In [51]:
import os

import openai

# Read the API key from the environment instead of pasting it into the
# notebook, so the secret never ends up in the saved file or version control.
openai.api_key = os.environ.get('OPENAI_API_KEY')
if not openai.api_key:
    # Fail here with a clear message rather than later with an opaque
    # authentication error from the API call.
    raise RuntimeError(
        'Set the OPENAI_API_KEY environment variable before running this notebook.'
    )

In [53]:
# Fill the template with the '###'-delimited context and the question, then
# send it to the completion endpoint.
# max_tokens is set explicitly: when omitted, the endpoint falls back to a very
# short default completion length, which can cut an answer off.
response = openai.Completion.create(
    model='text-davinci-003',
    prompt=prompt_template.format('\n\n###\n\n'.join(context), question),
    max_tokens=150
)

In [54]:
# Pull the generated answer text out of the first returned choice.
response['choices'][0]['text']

' February 21–24, 2020.'