# Question-Answer System

In [1]:
import ast

import numpy as np
import openai
import pandas as pd
from openai.embeddings_utils import distances_from_embeddings

In [2]:
df = pd.read_csv('processed/embeddings.csv', index_col=0)

# The 'embeddings' column holds the text of a Python list in each cell.
# Parse it with ast.literal_eval (literals only) rather than eval, which would
# execute any expression found in the file, then turn each list into a numpy array.
df['embeddings'] = df['embeddings'].apply(ast.literal_eval).apply(np.array)
df.head()

Unnamed: 0,text,n_tokens,embeddings
0,. View IT - Dubai's number one property websi...,331,"[-0.000547883624676615, 3.8956197386141866e-05..."
1,Beds 1 2 3 4 5 6 ViewIT ...,486,"[0.004306596703827381, 0.018169928342103958, -..."
2,. View IT - Dubai's number one property websi...,331,"[-0.0005997214466333389, 2.804505356834852e-06..."
3,Beds 1 2 3 4 5 6 ViewIT ...,486,"[0.004306596703827381, 0.018169928342103958, -..."
4,about#contact form. View IT - Dubai's number ...,488,"[-0.005938783288002014, -0.014180973172187805,..."


### Convert questions (prompts) into embeddings

In [3]:
def create_context(question, df, maxlen=1800, size="ada"):
    '''
    Build a context string for a question from the most similar texts in the DataFrame.

    The question is embedded with the "text-embedding-ada-002" engine, the rows of
    ``df`` are ranked by cosine distance between that embedding and each row's
    ``embeddings`` value, and the row texts are collected closest-first until the
    token budget is used up.

    Parameters
    ----------
    question
        Text passed as ``input`` to ``openai.Embedding.create``.
    df : pandas.DataFrame
        Must provide the columns ``text``, ``n_tokens`` and ``embeddings``.
        The frame is NOT modified: distances are attached to a copy only.
    maxlen : int
        Token budget. Each text costs ``n_tokens + 4`` (the fixed 4 is presumably
        an allowance for the separator - TODO confirm). Scanning stops at the
        first text that would push the running total above this budget.
    size : str
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    str
        The selected texts joined by a ``###`` separator line surrounded by blank
        lines, or an empty string when not even the closest text fits.
    '''

    # Get embeddings for question
    q_embeddings = openai.Embedding.create(
        input=question,
        engine="text-embedding-ada-002",
    )['data'][0]['embedding']

    # Rank on a copy: assign() returns a new frame, so the caller's DataFrame
    # does not silently gain (or get overwritten in) a 'distances' column.
    ranked = df.assign(
        distances=distances_from_embeddings(
            q_embeddings, df['embeddings'].values, distance_metric='cosine'
        )
    ).sort_values('distances', ascending=True)

    returns = []
    cur_len = 0

    # Walk the texts closest-first and add them until the context is too long
    for text, n_tokens in zip(ranked['text'], ranked['n_tokens']):

        # Add length of text to current length
        cur_len += n_tokens + 4

        # If context too long, stop; later (less similar) texts are not considered
        if cur_len > maxlen:
            break

        # Else add it to the text being returned
        returns.append(text)

    # Return context
    return '\n\n###\n\n'.join(returns)

In [4]:
def answer_question(
        df=df,
        model="text-davinci-003",
        question="Hi, please introduce yourself",  # This is the default question
        max_len=1800,
        size='ada',
        debug=False,
        max_tokens=500,
        stop_sequence=None
):
    '''
    Answer a question using the most similar texts from the DataFrame as context.

    The context is obtained from create_context and embedded, together with the
    question, in a fixed "virtual property broker" prompt that is sent to
    openai.Completion.create.

    df: DataFrame handed to create_context; the default is whatever the
        module-level df was when this function was defined.
    model: completion model name.
    question: the question to answer.
    max_len: forwarded to create_context as maxlen.
    size: forwarded to create_context unchanged.
    debug: when true, the retrieved context is printed before the request.
    max_tokens: forwarded to the completion request.
    stop_sequence: forwarded to the completion request as stop.

    Returns the stripped text of the first completion choice. Any exception
    raised while building the prompt, calling the completion endpoint or
    reading its response is printed and an empty string is returned instead.
    Errors raised by create_context are not caught here.
    '''
    # Retrieve the closest-matching texts for this question.
    context = create_context(question, df, maxlen=max_len, size=size)

    # In debug mode, show the retrieved context before querying the model.
    if debug:
        print('Context:\n' + context)
        print('\n\n')

    try:
        # Assemble the prompt from the context and the question.
        prompt = f"""You are a virtual property broker for the real estate company 'ViewIt'. Be friendly and welcoming and answer the question based on the context below.
            
            Context: {context}
            
            ---
            
            Question: {question}
            Answer: """

        # Request settings, collected in one place.
        request = dict(
            prompt=prompt,
            temperature=0,
            max_tokens=max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stop=stop_sequence,
            model=model,
        )
        completion = openai.Completion.create(**request)

        first_choice = completion['choices'][0]
        return first_choice['text'].strip()
    except Exception as err:
        # Best effort: report the problem and fall back to an empty answer.
        print(err)
        return ''

In [5]:
# Try the pipeline end to end with a sample question; the value returned by
# answer_question is displayed as this cell's output.
answer_question(df, question='What kind of properties are available?')

'We have a wide range of properties available for sale and rent, ranging from apartments, townhouses, and villas. You can find properties with a minimum rent of 20,000 and a maximum rent of 1,000,000, and with a minimum of 1 bedroom and a maximum of 6 bedrooms.'