In [1]:
import json

In [2]:
with open('documents.json', 'rt') as f:
    docs_raw= json.load(f)

In [3]:
documents = []

for course_dict in docs_raw:
    for doc in course_dict['documents']:
        doc['course'] = course_dict['course']
        documents.append(doc)

In [4]:
documents[0]

{'text': "The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  “Office Hours'' live.1\nSubscribe to course public Google Calendar (it works from Desktop only).\nRegister before the course starts using this link.\nJoin the course Telegram channel with announcements.\nDon’t forget to register in DataTalks.Club's Slack and join the channel.",
 'section': 'General course-related questions',
 'question': 'Course - When will the course start?',
 'course': 'data-engineering-zoomcamp'}

In [5]:
q = 'can I enroll the course even it has already started?'

In [6]:
def search(query, doc):
    boost = {'question': 3.0, 'section': .5}

    result = index.search(
        query = query,
        filter_dict={'course':'data-engineering-zoomcamp'},
        boost_dict= boost,
        num_results= 10 
    )

    return result

In [7]:
def build_prompt(query, result):

    prompt_temp = '''
    you're a course teaching assistant. Answer the QUESTION based on the CONTEXT.
    Use only the facts from the CONTEXT when answering the QUESTION.
    
    QUESTION: {question}
    
    CONTEXT:
    {context}
    '''.strip()
    
    context = ""

    for doc in result:
        context = context + f"section: {doc['section']}\nquestions: {doc['question']}\nanswer: {doc['text']}\n\n"

    prompt = prompt_temp.format(question = query, context = context).strip()

    return prompt

In [8]:
def llm(prompt):
    response = client.chat.completions.create(
        model='phi3',
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [9]:
query = 'if the course has already started, can i enroll the course?'

In [10]:
from elasticsearch import Elasticsearch

In [11]:
from openai import OpenAI

In [12]:
client = OpenAI(
    base_url = 'http://localhost:11434/v1/',
    api_key = 'ollama',
)

In [13]:
def llm(prompt):
    response = client.chat.completions.create(
        model = 'phi3',
        messages= [
            {
                'role':'user',
                'content': prompt
            }
            
        ]
    )
    return response.choices[0].message.content

In [14]:
es_client = Elasticsearch('http://localhost:9200')

In [15]:
index_setting = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            "course": {"type": "keyword"} 
        }
    }
}

In [16]:
index_name = "course-questions"

In [17]:
es_client.indices.create(index=index_name, body= index_setting)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'course-questions'})

In [18]:
for doc in documents:
    es_client.index(index=index_name, document=doc)

In [19]:
def elastics_search(query):

    search_query ={
        "size": 5,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                "filter": {
                    "term": {
                        "course": "data-engineering-zoomcamp"
                    }
                }
            }
        }
    }

    # search
    response = es_client.search(index= index_name, body= search_query)

    result_docs = []

    for hits in response['hits']['hits']:
        result_docs.append(hits['_source'])


    return result_docs

    
    
    

In [20]:
def rag_es(query, doc):

    search_result = elastics_search(query)
    prompt = build_prompt(query, search_result)
    answer = llm(prompt)

    return answer
    

In [21]:
rag_es('Can i enroll the course after it started', doc)

"Based on these options, if you want to join or enroll in a course that has already started but haven't completed yet, it seems feasible as per previous practices of this program with regards to keeping materials and allowing pace adjustment after courses finish (https://pastebin.com/Y6JQxG5D). Regarding deadlines for turning final projects just submit your project on time rather than waiting until the end if you've already started course, but be mindful that these late submissions may not receive full credit due to potential disruptions in instructor contact (found here: https://pastebin.com/GKHFm4k9). However, for specific information about a single bootcamp or the current enrollment policies and deadlines of this course run specifically by @ZoomCampQA on Twitter, you will have to ask its corresponding questions directly (https://pastebin.com/cRj7qKLB#1:~*<|im_sep|>== Question ==\nWhy should we not leave submissions for the final projects until the day they are due? Considering past 

In [22]:
pip install streamlit


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [23]:
import streamlit as st

In [26]:
st.text_input?

[0;31mSignature:[0m
[0mst[0m[0;34m.[0m[0mtext_input[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mlabel[0m[0;34m:[0m [0;34m'str'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mvalue[0m[0;34m:[0m [0;34m'str | SupportsStr | None'[0m [0;34m=[0m [0;34m''[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmax_chars[0m[0;34m:[0m [0;34m'int | None'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mkey[0m[0;34m:[0m [0;34m'Key | None'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtype[0m[0;34m:[0m [0;34m"Literal['default', 'password']"[0m [0;34m=[0m [0;34m'default'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mhelp[0m[0;34m:[0m [0;34m'str | None'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mautocomplete[0m[0;34m:[0m [0;34m'str | None'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mon_change[0m[0;34m:[0m [0;34m'WidgetCallback | None'[0m [0;34m