# Try with your Data

In [None]:
import os
import langchain
import textwrap
import warnings

In [None]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import StreamingStdOutCallbackHandler
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_qdrant import Qdrant
from langchain_huggingface import HuggingFaceEmbeddings

In [None]:
from llama_cpp import Llama
from scipy import spatial
from qdrant_client import QdrantClient

In [None]:
from ssec_tutorials import (
    OLMO_MODEL,
    QDRANT_PATH,
    QDRANT_COLLECTION_NAME,
    download_qdrant_data,
)

In [None]:
# Ignore every Python warning from here on (presumably to keep the
# notebook cell output uncluttered).
warnings.filterwarnings("ignore")

## Load OLMo

In [None]:
# Load the OLMo model through LangChain's LlamaCpp wrapper.
# OLMO_MODEL is imported from ssec_tutorials and converted with str() for
# the model_path argument (presumably it is a path-like object — confirm
# in ssec_tutorials).
olmo = LlamaCpp(
    model_path=str(OLMO_MODEL),
    # TODO: What should be the other parameters here? Uncomment the next cell and run it.
)

In [None]:
# Uncomment the next line and run this cell to see the options LlamaCpp accepts.
# LlamaCpp?

## Data Setup

Use the notebooks in the Appendix folder to set up your data pipeline. You can use CSV files, PDFs, etc. as your data source.

Note: if you're running this in GitHub Codespaces, [refer to this link](https://stackoverflow.com/questions/62284623/how-can-i-upload-a-file-to-a-github-codespaces-environment) and upload your data to the `resources/` folder.

In [None]:
# Add your code here.

In [None]:
# Ensure you can retrieve relevant context based on your prompt.
# context = retriever.invoke("What's my relevant question?")
# or
# Load your data into the context variable
# context = <Add context from your data file>

## Prompt Engineering

In [None]:
# Create a prompt template using OLMo's tokenizer chat template we saw in module 1.
# The template text is read from the loaded model's metadata under the
# "tokenizer.chat_template" key and is declared as a jinja2 template.
prompt_template = PromptTemplate.from_template(
    template=olmo.client.metadata["tokenizer.chat_template"],
    template_format="jinja2",
    # Pre-fill these two template variables here; the cells below then only
    # supply `messages` when formatting or invoking.
    partial_variables={"add_generation_prompt": True, "eos_token": "<|endoftext|>"},
)

In [None]:
# Test the prompt you want to send to OLMo.
# The two values below are placeholders: replace them with your own
# question and with the context retrieved or loaded from your data.

question = "What's the question?"
context = "Use context from above"

# Add prompt instructions
# format() only renders the template to text; the model is not called here,
# so the resulting prompt can be inspected as the cell output.
# NOTE: the leading spaces inside the triple-quoted f-string are part of the
# string and therefore part of the message content.
prompt_template.format(
    messages=[
        {
            "role": "user",
            "content": f"""<Add your prompt instructions>:

            Context: {context}
            
            Question: {question}""",
        }
    ]
)

## RAG

In [None]:
# Chain the prompt template and olmo
# The `|` operator composes the two objects into one chain: the prompt
# rendered by prompt_template becomes the input passed to olmo.
llm_chain = prompt_template | olmo

In [None]:
# Placeholder values: replace them with your own question and with the
# context retrieved or loaded from your data.
question = "What's the question?"
context = "Use the context from above"

# Invoke the chain with a question and other parameters.
# The input dict supplies the `messages` template variable; the callback
# handler passed through `config` prints the model output to stdout as it
# is generated.
# NOTE: the leading spaces inside the triple-quoted f-string are part of the
# string and therefore part of the message content.
llm_chain.invoke(
    {
        "messages": [
            {
                "role": "user",
                "content": f"""<Add your prompt instructions>:
    
                Context: {context}
                
                Question: {question}""",
            }
        ],
    },
    config={"callbacks": [StreamingStdOutCallbackHandler()]},
)