In [2]:
import os
import dspy
import csv

In [15]:
import ujson

In [25]:
from sentence_transformers import SentenceTransformer

In [7]:
# #generate llm object
# local_config = {
#     "api_base": "http://localhost:11434/v1",  # 注意需加/v1路徑
#     "api_key": "NULL",  # 特殊標記用於跳過驗證
#     "model": "deepseek-r1:7b",
#     "custom_llm_provider":"deepseek"
# }

# dspy.configure(
#     lm=dspy.LM(
#         **local_config
#     )
# )
# # 測試問答
# qa = dspy.Predict('question -> answer')
# response = qa(question="中國唐朝有幾任皇帝?")
# print(f"模型回答：{response.answer}")


### ref: https://dspy.ai/tutorials/rag/

In [11]:
def InitializeLLM(
    api_base: str = "http://localhost:11434/v1",
    api_key: str = "NULL",
    model: str = "deepseek-r1:7b",
    custom_llm_provider: str = "deepseek",
) -> None:
    """Configure DSPy's default language model.

    Builds a ``dspy.LM`` from the given connection settings and registers it
    through ``dspy.configure``. Calling the function with no arguments
    reproduces the notebook's original local setup.

    Args:
        api_base: Base URL of the model endpoint. Note that the ``/v1`` path
            suffix is required for the default local endpoint.
        api_key: API key forwarded to the LM. The default ``"NULL"`` is a
            placeholder; per the original note it is a special marker used to
            skip key verification.
        model: Model identifier forwarded to the LM.
        custom_llm_provider: Provider name forwarded to the LM.

    Returns:
        None. The effect is the ``dspy.configure`` call.
    """
    local_config = {
        "api_base": api_base,
        "api_key": api_key,
        "model": model,
        "custom_llm_provider": custom_llm_provider,
    }
    dspy.configure(lm=dspy.LM(**local_config))

In [None]:
# Configure the LM before any predictor is used. Without this call no live cell
# sets up a model, so the cells below only work with leftover kernel state.
InitializeLLM()

In [9]:
def llm_predict(queryStr: str = None) -> None:
    """Ask the configured LM a single question and print its answer.

    Builds a ``dspy.Predict`` module for the ``"question -> answer"``
    signature, calls it with ``queryStr`` and prints the ``answer`` field of
    the response prefixed with ``llm:``.

    Args:
        queryStr: The question to send. Must not be ``None``.

    Raises:
        ValueError: If ``queryStr`` is ``None``.
    """
    # Identity check: `== None` can be fooled by objects with a custom __eq__.
    if queryStr is None:
        raise ValueError("query string is none, please input query string.")
    promptPatternStr = "question -> answer"
    qa = dspy.Predict(promptPatternStr)
    response = qa(question=queryStr)
    print(f"llm:{response.answer}")

In [10]:
# Smoke-test the predictor with a simple question.
llm_predict(queryStr="what day is today")

llm:October 26th


In [12]:
# Show the most recent recorded LM interaction (n=1): system message,
# user message and response, as rendered in the output below.
dspy.inspect_history(n=1)





[34m[2025-02-21T16:17:14.651093][0m

[31mSystem message:[0m

Your input fields are:
1. `question` (str)

Your output fields are:
1. `answer` (str)

All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## question ## ]]
{question}

[[ ## answer ## ]]
{answer}

[[ ## completed ## ]]

In adhering to this structure, your objective is: 
        Given the fields `question`, produce the fields `answer`.


[31mUser message:[0m

[[ ## question ## ]]
what day is today

Respond with the corresponding output fields, starting with the field `[[ ## answer ## ]]`, and then ending with the marker for `[[ ## completed ## ]]`.


[31mResponse:[0m

[32m[[ ## answer ## ]]
October 26th

[[ ## completed ## ]][0m







In [13]:
# Chain-of-thought predictor for the 'question -> response' signature; the
# recorded Prediction carries a `reasoning` field next to `response`.
style_question = "should curly braces appear on their own line?"
cot = dspy.ChainOfThought("question -> response")
cot(question=style_question)

Prediction(
    reasoning='{The reasoning field is where you would provide a detailed thought process or explanation based on the input question. It should be structured in a way that clearly shows how you arrived at your answer.}',
    response='{The response field is where you provide a concise and direct answer to the question, typically one or two sentences long. It should be clear and to the point, summarizing the key points from your reasoning.}'
)

In [20]:
# with open("./data/ragqa_arena_tech_examples.jsonl") as f:
#     data = [ujson.loads(line) for line in f]

### Basic RAG
- dspy.Embedder: https://dspy.ai/api/models/Embedder/
- Multi-hop search: https://dspy.ai/tutorials/multihop_search/?h=search

In [26]:

# Retrieval settings.
max_characters = 6000  # for truncating >99th percentile of documents
topk_docs_to_retrieve = 5  # number of documents to retrieve per search query

# Load the corpus: one JSON object per line, keeping only its "text" field,
# truncated to max_characters. The encoding is given explicitly so decoding
# does not depend on the platform's locale default.
with open("../data/ragqa_arena_tech_corpus.jsonl", encoding="utf-8") as f:
    corpus = [ujson.loads(line)['text'][:max_characters] for line in f]
print(f"Loaded {len(corpus)} documents. Will encode them below.")

# Local embedding model run on CPU; its `encode` method is handed to DSPy as
# the embedding callable.
model = SentenceTransformer("sentence-transformers/static-retrieval-mrl-en-v1", device="cpu")
# NOTE(review): the recorded output of this cell reports 1024-dim embeddings,
# so `dimensions=512` does not appear to take effect with a callable model.
# Confirm, and truncate on the SentenceTransformer side if 512 dims are wanted.
embedder = dspy.Embedder(model=model.encode, dimensions=512)
# Retriever over the corpus; returns the top-k passages for a query.
search = dspy.retrievers.Embeddings(embedder=embedder, corpus=corpus, k=topk_docs_to_retrieve)

Loaded 28436 documents. Will encode them below.


modules.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/226 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/670k [00:00<?, ?B/s]

0_StaticEmbedding%2Ftokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/125M [00:00<?, ?B/s]

Training a 32-byte FAISS index with 337 partitions, based on 28436 x 1024-dim embeddings


In [34]:
# Peek at a single corpus entry (index 1) to sanity-check the loaded text.
corpus[1]

'You can use StartSound.PrefPane which basically just sets the volume to 0 when you shutdown and then turns it back up after login.'

In [31]:
class RAG(dspy.Module):
    """Minimal retrieve-then-answer module.

    Retrieves passages for a question with the notebook-level ``search``
    retriever and passes them, together with the question, to a
    chain-of-thought predictor for the ``'context, question -> answer'``
    signature.
    """

    def __init__(self):
        # Initialise dspy.Module's own state before attaching sub-modules.
        super().__init__()
        self.respond = dspy.ChainOfThought('context, question -> answer')

    def forward(self, question):
        """Answer ``question`` using retrieved passages as context.

        Args:
            question: The question text; it is used both as the retrieval
                query and as the ``question`` input of the predictor.

        Returns:
            Whatever ``self.respond`` returns for the retrieved context and
            the question.
        """
        # `search` is the module-level retriever defined earlier in the notebook.
        context = search(question).passages
        return self.respond(context=context, question=question)

In [33]:
# Instantiate the pipeline and print the prediction for one sample question.
rag = RAG()
rag_prediction = rag(question="what are high memory and low memory on linux?")
print(rag_prediction)

Prediction(
    reasoning='High Memory on Linux is typically used by user-space applications and is separate from Low Memory, which is reserved for the kernel. This separation prevents application interference and ensures efficient resource management.',
    answer='High Memory refers to the physical memory allocated for user-space applications, while Low Memory is reserved for the kernel. This distinction helps prevent application-kernel interference and optimizes performance by keeping hardware resources accessible only to the kernel when needed.'
)


In [35]:
# Show the recorded prompt/response for the latest LM call(s). No `n` is
# passed, so the library's default count applies.
dspy.inspect_history()





[34m[2025-02-21T17:15:25.940977][0m

[31mSystem message:[0m

Your input fields are:
1. `context` (str)
2. `question` (str)

Your output fields are:
1. `reasoning` (str)
2. `answer` (str)

All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## context ## ]]
{context}

[[ ## question ## ]]
{question}

[[ ## reasoning ## ]]
{reasoning}

[[ ## answer ## ]]
{answer}

[[ ## completed ## ]]

In adhering to this structure, your objective is: 
        Given the fields `context`, `question`, produce the fields `answer`.


[31mUser message:[0m

[[ ## context ## ]]
[1] «Reading system memory usage in Activity Monitor from support.apple.com gives a detailed explanation about the different types of RAM. Free memory: This is RAM thats not being used. Wired memory: Information in this memory cant be moved to the hard disk, so it must stay in RAM. The amount of Wired memory depends on the applications you are using. Active memory: This informat