In [16]:
import os
import sys
import logging
import ollama
import nest_asyncio
import numpy as np
from nano_graphrag import GraphRAG, QueryParam
from nano_graphrag.base import BaseKVStorage
from nano_graphrag._utils import compute_args_hash, wrap_embedding_func_with_attrs

# Set log levels: root logger at WARNING, the "nano-graphrag" logger at INFO.
logging.basicConfig(level=logging.WARNING)
logging.getLogger("nano-graphrag").setLevel(logging.INFO)

# Assumed llm model settings
# NOTE(review): "ctx32k" looks like a locally created Ollama model tag with an
# enlarged context window — confirm it exists on the target Ollama server.
MODEL = "llama3.1:ctx32k"

# Assumed embedding model settings
# The dimension and max-token values are handed to wrap_embedding_func_with_attrs
# when the embedding function is defined; they are assumed to match the model.
EMBEDDING_MODEL = "nomic-embed-text"
EMBEDDING_MODEL_DIM = 768
EMBEDDING_MODEL_MAX_TOKENS = 8192

# Directory passed to GraphRAG as working_dir; the index files live here.
WORKING_DIR = "./nano_graphrag_cache_ollama_TEST"
# Patch asyncio to permit nested event-loop use; presumably required because
# the notebook kernel already runs an event loop — TODO confirm.
nest_asyncio.apply()

本单元主要用于导入依赖并完成初始化配置(日志级别、LLM 与嵌入模型参数、工作目录)。

In [17]:
async def ollama_model_if_cache(
    prompt, system_prompt=None, history_messages=None, **kwargs
) -> str:
    """Ask the Ollama chat model `MODEL` for a completion, with optional caching.

    Args:
        prompt: The user message to send.
        system_prompt: Optional system message placed first in the conversation.
        history_messages: Optional list of earlier chat messages (dicts with
            "role"/"content") inserted between the system and user messages.
            ``None`` (the default) means no history.
        **kwargs: Extra options forwarded to ``ollama.AsyncClient.chat``.
            ``max_tokens`` and ``response_format`` are discarded, and
            ``hashing_kv`` (a key-value store used as response cache) is
            consumed here rather than forwarded.

    Returns:
        The text content of the model's reply, taken from the cache when a
        cached entry exists for the same model and message list.
    """
    # Strip the options that must not be forwarded to the chat call.
    kwargs.pop("max_tokens", None)
    kwargs.pop("response_format", None)
    hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    # `or []` covers both the default and an explicit None from the caller.
    messages.extend(history_messages or [])
    messages.append({"role": "user", "content": prompt})

    args_hash = None
    if hashing_kv is not None:
        # The cache key depends on the model name and the full message list.
        args_hash = compute_args_hash(MODEL, messages)
        cached = await hashing_kv.get_by_id(args_hash)
        if cached is not None:
            return cached["return"]

    # Only build a client when the model actually has to be queried.
    ollama_client = ollama.AsyncClient()
    response = await ollama_client.chat(model=MODEL, messages=messages, **kwargs)
    result = response["message"]["content"]

    if hashing_kv is not None:
        await hashing_kv.upsert({args_hash: {"return": result, "model": MODEL}})

    return result

@wrap_embedding_func_with_attrs(
    embedding_dim=EMBEDDING_MODEL_DIM,
    max_token_size=EMBEDDING_MODEL_MAX_TOKENS,
)
async def ollama_embedding(texts: list[str]) -> np.ndarray:
    """Embed each text with the Ollama embedding model `EMBEDDING_MODEL`.

    Args:
        texts: The strings to embed; one request is issued per string, in order.

    Returns:
        A NumPy array built from the per-text embedding vectors, one row per
        input text (previously a plain list of lists was returned, which
        contradicted the declared return type).
    """
    # NOTE(review): `ollama.embeddings` is a synchronous call inside a
    # coroutine, so the requests run one after another.
    vectors = [
        ollama.embeddings(model=EMBEDDING_MODEL, prompt=text)["embedding"]
        for text in texts
    ]
    return np.array(vectors)

本单元定义了两个函数:`ollama_model_if_cache` 先在缓存中查找当前 prompt 对应的回答,未命中时调用 Ollama 模型生成回答并写入缓存;`ollama_embedding` 调用 Ollama 的嵌入模型为每段文本生成向量。

In [18]:
def remove_if_exist(file):
    """Delete the file at path `file` when it is present; otherwise do nothing."""
    if not os.path.exists(file):
        return
    os.remove(file)



def insert():
    """Re-index the mock document with GraphRAG and report how long it took.

    Reads the test corpus, deletes the index artifacts left in `WORKING_DIR`
    by an earlier run (no LLM-response-cache file is in the removal list),
    indexes the text using the Ollama-backed model and embedding functions,
    and prints the elapsed time in seconds.
    """
    from time import time

    with open("../tests/mock_data.txt", encoding="utf-8-sig") as source_file:
        corpus = source_file.read()

    # Files from a previous indexing run that are cleared before re-indexing.
    stale_artifacts = (
        f"{WORKING_DIR}/vdb_entities.json",
        f"{WORKING_DIR}/kv_store_full_docs.json",
        f"{WORKING_DIR}/kv_store_text_chunks.json",
        f"{WORKING_DIR}/kv_store_community_reports.json",
        f"{WORKING_DIR}/graph_chunk_entity_relation.graphml",
    )
    for artifact in stale_artifacts:
        remove_if_exist(artifact)

    graph_rag = GraphRAG(
        working_dir=WORKING_DIR,
        enable_llm_cache=True,
        best_model_func=ollama_model_if_cache,
        cheap_model_func=ollama_model_if_cache,
        embedding_func=ollama_embedding,
    )
    started_at = time()
    graph_rag.insert(corpus)
    elapsed = time() - started_at
    print("indexing time:", elapsed)



本单元定义 `insert` 函数:先删除工作目录中旧的索引文件,再用 GraphRAG 对测试文本重新建立索引,并打印索引耗时。

In [19]:
# Run the indexing pipeline; it prints the elapsed indexing time when done.
insert()

INFO:nano-graphrag:Load KV full_docs with 0 data
INFO:nano-graphrag:Load KV text_chunks with 0 data
INFO:nano-graphrag:Load KV llm_response_cache with 104 data
INFO:nano-graphrag:Load KV community_reports with 0 data
INFO:nano-vectordb:Init {'embedding_dim': 768, 'metric': 'cosine', 'storage_file': './nano_graphrag_cache_ollama_TEST/vdb_entities.json'} 0 data
INFO:nano-graphrag:[New Docs] inserting 1 docs
INFO:nano-graphrag:[New Chunks] inserting 42 chunks
INFO:nano-graphrag:[Entity Extraction]...


⠹ Processed 42(100%) chunks,  409 entities(duplicated), 165 relations(duplicated)


INFO:nano-graphrag:Inserting 285 vectors to entities
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
I

⠙ Processed 1 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠹ Processed 2 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠸ Processed 3 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠼ Processed 4 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠴ Processed 5 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠦ Processed 6 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠧ Processed 7 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠇ Processed 8 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠏ Processed 9 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠋ Processed 10 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠙ Processed 11 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠹ Processed 12 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠸ Processed 13 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠼ Processed 14 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠴ Processed 15 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠦ Processed 16 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠧ Processed 17 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠇ Processed 18 communities

INFO:nano-graphrag:JSON data successfully extracted.


⠏ Processed 19 communities


INFO:nano-graphrag:Writing graph with 295 nodes, 149 edges


indexing time: 35.29619359970093


In [26]:
def query(user_query):
    """Answer `user_query` with a global-mode GraphRAG query and print the reply."""
    graph_rag = GraphRAG(
        working_dir=WORKING_DIR,
        best_model_func=ollama_model_if_cache,
        cheap_model_func=ollama_model_if_cache,
        embedding_func=ollama_embedding,
    )
    global_param = QueryParam(mode="global")
    answer = graph_rag.query(user_query, param=global_param)
    print(answer)


In [31]:
# Ask one question about the indexed text; query() prints the answer it gets
# from a global-mode GraphRAG query.
user_query = "what is the topic of the book?"
query(user_query)


INFO:nano-graphrag:Load KV full_docs with 1 data
INFO:nano-graphrag:Load KV text_chunks with 42 data
INFO:nano-graphrag:Load KV llm_response_cache with 106 data
INFO:nano-graphrag:Load KV community_reports with 19 data
INFO:nano-graphrag:Loaded graph from ./nano_graphrag_cache_ollama_TEST/graph_chunk_entity_relation.graphml with 295 nodes, 149 edges
INFO:nano-vectordb:Load (285, 768) data
INFO:nano-vectordb:Init {'embedding_dim': 768, 'metric': 'cosine', 'storage_file': './nano_graphrag_cache_ollama_TEST/vdb_entities.json'} 285 data
INFO:nano-graphrag:Revtrieved 19 communities
INFO:nano-graphrag:Grouping to 1 groups for global search
INFO:nano-graphrag:JSON data successfully extracted.


**Topic of the Book**

Based on the reports from multiple analysts, it appears that the book's primary focus revolves around two main themes.

### 1. **Ebenezer Scrooge's Transformation**

The most important aspect of the book, according to Analyst 0 (Importance Score: 90), is Ebenezer Scrooge's transformation and experiences during Christmas. The story explores themes such as kindness, generosity, and redemption, highlighting Scrooge's personal growth throughout the narrative.

### 2. **Character Relationships and Themes**

The second most important aspect, according to Analyst 0 (Importance Score: 85), is the exploration of relationships between characters like Jacob Marley, The Spirit of Christmas Past, and Tiny Tim. These interactions play a crucial role in shaping Scrooge's understanding and behavior, adding depth to the story.

**Key Takeaways**
----------------

* The book primarily focuses on Ebenezer Scrooge's transformation and experiences during Christmas.
* The story also d

-----------------------------------------------下面是细化的全局查询流程-------------------------------------------