In [22]:
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import DocumentSummaryIndex
from langchain_ollama import OllamaEmbeddings
from llama_index.core import VectorStoreIndex, SummaryIndex
# Embedding model served through Ollama (LangChain wrapper class).
qwen_emb = OllamaEmbeddings(
    model="rjmalagon/gte-qwen2-7b-instruct:f16",
)

# Connect to the Chroma server with the client's default settings and open
# the collection, creating it on first use.
chroma_client = chromadb.HttpClient()
chroma_collection = chroma_client.get_or_create_collection("new_db_line")

# Wrap the collection as a LlamaIndex vector store. No embedding model is
# passed here: ChromaVectorStore has no such parameter, and the embedder is
# supplied where an index is built (`embed_model=...`) or through
# `Settings.embed_model`.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)



In [23]:
from llama_index.llms.ollama import Ollama

# Chat/completion model reached through the Ollama LlamaIndex integration.
qwen = Ollama(
    model="qwen2:7b-instruct",
)

In [24]:
from langchain_ollama import OllamaEmbeddings

# Same embedding model as the one constructed in the first cell; this cell
# simply rebinds `qwen_emb` to a fresh instance.
qwen_emb = OllamaEmbeddings(model="rjmalagon/gte-qwen2-7b-instruct:f16")

In [130]:
# Embed two candidate documents (batch API, one text each) and one query
# (single-string API) for a quick similarity sanity check.
e_doc = qwen_emb.embed_documents(
    ["这是一个猫常用的玩具，充气之后玩"],
)
e_doc2 = qwen_emb.embed_documents(
    ["好"],
)
e_q = qwen_emb.embed_query(
    "有猫这个字",
)

In [131]:
import torch

# Promote the embedding lists to tensors (names are rebound in place).
e_doc = torch.tensor(e_doc)
e_doc2 = torch.tensor(e_doc2)
e_q = torch.tensor(e_q)

# Score = dot product of the query vector with each document vector, x100.
# NOTE(review): in the recorded run the one-character document scored
# slightly higher than the cat sentence — worth checking the query setup.
scores = torch.matmul(e_q, e_doc.T) * 100
print(scores.tolist())

scores = torch.matmul(e_q, e_doc2.T) * 100
print(scores.tolist())

[51.86387252807617]
[52.478660583496094]


In [20]:
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama

# Register the notebook-wide LlamaIndex defaults for the LLM and the embedder.
Settings.llm = qwen
# NOTE(review): qwen_emb is a LangChain OllamaEmbeddings object, not a native
# LlamaIndex embedding class — confirm the LangChain adapter is installed.
Settings.embed_model = qwen_emb

In [None]:
# Display the raw embedding of a single-character query as a smoke test.
qwen_emb.embed_query("猫")

In [None]:
# Query the Chroma collection directly (bypassing LlamaIndex) with the
# embedded query text, asking for up to 20 results.
chroma_collection.query(
    query_embeddings=qwen_emb.embed_query("气球"),
    n_results=20,
)

In [7]:
from llama_index.core import Document

# Read nodes back from the Chroma-backed store. An empty id list is passed;
# presumably this returns every stored node — confirm for the installed
# ChromaVectorStore version.
nodes = vector_store.get_nodes([])

# Rebuild plain Documents from the node text only (metadata is not copied).
docs = [Document(text=stored_node.text) for stored_node in nodes]

In [None]:
# Display the full list of rebuilt documents.
docs


In [13]:
from llama_index.core import StorageContext, SummaryIndex

# Storage context backed by the Chroma vector store.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Vector index over what is already stored in Chroma; queries are embedded
# with qwen_emb.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=qwen_emb,
)

# Summary index built from the documents reloaded in an earlier cell.
s_index = SummaryIndex.from_documents(
    documents=docs,
    storage_context=storage_context,
)


In [21]:
# Ask the summary index about "气球" using a default query engine.
(
    s_index
    .as_query_engine()
    .query("气球")
)

Response(response='气球作为例子，在讨论中涉及到的是基于位置仿真的准确度提升及机器人的位置规划。这表明利用这样的仿真方法能显著提高抓取软性物体如气球的准确性。通过增加数据集量和优化操作流程，可以确保在实际应用中的表现更好。该技术不仅限于理论研究，也是解决通用抓取问题的关键所在，尤其是面对数据限制时。在实际案例中，已开发出仿真模型以估计物理参数，并构建了联合方程来辅助机器人操作。通过与真实环境的结合验证，展现了智能系统的重要性以及包括力觉、听觉、语言控制和视觉在内的多种感知能力的应用价值。尽管使用游戏引擎可提供快速响应，但需注意其在物理准确度上的局限性。因此，在面对未知物体抓取任务时，优化算法及充分的数据积累是提高机器人操作效率的关键。\n\n综上所述，气球案例展示了人工智能与机器人技术如何在复杂环境中进行协同工作，尤其是在解决非结构化物品的处理问题上，展现出通过仿真、数据驱动的方法来提升性能的技术路径。', source_nodes=[NodeWithScore(node=TextNode(id_='b49c627a-a4a1-484f-a408-37124fe206c6', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='88f8f9af-dc95-4a85-9c23-06cca8d154f8', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='437b2386e1dd7c3d419403fae5e510f720dce3d5c2de92528337c6c12af104c2')}, text='我们测出来的这个误差是相对比较小的', mimetype='text/plain', start_char_idx=0, end_char_idx=17, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', 

In [97]:
from llama_index.core import Document

# Repeat of the earlier reload step: fetch nodes from the vector store
# (empty id list) and rebuild text-only Documents from them.
nodes = vector_store.get_nodes([])
docs = [Document(text=node_item.text) for node_item in nodes]

In [None]:
# Display the nodes fetched from the vector store.
nodes

In [83]:
from llama_index.core.node_parser import HTMLNodeParser, SentenceSplitter
# Deliberately tiny chunks with overlap for experimentation.
# NOTE(review): sizes are presumably token counts — confirm in the
# SentenceSplitter docs for the installed version.
splitter = SentenceSplitter(
    chunk_size=8,
    chunk_overlap=3,
)

In [None]:
from llama_index.core.ingestion import IngestionPipeline

# Run the documents through a single-step ingestion pipeline: the sentence
# splitter is the only transformation (no embedding step is configured).
pipeline = IngestionPipeline(
    documents=docs,
    transformations=[splitter],
)
result = pipeline.run()

In [None]:
# Display what the ingestion pipeline returned.
result

In [None]:
# Rebinds `index` (assigned to a VectorStoreIndex in an earlier cell) to a
# document-summary index built from `docs`, chunked with `splitter`.
index = DocumentSummaryIndex.from_documents(
    documents=docs,
    transformations=[splitter],
)

In [None]:
# Query the current `index` for "猫" with a default query engine.
(
    index
    .as_query_engine()
    .query("猫")
)

In [None]:
# Inspect the prompt templates used by the default query engine.
(
    index
    .as_query_engine()
    .get_prompts()
)

In [None]:
# Retrieve nodes for the query with the index's default retriever.
# `response_mode` configures response synthesis on a query engine, not
# retrieval, so it is not passed to as_retriever.
index.as_retriever().retrieve("人工智能")

In [41]:
from llama_index.core.indices.document_summary import (
    DocumentSummaryIndexLLMRetriever,
)

In [42]:
# LLM-based retriever over the document-summary index. Every optional
# setting (choice_select_prompt, choice_batch_size, choice_top_k,
# format_node_batch_fn, parse_choice_select_answer_fn) is left unset.
retriever = DocumentSummaryIndexLLMRetriever(index=index)

In [None]:
# Run the current retriever on an English query and display the hits.
retriever.retrieve("cat")

In [46]:

from llama_index.core.indices.document_summary import (
    DocumentSummaryIndexEmbeddingRetriever,
)

In [47]:
# Rebind `retriever` to the embedding-based variant over the same index;
# similarity_top_k is left unset.
retriever = DocumentSummaryIndexEmbeddingRetriever(index=index)

In [52]:
# Retrieve with the current `retriever` and keep the hits for inspection.
retrieved_nodes = retriever.retrieve("猫")

In [None]:
# Print the text of the first retrieved node; indexing [0] raises IndexError
# if the retriever returned nothing.
print(retrieved_nodes[0].node.get_text())

In [54]:

from llama_index.core.response_synthesizers import TreeSummarize

In [None]:
# Inspect the prompt templates of a default-constructed TreeSummarize synthesizer.
TreeSummarize().get_prompts()

In [None]:

# Index `docs` into the "new_db_line" Chroma collection, using the client,
# documents and embedding model defined earlier in this notebook.
# NOTE(review): `docs` were rebuilt from nodes read out of this same
# collection, so running this cell inserts their text again — confirm that
# is intended before executing.
chroma_collection = chroma_client.get_or_create_collection("new_db_line")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

index = VectorStoreIndex.from_documents(
    docs, storage_context=storage_context, embed_model=qwen_emb
)

# Load from disk: open a separate on-disk Chroma database and build an index
# over its "quickstart" collection. This rebinds chroma_collection,
# vector_store and index.
db2 = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db2.get_or_create_collection("quickstart")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
index = VectorStoreIndex.from_vector_store(
    vector_store,
    embed_model=qwen_emb,
)