In [1]:
from typing import List, Dict
from qdrant_client import QdrantClient
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from clearml import Task

huggingface_token = "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"
model_name = "HuggingFaceTB/SmolLM2-135M"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=huggingface_token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=huggingface_token).half()
# Add padding token to avoid padding error
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

def retrieve_relevant_chunks(user_input: str, collection_name: str = "", top_k: int = 5) -> List[Dict]:
    """根据用户输入从 Qdrant 中检索相关的文本块"""
    task = Task.init(project_name="Retrive_Pipeline", task_name="User Input Retrieval")
    logger = task.get_logger()

    client = QdrantClient(host="localhost", port=6333)

    print(f"Using device: {device}")

    try:
        logger.report_text(f"Generating embedding for user input: {user_input}")
        inputs = tokenizer(user_input, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
        print("Tokenized inputs:", inputs)

        with torch.no_grad():
            outputs = model(**inputs)
            user_embedding = outputs.logits.mean(dim=1).squeeze().tolist()
            print("Generated embedding size:", len(user_embedding))
            print("Generated embedding:", user_embedding[:3])  # 打印嵌入前5个值

        logger.report_text("User input embedding generated successfully.")
    except Exception as e:
        logger.report_text(f"Error generating embedding for user input: {e}")
        print(f"Error generating embedding: {e}")
        task.close()
        return []

    try:
        logger.report_text("Retrieving relevant chunks from Qdrant.")
        collections = client.get_collections()
        print("Available collections in Qdrant:", collections)

        search_result = client.search(
            collection_name=collection_name,
            query_vector=user_embedding,
            limit=top_k
        )
        print("Search results:", search_result)

        logger.report_text(f"Retrieved {len(search_result)} relevant chunks from Qdrant.")
    except Exception as e:
        logger.report_text(f"Error retrieving chunks from Qdrant: {e}")
        print(f"Error retrieving chunks: {e}")
        task.close()
        return []

    retrieved_chunks = []
    for point in search_result:
        if "text" in point.payload:
            retrieved_chunks.append({"text": point.payload["text"], "score": point.score})
        else:
            print("Warning: 'text' key not found in payload.")
            logger.report_text("Warning: 'text' key not found in payload.")

    task.close()
    return retrieved_chunks

def generate_prompt(user_input: str, retrieved_chunks: List[Dict]) -> str:
    """生成包含检索到的文档内容和用户输入的 Prompt"""
    prompt_template = "Here are some relevant documents to answer your query:\n"
    for chunk in retrieved_chunks[:3]:  # 限制为前3个片段
        prompt_template += f"- {chunk['text']}\n"

    prompt_template += f"\nUser question: {user_input}\nAnswer:"
    #print("Generated prompt:", prompt_template[:500])  # 打印前500字符的 prompt
    return prompt_template

def generate_answer(prompt: str) -> str:
    """使用 smollm 135 生成回答"""
    task = Task.init(project_name="Retrive_Pipeline", task_name="Response Generation")
    logger = task.get_logger()

    try:
        logger.report_text("Generating answer using smollm 135.")
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
        print("Prompt inputs:", inputs)

        with torch.no_grad():
            outputs = model.generate(
                inputs['input_ids'],
                attention_mask=inputs['attention_mask'],  # 确保 attention_mask 被传递
                max_new_tokens=150,  # 限制生成的 token 数量
                do_sample=True,  # 启用采样模式
                temperature=0.7  # 控制生成多样性
            )
            response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        #print("Generated response:", response_text)
        logger.report_text("Answer generated successfully.")
    except Exception as e:
        logger.report_text(f"Error generating answer: {e}")
        print(f"Error generating answer: {e}")
        response_text = "Sorry, I couldn't generate an answer at this time."

    task.close()
    return response_text


user_input = "Tell me how can I navigate to a specific pose - include replanning aspects in your answer."
user_input_2 = "Can you provide me with code for this task?"

retrieved_chunks = retrieve_relevant_chunks(user_input, "github_embedding", 5)
if not retrieved_chunks:
    print("No relevant documents found.")
else:
    prompt = generate_prompt(user_input, retrieved_chunks)
    answer = generate_answer(prompt)
    print("Generated Answer:\n", answer)


retrieved_chunks = retrieve_relevant_chunks(user_input_2, "github_embedding", 5)
if not retrieved_chunks:
    print("No relevant documents found.")
else:
    prompt = generate_prompt(user_input_2, retrieved_chunks)
    answer = generate_answer(prompt)
    print("Generated Answer:\n", answer)

ClearML Task: created new task id=fb29a9481bb04c468e5423669bb83df9
2024-12-08 23:08:42,018 - clearml.Task - INFO - Storing jupyter notebook directly as code
ClearML results page: https://app.clear.ml/projects/3f463b1b1b52479ea3acd087ccf688c9/experiments/fb29a9481bb04c468e5423669bb83df9/output/log
Using device: cuda
Generating embedding for user input: Tell me how can I navigate to a specific pose - include replanning aspects in your answer.
Tokenized inputs: {'input_ids': tensor([[31530,   549,   638,   416,   339,  6776,   288,   253,  1678,  9571,
           731,  1453,  3842, 18808,  3260,   281,   469,  2988,    30]],
       device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
       device='cuda:0')}


  attn_output = torch.nn.functional.scaled_dot_product_attention(


Generated embedding size: 49152
Generated embedding: [15.6015625, -0.63916015625, -0.58251953125]
User input embedding generated successfully.
Retrieving relevant chunks from Qdrant.
Available collections in Qdrant: collections=[CollectionDescription(name='github_embedding'), CollectionDescription(name='demonstrate_embedding'), CollectionDescription(name='document_embeddings'), CollectionDescription(name='youtube_embedding'), CollectionDescription(name='medium_embedding')]
Search results: [ScoredPoint(id='48487901-3f56-4485-811a-2f6d7baa4e10', version=1139, score=0.99143386, payload={'text': "be the one that minimizes the robot's velocity while maintaining its direction.", 'domain': 'github.com', 'path': '/moveit/moveit2', 'query': ''}, vector=None, shard_key=None, order_value=None), ScoredPoint(id='b3591af3-9e72-4ace-a5c9-1908848d8bb2', version=3128, score=0.9912358, payload={'text': 'robot should move to the desired position. All axis should start and stop at the same time. ---', 'do



ClearML Task: created new task id=9e2f40dba18448a19614752bb6003973
ClearML results page: https://app.clear.ml/projects/3f463b1b1b52479ea3acd087ccf688c9/experiments/9e2f40dba18448a19614752bb6003973/output/log
Generating answer using smollm 135.
Prompt inputs: {'input_ids': tensor([[ 4590,   359,   634,  4006,  5660,   288,  2988,   469,  8520,    42,
           198,    29,   325,   260,   582,   338, 28767,   260,  8085,   506,
         10874,   979,  4719,   624,  4376,    30,   198,    29,  8085,   868,
          1485,   288,   260,  6253,  2548,    30,  2018,  6867,   868,  1120,
           284,  2853,   418,   260,  1142,   655,    30, 21749,   198,    29,
          1679, 17920,    30,  4454,    28,   357,   314,  1636,   288,  5202,
         17990, 11160,    28,   527, 13144,   260,  8085,  2853,   281,  3433,
           282,   253, 17990,  1569,    30,   198,   198, 11126,  1962,    42,
         17269,   549,   638,   416,   339,  6776,   288,   253,  1678,  9571,
           731, 

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Answer generated successfully.




Generated Answer:
 Here are some relevant documents to answer your query:
- be the one that minimizes the robot's velocity while maintaining its direction.
- robot should move to the desired position. All axis should start and stop at the same time. ---
- local trajectory. Additionally, it is possible to enable collision checking, which lets the robot stop in front of a collision object.

User question: Tell me how can I navigate to a specific pose - include replanning aspects in your answer.
Answer: You can use the position and orientation of the robot to find the desired pose. You can also use a pose-based environment, which allows the robot to move around the environment.

How can I find the desired pose?
Answer: You can find the desired pose by using the robot position and orientation, but you should also consider the robot's speed. The more you decrease the speed, the more you can find the desired pose. The goal is to find the pose that minimizes the robot's velocity.

How can I f



ClearML Task: created new task id=ef307aaec7df4467a830351b9cfc1de3
ClearML results page: https://app.clear.ml/projects/3f463b1b1b52479ea3acd087ccf688c9/experiments/ef307aaec7df4467a830351b9cfc1de3/output/log


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Generating answer using smollm 135.
Prompt inputs: {'input_ids': tensor([[ 4590,   359,   634,  4006,  5660,   288,  2988,   469,  8520,    42,
           198,    29,   282,   260,  4234,   428, 10022,    74, 10190, 19544,
           288,   288, 14556,   260, 33062,  2050,    30,  1117,  7964,  4928,
           510,  1893,   260, 15896, 14069,  4060,    79,  3221,    42, 27767,
          4029,   198,    29,   803,  2909,   288,  1167, 10295,   282,  1066,
          1000,  1672,  2399,  1028,   288, 11102,   749,   105,  1672,  1379,
         12216,   327,   288,    29,  1425,    29, 23083,  7748,  1672,  4835,
          5895,    29,  5835,    43,   787,  1345,  6603,  1971,   550,   670,
           253,  8542,   327,  1215,  8369,   366,   267,  9573, 10646,   282,
           260,  1898,   365, 12609, 10479,   281,   253,  4405,   282, 10646,
           422,    28,  1593,   260, 10479,  2258,   260,  2376,   282,   260,
          4405,    25,  1672,  4835,  1456, 17937,  4107,   216,  



Answer generated successfully.




Generated Answer:
 Here are some relevant documents to answer your query:
- of the topic RVIZ subscribes to to visualize the EE path. An empty string disables the publisher.", default_value: "", }
- add code to allow execution of follow() * port test to groovy * placeholder for to-be-added algorithm * minor touch-ups; no real functional changes other than a bias for state samplers wrt dimension of the space (when sampling in a ball of dimension D, focus the sampling towards the surface of the ball) * minor & incomplete fix 0.2.5 (2012-11-26) ------------------ * update to new message API 0.2.4 (2012-11-23) ------------------ * improve error message * stricter error checking * update include path 0.2.3 (2012-11-21 22:47) ------------------------ * use generalized version of getMaximumExtent() 0.2.2 (2012-11-21 22:41) ------------------------ * more fixes to planners * removed bad include dir * fixed some plugin issues * fixed include dirs in ompl ros interface * added gitignore for ompl