<a href="https://colab.research.google.com/github/xuxinyue18-dot/deepseekAPI/blob/main/deepseek.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 使用说明
1. 首次运行前，请确保已经安装 `openai`、`requests`、`beautifulsoup4` 等依赖。
2. 请在运行前将 `DEEPSEEK_API_KEY` 设置为有效的 DeepSeek API 密钥。若需启用联网搜索功能，请额外配置 `SERPER_API_KEY`。
3. 可以通过环境变量或其他安全方式提供密钥，避免将密钥写入代码。
4. 运行最后一个单元格后，即可通过命令行与 DeepSeek 进行对话。输入 `搜索：关键词` 触发联网搜索，输入 `继续` 复用上一次优化后的提示词，输入 `清除` 清除上下文，输入 `退出` 结束对话。


In [None]:
# 如果在全新环境中运行，请先安装所需依赖。
# 在 Jupyter/Colab 中可以取消注释下一行命令：
# %pip install openai requests beautifulsoup4


In [None]:
import ipaddress
import os
import socket
import sys
import time
from typing import Any, Dict, List, Optional, Tuple, Union
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup
from openai import OpenAI

# Endpoint handed to the OpenAI client as ``base_url``.
BASE_URL = "https://api.deepseek.com"
# Read from the environment; the "YOUR_..." fallback is a placeholder that
# _require_api_key rejects.
DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY", "YOUR_DEEPSEEK_API_KEY")
# Optional: search_web reports the search feature as disabled when this is unset.
SERPER_API_KEY = os.getenv("SERPER_API_KEY")


def _require_api_key(value: str, env_name: str) -> str:
    """Return ``value`` when it looks like a real API key.

    Args:
        value: Candidate key, typically read from the environment.
        env_name: Name of the environment variable, used in the error message.

    Returns:
        ``value`` unchanged.

    Raises:
        ValueError: If ``value`` is empty or still a ``YOUR_...`` placeholder.
    """
    looks_usable = bool(value) and not value.startswith("YOUR_")
    if looks_usable:
        return value
    raise ValueError(
        f"请先设置 {env_name} 环境变量，或在代码中替换占位符为真实密钥。"
    )


# Shared OpenAI-compatible client pointed at BASE_URL. Building it validates the
# key first, so this statement raises ValueError when DEEPSEEK_API_KEY is
# missing or still the placeholder.
client = OpenAI(
    api_key=_require_api_key(DEEPSEEK_API_KEY, "DEEPSEEK_API_KEY"),
    base_url=BASE_URL,
)



In [None]:
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
import re
import textwrap


def print_streaming(text: str, delay: float = 0.02) -> None:
    """Write ``text`` to stdout one character at a time (typewriter effect).

    Args:
        text: Text to print; a trailing newline is always appended.
        delay: Seconds to sleep after each character.
    """
    out = sys.stdout
    for symbol in text:
        out.write(symbol)
        # Flush per character so the effect is visible on buffered terminals.
        out.flush()
        time.sleep(delay)
    out.write("\n")
    out.flush()


@dataclass
class OptimizedPrompt:
    """Hold an optimized prompt together with its evaluation details."""

    # Stripped user input that was submitted for optimization.
    original: str
    # Polished text returned by the model, or the original text when
    # optimization fell back.
    optimized: str
    # ``optimized`` wrapped in the structured template; the CLI may overwrite
    # it with a user-edited prompt.
    structured: str
    # Rule-based quality score from review_prompt, clamped to 0-100.
    score: int
    # Improvement suggestions from review_prompt (empty when nothing was flagged).
    feedback: List[str]


# System message for the prompt-polishing model call made by PromptOptimizer.
OPTIMIZER_SYSTEM_PROMPT = (
    "你是一名提示词工程专家。请在确保语义不变的前提下，"
    "将输入润色为结构化、包含目标与约束的高质量提示词。"
)

# ``str.format`` template with three fields: background, optimized_prompt and
# output_requirements. The section headings are also what review_prompt looks
# for when scoring.
STRUCTURED_PROMPT_TEMPLATE = textwrap.dedent(
    """\
    ### 角色
    你是一名专业的智能助手，擅长拆解复杂问题并提供循序渐进的解决方案。

    ### 背景
    {background}

    ### 用户请求
    {optimized_prompt}

    ### 输出要求
    {output_requirements}
    """
)


# Returned by get_webpage_content in place of page text when a URL fails the
# safety check.
UNSAFE_URL_MESSAGE = "检测到不受信任的链接，已跳过内容抓取。"

class PromptOptimizer:
    """Polish prompts with the model, wrap them in the structured template, and score them.

    Successful optimizations are cached per stripped prompt. Results produced
    by the fallback path (API error, malformed or empty response) are returned
    to the caller but never cached, so a transient failure is retried the next
    time the same prompt is submitted.
    """

    def __init__(self) -> None:
        # Successful optimizations keyed by the stripped original prompt.
        self._cache: Dict[str, OptimizedPrompt] = {}
        # Private snapshot of the payload most recently passed to mark_used.
        self._last_used: Optional[OptimizedPrompt] = None

    def optimize(self, prompt: str) -> OptimizedPrompt:
        """Return the optimized, structured and scored form of ``prompt``.

        Args:
            prompt: Raw user text; surrounding whitespace is ignored.

        Returns:
            A payload the caller owns and may freely mutate.

        Raises:
            ValueError: If ``prompt`` is empty after stripping.
        """
        normalized = prompt.strip()
        if not normalized:
            raise ValueError("提示词不能为空。")

        cached = self._cache.get(normalized)
        if cached is not None:
            return self._clone_payload(cached)

        model_output = self._request_optimization(normalized)
        optimized = model_output or normalized
        structured = build_structured_prompt(optimized)
        score, feedback = review_prompt(structured)

        payload = OptimizedPrompt(
            original=normalized,
            optimized=optimized,
            structured=structured,
            score=score,
            feedback=feedback,
        )
        # Only cache genuine model output; caching the fallback would pin the
        # unoptimized prompt forever after a single transient failure.
        if model_output is not None:
            self._cache[normalized] = self._clone_payload(payload)
        return payload

    def reuse_last(self) -> Optional[OptimizedPrompt]:
        """Return a copy of the payload last passed to ``mark_used``, or ``None``."""
        if self._last_used is None:
            return None
        return self._clone_payload(self._last_used)

    def _clone_payload(self, payload: OptimizedPrompt) -> OptimizedPrompt:
        """Copy ``payload`` so stored and returned objects never share state."""
        return OptimizedPrompt(
            original=payload.original,
            optimized=payload.optimized,
            structured=payload.structured,
            score=payload.score,
            feedback=list(payload.feedback),
        )

    def mark_used(self, payload: OptimizedPrompt) -> None:
        """Record the prompt that was actually sent to the model.

        A copy is stored, matching the defensive copying used for the cache,
        so later mutation of ``payload`` by the caller cannot alter it.
        """
        self._last_used = self._clone_payload(payload)

    def _request_optimization(self, prompt: str) -> Optional[str]:
        """Ask the model to polish ``prompt``.

        Returns:
            The stripped model output, or ``None`` when the call failed, the
            response had an unexpected shape, or the output was empty.
        """
        try:
            response = client.chat.completions.create(
                model="deepseek-chat",
                messages=[
                    {"role": "system", "content": OPTIMIZER_SYSTEM_PROMPT},
                    {"role": "user", "content": prompt},
                ],
                temperature=0,
                top_p=1,
                stream=False,
            )
        except Exception as exc:  # best-effort: any API failure falls back
            print_streaming(f"提示词优化失败，将使用原始问题：{exc}")
            return None

        try:
            content = response.choices[0].message.content.strip()
        except (AttributeError, IndexError, KeyError, TypeError):
            # Covers a missing/None ``choices`` list as well as None content.
            print_streaming("提示词优化失败，将使用原始问题：响应格式不符合预期。")
            return None
        return content or None

    def _call_model(self, prompt: str) -> str:
        """Return the polished prompt, or ``prompt`` itself when optimization fails."""
        return self._request_optimization(prompt) or prompt



def _escape_braces(value: str) -> str:
    """Double every ``{`` and ``}`` in ``value``.

    The result can be embedded in a ``str.format`` template, where doubled
    braces are rendered as literal single braces.
    """
    doubled = {ord("{"): "{{", ord("}"): "}}"}
    return value.translate(doubled)


def _contains_any(text: str, keywords: Tuple[str, ...]) -> bool:
    """Report whether at least one keyword occurs in ``text``.

    Each keyword is tried verbatim first and then with both sides lowercased,
    which makes the match case-insensitive.
    """
    folded_text = text.lower()
    for candidate in keywords:
        if candidate in text:
            return True
        if candidate.lower() in folded_text:
            return True
    return False


def _infer_background(optimized_prompt: str) -> str:
    """Pick the background sentence for the structured prompt.

    Search-result markers take precedence; otherwise the first topic rule
    whose keywords occur in the prompt wins, and a generic sentence is
    returned when nothing matches.
    """
    search_markers = ("搜索结果", "标题:", "链接:", "内容:")
    if _contains_any(optimized_prompt, search_markers):
        return "已提供来自网络搜索的资料，请先概括资料并结合自身知识回答。"

    # Ordered (keywords, background) pairs: earlier entries have priority.
    topic_rules: Tuple[Tuple[Tuple[str, ...], str], ...] = (
        (
            ("代码", "编程", "开发", "algorithm", "函数", "script"),
            "用户正在寻求技术方案或代码示例，请结合最佳实践给出答案。",
        ),
        (
            ("策略", "规划", "计划", "roadmap", "方案", "milestone"),
            "用户期望获得系统化的策略或执行计划，请提供分步骤指导。",
        ),
        (
            ("写作", "撰写", "文案", "文章", "报告", "总结"),
            "用户需要高质量的内容创作，请结合受众和目标组织结构与语气。",
        ),
    )

    return next(
        (
            hint
            for triggers, hint in topic_rules
            if _contains_any(optimized_prompt, triggers)
        ),
        "暂无额外背景，请根据既有知识准确回答。",
    )


def _infer_output_requirements(optimized_prompt: str) -> List[str]:
    """Build the output-requirement list for the structured prompt.

    Four fixed requirements always come first; each keyword rule that matches
    the prompt appends one more, in rule order. Duplicates are removed while
    keeping the first occurrence.
    """
    requirements: List[str] = [
        "使用中文回答。",
        "先给出结论，再展示关键推理步骤。",
        "如引用外部资料，请标注其作用或来源。",
        "语言保持专业、客观、简明。",
    ]

    # (keywords, requirement) pairs evaluated independently of one another.
    keyword_rules: Tuple[Tuple[Tuple[str, ...], str], ...] = (
        (
            ("搜索结果", "标题:", "链接:", "内容:"),
            "优先引用搜索结果中的关键信息，并标注出处。",
        ),
        (
            ("代码", "编程", "algorithm", "函数", "script", "sql"),
            "提供可运行的代码或伪代码示例，并解释关键实现细节。",
        ),
        (
            ("步骤", "流程", "过程", "指南", "instruction"),
            "按照编号列出执行步骤，确保可操作性。",
        ),
        (
            ("比较", "对比", "优缺点", "差异", "pros", "cons"),
            "以表格或分点形式比较不同选项的优缺点。",
        ),
        (
            ("计划", "规划", "roadmap", "里程碑", "milestone", "时间表"),
            "给出分阶段的计划，并说明每个阶段的目标与衡量标准。",
        ),
        (
            ("总结", "概括", "综述", "复盘"),
            "附上简明摘要，突出核心结论与后续行动建议。",
        ),
        (
            ("数据", "指标", "统计", "数字", "data"),
            "列出关键数据或指标，并解释它们的意义或来源。",
        ),
    )

    requirements.extend(
        extra
        for triggers, extra in keyword_rules
        if _contains_any(optimized_prompt, triggers)
    )

    # dict.fromkeys drops duplicates while preserving insertion order.
    return list(dict.fromkeys(requirements))


def _format_bullet_list(items: List[str]) -> str:
    """Render the non-empty ``items`` as a ``- `` bullet list, one per line.

    Braces are deliberately left untouched: the result is passed to
    ``str.format`` as an argument value, and argument values are inserted
    verbatim, so doubling braces here would leave ``{{``/``}}`` in the output.

    Args:
        items: Requirement strings; empty entries are skipped.

    Returns:
        The bullet list, or an empty string when there is nothing to list.
    """
    return "\n".join(f"- {entry}" for entry in items if entry)


def build_structured_prompt(optimized_prompt: str) -> str:
    """Wrap an optimized prompt in the structured template.

    The background and output requirements are inferred from the prompt text.
    Values are passed to ``str.format`` unescaped: only the template is parsed
    for replacement fields, so escaping the arguments would turn every ``{``
    or ``}`` in the user's prompt (code, JSON, ...) into a doubled brace in
    the final text.

    Args:
        optimized_prompt: Prompt text, typically the model-polished version.

    Returns:
        The filled-in template with surrounding whitespace removed.
    """
    cleaned_prompt = optimized_prompt.strip()
    background = _infer_background(cleaned_prompt)
    requirements = _infer_output_requirements(cleaned_prompt)

    return STRUCTURED_PROMPT_TEMPLATE.format(
        background=background,
        optimized_prompt=cleaned_prompt,
        output_requirements=_format_bullet_list(requirements),
    ).strip()


def review_prompt(prompt: str) -> Tuple[int, List[str]]:
    """Score a prompt with simple heuristics and suggest improvements.

    The score starts at 100 and loses a fixed penalty for every rule that
    fires; each fired rule also contributes one feedback message, in rule
    order.

    Returns:
        ``(score, feedback)`` with the score clamped to the range 0-100.
    """
    size = len(prompt)
    # (rule fired?, penalty, feedback message), evaluated in this order.
    checks = (
        (size < 120, 15, "提示词较短，可补充更多上下文或期望输出。"),
        (size > 1800, 10, "提示词过长，建议删减无关描述以聚焦重点。"),
        (
            "### 输出要求" not in prompt,
            10,
            "缺少输出要求，可明确格式、语言或内容限制。",
        ),
        (
            "### 用户请求" not in prompt,
            10,
            "建议显式指出用户的核心需求，提升清晰度。",
        ),
        (prompt.count("\n- ") < 3, 5, "输出要求建议列出多个可执行要点。"),
        (
            re.search(r"[。．.!?]", prompt) is None,
            5,
            "提示词缺少完整句子，建议补充清晰的描述。",
        ),
    )

    fired = [(penalty, note) for failed, penalty, note in checks if failed]
    total = 100 - sum(penalty for penalty, _ in fired)
    notes: List[str] = [note for _, note in fired]
    return max(0, min(100, total)), notes


# Module-wide optimizer instance used by run_cli_assistant; its cache and
# last-used prompt live for the lifetime of the process.
prompt_optimizer = PromptOptimizer()


def search_web(query: str, num_results: int = 3) -> Union[Dict[str, Any], str]:
    """Search the web through the Serper API.

    Args:
        query: Search keywords; surrounding whitespace is ignored.
        num_results: Number of results requested from the API.

    Returns:
        The decoded JSON response (a dict with a non-empty ``organic`` list)
        on success, otherwise a human-readable error message as ``str``.
    """
    query = query.strip()
    if not query:
        return "搜索关键词不能为空。"
    if not SERPER_API_KEY:
        return "搜索功能未启用：请先配置 SERPER_API_KEY。"

    payload = {"q": query, "num": num_results}
    headers = {
        "X-API-KEY": SERPER_API_KEY,
        "Content-Type": "application/json",
    }

    try:
        response = requests.post(
            "https://google.serper.dev/search",
            json=payload,
            headers=headers,
            timeout=15,
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        return f"搜索请求失败：{exc}"

    # Decode separately: newer requests raises a JSON error that is both a
    # RequestException and a ValueError, so sharing one ``try`` with the
    # network call would report a parse failure as a request failure.
    try:
        data = response.json()
    except ValueError:
        return "搜索响应解析失败。"

    # Valid JSON is not necessarily an object with a usable result list.
    if not isinstance(data, dict):
        return "搜索未返回有效结果。"
    organic = data.get("organic")
    if not organic or not isinstance(organic, list):
        return "搜索未返回有效结果。"
    return data


def _is_public_ip(ip: Union[ipaddress.IPv4Address, ipaddress.IPv6Address]) -> bool:
    """Return True when ``ip`` is not in a private or otherwise special range."""
    return not (
        ip.is_private
        or ip.is_loopback
        or ip.is_multicast
        or ip.is_reserved
        or ip.is_link_local
        or ip.is_unspecified
    )


def _is_safe_url(url: str) -> bool:
    """Validate that ``url`` targets a public HTTP(S) endpoint.

    Literal IP addresses are checked directly. Host names are resolved and
    every resolved address must be public, so names that point at loopback or
    private ranges (``localhost.``, ``*.localhost``, decimal hosts such as
    ``2130706433``, internal DNS records) are rejected. Malformed URLs and
    names that cannot be resolved are treated as unsafe.

    NOTE: resolution performs a blocking DNS lookup, and the HTTP client
    resolves the name again later, so DNS rebinding is not fully prevented.

    Args:
        url: Candidate URL; surrounding whitespace is ignored.

    Returns:
        True only when the URL is http/https and every address is public.
    """
    try:
        parsed = urlparse(url.strip())
        hostname = parsed.hostname
    except ValueError:
        # e.g. unbalanced IPv6 brackets such as "http://[::1"
        return False
    if parsed.scheme not in {"http", "https"}:
        return False
    if not hostname:
        return False

    # Drop an IPv6 zone id and a trailing root dot before comparing.
    host = hostname.split("%", 1)[0].rstrip(".").lower()
    if not host or host == "localhost" or host.endswith(".localhost"):
        return False

    try:
        addresses = [ipaddress.ip_address(host)]
    except ValueError:
        # Not an IP literal: resolve the name and vet every address.
        try:
            resolved = socket.getaddrinfo(host, None)
        except (OSError, UnicodeError):
            return False
        addresses = []
        for entry in resolved:
            raw_address = str(entry[4][0]).split("%", 1)[0]
            try:
                addresses.append(ipaddress.ip_address(raw_address))
            except ValueError:
                return False
        if not addresses:
            return False

    return all(_is_public_ip(address) for address in addresses)

def get_webpage_content(url: str) -> str:
    """Fetch a web page and return up to 1000 characters of its visible text.

    Redirects are followed manually (at most five hops) so that every hop is
    passed through ``_is_safe_url``; automatic redirect handling would let a
    public URL bounce the request to an internal address. When the server
    does not declare a charset, the encoding is detected from the body
    instead of falling back to ISO-8859-1, which garbles non-Latin pages.

    Args:
        url: Page URL; surrounding whitespace is ignored.

    Returns:
        The extracted text, ``UNSAFE_URL_MESSAGE`` when the URL (or a redirect
        target) is rejected, or an error message when the request fails.
    """
    current_url = url.strip()
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
        )
    }
    max_redirects = 5

    try:
        for _ in range(max_redirects + 1):
            if not _is_safe_url(current_url):
                return UNSAFE_URL_MESSAGE
            response = requests.get(
                current_url,
                headers=headers,
                timeout=15,
                allow_redirects=False,
            )
            if not response.is_redirect:
                break
            try:
                # Location may be relative; resolve it against the current hop.
                current_url = urljoin(current_url, response.headers["location"])
            except ValueError:
                return UNSAFE_URL_MESSAGE
        else:
            # Every allowed hop was itself a redirect.
            raise requests.exceptions.TooManyRedirects(
                f"Exceeded {max_redirects} redirects."
            )

        response.raise_for_status()
        if "charset" not in response.headers.get("content-type", "").lower():
            response.encoding = response.apparent_encoding

        soup = BeautifulSoup(response.text, "html.parser")
        for script in soup(["script", "style"]):
            script.decompose()
        text = soup.get_text(separator="\n", strip=True)
        lines = [
            line
            for line in (segment.strip() for segment in text.split("\n"))
            if line
        ]
        return "\n".join(lines)[:1000]
    except requests.RequestException as exc:
        return f"获取网页内容出错：{exc}"


def get_ai_streaming_response(messages: List[Dict[str, str]], temperature: float = 0.0) -> str:
    """Stream a DeepSeek reply to stdout and return the complete text.

    Args:
        messages: Chat history in OpenAI message format.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The stripped reply. If the request cannot be started, or the stream
        breaks before any text arrives, an error message is printed and
        returned instead. If the stream breaks after some text arrived, the
        partial reply is returned.
    """
    try:
        response = client.chat.completions.create(
            model="deepseek-chat",
            messages=messages,
            temperature=temperature,
            top_p=1,
            stream=True,
        )
    except Exception as exc:
        error_message = f"API 调用失败：{exc}"
        print_streaming(error_message)
        return error_message

    fragments: List[str] = []
    stream_error: Optional[str] = None
    sys.stdout.write("助手: ")
    sys.stdout.flush()

    try:
        for chunk in response:
            # Some chunks (for example a trailing usage record) carry no choices.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content
            if delta:
                fragments.append(delta)
                sys.stdout.write(delta)
                sys.stdout.flush()
                time.sleep(0.01)
    except Exception as exc:
        # A connection drop mid-stream must not take down the whole CLI.
        stream_error = f"API 调用失败：{exc}"

    sys.stdout.write("\n")
    sys.stdout.flush()

    if stream_error is not None:
        print_streaming(stream_error)
        if not fragments:
            return stream_error
    return "".join(fragments).strip()


# System message that opens every conversation. run_cli_assistant copies it
# when starting and when clearing the history.
SYSTEM_PROMPT = {
    "role": "system",
    "content": textwrap.dedent(
        """\
        你是 DeepSeek 智能助手，需严格遵守以下要求：
        - 当用户请求“搜索”或提供搜索资料时，先总结资料再回答。
        - 若无外部信息，则依靠已有知识提供结构化、可执行的建议。
        - 回答需使用中文，确保条理清晰、步骤明确。
        """
    ).strip(),
}


# Accepted prefixes for a search command (ASCII and full-width colon). Both
# have the same length, which _run_dialogue relies on when slicing.
_SEARCH_PREFIXES = ("搜索:", "搜索：")


def _send_to_model(messages: List[Dict[str, str]], content: str) -> None:
    """Append ``content`` as a user turn, stream the reply, and record it."""
    messages.append({"role": "user", "content": content})
    assistant_reply = get_ai_streaming_response(messages)
    messages.append({"role": "assistant", "content": assistant_reply})
    print()


def _build_search_prompt(search_query: str) -> str:
    """Search the web for ``search_query`` and return the text to optimize."""
    print_streaming(f"正在搜索：{search_query}")
    search_results = search_web(search_query)

    if isinstance(search_results, str):
        # search_web signals failure with a message; fall back to the bare query.
        print_streaming(f"搜索失败，将直接使用 AI 回答。（{search_results}）")
        return search_query

    combined_info: List[str] = []
    for result in search_results.get("organic", [])[:2]:
        title = result.get("title", "")
        link = result.get("link", "")
        snippet = result.get("snippet", "")
        content = get_webpage_content(link)
        combined_info.append(
            f"标题: {title}\n链接: {link}\n摘要: {snippet}\n内容: {content}\n"
        )

    if combined_info:
        return (
            f"请基于以下搜索结果回答关于 '{search_query}' 的问题：\n\n"
            + "\n---\n".join(combined_info)
        )
    return f"搜索 '{search_query}' 没有找到足够的信息，请结合已有知识回答。"


def _show_optimization(payload: OptimizedPrompt) -> None:
    """Print the optimized prompt, its structured form, score and feedback."""
    print_streaming(f"优化后的提示词：{payload.optimized}")
    print_streaming("结构化提示词如下，可二次确认或复用：")
    print_streaming(payload.structured)
    print_streaming(f"提示词质量评分：{payload.score}/100")
    if payload.feedback:
        print_streaming("改进建议：")
        for item in payload.feedback:
            print_streaming(f"- {item}")


def _confirm_structured_prompt(payload: OptimizedPrompt) -> Optional[str]:
    """Ask the user to accept, reject or edit the structured prompt.

    Returns the prompt to send, or ``None`` when the user rejects it. An
    edited prompt is also written back to ``payload.structured``.
    """
    print_streaming("是否使用该结构化提示词继续对话？输入 y 确认，n 放弃，e 手动编辑。")
    while True:
        choice = input("确认 (y/n/e): ").strip().lower()
        if choice in {"", "y", "yes"}:
            return payload.structured
        if choice in {"n", "no"}:
            print_streaming("已取消本次优化，请重新输入问题或调整后再试。")
            return None
        if choice in {"e", "edit"}:
            print_streaming("请输入修改后的结构化提示词，结束请输入单独一行 END：")
            custom_lines: List[str] = []
            while True:
                line = input()
                if line.strip().upper() == "END":
                    break
                custom_lines.append(line)
            custom_prompt = "\n".join(custom_lines).strip()
            if not custom_prompt:
                print_streaming("未检测到有效文本，将继续使用自动生成的结构化提示词。")
                return payload.structured
            payload.structured = custom_prompt
            return custom_prompt
        print_streaming("请输入 y 使用，n 放弃，或 e 编辑。")


def _run_dialogue(messages: List[Dict[str, str]]) -> None:
    """Process user turns until the user asks to exit."""
    while True:
        user_input = input("用户: ").strip()
        command = user_input.lower()

        if command in {"退出", "exit"}:
            return

        if command in {"清除", "clear"}:
            # Reset in place so the caller's list is the one that is cleared.
            messages[:] = [SYSTEM_PROMPT.copy()]
            print_streaming("历史记录已清除！")
            continue

        if command in {"继续", "repeat"}:
            reused = prompt_optimizer.reuse_last()
            if not reused:
                print_streaming("暂无可复用的提示词，请先输入新问题。")
                continue
            print_streaming("复用上一次的优化提示词，直接进入回答阶段。")
            _send_to_model(messages, reused.structured)
            continue

        if user_input.startswith(_SEARCH_PREFIXES):
            # Slice the prefix off instead of normalising colons, so colons
            # inside the query itself are left exactly as typed.
            search_query = user_input[len(_SEARCH_PREFIXES[0]):].strip()
            if not search_query:
                print_streaming("请输入搜索关键词！")
                continue
            user_input = _build_search_prompt(search_query)

        print_streaming("正在优化提示词...")
        try:
            optimized_payload = prompt_optimizer.optimize(user_input)
        except ValueError as exc:
            print_streaming(str(exc))
            continue

        _show_optimization(optimized_payload)
        selected_structured = _confirm_structured_prompt(optimized_payload)
        if not selected_structured:
            continue
        prompt_optimizer.mark_used(optimized_payload)
        _send_to_model(messages, selected_structured)


def run_cli_assistant() -> None:
    """Chat with DeepSeek from the command line.

    Supports plain questions, ``搜索：keyword`` web search, ``清除`` to reset
    the history, ``继续`` to resend the last used prompt, and ``退出`` to quit.
    End-of-input or Ctrl-C at any prompt ends the session cleanly instead of
    surfacing a traceback.
    """
    print_streaming("欢迎使用 DeepSeek 智能助手！")
    print("功能说明：")
    print("1. 直接输入问题进行对话")
    print("2. 输入'搜索：关键词'可触发联网搜索")
    print("3. 输入'清除'清除对话历史")
    print("4. 输入'继续'复用上一次优化后的提示词")
    print("5. 输入'退出'结束对话\n")

    messages: List[Dict[str, str]] = [SYSTEM_PROMPT.copy()]

    try:
        _run_dialogue(messages)
    except (EOFError, KeyboardInterrupt):
        # Finish the interrupted prompt line before the farewell message.
        sys.stdout.write("\n")
    print_streaming("对话已结束，再见！")


# Start the interactive assistant only when this module is run as the main program.
if __name__ == "__main__":
    run_cli_assistant()
