In [1]:
import os
os.chdir('../')
import streamlit as st
from webui_pages.utils import *
from st_aggrid import AgGrid, JsCode
from st_aggrid.grid_options_builder import GridOptionsBuilder
import pandas as pd
from server.knowledge_base.utils import get_file_path, LOADER_DICT
from server.knowledge_base.kb_service.base import get_kb_details, get_kb_file_details
from typing import Literal, Dict, Tuple
from configs import (kbs_config,
                     EMBEDDING_MODEL, DEFAULT_VS_TYPE,
                     CHUNK_SIZE, OVERLAP_SIZE, ZH_TITLE_ENHANCE)
from server.utils import list_embed_models, list_online_embed_models
import os
import time

# JavaScript cell renderer handed to the grid: returns '✓' when the cell value
# loosely equals `true`, and '×' for anything else.
cell_renderer = JsCode("""function(params) {if(params.value==true){return '✓'}else{return '×'}}""")

def config_aggrid(
        df: pd.DataFrame,
        columns: Dict[Tuple[str, str], Dict] = {},
        selection_mode: Literal["single", "multiple", "disabled"] = "single",
        use_checkbox: bool = False,
) -> GridOptionsBuilder:
    """Create a GridOptionsBuilder for ``df`` with this notebook's common settings.

    Args:
        df: Frame the builder is derived from.
        columns: Mapping of ``(field, header)`` tuples to extra keyword
            arguments for ``configure_column``. Every listed column also gets
            ``wrapHeaderText=True``.
        selection_mode: Row selection mode forwarded to ``configure_selection``.
        use_checkbox: Forwarded to ``configure_selection``.

    Returns:
        The configured builder; the caller is expected to call ``build()``.
    """
    builder = GridOptionsBuilder.from_dataframe(df)

    # The "No" column always gets a fixed, narrow width.
    builder.configure_column("No", width=40)

    for column_key, column_options in columns.items():
        field, header_name = column_key
        builder.configure_column(field, header_name, wrapHeaderText=True, **column_options)

    # Rows to pre-select come from the Streamlit session, defaulting to row 0.
    preselected = st.session_state.get("selected_rows", [0])
    builder.configure_selection(
        selection_mode=selection_mode,
        use_checkbox=use_checkbox,
        pre_selected_rows=preselected,
    )

    # Fixed page size of 10 rows; automatic page sizing is switched off.
    builder.configure_pagination(enabled=True, paginationAutoPageSize=False, paginationPageSize=10)
    return builder

In [None]:
# List the files of one knowledge base in an AgGrid table and capture the
# rows the user selects.
kb = "samples"
doc_details = pd.DataFrame(get_kb_file_details(kb))
selected_rows = []

if not len(doc_details):
    st.info(f"知识库 `{kb}` 中暂无文件")
else:
    st.write(f"知识库 `{kb}` 中已有文件:")
    st.info("知识库中包含源文件与向量库，请从下表中选择文件后操作")
    # Keep only the columns shown in the grid (this already leaves out
    # "kb_name", so no separate drop is needed). The explicit .copy() makes the
    # result an independent frame, so the column assignments below do not
    # raise SettingWithCopyWarning.
    doc_details = doc_details[[
        "No", "file_name", "document_loader", "text_splitter", "docs_count", "in_folder", "in_db",
    ]].copy()
    doc_details["in_folder"] = doc_details["in_folder"].replace(True, "✓").replace(False, "×")
    doc_details["in_db"] = doc_details["in_db"].replace(True, "✓").replace(False, "×")

    # Configure the grid to show document loader and text splitter information.
    # NOTE(review): in_folder / in_db already hold "✓"/"×" strings at this
    # point, while cell_renderer compares the cell value with `true` — confirm
    # the rendered marks are the intended ones.
    gb = config_aggrid(
        doc_details,
        {
            ("No", "序号"): {},
            ("file_name", "文档名称"): {},
            ("document_loader", "文档加载器"): {},
            ("docs_count", "文档数量"): {},
            ("text_splitter", "分词器"): {},
            ("in_folder", "源文件"): {"cellRenderer": cell_renderer},
            ("in_db", "向量库"): {"cellRenderer": cell_renderer},
        },
        "multiple",
    )

    doc_grid = AgGrid(
        doc_details,
        gb.build(),
        columns_auto_size_mode="FIT_CONTENTS",
        theme="alpine",
        custom_css={
            "#gridToolBar": {"display": "none"},
        },
        allow_unsafe_jscode=True,  # required because the grid options carry JsCode
        enable_enterprise_modules=False
    )

    # Falls back to an empty list when the grid reports no selection.
    selected_rows = doc_grid.get("selected_rows", [])


In [None]:
# Scratch cell: prints a literal "#" marker — presumably a leftover placeholder; TODO confirm or remove.
print("#")

In [None]:
# Print a "#" marker only when `selected_rows` is truthy (non-empty).
if selected_rows:
    print("#")
# Instantiate ApiRequest (presumably provided by the `webui_pages.utils` star
# import — TODO confirm) and show its `base_url` attribute as the cell output.
api = ApiRequest()
api.base_url

In [6]:
# Load the docs of one knowledge-base file and show them in an editable grid.
# if selected_rows:
#     file_name = selected_rows[0]["file_name"]
api = ApiRequest()
selected_kb = "cloudResources"
# Alternative sample files, kept for manual switching:
# file_name = "大模型语料数据/Pass/业务上云/宝兰德Web服务器软件3.1.0用户手册.pdf"
# file_name = "RUIJIE/20210910211119_RG-S6000E系列交换机RGOS 11.4(1)B12P32S1版本配置手册(V1.0).pdf"
file_name = "chapter_1_配置指南-以太网交换.md"

# Ask the API for the docs of `file_name` inside `selected_kb`.
docs = api.search_kb_docs(knowledge_base_name=selected_kb, file_name=file_name)

# One grid row per returned doc. Metadata is serialised to a JSON string
# (non-ASCII kept readable) so it fits in a single hidden column.
# NOTE(review): `json` is not imported explicitly in this notebook; presumably
# it arrives through `from webui_pages.utils import *` — confirm.
data = [
    {
        "seq": i + 1,
        "id": x["id"],
        "page_content": x["page_content"],
        "source": x["metadata"].get("source"),
        "type": x["type"],
        "metadata": json.dumps(x["metadata"], ensure_ascii=False),
        "to_del": "",
    }
    for i, x in enumerate(docs)
]
df = pd.DataFrame(data)

gb = GridOptionsBuilder.from_dataframe(df)
# Bookkeeping columns stay in the data but are not shown.
gb.configure_columns(["id", "source", "type", "metadata"], hide=True)
gb.configure_column("seq", "No.", width=100)
gb.configure_column("page_content", "内容", editable=True, autoHeight=True, wrapText=True, flex=1,
                    cellEditor="agLargeTextCellEditor", cellEditorPopup=True)
# AG Grid's column property is `cellRenderer`; the former `cellRender` spelling
# is not a recognised key, so the checkbox renderer was not being applied.
gb.configure_column("to_del", "删除", editable=True, width=50, wrapHeaderText=True,
                    cellEditor="agCheckboxCellEditor", cellRenderer="agCheckboxCellRenderer")
gb.configure_selection()
edit_docs = AgGrid(df, gb.build())

# Disabled Streamlit save handler, kept for reference:
# if st.button("保存更改"):
#     origin_docs = {
#         x["id"]: {"page_content": x["page_content"], "type": x["type"], "metadata": x["metadata"]} for x in
#         docs}
#     changed_docs = []
#     for index, row in edit_docs.data.iterrows():
#         origin_doc = origin_docs[row["id"]]
#         if row["page_content"] != origin_doc["page_content"]:
#             if row["to_del"] not in ["Y", "y", 1]:
#                 changed_docs.append({
#                     "page_content": row["page_content"],
#                     "type": row["type"],
#                     "metadata": json.loads(row["metadata"]),
#                 })

#     if changed_docs:
#         if api.update_kb_docs(knowledge_base_name=selected_kb,
#                               file_names=[file_name],
#                               docs={file_name: changed_docs}):
#             st.toast("更新文档成功")
#         else:
#             st.toast("更新文档失败")

2024-07-12 16:02:22,498 - _client.py[line:1027] - INFO: HTTP Request: POST http://127.0.0.1:7861/knowledge_base/search_docs "HTTP/1.1 200 OK"


In [7]:
# Display one entry of `docs` for manual inspection (index 22 looks hand-picked — TODO confirm).
docs[22]

{'page_content': '{"head1": "配置指南-以太网交换", "head2": "1  接口", "head3": "1.3 功能详解", "head4": "1.3.2 接口的描述和管理状态", "head5": "工作原理", "head6": "接口的管理状态"}\n在某些情况下，用户可能需要禁用某个接口。用户可以通过设置接口的管理状态来直接关闭一个接口。如果关闭一个接口，则 这个接口上将不会接收和发送任何帧，这个接口将丧失这个接口对应的所有功能。用户也可以通过设置管理状态来重新打开一 个已经关闭的接口。接口的管理状态有两种：Up 和 Down，当端口被关闭时，端口的管理状态为 Down，否则为 Up。',
 'metadata': {'head1': '配置指南-以太网交换',
  'head2': '1  接口',
  'head3': '1.3 功能详解',
  'head4': '1.3.2 接口的描述和管理状态',
  'head5': '工作原理',
  'head6': '接口的管理状态'},
 'type': 'Document',
 'id': '387a19ed-27fa-4395-b5d1-9237be19544f',
 'score': 3.0}

In [None]:
# Inspect the current grid selection and the "text_splitter" column.
# Only the last expression of a cell is displayed, so the bare `selected_rows`
# line produces no output here.
selected_rows
doc_details["text_splitter"]

In [None]:
# doc_details
# Index the records returned by get_kb_details() by their "kb_name" field.
kb_list = {x["kb_name"]: x for x in get_kb_details()}
# The bare `kb_list` is evaluated but not shown; only the final `doc_details`
# expression becomes the cell output.
kb_list
doc_details

In [None]:
# Scratch cell: prints a literal "#" marker — presumably a leftover placeholder; TODO confirm or remove.
print("#")

In [9]:
# Sample input/output log text for /chat/knowledge_base_chat, stored as a
# literal string and printed unchanged for inspection.
str1 = '''==============================/chat/knowledge_base_chat  input {
    "query": "RUIJIERG-S6000E系列交换机,如何配置三层AP口",
    "knowledge_base_name": "cloudResources",
    "stream": true
}==============================
==============================/chat/knowledge_base_chat  output==============================
+ 可选配置。  
+ 可以在接口配置模式下，执行no switchport命令将一个二层AP口配置成三层AP口。使用switchport命令时，可以
将一个三层AP口配置成二层AP口。'''
print(str1)

    "query": "RUIJIERG-S6000E系列交换机,如何配置三层AP口",
    "knowledge_base_name": "cloudResources",
    "stream": true
+ 可选配置。  
+ 可以在接口配置模式下，执行no switchport命令将一个二层AP口配置成三层AP口。使用switchport命令时，可以
将一个三层AP口配置成二层AP口。
