# 后台文档管理

## AOS 管理

In [None]:
import io
import os
import boto3
import json
from opensearchpy import OpenSearch, RequestsHttpConnection
from sagemaker.huggingface.model import HuggingFacePredictor

from dotenv import load_dotenv
load_dotenv()

# --- Connection settings ---------------------------------------------------
# HTTPS endpoint of the OpenSearch domain and the port used to reach it.
host = "https://search-devax-search-bot-kcbw3pozuzdstr77ir44wzwmfy.us-east-1.es.amazonaws.com"
port = 443
# NOTE(review): `region` is not referenced anywhere in the visible cells.
region = "us-east-1"

# --- Endpoint and index names ----------------------------------------------
# NOTE(review): `llm_endpoint_name` is not referenced in the visible cells.
llm_endpoint_name = "chatglm2-lmi-model"
# The "vetctor" spelling is kept on purpose: this is a notebook-level global
# and other cells may refer to it under this exact name.
vetctor_endpoint_name = "text2vector"
index_name = "doc_embeddings"

# Basic-auth credentials come from the environment (populated by the
# load_dotenv() call above). Either entry is None when its variable is unset.
auth = (os.getenv("AOS_USER"), os.getenv("AOS_PWD"))

# Predictor bound to the endpoint that turns text into embedding vectors.
predictor_vector = HuggingFacePredictor(endpoint_name=vetctor_endpoint_name)

# OpenSearch client used by every cell below.
client = OpenSearch(
    hosts=[f"{host}:{port}"],
    http_auth=auth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
)

def update_data(id, data):
    """Index a single document into the ``index_name`` index.

    Args:
        id: Document id forwarded to ``client.index``. The insert loop in
            this file passes ``None``.
        data: Document body to store.

    Returns:
        Whatever ``client.index`` returns for the request.
    """
    return client.index(index=index_name, id=id, body=data)


## 创建索引

In [None]:
# Mapping for the embedding field: an HNSW k-NN vector using L2 distance on
# the nmslib engine.
# NOTE(review): dimension 768 presumably matches the output size of the
# embedding endpoint -- confirm before changing the model.
content_vec_mapping = {
    "type": "knn_vector",
    "index": True,
    "dimension": 768,
    "method": {
        "name": "hnsw",
        "space_type": "l2",
        "engine": "nmslib",
        "parameters": {
            "ef_construction": 128,
            "m": 24,
        },
    },
}

# Full index definition: k-NN enabled, with `content_vec` as the only
# explicitly mapped property.
index_body = {
    "settings": {"index": {"knn": True}},
    "mappings": {"properties": {"content_vec": content_vec_mapping}},
}

# Create the index; the name is passed by keyword like the other client calls.
client.indices.create(index=index_name, body=index_body)

In [6]:

# client.indices.delete(index=index_name)

{'acknowledged': True}

## 插入文档

In [None]:

# Read every file in the document directory, embed its text with the vector
# endpoint and index text, source path and embedding as one document.
sol_dir = './docs_ec/'

file_list = os.listdir(sol_dir)
for file_path in file_list:
    # Same resulting string as `sol_dir + file_path` for this sol_dir, but it
    # stays correct if the trailing slash is ever dropped.
    file_name = os.path.join(sol_dir, file_path)

    # os.listdir also returns sub-directories; open() would raise on those
    # and abort the whole import, so skip anything that is not a regular file.
    if not os.path.isfile(file_name):
        continue

    # The context manager closes each handle as soon as the file is read
    # (the handle was previously never closed). The encoding is explicit so
    # decoding does not depend on the platform locale.
    # NOTE(review): assumes the documents are UTF-8 encoded -- confirm.
    with open(file_name, 'r', encoding='utf-8') as sol_file:
        line = sol_file.read().strip()

    # Empty or whitespace-only files are not indexed.
    if not line:
        continue

    print("Inserting: {}... {}".format(line[:20], file_name))
    # id=None: no explicit document id is supplied.
    # The "origion" key is kept as spelled because it is the stored field name.
    update_data(None, {
        "text": line,
        "origion": file_name,
        "content_vec": predictor_vector.predict({"text": line}),
    })

print("Done")