In [18]:
from elasticsearch_dsl import connections, Index
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Document, Text, Date, Integer, Index
# Register a named connection so later Index/Document objects can refer to it
# through the 'my_connection' alias.
connections.create_connection(
    alias='my_connection', hosts=['http://localhost:9200'], timeout=20)

# Bind an Index handle for 'papers' to that connection alias.
index = Index('papers', using='my_connection')

# Creating an index that already exists is rejected by Elasticsearch, which
# made this cell fail on every re-run. Only create the index when it is absent.
if not index.exists():
    index.create()

# Fetch the current mapping of the index and show it.
mapping = index.get_mapping()
print(mapping)

{'papers': {'mappings': {}}}


In [10]:
from elasticsearch import Elasticsearch
from elasticsearch.exceptions import NotFoundError, UnsupportedProductError

# Low-level Elasticsearch client pointed at the local node.
es = Elasticsearch(hosts=['http://localhost:9200'])

# Name of the index whose details should be printed.
index_name = 'papers'

try:
    index_info = es.indices.get(index=index_name)
except NotFoundError:
    # The index does not exist on the server.
    print(f"Index '{index_name}' not found.")
except UnsupportedProductError:
    # The client refused to talk to the server it found at that address.
    print("The client noticed that the server is not Elasticsearch and we do not support this unknown product.")
except Exception as exc:
    # Anything else is reported rather than raised, so the cell still completes.
    print(f"An error occurred: {exc}")
else:
    # Lookup succeeded: dump whatever the server returned.
    print(index_info)

The client noticed that the server is not Elasticsearch and we do not support this unknown product.


In [19]:
import requests

# Ask Elasticsearch for the index description over plain HTTP, bypassing the
# Python client.
url = 'http://localhost:9200/papers'

# requests never times out unless told to; without a timeout this cell hangs
# indefinitely when the node is unreachable or stalls. 20 seconds matches the
# timeout used for the elasticsearch_dsl connection in this notebook.
response = requests.get(url, timeout=20)

if response.status_code == 200:
    print("Index information:")
    print(response.json())
else:
    # Show both the status code and the raw body to make the failure diagnosable.
    print(f"Failed to get index information. Status code: {response.status_code}")
    print(response.text)

Index information:
{'papers': {'aliases': {}, 'mappings': {}, 'settings': {'index': {'routing': {'allocation': {'include': {'_tier_preference': 'data_content'}}}, 'number_of_shards': '1', 'provided_name': 'papers', 'creation_date': '1722895931764', 'number_of_replicas': '1', 'uuid': 'T_rXO719RuKcjOeOSSzN0A', 'version': {'created': '8070099'}}}}}


In [21]:
from elasticsearch_dsl import connections, Index, Document, Text, Integer

# Register the Elasticsearch connection under the alias the Document models use.
connections.create_connection(
    alias='my_connection',
    hosts=['http://localhost:9200'],
    timeout=20,
)

# Define the Paper document model.
class Paper(Document):
    """Elasticsearch-DSL document describing one paper record.

    Every field in this definition is declared as ``Text``. The nested
    ``Index`` class binds the document to the ``papers`` index through the
    ``my_connection`` connection alias.

    NOTE(review): a later cell of this notebook defines ``Paper`` again with
    additional fields (CITEDTIMES, YEAR, IMPACTFACTOR, keywords); once that
    cell runs, its definition shadows this one.
    """
    paper_id = Text()
    ISSN = Text()
    MEDIA = Text()
    TITLE = Text()
    PRINTDATE = Text()
    PAPER_CLASS = Text()
    ESI = Text()
    LANGUAGE = Text()
    BJSQK = Text()
    DISCIPLINE = Text()

    class Index:
        # Target index name and the connection alias used to reach it.
        name = 'papers'
        using = 'my_connection'

# Define the Teacher document model.
class Teacher(Document):
    """Elasticsearch-DSL document describing one teacher record.

    All fields are ``Text`` except ``JZGBH``, which is an ``Integer``. The
    nested ``Index`` class binds the document to the ``teachers`` index
    through the ``my_connection`` connection alias.

    The English glosses on the Chinese-named fields below are literal
    translations of the identifiers only.
    """
    人员类别 = Text()  # "personnel category"
    JZGBH = Integer()  # used as the document _id by the bulk-import cell of this notebook
    NAME = Text()
    当前状态 = Text()  # "current status"
    SEX = Text()
    所在单位 = Text()  # "affiliated unit"
    work_unit = Text()
    teachership = Text()
    聘任专业技术职务级别 = Text()  # "appointed professional/technical title level"
    岗位类别 = Text()  # "post category"
    岗位等级 = Text()  # "post grade"
    最高学历 = Text()  # "highest education level"
    degree = Text()
    graduate_institution = Text()
    毕业年月 = Text()  # "graduation year and month"
    所学专业 = Text()  # "major studied"
    授学位年月 = Text()  # "year and month the degree was conferred"
    研究方向 = Text()  # "research direction"
    学科类别 = Text()  # "discipline category"
    XKLBDM_DISPLAY = Text()
    一级学科 = Text()  # "first-level discipline"
    YJXKDM_DISPLAY = Text()
    二级学科 = Text()  # "second-level discipline"
    EJXKDM_DISPLAY = Text()

    class Index:
        # Target index name and the connection alias used to reach it.
        name = 'teachers'
        using = 'my_connection'

# Build handles for both indices on the shared 'my_connection' alias.
papers_index, teachers_index = (
    Index(index_label, using='my_connection')
    for index_label in ('papers', 'teachers')
)
_both_indices = (papers_index, teachers_index)

# Drop whichever of the two indices already exists, papers first.
for _idx in _both_indices:
    if _idx.exists():
        _idx.delete()

# Create both indices afresh, in the same order.
for _idx in _both_indices:
    _idx.create()

# Push each Document model's field mappings into its index.
for _doc_cls in (Paper, Teacher):
    _doc_cls.init()

In [28]:
import mysql.connector
from elasticsearch.helpers import bulk
from elasticsearch_dsl import connections, Index, Document, Text, Integer

# Register the Elasticsearch connection under the alias the Teacher model uses.
connections.create_connection(
    alias='my_connection',
    hosts=['http://localhost:9200'],
    timeout=20,
)

# Define the Teacher document model.
class Teacher(Document):
    """Elasticsearch-DSL document describing one teacher record.

    All fields are ``Text`` except ``JZGBH``, which is an ``Integer``. The
    nested ``Index`` class binds the document to the ``teachers`` index
    through the ``my_connection`` connection alias.

    The field names are the same keys this cell copies from each MySQL row
    into the ``_source`` of the bulk actions. The English glosses on the
    Chinese-named fields are literal translations of the identifiers only.
    """
    人员类别 = Text()  # "personnel category"
    JZGBH = Integer()  # also used as the document _id when bulk-importing
    NAME = Text()
    当前状态 = Text()  # "current status"
    SEX = Text()
    所在单位 = Text()  # "affiliated unit"
    work_unit = Text()
    teachership = Text()
    聘任专业技术职务级别 = Text()  # "appointed professional/technical title level"
    岗位类别 = Text()  # "post category"
    岗位等级 = Text()  # "post grade"
    最高学历 = Text()  # "highest education level"
    degree = Text()
    graduate_institution = Text()
    毕业年月 = Text()  # "graduation year and month"
    所学专业 = Text()  # "major studied"
    授学位年月 = Text()  # "year and month the degree was conferred"
    研究方向 = Text()  # "research direction"
    学科类别 = Text()  # "discipline category"
    XKLBDM_DISPLAY = Text()
    一级学科 = Text()  # "first-level discipline"
    YJXKDM_DISPLAY = Text()
    二级学科 = Text()  # "second-level discipline"
    EJXKDM_DISPLAY = Text()

    class Index:
        # Target index name and the connection alias used to reach it.
        name = 'teachers'
        using = 'my_connection'

import os  # stdlib; used only to read optional MySQL connection overrides

# Rebuild the 'teachers' index from scratch: drop it if present, create it,
# then push the Teacher model's field mappings into it.
teachers_index = Index('teachers', using='my_connection')
if teachers_index.exists():
    teachers_index.delete()
teachers_index.create()
Teacher.init()

# Connect to MySQL. The settings can be overridden through environment
# variables so real credentials need not live in the notebook; the defaults
# reproduce the values this cell used before.
cnx = mysql.connector.connect(
    user=os.environ.get('MYSQL_USER', 'root'),
    password=os.environ.get('MYSQL_PASSWORD', 'root'),
    host=os.environ.get('MYSQL_HOST', '127.0.0.1'),
    database=os.environ.get('MYSQL_DATABASE', 'summer_project'),
)
try:
    # dictionary=True makes every fetched row a dict keyed by column name.
    cursor = cnx.cursor(dictionary=True)
    try:
        # Read the whole teachers table.
        cursor.execute("SELECT * FROM teachers")
        teachers_data = cursor.fetchall()
    finally:
        cursor.close()
finally:
    # Always release the MySQL connection, even when the query fails; the
    # previous version leaked it on any exception.
    cnx.close()

# Columns copied from each MySQL row into the Elasticsearch document, in the
# order they appear in the document source.
TEACHER_FIELDS = (
    "人员类别",
    "JZGBH",
    "NAME",
    "当前状态",
    "SEX",
    "所在单位",
    "work_unit",
    "teachership",
    "聘任专业技术职务级别",
    "岗位类别",
    "岗位等级",
    "最高学历",
    "degree",
    "graduate_institution",
    "毕业年月",
    "所学专业",
    "授学位年月",
    "研究方向",
    "学科类别",
    "XKLBDM_DISPLAY",
    "一级学科",
    "YJXKDM_DISPLAY",
    "二级学科",
    "EJXKDM_DISPLAY",
)

# One bulk action per row; JZGBH doubles as the document _id, so re-importing
# the same row overwrites the document instead of duplicating it.
actions = [
    {
        "_index": "teachers",
        "_id": teacher["JZGBH"],
        "_source": {field: teacher[field] for field in TEACHER_FIELDS},
    }
    for teacher in teachers_data
]

# Bulk-load the documents through the aliased Elasticsearch connection.
bulk(connections.get_connection(alias='my_connection'), actions)

In [2]:
import mysql.connector
from elasticsearch.helpers import bulk
from elasticsearch_dsl import connections, Index, Document, Text, Integer

# Register the Elasticsearch connection under the alias the Paper model uses.
connections.create_connection(
    alias='my_connection',
    hosts=['http://localhost:9200'],
    timeout=20,
)

# Define the Paper document model.
class Paper(Document):
    """Elasticsearch-DSL document describing one paper record.

    CITEDTIMES, YEAR and IMPACTFACTOR are declared as ``Integer``; every other
    field is ``Text``. The nested ``Index`` class binds the document to the
    ``papers`` index through the ``my_connection`` connection alias.

    The bulk import in this cell does not send PRINTDATE, CITEDTIMES or
    IMPACTFACTOR, so those fields are mapped but not populated by it.
    """
    paper_id = Text()  # also used as the document _id when bulk-importing
    ISSN = Text()
    MEDIA = Text()
    TITLE = Text()
    PRINTDATE = Text()
    CITEDTIMES = Integer()
    YEAR = Integer()
    PAPER_CLASS = Text()
    # NOTE(review): mapped as Integer — if the source values are fractional,
    # confirm this type is what is intended.
    IMPACTFACTOR = Integer()
    ESI = Text()
    LANGUAGE = Text()
    BJSQK = Text()
    DISCIPLINE = Text()
    keywords = Text()

    class Index:
        # Target index name and the connection alias used to reach it.
        name = 'papers'
        using = 'my_connection'

import os  # stdlib; used only to read optional MySQL connection overrides

# Rebuild the 'papers' index from scratch: drop it if present, create it,
# then push the Paper model's field mappings into it.
papers_index = Index('papers', using='my_connection')
if papers_index.exists():
    papers_index.delete()
papers_index.create()
Paper.init()

# Connect to MySQL. The settings can be overridden through environment
# variables so real credentials need not live in the notebook; the defaults
# reproduce the values this cell used before.
cnx = mysql.connector.connect(
    user=os.environ.get('MYSQL_USER', 'root'),
    password=os.environ.get('MYSQL_PASSWORD', 'root'),
    host=os.environ.get('MYSQL_HOST', '127.0.0.1'),
    database=os.environ.get('MYSQL_DATABASE', 'summer_project'),
)
try:
    # dictionary=True makes every fetched row a dict keyed by column name.
    cursor = cnx.cursor(dictionary=True)
    try:
        # Read the whole papers table.
        cursor.execute("SELECT * FROM papers")
        papers_data = cursor.fetchall()
    finally:
        cursor.close()
finally:
    # Always release the MySQL connection, even when the query fails; the
    # previous version leaked it on any exception.
    cnx.close()

# Columns copied from each MySQL row into the Elasticsearch document, in the
# order they appear in the document source.
# NOTE(review): PRINTDATE, CITEDTIMES and IMPACTFACTOR are declared on the
# Paper model but were never part of this import; that is kept as-is —
# confirm whether the omission is deliberate.
PAPER_FIELDS = (
    "paper_id",
    "ISSN",
    "MEDIA",
    "TITLE",
    "YEAR",
    "PAPER_CLASS",
    "ESI",
    "LANGUAGE",
    "BJSQK",
    "DISCIPLINE",
    "keywords",
)

# One bulk action per row; paper_id doubles as the document _id, so
# re-importing the same row overwrites the document instead of duplicating it.
actions = [
    {
        "_index": "papers",
        "_id": paper["paper_id"],
        "_source": {field: paper[field] for field in PAPER_FIELDS},
    }
    for paper in papers_data
]

# Bulk-load the documents through the aliased Elasticsearch connection.
bulk(connections.get_connection(alias='my_connection'), actions)

In [3]:
# List all indices currently present in Elasticsearch.
# NOTE(review): the lines below only construct the client; no call that lists
# indices is present. The TypeError recorded under this cell is about
# positional arguments, which these lines do not use — presumably the listing
# call was lost from the cell. Restore it using keyword arguments only.
from elasticsearch import Elasticsearch
es = Elasticsearch(hosts=['http://localhost:9200'])


TypeError: Positional arguments can't be used with Elasticsearch API methods. Instead only use keyword arguments.