In [1]:
!pip install elasticsearch==7.0.1

Collecting elasticsearch==7.0.1
  Downloading elasticsearch-7.0.1-py2.py3-none-any.whl.metadata (6.8 kB)
Downloading elasticsearch-7.0.1-py2.py3-none-any.whl (83 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.2/83.2 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: elasticsearch
Successfully installed elasticsearch-7.0.1


In [2]:
import os
import json
from elasticsearch import Elasticsearch

In [3]:
def create_index_if_not_exists(es, index_name):
    """
    Tạo một index mới trong Elasticsearch nếu nó chưa tồn tại.

    Parameters:
    - es: đối tượng Elasticsearch đã được khởi tạo.
    - index_name: tên của index Elasticsearch cần tạo.

    Returns:
    - None
    """
    if not es.indices.exists(index=index_name):
        try:
            es.indices.create(index=index_name)
            print(f"Index '{index_name}' created successfully.")
        except Exception as e:
            print(f"Error creating index '{index_name}': {str(e)}")
    else:
        print(f"Index '{index_name}' already exists.")


In [4]:
def load_backup_to_elasticsearch(backup_file_path, es, index_name):
    """
    Tải dữ liệu từ tệp backup lên Elasticsearch.

    Parameters:
    - backup_file_path: đường dẫn đến tệp backup.
    - es: đối tượng Elasticsearch đã được khởi tạo.
    - index_name: tên của index Elasticsearch để lưu trữ văn bản.

    Returns:
    - None
    """
    create_index_if_not_exists(es, index_name)

    if not os.path.exists(backup_file_path):
        print(f"Error: Backup file {backup_file_path} does not exist.")
        return

    with open(backup_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    for doc in data:
        doc_id = f"{doc['video_name']}_{doc['frame_id']}"
        document_body = {
            "video_name": doc['video_name'],
            "frame": doc['frame_id'],
            "text": doc['text'],
            "prob": doc['avg_prob']
        }

        try:
            #if es.exists(index=index_name, id=doc_id):
               # print(f"Document {doc_id} already exists. Next...")
              #  continue
          #  else:
            print(f"Document {doc_id} does not exist. Indexing new document...")
            es.index(index=index_name, id=doc_id, body=document_body)
        except Exception as e:
            print(f"Error checking/adding document {doc_id}: {str(e)}")


# Cách sử dụng cloudflare
- B1: tải cloudflare về máy
```
  winget install --id Cloudflare.cloudflared
```
  - Note: Kiểm tra cloundflare đã được tải về chưa
  ```
    cloudflared --version
  ```
- B2: Chạy Tunnel
```
  cloudflared tunnel --url http://localhost:9200
```
- B5: Lấy host rồi dán lên đây:
```
    Ví dụ:
    Your quick Tunnel has been created! Visit it at (it may take some time to be reachable):
    https://dome-disciplines-privilege-exhibitions.trycloudflare.com
```

In [5]:
# Example usage
es = Elasticsearch(['https://nvidia-paintball-futures-mortality.trycloudflare.com'])  # Điền host
index_name = 'ocr_video_1'

for i in range(1, 31): # Có thể chính sửa

  backup_file_path = f'/kaggle/input/output-reg-vietocr/reg_L01_V0{str(i).zfill(2)}.json'
  load_backup_to_elasticsearch(backup_file_path, es, index_name)
  print(f"Done L01_V0{str(i).zfill(2)}")

Index 'ocr_video_1' already exists.
Document L01_V001_0 does not exist. Indexing new document...
Document L01_V001_10 does not exist. Indexing new document...
Document L01_V001_1000 does not exist. Indexing new document...
Document L01_V001_1010 does not exist. Indexing new document...
Document L01_V001_10200 does not exist. Indexing new document...
Document L01_V001_10240 does not exist. Indexing new document...
Document L01_V001_10280 does not exist. Indexing new document...
Document L01_V001_10290 does not exist. Indexing new document...
Document L01_V001_1030 does not exist. Indexing new document...
Document L01_V001_10300 does not exist. Indexing new document...
Document L01_V001_10660 does not exist. Indexing new document...
Document L01_V001_10670 does not exist. Indexing new document...
Document L01_V001_10690 does not exist. Indexing new document...
Document L01_V001_10700 does not exist. Indexing new document...
Document L01_V001_10740 does not exist. Indexing new document...