In [9]:
import sys
!{sys.executable} -m pip install elasticsearch==6.8.2

Collecting elasticsearch==6.8.2
  Downloading elasticsearch-6.8.2-py2.py3-none-any.whl (90 kB)
[K     |████████████████████████████████| 90 kB 1.4 MB/s eta 0:00:01
Installing collected packages: elasticsearch
Successfully installed elasticsearch-6.8.2


In [10]:
from datetime import datetime
from elasticsearch import Elasticsearch

In [16]:
ELASTIC_URL = '0.0.0.0:9200'

es = Elasticsearch(ELASTIC_URL)
es.info()

{'name': '-orWJ9H',
 'cluster_name': 'docker-cluster',
 'cluster_uuid': 'wGvvrOBZT12LXKS_LIJATg',
 'version': {'number': '6.5.4',
  'build_flavor': 'default',
  'build_type': 'tar',
  'build_hash': 'd2ef93d',
  'build_date': '2018-12-17T21:17:40.758843Z',
  'build_snapshot': False,
  'lucene_version': '7.5.0',
  'minimum_wire_compatibility_version': '5.6.0',
  'minimum_index_compatibility_version': '5.0.0'},
 'tagline': 'You Know, for Search'}

In [30]:
t9_index = {
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "custom_analyzer": {
            "type": "custom",
            "tokenizer": "standard",
            "filter": [
              "lowercase",
              "custom_edge_ngram"
            ]
          }
        },
        "filter": {
          "custom_edge_ngram": {
            "type": "edge_ngram",
            "min_gram": 2,
            "max_gram": 10
          }
        }
      }
    }
  },
  "mappings": {
    "my_type": {
      "properties": {
        "text": {
          "type": "text",
          "analyzer": "custom_analyzer",
          "search_analyzer": "standard"
        }
      }
    }
  }
}

In [35]:
es.indices.create('test_t9', body=t9_index)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 'test_t9'}

In [46]:
print(json.dumps(es.indices.get_settings('test_t9'), indent=4))

{
    "test_t9": {
        "settings": {
            "index": {
                "number_of_shards": "5",
                "provided_name": "test_t9",
                "creation_date": "1615578363995",
                "analysis": {
                    "filter": {
                        "custom_edge_ngram": {
                            "type": "edge_ngram",
                            "min_gram": "2",
                            "max_gram": "10"
                        }
                    },
                    "analyzer": {
                        "custom_analyzer": {
                            "filter": [
                                "lowercase",
                                "custom_edge_ngram"
                            ],
                            "type": "custom",
                            "tokenizer": "standard"
                        }
                    }
                },
                "number_of_replicas": "1",
                "uuid": "Q5bamA9eSgO5NecZjq4jrA"

In [48]:
from typing import Generator, List

import psycopg2


class PostgresStorage:
    """
    Base class for working with PostgreSQL
    """

    conn: psycopg2.extensions.connection

    def __init__(self, conn):
        self.conn = conn

    @classmethod
    def connect(cls,
                host: str,
                port: int = 5432,
                user: str = 'postgres',
                password: str = 'password',
                dbname: str = 'postgres'):
        return cls(conn=psycopg2.connect(
            host=host, port=port, user=user, password=password, dbname=dbname)
        )

    def exec_query(self, query: str, params: List[str]) -> Generator:
        cursor = self.conn.cursor()
        try:
            cursor.execute(query, params)
        except psycopg2.Error as e:
            self.conn.rollback()
            raise e
        return cursor.fetchall()

    def exec(self, sql: str, params: List[str]):
        cursor = self.conn.cursor()
        try:
            cursor.execute(sql, params)
        except psycopg2.Error as e:
            self.conn.rollback()
            raise e
        self.conn.commit()

In [63]:
import os

PG_USER = os.environ.get('PG_USER', 'postgres')
PG_PASS = os.environ.get('PG_PASS', 'password')
PG_HOST = os.environ.get('PG_HOST', '172.17.0.2')
PG_PORT = os.environ.get('PG_PORT', 5432)
PG_DBNAME = os.environ.get('PG_DBNAME', 'vnkrtv')

In [66]:
storage = PostgresStorage.connect(
    host=PG_HOST,
    port=PG_PORT,
    user=PG_USER,
    password=PG_PASS,
    dbname=PG_DBNAME
)

In [88]:
from typing import Generator, Iterable
import re

import tqdm
import nltk
import razdel


class Tokenizer:
    remove_punctuation = re.compile(r'[^а-яА-ЯёЁ ]')

    @classmethod
    def tokenize(cls, text: str) -> Generator:
        return (cls.remove_punctuation.sub('', sentence.text.lower().strip())
                for sentence in razdel.sentenize(text))


class TextProcessor:
    tokenizer = Tokenizer()

    @classmethod
    def get_sentences_gens(cls, texts: Iterable) -> Generator:
        for text in texts:
            for sentence in cls.tokenizer.tokenize(text):
                yield sentence
                
    @classmethod
    def get_word_lists_gen(cls, texts: Iterable) -> Generator:
        for text in tqdm.tqdm(texts):
            for sentence in cls.tokenizer.tokenize(text):
                yield [_.text for _ in razdel.tokenize(sentence)]

    @classmethod
    def get_text_gen(cls, text_gen: Iterable) -> Generator:
        for sentence in cls.get_sentences_gens(text_gen):
            yield [_.text for _ in razdel.tokenize(sentence)]

    @classmethod
    def get_ngram_gen(cls, text_gen: Iterable, ngram_size: int = 3) -> Generator:
        for sentence in cls.get_sentences_gens(text_gen):
            yield [''.join(item) for item in nltk.ngrams(sentence, ngram_size)]


In [70]:
train_text = [
    row[0] for row in storage.exec_query('''
    SELECT text
      FROM posts INNER JOIN habs 
        ON posts.post_id = habs.post_id 
     WHERE hab = %s 
     LIMIT 2000''', params=['Машинное обучение'])
]
len(train_text)

2000

In [96]:
t9_index = {
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "custom_analyzer": {
            "type": "custom",
            "tokenizer": "standard",
            "filter": [
              "lowercase",
              "custom_edge_ngram"
            ]
          }
        },
        "filter": {
          "custom_edge_ngram": {
            "type": "edge_ngram",
            "min_gram": 2,
            "max_gram": 10
          }
        }
      }
    }
  },
  "mappings": {
    "sentence": {
      "properties": {
        "text": {
          "type": "text",
          "analyzer": "custom_analyzer",
          "search_analyzer": "standard"
        }
      }
    }
  }
}
es.indices.create('t9_app_3', body=t9_index)

{'acknowledged': True, 'shards_acknowledged': True, 'index': 't9_app_3'}

In [98]:
window_size = 5

for sentence in TextProcessor.get_word_lists_gen(train_text):
#     for i in range(len(sentence) // window_size):
#         words = sentence[i * window_size: (i + 1) * window_size]
#         text = ' '.join(sentence[i * window_size: (i + 1) * window_size])
#         print(text)
    es.index(index='t9_app_3', doc_type='sentence', body={
        'text': ' '.join(sentence)
    })
#         if len(words) < window_size:
#             break






  0%|          | 0/2000 [00:00<?, ?it/s][A[A[A[A[A




  0%|          | 1/2000 [00:02<1:21:08,  2.44s/it][A[A[A[A[A




  0%|          | 2/2000 [00:03<1:04:12,  1.93s/it][A[A[A[A[A




  0%|          | 3/2000 [00:03<51:44,  1.55s/it]  [A[A[A[A[A




  0%|          | 4/2000 [00:04<43:57,  1.32s/it][A[A[A[A[A




  0%|          | 5/2000 [00:05<40:25,  1.22s/it][A[A[A[A[A




  0%|          | 6/2000 [00:06<40:06,  1.21s/it][A[A[A[A[A




  0%|          | 7/2000 [00:07<31:26,  1.06it/s][A[A[A[A[A




  0%|          | 8/2000 [00:07<25:28,  1.30it/s][A[A[A[A[A




  0%|          | 9/2000 [00:08<27:03,  1.23it/s][A[A[A[A[A




  0%|          | 10/2000 [00:08<23:08,  1.43it/s][A[A[A[A[A




  1%|          | 11/2000 [00:09<21:55,  1.51it/s][A[A[A[A[A




  1%|          | 12/2000 [00:13<54:10,  1.63s/it][A[A[A[A[A




  1%|          | 13/2000 [00:14<45:34,  1.38s/it][A[A[A[A[A




  1%|          | 14/2000 [00:14<37:00,  1

  6%|▌         | 121/2000 [02:12<1:11:16,  2.28s/it][A[A[A[A[A




  6%|▌         | 122/2000 [02:14<59:59,  1.92s/it]  [A[A[A[A[A




  6%|▌         | 123/2000 [02:15<56:05,  1.79s/it][A[A[A[A[A




  6%|▌         | 124/2000 [02:17<58:03,  1.86s/it][A[A[A[A[A




  6%|▋         | 125/2000 [02:19<55:18,  1.77s/it][A[A[A[A[A




  6%|▋         | 126/2000 [02:20<49:08,  1.57s/it][A[A[A[A[A




  6%|▋         | 127/2000 [02:21<43:23,  1.39s/it][A[A[A[A[A




  6%|▋         | 128/2000 [02:23<52:36,  1.69s/it][A[A[A[A[A




  6%|▋         | 129/2000 [02:26<1:06:52,  2.14s/it][A[A[A[A[A




  6%|▋         | 130/2000 [02:27<49:13,  1.58s/it]  [A[A[A[A[A




  7%|▋         | 131/2000 [02:28<49:06,  1.58s/it][A[A[A[A[A




  7%|▋         | 132/2000 [02:28<36:32,  1.17s/it][A[A[A[A[A




  7%|▋         | 133/2000 [02:29<32:09,  1.03s/it][A[A[A[A[A




  7%|▋         | 134/2000 [02:30<34:55,  1.12s/it][A[A[A[A[A




  7%|▋      

 12%|█▏        | 237/2000 [05:25<36:33,  1.24s/it][A[A[A[A[A




 12%|█▏        | 238/2000 [05:28<50:20,  1.71s/it][A[A[A[A[A




 12%|█▏        | 239/2000 [05:30<54:14,  1.85s/it][A[A[A[A[A




 12%|█▏        | 240/2000 [05:32<55:26,  1.89s/it][A[A[A[A[A




 12%|█▏        | 241/2000 [05:34<55:56,  1.91s/it][A[A[A[A[A




 12%|█▏        | 242/2000 [05:35<47:15,  1.61s/it][A[A[A[A[A




 12%|█▏        | 243/2000 [05:36<42:59,  1.47s/it][A[A[A[A[A




 12%|█▏        | 244/2000 [05:36<34:04,  1.16s/it][A[A[A[A[A




 12%|█▏        | 245/2000 [05:37<27:07,  1.08it/s][A[A[A[A[A




 12%|█▏        | 246/2000 [05:38<31:49,  1.09s/it][A[A[A[A[A




 12%|█▏        | 247/2000 [05:39<30:04,  1.03s/it][A[A[A[A[A




 12%|█▏        | 248/2000 [05:41<36:28,  1.25s/it][A[A[A[A[A




 12%|█▏        | 249/2000 [05:41<27:11,  1.07it/s][A[A[A[A[A




 12%|█▎        | 250/2000 [05:42<29:15,  1.00s/it][A[A[A[A[A




 13%|█▎        | 251

 18%|█▊        | 354/2000 [08:01<43:04,  1.57s/it][A[A[A[A[A




 18%|█▊        | 355/2000 [08:03<48:25,  1.77s/it][A[A[A[A[A




 18%|█▊        | 356/2000 [08:04<43:59,  1.61s/it][A[A[A[A[A




 18%|█▊        | 357/2000 [08:05<35:08,  1.28s/it][A[A[A[A[A




 18%|█▊        | 358/2000 [08:05<30:46,  1.12s/it][A[A[A[A[A




 18%|█▊        | 359/2000 [08:09<51:18,  1.88s/it][A[A[A[A[A




 18%|█▊        | 360/2000 [08:13<1:04:09,  2.35s/it][A[A[A[A[A




 18%|█▊        | 361/2000 [08:15<1:05:45,  2.41s/it][A[A[A[A[A




 18%|█▊        | 362/2000 [08:15<48:29,  1.78s/it]  [A[A[A[A[A




 18%|█▊        | 363/2000 [08:16<36:57,  1.35s/it][A[A[A[A[A




 18%|█▊        | 364/2000 [08:17<34:15,  1.26s/it][A[A[A[A[A




 18%|█▊        | 365/2000 [08:18<34:59,  1.28s/it][A[A[A[A[A




 18%|█▊        | 366/2000 [08:18<25:50,  1.05it/s][A[A[A[A[A




 18%|█▊        | 367/2000 [08:20<31:26,  1.16s/it][A[A[A[A[A




 18%|█▊       

 24%|██▎       | 472/2000 [11:02<36:38,  1.44s/it][A[A[A[A[A




 24%|██▎       | 473/2000 [11:04<35:40,  1.40s/it][A[A[A[A[A




 24%|██▍       | 475/2000 [11:05<31:23,  1.24s/it][A[A[A[A[A




 24%|██▍       | 476/2000 [11:08<41:46,  1.64s/it][A[A[A[A[A




 24%|██▍       | 477/2000 [11:09<36:10,  1.42s/it][A[A[A[A[A




 24%|██▍       | 478/2000 [11:11<44:16,  1.75s/it][A[A[A[A[A




 24%|██▍       | 479/2000 [11:12<36:57,  1.46s/it][A[A[A[A[A




 24%|██▍       | 480/2000 [11:13<35:40,  1.41s/it][A[A[A[A[A




 24%|██▍       | 481/2000 [11:14<29:37,  1.17s/it][A[A[A[A[A




 24%|██▍       | 482/2000 [11:22<1:18:31,  3.10s/it][A[A[A[A[A




 24%|██▍       | 483/2000 [11:23<1:02:34,  2.47s/it][A[A[A[A[A




 24%|██▍       | 484/2000 [11:25<1:03:55,  2.53s/it][A[A[A[A[A




 24%|██▍       | 485/2000 [11:26<52:01,  2.06s/it]  [A[A[A[A[A




 24%|██▍       | 486/2000 [11:27<43:37,  1.73s/it][A[A[A[A[A




 24%|██▍    

 29%|██▉       | 589/2000 [13:56<35:31,  1.51s/it][A[A[A[A[A




 30%|██▉       | 590/2000 [13:56<27:07,  1.15s/it][A[A[A[A[A




 30%|██▉       | 591/2000 [13:59<37:02,  1.58s/it][A[A[A[A[A




 30%|██▉       | 592/2000 [13:59<29:45,  1.27s/it][A[A[A[A[A




 30%|██▉       | 593/2000 [14:02<39:35,  1.69s/it][A[A[A[A[A




 30%|██▉       | 594/2000 [14:03<36:03,  1.54s/it][A[A[A[A[A




 30%|██▉       | 595/2000 [14:06<44:39,  1.91s/it][A[A[A[A[A




 30%|██▉       | 596/2000 [14:06<33:43,  1.44s/it][A[A[A[A[A




 30%|██▉       | 597/2000 [14:07<31:08,  1.33s/it][A[A[A[A[A




 30%|██▉       | 598/2000 [14:08<29:32,  1.26s/it][A[A[A[A[A




 30%|██▉       | 599/2000 [14:09<24:17,  1.04s/it][A[A[A[A[A




 30%|███       | 600/2000 [14:09<20:08,  1.16it/s][A[A[A[A[A




 30%|███       | 601/2000 [14:10<21:51,  1.07it/s][A[A[A[A[A




 30%|███       | 602/2000 [14:12<24:59,  1.07s/it][A[A[A[A[A




 30%|███       | 603

 35%|███▌      | 706/2000 [16:50<19:29,  1.11it/s][A[A[A[A[A




 35%|███▌      | 707/2000 [16:50<15:01,  1.43it/s][A[A[A[A[A




 35%|███▌      | 708/2000 [16:52<24:21,  1.13s/it][A[A[A[A[A




 35%|███▌      | 709/2000 [16:55<32:35,  1.51s/it][A[A[A[A[A




 36%|███▌      | 710/2000 [16:58<44:00,  2.05s/it][A[A[A[A[A




 36%|███▌      | 711/2000 [17:00<39:56,  1.86s/it][A[A[A[A[A




 36%|███▌      | 712/2000 [17:01<37:11,  1.73s/it][A[A[A[A[A




 36%|███▌      | 713/2000 [17:02<29:45,  1.39s/it][A[A[A[A[A




 36%|███▌      | 714/2000 [17:02<23:17,  1.09s/it][A[A[A[A[A




 36%|███▌      | 715/2000 [17:02<18:18,  1.17it/s][A[A[A[A[A




 36%|███▌      | 716/2000 [17:03<14:32,  1.47it/s][A[A[A[A[A




 36%|███▌      | 717/2000 [17:04<17:32,  1.22it/s][A[A[A[A[A




 36%|███▌      | 718/2000 [17:04<17:21,  1.23it/s][A[A[A[A[A




 36%|███▌      | 719/2000 [17:05<13:28,  1.58it/s][A[A[A[A[A




 36%|███▌      | 720

 41%|████▏     | 825/2000 [19:25<40:31,  2.07s/it][A[A[A[A[A




 41%|████▏     | 826/2000 [19:27<41:56,  2.14s/it][A[A[A[A[A




 41%|████▏     | 827/2000 [19:30<41:47,  2.14s/it][A[A[A[A[A




 41%|████▏     | 828/2000 [19:30<33:53,  1.74s/it][A[A[A[A[A




 41%|████▏     | 829/2000 [19:32<31:59,  1.64s/it][A[A[A[A[A




 42%|████▏     | 830/2000 [19:32<24:05,  1.24s/it][A[A[A[A[A




 42%|████▏     | 831/2000 [19:33<23:03,  1.18s/it][A[A[A[A[A




 42%|████▏     | 832/2000 [19:33<18:16,  1.06it/s][A[A[A[A[A




 42%|████▏     | 833/2000 [19:34<16:28,  1.18it/s][A[A[A[A[A




 42%|████▏     | 834/2000 [19:35<16:57,  1.15it/s][A[A[A[A[A




 42%|████▏     | 835/2000 [19:37<21:05,  1.09s/it][A[A[A[A[A




 42%|████▏     | 836/2000 [19:37<18:06,  1.07it/s][A[A[A[A[A




 42%|████▏     | 837/2000 [19:39<23:46,  1.23s/it][A[A[A[A[A




 42%|████▏     | 838/2000 [19:42<34:50,  1.80s/it][A[A[A[A[A




 42%|████▏     | 839

 47%|████▋     | 942/2000 [22:56<27:05,  1.54s/it][A[A[A[A[A




 47%|████▋     | 943/2000 [22:59<32:38,  1.85s/it][A[A[A[A[A




 47%|████▋     | 944/2000 [23:01<33:01,  1.88s/it][A[A[A[A[A




 47%|████▋     | 945/2000 [23:01<26:11,  1.49s/it][A[A[A[A[A




 47%|████▋     | 946/2000 [23:02<23:09,  1.32s/it][A[A[A[A[A




 47%|████▋     | 947/2000 [23:03<19:43,  1.12s/it][A[A[A[A[A




 47%|████▋     | 948/2000 [23:04<17:58,  1.03s/it][A[A[A[A[A




 47%|████▋     | 949/2000 [23:05<16:42,  1.05it/s][A[A[A[A[A




 48%|████▊     | 950/2000 [23:06<16:07,  1.09it/s][A[A[A[A[A




 48%|████▊     | 951/2000 [23:16<1:03:52,  3.65s/it][A[A[A[A[A




 48%|████▊     | 952/2000 [23:18<56:49,  3.25s/it]  [A[A[A[A[A




 48%|████▊     | 953/2000 [23:18<41:41,  2.39s/it][A[A[A[A[A




 48%|████▊     | 954/2000 [23:18<29:57,  1.72s/it][A[A[A[A[A




 48%|████▊     | 955/2000 [23:19<26:31,  1.52s/it][A[A[A[A[A




 48%|████▊     |

 53%|█████▎    | 1058/2000 [25:42<31:40,  2.02s/it][A[A[A[A[A




 53%|█████▎    | 1059/2000 [25:45<33:43,  2.15s/it][A[A[A[A[A




 53%|█████▎    | 1060/2000 [25:47<35:05,  2.24s/it][A[A[A[A[A




 53%|█████▎    | 1061/2000 [25:47<26:54,  1.72s/it][A[A[A[A[A




 53%|█████▎    | 1062/2000 [25:48<22:44,  1.46s/it][A[A[A[A[A




 53%|█████▎    | 1063/2000 [25:50<22:03,  1.41s/it][A[A[A[A[A




 53%|█████▎    | 1064/2000 [25:51<21:36,  1.39s/it][A[A[A[A[A




 53%|█████▎    | 1065/2000 [25:51<15:35,  1.00s/it][A[A[A[A[A




 53%|█████▎    | 1066/2000 [25:53<18:44,  1.20s/it][A[A[A[A[A




 53%|█████▎    | 1067/2000 [25:53<15:45,  1.01s/it][A[A[A[A[A




 53%|█████▎    | 1068/2000 [25:54<16:20,  1.05s/it][A[A[A[A[A




 53%|█████▎    | 1069/2000 [25:56<16:45,  1.08s/it][A[A[A[A[A




 54%|█████▎    | 1070/2000 [25:56<15:13,  1.02it/s][A[A[A[A[A




 54%|█████▎    | 1071/2000 [25:58<17:10,  1.11s/it][A[A[A[A[A




 54%|█

 59%|█████▊    | 1173/2000 [28:34<18:51,  1.37s/it][A[A[A[A[A




 59%|█████▊    | 1174/2000 [28:37<25:24,  1.85s/it][A[A[A[A[A




 59%|█████▉    | 1175/2000 [28:38<21:16,  1.55s/it][A[A[A[A[A




 59%|█████▉    | 1176/2000 [28:39<19:35,  1.43s/it][A[A[A[A[A




 59%|█████▉    | 1177/2000 [28:40<19:14,  1.40s/it][A[A[A[A[A




 59%|█████▉    | 1178/2000 [28:42<20:00,  1.46s/it][A[A[A[A[A




 59%|█████▉    | 1179/2000 [28:42<15:10,  1.11s/it][A[A[A[A[A




 59%|█████▉    | 1180/2000 [28:42<11:17,  1.21it/s][A[A[A[A[A




 59%|█████▉    | 1181/2000 [28:44<16:44,  1.23s/it][A[A[A[A[A




 59%|█████▉    | 1182/2000 [28:45<13:49,  1.01s/it][A[A[A[A[A




 59%|█████▉    | 1183/2000 [28:46<13:54,  1.02s/it][A[A[A[A[A




 59%|█████▉    | 1184/2000 [28:47<13:58,  1.03s/it][A[A[A[A[A




 59%|█████▉    | 1185/2000 [28:56<46:45,  3.44s/it][A[A[A[A[A




 59%|█████▉    | 1186/2000 [28:57<35:37,  2.63s/it][A[A[A[A[A




 59%|█

 64%|██████▍   | 1288/2000 [31:38<14:04,  1.19s/it][A[A[A[A[A




 64%|██████▍   | 1289/2000 [31:40<17:58,  1.52s/it][A[A[A[A[A




 64%|██████▍   | 1290/2000 [31:42<19:44,  1.67s/it][A[A[A[A[A




 65%|██████▍   | 1291/2000 [31:43<18:05,  1.53s/it][A[A[A[A[A




 65%|██████▍   | 1292/2000 [31:44<17:04,  1.45s/it][A[A[A[A[A




 65%|██████▍   | 1293/2000 [31:44<12:21,  1.05s/it][A[A[A[A[A




 65%|██████▍   | 1294/2000 [31:45<12:04,  1.03s/it][A[A[A[A[A




 65%|██████▍   | 1295/2000 [31:48<17:58,  1.53s/it][A[A[A[A[A




 65%|██████▍   | 1296/2000 [31:48<13:26,  1.15s/it][A[A[A[A[A




 65%|██████▍   | 1297/2000 [31:49<11:19,  1.03it/s][A[A[A[A[A




 65%|██████▍   | 1298/2000 [31:51<13:57,  1.19s/it][A[A[A[A[A




 65%|██████▍   | 1299/2000 [31:53<17:40,  1.51s/it][A[A[A[A[A




 65%|██████▌   | 1300/2000 [31:53<13:41,  1.17s/it][A[A[A[A[A




 65%|██████▌   | 1301/2000 [31:56<20:15,  1.74s/it][A[A[A[A[A




 65%|█

 70%|███████   | 1403/2000 [34:15<13:13,  1.33s/it][A[A[A[A[A




 70%|███████   | 1404/2000 [34:16<11:59,  1.21s/it][A[A[A[A[A




 70%|███████   | 1405/2000 [34:16<08:56,  1.11it/s][A[A[A[A[A




 70%|███████   | 1406/2000 [34:18<13:00,  1.31s/it][A[A[A[A[A




 70%|███████   | 1407/2000 [34:19<13:11,  1.33s/it][A[A[A[A[A




 70%|███████   | 1408/2000 [34:22<17:57,  1.82s/it][A[A[A[A[A




 70%|███████   | 1409/2000 [34:24<15:41,  1.59s/it][A[A[A[A[A




 70%|███████   | 1410/2000 [34:24<12:07,  1.23s/it][A[A[A[A[A




 71%|███████   | 1411/2000 [34:25<12:22,  1.26s/it][A[A[A[A[A




 71%|███████   | 1412/2000 [34:32<29:21,  3.00s/it][A[A[A[A[A




 71%|███████   | 1413/2000 [34:33<21:18,  2.18s/it][A[A[A[A[A




 71%|███████   | 1414/2000 [34:33<15:23,  1.58s/it][A[A[A[A[A




 71%|███████   | 1415/2000 [34:34<13:31,  1.39s/it][A[A[A[A[A




 71%|███████   | 1416/2000 [34:35<14:35,  1.50s/it][A[A[A[A[A




 71%|█

 76%|███████▌  | 1519/2000 [36:45<14:24,  1.80s/it][A[A[A[A[A




 76%|███████▌  | 1520/2000 [36:46<12:26,  1.56s/it][A[A[A[A[A




 76%|███████▌  | 1521/2000 [36:48<13:14,  1.66s/it][A[A[A[A[A




 76%|███████▌  | 1522/2000 [36:53<21:30,  2.70s/it][A[A[A[A[A




 76%|███████▌  | 1523/2000 [36:55<18:50,  2.37s/it][A[A[A[A[A




 76%|███████▌  | 1524/2000 [36:56<16:37,  2.10s/it][A[A[A[A[A




 76%|███████▋  | 1525/2000 [36:57<12:30,  1.58s/it][A[A[A[A[A




 76%|███████▋  | 1526/2000 [36:58<13:05,  1.66s/it][A[A[A[A[A




 76%|███████▋  | 1527/2000 [37:00<13:30,  1.71s/it][A[A[A[A[A




 76%|███████▋  | 1528/2000 [37:01<11:19,  1.44s/it][A[A[A[A[A




 76%|███████▋  | 1529/2000 [37:02<09:52,  1.26s/it][A[A[A[A[A




 76%|███████▋  | 1530/2000 [37:02<07:22,  1.06it/s][A[A[A[A[A




 77%|███████▋  | 1531/2000 [37:03<06:36,  1.18it/s][A[A[A[A[A




 77%|███████▋  | 1532/2000 [37:05<10:04,  1.29s/it][A[A[A[A[A




 77%|█

 82%|████████▏ | 1635/2000 [39:45<08:23,  1.38s/it][A[A[A[A[A




 82%|████████▏ | 1636/2000 [39:46<07:24,  1.22s/it][A[A[A[A[A




 82%|████████▏ | 1637/2000 [39:47<07:01,  1.16s/it][A[A[A[A[A




 82%|████████▏ | 1638/2000 [39:50<09:31,  1.58s/it][A[A[A[A[A




 82%|████████▏ | 1639/2000 [39:50<07:11,  1.20s/it][A[A[A[A[A




 82%|████████▏ | 1640/2000 [39:51<06:27,  1.08s/it][A[A[A[A[A




 82%|████████▏ | 1641/2000 [39:51<05:58,  1.00it/s][A[A[A[A[A




 82%|████████▏ | 1642/2000 [39:54<08:04,  1.35s/it][A[A[A[A[A




 82%|████████▏ | 1643/2000 [39:55<08:11,  1.38s/it][A[A[A[A[A




 82%|████████▏ | 1644/2000 [39:56<06:59,  1.18s/it][A[A[A[A[A




 82%|████████▏ | 1645/2000 [39:56<05:25,  1.09it/s][A[A[A[A[A




 82%|████████▏ | 1646/2000 [39:58<06:26,  1.09s/it][A[A[A[A[A




 82%|████████▏ | 1647/2000 [39:59<06:17,  1.07s/it][A[A[A[A[A




 82%|████████▏ | 1648/2000 [40:01<08:59,  1.53s/it][A[A[A[A[A




 82%|█

 88%|████████▊ | 1751/2000 [42:38<07:41,  1.85s/it][A[A[A[A[A




 88%|████████▊ | 1752/2000 [42:41<09:05,  2.20s/it][A[A[A[A[A




 88%|████████▊ | 1753/2000 [42:42<07:00,  1.70s/it][A[A[A[A[A




 88%|████████▊ | 1754/2000 [42:46<09:50,  2.40s/it][A[A[A[A[A




 88%|████████▊ | 1755/2000 [42:47<08:39,  2.12s/it][A[A[A[A[A




 88%|████████▊ | 1756/2000 [42:48<06:44,  1.66s/it][A[A[A[A[A




 88%|████████▊ | 1757/2000 [42:50<06:38,  1.64s/it][A[A[A[A[A




 88%|████████▊ | 1758/2000 [42:50<05:21,  1.33s/it][A[A[A[A[A




 88%|████████▊ | 1759/2000 [42:57<11:27,  2.85s/it][A[A[A[A[A




 88%|████████▊ | 1760/2000 [42:58<09:25,  2.35s/it][A[A[A[A[A




 88%|████████▊ | 1761/2000 [42:59<07:38,  1.92s/it][A[A[A[A[A




 88%|████████▊ | 1762/2000 [43:00<06:42,  1.69s/it][A[A[A[A[A




 88%|████████▊ | 1763/2000 [43:01<05:50,  1.48s/it][A[A[A[A[A




 88%|████████▊ | 1764/2000 [43:03<06:06,  1.55s/it][A[A[A[A[A




 88%|█

 93%|█████████▎| 1867/2000 [45:10<02:18,  1.04s/it][A[A[A[A[A




 93%|█████████▎| 1868/2000 [45:11<01:47,  1.22it/s][A[A[A[A[A




 93%|█████████▎| 1869/2000 [45:11<01:43,  1.27it/s][A[A[A[A[A




 94%|█████████▎| 1870/2000 [45:12<01:43,  1.26it/s][A[A[A[A[A




 94%|█████████▎| 1871/2000 [45:16<03:46,  1.75s/it][A[A[A[A[A




 94%|█████████▎| 1872/2000 [45:17<03:03,  1.43s/it][A[A[A[A[A




 94%|█████████▎| 1873/2000 [45:19<03:22,  1.59s/it][A[A[A[A[A




 94%|█████████▎| 1874/2000 [45:19<02:27,  1.17s/it][A[A[A[A[A




 94%|█████████▍| 1875/2000 [45:20<02:01,  1.03it/s][A[A[A[A[A




 94%|█████████▍| 1876/2000 [45:20<01:43,  1.20it/s][A[A[A[A[A




 94%|█████████▍| 1877/2000 [45:20<01:23,  1.47it/s][A[A[A[A[A




 94%|█████████▍| 1878/2000 [45:24<02:56,  1.45s/it][A[A[A[A[A




 94%|█████████▍| 1879/2000 [45:24<02:14,  1.11s/it][A[A[A[A[A




 94%|█████████▍| 1880/2000 [45:26<02:30,  1.25s/it][A[A[A[A[A




 94%|█

 99%|█████████▉| 1982/2000 [48:10<00:31,  1.75s/it][A[A[A[A[A




 99%|█████████▉| 1983/2000 [48:14<00:41,  2.42s/it][A[A[A[A[A




 99%|█████████▉| 1984/2000 [48:15<00:34,  2.17s/it][A[A[A[A[A




 99%|█████████▉| 1985/2000 [48:17<00:28,  1.92s/it][A[A[A[A[A




 99%|█████████▉| 1986/2000 [48:17<00:22,  1.60s/it][A[A[A[A[A




 99%|█████████▉| 1987/2000 [48:18<00:16,  1.24s/it][A[A[A[A[A




 99%|█████████▉| 1988/2000 [48:19<00:13,  1.11s/it][A[A[A[A[A




 99%|█████████▉| 1989/2000 [48:20<00:14,  1.30s/it][A[A[A[A[A




100%|█████████▉| 1990/2000 [48:22<00:13,  1.35s/it][A[A[A[A[A




100%|█████████▉| 1991/2000 [48:23<00:11,  1.31s/it][A[A[A[A[A




100%|█████████▉| 1992/2000 [48:25<00:11,  1.46s/it][A[A[A[A[A




100%|█████████▉| 1993/2000 [48:26<00:08,  1.25s/it][A[A[A[A[A




100%|█████████▉| 1994/2000 [48:29<00:10,  1.83s/it][A[A[A[A[A




100%|█████████▉| 1995/2000 [48:30<00:07,  1.55s/it][A[A[A[A[A




100%|█

In [74]:
import tqdm

for sentence in tqdm.tqdm(train_corpus):
    es.index(index='test_t9', doc_type='my_type', body={
        'text': sentence
    })

 17%|█▋        | 31900/187405 [08:04<42:24, 61.11it/s]  

KeyboardInterrupt: 