In [1]:
%config IPCompleter.greedy=True
import re
import json
from collections import defaultdict
from tqdm import tqdm_notebook as tqdm
from elasticsearch import Elasticsearch
from elasticsearch.helpers import parallel_bulk
from pymystem3 import Mystem
from sklearn.feature_extraction.text import CountVectorizer
import requests
from time import time

In [2]:
es = Elasticsearch([{'host': 'localhost', 'port': 9200, 'timeout': 360, 'maxsize': 25}])

In [3]:
settings = {
    'mappings': {
        'properties': {
            'content': {
                'type': 'text'
            }
        }
    }
}

In [4]:
def recreate_index():
    es.indices.delete(index='hw2index')
    es.indices.create(index='hw2index', body=settings)

In [5]:
recreate_index()

In [6]:
def create_es_action(index, doc_id, document):
    return {
        '_index': index,
        '_id': doc_id,
        '_source': document
    }

In [7]:
class Document:
    def __init__(self, doc_url, doc_id, sz_bytes, sz_words):
        self.url = doc_url       # document url
        self.id = doc_id         # unique document id (str)
        self.sz_bytes = sz_bytes # document size in bytes before deleting html markup
        self.sz_words = sz_words # number of words in document before deleting html markup
        self.words = []          # list of words in document after deleting html markup
        self.links = []          # lisk of links in document

In [8]:
import os

In [9]:
from tqdm import tqdm
from tqdm import tqdm_notebook
import pickle

class BaseDocumentProcessor:
    def process(self, document):
        pass
    def result(self):
        pass

def process_file(d, f, processor, pbar):
    print("processing", os.path.join(d, f))
    with open(os.path.join(d, f), "rb") as fin:
        while True:
            pbar.update(1)
            try:
                document = pickle.load(fin)
            except:
                break
            processor.process(document)

def process_collection(directory, processor):
    pbar = tqdm(total = 200000)
    for file in os.listdir(directory):
        if (file.endswith(".out")):
            process_file(directory, file, processor, pbar)

In [10]:
COLLECTION_DIRECTORY = "byweb" # directory with .out files to process

class IndexDocs(BaseDocumentProcessor):
    def __init__(self):
        """ do all initialization here """
        self.actions = []
    
    def process(self, document):
        """ document: Document (see first cell)
            process each document here """
        #print(json.dumps({'content' : document.words}))
        self.actions.append(create_es_action('hw2index', document.id, json.dumps({'content' : document.words})))
        
    def result(self):
        return self.actions
        


In [11]:
def es_actions_generator():
    processor = IndexDocs()        
    process_collection(COLLECTION_DIRECTORY, processor)
    return processor.result()

In [12]:
import time

In [13]:
start = time.time()
for ok, result in tqdm_notebook(parallel_bulk(es, es_actions_generator(), queue_size=4, thread_count=4, chunk_size=1000)):
    if not ok:
        print(result)
end = time.time()
print('Time=' + str(end - start))


  0%|          | 230/200000 [00:00<03:15, 1022.83it/s]

processing byweb/byweb.5.out


 10%|█         | 20250/200000 [00:18<02:26, 1228.25it/s]

processing byweb/byweb.4.out


 20%|██        | 40147/200000 [00:34<02:00, 1330.84it/s]

processing byweb/byweb.6.out


 30%|███       | 60168/200000 [00:51<01:36, 1452.38it/s]

processing byweb/byweb.7.out


 40%|████      | 80018/200000 [01:08<02:05, 953.31it/s] 

processing byweb/byweb.3.out


 50%|█████     | 100149/200000 [01:29<01:24, 1177.95it/s]

processing byweb/byweb.2.out


 60%|██████    | 120106/200000 [01:46<00:54, 1467.51it/s]

processing byweb/byweb.0.out


 70%|███████   | 140135/200000 [02:07<01:05, 919.56it/s] 

processing byweb/byweb.1.out


 80%|████████  | 160148/200000 [02:28<00:42, 945.39it/s] 

processing byweb/byweb.9.out


 90%|█████████ | 180101/200000 [02:50<00:31, 623.70it/s] 

processing byweb/byweb.8.out


100%|█████████▉| 199983/200000 [03:12<00:00, 1479.79it/s]

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

200010it [03:30, 1479.79it/s]                            


Time=526.7886590957642


In [14]:
import requests
param = (('v', ''),) # '-v' is for --verbose

# call the class's method to get an HTTP response model
resp = requests.get('http://localhost:9200/_cat/indices', params=param)


In [15]:
resp.text

'health status index    uuid                   pri rep docs.count docs.deleted store.size pri.store.size\nyellow open   hw2index zhD-IERTSaO-K6Z-Ta7X4w   1   1     196928            0      2.9gb          2.9gb\n'

In [16]:
settings = {
    'mappings': {
        'properties': {
            'content': {
                'type': 'text'
            }
        }
    },
    "settings": {
        "analysis" : {
            "analyzer" : {
                "my_analyzer" : {
                    "tokenizer" : "standard",
                    "filter" : ["lowercase", "my_snow"]
                }
            },
            "filter" : {
                "my_snow" : {
                    "type" : "snowball",
                    "language" : "russian"
                }
            }
        }
    }
}

In [17]:
recreate_index()

In [18]:
start = time.time()
for ok, result in tqdm_notebook(parallel_bulk(es, es_actions_generator(), queue_size=4, thread_count=4, chunk_size=1000)):
    if not ok:
        print(result)
end = time.time()
print('Time=' + str(end - start))




  0%|          | 0/200000 [00:00<?, ?it/s][A
  0%|          | 36/200000 [00:00<09:19, 357.70it/s][A
  0%|          | 117/200000 [00:00<07:45, 429.24it/s]

processing byweb/byweb.5.out


[A
  0%|          | 221/200000 [00:00<06:23, 520.52it/s][A
  0%|          | 278/200000 [00:00<06:15, 532.00it/s][A
  0%|          | 351/200000 [00:00<05:45, 577.94it/s][A
  0%|          | 458/200000 [00:00<04:57, 669.64it/s][A
  0%|          | 532/200000 [00:00<05:03, 657.86it/s][A
  0%|          | 641/200000 [00:00<04:27, 744.88it/s][A
  0%|          | 723/200000 [00:00<04:39, 713.36it/s][A
  0%|          | 831/200000 [00:01<04:11, 792.48it/s][A
  0%|          | 917/200000 [00:01<04:38, 715.42it/s][A
  1%|          | 1032/200000 [00:01<04:07, 804.92it/s][A
  1%|          | 1121/200000 [00:01<04:27, 744.27it/s][A
  1%|          | 1275/200000 [00:01<03:45, 880.10it/s][A
  1%|          | 1378/200000 [00:01<04:10, 791.47it/s][A
  1%|          | 1491/200000 [00:01<03:48, 868.41it/s][A
  1%|          | 1589/200000 [00:01<04:23, 753.07it/s][A
  1%|          | 1677/200000 [00:02<04:12, 786.03it/s][A
  1%|          | 1764/200000 [00:02<04:43, 700.36it/s][A
  1%|          | 18

  7%|▋         | 14797/200000 [00:17<04:02, 762.86it/s][A
  7%|▋         | 14888/200000 [00:17<04:22, 705.97it/s][A
  7%|▋         | 14970/200000 [00:18<04:13, 730.06it/s][A
  8%|▊         | 15158/200000 [00:18<03:26, 894.00it/s][A
  8%|▊         | 15271/200000 [00:18<03:33, 864.32it/s][A
  8%|▊         | 15374/200000 [00:18<04:00, 767.51it/s][A
  8%|▊         | 15465/200000 [00:18<04:09, 738.62it/s][A
  8%|▊         | 15549/200000 [00:18<05:01, 611.49it/s][A
  8%|▊         | 15621/200000 [00:18<05:07, 600.49it/s][A
  8%|▊         | 15689/200000 [00:19<05:22, 571.30it/s][A
  8%|▊         | 15752/200000 [00:19<05:16, 582.50it/s][A
  8%|▊         | 15827/200000 [00:19<04:55, 624.17it/s][A
  8%|▊         | 15917/200000 [00:19<04:28, 685.58it/s][A
  8%|▊         | 15990/200000 [00:19<05:03, 606.10it/s][A
  8%|▊         | 16077/200000 [00:19<04:36, 664.71it/s][A
  8%|▊         | 16149/200000 [00:19<04:51, 631.05it/s][A
  8%|▊         | 16232/200000 [00:19<04:31, 677.74it/s]

processing byweb/byweb.4.out



 10%|█         | 20230/200000 [00:23<02:42, 1107.00it/s][A
 10%|█         | 20342/200000 [00:23<02:49, 1058.30it/s][A
 10%|█         | 20450/200000 [00:23<02:55, 1021.84it/s][A
 10%|█         | 20554/200000 [00:24<02:57, 1013.35it/s][A
 10%|█         | 20663/200000 [00:24<02:53, 1035.10it/s][A
 10%|█         | 20768/200000 [00:24<03:01, 986.38it/s] [A
 10%|█         | 20898/200000 [00:24<02:48, 1062.57it/s][A
 11%|█         | 21007/200000 [00:24<02:54, 1028.50it/s][A
 11%|█         | 21113/200000 [00:24<02:52, 1036.32it/s][A
 11%|█         | 21340/200000 [00:24<02:24, 1238.11it/s][A
 11%|█         | 21482/200000 [00:24<02:18, 1287.15it/s][A
 11%|█         | 21659/200000 [00:24<02:07, 1399.40it/s][A
 11%|█         | 21822/200000 [00:25<02:02, 1457.01it/s][A
 11%|█         | 21977/200000 [00:25<02:04, 1431.84it/s][A
 11%|█         | 22127/200000 [00:25<02:13, 1332.57it/s][A
 11%|█         | 22267/200000 [00:25<02:20, 1261.72it/s][A
 11%|█         | 22399/200000 [00:25<02

 19%|█▊        | 37377/200000 [00:39<01:59, 1364.62it/s][A
 19%|█▉        | 37520/200000 [00:39<02:00, 1347.71it/s][A
 19%|█▉        | 37716/200000 [00:39<01:49, 1484.89it/s][A
 19%|█▉        | 37872/200000 [00:39<01:56, 1392.08it/s][A
 19%|█▉        | 38057/200000 [00:39<01:47, 1502.85it/s][A
 19%|█▉        | 38215/200000 [00:39<01:58, 1370.39it/s][A
 19%|█▉        | 38360/200000 [00:39<02:02, 1314.35it/s][A
 19%|█▉        | 38497/200000 [00:40<02:10, 1233.10it/s][A
 19%|█▉        | 38626/200000 [00:40<02:12, 1218.14it/s][A
 19%|█▉        | 38752/200000 [00:40<02:23, 1123.34it/s][A
 19%|█▉        | 38871/200000 [00:40<02:21, 1142.42it/s][A
 19%|█▉        | 38989/200000 [00:40<02:26, 1095.49it/s][A
 20%|█▉        | 39162/200000 [00:40<02:10, 1229.67it/s][A
 20%|█▉        | 39292/200000 [00:40<02:10, 1230.27it/s][A
 20%|█▉        | 39420/200000 [00:40<02:14, 1196.18it/s][A
 20%|█▉        | 39551/200000 [00:40<02:10, 1226.68it/s][A
 20%|█▉        | 39677/200000 [00:41<02:

processing byweb/byweb.6.out



 20%|██        | 40414/200000 [00:41<01:52, 1418.20it/s][A
 20%|██        | 40561/200000 [00:41<01:55, 1380.25it/s][A
 20%|██        | 40747/200000 [00:41<01:46, 1495.96it/s][A
 20%|██        | 40903/200000 [00:41<01:47, 1474.96it/s][A
 21%|██        | 41055/200000 [00:42<01:50, 1433.02it/s][A
 21%|██        | 41202/200000 [00:42<01:53, 1402.00it/s][A
 21%|██        | 41345/200000 [00:42<01:56, 1360.80it/s][A
 21%|██        | 41483/200000 [00:42<02:01, 1309.93it/s][A
 21%|██        | 41623/200000 [00:42<01:59, 1326.26it/s][A
 21%|██        | 41766/200000 [00:42<01:58, 1340.92it/s][A
 21%|██        | 41901/200000 [00:42<02:05, 1262.76it/s][A
 21%|██        | 42064/200000 [00:42<01:56, 1352.85it/s][A
 21%|██        | 42203/200000 [00:42<01:57, 1338.05it/s][A
 21%|██        | 42353/200000 [00:42<01:54, 1378.11it/s][A
 21%|██        | 42493/200000 [00:43<01:54, 1380.11it/s][A
 21%|██▏       | 42633/200000 [00:43<02:02, 1280.22it/s][A
 21%|██▏       | 42792/200000 [00:43<01

 29%|██▉       | 58537/200000 [00:56<01:49, 1292.59it/s][A
 29%|██▉       | 58672/200000 [00:57<01:54, 1237.23it/s][A
 29%|██▉       | 58813/200000 [00:57<01:50, 1281.39it/s][A
 29%|██▉       | 58945/200000 [00:57<01:49, 1284.49it/s][A
 30%|██▉       | 59076/200000 [00:57<01:54, 1228.95it/s][A
 30%|██▉       | 59204/200000 [00:57<01:53, 1242.93it/s][A
 30%|██▉       | 59330/200000 [00:57<01:59, 1181.17it/s][A
 30%|██▉       | 59450/200000 [00:57<02:02, 1150.26it/s][A
 30%|██▉       | 59594/200000 [00:57<01:54, 1223.35it/s][A
 30%|██▉       | 59719/200000 [00:57<01:58, 1179.80it/s][A
 30%|██▉       | 59851/200000 [00:58<01:55, 1218.51it/s][A
 30%|███       | 60016/200000 [00:58<01:45, 1322.08it/s][A
 30%|███       | 60152/200000 [00:58<01:55, 1211.97it/s][A

processing byweb/byweb.7.out



 30%|███       | 60287/200000 [00:58<01:51, 1250.12it/s][A
 30%|███       | 60416/200000 [00:58<02:08, 1086.01it/s][A
 30%|███       | 60531/200000 [00:58<02:17, 1012.06it/s][A
 30%|███       | 60658/200000 [00:58<02:09, 1077.37it/s][A
 30%|███       | 60771/200000 [00:58<02:11, 1055.73it/s][A
 30%|███       | 60881/200000 [00:59<02:23, 969.76it/s] [A
 30%|███       | 60982/200000 [00:59<02:27, 944.63it/s][A
 31%|███       | 61099/200000 [00:59<02:18, 1001.23it/s][A
 31%|███       | 61203/200000 [00:59<02:24, 959.14it/s] [A
 31%|███       | 61302/200000 [00:59<02:23, 966.90it/s][A
 31%|███       | 61413/200000 [00:59<02:17, 1004.33it/s][A
 31%|███       | 61554/200000 [00:59<02:05, 1099.02it/s][A
 31%|███       | 61668/200000 [00:59<02:13, 1034.29it/s][A
 31%|███       | 61777/200000 [00:59<02:11, 1050.26it/s][A
 31%|███       | 61963/200000 [00:59<01:55, 1196.56it/s][A
 31%|███       | 62092/200000 [01:00<02:11, 1049.82it/s][A
 31%|███       | 62207/200000 [01:00<02:2

 38%|███▊      | 76273/200000 [01:14<02:03, 1000.36it/s][A
 38%|███▊      | 76394/200000 [01:14<02:36, 790.51it/s] [A
 38%|███▊      | 76570/200000 [01:14<02:10, 946.84it/s][A
 38%|███▊      | 76705/200000 [01:15<01:58, 1037.89it/s][A
 38%|███▊      | 76832/200000 [01:15<02:20, 874.64it/s] [A
 38%|███▊      | 76940/200000 [01:15<02:54, 705.83it/s][A
 39%|███▊      | 77030/200000 [01:15<03:36, 567.24it/s][A
 39%|███▊      | 77113/200000 [01:15<03:16, 625.05it/s][A
 39%|███▊      | 77190/200000 [01:15<03:25, 598.65it/s][A
 39%|███▊      | 77349/200000 [01:16<02:46, 736.38it/s][A
 39%|███▊      | 77446/200000 [01:16<03:02, 672.61it/s][A
 39%|███▉      | 77531/200000 [01:16<03:05, 660.66it/s][A
 39%|███▉      | 77621/200000 [01:16<02:50, 716.60it/s][A
 39%|███▉      | 77703/200000 [01:16<03:01, 673.64it/s][A
 39%|███▉      | 77787/200000 [01:16<02:50, 715.34it/s][A
 39%|███▉      | 77868/200000 [01:16<02:44, 740.42it/s][A
 39%|███▉      | 78000/200000 [01:16<02:23, 851.88it

processing byweb/byweb.3.out



 40%|████      | 80305/200000 [01:19<01:48, 1105.01it/s][A
 40%|████      | 80426/200000 [01:19<02:25, 823.27it/s] [A
 40%|████      | 80526/200000 [01:19<02:22, 835.90it/s][A
 40%|████      | 80634/200000 [01:19<02:13, 892.69it/s][A
 40%|████      | 80733/200000 [01:19<02:22, 835.64it/s][A
 40%|████      | 80825/200000 [01:20<02:24, 822.12it/s][A
 40%|████      | 80913/200000 [01:20<02:40, 740.47it/s][A
 41%|████      | 81039/200000 [01:20<02:21, 842.48it/s][A
 41%|████      | 81132/200000 [01:20<02:27, 804.90it/s][A
 41%|████      | 81223/200000 [01:20<02:23, 830.39it/s][A
 41%|████      | 81311/200000 [01:20<02:23, 824.27it/s][A
 41%|████      | 81415/200000 [01:20<02:15, 876.40it/s][A
 41%|████      | 81506/200000 [01:20<02:39, 743.50it/s][A
 41%|████      | 81597/200000 [01:21<02:30, 785.06it/s][A
 41%|████      | 81681/200000 [01:21<03:11, 619.18it/s][A
 41%|████      | 81772/200000 [01:21<02:52, 684.43it/s][A
 41%|████      | 81849/200000 [01:21<03:13, 609.47it/

 47%|████▋     | 94314/200000 [01:35<02:36, 675.25it/s][A
 47%|████▋     | 94429/200000 [01:35<02:17, 769.07it/s][A
 47%|████▋     | 94513/200000 [01:35<02:16, 771.30it/s][A
 47%|████▋     | 94614/200000 [01:36<02:07, 828.70it/s][A
 47%|████▋     | 94716/200000 [01:36<02:05, 837.42it/s][A
 47%|████▋     | 94809/200000 [01:36<02:02, 861.98it/s][A
 47%|████▋     | 94939/200000 [01:36<01:49, 958.77it/s][A
 48%|████▊     | 95040/200000 [01:36<01:49, 954.72it/s][A
 48%|████▊     | 95151/200000 [01:36<01:45, 993.53it/s][A
 48%|████▊     | 95263/200000 [01:36<01:41, 1028.29it/s][A
 48%|████▊     | 95369/200000 [01:36<01:41, 1026.75it/s][A
 48%|████▊     | 95520/200000 [01:36<01:32, 1133.47it/s][A
 48%|████▊     | 95638/200000 [01:36<01:35, 1097.92it/s][A
 48%|████▊     | 95784/200000 [01:37<01:28, 1180.06it/s][A
 48%|████▊     | 95907/200000 [01:37<01:30, 1148.49it/s][A
 48%|████▊     | 96025/200000 [01:37<01:32, 1120.86it/s][A
 48%|████▊     | 96140/200000 [01:37<01:50, 935.7

processing byweb/byweb.2.out



 50%|█████     | 100269/200000 [01:41<01:53, 881.53it/s][A
 50%|█████     | 100361/200000 [01:41<01:56, 854.67it/s][A
 50%|█████     | 100449/200000 [01:41<01:57, 846.75it/s][A
 50%|█████     | 100536/200000 [01:41<01:57, 848.36it/s][A
 50%|█████     | 100629/200000 [01:41<01:54, 871.05it/s][A
 50%|█████     | 100718/200000 [01:41<01:53, 876.22it/s][A
 50%|█████     | 100807/200000 [01:41<01:54, 864.85it/s][A
 50%|█████     | 100895/200000 [01:42<02:40, 617.80it/s][A
 50%|█████     | 100968/200000 [01:42<03:25, 482.57it/s][A
 51%|█████     | 101079/200000 [01:42<02:50, 580.94it/s][A
 51%|█████     | 101165/200000 [01:42<02:33, 643.52it/s][A
 51%|█████     | 101259/200000 [01:42<02:18, 710.37it/s][A
 51%|█████     | 101384/200000 [01:42<02:00, 815.15it/s][A
 51%|█████     | 101506/200000 [01:42<01:48, 904.52it/s][A
 51%|█████     | 101628/200000 [01:42<01:40, 980.35it/s][A
 51%|█████     | 101737/200000 [01:43<01:38, 1002.51it/s][A
 51%|█████     | 101867/200000 [01:43<

 58%|█████▊    | 116832/200000 [01:56<01:03, 1300.98it/s][A
 58%|█████▊    | 116966/200000 [01:57<01:06, 1250.33it/s][A
 59%|█████▊    | 117094/200000 [01:57<01:06, 1253.44it/s][A
 59%|█████▊    | 117222/200000 [01:57<01:11, 1157.48it/s][A
 59%|█████▊    | 117341/200000 [01:57<01:16, 1076.67it/s][A
 59%|█████▊    | 117452/200000 [01:57<01:16, 1073.43it/s][A
 59%|█████▉    | 117562/200000 [01:57<01:16, 1079.86it/s][A
 59%|█████▉    | 117672/200000 [01:57<01:23, 983.52it/s] [A
 59%|█████▉    | 117795/200000 [01:57<01:18, 1045.19it/s][A
 59%|█████▉    | 117903/200000 [01:57<01:21, 1004.52it/s][A
 59%|█████▉    | 118013/200000 [01:58<01:19, 1029.82it/s][A
 59%|█████▉    | 118136/200000 [01:58<01:15, 1082.67it/s][A
 59%|█████▉    | 118247/200000 [01:58<01:17, 1050.28it/s][A
 59%|█████▉    | 118438/200000 [01:58<01:07, 1213.61it/s][A
 59%|█████▉    | 118570/200000 [01:58<01:19, 1017.91it/s][A
 59%|█████▉    | 118710/200000 [01:58<01:13, 1105.70it/s][A
 59%|█████▉    | 118832/

processing byweb/byweb.0.out



 60%|██████    | 120318/200000 [01:59<01:03, 1263.32it/s][A
 60%|██████    | 120455/200000 [02:00<01:08, 1161.55it/s][A
 60%|██████    | 120638/200000 [02:00<01:01, 1287.26it/s][A
 60%|██████    | 120778/200000 [02:00<01:04, 1227.60it/s][A
 60%|██████    | 120909/200000 [02:00<01:08, 1152.84it/s][A
 61%|██████    | 121031/200000 [02:00<01:08, 1154.52it/s][A
 61%|██████    | 121152/200000 [02:00<01:07, 1170.58it/s][A
 61%|██████    | 121273/200000 [02:00<01:21, 970.83it/s] [A
 61%|██████    | 121472/200000 [02:00<01:08, 1146.05it/s][A
 61%|██████    | 121605/200000 [02:01<01:14, 1047.73it/s][A
 61%|██████    | 121823/200000 [02:01<01:03, 1236.59it/s][A
 61%|██████    | 121971/200000 [02:01<01:16, 1021.30it/s][A
 61%|██████    | 122097/200000 [02:01<01:18, 997.64it/s] [A
 61%|██████    | 122213/200000 [02:01<01:15, 1034.36it/s][A
 61%|██████    | 122330/200000 [02:01<01:12, 1068.72it/s][A
 61%|██████    | 122446/200000 [02:01<01:18, 993.20it/s] [A
 61%|██████▏   | 122553

 68%|██████▊   | 135682/200000 [02:16<01:01, 1052.76it/s][A
 68%|██████▊   | 135855/200000 [02:16<00:53, 1191.26it/s][A
 68%|██████▊   | 135993/200000 [02:16<01:01, 1035.47it/s][A
 68%|██████▊   | 136114/200000 [02:16<01:06, 956.92it/s] [A
 68%|██████▊   | 136229/200000 [02:16<01:03, 1007.36it/s][A
 68%|██████▊   | 136340/200000 [02:16<01:06, 950.73it/s] [A
 68%|██████▊   | 136449/200000 [02:16<01:04, 987.07it/s][A
 68%|██████▊   | 136554/200000 [02:17<01:03, 992.28it/s][A
 68%|██████▊   | 136658/200000 [02:17<01:10, 903.51it/s][A
 68%|██████▊   | 136794/200000 [02:17<01:03, 1001.57it/s][A
 68%|██████▊   | 136901/200000 [02:17<01:07, 940.67it/s] [A
 69%|██████▊   | 137094/200000 [02:17<00:56, 1103.79it/s][A
 69%|██████▊   | 137220/200000 [02:17<01:06, 944.32it/s] [A
 69%|██████▊   | 137390/200000 [02:17<00:57, 1087.79it/s][A
 69%|██████▉   | 137517/200000 [02:17<01:00, 1026.73it/s][A
 69%|██████▉   | 137633/200000 [02:18<01:05, 953.78it/s] [A
 69%|██████▉   | 137739/200

processing byweb/byweb.1.out



 70%|███████   | 140230/200000 [02:20<01:08, 866.69it/s][A
 70%|███████   | 140345/200000 [02:21<01:04, 928.94it/s][A
 70%|███████   | 140442/200000 [02:21<01:06, 895.45it/s][A
 70%|███████   | 140567/200000 [02:21<01:00, 975.99it/s][A
 70%|███████   | 140669/200000 [02:21<01:08, 866.99it/s][A
 70%|███████   | 140769/200000 [02:21<01:05, 900.19it/s][A
 70%|███████   | 140863/200000 [02:21<01:05, 905.55it/s][A
 70%|███████   | 140957/200000 [02:21<01:06, 894.54it/s][A
 71%|███████   | 141083/200000 [02:21<01:00, 973.73it/s][A
 71%|███████   | 141184/200000 [02:21<01:06, 879.07it/s][A
 71%|███████   | 141285/200000 [02:22<01:04, 913.30it/s][A
 71%|███████   | 141380/200000 [02:22<01:03, 920.26it/s][A
 71%|███████   | 141475/200000 [02:22<01:08, 856.32it/s][A
 71%|███████   | 141593/200000 [02:22<01:02, 932.39it/s][A
 71%|███████   | 141690/200000 [02:22<01:06, 872.86it/s][A
 71%|███████   | 141807/200000 [02:22<01:01, 943.64it/s][A
 71%|███████   | 141949/200000 [02:22<0

 77%|███████▋  | 153489/200000 [02:37<00:50, 922.59it/s][A
 77%|███████▋  | 153606/200000 [02:37<00:47, 984.85it/s][A
 77%|███████▋  | 153786/200000 [02:37<00:40, 1138.40it/s][A
 77%|███████▋  | 153911/200000 [02:38<00:45, 1002.06it/s][A
 77%|███████▋  | 154022/200000 [02:38<00:45, 1012.00it/s][A
 77%|███████▋  | 154131/200000 [02:38<01:03, 723.03it/s] [A
 77%|███████▋  | 154270/200000 [02:38<00:54, 843.95it/s][A
 77%|███████▋  | 154374/200000 [02:38<00:56, 809.08it/s][A
 77%|███████▋  | 154469/200000 [02:38<00:54, 837.82it/s][A
 77%|███████▋  | 154602/200000 [02:38<00:48, 941.72it/s][A
 77%|███████▋  | 154762/200000 [02:39<00:42, 1073.39it/s][A
 77%|███████▋  | 154884/200000 [02:39<00:43, 1041.61it/s][A
 77%|███████▋  | 154999/200000 [02:39<00:42, 1051.38it/s][A
 78%|███████▊  | 155112/200000 [02:39<00:44, 1000.03it/s][A
 78%|███████▊  | 155218/200000 [02:39<00:47, 942.27it/s] [A
 78%|███████▊  | 155317/200000 [02:39<00:56, 796.55it/s][A
 78%|███████▊  | 155416/200000 

processing byweb/byweb.9.out



 80%|████████  | 160245/200000 [02:44<00:48, 817.48it/s][A
 80%|████████  | 160333/200000 [02:44<00:47, 834.40it/s][A
 80%|████████  | 160434/200000 [02:45<00:45, 877.92it/s][A
 80%|████████  | 160547/200000 [02:45<00:41, 940.63it/s][A
 80%|████████  | 160645/200000 [02:45<00:41, 949.11it/s][A
 80%|████████  | 160758/200000 [02:45<00:39, 994.99it/s][A
 80%|████████  | 160860/200000 [02:45<00:42, 921.63it/s][A
 80%|████████  | 160955/200000 [02:45<00:46, 842.94it/s][A
 81%|████████  | 161081/200000 [02:45<00:41, 935.35it/s][A
 81%|████████  | 161227/200000 [02:45<00:36, 1048.27it/s][A
 81%|████████  | 161340/200000 [02:45<00:36, 1051.86it/s][A
 81%|████████  | 161451/200000 [02:46<00:36, 1064.23it/s][A
 81%|████████  | 161593/200000 [02:46<00:33, 1147.87it/s][A
 81%|████████  | 161713/200000 [02:46<00:35, 1064.50it/s][A
 81%|████████  | 161836/200000 [02:46<00:34, 1109.17it/s][A
 81%|████████  | 161951/200000 [02:46<00:34, 1100.95it/s][A
 81%|████████  | 162064/200000 [

 87%|████████▋ | 173714/200000 [03:01<00:31, 827.32it/s][A
 87%|████████▋ | 173819/200000 [03:01<00:29, 882.96it/s][A
 87%|████████▋ | 173911/200000 [03:01<00:29, 888.09it/s][A
 87%|████████▋ | 174062/200000 [03:01<00:25, 1011.55it/s][A
 87%|████████▋ | 174171/200000 [03:01<00:27, 949.11it/s] [A
 87%|████████▋ | 174308/200000 [03:01<00:24, 1042.24it/s][A
 87%|████████▋ | 174420/200000 [03:02<00:24, 1037.86it/s][A
 87%|████████▋ | 174557/200000 [03:02<00:22, 1119.19it/s][A
 87%|████████▋ | 174675/200000 [03:02<00:22, 1129.44it/s][A
 87%|████████▋ | 174827/200000 [03:02<00:20, 1217.19it/s][A
 87%|████████▋ | 174954/200000 [03:02<00:21, 1165.32it/s][A
 88%|████████▊ | 175098/200000 [03:02<00:20, 1235.57it/s][A
 88%|████████▊ | 175226/200000 [03:02<00:20, 1195.03it/s][A
 88%|████████▊ | 175349/200000 [03:02<00:20, 1194.78it/s][A
 88%|████████▊ | 175471/200000 [03:02<00:21, 1145.42it/s][A
 88%|████████▊ | 175588/200000 [03:03<00:25, 952.14it/s] [A
 88%|████████▊ | 175707/200

processing byweb/byweb.8.out



 90%|█████████ | 180243/200000 [03:08<00:20, 957.70it/s][A
 90%|█████████ | 180344/200000 [03:09<00:21, 935.06it/s][A
 90%|█████████ | 180442/200000 [03:09<00:20, 942.29it/s][A
 90%|█████████ | 180539/200000 [03:09<00:20, 931.98it/s][A
 90%|█████████ | 180635/200000 [03:09<00:21, 905.16it/s][A
 90%|█████████ | 180727/200000 [03:09<00:22, 869.47it/s][A
 90%|█████████ | 180816/200000 [03:09<00:22, 855.89it/s][A
 90%|█████████ | 180919/200000 [03:09<00:21, 898.31it/s][A
 91%|█████████ | 181011/200000 [03:09<00:21, 903.48it/s][A
 91%|█████████ | 181128/200000 [03:09<00:19, 966.97it/s][A
 91%|█████████ | 181227/200000 [03:10<00:20, 933.84it/s][A
 91%|█████████ | 181345/200000 [03:10<00:18, 993.33it/s][A
 91%|█████████ | 181447/200000 [03:10<00:19, 972.44it/s][A
 91%|█████████ | 181546/200000 [03:10<00:20, 917.95it/s][A
 91%|█████████ | 181640/200000 [03:10<00:21, 851.03it/s][A
 91%|█████████ | 181728/200000 [03:10<00:21, 858.28it/s][A
 91%|█████████ | 181845/200000 [03:10<0

 97%|█████████▋| 193484/200000 [03:24<00:09, 708.74it/s][A
 97%|█████████▋| 193569/200000 [03:24<00:08, 745.91it/s][A
 97%|█████████▋| 193646/200000 [03:25<00:10, 590.65it/s][A
 97%|█████████▋| 193712/200000 [03:25<00:23, 270.46it/s][A
 97%|█████████▋| 193762/200000 [03:25<00:26, 233.55it/s][A
 97%|█████████▋| 193802/200000 [03:26<00:29, 207.96it/s][A
 97%|█████████▋| 193876/200000 [03:26<00:23, 265.11it/s][A
 97%|█████████▋| 193932/200000 [03:26<00:19, 314.66it/s][A
 97%|█████████▋| 194031/200000 [03:26<00:15, 395.62it/s][A
 97%|█████████▋| 194095/200000 [03:26<00:13, 437.35it/s][A
 97%|█████████▋| 194201/200000 [03:26<00:10, 530.76it/s][A
 97%|█████████▋| 194290/200000 [03:26<00:09, 602.42it/s][A
 97%|█████████▋| 194392/200000 [03:26<00:08, 686.24it/s][A
 97%|█████████▋| 194478/200000 [03:26<00:08, 631.27it/s][A
 97%|█████████▋| 194563/200000 [03:27<00:07, 683.29it/s][A
 97%|█████████▋| 194645/200000 [03:27<00:07, 716.68it/s][A
 97%|█████████▋| 194725/200000 [03:27<00

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


200010it [03:47, 627.02it/s]                            [A


Time=529.8263740539551


In [20]:
requests.get('http://localhost:9200/_cat/indices', params=param).text

'health status index    uuid                   pri rep docs.count docs.deleted store.size pri.store.size\nyellow open   hw2index -uNUMQd-S-u79MiVyW-0tg   1   1     193690            0      2.8gb          2.8gb\n'