## Elasticsearch

> How to Query Elasticsearch with Python

> https://marcobonzanini.com/2015/02/02/how-to-query-elasticsearch-with-python/

In [2]:
!pip install elasticsearch


Collecting elasticsearch
  Downloading elasticsearch-7.13.0-py2.py3-none-any.whl (354 kB)
[K     |████████████████████████████████| 354 kB 783 kB/s eta 0:00:01
Installing collected packages: elasticsearch
Successfully installed elasticsearch-7.13.0


In [4]:
# Make sure ES is up and running before any indexing is attempted.
import requests

# The timeout keeps the cell from hanging indefinitely when the host is
# unreachable; raise_for_status() fails loudly on an HTTP error response
# instead of printing its body as if it were the cluster banner.
res = requests.get('http://elasticsearch:9200', timeout=10)
res.raise_for_status()
print(res.content)

b'{\n  "name" : "bdcc249eb467",\n  "cluster_name" : "docker-cluster",\n  "cluster_uuid" : "Sd28q8SoRKm-ktPgXKZGdw",\n  "version" : {\n    "number" : "7.8.1",\n    "build_flavor" : "default",\n    "build_type" : "docker",\n    "build_hash" : "b5ca9c58fb664ca8bf9e4057fc229b3396bf3a89",\n    "build_date" : "2020-07-21T16:40:44.668009Z",\n    "build_snapshot" : false,\n    "lucene_version" : "8.5.1",\n    "minimum_wire_compatibility_version" : "6.8.0",\n    "minimum_index_compatibility_version" : "6.0.0-beta1"\n  },\n  "tagline" : "You Know, for Search"\n}\n'


In [9]:
# Create the client used by every indexing/search cell below.
from elasticsearch import Elasticsearch

es = Elasticsearch(
    [
        {'host': 'elasticsearch', 'port': 9200},
    ]
)

In [7]:
# Three sample product records (name, supplier, quantity, unit cost).
# NOTE(review): within the visible notebook this list is never indexed;
# the name `Data` is rebound to a different list in a later cell.
Data = [
    {
        "_id": {"$oid": "5968dd23fc13ae04d9000001"},
        "product_name": "sildenafil citrate",
        "supplier": "Wisozk Inc",
        "quantity": 261,
        "unit_cost": "$10.47",
    },
    {
        "_id": {"$oid": "5968dd23fc13ae04d9000002"},
        "product_name": "Mountain Juniperus ashei",
        "supplier": "Keebler-Hilpert",
        "quantity": 292,
        "unit_cost": "$8.74",
    },
    {
        "_id": {"$oid": "5968dd23fc13ae04d9000003"},
        "product_name": "Dextromathorphan HBr",
        "supplier": "Schmitt-Weissnat",
        "quantity": 211,
        "unit_cost": "$20.53",
    },
]

In [10]:
# Write a throwaway document into the 'sales' index to check indexing works.
es.index(
    index='sales',
    doc_type='sales',
    id=1,
    body={'test': 'test'},
)



{'_index': 'sales',
 '_type': 'sales',
 '_id': '1',
 '_version': 1,
 'result': 'created',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 0,
 '_primary_term': 1}

In [11]:
# Same throwaway document, this time in a scratch index that is removed
# again in the next cell.
es.index(
    index='test-index1',
    doc_type='test1',
    id=1,
    body={'test': 'test'},
)

{'_index': 'test-index1',
 '_type': 'test1',
 '_id': '1',
 '_version': 1,
 'result': 'created',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 0,
 '_primary_term': 1}

In [12]:

# Remove the scratch document again before moving on to more interesting data.
es.delete(
    index='test-index1',
    doc_type='test1',
    id=1,
)



{'_index': 'test-index1',
 '_type': 'test1',
 '_id': '1',
 '_version': 2,
 'result': 'deleted',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 1,
 '_primary_term': 1}

In [15]:
# Index a single person record into 'sw' under id 1.
es.index(
    index='sw',
    doc_type='people',
    id=1,
    body={
        "name": "Ali Ahmad",
        "height": "172",
        "mass": "77",
        "hair_color": "black",
        "birth_year": "19111997",
        "gender": "male",
    },
)

{'_index': 'sw',
 '_type': 'people',
 '_id': '1',
 '_version': 7,
 'result': 'updated',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 8,
 '_primary_term': 2}

In [19]:
# Person records to be indexed into 'sw'; every value is kept as a string.
Data = [
    {
        "name": "Ali Ahmad",
        "height": "172",
        "mass": "77",
        "hair_color": "black",
        "birth_year": "19111997",
        "gender": "male",
    },
    {
        "name": "Sarah Ahmad",
        "height": "152",
        "mass": "65",
        "hair_color": "brown",
        "birth_year": "20012003",
        "gender": "female",
    },
    {
        "name": "Ali Salman",
        "height": "192",
        "mass": "95",
        "hair_color": "black",
        "birth_year": "19102000",
        "gender": "male",
    },
    {
        "name": "Mohammad Abdellatif",
        "height": "172",
        "mass": "72.5",
        "hair_color": "black",
        "birth_year": "05031985",
        "gender": "male",
    },
]

In [20]:
# Index every record in Data, using its 1-based position as the document id.
for doc_id, person in enumerate(Data, start=1):
    es.index(index='sw', doc_type='people', id=doc_id, body=person)

print("done")

done


In [17]:
# Fetch the document stored under id 3 to confirm the indexing loop worked.
es.get(
    index='sw',
    doc_type='people',
    id=3,
)




{'_index': 'sw',
 '_type': 'people',
 '_id': '3',
 '_version': 2,
 '_seq_no': 5,
 '_primary_term': 1,
 'found': True,
 '_source': {'name': 'Ali Salman',
  'height': '192',
  'mass': '95',
  'hair_color': 'black',
  'birth_year': '19102000',
  'gender': 'male'}}

In [18]:
# Search the 'sw' index for documents whose "name" field matches "Ali".
name_query = {"query": {"match": {"name": "Ali"}}}
es.search(index="sw", body=name_query)

{'took': 3,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 2, 'relation': 'eq'},
  'max_score': 0.4700036,
  'hits': [{'_index': 'sw',
    '_type': 'people',
    '_id': '1',
    '_score': 0.4700036,
    '_source': {'name': 'Ali Ahmad',
     'height': '172',
     'mass': '77',
     'hair_color': 'black',
     'birth_year': '19111997',
     'gender': 'male'}},
   {'_index': 'sw',
    '_type': 'people',
    '_id': '3',
    '_score': 0.4700036,
    '_source': {'name': 'Ali Salman',
     'height': '192',
     'mass': '95',
     'hair_color': 'black',
     'birth_year': '19102000',
     'gender': 'male'}}]}}

In [21]:
%%bash
# Same search endpoint, hit directly over HTTP with no request body.
# The URL is quoted so the shell never treats `?` as a glob character;
# -sS hides the transfer progress meter but still reports errors.
curl -sS -XPOST 'http://localhost:9200/sw/people/_search?pretty=true'

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 3,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "sw",
        "_type" : "people",
        "_id" : "1",
        "_score" : 1.0,
        "_source" : {
          "name" : "Ali Ahmad",
          "height" : "172",
          "mass" : "77",
          "hair_color" : "black",
          "birth_year" : "19111997",
          "gender" : "male"
        }
      },
      {
        "_index" : "sw",
        "_type" : "people",
        "_id" : "2",
        "_score" : 1.0,
        "_source" : {
          "name" : "Sarah Ahmad",
          "height" : "152",
          "mass" : "65",
          "hair_color" : "brown",
          "birth_year" : "20012003",
          "gender" : "female"
        }
      },
      {
        "_index" : "sw",
        "_type" : "people",
        "_id" : "3",


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  1261  100  1261    0     0   111k      0 --:--:-- --:--:-- --:--:--  111k


## Display Kibana

In [1]:
# Embed the page served at localhost:5601 in the notebook output.
from IPython.display import IFrame

HTML = "http://localhost:5601"
IFrame(src=HTML, width=1600, height=800)

## Display a dashboard link

In [24]:
# Embed a specific "goto" link from the same server in the notebook output.
from IPython.display import IFrame

HTML = "http://localhost:5601/goto/45e870a234266053018601eba1b19be0"
IFrame(src=HTML, width=1600, height=800)



In [27]:
import numpy as np
import pandas as pd
from pandas_profiling import ProfileReport

# A seeded generator makes the demo frame (and the report built from it)
# identical on every Restart & Run All instead of changing with each run.
rng = np.random.default_rng(42)

# 100 rows x 5 columns of uniform values in [0, 1).
df = pd.DataFrame(
    rng.random((100, 5)),
    columns=["a", "b", "c", "d", "e"],
)

# Build the profiling report and write it to an HTML file in the working dir.
profile = ProfileReport(df, title="Pandas Profiling Report")
profile.to_file('test.html')

HBox(children=(HTML(value='Export report to file'), FloatProgress(value=0.0, max=1.0), HTML(value='')))




In [31]:
!pip install pandas_profiling==2.7.1

Collecting pandas_profiling==2.7.1
  Downloading pandas_profiling-2.7.1-py2.py3-none-any.whl (252 kB)
Collecting visions[type_image_path]==0.4.1
  Downloading visions-0.4.1-py3-none-any.whl (58 kB)
Installing collected packages: visions, pandas-profiling
  Attempting uninstall: visions
    Found existing installation: visions 0.6.0
    Uninstalling visions-0.6.0:
      Successfully uninstalled visions-0.6.0
  Attempting uninstall: pandas-profiling
    Found existing installation: pandas-profiling 2.11.0
    Uninstalling pandas-profiling-2.11.0:
      Successfully uninstalled pandas-profiling-2.11.0
Successfully installed pandas-profiling-2.7.1 visions-0.4.1


In [51]:
import re

# Raw string: '\d' inside a normal string literal is an invalid escape
# sequence and triggers a warning on current Python versions.
patt = re.compile(r'\d+')

# Collect every run of digits found in Text, in order of appearance.
# NOTE(review): `Text` is not defined in any visible cell — confirm where it
# comes from, otherwise this cell fails on a fresh kernel.
List = patt.findall(Text)
List

['20208001',
 '20208003',
 '20208002',
 '20208004',
 '20208020',
 '20208017',
 '20208021',
 '20208019',
 '20208024',
 '20208032',
 '20208056',
 '20208055',
 '20208062',
 '20208065',
 '20208067',
 '20208075',
 '20208088',
 '20208089',
 '20208081',
 '20208086',
 '20208113',
 '20208110',
 '20208120']

In [61]:
import random
# Pick one entry of List at random. No seed is set in the visible cells, so
# the displayed value is expected to differ between runs.
random.choice(List)

'20208004'

'20208089'