In [1]:
import json
import os

import pandas as pd
import requests
from pyspark import SparkConf
from pyspark.context import SparkContext as sc
from pyspark.sql import SparkSession

# --- Request configuration for the bed-occupancy search endpoint ---

# Elasticsearch-style `_search` endpoint that receives the query.
url = "https://elastic-leitos.saude.gov.br/leito_ocupacao/_search"

# Two-letter state code matched against the `estadoSigla` field.
STATE_CODE = "MA"

# Maximum number of hits requested in a single response.
MAX_HITS = 10000

# Default `Authorization` header value (HTTP Basic scheme). Set the
# LEITOS_API_AUTHORIZATION environment variable to supply a different
# credential without editing the notebook.
# NOTE(review): a credential kept in source is only acceptable if it is a
# published, read-only one -- confirm, otherwise remove this default.
DEFAULT_AUTHORIZATION = 'Basic dXNlci1hcGktbGVpdG9zOmFRYkxMM1pTdGFUcjM4dGo='

# JSON request body: up to MAX_HITS documents whose `estadoSigla` matches
# STATE_CODE.
payload = json.dumps({
    "size": MAX_HITS,
    "query": {
        "bool": {
            "must": [
                {"match": {"estadoSigla": STATE_CODE}},
            ]
        }
    },
})

# HTTP headers sent with the request; the body is declared as JSON.
headers = {
    'Authorization': os.environ.get("LEITOS_API_AUTHORIZATION", DEFAULT_AUTHORIZATION),
    'Content-Type': 'application/json',
}


In [2]:
# Obtain the SparkSession used by the rest of the notebook (an existing
# session is reused, otherwise one is created with default settings).
spark = SparkSession.builder.getOrCreate()

In [3]:
# POST the search query. The explicit timeout (seconds) keeps this cell from
# blocking forever on an unresponsive server; requests applies no timeout
# unless one is given.
response = requests.post(url, headers=headers, data=payload, timeout=30)

# Raise immediately on a 4xx/5xx status so an error body is never handed to
# the JSON parsing step as if it were data.
response.raise_for_status()

In [4]:
# Use the SparkContext that backs the `spark` session created earlier.
# The session has already started the context, so a SparkConf passed to
# SparkContext.getOrCreate() at this point would be ignored; taking the
# context from the session makes that explicit and keeps this cell
# idempotent when it is re-run.
sc = spark.sparkContext


In [5]:
# Wrap the raw response body in a one-element RDD so Spark's JSON reader can
# parse it and infer a schema.
response_rdd = sc.parallelize([response.text])
df = spark.read.json(response_rdd)

In [6]:
# Preview the parsed response (defaults written out: up to 20 rows, long
# cell values truncated).
df.show(n=20, truncate=True)

+------------+--------------------+---------+----+
|     _shards|                hits|timed_out|took|
+------------+--------------------+---------+----+
|[0, 0, 3, 3]|[[[2457156, leito...|    false|  12|
+------------+--------------------+---------+----+



In [7]:
# Print the inferred schema as a tree to see how the response is nested.
df.printSchema()

root
 |-- _shards: struct (nullable = true)
 |    |-- failed: long (nullable = true)
 |    |-- skipped: long (nullable = true)
 |    |-- successful: long (nullable = true)
 |    |-- total: long (nullable = true)
 |-- hits: struct (nullable = true)
 |    |-- hits: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- _id: string (nullable = true)
 |    |    |    |-- _index: string (nullable = true)
 |    |    |    |-- _score: double (nullable = true)
 |    |    |    |-- _source: struct (nullable = true)
 |    |    |    |    |-- algumaOcupacaoInformada: boolean (nullable = true)
 |    |    |    |    |-- altas: long (nullable = true)
 |    |    |    |    |-- cnes: string (nullable = true)
 |    |    |    |    |-- dataNotificacaoOcupacao: string (nullable = true)
 |    |    |    |    |-- estado: string (nullable = true)
 |    |    |    |    |-- estadoSigla: string (nullable = true)
 |    |    |    |    |-- municipio: string (nullable = true)
 |   

In [8]:
# Keep only the nested `hits.hits` field (the array of hit structs) as a
# single column.
hits_array = df["hits"]["hits"]
df2 = df.select(hits_array)

In [9]:
# Print the schema of the selected `hits.hits` column.
df2.printSchema()

root
 |-- hits.hits: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- _id: string (nullable = true)
 |    |    |-- _index: string (nullable = true)
 |    |    |-- _score: double (nullable = true)
 |    |    |-- _source: struct (nullable = true)
 |    |    |    |-- algumaOcupacaoInformada: boolean (nullable = true)
 |    |    |    |-- altas: long (nullable = true)
 |    |    |    |-- cnes: string (nullable = true)
 |    |    |    |-- dataNotificacaoOcupacao: string (nullable = true)
 |    |    |    |-- estado: string (nullable = true)
 |    |    |    |-- estadoSigla: string (nullable = true)
 |    |    |    |-- municipio: string (nullable = true)
 |    |    |    |-- nomeCnes: string (nullable = true)
 |    |    |    |-- obitos: long (nullable = true)
 |    |    |    |-- ocupHospCli: long (nullable = true)
 |    |    |    |-- ocupHospUti: long (nullable = true)
 |    |    |    |-- ocupSRAGCli: long (nullable = true)
 |    |    |    |-- ocupSRAGUti: l

In [10]:
# Register the parsed response as a temporary view so it can be queried
# with Spark SQL.
df.createOrReplaceTempView('source_view')

# Explode the `hits.hits` array into one row per hit and expand each hit's
# `_source` struct into top-level columns.
flatten_hits_sql = """select
  r._source.*
from source_view
lateral view explode(hits.hits) as r
"""
df5 = spark.sql(flatten_hits_sql)

# Show the flattened schema: one column per `_source` field.
df5.printSchema()

root
 |-- algumaOcupacaoInformada: boolean (nullable = true)
 |-- altas: long (nullable = true)
 |-- cnes: string (nullable = true)
 |-- dataNotificacaoOcupacao: string (nullable = true)
 |-- estado: string (nullable = true)
 |-- estadoSigla: string (nullable = true)
 |-- municipio: string (nullable = true)
 |-- nomeCnes: string (nullable = true)
 |-- obitos: long (nullable = true)
 |-- ocupHospCli: long (nullable = true)
 |-- ocupHospUti: long (nullable = true)
 |-- ocupSRAGCli: long (nullable = true)
 |-- ocupSRAGUti: long (nullable = true)
 |-- ocupacaoInformada: boolean (nullable = true)
 |-- ofertaHospCli: long (nullable = true)
 |-- ofertaHospUti: long (nullable = true)
 |-- ofertaRespiradores: long (nullable = true)
 |-- ofertaSRAGCli: long (nullable = true)
 |-- ofertaSRAGUti: long (nullable = true)



In [12]:
# Preview the flattened records (defaults written out: up to 20 rows, long
# cell values truncated).
df5.show(n=20, truncate=True)

+-----------------------+-----+-------+-----------------------+--------+-----------+--------------------+--------------------+------+-----------+-----------+-----------+-----------+-----------------+-------------+-------------+------------------+-------------+-------------+
|algumaOcupacaoInformada|altas|   cnes|dataNotificacaoOcupacao|  estado|estadoSigla|           municipio|            nomeCnes|obitos|ocupHospCli|ocupHospUti|ocupSRAGCli|ocupSRAGUti|ocupacaoInformada|ofertaHospCli|ofertaHospUti|ofertaRespiradores|ofertaSRAGCli|ofertaSRAGUti|
+-----------------------+-----+-------+-----------------------+--------+-----------+--------------------+--------------------+------+-----------+-----------+-----------+-----------+-----------------+-------------+-------------+------------------+-------------+-------------+
|                   true|    0|2457156|   2020-06-19T14:24:...|Maranhão|         MA|  São João dos Patos|HOSPITAL REGIONAL...|     0|          4|          0|          1|      