## SQL-запросы к Elasticsearch

In [None]:
import pandas as pd
from elasticsearch import Elasticsearch
import plotly.graph_objects as go
import plotly.express as px
import os
import qgrid
import datetime
import pytz
import ipywidgets as widgets

In [None]:
class Histogram:
    """Plotly bar chart of hourly article counts read from Elasticsearch.

    The chart is a ``go.FigureWidget`` stored in ``self.widget``. It is
    populated from Elasticsearch as soon as the object is constructed.
    """

    def __init__(self):
        # The presence of 'local-file.txt' in the working directory selects
        # the dockertest endpoint; otherwise the 'es01' host is used.
        self.url = 'http://dockertest.rgwork.ru:9094/elasticsearch/' if os.path.exists('local-file.txt') else 'http://es01:9200/'
        # The password is taken from the RGPASS environment variable.
        self.es = Elasticsearch(self.url, http_auth=("admin", os.getenv('RGPASS')))
        # Number of one-hour buckets requested from Elasticsearch.
        self.hours = 100
        self.widget = self.create_widget()
        self.update_plot()

    @staticmethod
    def _moscow_now():
        """Return the current Europe/Moscow time as 'YYYY-MM-DDTHH:MM:SS'."""
        moscow = pytz.timezone('Europe/Moscow')
        return datetime.datetime.now(moscow).strftime('%Y-%m-%dT%H:%M:%S')

    @staticmethod
    def _bar_labels(values):
        """Return ``values`` as a plain list usable as bar text labels.

        Objects exposing ``tolist()`` are converted through it; any other
        iterable (tuple, list, ...) is converted with ``list()``.
        """
        to_list = getattr(values, 'tolist', None)
        return to_list() if callable(to_list) else list(values)

    def create_widget(self):
        """Build the histogram widget with placeholder data.

        Returns:
            A ``go.FigureWidget`` holding one bar trace, two annotations
            (total count and its caption) and one vertical line.
        """
        fig = px.bar(x=['a'], y=[1], title='темп индексации', template='plotly_white')
        fig.update_traces(marker_color='#3CC')

        # Axes and grid.
        fig.layout.yaxis.title.text = 'индексировано статей за час'
        fig.layout.xaxis.title.text = 'дата/время'
        fig.update_xaxes(showgrid=True, gridwidth=1)

        # Annotation 0: total number of articles (placeholder text,
        # overwritten by refresh_widget).
        fig.add_annotation(text="1200000", xref="paper", yref="paper",
                           x=1, y=1, showarrow=False, font_size=50, opacity=1.0)
        # Annotation 1: caption for the total.
        fig.add_annotation(text="всего индексировано", xref="paper", yref="paper",
                           x=1, y=0.80, showarrow=False, font_size=16, opacity=1.0)
        # Vertical line at the current Moscow time; moved by refresh_widget.
        fig.add_vline(x=self._moscow_now(),
                      line={'color': 'lightpink', 'width': 1, 'dash': 'solid'})
        return go.FigureWidget(fig)

    def refresh_widget(self, xvalues=('a',), yvalues=(1,), count=100):
        """Redraw the histogram with new data.

        Args:
            xvalues: x positions of the bars.
            yvalues: bar heights; also shown as the bar text labels.
            count: value written into the total-count annotation.
        """
        bar = self.widget.data[0]
        bar.x = xvalues
        bar.y = yvalues
        # bar.y is not guaranteed to expose .tolist() (e.g. when a plain
        # sequence such as the default is assigned), so convert defensively.
        bar.text = self._bar_labels(bar.y)
        self.widget.layout.annotations[0].text = f'{count}'
        # Move the vertical line to the current time.
        now = self._moscow_now()
        marker = self.widget.layout.shapes[0]
        marker.x0 = now
        marker.x1 = now

    def update_plot(self):
        """Read data from Elasticsearch and refresh the histogram."""
        # int() keeps anything but a whole number out of the SQL text.
        limit = int(self.hours)
        sql = f"""
            SELECT HISTOGRAM(date_modified, INTERVAL 1 HOUR) AS interval, count(*) AS count 
            FROM articles 
            GROUP by interval 
            ORDER BY interval 
            DESC LIMIT {limit} 
        """
        res = self.es.sql.query({"query": sql})
        df = pd.DataFrame.from_records(res['rows'], columns=['interval', 'count'])
        count = self.es.count(index='articles')['count']

        self.refresh_widget(df['interval'], df['count'], count)

    def set_hours(self, hours: int):
        """Set the number of hourly buckets to show and refresh the plot."""
        self.hours = hours
        self.update_plot()

    
class ArticlesGrid:
    """qgrid table of the most recently modified articles in Elasticsearch.

    ``self.df`` holds the data and ``self.widget`` the qgrid widget built
    from it; both are created when the object is constructed.
    """

    def __init__(self):
        # The presence of 'local-file.txt' in the working directory selects
        # the dockertest endpoint; otherwise the 'es01' host is used.
        self.url = 'http://dockertest.rgwork.ru:9094/elasticsearch/' if os.path.exists('local-file.txt') else 'http://es01:9200/'
        # Credentials must not live in the source: read the password from the
        # RGPASS environment variable, as the Histogram class does.
        self.es = Elasticsearch(self.url, http_auth=("admin", os.getenv('RGPASS')))
        self.df = self.update_data()
        self.widget = self.create_widget()

    def create_widget(self):
        """Build the table widget from ``self.df``.

        Returns:
            The qgrid widget, which is also stored in ``self.widget``.
        """
        cols = {
            # Zero widths - presumably to hide the index column.
            'index': {'maxWidth': 0, 'minWidth': 0, 'width': 0},
            'date_modified': {'width': 50},
        }
        self.widget = qgrid.show_grid(self.df, grid_options={'filterable': True}, column_definitions=cols)
        return self.widget

    def update_data(self):
        """Fetch the latest articles and store them in ``self.df``.

        Requests up to 1000 documents from the ``articles`` index, sorted by
        ``date_modified`` descending, keeping only the ``date_modified``,
        ``link_title`` and ``url`` source fields.

        Returns:
            The new DataFrame (the same object as ``self.df``).
        """
        body = {
            "size": 1000,
            "_source": ["date_modified", "link_title", "url"],
            "sort": [{"date_modified": {"order": "desc"}}],
        }
        res = self.es.search(index='articles', body=body)
        records = [hit['_source'] for hit in res['hits']['hits']]
        self.df = pd.DataFrame(records)
        return self.df



# hist = Histogram()
# display(hist.widget)
# grid = ArticlesGrid() 
# display(grid.widget)

In [None]:
def start_updating(times=10, delay=5):
    """Refresh the histogram and the table repeatedly.

    Runs ``update_all`` ``times`` times, writes the progress into the
    histogram title and sleeps ``delay`` seconds after each refresh. The call
    blocks until every iteration has finished.

    Args:
        times: number of refreshes to perform.
        delay: pause in seconds after each refresh.
    """
    # Imported locally because the notebook's import cell does not import it.
    import time

    for i in range(times):
        # NOTE(review): the original called the undefined names `update_gui`
        # and `widg`; `update_all` and `hist.widget` are the closest existing
        # equivalents - confirm this matches the intent.
        update_all(None)
        hist.widget.layout.title.text = f'обновлено {i+1}/{times}'
        time.sleep(delay)

        
# Module-level instances used by update_all() and display_gui().
# Both constructors query Elasticsearch immediately.
hist = Histogram()
grid = ArticlesGrid() 

def update_all(b):
    """Reload the histogram and the articles table from Elasticsearch.

    Used as the click handler of the refresh button; the argument ``b`` is
    not used.
    """
    hist.update_plot()
    # update_data() stores the new frame in grid.df and returns it.
    fresh_frame = grid.update_data()
    grid.widget.df = fresh_frame

def display_gui():
    """Build and display the controls: an hours slider and a refresh button."""
    # Slider bound to hist.set_hours, so changing it redraws the histogram.
    hours_slider = widgets.IntSlider(min=1, max=960, step=1, value=100)
    hours_control = widgets.interactive(hist.set_hours, hours=hours_slider)
    hours_control.children[0].description = 'показ. часов'

    # Button that reloads both the histogram and the table.
    refresh_button = widgets.Button(description='обновить')
    refresh_button.on_click(update_all)

    # Lay both controls out in a single row.
    display(widgets.HBox([hours_control, refresh_button]))
 

# Render the controls first, then the histogram and the articles table.
display_gui()
display(hist.widget)
display(grid.widget)


In [None]:
# import multiprocessing
# import time

# # object to save module variables
# HISTOGRAM = {
#     'process': None
#     'total':0,
#     'totla_last_day':0,
# }

# # bar
# def bar():
#     for i in range(100):
#         print(f"Tick {i}")
#         time.sleep(10)

# if __name__ == '__main__':
#     # Start bar as a process
#     p = multiprocessing.Process(target=bar)
#     p.start()

#     # Wait for 10 seconds or until process finishes
#     print('joining')
#     p.join(1)
#     print('joined')

#     # If thread is still active
#     if p.is_alive():
#         print("running... let's kill it...")

#         # Terminate - may not work if process is stuck for good
#         p.terminate()
#         # OR Kill - will work for sure, no chance for process to finish nicely however
# #         p.kill()

#         p.join()
#     print("alive" if p.is_alive() else "dead")

TODO
=====

- заменить запрос на нормальный
- сделать кнопки пуска и останова обновления диаграммы
