Visualisation des différentes statistiques de Dbnary
=============

In [1]:
# RDF
import rdflib
from rdflib.namespace import *

# Data analysis
import numpy as np

# Plotting
import bqplot as bq
from ipywidgets import Layout, Box, HBox, VBox

from IPython.display import Markdown, clear_output

import warnings
warnings.filterwarnings("ignore")

# URL of the SPARQL endpoint queried by every cell of this notebook.
ENDPOINT: str = "http://kaiko.getalp.org/sparql"

# HTML colour names passed as the ``colors`` of the bar charts drawn further down.
HTML_COLORS = [
    'Aqua',
    'Fuchsia',
    'Teal',
    'Purple',
    'Blue',
    'Red',
    'Navy',
    'Maroon',
    'Olive',
    'Gray',
    'Lime',
    'Silver',
    'Green',
    'Black',
]

### Classe qui retourne un DataFrame des résultats d'une requête SPARQL et autres fonctions utilitaires

In [2]:
# %load ../../src/SPARQL_query
import datetime
import time as tm
from typing import NoReturn

import pandas as pd
from IPython.display import display
from SPARQLWrapper import SPARQLWrapper
from ipywidgets import widgets


def add_progress_bar(fun: callable):
    """
    Decorate ``fun`` so that a progress bar widget is displayed while it runs.

    An ``IntProgress`` widget is created and displayed, then handed to ``fun``
    through the ``widget`` keyword argument (replacing any ``widget`` value the
    caller supplied). The widget is closed once ``fun`` is done, whether it
    returned normally or raised.

    :param fun: Callable accepting a ``widget`` keyword argument
    :return: The wrapping callable; it returns whatever ``fun`` returns
    """
    # Local import: the decorator then needs no extra module-level import.
    from functools import wraps

    @wraps(fun)  # keep the name and docstring of the decorated function
    def function_modif(*args, **kwargs):
        progress_bar = widgets.IntProgress(bar_style='success', description='Loading:')
        display(progress_bar)
        kwargs['widget'] = progress_bar
        try:
            return fun(*args, **kwargs)
        finally:
            # Close the bar even when the wrapped call fails, so a failed
            # request does not leave a stale "Loading:" widget behind.
            progress_bar.close()

    return function_modif


class SPARQLquery:
    """
    Run a query on a remote SPARQL server and return the answer as a ``pandas`` data frame.

    Main characteristics:
     - Big SELECT answers are fetched page by page (``LIMIT``/``OFFSET``) and concatenated
     - The number of rows of a SELECT query is available as ``resultSize``
     - An optional ``IntProgress`` widget is updated while the pages arrive
    """

    # Expression put in place of the projection to count the rows of a SELECT query.
    _COUNT_PROJECTION = "(COUNT (*) as ?cnt)"

    def __init__(self, endpoint: str, query: str, verbose: bool = False, step: int = 5000,
                 widget: "widgets.IntProgress | None" = None) -> None:
        """
        Store the query settings and compute ``resultSize``.

        For a SELECT query this already sends one counting request to the endpoint
        (see :meth:`get_result_size`).

        :param endpoint: Url to the remote SPARQL service
        :param query: The query
        :param verbose: If the detail text will be displayed
        :param step: The max number of result to receive per request
        :param widget: Optional progress widget; its ``max`` is set to the result size
            and its ``value`` reset to 0
        :raises ValueError: if ``step`` is not a positive integer
        """
        if step < 1:
            # A non-positive step cannot be used to page through the results.
            raise ValueError(f"step must be a positive integer, got {step!r}")

        self.sparql = SPARQLWrapper(endpoint)
        self.sparql.setReturnFormat("json")

        self.query: str = query
        self.verbose: bool = verbose
        self.step: int = step
        self.resultSize: int = self.get_result_size()

        # ``widget`` is always defined (possibly None) so it can be inspected safely.
        self.widget = widget
        self.is_widget: bool = widget is not None

        if self.is_widget:
            self.widget.max = self.resultSize
            self.widget.value = 0

    @staticmethod
    def _count_query(query: str):
        """
        Build the counting variant of a SELECT query.

        The projection (everything from the first variable, ``*`` or expression up to
        the ``WHERE`` keyword or the opening brace) is replaced by a ``COUNT`` expression.
        Keywords are matched case-insensitively.

        :param query: The original query
        :return: The counting query, or ``None`` if ``query`` is not a SELECT query or
            its projection cannot be located
        """
        import re  # local import: keeps the class independent of module-level imports

        match = re.match(
            r"\A\s*select\b[^{}]*?(?P<projection>[?$*(][^{}]*?)\s*(?:(?<![?$\w])where\b|\{)",
            query,
            re.IGNORECASE,
        )
        if match is None:
            return None
        start, end = match.span("projection")
        return query[:start] + SPARQLquery._COUNT_PROJECTION + query[end:]

    def get_result_size(self) -> int:
        """
        Return the number of rows of the query (only for SELECT queries).

        A counting version of the query is sent to the endpoint. For any other kind of
        query, or when the projection cannot be located, 1 is returned so that the
        query is later executed in a single request.

        NOTE(review): modifiers such as ``DISTINCT`` are left in place, so the count is
        presumably the un-deduplicated row count for such queries -- confirm if needed.
        """
        count_query = self._count_query(self.query)
        if count_query is None:
            return 1

        if self.verbose:
            print(tm.strftime("[%H:%M:%S] Obtention du nombre de résultats avant exécuter la requête"))

        self.sparql.setQuery(count_query)
        processed_results: dict = self.sparql.query().convert()  # Do the query
        number_of_results: int = int(processed_results['results']['bindings'][0]['cnt']['value'])

        if self.verbose:
            print(tm.strftime(f"[%H:%M:%S] Il y a  {number_of_results} résultats..."))

        return number_of_results

    def get_sparql_dataframe(self, query: str, text: str = "") -> pd.DataFrame:
        """
        Helper function to convert SPARQL results into a Pandas data frame.

        Credit: Douglas Fils

        :param query: The query to perform
        :param text: optional text to print in verbose mode; when a widget is attached and
            the text is not empty, its first space-separated token must be an integer,
            which becomes the widget value
        :return: One row per binding, one column per variable of the result head;
            unbound variables are ``None``
        """

        if self.verbose:
            print(tm.strftime(f"[%H:%M:%S] Transmission {text} en cours..."), end='')

        self.sparql.setQuery(query)

        processed_results: dict = self.sparql.query().convert()

        if self.verbose:
            print(tm.strftime(f"\r[%H:%M:%S] Transmission {text} réussi, conversion en Data Frame..."), end='')

        cols = processed_results['head']['vars']

        out = [[row.get(c, {}).get('value') for c in cols] for row in processed_results['results']['bindings']]

        if self.is_widget:
            if text == "":
                # Single request: the work is complete.
                self.widget.value = self.widget.max
            else:
                # Paged request: the text starts with the offset of the page just received.
                self.widget.value = int(text.split(' ')[0])

        if self.verbose:
            print(tm.strftime(" Effectué"))

        return pd.DataFrame(out, columns=cols)

    def do_query(self) -> pd.DataFrame:
        """
        Performs the query all at once if the result is not too big or little by little otherwise,
        if the query is not a selection it will be done all at once.

        :return: The result of the query; when paged, the pages are concatenated under
            a fresh 0..n-1 index
        """
        if self.resultSize > self.step:
            # NOTE(review): pages are requested without ORDER BY; this presumably relies
            # on the endpoint returning rows in a stable order -- confirm.
            paged_query = self.query + f" LIMIT {self.step}"
            pages = [
                self.get_sparql_dataframe(paged_query + f" OFFSET {offset}", f"{offset} sur {self.resultSize}")
                for offset in range(0, self.resultSize, self.step)
            ]
            # ignore_index avoids repeating the labels 0..step-1 once per page.
            return pd.concat(pages, ignore_index=True)
        return self.get_sparql_dataframe(self.query)


@add_progress_bar
def get_datasets(endpoint: str, verbose: bool = False, widget: widgets.IntProgress = None):
    """
    Dbnary specific function;

    List the datasets published on the endpoint together with their comment.

    :param endpoint: The address of the SPARQL server
    :param verbose: If the detail text will be displayed
    :param widget: Progress widget forwarded to the query runner
    :return: The data frame with one row per dataset (columns ``dataset`` and ``commentaire``)
    """

    def announce(message: str) -> None:
        # Timestamped trace, emitted only in verbose mode.
        if verbose:
            print(tm.strftime(message))

    sparql_text: str = "SELECT ?dataset ?commentaire WHERE {?dataset a qb:DataSet ; rdfs:comment ?commentaire}"

    announce("[%H:%M:%S] Requête au serveur des différents datasets disponible... ")

    # Fetch every dataset structure declared on the server.
    runner = SPARQLquery(endpoint, sparql_text, verbose=verbose, widget=widget)
    datasets: pd.DataFrame = runner.do_query()

    announce(f"[%H:%M:%S] Il y a {len(datasets)} datasets disponibles")

    return datasets


@add_progress_bar
def get_features(endpoint: str, dataset_name: str, widget: widgets.IntProgress = None) -> pd.DataFrame:
    """
    Dbnary specific function;

    List the feature names of a dataset by describing one of its items.

    :param endpoint: The address of the SPARQL server
    :param dataset_name: The name (URI) of the dataset whose features are wanted
    :param widget: Progress widget forwarded to the query runner
    :return: A single-column data frame named ``Caractéristiques``, built from the ``p``
        column of the DESCRIBE answer (presumably the predicates -- verify against the endpoint)
    """
    describe_query: str = f"""DESCRIBE ?item WHERE {'{'} ?item qb:dataSet <{dataset_name}> {'}'} LIMIT 1"""
    runner = SPARQLquery(endpoint, describe_query, widget=widget)
    description: pd.DataFrame = runner.do_query()

    # Keep only the ``p`` column and give it its display name.
    predicates = description['p'].to_frame(name=None)
    return predicates.set_axis(["Caractéristiques"], axis=1)


@add_progress_bar
def download_dataset(endpoint: str, dataset_name: str, features_names: list[str],
                     widget: widgets.IntProgress = None) -> pd.DataFrame:
    """
    Dbnary specific function;

    Fetch the selected features of every observation of a dataset.

    :param endpoint: The address of the SPARQL server
    :param dataset_name: The name (URI) of the dataset to download
    :param features_names: The URIs of the features to download; the text after the
        last ``#`` of each URI becomes the name of the matching result column
    :param widget: Progress widget forwarded to the query runner
    :return: The data frame of the selected features, one column per feature
    """

    # Variable names: the fragment after the last '#' of each feature URI.
    variables: list[str] = [uri.split('#')[-1] for uri in features_names]

    # "?a ?b " -- the variables listed after SELECT.
    projection = "".join(f"?{variable} " for variable in variables)
    # "?o <uri> ?a . " -- one triple pattern per requested feature.
    patterns = "".join(f"?o <{uri}> ?{variable} . " for uri, variable in zip(features_names, variables))

    query: str = "SELECT " + projection + f"WHERE {'{'} ?o qb:dataSet <{dataset_name}> . " + patterns + "} "

    # Do the query
    return SPARQLquery(endpoint, query, widget=widget).do_query()


def transformation_date(date: "str | int") -> datetime.datetime:
    """
    Convert a ``YYYYMMDD`` dump version into a ``datetime``.

    A day component equal to zero (``00``) is mapped to the first day of the month.

    :param date: The version, as a string of digits (an integer is converted with ``str``)
    :return: The matching date at midnight
    :raises ValueError: if a component is not numeric or the date does not exist
    """
    text = str(date)
    year, month, day = int(text[:4]), int(text[4:6]), int(text[6:])
    # ``or 1``: a zero day is replaced by the first of the month.
    return datetime.datetime(year=year, month=month, day=day or 1)


### On commence par chercher tous les différents types de datasets et on va proposer à l'utilisateur de choisir quel dataset télécharger

### Traitement de certains datasets particuliers, le code ci-dessous n'est pas généralisable
#### 1. dbnaryNymRelationsCube

In [3]:
# Download the nym-relation cube: one row per observation.
dataset = "http://kaiko.getalp.org/dbnary/statistics/dbnaryNymRelationsCube"
features = ('http://kaiko.getalp.org/dbnary#count', 'http://kaiko.getalp.org/dbnary#wiktionaryDumpVersion', 'http://kaiko.getalp.org/dbnary#nymRelation', 'http://kaiko.getalp.org/dbnary#observationLanguage')
data1 = download_dataset(ENDPOINT, dataset, features)
data1['count'] = data1['count'].astype(int)  # Change type of values to int

# Distinct relation URIs, and their short names (text after '#') used as legend labels.
relations = data1['nymRelation'].unique()
labels = [item.split('#')[-1] for item in relations]

# One row per (dump version, language), one ('count', relation) column per relation.
# A single two-key sort replaces two chained single-key sorts: the second chained
# sort is not stable by default, so the language ordering was not guaranteed.
data1 = (
    data1.pivot_table(columns='nymRelation', index=['wiktionaryDumpVersion', 'observationLanguage'], aggfunc='max')
    .reset_index()
    .sort_values(by=['wiktionaryDumpVersion', 'observationLanguage'])
)

data1["wiktionaryDumpVersion"] = data1["wiktionaryDumpVersion"].map(transformation_date)


# Output area holding the chart, and the selector of the display mode.
out1 = widgets.Output()

choice1 = widgets.ToggleButtons(options=[('Statistiques globales', 'glob'), ('Par pays', 'pays')],  description='Choix:',
    disabled=False, tooltips=['Statistiques de tout les pays par années', 'Statistiques d\' pays au cours du temps'])

def event1(obj, *, relations=relations, labels=labels):
    """
    Redraw, inside ``out1``, the chart matching the mode selected in ``choice1``.

    - ``"pays"``: one line per relation over the dump versions of one language
    - ``"glob"``: stacked bars per language for one dump version

    :param obj: change notification sent by ``observe`` (unused; ``None`` for the first draw)
    :param relations: relation column keys to plot. Bound when the function is defined,
        because another cell of this notebook reassigns the global ``relations`` and
        would otherwise break the callbacks of this chart
    :param labels: legend labels matching ``relations``; bound at definition time likewise
    """
    with out1:
        clear_output()
        if choice1.value == "pays":
            user_choice = widgets.Dropdown(options=list(data1["observationLanguage"].unique()), description="Choix:")

            def language_title():
                return f"Différentes relations lexicales dans l'extraction {user_choice.value}"

            choosed_data = data1[data1["observationLanguage"] == user_choice.value]

            y_sc = bq.LinearScale()
            x_ord = bq.scales.DateScale()

            line = bq.Lines(x=choosed_data["wiktionaryDumpVersion"], y=choosed_data["count"][relations].T,
                            stroke_width=1, display_legend=True, labels=labels, scales={'x': x_ord, 'y': y_sc})
            ax_x = bq.Axis(scale=x_ord, grid_lines='solid', label='Date', tick_format='%m %Y')
            ax_y = bq.Axis(scale=y_sc, orientation='vertical', grid_lines='solid', label='Valeur', label_offset='-50')
            fig = bq.Figure(marks=[line], axes=[ax_x, ax_y], title=language_title(), animation_duration=1000)

            def edit_graph(obj):
                # Refresh the lines and the title for the newly selected language.
                choosed_data = data1[data1["observationLanguage"] == user_choice.value]
                line.y = choosed_data["count"][relations].T
                line.x = choosed_data["wiktionaryDumpVersion"]
                fig.title = language_title()

        elif choice1.value == "glob":
            # The most recent dump version is selected by default.
            user_choice = widgets.Dropdown(options=[(np.datetime_as_string(item, unit='D'), item) for item in data1["wiktionaryDumpVersion"].unique()],
                                           description="Choix:", value=max(data1["wiktionaryDumpVersion"].unique()))

            def date_title():
                return f"Nombre de relations lexicales dans l'extraction du {np.datetime_as_string(user_choice.value, unit='D')}"

            x_ord = bq.OrdinalScale()
            y_sc = bq.LinearScale()

            choosed_data = data1[data1["wiktionaryDumpVersion"] == user_choice.value]

            x = choosed_data["observationLanguage"].values
            y = choosed_data["count"][relations].T

            bar = bq.Bars(x=x, y=y, scales={'x': x_ord, 'y': y_sc}, type='stacked', labels=labels, color_mode='element',
                          display_legend=True, colors=["red", "blue", "cyan", "pink", "lime", "purple", "orange", "fuchsia"])
            ax_x = bq.Axis(scale=x_ord, grid_lines='solid', label='Pays')
            ax_y = bq.Axis(scale=y_sc, orientation='vertical', grid_lines='solid', label='Valeur', label_offset='-50')
            fig = bq.Figure(marks=[bar], axes=[ax_x, ax_y], title=date_title(), animation_duration=1000)

            def edit_graph(obj):
                # Refresh the bars and the title for the newly selected dump version.
                choosed_data = data1[data1["wiktionaryDumpVersion"] == user_choice.value]
                bar.x = choosed_data["observationLanguage"].values
                bar.y = choosed_data["count"][relations].T
                fig.title = date_title()

            def show_row_in_tooltip(chart, d):
                # Show the counts of the hovered language as an HTML table.
                idx = d["data"]["index"]
                bar.tooltip = widgets.HTML(pd.DataFrame(data1[data1["wiktionaryDumpVersion"] == user_choice.value].iloc[idx]["count"]).to_html())

            bar.on_hover(show_row_in_tooltip)
        else:
            # Unknown mode: nothing to draw.
            return

        display(user_choice, fig)
        user_choice.observe(edit_graph, 'value')


# Redraw whenever the mode changes, show the controls, then draw once.
choice1.observe(event1, 'value')
display(choice1, out1)
event1(None)


IntProgress(value=0, bar_style='success', description='Loading:')

ToggleButtons(description='Choix:', options=(('Statistiques globales', 'glob'), ('Par pays', 'pays')), tooltip…

Output()

#### 2. dbnaryStatisticsCube

In [4]:
# Download the general statistics cube: one row per (dump version, language).
dataset = "http://kaiko.getalp.org/dbnary/statistics/dbnaryStatisticsCube"
features = ('http://kaiko.getalp.org/dbnary#lexicalEntryCount', 'http://kaiko.getalp.org/dbnary#lexicalSenseCount',
            'http://kaiko.getalp.org/dbnary#observationLanguage', 'http://kaiko.getalp.org/dbnary#wiktionaryDumpVersion',
            'http://kaiko.getalp.org/dbnary#pageCount', 'http://kaiko.getalp.org/dbnary#translationsCount')
# Sorting on the language as well makes the order of the languages inside a
# dump version deterministic (a single-key sort leaves it arbitrary).
data2 = download_dataset(ENDPOINT, dataset, features).sort_values(by=['wiktionaryDumpVersion', 'observationLanguage'])

# Numeric columns plotted by the charts.
categories = ["lexicalEntryCount", "translationsCount", "lexicalSenseCount", "pageCount"]
data2[categories] = data2[categories].astype(int)

data2["wiktionaryDumpVersion"] = data2["wiktionaryDumpVersion"].map(transformation_date)

# Output area holding the chart, and the selector of the display mode.
out2 = widgets.Output()

choice2 = widgets.ToggleButtons(options=[('Statistiques globales', 'glob'), ('Par pays', 'pays')],  description='Choix:',
    disabled=False, tooltips=['Statistiques de tout les pays par années', 'Statistiques d\' pays au cours du temps'])

def event2(obj, *, categories=categories):
    """
    Redraw, inside ``out2``, the chart matching the mode selected in ``choice2``.

    - ``"pays"``: one line per category over the dump versions of one language
    - ``"glob"``: stacked bars per language for one dump version

    :param obj: change notification sent by ``observe`` (unused; ``None`` for the first draw)
    :param categories: columns of ``data2`` to plot. Bound when the function is defined,
        because later cells of this notebook reassign the global ``categories`` and
        would otherwise break the callbacks of this chart
    """
    with out2:
        clear_output()
        if choice2.value == "pays":
            user_choice = widgets.Dropdown(options=list(data2["observationLanguage"].unique()), description="Choix:")

            def language_title():
                return f"Nombre d'éléments dans l'extraction {user_choice.value}"

            choosed_data = data2[data2["observationLanguage"] == user_choice.value]

            y_sc = bq.LinearScale()
            x_ord = bq.scales.DateScale()

            line = bq.Lines(x=choosed_data["wiktionaryDumpVersion"], y=choosed_data[categories].T, stroke_width=1,
                            display_legend=True, labels=categories, scales={'x': x_ord, 'y': y_sc})
            ax_x = bq.Axis(scale=x_ord, grid_lines='solid', label='Date', tick_format='%m %Y')
            ax_y = bq.Axis(scale=y_sc, orientation='vertical', grid_lines='solid', label='Valeur', label_offset='-50')
            fig = bq.Figure(marks=[line], axes=[ax_x, ax_y], title=language_title(), animation_duration=1000)

            def edit_graph(obj):
                # Refresh the lines and the title for the newly selected language.
                choosed_data = data2[data2["observationLanguage"] == user_choice.value]
                line.y = choosed_data[categories].T
                line.x = choosed_data["wiktionaryDumpVersion"]
                fig.title = language_title()

        elif choice2.value == "glob":
            # The most recent dump version is selected by default.
            user_choice = widgets.Dropdown(options=[(np.datetime_as_string(item, unit='D'), item) for item in data2["wiktionaryDumpVersion"].unique()],
                                           description="Choix:", value=max(data2["wiktionaryDumpVersion"].unique()))

            def date_title():
                # This cube holds element counts, not lexical relations: the title now
                # uses the same wording as the per-language view.
                return f"Nombre d'éléments dans l'extraction du {np.datetime_as_string(user_choice.value, unit='D')}"

            x_ord = bq.OrdinalScale()
            y_sc = bq.LinearScale()

            choosed_data = data2[data2["wiktionaryDumpVersion"] == user_choice.value]

            x = choosed_data["observationLanguage"].values
            y = choosed_data[categories].T

            bar = bq.Bars(x=x, y=y, scales={'x': x_ord, 'y': y_sc}, type='stacked', labels=categories,
                          color_mode='element', display_legend=True, colors=HTML_COLORS)
            ax_x = bq.Axis(scale=x_ord, grid_lines='solid', label='Pays')
            ax_y = bq.Axis(scale=y_sc, orientation='vertical', grid_lines='solid', label='Valeur', label_offset='-50')
            fig = bq.Figure(marks=[bar], axes=[ax_x, ax_y], title=date_title(), animation_duration=1000)

            def edit_graph(obj):
                # Refresh the bars and the title for the newly selected dump version.
                choosed_data = data2[data2["wiktionaryDumpVersion"] == user_choice.value]
                bar.x = choosed_data["observationLanguage"].values
                bar.y = choosed_data[categories].T
                fig.title = date_title()

            def show_row_in_tooltip(chart, d):
                # Show the row of the hovered language as an HTML table.
                idx = d["data"]["index"]
                bar.tooltip = widgets.HTML(pd.DataFrame(data2[data2["wiktionaryDumpVersion"] == user_choice.value].iloc[idx]).to_html())

            bar.on_hover(show_row_in_tooltip)
        else:
            # Unknown mode: nothing to draw.
            return

        display(user_choice, fig)
        user_choice.observe(edit_graph, 'value')


# Redraw whenever the mode changes, show the controls, then draw once.
choice2.observe(event2, 'value')
display(choice2, out2)
event2(None)

IntProgress(value=0, bar_style='success', description='Loading:')

ToggleButtons(description='Choix:', options=(('Statistiques globales', 'glob'), ('Par pays', 'pays')), tooltip…

Output()

#### 3. dbnaryTranslationsCube

In [5]:
# Download the translations cube: one row per observation.
dataset = "http://kaiko.getalp.org/dbnary/statistics/dbnaryTranslationsCube"
features = ('http://www.w3.org/ns/lemon/lime#language', 'http://kaiko.getalp.org/dbnary#count',
            'http://kaiko.getalp.org/dbnary#wiktionaryDumpVersion', 'http://kaiko.getalp.org/dbnary#observationLanguage')
data3 = download_dataset(ENDPOINT, dataset, features)

# Values of the 'language' column, without the "number_of_languages" entry, which is
# plotted separately on its own axis.
relations = data3['language'].unique()
relations = relations[relations != "number_of_languages"]
labels = [item.split('#')[-1] for item in relations]

data3["count"] = data3["count"].astype(int)
data3["wiktionaryDumpVersion"] = data3["wiktionaryDumpVersion"].map(transformation_date)

# One row per (dump version, language), one ('count', language value) column per value.
# The built-in 'max' reducer replaces an equivalent Python-level lambda.
data3 = data3.pivot_table(columns='language', index=['wiktionaryDumpVersion', 'observationLanguage'], aggfunc='max').reset_index()

# Output area holding the chart, and the selector of the display mode.
out3 = widgets.Output()

choice3 = widgets.ToggleButtons(options=[('Statistiques globales', 'glob'), ('Par pays', 'pays')],  description='Choix:',
    disabled=False, tooltips=['Statistiques de tout les pays par années', 'Statistiques d\' pays au cours du temps'])

def event3(obj, *, relations=relations, labels=labels):
    """
    Redraw, inside ``out3``, the chart matching the mode selected in ``choice3``.

    - ``"pays"``: one line per translation language over the dump versions of one
      extraction language, plus the number of languages on a second axis
    - ``"glob"``: stacked bars per extraction language for one dump version, plus the
      number of languages on a second axis

    :param obj: change notification sent by ``observe`` (unused; ``None`` for the first draw)
    :param relations: column keys to plot. Bound when the function is defined, because
        another cell of this notebook also assigns the global ``relations``; re-running
        that cell would otherwise break the callbacks of this chart
    :param labels: legend labels matching ``relations``; bound at definition time likewise
    """
    with out3:
        clear_output()
        if choice3.value == "pays":
            user_choice = widgets.Dropdown(options=list(data3["observationLanguage"].unique()), description="Choix:")

            def language_title():
                return f"Nombre de traductions l'extraction {user_choice.value}"

            choosed_data = data3[data3["observationLanguage"] == user_choice.value]

            # Two vertical scales: counts on the left, number of languages on the right.
            y_sc = bq.LinearScale()
            y_sc2 = bq.LinearScale()
            x_ord = bq.scales.DateScale()

            line = bq.Lines(x=choosed_data["wiktionaryDumpVersion"], y=choosed_data["count"][relations].T, stroke_width=1,
                            display_legend=True, labels=labels, scales={'x': x_ord, 'y': y_sc})
            line1 = bq.Lines(x=choosed_data["wiktionaryDumpVersion"], y=choosed_data["count"]["number_of_languages"].values,
                             scales={'x': x_ord, 'y': y_sc2}, stroke_width=1, display_legend=True,
                             labels=["Number of languages"], colors=['green'], line_style="dashed")
            ax_x = bq.Axis(scale=x_ord, grid_lines='solid', label='Date', tick_format='%m %Y')
            ax_y = bq.Axis(scale=y_sc, orientation='vertical', grid_lines='solid', label='Valeur', label_offset='-50')
            ax_y2 = bq.Axis(scale=y_sc2, orientation='vertical', grid_lines='solid', label='Nombre de langues',
                            label_offset='+50', side="right", label_color="green")
            fig = bq.Figure(marks=[line, line1], axes=[ax_x, ax_y, ax_y2], title=language_title(), animation_duration=1000)

            def edit_graph(obj):
                # Refresh both marks and the title for the newly selected language.
                choosed_data = data3[data3["observationLanguage"] == user_choice.value]
                line.y = choosed_data["count"][relations].T
                line.x = choosed_data["wiktionaryDumpVersion"]
                line1.x = choosed_data["wiktionaryDumpVersion"]
                line1.y = choosed_data["count"]["number_of_languages"].values
                fig.title = language_title()

        elif choice3.value == "glob":
            # The most recent dump version is selected by default.
            user_choice = widgets.Dropdown(options=[(np.datetime_as_string(item, unit='D'), item) for item in data3["wiktionaryDumpVersion"].unique()],
                                           description="Choix:", value=max(data3["wiktionaryDumpVersion"].unique()))

            def date_title():
                # Single source for the title, so the wording no longer changes
                # between the first draw and later updates.
                return f"Nombre de traductions dans l'extraction du {np.datetime_as_string(user_choice.value, unit='D')}"

            def selected_rows():
                # Rows of the selected dump version, in the order used by the bars.
                return data3[data3["wiktionaryDumpVersion"] == user_choice.value].sort_values(by="observationLanguage")

            x_ord = bq.OrdinalScale()
            y_sc = bq.LinearScale()
            y_sc2 = bq.LinearScale()

            choosed_data = selected_rows()

            x = choosed_data["observationLanguage"].values
            y = choosed_data["count"][relations].T

            bar = bq.Bars(x=x, y=y, scales={'x': x_ord, 'y': y_sc}, type='stacked', labels=labels, color_mode='element',
                          display_legend=True, colors=HTML_COLORS)
            line = bq.Lines(x=x, y=choosed_data["count"]["number_of_languages"].values, scales={'x': x_ord, 'y': y_sc2},
                            stroke_width=1, display_legend=True, labels=["Number of languages"], colors=["green"])
            ax_x = bq.Axis(scale=x_ord, grid_lines='solid', label='Pays')
            ax_y = bq.Axis(scale=y_sc, orientation='vertical', grid_lines='solid', label='Valeur', label_offset='-50')
            ax_y2 = bq.Axis(scale=y_sc2, orientation='vertical', grid_lines='solid', label='Nombre de langues',
                            label_offset='+50', side="right", label_color="green")
            fig = bq.Figure(marks=[bar, line], axes=[ax_x, ax_y, ax_y2], title=date_title(), animation_duration=1000,
                            legend_location="top-left")

            def edit_graph(obj):
                # Refresh both marks and the title for the newly selected dump version.
                choosed_data = selected_rows()
                bar.x = choosed_data["observationLanguage"].values
                bar.y = choosed_data["count"][relations].T
                line.x = bar.x
                line.y = choosed_data["count"]["number_of_languages"].values
                fig.title = date_title()

            def show_row_in_tooltip(chart, d):
                # Index the same sorted selection as the bars, so the hovered bar and
                # the displayed row always refer to the same language.
                idx = d["data"]["index"]
                bar.tooltip = widgets.HTML(pd.DataFrame(selected_rows().iloc[idx]["count"]).to_html())

            bar.on_hover(show_row_in_tooltip)
        else:
            # Unknown mode: nothing to draw.
            return

        display(user_choice, fig)
        user_choice.observe(edit_graph, 'value')


# Redraw whenever the mode changes, show the controls, then draw once.
choice3.observe(event3, 'value')
display(choice3, out3)
event3(None)

IntProgress(value=0, bar_style='success', description='Loading:')

ToggleButtons(description='Choix:', options=(('Statistiques globales', 'glob'), ('Par pays', 'pays')), tooltip…

Output()

#### 4. enhancementConfidenceDataCube

In [6]:
# Download the enhancement confidence cube: one row per observation.
dataset = "http://kaiko.getalp.org/dbnary/statistics/enhancementConfidenceDataCube"
features = ('http://kaiko.getalp.org/dbnary#precisionMeasure', 'http://kaiko.getalp.org/dbnary#recallMeasure',
            'http://kaiko.getalp.org/dbnary#f1Measure', 'http://kaiko.getalp.org/dbnary#wiktionaryDumpVersion',
            'http://kaiko.getalp.org/dbnary#enhancementMethod', 'http://kaiko.getalp.org/dbnary#observationLanguage')

# Sorting on the language as well makes the order of the languages inside a
# dump version deterministic (a single-key sort leaves it arbitrary).
data4t = download_dataset(ENDPOINT, dataset, features).sort_values(by=['wiktionaryDumpVersion', 'observationLanguage'])

# Measure columns plotted by the charts.
categories = ["precisionMeasure", "recallMeasure", "f1Measure"]
data4t[categories] = data4t[categories].astype(float)

data4t["wiktionaryDumpVersion"] = data4t["wiktionaryDumpVersion"].map(transformation_date)

# Output area holding the chart, the selector of the display mode, and the
# selector of the enhancement method used to filter the rows.
out4 = widgets.Output()

choice4 = widgets.ToggleButtons(options=[('Statistiques globales', 'glob'), ('Par pays', 'pays')],  description='Choix:',
    disabled=False, tooltips=['Statistiques de tout les pays par années', 'Statistiques d\' pays au cours du temps'])
choice4bis = widgets.ToggleButtons(options=[('Aléatoire', 'random'), ('Dbnary tversky', 'dbnary_tversky')],  description='Méthode d\'amélioration:',
    disabled=False)

def event4(obj, *, categories=categories):
    """
    Redraw, inside ``out4``, the chart matching the selections of ``choice4`` and ``choice4bis``.

    The rows of ``data4t`` are first restricted to the enhancement method selected in
    ``choice4bis``, then:

    - ``"pays"``: one line per measure over the dump versions of one language
    - ``"glob"``: stacked bars per language for one dump version

    :param obj: change notification sent by ``observe`` (unused; ``None`` for the first draw)
    :param categories: columns of ``data4t`` to plot. Bound when the function is defined,
        because a later cell of this notebook reassigns the global ``categories`` and
        would otherwise break the callbacks of this chart
    """
    with out4:
        clear_output()
        data4 = data4t[data4t["enhancementMethod"] == choice4bis.value]
        if choice4.value == "pays":
            user_choice = widgets.Dropdown(options=list(data4["observationLanguage"].unique()), description="Choix:")

            choosed_data = data4[data4["observationLanguage"] == user_choice.value]

            y_sc = bq.LinearScale()
            x_ord = bq.scales.DateScale()

            line = bq.Lines(x=choosed_data["wiktionaryDumpVersion"], y=choosed_data[categories].T, stroke_width=1,
                            display_legend=True, labels=categories, scales={'x': x_ord, 'y': y_sc})
            ax_x = bq.Axis(scale=x_ord, grid_lines='solid', label='Date', tick_format='%m %Y')
            ax_y = bq.Axis(scale=y_sc, orientation='vertical', grid_lines='solid', label='Valeur', label_offset='-50')
            fig = bq.Figure(marks=[line], axes=[ax_x, ax_y], title=f"{user_choice.value}", animation_duration=1000)

            def edit_graph(obj):
                # Refresh the lines and the title for the newly selected language.
                choosed_data = data4[data4["observationLanguage"] == user_choice.value]
                line.y = choosed_data[categories].T
                line.x = choosed_data["wiktionaryDumpVersion"]
                fig.title = f"{user_choice.value}"

        elif choice4.value == "glob":
            # The most recent dump version is selected by default.
            user_choice = widgets.Dropdown(options=[(np.datetime_as_string(item, unit='D'), item) for item in data4["wiktionaryDumpVersion"].unique()],
                                           description="Choix:", value=max(data4["wiktionaryDumpVersion"].unique()))

            x_ord = bq.OrdinalScale()
            y_sc = bq.LinearScale()

            choosed_data = data4[data4["wiktionaryDumpVersion"] == user_choice.value]

            x = choosed_data["observationLanguage"].values
            y = choosed_data[categories].T

            # The mark is built once (it used to be constructed twice in a row,
            # the first instance being discarded immediately).
            bar = bq.Bars(x=x, y=y, scales={'x': x_ord, 'y': y_sc}, type='stacked', labels=categories,
                          color_mode='element', display_legend=True, colors=HTML_COLORS)
            ax_x = bq.Axis(scale=x_ord, grid_lines='solid', label='Pays')
            ax_y = bq.Axis(scale=y_sc, orientation='vertical', grid_lines='solid', label='Valeur', label_offset='-50')
            fig = bq.Figure(marks=[bar], axes=[ax_x, ax_y], title=f"{np.datetime_as_string(user_choice.value, unit='D')}",
                            animation_duration=1000)

            def edit_graph(obj):
                # Refresh the bars and the title for the newly selected dump version.
                choosed_data = data4[data4["wiktionaryDumpVersion"] == user_choice.value]
                bar.x = choosed_data["observationLanguage"].values
                bar.y = choosed_data[categories].T
                fig.title = f"{np.datetime_as_string(user_choice.value, unit='D')}"

            def show_row_in_tooltip(chart, d):
                # Show the row of the hovered language as an HTML table.
                idx = d["data"]["index"]
                bar.tooltip = widgets.HTML(pd.DataFrame(data4[data4["wiktionaryDumpVersion"] == user_choice.value].iloc[idx]).to_html())

            bar.on_hover(show_row_in_tooltip)
        else:
            # Unknown mode: nothing to draw.
            return

        display(user_choice, fig)
        user_choice.observe(edit_graph, 'value')


# Redraw whenever the mode or the method changes, show the controls, then draw once.
choice4.observe(event4, 'value')
choice4bis.observe(event4, 'value')
display(choice4, choice4bis, out4)
event4(None)

IntProgress(value=0, bar_style='success', description='Loading:')

ToggleButtons(description='Choix:', options=(('Statistiques globales', 'glob'), ('Par pays', 'pays')), tooltip…

ToggleButtons(description="Méthode d'amélioration:", options=(('Aléatoire', 'random'), ('Dbnary tversky', 'dbn…

Output()

#### 5. translationGlossesCube

In [7]:
# Download the translation glosses cube: one row per (dump version, language).
dataset = "http://kaiko.getalp.org/dbnary/statistics/translationGlossesCube"
features = ('http://kaiko.getalp.org/dbnary#translationsWithSenseNumber', 'http://kaiko.getalp.org/dbnary#translationsWithSenseNumberAndTextualGloss',
            'http://kaiko.getalp.org/dbnary#translationsWithTextualGloss', 'http://kaiko.getalp.org/dbnary#wiktionaryDumpVersion',
            'http://kaiko.getalp.org/dbnary#observationLanguage', 'http://kaiko.getalp.org/dbnary#translationsWithNoGloss')

# Sorting on the language as well makes the order of the languages inside a
# dump version deterministic (a single-key sort leaves it arbitrary).
data5 = download_dataset(ENDPOINT, dataset, features).sort_values(by=['wiktionaryDumpVersion', 'observationLanguage'])

# Numeric columns plotted by the charts.
categories = ["translationsWithSenseNumber", "translationsWithSenseNumberAndTextualGloss", "translationsWithTextualGloss", "translationsWithNoGloss"]
data5[categories] = data5[categories].astype(int)

data5["wiktionaryDumpVersion"] = data5["wiktionaryDumpVersion"].map(transformation_date)

# Output area holding the chart, and the selector of the display mode.
out5 = widgets.Output()

choice5 = widgets.ToggleButtons(options=[('Statistiques globales', 'glob'), ('Par pays', 'pays')],  description='Choix:',
    disabled=False, tooltips=['Statistiques de tout les pays par années', 'Statistiques d\' pays au cours du temps'])

def event5(obj, *, categories=categories):
    """
    Redraw, inside ``out5``, the chart matching the mode selected in ``choice5``.

    - ``"pays"``: one line per category over the dump versions of one language
    - ``"glob"``: stacked bars per language for one dump version

    :param obj: change notification sent by ``observe`` (unused; ``None`` for the first draw)
    :param categories: columns of ``data5`` to plot. Bound when the function is defined,
        because other cells of this notebook also assign the global ``categories``;
        re-running one of them would otherwise break the callbacks of this chart
    """
    with out5:
        clear_output()
        if choice5.value == "pays":
            user_choice = widgets.Dropdown(options=list(data5["observationLanguage"].unique()), description="Choix:")

            choosed_data = data5[data5["observationLanguage"] == user_choice.value]

            y_sc = bq.LinearScale()
            x_ord = bq.scales.DateScale()

            line = bq.Lines(x=choosed_data["wiktionaryDumpVersion"], y=choosed_data[categories].T, stroke_width=1,
                            display_legend=True, labels=categories, scales={'x': x_ord, 'y': y_sc})
            ax_x = bq.Axis(scale=x_ord, grid_lines='solid', label='Date', tick_format='%m %Y')
            ax_y = bq.Axis(scale=y_sc, orientation='vertical', grid_lines='solid', label='Valeur', label_offset='-50')
            fig = bq.Figure(marks=[line], axes=[ax_x, ax_y], title=f"{user_choice.value}", animation_duration=1000)

            def edit_graph(obj):
                # Refresh the lines and the title for the newly selected language.
                choosed_data = data5[data5["observationLanguage"] == user_choice.value]
                line.y = choosed_data[categories].T
                line.x = choosed_data["wiktionaryDumpVersion"]
                fig.title = f"{user_choice.value}"

        elif choice5.value == "glob":
            # The most recent dump version is selected by default.
            user_choice = widgets.Dropdown(options=[(np.datetime_as_string(item, unit='D'), item) for item in data5["wiktionaryDumpVersion"].unique()],
                                           description="Choix:", value=max(data5["wiktionaryDumpVersion"].unique()))

            x_ord = bq.OrdinalScale()
            y_sc = bq.LinearScale()

            choosed_data = data5[data5["wiktionaryDumpVersion"] == user_choice.value]

            x = choosed_data["observationLanguage"].values
            y = choosed_data[categories].T

            bar = bq.Bars(x=x, y=y, scales={'x': x_ord, 'y': y_sc}, type='stacked', labels=categories,
                          color_mode='element', display_legend=True, colors=HTML_COLORS)
            ax_x = bq.Axis(scale=x_ord, grid_lines='solid', label='Pays')
            ax_y = bq.Axis(scale=y_sc, orientation='vertical', grid_lines='solid', label='Valeur', label_offset='-50')
            fig = bq.Figure(marks=[bar], axes=[ax_x, ax_y], title=f"{np.datetime_as_string(user_choice.value, unit='D')}",
                            animation_duration=1000)

            def edit_graph(obj):
                # Refresh the bars and the title for the newly selected dump version.
                choosed_data = data5[data5["wiktionaryDumpVersion"] == user_choice.value]
                bar.x = choosed_data["observationLanguage"].values
                bar.y = choosed_data[categories].T
                fig.title = f"{np.datetime_as_string(user_choice.value, unit='D')}"

            def show_row_in_tooltip(chart, d):
                # Show the row of the hovered language as an HTML table.
                idx = d["data"]["index"]
                bar.tooltip = widgets.HTML(pd.DataFrame(data5[data5["wiktionaryDumpVersion"] == user_choice.value].iloc[idx]).to_html())

            bar.on_hover(show_row_in_tooltip)
        else:
            # Unknown mode: nothing to draw.
            return

        display(user_choice, fig)
        user_choice.observe(edit_graph, 'value')


# Redraw whenever the mode changes, show the controls, then draw once.
choice5.observe(event5, 'value')
display(choice5, out5)
event5(None)

IntProgress(value=0, bar_style='success', description='Loading:')

ToggleButtons(description='Choix:', options=(('Statistiques globales', 'glob'), ('Par pays', 'pays')), tooltip…

Output()