# Data Links

This section lists useful data sources:

- UCI Machine Learning repository : https://archive.ics.uci.edu/ml/index.php
- UEA & UCR Time Series Classification Repository : https://timeseriesclassification.com/dataset.php

In [15]:
import logging

import pandas as pd
import plotly.graph_objects as go

logger = logging.getLogger(__name__)


def plot(traces, show=True, **kwargs):
    """
    Assemble a plotly figure from a list of traces and apply the shared layout.

    :param traces: list of plotly traces to add to the figure
    :type traces: list

    :param show: the figure is displayed with ``fig.show()`` only when this is exactly ``True``
        and ``widget`` is falsy
    :type show: bool, optional

    :param kwargs: optional layout settings. Recognised keys are :
    - x_axis_name string used as x axis title
    - y_axis_name string used as y axis title
    - x_min lower bound of the x axis range (``None`` when absent)
    - x_max upper bound of the x axis range (``None`` when absent)
    - y_min lower bound of the y axis range (``None`` when absent)
    - y_max upper bound of the y axis range (``None`` when absent)
    - title title of the graph (empty string when absent)
    - template string indicating plotly graph template to use (default "plotly_dark"). Possible choices are :
            "plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none".
            See https://plot.ly/python/templates for more information
    - widget when truthy, the figure is wrapped in a ``go.FigureWidget`` and returned without being shown
    Any other key is ignored.

    :return: ``go.FigureWidget`` when ``widget`` is truthy, otherwise the ``go.Figure``
    """
    use_widget = kwargs.get('widget', False)
    # Missing bounds stay None inside the axis range lists.
    limits = {bound: kwargs.get(bound) for bound in ('x_min', 'x_max', 'y_min', 'y_max')}

    fig = go.Figure()
    for trace in traces:
        fig.add_trace(trace)

    fig.update_layout(
        title=kwargs.get('title', ''),
        xaxis={
            'title': kwargs.get('x_axis_name'),
            'range': [limits['x_min'], limits['x_max']],
        },
        yaxis={
            'title': kwargs.get('y_axis_name'),
            'range': [limits['y_min'], limits['y_max']],
        },
        showlegend=True,
        # Fully transparent rgba background so the legend rectangle does not hide the plot.
        legend={'x': -0.1, 'y': 1.1, 'bgcolor': 'rgba(0,0,0,0)'},
        legend_orientation="h",
        template=kwargs.get('template', 'plotly_dark'),
    )

    if use_widget:
        return go.FigureWidget(fig)
    if show is True:
        fig.show()
    return fig


def plot_evolution(keys, df, show=True, additional_traces=None, webgl=False, **kwargs):
    """
    Plots the evolution of the input keys contained in df against the df index, with optional extra traces.

    :param keys: list of quantities names corresponding to df pandas DataFrame columns names
    :type keys: list

    :param df: pandas DataFrame containing keys values; its index is used as x values
    :type df: pandas DataFrame

    :param show: Boolean controlling whether or not to plot the curves (forwarded to ``plot``)
    :type show: bool, optional

    :param additional_traces: list of plotly traces to add to current plot
    :type additional_traces: list, optional

    :param webgl: when exactly ``True``, traces are built with ``go.Scattergl`` instead of ``go.Scatter``
    :type webgl: bool, optional

    :param kwargs: optional arguments used in plot functions.
    Possible kwargs are :
    - colors: list of string relative to curves colors (ex: colors= ['red', 'blue'])
    - names: list of string representing different curves names which will appear in legend
    (ex: names=['PACT1', 'PCS600'])
    - modes: list of curves string modes to use. Possible choices are 'lines', 'markers', 'lines+markers' (ex:
    modes = ['lines', 'lines']). Defaults to 'lines+markers' for every curve.
    - widget: when truthy, a plotly FigureWidget is built and returned instead of df
    - x_axis_name string representing name of x axis (defaults to 'Time')
    - y_axis_name string representing name of y axis
    - x_min float value or string date representing minimal value to show along x_axis
    - x_max float value or string date representing maximal value to show along x_axis
    - y_min float value representing minimal value to show along y_axis
    - y_max float value  representing maximal value to show along y_axis
    - title title of the graph
    - template string indicating plotly graph template to use. Possible choices are :
            "plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none".
            See https://plot.ly/python/templates for more information

    :return: the object returned by ``plot`` (a FigureWidget) when ``widget`` is truthy, otherwise the
        input DataFrame ``df``, as the other plot_* helpers do
    """
    modes = kwargs.pop('modes', None)
    names = kwargs.pop('names', None)
    colors = kwargs.pop('colors', None)
    widget = kwargs.pop('widget', False)

    kwargs.setdefault('x_axis_name', 'Time')

    plotting_function = go.Scattergl if webgl is True else go.Scatter

    # Columns containing only NaN are skipped; names/modes/colors are indexed by the
    # position of the key in ``keys``, not by the position of the resulting trace.
    traces = [
        plotting_function(
            x=df.index,
            y=df[key],
            name=names[ind] if names is not None else key,
            mode=modes[ind] if modes is not None else 'lines+markers',
            line={"color": colors[ind] if colors is not None else None}
        )
        for ind, key in enumerate(keys)
        if not df[key].isna().all()
    ]
    if additional_traces is not None:
        traces += additional_traces

    fig = plot(
        traces=traces,
        show=show,
        widget=widget,
        **kwargs
    )
    if widget:
        return fig
    # The original ended with a bare ``df`` expression (a no-op), so None was returned.
    return df


def plot_hist(keys, df, quantiles=None, show=True, **kwargs):
    """
    Draws stacked histograms of the input keys contained in df, with optional vertical quantile lines.

    :param keys: list of quantities names corresponding to df pandas DataFrame columns names
    :type keys: list

    :param df: pandas DataFrame containing keys values
    :type df: pandas DataFrame

    :param quantiles: list of float values between 0 and 1. Each one is drawn as a vertical line spanning the
        plot height, annotated with its value in scientific notation. Quantiles are computed on the column of the
        first key only, and nothing is drawn when that column contains only NaN.
    :type quantiles: list, optional

    :param show: the figure is displayed only when this is exactly ``True``
    :type show: bool, optional

    :param kwargs: optional arguments used in plot functions.
    Possible kwargs are :
    - colors: list of string relative to histograms colors (ex: colors= ['red', 'blue'])
    - names: list of string representing different histograms names which will appear in legend
    (ex: names=['PACT1', 'PCS600'])
    - nbinsx int representing the maximum number of histogram bars to use
    - x_axis_name string representing name of x axis
    - y_axis_name string representing name of y axis (defaults to 'Number of elements')
    - x_min float value or string date representing minimal value to show along x_axis
    - x_max float value or string date representing maximal value to show along x_axis
    - y_min float value representing minimal value to show along y_axis
    - y_max float value  representing maximal value to show along y_axis
    - title title of the graph
    - template string indicating plotly graph template to use. Possible choices are :
            "plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none".
            See https://plot.ly/python/templates for more information

    :return: the input DataFrame ``df``
    """
    # No need to use webgl here because points are aggregated
    legend_names = kwargs.pop('names', None)
    bar_colors = kwargs.pop('colors', None)
    max_bins = kwargs.pop('nbinsx', None)

    if quantiles is None:
        quantiles = []

    traces = []
    for position, key in enumerate(keys):
        if df[key].isna().all():
            continue  # nothing to histogram for an all-NaN column
        traces.append(
            go.Histogram(
                x=df[key],
                name=key if legend_names is None else legend_names[position],
                nbinsx=max_bins,  # To specify the maximum number of bins
                marker={"color": None if bar_colors is None else bar_colors[position]},
            )
        )

    kwargs.setdefault('y_axis_name', 'Number of elements')

    # Display is deferred until the quantile markers have been added to the layout.
    fig = plot(traces=traces, show=False, **kwargs)

    quantile_lines = []
    quantile_labels = []
    for quantile in quantiles:
        reference = df[keys[0]]  # quantiles only for first key
        if reference.isna().all():
            continue
        position = reference.quantile(quantile)
        quantile_lines.append(
            go.layout.Shape(type="line", yref="paper", x0=position, y0=0, x1=position, y1=1)
        )
        quantile_labels.append(
            {
                'x': position,
                'y': 1,
                'xref': 'x',
                'yref': 'paper',
                'xanchor': 'left',
                'text': 'Q' + str(quantile) + ': ' + format(position, ".2e"),
                'showarrow': False,
                'arrowhead': 0,
            }
        )

    fig.update_layout(barmode='stack', shapes=quantile_lines, annotations=quantile_labels)

    if show is True:
        fig.show()
    return df


def plot_xy(df, x_name, y_names, z_name=None, show=True, date_format='%Y-%m-%dT%H:%M:%SZ', webgl=False, **kwargs):
    """
    Plots one or several quantities (y_names) against another one (x_name).
    An extra quantity (z_name) can be used to color the markers.
    The figure is built through ``plot`` and the input DataFrame is returned.

    :param df: pandas DataFrame containing columns relative to y_names, x_name and optional z_name quantities.
    :type df: pandas.DataFrame

    :param x_name: string, relative to quantity, used as x abscissa and contained in df
    :type x_name: str

    :param y_names: list of strings relative to quantities used as y curves and contained in df
    :type y_names: list

    :param z_name: string relative to quantity, used as optional marker coloration and contained in df
    :type z_name: str, optional

    :param show: Boolean controlling whether or not to plot the curves (forwarded to ``plot``)
    :type show: bool, optional

    :param date_format: strftime format used to print the index value in the hover text when df has a
        DatetimeIndex
    :type date_format: str, optional

    :param webgl: when exactly ``True``, traces are built with ``go.Scattergl`` instead of ``go.Scatter``
    :type webgl: bool, optional

    :param kwargs: optional arguments used in plot functions.
    Possible kwargs are :
    - colors: list of string relative to curves colors (ex: colors= ['red', 'blue'])
    - names: list of string representing different curves names which will appear in legend
    (ex: names=['PACT1', 'PCS600'])
    - modes: list of curves string modes to use. Possible choices are 'lines', 'markers', 'lines+markers' (ex:
    modes = ['lines', 'lines']). Defaults to 'markers' for every curve.
    - x_axis_name string representing name of x axis (defaults to x_name)
    - y_axis_name string representing name of y axis (defaults to the single y name, or '' with several)
    - x_min float value or string date representing minimal value to show along x_axis
    - x_max float value or string date representing maximal value to show along x_axis
    - y_min float value representing minimal value to show along y_axis
    - y_max float value  representing maximal value to show along y_axis
    - title title of the graph
    - template string indicating plotly graph template to use. Possible choices are :
            "plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none".
            See https://plot.ly/python/templates for more information

    :return: the input DataFrame ``df``
    """

    def hover_label(y_name, row):
        # One hover line per displayed quantity, values rounded to 2 decimals.
        label = x_name + ' : ' + str(round(df[x_name].iloc[row], 2))
        label = label + '<br>' + y_name + ' : ' + str(round(df[y_name].iloc[row], 2))
        if z_name is not None:
            label = label + '<br>' + z_name + ' : ' + str(round(df[z_name].iloc[row], 2))
        if isinstance(df.index, pd.core.indexes.datetimes.DatetimeIndex):
            label = label + '<br>' + df.index[row].strftime(date_format)
        return label

    scatter_cls = go.Scattergl if webgl is True else go.Scatter

    curve_colors = kwargs.pop('colors', None)
    curve_modes = kwargs.pop('modes', None)
    curve_names = kwargs.pop('names', None)

    if z_name is not None:
        marker = {
            'color': df[z_name],
            'colorscale': 'Jet',
            'colorbar': {'title': z_name, 'len': 0.8, 'lenmode': 'fraction'},
            'opacity': 0.8,
        }
    else:
        marker = None

    traces = []
    for position, y_name in enumerate(y_names):
        if df[y_name].isna().all():
            continue  # skip columns holding only NaN
        traces.append(
            scatter_cls(
                x=df[x_name],
                y=df[y_name],
                name=y_name if curve_names is None else curve_names[position],
                marker=marker,
                mode='markers' if curve_modes is None else curve_modes[position],
                line={"color": None if curve_colors is None else curve_colors[position]},
                hoverinfo='text',
                text=[hover_label(y_name, row) for row in range(len(df))],
            )
        )

    kwargs.setdefault('y_axis_name', y_names[0] if len(y_names) == 1 else '')
    kwargs.setdefault('x_axis_name', x_name)

    plot(traces=traces, show=show, **kwargs)

    return df


def plot_bar(keys, x, df, show=True, **kwargs):
    """
    Draws one bar series per input key, using the values of df and the labels of x along the x axis.

    :param keys: list of quantities names corresponding to df pandas DataFrame columns names
    :type keys: list

    :param x: list of strings names used for x axis bar legends. It must match df index
    :type x: list

    :param df: pandas DataFrame containing keys values indexed by input x list
    :type df: pandas DataFrame

    :param show: Boolean controlling whether or not to plot the bars (forwarded to ``plot``)
    :type show: bool, optional

    :param kwargs: optional arguments used in plot functions.
    Possible kwargs are :
    - colors: list of string relative to bars colors (ex: colors= ['red', 'blue'])
    - names: list of string representing different series names which will appear in legend
    (ex: names=['PACT1', 'PCS600'])

    - x_axis_name string representing name of x axis
    - y_axis_name string representing name of y axis
    - x_min float value or string date representing minimal value to show along x_axis
    - x_max float value or string date representing maximal value to show along x_axis
    - y_min float value representing minimal value to show along y_axis
    - y_max float value  representing maximal value to show along y_axis
    - title title of the graph
    - template string indicating plotly graph template to use. Possible choices are :
            "plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none".
            See https://plot.ly/python/templates for more information

    :return: the input DataFrame ``df``
    """
    legend_names = kwargs.pop('names', None)
    bar_colors = kwargs.pop('colors', None)

    traces = []
    for position, key in enumerate(keys):
        traces.append(
            go.Bar(
                name=key if legend_names is None else legend_names[position],
                x=x,
                y=df[key],
                marker_color=None if bar_colors is None else bar_colors[position],
            )
        )

    plot(traces=traces, show=show, **kwargs)
    return df


def plot_failure_polar(key, df, angle_filter, show=True, **kwargs):
    """
    Draws a polar chart of failures impact angles (in degrees) relative to the input failure key.
    Failures whose angle lies in [90 - angle_filter, 90 + angle_filter] or in
    [270 - angle_filter, 270 + angle_filter] are not drawn.

    :param key: Name of reference failure to use
    :type key: str

    :param df: pandas DataFrame indexed by failure name, with an angle_with_<key> column holding the impact
        angle of each failure with the input key
    :type df: pandas DataFrame

    :param angle_filter: float representing filter half-width around 90 and 270 degrees
    :type angle_filter: float

    :param show: the figure is displayed when this is truthy
    :type show: bool, optional

    :param kwargs: optional arguments used in plot functions.
    Possible kwargs are :
        - title title of the graph
        - template string indicating plotly graph template to use. Possible choices are :
            "plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none".
            See https://plot.ly/python/templates for more information

    :return: the input DataFrame ``df``
    """
    # Colored background sectors; the first trace of the figure.
    background = go.Barpolar(
        r=[1, 1, 1, 1, 1],
        theta=[10, 32.5, 90, 147.5, 170],
        width=[20, 25, 90, 25, 20],
        showlegend=False,
        opacity=0.6,
        hoverinfo='skip',
        marker_color=['firebrick', 'coral', 'lightgreen', 'coral', 'firebrick'],
    )
    compass_traces = [background]

    for failure in df.index:
        angle = df.loc[failure, 'angle_with_' + key]
        filtered_out = (
            90 - angle_filter <= angle <= 90 + angle_filter
            or 270. - angle_filter <= angle <= 270. + angle_filter
        )
        if filtered_out:
            continue
        # One radial segment from the centre to the unit circle at the failure angle.
        compass_traces.append(
            go.Scatterpolar(
                r=[0, 1],
                theta=[0, angle],
                name=failure,
                marker={'symbol': 'y-up'},
            )
        )

    # Display is deferred until the polar layout has been applied.
    fig = plot(traces=compass_traces, show=False, **kwargs)

    fig.update_layout(
        polar={
            'radialaxis': {'visible': True, 'range': [0, 1]},
            'sector': [-1, 181],
        }
    )
    if show:
        fig.show()
    return df


def plot_pie_chart(keys, values, show=True, **kwargs):
    """
    Draws a pie chart from keys (list of string names) and their associated list of values.

    :param keys: list of string names used as slice labels
    :type keys: list

    :param values: list of float values associated with list of string names
    :type values: list

    :param show: Boolean controlling whether or not to plot the chart (forwarded to ``plot``)
    :type show: bool, optional

    :param kwargs: optional arguments used in plot functions.
    Possible kwargs are :
    - colors: list of string relative to area colors (ex: colors= ['red', 'blue', 'orange'])
    - title title of the graph
    - template string indicating plotly graph template to use. Possible choices are :
            "plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none".
            See https://plot.ly/python/templates for more information

    :return: the input ``values`` list
    """
    slice_colors = kwargs.pop('colors', None)
    pie = go.Pie(labels=keys, values=values, marker={'colors': slice_colors})
    plot(traces=[pie], show=show, **kwargs)
    return values


In [13]:
# Load the semicolon-separated CSV of yearly, per-region installed wind / solar capacity into a DataFrame.
dwind_parc = pd.read_csv('wind_production/data/parc-regional-annuel-prod-eolien-solaire.csv', sep=';')
# Bare expression as last statement of the cell: the notebook renders the DataFrame.
dwind_parc

Unnamed: 0,Année,Code INSEE région,Région,Parc installé éolien (MW),Parc installé solaire (MW),Géo-shape région,Géo-point région
0,2001,24,Centre-Val de Loire,,,"{""type"": ""MultiPolygon"", ""coordinates"": [[[[2....","47.4875464351,1.68135599697"
1,2001,53,Bretagne,10.0,,"{""type"": ""MultiPolygon"", ""coordinates"": [[[[-2...","48.1686687703,-2.82931653075"
2,2001,84,Auvergne-Rhône-Alpes,3.0,,"{""type"": ""MultiPolygon"", ""coordinates"": [[[[6....","45.5156201847,4.53513455451"
3,2002,28,Normandie,0.2,,"{""type"": ""MultiPolygon"", ""coordinates"": [[[[-1...","49.1210533267,0.119605844224"
4,2003,28,Normandie,0.2,,"{""type"": ""MultiPolygon"", ""coordinates"": [[[[-1...","49.1210533267,0.119605844224"
...,...,...,...,...,...,...,...
242,2018,84,Auvergne-Rhône-Alpes,553.0,942.0,"{""type"": ""MultiPolygon"", ""coordinates"": [[[[6....","45.5156201847,4.53513455451"
243,2019,11,Ile-de-France,89.0,123.0,"{""type"": ""MultiPolygon"", ""coordinates"": [[[[1....","48.7096042274,2.5032434043"
244,2019,27,Bourgogne-Franche-Comté,808.0,294.0,"{""type"": ""MultiPolygon"", ""coordinates"": [[[[7....","47.2355233483,4.80603616068"
245,2019,52,Pays de la Loire,1012.0,557.0,"{""type"": ""MultiPolygon"", ""coordinates"": [[[[-2...","47.4728416727,-0.817672745303"


In [17]:
# Plot the installed wind capacity column; widget=True makes plot_evolution return a FigureWidget,
# which the notebook then renders as the cell output.
# NOTE(review): plot_evolution uses df.index as x values and read_csv was called without index_col,
# so x is presumably the row number rather than the 'Année' column — confirm this is intended.
plot_evolution(df=dwind_parc,keys=['Parc installé éolien (MW)'], widget=True)

FigureWidget({
    'data': [{'mode': 'lines+markers',
              'name': 'Parc installé éolien (MW)',
     …

In [18]:
# IPython shell escape: list the packages installed in the kernel environment with their versions.
!pip freeze

absl-py==0.10.0
alembic==1.4.2
argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1596629847793/work
astunparse==1.6.3
async-generator==1.10
attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1597959372343/work
backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
backports.functools-lru-cache==1.6.1
beautifulsoup4 @ file:///home/conda/feedstock_root/build_artifacts/beautifulsoup4_1597679909012/work
bleach @ file:///home/conda/feedstock_root/build_artifacts/bleach_1588608214987/work
blinker==1.4
blis==0.4.1
bokeh @ file:///home/conda/feedstock_root/build_artifacts/bokeh_1592869569111/work
Bottleneck==1.3.2
branca==0.4.1
brotlipy==0.7.0
cachetools==4.1.1
catalogue==1.0.0
certifi==2020.6.20
certipy==0.1.3
cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1595805769248/work
chardet==3.0.4
click==7.1.2
click-plugins==1.1.1
cligj==0.5.0
cloudpickle @ file:///home/conda/feedstock_root/build_artifacts/cloudpi