In [2]:
import plotly.graph_objects as go
import dash
import plotly.express as px

import dash_html_components as html

import dash_core_components as dcc
from dash.dependencies import Input, Output
import plotly.offline as pyo
import numpy as np
import pandas as pd
from os.path import exists

# Print the installed dash_core_components version when the module loads.
print(dcc.__version__)

# Stylesheet handed to Dash for the page.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Colour constants (not referenced by the code visible in this file).
colors = dict(background='#111111', text='#7FDBFF')

# CSV that prepareData() loads into the main DataFrame.
processed_csv_path = "processed_dataset.csv"

# Region codes offered in the country dropdown.
country_codes = [
    'BR', 'CA', 'DE', 'FR', 'GB', 'IN',
    'JP', 'KR', 'MX', 'RU', 'US',
]

# Category id -> display name.
# NOTE(review): the ids look like YouTube Data API video category ids - confirm.
# "Comedy" intentionally appears under two ids (23 and 34) in this table.
category_ids_to_names_dict = {
    1: "Film & Animation",
    2: "Autos & Vehicles",
    10: "Music",
    15: "Pets & Animals",
    17: "Sports",
    18: "Short Movies",
    19: "Travel & Events",
    20: "Gaming",
    21: "Videoblogging",
    22: "People & Blogs",
    23: "Comedy",
    24: "Entertainment",
    25: "News & Politics",
    26: "Howto & Style",
    27: "Education",
    28: "Science & Technology",
    29: "Nonprofits & Activism",
    30: "Movies",
    31: "Anime/Animation",
    32: "Action/Adventure",
    33: "Classics",
    34: "Comedy",
    35: "Documentary",
    36: "Drama",
    37: "Family",
    38: "Foreign",
    39: "Horror",
    40: "Sci-Fi/Fantasy",
    41: "Thriller",
    42: "Shorts",
    43: "Shows",
    44: "Trailers",
}

# Display name -> category id, used to build per-category CSV file names.
#
# The name "Comedy" exists under two ids (23 and 34) in the id -> name table.
# A dict literal keeps only the LAST value for a repeated key, so listing both
# here silently mapped "Comedy" to 34. Only the first id (23) is kept, so the
# dropdown entry "Comedy" resolves to the id that sits alongside the other
# general categories (ids 1-29).
category_names_to_ids_dict = {
    "Film & Animation": 1,
    "Autos & Vehicles": 2,
    "Music": 10,
    "Pets & Animals": 15,
    "Sports": 17,
    "Short Movies": 18,
    "Travel & Events": 19,
    "Gaming": 20,
    "Videoblogging": 21,
    "People & Blogs": 22,
    "Comedy": 23,
    "Entertainment": 24,
    "News & Politics": 25,
    "Howto & Style": 26,
    "Education": 27,
    "Science & Technology": 28,
    "Nonprofits & Activism": 29,
    "Movies": 30,
    "Anime/Animation": 31,
    "Action/Adventure": 32,
    "Classics": 33,
    "Documentary": 35,
    "Drama": 36,
    "Family": 37,
    "Foreign": 38,
    "Horror": 39,
    "Sci-Fi/Fantasy": 40,
    "Thriller": 41,
    "Shorts": 42,
    "Shows": 43,
    "Trailers": 44,
}

def prepareData(processed_csv_path):
    """Load the processed dataset and add the derived time columns.

    Args:
        processed_csv_path: Path of the CSV file to read. It must contain
            ``publishedAt`` and ``trending_date`` columns.

    Returns:
        The loaded DataFrame with ``publishedAt`` and ``trending_date``
        converted via ``pd.to_datetime``, plus two new columns:
        ``month`` (``trending_date`` formatted with ``'%B-%Y'``) and
        ``date`` (the ``datetime.date`` part of ``trending_date``).
    """
    def month_label(timestamp):
        return timestamp.strftime('%B-%Y')

    frame = pd.read_csv(processed_csv_path)

    frame['publishedAt'] = pd.to_datetime(frame.publishedAt)
    frame['trending_date'] = pd.to_datetime(frame.trending_date)

    trending = frame['trending_date']
    frame['month'] = trending.apply(month_label)
    frame['date'] = trending.dt.date

    return frame

def extractFilterOptions(df):
    """Return the distinct regions and category names present in ``df``.

    Args:
        df: DataFrame with ``region`` and ``category_text`` columns.

    Returns:
        A ``(regions, categories)`` pair; each element is the result of
        ``Series.unique()`` on the corresponding column (order of first
        appearance).
    """
    regions, categories = (
        df[column].unique() for column in ('region', 'category_text')
    )
    return regions, categories

# Main dataset, loaded once at start-up and read by the area-chart callback.
df = prepareData(processed_csv_path)

# Distinct regions / category names actually present in the dataset.
available_regions, available_categories = extractFilterOptions(df)

# Line colours for the stacked area chart, one per plotted category.
available_colors = [
    "#a6cee3", "#1f78b4", "#b2df8a", "#33a02c",
    "#fb9a99", "#e31a1c", "#fdbf6f", "#ff7f00",
    "#cab2d6", "#6a3d9a", "#ffff99", "#b15928",
]

### Christians
# Distinct trending dates, in ascending order. The range slider works with
# integer positions that the callbacks translate back to dates by indexing
# into ``unique_dates``, so the array must be sorted.
#
# ``sort_values`` returns a new frame rather than sorting in place, so its
# result has to be assigned. The column is selected by name instead of by
# position so a change in column order cannot silently pick another column.
# NOTE(review): the values are kept as the raw CSV strings; ascending string
# order equals chronological order only for ISO-like formats - confirm.
dates_from_processed_dataset = pd.read_csv(processed_csv_path, usecols=['trending_date'])
dates_from_processed_dataset = dates_from_processed_dataset.sort_values('trending_date')
unique_dates = np.array(dates_from_processed_dataset.trending_date.unique())
no_of_unique_dates = len(unique_dates)

# Options for the country dropdown: label and value are both the region code.
country_list = [{'label': code, 'value': code} for code in country_codes]

# Options for the category dropdown: label and value are both the category
# name. "Comedy" is listed under two ids in category_ids_to_names_dict, so the
# names are de-duplicated (dict.fromkeys keeps first-seen order) to avoid
# handing the Dropdown two options with the same value.
categories_list = [
    {'label': name, 'value': name}
    for name in dict.fromkeys(category_ids_to_names_dict.values())
]

# Title-statistics file loaded at start-up; update_data() later rebinds
# ``current_data_set`` to whichever file it read last.
current_data_set = "Dataset/Titledata/BR/BR_title_totals.csv"
df_for_title_chart = pd.read_csv(current_data_set)

# X-axis labels of the title bar chart (read by update_output_div).
x = [
    'Did use () or []',
    'Did use CAPS',
    'Did use emojis',
]

def _centered_block(width):
    """Return the inline style shared by the layout containers at ``width``."""
    return {"text-align": "center", "display": "inline-block", "width": width}


# Page layout, top to bottom: country dropdown, category dropdown, the two
# charts side by side, the date range slider, and the selected-range text.
_country_dropdown = dcc.Dropdown(
    id="title_drop_down",  # TODO: rename to
    options=country_list,
    value=['BR'],
    multi=True,
)

_category_dropdown = dcc.Dropdown(
    id="categories_drop_down",
    options=categories_list,
    value=["Music", "Gaming"],
    multi=True,
)

_charts_row = html.Div(
    children=[
        html.Div(dcc.Graph(id='title_bar_chart'), style=_centered_block("40%")),
        html.Div(dcc.Graph(id='stacked-area-chart'), style=_centered_block("40%")),
    ],
    style=_centered_block("100%"),
)

# Slider positions are indices into the list of unique trending dates.
_date_slider = dcc.RangeSlider(
    id="dataslider",
    min=0,
    max=no_of_unique_dates,
    updatemode='mouseup',
    step=1,
    value=[0, no_of_unique_dates],
    pushable=1,
)

app.layout = html.Div(
    children=[
        html.Div(_country_dropdown, style=_centered_block("100%")),
        html.Div(_category_dropdown, style=_centered_block("100%")),
        _charts_row,
        html.Div(_date_slider, style=_centered_block("100%")),
        html.Div(id='my-output', style=_centered_block("100%")),
    ],
    style={"border": "1px solid black"},
)


# Count columns shared by every title-statistics CSV, in the order
# (total, parenthesis/bracket usage, caps usage, emoji usage).
_TITLE_COUNT_COLUMNS = (
    'total_number_of_titles',
    'number_of_titles_with_parenthesis_or_squarebracket_usage',
    'number_of_titles_with_caps_usage',
    'number_of_titles_with_emoji_usage',
)


def _rows_in_slider_range(frame, slider_interval):
    """Return the rows of ``frame`` whose ``date`` lies in the slider's range."""
    first_date = unique_dates[slider_interval[0]]
    # The upper slider position is exclusive, hence the -1.
    last_date = unique_dates[slider_interval[1] - 1]
    return frame.loc[(frame['date'] >= first_date) & (frame['date'] <= last_date)]


def update_data(input_countries, input_categories, slider_interval):
    """Recompute the title-usage percentages for the current selection.

    Reads the pre-aggregated CSV files under ``Dataset/Titledata/<country>/``
    and accumulates the counts over every selected country (and category).

    * No category selected and the slider spans the full range: the first
      row of ``<country>_title_totals.csv`` is used.
    * No category selected, partial range:
      ``<country>_allcategories_totals_per_day.csv`` is filtered by date.
    * Categories selected: ``<country>_category<id>_totals_per_day.csv`` is
      filtered by date; a missing per-category file is skipped.

    Args:
        input_countries: Selected region codes; ``None`` is treated as empty.
        input_categories: Selected category names; ``None`` is treated as
            empty.
        slider_interval: ``[lower, upper]`` slider positions, used as indices
            into ``unique_dates`` (upper is exclusive).

    Returns:
        ``(did_use_par_or_bracks, did_use_caps, did_use_emojis,
        did_not_use_par_or_bracks, did_not_use_caps, did_not_use_emojis)`` as
        percentages, all 0 when no titles matched. The same values (plus
        ``total_titles``, ``current_data_set`` and ``df_test``) are also
        written to module globals, which existing callers read.

    Raises:
        KeyError: if a category name is not in ``category_names_to_ids_dict``.
        FileNotFoundError: if a country's totals / all-categories file is
            missing (only per-category files are optional).
    """
    # Should not use global, ideally - But only if multi-user session according to the tutorials (we just making a prototype, so I think it's okay?)
    global current_data_set, df_test, total_titles, did_use_par_or_bracks, did_use_caps, did_use_emojis, did_not_use_par_or_bracks, did_not_use_caps, did_not_use_emojis

    # A cleared dropdown may deliver None instead of an empty list.
    countries = input_countries or []
    categories = input_categories or []

    # Running totals, in _TITLE_COUNT_COLUMNS order.
    counts = [0, 0, 0, 0]

    def accumulate(values):
        for position, value in enumerate(values):
            counts[position] += value

    for input_country in countries:
        prefix = "Dataset/Titledata/" + input_country + "/" + input_country

        if not categories:
            if slider_interval[0] == 0 and slider_interval[1] == no_of_unique_dates:
                # Full range: the pre-computed grand totals are in the first row.
                current_data_set = prefix + "_title_totals.csv"
                df_test = pd.read_csv(current_data_set)
                accumulate(df_test[column].iloc[0] for column in _TITLE_COUNT_COLUMNS)
            else:
                current_data_set = prefix + "_allcategories_totals_per_day.csv"
                df_test = _rows_in_slider_range(pd.read_csv(current_data_set), slider_interval)
                accumulate(df_test[column].sum() for column in _TITLE_COUNT_COLUMNS)
            continue

        for input_category in categories:
            category_id = category_names_to_ids_dict[input_category]
            current_data_set = prefix + "_category" + str(category_id) + "_totals_per_day.csv"
            # A missing per-category file is skipped rather than treated as an error.
            if not exists(current_data_set):
                continue
            df_test = _rows_in_slider_range(pd.read_csv(current_data_set), slider_interval)
            accumulate(df_test[column].sum() for column in _TITLE_COUNT_COLUMNS)

    total_titles, used_par_or_bracks, used_caps, used_emojis = counts

    if total_titles > 0:
        did_use_par_or_bracks = (used_par_or_bracks / total_titles) * 100
        did_use_caps = (used_caps / total_titles) * 100
        did_use_emojis = (used_emojis / total_titles) * 100
        did_not_use_par_or_bracks = 100 - did_use_par_or_bracks
        did_not_use_caps = 100 - did_use_caps
        did_not_use_emojis = 100 - did_use_emojis
    else:
        # Nothing matched: report empty bars instead of dividing by zero.
        did_use_par_or_bracks = did_use_caps = did_use_emojis = 0
        did_not_use_par_or_bracks = did_not_use_caps = did_not_use_emojis = 0

    return (did_use_par_or_bracks, did_use_caps, did_use_emojis,
            did_not_use_par_or_bracks, did_not_use_caps, did_not_use_emojis)

@app.callback(
    Output(component_id='title_bar_chart', component_property='figure'),
    Input("title_drop_down", "value"),
    Input("categories_drop_down", "value"),
    Input("dataslider", "value"))
def update_output_div(input_countries, input_categories, slider_interval):
    """Build the stacked "Titles" bar chart for the current selection.

    Calls ``update_data`` (which publishes its results through module
    globals) and plots, for each of the three title features in ``x``, the
    percentage of titles that did ("Yes") and did not ("No") use it.

    Args:
        input_countries: Value of the country dropdown.
        input_categories: Value of the category dropdown.
        slider_interval: Value of the date range slider.

    Returns:
        A ``go.Figure`` with two stacked bar traces and a 0-100 y-axis.
    """
    update_data(input_countries, input_categories, slider_interval)

    yes_values = [did_use_par_or_bracks, did_use_caps, did_use_emojis]
    no_values = [did_not_use_par_or_bracks, did_not_use_caps, did_not_use_emojis]

    fig = go.Figure()
    for label, values, bar_color in (
        ("Yes", yes_values, 'rgb(44, 127, 184)'),
        ("No", no_values, 'rgb(254, 178, 76)'),
    ):
        fig.add_trace(go.Bar(name=label, x=x, y=values, marker_color=bar_color))

    fig.update_layout(
        title_text='Titles',
        barmode='stack',
        yaxis_range=(0, 100),
        transition={
            'duration': 500,
            'easing': 'cubic-in-out',
        },
    )

    return fig

@app.callback(
    Output('my-output', component_property='children'),
    Input('dataslider', 'value'))
def settext(slider_interval):
    """Return the text "<first date> - <last date>" for the slider selection.

    Args:
        slider_interval: ``[lower, upper]`` slider positions; ``lower`` and
            ``upper - 1`` are used as indices into ``unique_dates``.
    """
    lower, upper = slider_interval[0], slider_interval[1]
    first_date = unique_dates[lower]
    last_date = unique_dates[upper - 1]
    return first_date + " - " + last_date

@app.callback(
    dash.dependencies.Output('stacked-area-chart', 'figure'),
    [dash.dependencies.Input('title_drop_down', 'value'),
     dash.dependencies.Input('categories_drop_down', 'value')])
def update_graph(regionInput, categoryInput):
    """Build the normalised stacked area chart of trending videos per day.

    For every selected category, counts the rows of ``df`` per
    ``trending_date`` in the selected regions and adds them as one trace of a
    percent-normalised stack group.

    Args:
        regionInput: Selected region codes; ``None`` or an empty list yields
            traces without data instead of an IndexError.
        categoryInput: Selected category names; ``None`` is treated as empty.

    Returns:
        A ``go.Figure`` with one line trace per selected category.
    """
    # A cleared dropdown may deliver None instead of an empty list.
    selected_regions = regionInput or []
    selected_categories = categoryInput or []

    fig = go.Figure()  # TODO: add weeks and months
    for position, category in enumerate(selected_categories):
        # Aggregate over ALL selected regions, matching the title bar chart
        # (previously only the first selected region was used).
        videos_that_match = df.loc[
            (df.category_text == category) & df.region.isin(selected_regions)
        ]
        videos_per_day = videos_that_match.trending_date.value_counts().sort_index()

        fig.add_trace(go.Scatter(
            x=videos_per_day.index,
            y=videos_per_day.values,
            mode='lines',
            name=category,
            # Cycle through the palette so selecting more categories than
            # there are colours does not raise an IndexError.
            line=dict(width=0.5, color=available_colors[position % len(available_colors)]),
            stackgroup='one',  # define stack group
            groupnorm='percent',  # sets the normalization for the sum of the stackgroup
        ))

    fig.update_layout(
        title=" trending YouTube data",
        title_font_size=20, legend_font_size=10,
        showlegend=True,
        width=800, height=700,
        yaxis=dict(type='linear', ticksuffix='%'))

    fig.update_xaxes(
        title_text='Date',
        title_font=dict(size=15, family='Verdana', color='black'),
        tickfont=dict(family='Calibri', color='black', size=12))

    fig.update_yaxes(
        title_text="Number of videos(%)", range=(0, 100),
        title_font=dict(size=15, family='Verdana', color='black'),
        tickfont=dict(family='Calibri', color='black', size=12))

    return fig

# Start the Dash server only when this file is executed directly.
if __name__ == "__main__":
    app.run_server()


2.0.0
Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
