In [None]:
from bokeh.io import output_notebook, show, save
from bokeh.models import ColumnDataSource, NumeralTickFormatter, Legend
from bokeh.plotting import figure


import pandas as pd

In [None]:
# Configure Bokeh so that later show() calls render plots inline in the notebook.
output_notebook()

# Dane

In [None]:
# Aggregate tables, read as-is from the local data/ directory.
artists_word_count = pd.read_csv('data/artists_word_count_agg.csv')
dt_artists_agg_data = pd.read_csv('data/dt_artists_agg_data.csv')
dt_album_agg_data = pd.read_csv('data/dt_album_agg_data.csv')

# Metadata tables are indexed by their key column(s) right after loading so
# they can be joined onto the aggregate tables through `on=`.
artists_metadata = pd.read_csv('data/artists_metadata.csv').set_index('artist')
albums_metadata = pd.read_csv('data/albums_metadata.csv').set_index(['artist', 'album_id'])

In [None]:
# Attach album metadata to the per-album aggregates. DataFrame.join defaults to
# a left join, so every aggregate row is kept; the 'artist' and 'album_id'
# columns are matched against the (artist, album_id) index of albums_metadata.
dt_album_agg_data_aug = dt_album_agg_data.join(albums_metadata, on=['artist', 'album_id'])

In [None]:
# Sanity check: (rows, columns) of the joined album table.
dt_album_agg_data_aug.shape

In [None]:
# Preview the first rows of the joined album table.
dt_album_agg_data_aug.head()

In [None]:
# Attach artist metadata to the per-artist aggregates. DataFrame.join defaults
# to a left join; the 'artist' column is matched against the 'artist' index of
# artists_metadata.
dt_artists_agg_data_aug = dt_artists_agg_data.join(artists_metadata, on = 'artist')

# dt_artists_agg_data_aug['images.url'].isna().sum()

# dt_artists_agg_data_aug.head()

# Wulgaryzmy vs liczba słów na sekundę

In [None]:
# Plot input: albums with N_songs_wL >= 5 (the tooltip presents N_songs_wL as
# the number of songs whose lyrics were used). The cover-image column is
# renamed to 'url' so the tooltip template can reference it as @url, and
# 'Avg_vulg_ptc' holds Avg_vulg pre-formatted as a percentage string with two
# decimals for display in the tooltip.
dt_album_2dplot = (
    dt_album_agg_data_aug
    .query('N_songs_wL >= 5')
    .rename(columns={'album_images.url': 'url'})
    .assign(
        Avg_vulg_ptc=lambda albums: pd.Series(
            ["{0:.2f}%".format(share * 100) for share in albums['Avg_vulg']],
            index=albums.index,
        )
    )
)

In [None]:
# Preview the first rows of the table that feeds the scatter plots.
dt_album_2dplot.head()

In [None]:
# HTML template for the hover tooltip used by the album scatter plots (passed
# as `tooltips=` to figure()). Bokeh replaces every @name placeholder with the
# hovered point's value from the data source column of that name. The template
# shows, top to bottom: the cover image (@url), artist, album name, label,
# release date, words per second, the pre-formatted vulgarity percentage
# (@Avg_vulg_ptc), and how many of the album's songs had lyrics available
# (@N_songs_wL out of @N_songs).
ALBUM_TOOLTIPS = """
    <div>
        <div>
            <img
                src="@url" height="200" width="200"
                style="float: left; margin: 0px 15px 15px 0px;"
                border="2"
            ></img>
        </div>
        <div>
            <span style="font-size: 17px; font-weight: bold;">@artist</span>
        </div>
        <div>
            <span style="font-size: 15px; font-weight: bold;">@album_name</span>
        </div>
        <div>
            <span style="font-size: 14px;">@album_label</span>
        </div>
        <div>
            <span style="font-size: 14px;">@album_release_date</span>
        </div>
        <div>
            <span style="font-size: 14px;">@N_words_per_s słów/sek.</span>
        </div>
        <div>
            <span style="font-size: 14px;">@Avg_vulg_ptc wulg</span>
        </div>
        <div>
            <span style="font-size: 14px;">Na podst. tekstów z @N_songs_wL / @N_songs utworów</span>
        </div>
    </div>
"""

In [None]:
# Scatter plot of every album in dt_album_2dplot: words per second (x) against
# vulgarity share (y), with the album tooltip shown on hover.
source = ColumnDataSource(dt_album_2dplot)

# The title states the actual inclusion rule: dt_album_2dplot is built with
# `N_songs_wL >= 5`, i.e. at least 5 songs with lyrics, not "more than 5 songs"
# as the previous title claimed.
p = figure(plot_width=800, plot_height=600, tooltips=ALBUM_TOOLTIPS,
           title="Wulgaryzmy vs słowa/s, albumy z min. 5 utworami z tekstem",
           margin = [20, 0, 20, 0])

p.circle('N_words_per_s', 'Avg_vulg', size=10, source=source)

# Label both axes so the figure can be read without the surrounding notebook.
p.xaxis.axis_label = "Liczba słów na sekundę"
p.yaxis.axis_label = "Wulgaryzmy"

# Avg_vulg is a fraction; render the y ticks as percentages with one decimal.
p.yaxis[0].formatter = NumeralTickFormatter(format="0.0%")

p.title.text_font_size = "16pt"


show(p)

In [None]:
# Number of plotted albums per artist; only its index (the artist names) is
# used below. An earlier variant kept only artists with more than 2 albums.
artist_album_cnt = dt_album_2dplot.groupby('artist').size()

# Every artist present in the plot data except 'Sentino', in sorted order.
artist_list = sorted(set(artist_album_cnt.index) - {'Sentino'})

# Rows of up to 10 names each, in reverse order (last alphabetical row first).
# NOTE(review): presumably reversed so the legend rows stack in alphabetical
# order when added above the plot -- confirm against the rendered figure.
artist_list_chunks = [
    artist_list[start:start + 10]
    for start in range(0, len(artist_list), 10)
][::-1]

In [None]:
# Artists whose points are visible when the plot first renders; all other
# artists start hidden and can be toggled by clicking their legend entry.
visible_default = ['Peja', 'Quebonafide', 'Taco Hemingway', 'Fisz Emade Tworzywo']

p = figure(plot_width=950, plot_height=1000, tooltips=ALBUM_TOOLTIPS,
           title="Wulgaryzmy vs słowa/s, albumy wybranych raperów")

# One horizontal legend per chunk of artists, each added above the plot area.
# The y offset starts at 15 * (number of chunks) and drops by 15 per chunk.
legend_row_count = len(artist_list_chunks)

for chunk_ix, artist_list_tmp in enumerate(artist_list_chunks):
    circles = {}
    row_y_loc = 15 * (legend_row_count - chunk_ix)
    for artist in artist_list_tmp:
        # Select the artist's albums with a boolean mask rather than a
        # string-concatenated query(): a name containing a double quote would
        # otherwise produce an invalid query expression.
        artist_albums = dt_album_2dplot[dt_album_2dplot['artist'] == artist]
        source = ColumnDataSource(artist_albums)
        circles[artist] = p.circle('N_words_per_s', 'Avg_vulg', size=10, source=source,
                                   alpha=0.6, hover_fill_color="purple")
        circles[artist].visible = artist in visible_default

    legend = Legend(items=[(name, [circles[name]]) for name in artist_list_tmp],
                    location=(0, row_y_loc), orientation='horizontal',
                    label_text_font_size="9pt", glyph_height=10)
    p.add_layout(legend, 'above')

# p.legend covers every legend added above; clicking an entry hides/shows
# that artist's points.
p.legend.click_policy = "hide"

# Label both axes so the figure can be read without the surrounding notebook.
p.xaxis.axis_label = "Liczba słów na sekundę"
p.yaxis.axis_label = "Wulgaryzmy"

# Avg_vulg is a fraction; render the y ticks as percentages with one decimal.
p.yaxis[0].formatter = NumeralTickFormatter(format="0.0%")

p.title.text_font_size = "16pt"


show(p)