In [176]:
import altair as alt
import pandas as pd
import json

In [177]:
# Names of the target groups; get_percentages() seeds its result with one
# entry per name listed here.
targets = [
    'target_race',
    'target_religion',
    'target_origin',
    'target_gender',
    'target_sexuality',
    'target_age',
]

In [178]:
def get_percentages(dataset):
    """Convert each target group's values into percentages of that group's total.

    Parameters
    ----------
    dataset : mapping-like
        Iterating over it must yield group names, and ``dataset[name]`` must
        yield that group's numeric values (a pandas DataFrame iterated by
        column satisfies this).

    Returns
    -------
    dict
        One entry per name in the module-level ``targets`` list, initialised
        to ``None``. Every group in ``dataset`` whose values sum to a non-zero
        total is mapped to a list of percentages, in the same order as its
        values. Groups whose total is 0 are left untouched, so they stay
        ``None`` when listed in ``targets`` and are absent otherwise.
    """
    # every known target starts out as None until real percentages are computed
    result = dict.fromkeys(targets)

    for group_name in dataset:
        values = list(dataset[group_name])
        total = sum(values)

        # a zero total would mean dividing by zero, so leave this entry as it is
        if total == 0:
            continue

        result[group_name] = [(value / total) * 100 for value in values]

    return result

## Single pie chart

In [179]:
# Load the cluster data. As consumed below, each column is treated as one
# target group and each row as one cluster.
dataset = pd.read_json('data_viz/clusters.json')

# Cluster ids used to colour the pie slices. Derived from the number of rows
# rather than hard-coded to 8, so it always has the same length as each
# target's list of percentages (the DataFrame constructor requires that).
num_clusters = range(len(dataset))

In [180]:
# Turn every target group's values into percentages of that group's total;
# groups whose values sum to 0 are left as None by get_percentages().
target_percentages = get_percentages(dataset)

In [181]:
# for now we're just going to use the first target group
race_percentages = target_percentages['target_race']

# One row per cluster: "theta" is the cluster's percentage within the target
# group and "color" is the cluster id. `single` is only ever bound to the
# DataFrame, instead of being reused for the raw list first.
single = pd.DataFrame({"theta": race_percentages, "color": num_clusters})

In [182]:
# Pie chart for a single target group: slice size comes from "theta", slice
# colour from the cluster id, and slices are stacked in order of "theta".
alt.Chart(single).mark_arc().encode(
    theta=alt.Theta(field='theta', type='quantitative', stack=True),
    color=alt.Color(field='color', type='nominal', title='cluster'),
    order=alt.Order(field="theta")
).properties(
    # plain string: the original f-string had no placeholders
    title='target_race',
    width=150,
    height=150
)

We have a simple pie chart. What if we also want to see the percentage value of each slice (cluster)? <br>


In [183]:
# base chart we previously made
base = alt.Chart(single).mark_arc().encode(
    theta=alt.Theta(field = 'theta', type = 'quantitative', stack = True),
    color=alt.Color(field = 'color', type = 'nominal', title='cluster')
).properties(
        title = f'target_race',
        width = 250,
        height = 250
    )

pie = base.mark_arc(outerRadius=100).encode(order=alt.Order(field="theta"))

# mark text will allow us to display the percent values
text = base.mark_text(radius = 110).encode(

    # we just need 1 decimal placae
    text=alt.Text('theta:Q', format='.1f'),

    # show text when theta > 0
    color=alt.condition(
        alt.datum.theta > 0,
        alt.value("black"),
        alt.value(None)
    )
)
pie + text

Now what if we wanted to display a pie chart for every target group? <br>
We will build one chart per target group in a loop and arrange the charts in a grid with `hconcat()` and `vconcat()`.

In [184]:
# Same call as in the single-pie section above; `dataset` is not modified in
# between, so this recomputes the same percentages (presumably kept so this
# section can be run on its own once `dataset` is loaded).
target_percentages = get_percentages(dataset)

In [185]:
# one labelled pie chart per target group, collected for the grid layout
charts = []

for target, percentages in target_percentages.items():

    # One row per cluster: "theta" is the cluster's percentage within this
    # target group, "color" is the cluster id. A dedicated name is used so the
    # notebook-level `single` frame is not overwritten by the loop.
    target_frame = pd.DataFrame({"theta": percentages, "color": num_clusters})

    # base chart we previously made
    # The slice order lives in the shared base so that the arcs and the text
    # labels are stacked identically; if only the arcs were ordered, the
    # labels could end up next to the wrong slices.
    base = alt.Chart(target_frame).mark_arc().encode(
        theta=alt.Theta(field='theta', type='quantitative', stack=True),
        color=alt.Color(field='color', type='nominal', title='Cluster'),
        order=alt.Order(field="theta")
    )

    pie = base.mark_arc(outerRadius=100).properties(
        title=str(target),
        width=100,
        height=100
    )

    # mark text will allow us to display the percent values
    text = base.mark_text(radius=110).encode(

        # we just need 1 decimal place
        text=alt.Text('theta:Q', format='.1f'),

        # show text when theta > 0
        color=alt.condition(
            alt.datum.theta > 0,
            alt.value("black"),
            alt.value(None)
        )
    )

    # NOTE: legend styling is deliberately not applied here. Altair's
    # configure_*() methods return a new chart instead of modifying the chart
    # in place, so calling pie.configure_legend(...) and discarding the result
    # has no effect; configuration is only accepted on the top-level chart, so
    # it belongs on the final concatenated chart.
    charts.append(pie + text)

In [186]:
# number of pie charts placed side by side in each row of the grid
charts_per_row = 3

# split the list of charts into consecutive rows of at most `charts_per_row`
chart_rows = [
    alt.hconcat(*charts[start:start + charts_per_row])
    for start in range(0, len(charts), charts_per_row)
]

# Stack the rows vertically. Legend styling is applied here because Altair only
# accepts configuration on the top-level chart, and configure_legend() returns
# the configured chart, which is the value this cell displays.
alt.vconcat(*chart_rows).configure_legend(labelFontSize=18)