# Office Hours - W2 2

## Imports

In [1]:
import pandas as pd
import altair as alt

## Data sourcing

In [195]:
data = pd.DataFrame(
    columns=["category", "amount", "level"],
    data=[["red", 23, "lvl_1"], ["blue", 12, "lvl_3"], ["green", 19, "lvl_2"], ["red", 8, "lvl_1"], ["green", 6, "lvl_2"]]
)

data

Unnamed: 0,category,amount,level
0,red,23,lvl_1
1,blue,12,lvl_3
2,green,19,lvl_2
3,red,8,lvl_1
4,green,6,lvl_2


## Text on charts

In [None]:
base_chart = alt.Chart(data[:3]).encode(
    theta=alt.Theta("amount").stack(True),
    color=alt.Color("category", scale=alt.Scale(range=[ "#0000FF", "#00FF00", "#FF0000"])).legend(None)
)

In [None]:
pie_chart = base_chart.mark_arc(outerRadius=120)
labels = base_chart.mark_text(radius=140, size=20).encode(text="category")

In [197]:
pie_chart + labels

## Wide and long formats

In [110]:
data # in long format

Unnamed: 0,category,amount,level
0,red,23,lvl_1
1,blue,12,lvl_3
2,green,19,lvl_2
3,red,8,lvl_1
4,green,6,lvl_2


In [203]:
wide_data = data.pivot_table(index="category", columns=["level"])

wide_data

Unnamed: 0_level_0,amount,amount,amount
level,lvl_1,lvl_2,lvl_3
category,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
blue,,,12.0
green,,12.5,
red,15.5,,


In [204]:
wide_data = wide_data.droplevel(0, axis=1).rename_axis(None, axis=1).reset_index()

wide_data

Unnamed: 0,category,lvl_1,lvl_2,lvl_3
0,blue,,,12.0
1,green,,12.5,
2,red,15.5,,


In [205]:
wide_data.melt(id_vars="category")

Unnamed: 0,category,variable,value
0,blue,lvl_1,
1,green,lvl_1,
2,red,lvl_1,15.5
3,blue,lvl_2,
4,green,lvl_2,12.5
5,red,lvl_2,
6,blue,lvl_3,12.0
7,green,lvl_3,
8,red,lvl_3,


## Conditional display

In [137]:
max_val = int(data[:3]["amount"].max())

base_chart = alt.Chart(data[:3]).mark_bar().encode(
    x=alt.Theta("amount").stack(True),
    y=alt.Color("category"),
    color=alt.when(alt.datum.amount == max_val).then(alt.value("red")).otherwise(alt.value("lightgray"))
)

base_chart

In [201]:
coloured_data = data[:3]

coloured_data["is_max"] = coloured_data["amount"] == coloured_data["amount"].max()

alt.Chart(coloured_data).mark_bar().encode(
    x="amount",
    y=alt.Y("category").sort("-x"),
    color="is_max"
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  coloured_data["is_max"] = coloured_data["amount"] == coloured_data["amount"].max()
