# Module 5 Pyramid Practice in Python with `plotnine`

In this notebook, you'll see the `plotnine` equivalents of the pyramid practice. 

In [None]:
from plotnine import *
from plotnine.data import diamonds
import pandas as pd
import numpy as np

In [None]:
# Bar chart of diamond counts per cut, with each bar colored by its own cut.
(
    ggplot(data=diamonds)
    + geom_bar(aes(x="cut", fill="cut"))
)

In [None]:
# Counts per cut, subdivided by clarity. No position is given, so geom_bar's
# default position is used.
(
    ggplot(diamonds)
    + geom_bar(aes(x="cut", fill="clarity"))
)

In [None]:
# position="identity": bars are not adjusted, so the clarity bars within a cut
# are drawn over one another rather than stacked.
(
    ggplot(diamonds)
    + geom_bar(aes(x="cut", fill="clarity"), position="identity")
)

In [None]:
# position="stack": clarity segments are stacked on top of each other per cut.
(
    ggplot(diamonds)
    + geom_bar(aes(x="cut", fill="clarity"), position="stack")
)

In [None]:
# position="fill": stacked like "stack", but every bar is rescaled to the same
# total height so the segments read as proportions within each cut.
(
    ggplot(diamonds)
    + geom_bar(aes(x="cut", fill="clarity"), position="fill")
)

In [None]:
# position="dodge": clarity bars are placed side by side within each cut.
(
    ggplot(diamonds)
    + geom_bar(aes(x="cut", fill="clarity"), position="dodge")
)

In [None]:
# Same dodged chart, with the x and y axes swapped so the bars run horizontally.
(
    ggplot(diamonds)
    + geom_bar(aes(x="cut", fill="clarity"), position="dodge")
    + coord_flip()
)

In [None]:
# Dodge with fill mapped to the same variable as x.
# NOTE(review): each cut then has only one fill group, so dodging presumably
# has nothing to separate — confirm against the rendered output.
(
    ggplot(diamonds)
    + geom_bar(aes(x="cut", fill="cut"), position="dodge")
)

**Unfortunately, `coord_polar` is not implemented in `plotnine`, so we can't draw a pie chart the way we would with ggplot2 in R. Instead, we can use matplotlib or plotly to create pie charts.**


In [None]:
# A minimal pie chart drawn with matplotlib.

import matplotlib.pyplot as plt

# One entry per slice: label, value, and matplotlib short color code.
activities = ['Sleep', 'Code', 'Debug']
hours = [4, 8, 12]
colors = ['r', 'b', 'y']

plt.pie(
    hours,
    labels=activities,
    colors=colors,
    startangle=90,
    autopct='%.1f%%',  # print each slice's share as a percentage, one decimal
)
plt.show()

In [None]:
# The same pie chart drawn with plotly. It reuses `activities` and `hours`
# from the matplotlib example cell, so that cell must have been run first.

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go

# Set up plotly's notebook mode before calling iplot.
init_notebook_mode(connected=True)

# Hex color codes, one per slice.
colors = ['#FF0000', '#0000FF', '#FFFF00']

trace = go.Pie(
    labels=activities,
    values=hours,
    marker={"colors": colors},
)

data = [trace]
iplot(data)

**Now that we know how to manipulate position and coordinates, let's create a population pyramid.**


In [None]:
# Load the census table used for the population pyramid and preview its
# first rows.
census = pd.read_csv(
    "/dsa/data/all_datasets/census/census2010pyr.csv"
)
census.head()

In [None]:
# stat="identity" plots the `pop` values as bar heights instead of counting rows.
(
    ggplot(census, aes(x="age", y="pop"))
    + geom_bar(stat="identity")
)

In [None]:
# Same chart with the bars colored by sex.
(
    ggplot(census, aes(x="age", y="pop", fill="sex"))
    + geom_bar(stat="identity")
)

In [None]:
# Give the bar layer its own data: only the rows where sex is "Female".
(
    ggplot(census, aes(x="age", y="pop", fill="sex"))
    + geom_bar(data=census[census["sex"] == "Female"], stat="identity")
)

In [None]:
# Two separate bar layers, one per sex, both plotted on the positive side.
p = (
    ggplot(census, aes(x="age", y="pop", fill="sex"))
    + geom_bar(data=census[census["sex"] == "Female"], stat="identity")
    + geom_bar(data=census[census["sex"] == "Male"], stat="identity")
)
p

In [None]:
# Mirror the male layer: its y aesthetic is overridden with "-pop", so the
# male bars extend in the negative direction while the female bars stay positive.
p = (
    ggplot(census, aes(x="age", y="pop", fill="sex"))
    + geom_bar(data=census[census["sex"] == "Female"], stat="identity")
    + geom_bar(
        data=census[census["sex"] == "Male"],
        stat="identity",
        mapping=aes(y="-pop"),
    )
)
p

In [None]:
# Build the mirrored female/male layers again, then turn them into a pyramid.
p = (
    ggplot(census, aes(x="age", y="pop", fill="sex"))
    + geom_bar(data=census[census["sex"] == "Female"], stat="identity")
    + geom_bar(
        data=census[census["sex"] == "Male"],
        stat="identity",
        mapping=aes(y="-pop"),
    )
)

# Display only: `p` itself is left without the flip, theme, scale and title.
(
    p
    + coord_flip()
    + theme_minimal()
    + scale_y_continuous(labels=abs)  # label the negated male side with positive numbers
    + ggtitle("US Population(2010 Census)")
)

In [None]:
# Load the countries table used for the life-expectancy chart.
co = pd.read_csv(
    "/dsa/data/all_datasets/census/countries.csv"
)

**We need to rename the "Life expectancy" column to a valid Python identifier (no spaces) so that we can use it in arithmetic expressions inside `aes`, such as `aes(y="-lifeExp")`.**

In [None]:
# Rename "Life expectancy" to `lifeExp` so the column can be used inside aes()
# expressions such as "-lifeExp". The result is rebound to `co` instead of
# using inplace=True, which keeps the frame's lineage explicit.
co = co.rename(columns={'Life expectancy': 'lifeExp'})
co.head()

In [None]:
# Mirrored bar chart of mean life expectancy per continent. Countries are split
# by whether their life expectancy is above 65; each group's per-continent mean
# is drawn on opposite sides of zero.
p = (
    ggplot(data=co, mapping=aes(x="Continent", y="lifeExp", fill="Continent"))
    # Countries with lifeExp > 65: mean per continent on the positive side.
    + geom_bar(data=co[co["lifeExp"] > 65], stat="summary", fun_y=np.mean)
    # Countries with lifeExp <= 65: y is negated so the mean lands on the
    # negative side.
    + geom_bar(
        data=co[co["lifeExp"] <= 65],
        stat="summary",
        fun_y=np.mean,
        mapping=aes(y="-lifeExp"),
    )
    # Horizontal reference line (slope 0).
    # NOTE(review): presumably drawn at y = 0 via the default intercept — confirm.
    + geom_abline(slope=0)
    + scale_y_continuous(
        labels=abs,  # show the negated side with positive tick labels
        limits=[-90, 90],
        # range() excludes its stop value, so stop at 91 to include the +90
        # tick and keep the breaks symmetric with the limits.
        breaks=list(range(-90, 91, 10)),
    )
    + coord_flip()
    + ylab("averages for 65 and younger vs 66 and older")
    + xlab("Continents")
    + theme_minimal()
    + ggtitle("Average Life Expectancy in Continents")
    # Fill duplicates the Continent axis, so the legend is hidden.
    + theme(legend_position="none")
)
p