In [None]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
pd.read_csv("IndiaTop10Cities.csv")

In [None]:
df = pd.read_csv("IndiaTop10Cities.csv",thousands=',')# data source https://worldpopulationreview.com/countries/india-population
df

In [None]:
df.info()

In [None]:
px.bar(df, x="City", y="Population", width=700, title="Current Population of the top 10 Cities.")

In [None]:
px.bar(df, x="City", y="Population", width=700, title="Current Population of the top 10 Cities.").update_layout(
    xaxis={"categoryorder":'category ascending'})

In [None]:
px.bar(df.sort_values(by=['City']), x="City", y="Population", width=700, title="Current Population of the top 10 Cities.")

In [None]:
px.bar(df, y="City", x="Population", width=700)

In [None]:
px.bar(df, y="City", x="Population", width=700).update_layout(yaxis={'categoryorder':'total ascending'})

In [None]:
px.bar(df, y="City", x="Population", width=700).update_layout(yaxis={'categoryorder':'category descending'})

In [None]:
# Horizontal seaborn bar chart: population on the x axis, one bar per city.
# The frame is passed as data= rather than positionally so the call also works
# on seaborn releases that do not accept the dataset as the first positional argument.
sns.barplot(data=df, x="Population", y="City", color="royalblue")

In [None]:
# Same seaborn chart with the rows sorted by city name first.
# The frame is passed as data= rather than positionally so the call also works
# on seaborn releases that do not accept the dataset as the first positional argument.
sns.barplot(data=df.sort_values(by=["City"]), x="Population", y="City", color="royalblue")

### Population Pyramid

In [None]:
df=pd.read_csv("India-2020.csv")# Source https://www.populationpyramid.net/india/2020/
df.head()

In [None]:
pd.melt(df.head(),id_vars=['Age'], value_vars=['M', 'F']) # Wide format to Long format conversion

In [None]:
df_melt = pd.melt(df,id_vars=['Age'], value_vars=['M', 'F'],var_name='Gender', value_name='population')
df_melt

In [None]:
px.bar(df_melt,x="population", y = "Age", color="Gender", barmode="relative") #One of 'group', 'overlay' or 'relative'

In [None]:
# Make the female counts negative so their bars are drawn on the other side of
# zero in the "relative" bar chart below.
# -abs() keeps the cell idempotent: re-running it cannot flip the sign back to positive.
df["F"] = -df["F"].abs()
df.head()

In [None]:
df_melt = pd.melt(df,id_vars=['Age'], value_vars=['M', 'F'],var_name='Gender', value_name='population')

In [None]:
px.bar(df_melt,x="population", y = "Age", color="Gender", barmode="relative") 
#'relative' mode, bars are stacked above zero for positive values and below zero for negative values
# see https://plotly.com/python-api-reference/generated/plotly.express.bar

In [None]:
# Styled population pyramid: one horizontal bar per age group and gender.
# Assumes the "F" values were negated in an earlier cell so they sit left of zero.
px.bar(df_melt, x="population", y="Age", color="Gender", barmode="relative").update_layout(
    title='Population Pyramid of India-2020',  # the data comes from India-2020.csv
    height=500,
    title_font_size=22,
    bargap=0.0,
    xaxis=dict(
        # tickvals and ticktext must pair up one-to-one. The labels show
        # magnitudes, so the negative side still reads as a positive count.
        tickvals=[-60000000, -40000000, -20000000,
                  0, 20000000, 40000000, 60000000],
        ticktext=['60M', '40M', '20M', '0',
                  '20M', '40M', '60M'],
        title='Population in Millions',
        title_font_size=14
    )
)

### Accessing [World Bank data](https://databank.worldbank.org/home.aspx) with `wbgapi`

In [None]:
#!pip install wbgapi
import wbgapi as wb
# See https://github.com/tgherzog/wbgapi for details

In [None]:
help(wb.series)

In [None]:
wb.series.info(q="women")

In [None]:
wb.data.DataFrame('SG.GEN.PARL.ZS', wb.region.members('AFR'))

In [None]:
wb.series.info(q="population")

In [None]:
df_percent_women = wb.data.DataFrame('SP.POP.TOTL.FE.ZS',wb.economy.coder("INDIA"))
df_percent_women.head()

In [None]:
df_population = wb.data.DataFrame('SP.POP.TOTL',wb.economy.coder("INDIA"))
df_population.head()

In [None]:
df=df_population.T
df.head()

In [None]:
df=df.rename(columns={"IND":"total"})
df.head()

In [None]:
df_w = df_percent_women.T
df_w.head()

In [None]:
df_w = df_w.rename(columns={"IND":"Female %"})
df_w.head()

In [None]:
df_concat = pd.concat([df, df_w], axis=1)

In [None]:
df_concat

In [None]:
df_concat

In [None]:
df_concat["Male"] = df_concat["total"]*(100 - df_concat["Female %"])/100

In [None]:
df_concat["Female"] = df_concat["total"]*df_concat["Female %"]/100
df_concat.head()

In [None]:
df_concat=df_concat.reset_index()

In [None]:
df_concat.head()

In [None]:
df_melt = pd.melt(df_concat,id_vars=['index','total'], value_vars=['Male', 'Female'],var_name='gender', value_name='population')
df_melt.head()

In [None]:
px.bar(df_melt, x="index", y = "population", color="gender")

In [None]:
px.bar(df_melt, x="index", y = "population", color="gender", barmode="group")

In [None]:
# Negate the female counts so they are drawn on the opposite side of zero in
# the "relative" bar charts of the later cells.
# -abs() keeps the cell idempotent: re-running it cannot flip the sign back to positive.
df_concat["Female"] = -df_concat["Female"].abs()
df_concat

In [None]:
df_concat["index"] = pd.to_datetime(df_concat["index"],format="YR%Y")
df_concat.head()

In [None]:
df_negmelt = pd.melt(df_concat,id_vars=['index'], value_vars=['Male', 'Female'],var_name='gender', value_name='population')
df_negmelt.head()

In [None]:
# Stacked bars per year: positive values are drawn above zero and negative ones
# below it (barmode="relative"). Assumes "Female" was negated in an earlier cell.
# The year range in the title is taken from the data instead of being hard-coded,
# so it cannot drift out of sync with what the World Bank API returns.
plot_years = df_negmelt.dropna(subset=["population"])["index"].dt.year
px.bar(df_negmelt, x="index", y="population", color="gender", barmode="relative").update_layout(
    title=f"Male Female Population India {plot_years.min()}-{plot_years.max()}",
    height=600,
    title_font_size=22,
    bargap=0.0,
    yaxis=dict(
        # Tick labels show magnitudes, so the negative side reads as a positive count.
        tickvals=[-700000000, -600000000, -400000000, -200000000,
                  0, 200000000, 400000000, 600000000, 700000000],
        ticktext=['700M', '600M', '400M', '200M', '0',
                  '200M', '400M', '600M', '700M'],
        title='Population in Millions',
        title_font_size=14
    ),
    xaxis=dict(title="Year")
)

In [None]:
# Horizontal version: years on the y axis, positive values to the right of zero
# and negative ones to the left (barmode="relative").
# Assumes "Female" was negated in an earlier cell.
# The year range in the title is taken from the data instead of being hard-coded,
# so it cannot drift out of sync with what the World Bank API returns.
plot_years = df_negmelt.dropna(subset=["population"])["index"].dt.year
px.bar(df_negmelt, y="index", x="population", color="gender", barmode="relative", orientation="h").update_layout(
    title=f"Population of Male-Female of India from {plot_years.min()}-{plot_years.max()}",
    title_font_size=22,
    #bargap = 0.0, bargroupgap = 0,
    xaxis=dict(
        # Tick labels show magnitudes, so the negative side reads as a positive count.
        tickvals=[-700000000, -600000000, -400000000, -200000000,
                  0, 200000000, 400000000, 600000000, 700000000],
        ticktext=['700M', '600M', '400M', '200M', '0',
                  '200M', '400M', '600M', '700M'],
        title='Population in Millions',
        title_font_size=14
    ),
    yaxis=dict(title="year")
)

In [None]:
# Horizontal version with the y axis reversed (autorange="reversed").
# Assumes "Female" was negated in an earlier cell.
# The year range in the title is taken from the data instead of being hard-coded,
# so it cannot drift out of sync with what the World Bank API returns.
plot_years = df_negmelt.dropna(subset=["population"])["index"].dt.year
px.bar(df_negmelt, y="index", x="population", color="gender", barmode="relative", orientation="h").update_layout(
    title=f"Population of Male-Female of India from {plot_years.min()}-{plot_years.max()}",
    title_font_size=22,
    #bargap = 0.0, bargroupgap = 0,
    xaxis=dict(
        # Tick labels show magnitudes, so the negative side reads as a positive count.
        tickvals=[-700000000, -600000000, -400000000, -200000000,
                  0, 200000000, 400000000, 600000000, 700000000],
        ticktext=['700M', '600M', '400M', '200M', '0',
                  '200M', '400M', '600M', '700M'],
        title='Population in Millions',
        title_font_size=14
    ),
    yaxis=dict(title="year", autorange="reversed")
)

### Pie Chart of the Religious Composition of India
Data Source: https://censusindia.gov.in

In [None]:
#!pip install xlrd
url = "https://censusindia.gov.in/nada/index.php/catalog/11361/download/14474/DDW00C-01%20MDDS.XLS"
df = pd.read_excel(url)
df.head()

In [None]:
#df=pd.read_excel(url,header = [2,3], index_col = [5,6])
df=pd.read_excel(url,header = [2,3], index_col = [5,6])
df.head(6)

In [None]:
df.columns.values

In [None]:
df.loc[("INDIA","Total")][("Total", "Persons")]

In [None]:
# Flatten the two-level column header into single "<level0>_<level1>" names.
# Guarded so that re-running the cell leaves already-flattened names untouched:
# joining a plain string would insert "_" between every character.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = ['_'.join(col) for col in df.columns.values]
df.columns

In [None]:
columns = df.columns[df.columns.str.contains("_Persons")]
columns

In [None]:
df.loc["INDIA"][columns]

In [None]:
df1=df.loc["INDIA"][columns]
df1

In [None]:
df1 = df1.drop(columns=['Total_Persons'])
df1

In [None]:
df2 = df1.T
df2.head()

In [None]:
df2 = df2.reset_index()
df2.head()

In [None]:
df2["index"]=df2["index"].str.removesuffix("_Persons")

In [None]:
df2

In [None]:
fig = px.pie(df2, values='Total', names='index', title='Religions composition of India')
fig.show()

In [None]:
others = df2.iloc[[4,5,6,7]].sum()
others

In [None]:
df2=df2.drop([4,5,6,7])
df2

In [None]:
others

In [None]:
# Look at the first entry of `others` by position.
# .iloc makes the positional access explicit; a bare integer key on a
# label-indexed Series relies on a deprecated positional fallback in pandas.
others.iloc[0]

In [None]:
# Replace the first entry of `others` (by position) with the label "Others".
# .iloc makes the positional write explicit; a bare integer key on a
# label-indexed Series relies on a deprecated positional fallback in pandas.
others.iloc[0] = "Others"

In [None]:
# Add the aggregated `others` Series to df2 as one extra row.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement.
# to_frame().T turns the Series into a one-row frame, and infer_objects() restores
# the numeric dtypes that this round trip turns into object.
df2 = pd.concat([df2, others.to_frame().T.infer_objects()], ignore_index=True)

In [None]:
df2

In [None]:
px.pie(df2, values='Total', names='index', title='Religions composition of India', height=500, width = 500)

### Donut-chart variations of the pie chart

In [None]:
px.pie(df2, values='Total', names='index', title='Religions composition of India', height=600, width = 600, hole=0.5).update_layout(
    legend={"x": 0.5,"y": 0.5, "xanchor" : "center"})

In [None]:
px.pie(df2, values='Total', names='index', title='Religions composition of India', height=600, width = 600, hole=0.5).update_layout(
    legend={"x": 0.5,"y": 0.5, "xanchor" : "center"}).update_traces(pull=0.1)

In [None]:
px.pie(df2, values='Total', names='index', title='Religions composition of India', height=600, width = 600, hole=0.6).update_layout(
    legend={"x": 0.5,"y": 0.5, "xanchor" : "center"}).update_traces(pull=[0,0.1])

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.pie(df2["Total"], counterclock = False, startangle = 90, autopct = '%1.0f%%', pctdistance=1.1,wedgeprops=dict(width=0.5))
plt.legend(df2["index"], 
           loc="center left",
          bbox_to_anchor=(1, 0, 0.5, 1))
plt.show()

In [None]:
df2

In [None]:
# Matplotlib donut with a thinner ring and the second wedge offset outwards.
# explode needs exactly one offset per wedge, so it is built from the number of
# rows in df2 instead of being hard-coded for five rows.
explode = [0.1 if position == 1 else 0 for position in range(len(df2))]
plt.pie(df2["Total"], counterclock=False, startangle=90, autopct='%1.0f%%', pctdistance=1.1,
        wedgeprops=dict(width=0.3),
        explode=explode)
# Legend entries come from the "index" column, placed to the right of the axes.
plt.legend(df2["index"],
           loc="center left",
           bbox_to_anchor=(1, 0, 0.5, 1))
plt.show()