In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as pp
%matplotlib inline

In [202]:
import urllib.request
import os.path
import zipfile

# Download the SSA "names by state" archive once and cache it in the working directory.
data_url = "https://www.ssa.gov/oact/babynames/state/namesbystate.zip"
local_filename = "babynamesbystate.zip"
if not os.path.exists(local_filename): # if the data exists don't download again
    with urllib.request.urlopen(data_url) as resp, open(local_filename, 'wb') as f:
        f.write(resp.read())

# Archive members to load and the column names for the header-less CSV files.
st_name = ['NH.TXT', 'MA.TXT',  'ME.TXT', 'VT.TXT']
field_names = ['State', 'Gender', 'Year', 'Name', 'Count']

# Read every state file into its own frame, then concatenate once.
# (Growing a DataFrame with pd.concat inside the loop copies all previous rows
# on each iteration, and seeding it with an empty DataFrame triggers warnings
# on recent pandas.) The `with` block closes the archive when reading is done.
state_frames = []
with zipfile.ZipFile(local_filename, 'r') as zf:
    for st in st_name:
        with zf.open(st) as fh:
            state_frames.append(pd.read_csv(fh, header=None, names=field_names))

# Row labels are kept as read from each file, so they repeat across states.
babynames = pd.concat(state_frames)

babynames.sample(10)

Unnamed: 0,State,Gender,Year,Name,Count
55589,MA,F,2006,Giovanna,18
101112,MA,M,1992,Edison,5
21786,VT,M,1965,Thomas,72
2684,VT,F,1931,Claire,18
45073,MA,F,1994,Lyndsey,19
112482,MA,M,2009,Branden,7
29759,ME,M,1921,Ellsworth,7
46028,MA,F,1995,Franchesca,11
14472,MA,F,1946,Corrine,13
9287,MA,F,1933,Miriam,36


In [203]:
# Move Name, State and Year into a three-level row index and sort it.
babynames_index = (
    babynames
    .set_index(keys=['Name', 'State', 'Year'])
    .sort_index()
)

In [204]:
# Inspect the row index of the indexed frame.
babynames_index.index

MultiIndex([( 'Aaden', 'MA', 2008),
            ( 'Aaden', 'MA', 2009),
            ( 'Aaden', 'MA', 2012),
            ( 'Aaden', 'ME', 2009),
            ('Aadhya', 'MA', 2013),
            ('Aadhya', 'MA', 2014),
            ('Aadhya', 'MA', 2015),
            ('Aadhya', 'MA', 2016),
            ('Aadhya', 'MA', 2018),
            ('Aadhya', 'MA', 2019),
            ...
            (  'Zuri', 'MA', 2018),
            (  'Zuri', 'MA', 2019),
            (  'Zuri', 'MA', 2020),
            ( 'Zyair', 'MA', 2020),
            ('Zyaire', 'MA', 2016),
            ('Zyaire', 'MA', 2018),
            ('Zyaire', 'MA', 2019),
            ('Zyaire', 'MA', 2020),
            ( 'Zylah', 'MA', 2020),
            (  'Zyon', 'MA', 2013)],
           names=['Name', 'State', 'Year'], length=242303)

In [205]:
# Display the indexed frame (pandas truncates the rendering of long frames).
babynames_index

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Gender,Count
Name,State,Year,Unnamed: 3_level_1,Unnamed: 4_level_1
Aaden,MA,2008,M,10
Aaden,MA,2009,M,25
Aaden,MA,2012,M,5
Aaden,ME,2009,M,5
Aadhya,MA,2013,F,8
...,...,...,...,...
Zyaire,MA,2018,M,5
Zyaire,MA,2019,M,6
Zyaire,MA,2020,M,10
Zylah,MA,2020,F,5


In [206]:
# Partial key on the first two index levels: every row for "Bruce" in Maine,
# with the column axis spelled out explicitly.
babynames_index.loc[('Bruce', 'ME'), :]

Unnamed: 0_level_0,Gender,Count
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
1917,M,5
1918,M,6
1923,M,5
1924,M,12
1925,M,8
...,...,...
1991,M,12
1992,M,8
1994,M,5
1995,M,5


In [207]:
# "Bruce" in every state for 1960. slice(None) selects all values of the State
# level. The trailing `:` names the column axis explicitly so the tuple can only
# be read as a key into the row MultiIndex, never as a (row, column, ...) indexer.
babynames_index.loc[('Bruce', slice(None), 1960), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Gender,Count
Name,State,Year,Unnamed: 3_level_1,Unnamed: 4_level_1
Bruce,MA,1960,M,405
Bruce,ME,1960,M,127
Bruce,NH,1960,M,59
Bruce,VT,1960,M,43


In [208]:
# Total count for "Bruce" in Maine. Rows and the Count column are selected in a
# single .loc call instead of chained indexing (.loc[...][...]).
babynames_index.loc[('Bruce', 'ME'), 'Count'].sum()

3843

In [209]:
# Column-wise maximum over the "Bruce" / Maine rows (applied to every column).
bruce_me_rows = babynames_index.loc[('Bruce', 'ME'), :]
bruce_me_rows.max()

Gender      M
Count     156
dtype: object

In [210]:
# Exchange the Name and State index levels, then re-sort on the new level order.
# The result is only displayed; babynames_index itself is left unchanged.
(
    babynames_index
    .swaplevel(i='Name', j='State')
    .sort_index()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Gender,Count
State,Name,Year,Unnamed: 3_level_1,Unnamed: 4_level_1
MA,Aaden,2008,M,10
MA,Aaden,2009,M,25
MA,Aaden,2012,M,5
MA,Aadhya,2013,F,8
MA,Aadhya,2014,F,7
...,...,...,...,...
VT,Zoey,2014,F,10
VT,Zoey,2015,F,10
VT,Zoey,2016,F,12
VT,Zoey,2017,F,8


In [154]:
# Total Count per name across all states and years.
# DataFrame.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0;
# groupby(level=...) is the supported replacement. Count is selected explicitly
# so any non-numeric column is not aggregated.
babynames_index.groupby(level="Name")[["Count"]].sum()

Unnamed: 0_level_0,Count
Name,Unnamed: 1_level_1
Aaden,45
Aadhya,50
Aadi,5
Aadya,29
Aaliyah,1825
...,...
Zuri,48
Zyair,6
Zyaire,28
Zylah,5


In [211]:
# Rebuild the index with Gender as a fourth (innermost) level, sorted.
babynames_index = (
    babynames
    .set_index(keys=['Name', 'State', 'Year', 'Gender'])
    .sort_index()
)
babynames_index

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Count
Name,State,Year,Gender,Unnamed: 4_level_1
Aaden,MA,2008,M,10
Aaden,MA,2009,M,25
Aaden,MA,2012,M,5
Aaden,ME,2009,M,5
Aadhya,MA,2013,F,8
...,...,...,...,...
Zyaire,MA,2018,M,5
Zyaire,MA,2019,M,6
Zyaire,MA,2020,M,10
Zylah,MA,2020,F,5


In [212]:
# Pivot the innermost index level (Gender) into the columns.
# The level is named explicitly: this cell reassigns babynames_index, so with a
# bare unstack() an accidental second run would silently pivot another level.
# With the name given, a repeated run raises instead.
babynames_index = babynames_index.unstack(level='Gender')
babynames_index

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Count,Count
Unnamed: 0_level_1,Unnamed: 1_level_1,Gender,F,M
Name,State,Year,Unnamed: 3_level_2,Unnamed: 4_level_2
Aaden,MA,2008,,10.0
Aaden,MA,2009,,25.0
Aaden,MA,2012,,5.0
Aaden,ME,2009,,5.0
Aadhya,MA,2013,8.0,
...,...,...,...,...
Zyaire,MA,2018,,5.0
Zyaire,MA,2019,,6.0
Zyaire,MA,2020,,10.0
Zylah,MA,2020,5.0,


In [213]:
# Pivot the Year level (innermost after the previous step) into the columns.
# Naming the level makes an accidental re-run of this self-reassigning cell fail
# loudly instead of silently pivoting yet another level.
babynames_index = babynames_index.unstack(level='Year')
babynames_index

Unnamed: 0_level_0,Unnamed: 1_level_0,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count
Unnamed: 0_level_1,Gender,F,F,F,F,F,F,F,F,F,F,...,M,M,M,M,M,M,M,M,M,M
Unnamed: 0_level_2,Year,1910,1911,1912,1913,1914,1915,1916,1917,1918,1919,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
Name,State,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3
Aaden,MA,,,,,,,,,,,...,,5.0,,,,,,,,
Aaden,ME,,,,,,,,,,,...,,,,,,,,,,
Aadhya,MA,,,,,,,,,,,...,,,,,,,,,,
Aadi,MA,,,,,,,,,,,...,,,,,,,,,,
Aadya,MA,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zuri,MA,,,,,,,,,,,...,,,,,,,,,,
Zyair,MA,,,,,,,,,,,...,,,,,,,,,,6.0
Zyaire,MA,,,,,,,,,,,...,,,,,,7.0,,5.0,6.0,10.0
Zylah,MA,,,,,,,,,,,...,,,,,,,,,,


In [214]:
# Select all rows whose first index level is "Bruce"; ('Bruce') is just the
# string 'Bruce', so it is written without the redundant parentheses.
babynames_index.loc['Bruce', :]

Unnamed: 0_level_0,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count
Gender,F,F,F,F,F,F,F,F,F,F,...,M,M,M,M,M,M,M,M,M,M
Year,1910,1911,1912,1913,1914,1915,1916,1917,1918,1919,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
State,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
MA,,,,,,,,,,,...,12.0,6.0,9.0,8.0,5.0,6.0,7.0,12.0,9.0,9.0
ME,,,,,,,,,,,...,,,,,,,,,,
NH,,,,,,,,,,,...,,,,,,,,,,
VT,,,,,,,,,,,...,,,,,,,,,,


In [218]:
# Index by (State, Name, Year) but keep those fields as ordinary columns too
# (drop=False), then sort the index.
babynames_index = (
    babynames
    .set_index(keys=['State', 'Name', 'Year'], drop=False)
    .sort_index()
)
babynames_index

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,State,Gender,Year,Name,Count
State,Name,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
MA,Aaden,2008,MA,M,2008,Aaden,10
MA,Aaden,2009,MA,M,2009,Aaden,25
MA,Aaden,2012,MA,M,2012,Aaden,5
MA,Aadhya,2013,MA,F,2013,Aadhya,8
MA,Aadhya,2014,MA,F,2014,Aadhya,7
...,...,...,...,...,...,...,...
VT,Zoey,2014,VT,F,2014,Zoey,10
VT,Zoey,2015,VT,F,2015,Zoey,10
VT,Zoey,2016,VT,F,2016,Zoey,12
VT,Zoey,2017,VT,F,2017,Zoey,8


In [219]:
# Boolean mask on the retained "Name" column, then keep only the matching rows.
is_bruce = babynames_index["Name"] == "Bruce"
bruce_bn = babynames_index.loc[is_bruce]
bruce_bn

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,State,Gender,Year,Name,Count
State,Name,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
MA,Bruce,1911,MA,M,1911,Bruce,5
MA,Bruce,1912,MA,M,1912,Bruce,6
MA,Bruce,1913,MA,M,1913,Bruce,5
MA,Bruce,1914,MA,M,1914,Bruce,6
MA,Bruce,1915,MA,M,1915,Bruce,5
...,...,...,...,...,...,...,...
VT,Bruce,1984,VT,M,1984,Bruce,5
VT,Bruce,1985,VT,M,1985,Bruce,5
VT,Bruce,1986,VT,M,1986,Bruce,5
VT,Bruce,1988,VT,M,1988,Bruce,6


In [220]:
# Demonstration: unstacking fails here because Gender is not part of the index,
# so the index contains duplicate entries. The error is caught and printed so
# the notebook still runs top to bottom; babynames_index is left unchanged
# (the original assignment never completed either).
try:
    babynames_index.unstack()
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: Index contains duplicate entries, cannot reshape

In [225]:
# Add Gender as the innermost index level, still keeping every field as a
# regular column (drop=False), and sort the index.
babynames_index = (
    babynames
    .set_index(keys=['State', 'Name', 'Year', 'Gender'], drop=False)
    .sort_index()
)
babynames_index

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,State,Gender,Year,Name,Count
State,Name,Year,Gender,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
MA,Aaden,2008,M,MA,M,2008,Aaden,10
MA,Aaden,2009,M,MA,M,2009,Aaden,25
MA,Aaden,2012,M,MA,M,2012,Aaden,5
MA,Aadhya,2013,F,MA,F,2013,Aadhya,8
MA,Aadhya,2014,F,MA,F,2014,Aadhya,7
...,...,...,...,...,...,...,...,...
VT,Zoey,2014,F,VT,F,2014,Zoey,10
VT,Zoey,2015,F,VT,F,2015,Zoey,10
VT,Zoey,2016,F,VT,F,2016,Zoey,12
VT,Zoey,2017,F,VT,F,2017,Zoey,8


In [226]:
# Pivot the innermost index level (Gender) into the columns.
# The level is named explicitly so that re-running this self-reassigning cell
# raises instead of silently pivoting a further level.
babynames_index = babynames_index.unstack(level='Gender')
babynames_index

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,State,State,Gender,Gender,Year,Year,Name,Name,Count,Count
Unnamed: 0_level_1,Unnamed: 1_level_1,Gender,F,M,F,M,F,M,F,M,F,M
State,Name,Year,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
MA,Aaden,2008,,MA,,M,,2008.0,,Aaden,,10.0
MA,Aaden,2009,,MA,,M,,2009.0,,Aaden,,25.0
MA,Aaden,2012,,MA,,M,,2012.0,,Aaden,,5.0
MA,Aadhya,2013,MA,,F,,2013.0,,Aadhya,,8.0,
MA,Aadhya,2014,MA,,F,,2014.0,,Aadhya,,7.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
VT,Zoey,2014,VT,,F,,2014.0,,Zoey,,10.0,
VT,Zoey,2015,VT,,F,,2015.0,,Zoey,,10.0,
VT,Zoey,2016,VT,,F,,2016.0,,Zoey,,12.0,
VT,Zoey,2017,VT,,F,,2017.0,,Zoey,,8.0,


In [227]:
# Pivot the Year level (innermost after the previous step) into the columns.
# Naming the level makes an accidental re-run of this cell fail loudly rather
# than reshape the frame a third time.
babynames_index = babynames_index.unstack(level='Year')
babynames_index

Unnamed: 0_level_0,Unnamed: 1_level_0,State,State,State,State,State,State,State,State,State,State,...,Count,Count,Count,Count,Count,Count,Count,Count,Count,Count
Unnamed: 0_level_1,Gender,F,F,F,F,F,F,F,F,F,F,...,M,M,M,M,M,M,M,M,M,M
Unnamed: 0_level_2,Year,1910,1911,1912,1913,1914,1915,1916,1917,1918,1919,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
State,Name,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3
MA,Aaden,,,,,,,,,,,...,,5.0,,,,,,,,
MA,Aadhya,,,,,,,,,,,...,,,,,,,,,,
MA,Aadi,,,,,,,,,,,...,,,,,,,,,,
MA,Aadya,,,,,,,,,,,...,,,,,,,,,,
MA,Aaliyah,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VT,Zander,,,,,,,,,,,...,5.0,,,,5.0,,,,,
VT,Zane,,,,,,,,,,,...,,,,,,,,5.0,5.0,
VT,Zebulon,,,,,,,,,,,...,,,,,,,,,,
VT,Zoe,,,,,,,,,,,...,,,,,,,,,,


In [200]:
# Rebuild the long-format frame from `babynames` before dropping the columns
# that duplicate the index levels. The previous cells leave babynames_index in
# wide (unstacked) form, so dropping from it directly would not give the
# four-level index with a single Count column.
babynames_index = (
    babynames
    .set_index(['State', 'Name', 'Year', 'Gender'], drop=False)
    .sort_index()
    .drop(columns=["State", "Name", "Year", "Gender"])
)
babynames_index

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Count
State,Name,Year,Gender,Unnamed: 4_level_1
MA,Aaden,2008,M,10
MA,Aaden,2009,M,25
MA,Aaden,2012,M,5
MA,Aadhya,2013,F,8
MA,Aadhya,2014,F,7
...,...,...,...,...
VT,Zoey,2014,F,10
VT,Zoey,2015,F,10
VT,Zoey,2016,F,12
VT,Zoey,2017,F,8


In [None]:
# Drop the columns that duplicate the index levels. errors="ignore" keeps the
# cell re-runnable: when the columns have already been removed it is a no-op
# instead of raising KeyError.
babynames_index = babynames_index.drop(
    columns=["State", "Name", "Year", "Gender"], errors="ignore"
)
babynames_index

In [201]:
# Turn the index levels back into ordinary columns (drop=False keeps them)
# under a fresh integer row index; the result is only displayed, not stored.
babynames_index.reset_index(drop=False)

Unnamed: 0,State,Name,Year,Gender,Count
0,MA,Aaden,2008,M,10
1,MA,Aaden,2009,M,25
2,MA,Aaden,2012,M,5
3,MA,Aadhya,2013,F,8
4,MA,Aadhya,2014,F,7
...,...,...,...,...,...
242298,VT,Zoey,2014,F,10
242299,VT,Zoey,2015,F,10
242300,VT,Zoey,2016,F,12
242301,VT,Zoey,2017,F,8
