In [1]:
# Import stuff
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt
import json
import seaborn as sns
import re
import statistics

%matplotlib inline 

# Some notes regarding this data (census)

* I have included the data file layout in my data folder, so some of this is just repeated from there.
* The estimates are based on the 2010 Census and were created without incorporation or consideration of the 2020 Census results. 
* All geographic boundaries for these population estimates are as of January 1, 2020.
* An "A" in the 2010 Census field indicates a government that was formed or incorporated after the 2010 Census. 


In [2]:
# Read the census city population estimates CSV.
# Both names are bound to the same DataFrame object here; no copy is made.
city_pop = og_df = pd.read_csv('../data/census/city_pop_est_2010_2020.csv')

In [3]:
# Preview the first five rows of the raw frame (five is head()'s default).
og_df.head(5)

Unnamed: 0,SUMLEV,STATE,COUNTY,PLACE,COUSUB,CONCIT,PRIMGEO_FLAG,FUNCSTAT,NAME,STNAME,...,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016,POPESTIMATE2017,POPESTIMATE2018,POPESTIMATE2019,POPESTIMATE2020
0,40,1,0,0,0,0,0,A,Alabama,Alabama,...,4799642,4816632,4831586,4843737,4854803,4866824,4877989,4891628,4907965,4921532
1,162,1,0,124,0,0,0,A,Abbeville city,Alabama,...,2694,2645,2629,2610,2602,2587,2578,2565,2555,2553
2,162,1,0,460,0,0,0,A,Adamsville city,Alabama,...,4474,4453,4430,4399,4371,4335,4304,4285,4254,4211
3,162,1,0,484,0,0,0,A,Addison town,Alabama,...,750,745,744,742,734,734,728,725,723,717
4,162,1,0,676,0,0,0,A,Akron town,Alabama,...,347,347,344,338,338,335,332,332,328,327


In [4]:
# Rows whose 'SUMLEV' code is 40 carry the population at the state level,
# so select just those rows from the raw frame.
state_pop = og_df.loc[og_df['SUMLEV'].eq(40)]

In [5]:
# Preview the first five state-level rows (five is head()'s default).
state_pop.head(5)

Unnamed: 0,SUMLEV,STATE,COUNTY,PLACE,COUSUB,CONCIT,PRIMGEO_FLAG,FUNCSTAT,NAME,STNAME,...,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016,POPESTIMATE2017,POPESTIMATE2018,POPESTIMATE2019,POPESTIMATE2020
0,40,1,0,0,0,0,0,A,Alabama,Alabama,...,4799642,4816632,4831586,4843737,4854803,4866824,4877989,4891628,4907965,4921532
1109,40,2,0,0,0,0,0,A,Alaska,Alaska,...,722349,730810,737626,737075,738430,742575,740983,736624,733603,731158
1459,40,4,0,0,0,0,0,A,Arizona,Arizona,...,6473416,6556344,6634690,6732873,6832810,6944767,7048088,7164228,7291843,7421401
1680,40,5,0,0,0,0,0,A,Arkansas,Arkansas,...,2941038,2952876,2960459,2968759,2979732,2991815,3003855,3012161,3020985,3030522
2853,40,6,0,0,0,0,0,A,California,California,...,37636311,37944551,38253768,38586706,38904296,39149186,39337785,39437463,39437610,39368078


# Time to explore some of this data
\
I want to start with Nashville's population and find cities of similar size. \
Cities within ±10% of Nashville's population should be a good match for "similar size".

In [6]:
# Count how many rows fall under each 'SUMLEV' code in the raw data.
# Read from `og_df` rather than `city_pop`: later cells filter `city_pop` and drop its
# 'SUMLEV' column, so counting on `city_pop` raises a KeyError if this cell is re-run
# after them. On a fresh top-to-bottom run both names hold the same frame at this
# point, so the output is unchanged.
og_df['SUMLEV'].value_counts()

157    23705
61     21062
162    19494
71     13837
50      3143
172      115
40        51
170        8
Name: SUMLEV, dtype: int64

In [7]:
# Keep only the 'SUMLEV' 162 rows: that code specifies that the entry is an
# 'incorporated place'.
#
# The census defines an 'incorporated place' as:
#     "An incorporated place, under the Census Bureau's definition, is a type of
#     governmental unit incorporated under state law as a city, town (except the New
#     England states, New York, and Wisconsin), borough (except in Alaska and New York),
#     or village, and having legally prescribed limits, powers, and functions".
#
# Filter from the raw `og_df` instead of from `city_pop` itself so the cell is
# idempotent: once a later cell has dropped 'SUMLEV' from `city_pop`, a self-referential
# filter would raise a KeyError on re-run. On a fresh run the result is identical.
city_pop = og_df[og_df['SUMLEV'] == 162]

In [8]:
# Trim the geography/code columns that are not needed, then rename the two label
# columns. The numeric 'STATE' code column is removed by the drop step before
# 'STNAME' takes over the 'STATE' name, so the order of the two steps matters.
city_pop = (
    city_pop
    .drop(columns=[
        'SUMLEV',
        'STATE',
        'COUNTY',
        'PLACE',
        'COUSUB',
        'CONCIT',
        'PRIMGEO_FLAG',
        'FUNCSTAT',
    ])
    .rename(columns={"NAME": "CITY", "STNAME": "STATE"})
)

In [9]:
# Filter on two criteria to narrow the frame down to Nashville, Tennessee:
#   * the 'CITY' column must contain the text "Nashville", and
#   * the 'STATE' column must equal 'Tennessee', which excludes any same-named
#     places in other states.
# regex=False matches "Nashville" as a plain substring rather than a regex pattern.
# na=False treats a missing city name as "no match", so the mask stays strictly boolean.
nash_pop = city_pop.loc[
    city_pop['CITY'].str.contains("Nashville", regex=False, na=False)
    & (city_pop['STATE'] == 'Tennessee')
]

In [10]:
# Display the rows matched by the Nashville / Tennessee filter.
nash_pop

Unnamed: 0,CITY,STATE,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016,POPESTIMATE2017,POPESTIMATE2018,POPESTIMATE2019,POPESTIMATE2020
70455,Nashville-Davidson metropolitan government (ba...,Tennessee,601222,603465,604644,612689,625824,636375,645741,656089,662344,664220,667772,669725,671295


In [12]:
# Display the trimmed and renamed city-level frame; the rendered table is
# truncated to the first and last few rows.
city_pop

Unnamed: 0,CITY,STATE,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016,POPESTIMATE2017,POPESTIMATE2018,POPESTIMATE2019,POPESTIMATE2020
1,Abbeville city,Alabama,2688,2705,2699,2694,2645,2629,2610,2602,2587,2578,2565,2555,2553
2,Adamsville city,Alabama,4522,4487,4481,4474,4453,4430,4399,4371,4335,4304,4285,4254,4211
3,Addison town,Alabama,758,754,751,750,745,744,742,734,734,728,725,723,717
4,Akron town,Alabama,356,356,355,347,347,344,338,338,335,332,332,328,327
5,Alabaster city,Alabama,30352,31095,31192,31361,31673,31969,32177,32767,33040,33291,33442,33522,33701
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81264,Wamsutter town,Wyoming,451,451,450,452,463,488,509,502,494,485,478,471,467
81265,Wheatland town,Wyoming,3627,3629,3626,3623,3628,3625,3645,3647,3592,3556,3548,3494,3522
81266,Worland city,Wyoming,5487,5487,5488,5437,5419,5423,5331,5336,5273,5160,5077,5037,4994
81267,Wright town,Wyoming,1807,1807,1810,1813,1863,1860,1856,1886,1862,1760,1754,1751,1758
