In [1]:
import requests
import geopandas as gpd
import pandas as pd

In [2]:
from bs4 import BeautifulSoup 
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Open GIS Data

In [3]:
# Location of the state-boundary shapefile, relative to the notebook's working directory.
file_path = 'data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp'
# Load the boundaries into a GeoDataFrame; the cells below use its NAME, STUSPS and geometry columns.
states_df = gpd.read_file(filename=file_path)

## Get Population Data

In [4]:
# sheet_name=None makes read_excel return a dict of {sheet name: DataFrame}
# covering every sheet in the workbook; the next cell picks one sheet by name.
state_populations = pd.read_excel(
    io='data/NST-EST2023-POP.xlsx',
    sheet_name=None,
    engine='openpyxl',
)

In [5]:
# Reduce the 'NST-EST2023-POP' sheet to two columns: the area name and one population column.
# The sheet has no proper header row, so pandas used the caption text as the first
# column's label and 'Unnamed: 5' for the sixth column.
# NOTE(review): 'Unnamed: 5' is presumably the most recent (2023) estimate — confirm against the workbook.
name_header = 'table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)'
state_populations_df = (
    state_populations['NST-EST2023-POP'][[name_header, 'Unnamed: 5']]
    .rename(columns={name_header: "NAME", 'Unnamed: 5': 'POPULATION'})
    # Per the caption, only sub-part rows carry a leading dot (".Alabama").
    # Strip dots only: slicing off the first character unconditionally would
    # also eat the first letter of rows that have no dot.
    .assign(NAME=lambda frame: frame['NAME'].str.lstrip('.'))
)

In [6]:
# Left-join the population figures onto the state geometries by NAME (every
# row of states_df is kept), then keep only the columns used downstream.
kept_columns = ['STUSPS','NAME','POPULATION','geometry']
states_with_population_df = (
    states_df
    .merge(state_populations_df, on='NAME', how='left')
    [kept_columns]
)

## Get Homelessness Data

In [7]:
# Send a browser-like User-Agent along with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
# requests has no default timeout, so without one a stalled connection would
# hang the notebook indefinitely.
r = requests.get(
    'https://worldpopulationreview.com/state-rankings/homeless-population-by-state',
    headers=headers,
    timeout=30,
)
# Fail here on a 4xx/5xx response instead of parsing an error page as if it were data.
r.raise_for_status()

In [8]:
# Parse the downloaded page with Python's built-in HTML parser.
soup = BeautifulSoup(markup=r.text, features='html.parser')

In [14]:
# Grab the first <tbody> element in the page (None if the page has none).
table = soup.find(name='tbody')

In [15]:
# Display the <tbody> found above for inspection.
# NOTE(review): `table` is not referenced by any later cell visible here.
table

<tbody class="simple-table-body relative z-10 text-sm"><tr class="table-row"><th class="px-3 py-1.5 text-left align-middle text-sm text-gray-900 md:px-4"><span><a class="text-wpr-link" href="/states/district-of-columbia" to="/states/district-of-columbia">District of Columbia</a></span></th><td class="px-3 py-1.5 text-sm text-gray-600 md:px-4"><span>65.8</span></td></tr><tr class="table-row"><th class="px-3 py-1.5 text-left align-middle text-sm text-gray-900 md:px-4"><span><a class="text-wpr-link" href="/states/california" to="/states/california">California</a></span></th><td class="px-3 py-1.5 text-sm text-gray-600 md:px-4"><span>43.7</span></td></tr><tr class="table-row"><th class="px-3 py-1.5 text-left align-middle text-sm text-gray-900 md:px-4"><span><a class="text-wpr-link" href="/states/vermont" to="/states/vermont">Vermont</a></span></th><td class="px-3 py-1.5 text-sm text-gray-600 md:px-4"><span>43.1</span></td></tr><tr class="table-row"><th class="px-3 py-1.5 text-left align-mi

In [19]:
# Locate the container holding the per-state entries. A class_ string with
# several classes is matched against the exact class attribute value, so this
# lookup breaks as soon as the site changes or reorders those classes.
almost_data = soup.find("div", class_="overflow-y-scroll h-[250px]")
if almost_data is None:
    # Fail here with a clear message rather than later with
    # "'NoneType' object is not iterable" when the result is looped over.
    raise ValueError(
        "Could not find the state ranking container "
        "(div with class 'overflow-y-scroll h-[250px]'); the page layout may have changed."
    )

In [61]:
# Build {state name: homeless count} from the scraped entries. Each entry's
# text is a state name immediately followed by a count, e.g. "California171,521".
homeless_dict = {}
for entry in almost_data:
    raw_text = entry.text
    # Echo the raw text so malformed entries are easy to spot in the cell output.
    print(raw_text)
    # Index of the first digit: everything before it is the name, the rest is the count.
    first_digit = next(
        (pos for pos, char in enumerate(raw_text) if char.isdecimal()),
        None,
    )
    if first_digit is None:
        # No number in this entry (e.g. whitespace between elements) — skip it.
        continue
    # Strip stray whitespace so the name matches NAME exactly in the later merge.
    state = raw_text[:first_digit].strip()
    # Drop thousands separators before converting ("171,521" -> 171521).
    homeless_dict[state] = int(raw_text[first_digit:].replace(",", ""))

California171,521
New York74,178
Florida25,959
Washington25,211
Texas24,432
Oregon17,959
Massachusetts15,507
Arizona13,553
Pennsylvania12,691
Georgia10,689
Ohio10,654
Tennessee10,567
Colorado10,397
North Carolina9,382
Illinois9,212
New Jersey8,752
Michigan8,206
Minnesota7,917
Nevada7,618
Louisiana7,373
Virginia6,529
Missouri5,992
Hawaii5,967
Indiana5,449
Maryland5,349
Wisconsin4,775
Maine4,411
District of Columbia4,410
Kentucky3,984
Oklahoma3,754
Alabama3,752
South Carolina3,608
Utah3,557
Connecticut2,930
Vermont2,780
New Mexico2,560
Arkansas2,459
Iowa2,419
Kansas2,397
Delaware2,369
Alaska2,320
Nebraska2,246
Idaho1,998
New Hampshire1,605
Montana1,585
Rhode Island1,577
South Dakota1,389
West Virginia1,375
Mississippi1,196
Wyoming648
North Dakota610


In [65]:
# Turn the {state: count} mapping into a two-column frame: the dict keys
# become the NAME column and the counts become the "homeless" column.
homeless_per_state_series = pd.Series(homeless_dict, name="homeless")
homeless_per_state_df = (
    homeless_per_state_series
    .reset_index()
    .rename(columns={'index':'NAME'})
)

## Merge Data

In [66]:
# Attach abbreviation, population and geometry to each scraped homeless count.
# how='right' keeps every scraped NAME; a name with no match in
# states_with_population_df gets missing values in the left-hand columns.
# NOTE(review): this rebinds homeless_per_state_df to the merged result, so
# re-running only this cell merges an already-merged frame — re-run the cell
# that builds homeless_per_state_df first.
homeless_per_state_df = states_with_population_df.merge(homeless_per_state_df, on='NAME', how='right')

In [67]:
# Homeless count per N residents, for several values of N.
# Output column name -> number of residents the rate is expressed per.
rate_scales = {
    'per_100k': 100_000,
    'per_500k': 500_000,
    'per_1m': 1_000_000,
}
for rate_column, residents in rate_scales.items():
    homeless_per_state_df[rate_column] = (
        homeless_per_state_df['homeless'] / (homeless_per_state_df['POPULATION'] / residents)
    )

In [69]:
# Write the merged result as a GeoPackage. The driver is named explicitly so
# the output format does not depend on geopandas inferring it from the
# ".gpkg" extension.
homeless_per_state_df.to_file('data/homeless.gpkg', driver='GPKG')