# CoC to Census Tract Mapping
In this section, we associate each Continuum of Care (CoC) with relevant census data, such as census-level demographics, income, and cost-of-living data. To do so, we use the shapefiles of each CoC and of the US Census Bureau tracts. Note that tract boundaries may change from year to year; therefore, these data must be generated separately for each year.

In [55]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import os
from pathlib import Path
import numpy as np
from shapely.geometry import Point
from urllib.request import urlretrieve
from urllib.parse import urlparse
import requests
import time
import zipfile
import glob


In [24]:
# Run this cell to clear variables
%reset

## Algorithm
A CoC can contain many US Census Bureau tracts. For each tract that overlaps a CoC, we record the fraction of the tract's total area that lies inside the CoC. Tracts with less than 1% of their area inside the CoC are omitted.

Algorithmically, we intersect each CoC shapefile with the tract shapefile and compute the area of overlap after projecting the geometries to an area-preserving coordinate reference system (EPSG:6933). The results are stored in one dictionary per state, which maps each CoC to its tracts and their overlap fractions.

In [33]:
def create_cocs_tract_crosswalk(state = 'MA', tracts_dir = 'tl_2024_25_tract'):
    """
    Map every CoC of one state to the census tracts it overlaps.

    Paths are resolved relative to the current working directory:

    * tracts: ``data/<tracts_dir>/<tracts_dir>.shp``
    * CoCs:   ``data/coc-shapefiles/<state>/<coc>/<coc>.shp`` for every entry
      of ``data/coc-shapefiles/<state>`` whose name starts with ``<state>_``

    Args:
        state: Prefix/folder name used to locate the CoC shapefiles.
        tracts_dir: Folder (and file stem) of the tract shapefile. The tract
            layer must provide a ``GEOID`` column.

    Returns:
        dict: ``{coc_name: {tract GEOID: overlap}}`` where ``overlap`` is the
        fraction of the tract's area that lies inside the CoC, rounded to four
        decimals. Only tracts with more than 1% of their area inside the CoC
        are kept. A CoC folder without a shapefile (or with an empty one) maps
        to an empty dict.
    """
    # EPSG:6933 is the area-preserving CRS used for all area computations
    equal_area_epsg = 6933

    directory = Path.cwd()
    coc_path = directory / 'data' / 'coc-shapefiles' / state
    tracts_path = directory / 'data' / tracts_dir
    cocs = [coc for coc in os.listdir(coc_path) if coc.startswith(state + '_')]

    # Project the tracts once, up front: the spatial join and the area
    # computations below then all happen in the same CRS, regardless of the
    # CRS each shapefile was published in.
    tracts = gpd.read_file(tracts_path / str(tracts_dir + '.shp'))
    tracts_projected = tracts.to_crs(epsg=equal_area_epsg)

    # We must represent each CoC as a combination of Census tracts
    cocs_tract_crosswalk = {}

    for coc in cocs:
        coc_tract_crosswalk = {}
        cocs_tract_crosswalk[coc] = coc_tract_crosswalk

        coc_file = coc_path / coc / str(coc + '.shp')
        if not coc_file.is_file():
            continue

        coc_gpd = gpd.read_file(coc_file)
        if coc_gpd.empty:
            continue
        coc_projected = coc_gpd.to_crs(epsg=equal_area_epsg)

        # A CoC shapefile may hold several features; merge them so that the
        # overlap is measured against the whole CoC and each tract is matched
        # at most once by the join below.
        coc_outline = coc_projected if len(coc_projected) == 1 else coc_projected.dissolve()
        # Keep only the geometry so the join cannot rename tract columns
        # (e.g. GEOID) because of a name clash with CoC attributes.
        coc_outline = coc_outline[['geometry']]
        coc_shape = coc_outline.geometry.iloc[0]

        overlapping_tracts = gpd.sjoin(
            tracts_projected, coc_outline, how="inner", predicate="intersects"
        )

        # For those tracts that overlap, estimate how much of each tract is
        # contained in the CoC (intersection area / tract area)
        tract_geometries = overlapping_tracts.geometry
        overlaps = tract_geometries.intersection(coc_shape).area / tract_geometries.area

        for geoid, overlap in zip(overlapping_tracts['GEOID'], overlaps):
            # Only keep tracts for which more than 1% of the tract is in the CoC
            if overlap > 0.01:
                coc_tract_crosswalk[geoid] = round(overlap, 4)

    return cocs_tract_crosswalk

# Build the CoC -> tract crosswalk for Massachusetts from the shapefiles under ./data.
# NOTE(review): 'tl_2024_25_tract' presumably follows the TIGER/Line naming
# tl_<year>_<state FIPS>_tract (25 = Massachusetts) — confirm when adding other states/years.
ma_cocs_tract_crosswalk = create_cocs_tract_crosswalk(state='MA', tracts_dir='tl_2024_25_tract')

In [54]:
def _discard_partial_download(path):
    """Delete a leftover partial download; a missing file is not an error."""
    try:
        os.remove(path)
    except OSError:
        # Nothing to clean up (or the file cannot be removed) — best effort only
        pass


def download_file_with_progress(url, local_filename=None, download_dir="data/coc-shapefiles"):
    """
    Download a file with progress tracking.

    The body is streamed into ``<file>.part`` and only renamed to its final
    name once the download has completed, so a failed or interrupted download
    never leaves a truncated file at the destination path.

    Args:
        url: Address of the file to download.
        local_filename: Name to save the file under. Defaults to the last path
            component of ``url``.
        download_dir: Directory to save into; created if it does not exist.

    Returns:
        The path of the downloaded file, or ``None`` if the download failed
        (the error is printed, not raised).
    """
    # Create download directory
    os.makedirs(download_dir, exist_ok=True)

    # Extract filename from URL if not provided
    if local_filename is None:
        local_filename = os.path.basename(urlparse(url).path)
    if not local_filename:
        # e.g. a URL whose path ends with "/" — there is no name to save under
        print(f"\n❌ Error: could not determine a file name from {url}")
        return None

    # Full path for the file, plus the temporary path used while downloading
    file_path = os.path.join(download_dir, local_filename)
    partial_path = file_path + ".part"

    print(f"Downloading: {url}")
    print(f"Saving to: {file_path}")
    print("-" * 50)

    try:
        # The context manager releases the streamed connection on every exit path
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()  # Raise an exception for bad status codes

            # Get file size if available
            total_size = int(response.headers.get('content-length', 0))

            downloaded_size = 0
            # Monotonic clock: elapsed time cannot go backwards on clock adjustments
            start_time = time.monotonic()

            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if not chunk:
                        continue
                    f.write(chunk)
                    downloaded_size += len(chunk)

                    # Show progress
                    if total_size > 0:
                        percent = (downloaded_size / total_size) * 100
                        elapsed = time.monotonic() - start_time
                        # Guard against a zero elapsed time on coarse clocks
                        speed = downloaded_size / elapsed / 1024 / 1024 if elapsed > 0 else 0.0  # MB/s
                        print(f"\rProgress: {percent:.1f}% ({downloaded_size/(1024*1024):.1f}/{total_size/(1024*1024):.1f} MB) - Speed: {speed:.1f} MB/s", end='')
                    else:
                        print(f"\rDownloaded: {downloaded_size/(1024*1024):.1f} MB", end='')

        total_time = time.monotonic() - start_time

        # The download is complete: move it to its final name in one step
        os.replace(partial_path, file_path)

        # Final stats
        final_size = downloaded_size / (1024 * 1024)  # Convert to MB
        avg_speed = final_size / total_time if total_time > 0 else 0

        print(f"\n✅ Download complete!")
        print(f"File size: {final_size:.1f} MB")
        print(f"Time taken: {total_time:.1f} seconds")
        print(f"Average speed: {avg_speed:.1f} MB/s")
        print(f"Saved to: {os.path.abspath(file_path)}")

        return file_path

    except requests.exceptions.RequestException as e:
        _discard_partial_download(partial_path)
        print(f"\n❌ Download failed: {str(e)}")
        return None
    except Exception as e:
        # Deliberate catch-all: batch downloads should report and move on
        _discard_partial_download(partial_path)
        print(f"\n❌ Error: {str(e)}")
        return None

def download_coc_state_file(state_code, year, download_dir="data/coc-shapefiles"):
    """
    Download the CoC GIS shapefile archive of one state for one year.

    The archive is fetched from files.hudexchange.info and saved as
    ``CoC_GIS_State_Shapefile_<state_code>.zip`` inside ``<download_dir>/<year>``
    (the year appears only in the folder name, not in the saved file name).

    Returns whatever ``download_file_with_progress`` returns for the download.
    """
    archive_stem = f"CoC_GIS_State_Shapefile_{state_code}"
    source_url = f"https://files.hudexchange.info/reports/published/{archive_stem}_{year}.zip"

    # One sub-folder per year keeps the identically named state archives apart
    year_dir = "/".join([download_dir, str(year)])

    return download_file_with_progress(source_url, f"{archive_stem}.zip", year_dir)

In [None]:
# NOTE(review): `years` stops at 2023, while the commented-out download loop
# below iterates range(2007, 2025) — confirm which range is intended.
years = range(2007, 2024)

# Two-letter abbreviations of the 50 states (DC and the territories are not listed)
states = (
    "AL AK AZ AR CA CO CT DE FL GA "
    "HI ID IL IN IA KS KY LA ME MD "
    "MA MI MN MS MO MT NE NV NH NJ "
    "NM NY NC ND OH OK OR PA RI SC "
    "SD TN TX UT VT VA WA WV WI WY"
).split()

# Uncomment to download the archive of every state for every year
# for year in range(2007, 2025):
#     for state in states:
#         download_coc_state_file(state, year)

Downloading: https://files.hudexchange.info/reports/published/CoC_GIS_State_Shapefile_AL_2024.zip
Saving to: data/coc-shapefiles/2024/CoC_GIS_State_Shapefile_AL.zip
--------------------------------------------------
Progress: 100.0% (1.1/1.1 MB) - Speed: 10.2 MB/s
✅ Download complete!
File size: 1.1 MB
Time taken: 0.1 seconds
Average speed: 10.2 MB/s
Saved to: /Users/nauman/Documents/mit/summer-2025/uplift/15-uplift/homelessness-prediction/data/coc-shapefiles/2024/CoC_GIS_State_Shapefile_AL.zip
Downloading: https://files.hudexchange.info/reports/published/CoC_GIS_State_Shapefile_AK_2024.zip
Saving to: data/coc-shapefiles/2024/CoC_GIS_State_Shapefile_AK.zip
--------------------------------------------------
Progress: 100.0% (2.8/2.8 MB) - Speed: 6.2 MB/s
✅ Download complete!
File size: 2.8 MB
Time taken: 0.5 seconds
Average speed: 6.2 MB/s
Saved to: /Users/nauman/Documents/mit/summer-2025/uplift/15-uplift/homelessness-prediction/data/coc-shapefiles/2024/CoC_GIS_State_Shapefile_AK.zip
Do

In [59]:
def unzip_all(folder_path):
    """
    Extract every ``.zip`` archive located directly inside ``folder_path``.

    Each archive is unpacked into a folder next to it that carries the
    archive's name without the ``.zip`` extension. One status line is printed
    per archive; an archive that cannot be extracted is reported and skipped,
    so the remaining archives are still processed.
    """
    pattern = os.path.join(folder_path, "*.zip")

    for archive_path in glob.glob(pattern):
        archive_name = os.path.basename(archive_path)
        # Destination folder: the archive path with its extension stripped
        destination, _extension = os.path.splitext(archive_path)

        try:
            with zipfile.ZipFile(archive_path, 'r') as archive:
                archive.extractall(destination)
            print(f"✅ Extracted: {archive_name}")
        except Exception as error:
            print(f"❌ Failed: {archive_name} - {error}")

In [None]:
# for year in range(2007, 2025):
#     unzip_all(f'data/coc-shapefiles/{year}')

✅ Extracted: CoC_GIS_State_Shapefile_NJ.zip
✅ Extracted: CoC_GIS_State_Shapefile_AR.zip
✅ Extracted: CoC_GIS_State_Shapefile_WI.zip
✅ Extracted: CoC_GIS_State_Shapefile_LA.zip
✅ Extracted: CoC_GIS_State_Shapefile_ME.zip
✅ Extracted: CoC_GIS_State_Shapefile_UT.zip
✅ Extracted: CoC_GIS_State_Shapefile_CO.zip
✅ Extracted: CoC_GIS_State_Shapefile_MD.zip
✅ Extracted: CoC_GIS_State_Shapefile_NH.zip
✅ Extracted: CoC_GIS_State_Shapefile_MS.zip
✅ Extracted: CoC_GIS_State_Shapefile_OH.zip
✅ Extracted: CoC_GIS_State_Shapefile_WY.zip
✅ Extracted: CoC_GIS_State_Shapefile_NM.zip
✅ Extracted: CoC_GIS_State_Shapefile_MA.zip
✅ Extracted: CoC_GIS_State_Shapefile_MT.zip
✅ Extracted: CoC_GIS_State_Shapefile_OK.zip
✅ Extracted: CoC_GIS_State_Shapefile_NY.zip
✅ Extracted: CoC_GIS_State_Shapefile_HI.zip
✅ Extracted: CoC_GIS_State_Shapefile_SC.zip
✅ Extracted: CoC_GIS_State_Shapefile_IL.zip
✅ Extracted: CoC_GIS_State_Shapefile_KS.zip
✅ Extracted: CoC_GIS_State_Shapefile_IN.zip
✅ Extracted: CoC_GIS_State_Shape