***Adding coverage coordinates to 1997 Station Data***

In [1]:
import pandas as pd
import numpy as np
import json

data = pd.read_csv('Exports/Data/3.1997StationDatawAntennaInfo.csv')

with open('Exports/Data/4.coverage_coords.json', 'r') as f:
    results = json.load(f)

result_df = pd.DataFrame(results).T
result_df.index = result_df.index.astype(int)

In [2]:
print(f'min: {result_df.index.min()}')
print(f'max: {result_df.index.max()}')
missing_indices = set(range(result_df.index.min(), result_df.index.max() + 1)) - set(result_df.index)
if missing_indices:
    print(f"Missing indices: {sorted(missing_indices)}")
else:
    print("All indices are present.")

min: 0
max: 4909
All indices are present.


In [3]:
data = pd.merge(data, result_df, how='inner', left_index=True, right_index=True)

In [4]:
mismatched_data = data.query('vertices.notna() and (eng_latitude != lat or eng_longitude != lon)').reset_index(drop=True).copy()

matched_data = data.query('not (vertices.notna() and (eng_latitude != lat or eng_longitude != lon))').reset_index(drop=True).copy()

print(f"Number of mismatched rows: {len(mismatched_data)}")
print(f"Number of matched rows: {len(matched_data)}")

Number of mismatched rows: 302
Number of matched rows: 4608


In [5]:
mismatched_data = mismatched_data.sort_values(['eng_latitude', 'eng_longitude']).reset_index(drop=True)

ordered_api_data = mismatched_data[['lat', 'lon', 'vertices']].sort_values(['lat', 'lon']).reset_index(drop=True)

mismatched_data[['lat', 'lon', 'vertices']] = ordered_api_data

In [6]:
data = pd.concat([matched_data, mismatched_data], ignore_index=True)
assert data.query('vertices.notna() and (eng_latitude != lat or eng_longitude != lon)').empty

In [7]:
missing_data = data.query('eng_latitude != lat or eng_longitude != lon or vertices.isna()')
print(f'portion with missing data: {len(missing_data) / len(data):.2%}')
missing_data[['eng_latitude', 'eng_longitude', 'lat', 'lon', 'vertices']].head(10)

portion with missing data: 0.16%


Unnamed: 0,eng_latitude,eng_longitude,lat,lon,vertices
755,39.420833,-104.654167,,,
1364,29.576111,-95.51,,,
1585,41.61,-122.623889,,,
1746,39.469167,-98.093611,,,
2252,40.213333,-85.46,,,
2793,36.12,-78.38,,,
2935,31.671667,-84.058889,,,
2961,34.046667,-81.783611,,,


In [8]:
# # Re run API on these
# import requests
# import time

# def get_coverage_data(lat, lon, rcamsl, erp, nradial = 36, field = 60, curve = 0, unit = 'm', pop = 'false', area = 'false'):
#     results = {}
#     # Define the base URL
#     base_url = "https://geo.fcc.gov/api/contours/coverage.json"
    
#     # Define parameters
#     params = {
#         "serviceType": "fm",
#         "lat": lat,
#         "lon": lon,
#         "nradial": nradial,
#         "rcamsl": rcamsl,
#         "field": field,
#         "erp": erp,
#         "curve": curve,
#         "pop": pop,
#         "area": area,
#         "unit": unit
#     }
    
#     # Make the request
#     response = requests.get(base_url, params=params)
    
#     # Check for successful response
#     if response.status_code == 200:
#         # Pull JSON data
#         data = response.json()
        
#         # Pull antenna coord
#         antenna_lat = data['features'][0]['properties']['antenna_lat']
#         antenna_lon = data['features'][0]['properties']['antenna_lon']

#         # Store
#         results['lat'] = antenna_lat
#         results['lon'] = antenna_lon

#         # Pull vertices
#         results['vertices'] = data['features'][0]['geometry']['coordinates'][0][0]
#         return results
#     else:
#         print("Error:", response.status_code)
#         return None



# missing_results = {}

# for idx, row in missing_data.iterrows():
#     print(f"Processing {idx}")
#     lat = row['eng_latitude']
#     lon = row['eng_longitude']
#     rcamsl = row['eng_rcamsl']
#     erp = row['eng_erp']

#     result = get_coverage_data(lat = lat, lon = lon, rcamsl = rcamsl, erp = erp)
#     missing_results[idx] = result
    
#     time.sleep(15)

# # save results
# with open('Exports/Data/6.coverage_coords_missing.json', 'w') as f:
#     json.dump(missing_results, f)

The API is not working at the moment. I will come back and fill in the missing values later. For now, I will drop

In [9]:
# load missing_results
with open('Exports/Data/6.coverage_coords_missing.json', 'r') as f:
    missing_results = json.load(f)

missing_results_df = pd.DataFrame(missing_results).T

In [10]:
missing_results_df

Unnamed: 0,lat,lon,vertices
782,39.420833,-104.654167,"[[-104.6541666667, 39.907301373], [-104.542376..."
1391,29.576111,-95.51,"[[-95.51, 30.0093990656], [-95.4229759326, 30...."
1612,41.61,-122.623889,"[[-122.6238888889, 42.3069327092], [-122.45980..."
1773,39.469167,-98.093611,"[[-98.0936111111, 39.9011903159], [-97.9946007..."
2279,40.213333,-85.46,"[[-85.46, 40.4314153147], [-85.4105020533, 40...."
2820,36.12,-78.38,"[[-78.38, 36.3575650914], [-78.3267511782, 36...."
2962,31.671667,-84.058889,"[[-84.0588888889, 32.0054027916], [-83.9897435..."
2988,34.046667,-81.783611,"[[-81.7836111111, 34.2680274288], [-81.7363903..."


In [11]:
missing_results_df.index = missing_results_df.index.astype(int)

missing_results_df.head()

for idx, row in missing_results_df.iterrows():
    data.at[idx, 'lat'] = row['lat']
    data.at[idx, 'lon'] = row['lon']
    data.at[idx, 'vertices'] = row['vertices']

In [12]:
data = data.query('not (eng_latitude != lat or eng_longitude != lon or vertices.isna())').copy()

In [13]:
assert data.query('eng_latitude != lat or eng_longitude != lon or vertices.isna()').empty
data.drop(columns=[
    'eng_erp', 'eng_latitude', 'eng_longitude', 'eng_rcamsl'
    ], inplace=True)

***Unioning antenna coverage***

For rows with the same call sign, I will union coverage polygons. *(Recall that stat_letters uniquely identify each station in 1997. I assume these letters are the source of truth about 1997. I match eng_variables to these call signs).*

In [14]:
from shapely.geometry import Polygon

data.vertices = data.vertices.apply(lambda x: Polygon(x))

def union_fn(vertices_series):

    list_param = list(vertices_series)

    union_polygon = list_param[0]
    for poly in list_param[1:]:
        union_polygon = union_polygon.union(poly)

    return union_polygon


In [15]:
unioned_vertices = data.groupby('stat_letters')['vertices'].agg(union_fn)
data['geometry'] = data['stat_letters'].map(unioned_vertices)
data.drop(columns=['vertices'], inplace=True)

In [16]:
# Convert to geopandas
import geopandas as gpd

data.reset_index(drop=True, inplace=True)
data = gpd.GeoDataFrame(data, geometry='geometry')
data = data.set_crs("EPSG:4326")  # CRS for geographic coordinates (lat/lon)

***Investigation of rating and genres in data***

In [17]:
print(f'Share of stations w/ rating data: {100*(data.stat_rating.notna().sum() / len(data)): .2f}%')

Share of stations w/ rating data:  55.17%


In [18]:
genres_per_format = list(data[[col for col in data.columns if 'format' in col]].apply(lambda x: set(x.dropna())))
all_genres = set.union(*genres_per_format)

genre_count = {}
for genre in all_genres:
    genre_count[genre] = (data[[col for col in data.columns if 'format' in col]] == genre).any(axis=1).sum()

pd.Series(genre_count).head()

Jazz                   59
Top-40                310
Middle-of-the-Road     29
NOA                    25
Big Band                1
dtype: int64

***Creating HH exposure variables***

In [19]:
import itertools

format_columns = data[[col for col in data.columns if 'format' in col]]

# Define the formats
formats = ['Hip Hop', 'Black', 'Urban Contemporary']

# Generate all possible combinations of formats
combinations = []
for r in range(1, len(formats) + 1):
    combinations.extend(itertools.combinations(formats, r))

# Create new variables for each combination
for combo in combinations:
    combo_name = '_'.join(combo)
    data[f'HH_conc_{combo_name}'] = (
        format_columns.map(lambda x: x in combo).sum(axis=1) / format_columns.count(axis=1)
    )

hip_hop_conc_cols = [col for col in data.columns if 'HH_conc' in col]

data = data[['stat_letters', 'lat', 'lon', 'geometry', 'stat_rating'] + hip_hop_conc_cols]
assert data[hip_hop_conc_cols].isna().sum().sum() == 0
data.head()

Unnamed: 0,stat_letters,lat,lon,geometry,stat_rating,HH_conc_Hip Hop,HH_conc_Black,HH_conc_Urban Contemporary,HH_conc_Hip Hop_Black,HH_conc_Hip Hop_Urban Contemporary,HH_conc_Black_Urban Contemporary,HH_conc_Hip Hop_Black_Urban Contemporary
0,KXGE,42.541111,-90.612778,"POLYGON ((-90.61278 42.74231, -90.5664 42.7343...",5.15,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,WCKZ,41.333611,-85.052222,"POLYGON ((-85.05222 41.52934, -85.00677 41.526...",0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,KBMB,38.566389,-121.479444,"POLYGON ((-121.47944 38.77447, -121.43394 38.7...",3.775,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,KAJM,34.43,-111.504444,"POLYGON ((-111.50444 34.9885, -111.40084 34.91...",0.466667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,KJCD,40.096389,-104.901111,"POLYGON ((-104.90111 40.76437, -104.74927 40.7...",2.075,0.0,0.0,0.0,0.0,0.0,0.0,0.0


***Adding Date of Initial Exposure***

In [20]:
filepath = 'Exports/Data/5.InitialExpDates.csv'
initial_exp_data = pd.read_csv(filepath, index_col=0)
initial_exp_data.rename(columns=lambda x: f'initial_exp_{x}', inplace=True)

merged_data = data.merge(initial_exp_data, how='left', left_on='stat_letters', right_index=True, validate='m:1', indicator=True)
assert merged_data['_merge'].unique() == 'both'
merged_data.drop(columns='_merge', inplace=True)

In [21]:
merged_data.head()

Unnamed: 0,stat_letters,lat,lon,geometry,stat_rating,HH_conc_Hip Hop,HH_conc_Black,HH_conc_Urban Contemporary,HH_conc_Hip Hop_Black,HH_conc_Hip Hop_Urban Contemporary,HH_conc_Black_Urban Contemporary,HH_conc_Hip Hop_Black_Urban Contemporary,initial_exp_Hip Hop,initial_exp_Black,initial_exp_Urban Contemporary
0,KXGE,42.541111,-90.612778,"POLYGON ((-90.61278 42.74231, -90.5664 42.7343...",5.15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,
1,WCKZ,41.333611,-85.052222,"POLYGON ((-85.05222 41.52934, -85.00677 41.526...",0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,1999.0
2,KBMB,38.566389,-121.479444,"POLYGON ((-121.47944 38.77447, -121.43394 38.7...",3.775,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,
3,KAJM,34.43,-111.504444,"POLYGON ((-111.50444 34.9885, -111.40084 34.91...",0.466667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,
4,KJCD,40.096389,-104.901111,"POLYGON ((-104.90111 40.76437, -104.74927 40.7...",2.075,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,


In [22]:
merged_data.to_file('Exports/Data/6.StationCoverageExposureData.geojson', driver='GeoJSON')