First, I create a crosswalk from radio station markets to U.S. counties.

In [1]:
import pandas as pd

# Build mcf_df: one row per (market, county) from the county-to-market sheet,
# enriched with the county FIPS code from the FIPS master list.

# Load the station genre data (Stata file). Not used further in this cell.
file_path = 'Imports/Station Data/Genre_Data/station_genre.dta'
hip_hop_df = pd.read_stata(file_path)

# Load the county FIPS master list.
file_path = 'Imports/Crosswalks/county_fips_master.csv'
county_fips_df = pd.read_csv(file_path)

# Load the county-to-market mapping.
file_path = 'Imports/Crosswalks/countytomarket.xlsx'
county_market_df = pd.read_excel(file_path)

# --- Clean the county-to-market table ---------------------------------------

# Rename "market" to "Market" and forward-fill it, so rows with a blank market
# inherit the most recent non-missing value above them.
county_market_df = county_market_df.rename(columns={"market": "Market"})
county_market_df["Market"] = county_market_df["Market"].ffill()

# Drop Puerto Rico rows. The explicit .copy() makes the filtered result an
# independent frame, so the column assignment below does not trigger
# SettingWithCopyWarning.
county_market_df = county_market_df[county_market_df["Market"] != "Puerto Rico"].copy()

# Normalise county names: title case, then strip trailing whitespace.
county_market_df["county"] = county_market_df["county"].str.title().str.rstrip()

# --- Clean the FIPS master list ----------------------------------------------

# Drop the "county" column, then keep the first five remaining columns.
# .copy() for the same reason as above: columns are assigned on this frame next.
county_fips_df = county_fips_df.drop(columns=["county"]).iloc[:, :5].copy()

# FIPS codes as integers.
county_fips_df["FIPS"] = county_fips_df["FIPS"].astype(int)

# Title-case county names so they compare equal to county_market_df["county"].
county_fips_df["county_name"] = county_fips_df["county_name"].str.title()

# Rename "District Of Columbia" (already title-cased above) to "Washington".
# NOTE(review): presumably this matches how the market sheet labels DC - confirm.
county_fips_df["county_name"] = county_fips_df["county_name"].str.replace(
    "District Of Columbia", "Washington", regex=False
)

# Strip the " County", " Parish" and " Municipality" designations from county
# names. regex=False keeps the match literal regardless of the pandas version's
# default for `regex`.
for suffix in (" County", " Parish", " Municipality"):
    county_fips_df["county_name"] = county_fips_df["county_name"].str.replace(
        suffix, "", regex=False
    )

# --- Join ---------------------------------------------------------------------

# Left-join the FIPS data onto the market table, matching
# (county, homestate) against (county_name, state_abbr).
mcf_df = county_market_df.merge(
    county_fips_df,
    left_on=["county", "homestate"],
    right_on=["county_name", "state_abbr"],
    how="left",
)

# Remove the join keys and descriptive columns brought in from county_fips_df.
mcf_df = mcf_df.drop(columns=["county_name", "state_abbr", "long_name", "state_name"])

# Every cell of mcf_df must be populated; a county that found no FIPS match
# shows up here as a missing value.
null_counts = mcf_df.isnull().sum()
assert null_counts.sum() == 0, (
    f"mcf_df contains missing values:\n{null_counts[null_counts > 0]}"
)






Now, I add ORI codes to the crosswalk.

In [2]:
import pandas as pd

# Read the tab-separated ORI code table.
ori_code_file_path = 'Imports/Crosswalks/OriCode/OriCode.tsv'
ori_code_full_df = pd.read_csv(ori_code_file_path, sep='\t')

# A REPORT_FLAG == 1 filter is currently disabled, so all rows are kept.
# ori_code_full_df = ori_code_full_df[ori_code_full_df["REPORT_FLAG"] == 1]

# Restrict the ORI table to the identifier columns plus the FIPS join key.
ori_code_df = ori_code_full_df.loc[:, ['ORI9', 'ORI7', 'NAME', 'FIPS']]

# Collect crosswalk rows whose FIPS code does not appear in the ORI table.
fips_has_ori = mcf_df["FIPS"].isin(ori_code_df["FIPS"])
missing_fips = mcf_df.loc[~fips_has_ori]

# Every FIPS code in the crosswalk must be present in the ORI table.
assert missing_fips.empty, "There are FIPS values in mcf_df that are not in ori_code_df."

# Attach the ORI rows to the crosswalk by FIPS, keeping every crosswalk row.
df = pd.merge(mcf_df, ori_code_df, on="FIPS", how="left")

# After the join, no row may be left without an ORI9 value.
assert not df["ORI9"].isnull().any(), "There are missing values in df['ORI9']."

# Write the combined crosswalk to Exports/Crosswalks/1.ORI_FIPS_Market_crosswalk.csv
df.to_csv('Exports/Crosswalks/1.ORI_FIPS_Market_crosswalk.csv', index=False)
