In [75]:
import pandas as pd
import numpy as np

### Comparing poultry in AK

Across the two datasets, there are 7 CAFOs registered in both, while 10/17 CAFOs in Iowa's dataset are unique and 299/306 CAFOs in Counterglow's dataset are unique.

In [220]:
counterglow = pd.read_csv("../data/Counterglow+Facility+List+Complete.csv")
iowa_ar = pd.read_csv("../data/Poultry CAFOs in Arkansas.csv")

In [102]:
counterglow["Name"] = counterglow["Name"].astype(str, copy=False).apply(lambda x : x.title())

In [103]:
iowa_ar["NAME"] = iowa_ar["NAME"].apply(lambda x : x.title())

In [106]:
cg_ar = counterglow[counterglow["State"]=="AR"]["Name"]

both = []
only_iowa = []

for i in list(iowa_ar["NAME"]):
    if i in list(cg_ar):
        both.append(i)
    else:
        only_iowa.append(i)

In [100]:
only_cg = []

for i in cg_ar:
    if i not in both:
        only_cg.append(i)

len(only_cg)

299

In [107]:
both, only_iowa, only_cg

(['Cal-Maine Foods',
  'Todd Avery Poultry',
  'Hindsville Research Farm',
  'Khoi Nguyen Poultry Pads',
  'Nguyen Poultry',
  'U Of A Poultry Farm',
  'Robert Squires Farm'],
 ['Benton County Foods/Feemster',
  'Pitcher Farms',
  'Tyson Foods-Rison Hatchery',
  'Keith Smith Co., Inc.',
  'Con Agra Foods-Refrig Foods',
  'Wesley Farms',
  'Pilgrims Pride-Nashville Feed',
  'Mcminn Breeder Houses',
  'Arkansas Egg Co.,Inc./Summers',
  'Tyson Foods-Lincoln Hatchery'],
 ['\xa0Central Ark Prca Rodeo\xa0',
  '\xa0Hills Auto Sales Pro Rodeo\xa0',
  '\xa0Magnolia Stampede\xa0',
  '\xa0Old Fort Days Rodeo\xa0',
  '\xa0Sharp County Fair Rodeo\xa0',
  '1St West.Bank/ Pam Beahm Farm',
  'Ackerman, Gary',
  'Alexander, Scott/S & A Farms',
  'Alexander, Steve',
  'Alexander, Steve & Kim',
  'Ar Tech Univ.Foun./Alpha Farm',
  'Arch Farms, Llc',
  'Arkansas Department Of Corrections',
  'Arkansas Egg Co. Llc - Appletown',
  'Arnold, Billy',
  'Aubrey Kinion - Tyson',
  'Ayers, Ronnie',
  'Bacon Creek

### Generalized comparison function

- Alabama had the most CAFOs in common between the two datasets, all other states had far fewer or none
- Arkansas, North Carolina, and Texas had the least data from the University of Iowa, otherwise the number of entries between the two datasets was usually balanced
- Unnamed/NaN/Unknown farms were excluded from both datasets before analysis

In [173]:
def compare_CAFOs(cg_path, iowa_path, state):
    counterglow = pd.read_csv(cg_path)
    iowa = pd.read_csv(iowa_path)
    counterglow["Name"] = counterglow["Name"].astype(str, copy=False).apply(lambda x : x.title())
    iowa["NAME"] = iowa["NAME"].apply(lambda x : x.title())

    cg_state = [i for i in counterglow[counterglow["State"]==state]["Name"] if i != "Unknown" and i != "Nan"]
    iowa_unique = list(iowa["NAME"].dropna())

    both, only_iowa, only_cg = [], [], []

    for i in iowa_unique:
        if i in cg_state:
            both.append(i)
        else:
            only_iowa.append(i)

    for i in cg_state:
        if i not in both:
            only_cg.append(i)

    print("Common to both: " + str(len(both)) + "\nOnly in Iowa dataset: " + str(len(only_iowa)) + "\nOnly in Counterglow: " + str(len(only_cg)))
    return (both, only_iowa, only_cg)

In [174]:
cg_path = "../data/Counterglow+Facility+List+Complete.csv"
iowa_path = "../data/Poultry CAFOs in Mississippi.csv"

compare_CAFOs(cg_path, iowa_path, "MS")

Common to both: 30
Only in Iowa dataset: 530
Only in Counterglow: 523


(['Judson Wilson, Poultry',
  'Brenda Perry, Poultry',
  'Jason Smith, Poultry Farm Two',
  'Tucker Estes, Poultry',
  'Charles Cumberland, Poultry',
  'Eliza Thomas, Poultry',
  'Nathan Hayes, Poultry',
  'Julian Franco, Poultry Farm 2',
  'Johnnie W Marshall, Poultry',
  'David Breazeale, Poultry',
  'Phil Rhinewalt, Poultry',
  'Clint And Jessica Akerman, Poultry',
  'Chris Carter, Poultry',
  'Jerry Watkins, Poultry',
  'Chris Sims, Poultry',
  'Tim And Sherry Conner, Poultry',
  'Steve Thornton, Poultry',
  'Rickey Johnson, Poultry',
  'Jeremy Bardon, Poultry',
  'Kathy Hill, Poultry',
  'Harry Mclain, Poultry',
  'Rod Yelverton, Poultry',
  'Charles David Williams, Poultry',
  'Joey Thrash, Poultry',
  'Ryan Hillhouse, Poultry Farm Number 2',
  'Josh Slay, Poultry',
  'Scott Mincey, Poultry',
  'Danny Ginn, Poultry',
  'Tristan Howell, Poultry',
  'Barry Sharp, Poultry'],
 ['Randy Flake, Poultry',
  'Sorgum Branch Poultry Farm',
  'Devon Sharp, Poultry',
  'Bobby Wilson, Poultry'

In [175]:
cg_path = "../data/Counterglow+Facility+List+Complete.csv"
iowa_path = "../data/Poultry CAFOs in Alabama.csv"

compare_CAFOs(cg_path, iowa_path, "AL")

Common to both: 576
Only in Iowa dataset: 456
Only in Counterglow: 437


(['Abercrombie Farms',
  'Beaty Poultry Farm',
  'C And C Farms',
  'Chaney Branch Farms',
  'Cox Farms',
  'Eric Sutton Poultry',
  'Four H Farm',
  'Helms Poultry Farms',
  'Mcrae Poultry Farms',
  'Noel Welch Farms',
  'Parsons Farm',
  'Pinnacle Farms',
  'Shane Bowman Farms',
  'Southern Pride Poultry Farm',
  'Todd Sconyers Farm',
  'Tyler Poultry Farms',
  'Woodham Farms',
  'Big Spring Creek Farm',
  'Curtis Hawkins Farm',
  'Dixieland Farms',
  'Double H Farm',
  'Faith Farm',
  'Fort Bragg Farm',
  'Graves Creek Farm',
  'Green Acres Farm',
  'Holmes Farms Inc.',
  'Maze Farm Inc.',
  'Miller Poultry, Llc',
  'Murphree Valley Farms',
  'Rock Springs Farm, Llc',
  'Sdh Farm',
  'Slap Happy Farm',
  'Smith Farms',
  'Whitley Poultry Farm',
  'Charles Kilpatrick',
  'Craig Grant',
  'Faron Frazier Farm',
  'Gafford Farms',
  'Harold Parmer',
  'James M Kilpatrick Farm',
  'Jerry Stinson',
  'Ken Burkett',
  'Lloyd Shell Farm',
  'Mitchell Farms',
  'Randy Smith',
  'Tommy Thomps

In [176]:
cg_path = "../data/Counterglow+Facility+List+Complete.csv"
iowa_path = "../data/Poultry CAFOs in Arkansas.csv"

compare_CAFOs(cg_path, iowa_path, "AR")

Common to both: 7
Only in Iowa dataset: 10
Only in Counterglow: 232


(['Cal-Maine Foods',
  'Todd Avery Poultry',
  'Hindsville Research Farm',
  'Khoi Nguyen Poultry Pads',
  'Nguyen Poultry',
  'U Of A Poultry Farm',
  'Robert Squires Farm'],
 ['Benton County Foods/Feemster',
  'Pitcher Farms',
  'Tyson Foods-Rison Hatchery',
  'Keith Smith Co., Inc.',
  'Con Agra Foods-Refrig Foods',
  'Wesley Farms',
  'Pilgrims Pride-Nashville Feed',
  'Mcminn Breeder Houses',
  'Arkansas Egg Co.,Inc./Summers',
  'Tyson Foods-Lincoln Hatchery'],
 ['\xa0Central Ark Prca Rodeo\xa0',
  '\xa0Hills Auto Sales Pro Rodeo\xa0',
  '\xa0Magnolia Stampede\xa0',
  '\xa0Old Fort Days Rodeo\xa0',
  '\xa0Sharp County Fair Rodeo\xa0',
  '1St West.Bank/ Pam Beahm Farm',
  'Ackerman, Gary',
  'Alexander, Scott/S & A Farms',
  'Alexander, Steve',
  'Alexander, Steve & Kim',
  'Ar Tech Univ.Foun./Alpha Farm',
  'Arch Farms, Llc',
  'Arkansas Department Of Corrections',
  'Arkansas Egg Co. Llc - Appletown',
  'Arnold, Billy',
  'Aubrey Kinion - Tyson',
  'Ayers, Ronnie',
  'Bacon Creek

In [177]:
cg_path = "../data/Counterglow+Facility+List+Complete.csv"
iowa_path = "../data/Poultry CAFOs in North Carolina.csv"

compare_CAFOs(cg_path, iowa_path, "NC")

Common to both: 10
Only in Iowa dataset: 9
Only in Counterglow: 322


(['Flint Ridge Pullet Farm',
  'Gardner Farms 2',
  'Hyde County Egg Farm',
  'Hyde County Egg Farm',
  'Red Hill Eggs Llc',
  'Rose Poultry Farm',
  'Spring Hope Poultry Inc 2',
  'A&D Poultry',
  "Latta'S Egg Ranch",
  "Simpson'S Eggs Inc"],
 ['Xiong Farm',
  'Chicken Hollow Farms',
  'D&M Poultry',
  'Edwin Reid Poultry Farm',
  'Production Enterprise Inc. Farm',
  'Yang Farm',
  'Warren Boone Farm',
  'Twin J Farms',
  'Enterprise Farm Caledonia'],
 ['A.D. And Carlton Williard',
  'Acorn Ridge',
  'Acre Station Meat Farm',
  'Alan Coble Farm',
  'Alan Smith Dairy Farm',
  'Anderson Farm',
  'Armo Llc',
  'Arrowhead Poultry',
  'Asj Mathis Farms, Llc',
  'Aycoth Farm',
  'B&P Farms',
  'Back\xa0Forty\xa0Farm (Bw\xa0Pope,\xa0Inc.)',
  'Barbara Walker/Walker Farms',
  'Barbara Wall',
  'Barefoot\xa0Farm',
  'Bb Dairy',
  'Beal Family Farms',
  'Beam Dairy',
  'Beam Dairy',
  'Beard Farm',
  'Beaver-Rill Farms, Inc',
  'Beeson Farm',
  'Benson Sow Farm Llc',
  'Bethel Farm',
  'Beville

In [178]:
cg_path = "../data/Counterglow+Facility+List+Complete.csv"
iowa_path = "../data/Poultry CAFOs in South Carolina.csv"

compare_CAFOs(cg_path, iowa_path, "SC")

Common to both: 0
Only in Iowa dataset: 629
Only in Counterglow: 32


([],
 ['Mark Speer Poultry Farm',
  'Nicholas Stokes Poultry',
  'Padgett Poultry House',
  'Padgett Poultry House',
  'Mcfarlan Farm',
  'Red Legged Rooster',
  'Marc Marsh/ Rooster Coop',
  'Smith, S L,  Breeder Farm',
  'Rafael Poultry',
  'Creekside Farm',
  'Quail Creek Farm',
  'Evans, Mitchell/Broiler Facili',
  'Sanders Breeder Farm',
  'Windy Hill Poultry Farm',
  'Fickling, Tal/Breeder Farm',
  'Humble Acres Breeder Farm',
  'A.T. Gales Breeder Operation',
  'Cullen Bolen, Llc',
  'Walker Nix Breeder Farm',
  'Swartz Breeder Facility',
  'Norris Farms (Breeders)',
  'Den-Mar Farms (Mccaskill)',
  'South Fork Farms',
  'Corn House Farm',
  'Knight Breeder Farm',
  'Huckleberry Head Farm',
  'Osborne Poultry Farm',
  'Sandifer & Son Farms',
  'Smoak Poultry Farm',
  'Four Winds Farm',
  'Bay Branch Poultry Farm',
  'Carey Frick Breeder Farm',
  'Vintage Acres, Llc',
  'Samples Poultry Farms',
  'Brown Poultry Farm',
  'Vasser Poultry Farm',
  'Bolen Poultry Farm',
  'Jeff David

In [180]:
cg_path = "../data/Counterglow+Facility+List+Complete.csv"
iowa_path = "../data/Poultry CAFOs in Texas.csv"

compare_CAFOs(cg_path, iowa_path, "TX")

Common to both: 10
Only in Iowa dataset: 16
Only in Counterglow: 521


(['Texas Department Of Criminal Justice',
  'Texas Department Of Criminal Justice',
  'Texas Department Of Criminal Justice',
  'Texas Department Of Criminal Justice',
  'Texas Department Of Criminal Justice',
  'Texas Department Of Criminal Justice',
  'Texas Department Of Criminal Justice',
  'Texas Department Of Criminal Justice',
  'Texas Department Of Criminal Justice',
  'Texas Department Of Criminal Justice'],
 ['Feather Crest Farms Inc',
  'Brunner, Allan James',
  'Idalou Egg Ranch Lp',
  'Cal-Maine Foods Inc',
  'Cal-Maine Foods Inc',
  'Cal-Maine Foods Inc',
  'Cal-Maine Foods Inc',
  'Cal-Maine Foods Inc',
  'Cal-Maine Foods Inc',
  'Kieke Egg Farm Llc',
  'Cal-Maine Foods Inc',
  'Wharton County Foods Llc',
  'Feather Crest Farms Inc',
  'Red River Valley Egg Farm Llc',
  'Cal-Maine Foods Inc',
  'Cal-Maine Foods Inc'],
 ['\xa0\xa0Matagorda County Fair & Rodeo',
  '\xa0Abc Pro Rodeo\xa0',
  '\xa0Angelina Benefit Rodeo\xa0',
  '\xa0Bandera Pro Rodeo\xa0',
  '\xa0Bell County

### Location Analysis

- Some variance between lat/long values across the two datasets, even for names with an exact match
- Trying Haversine distance to find closest location candidates 

In [193]:
counterglow[counterglow["Name"]=="Cal-Maine Foods"]

Unnamed: 0,Name,Lat,Lat.1,Address,City,State,County,Description,Business/company name,Postal address,Phone number,Region,Facility name,Number of animals,Full address,Website URL,Postcode,Suburb/city,Contracted to,Farm Type
3009,Cal-Maine Foods,36.347275,-93.443962,"234 Pickens Street Green Forest 72638, United ...",,AR,Carroll County,,,,,,,,,,,,,Chickens (Eggs)


In [243]:
iowa_ar.loc[:, ["NAME", "ADDRESS", "LAT", "LONG"]]

Unnamed: 0,NAME,ADDRESS,LAT,LONG
0,BENTON COUNTY FOODS/FEEMSTER,PO BOX 38 SILOAM SPRINGS,36.176832,-94.551391
1,CAL-MAINE FOODS,PO BOX 1480 GREEN FOREST,36.335347,-93.436019
2,PITCHER FARMS,1511 Greene 628 Road Paragould,36.133914,-90.535014
3,TYSON FOODS-RISON HATCHERY,PO BOX 466 RISON,33.958435,-92.190145
4,"Keith Smith Co., Inc.",130 K-Tech Lane Hot Springs,34.521777,-93.12044
5,CON AGRA FOODS-REFRIG FOODS,307 DODGEN PLACE OZARK,35.485014,-93.81818
6,Wesley Farms,2450 Green 405 Rd Rector,36.263118,-90.292601
7,TODD AVERY POULTRY,254 Nevada 230 Prescott,33.811033,-93.459637
8,PILGRIMS PRIDE-NASHVILLE FEED,P.O. Box 811 Nashville,33.945669,-93.847129
9,Hindsville Research Farm,PO Box 2020 Springdale,36.154074,-94.154221


#### Matching Counterglow to Iowa Southern US full dataset (based on lat/long only)

In [273]:
southern_us = pd.read_csv("../data/Poultry CAFOs in the Southern US.csv")
counterglow['Coord'] = counterglow[['Lat', 'Lat.1']].apply(lambda x: (x['Lat'], x['Lat.1']), axis=1)

In [274]:
counterglow['Coord'] = counterglow[['Lat', 'Lat.1']].apply(lambda x: (x['Lat'], x['Lat.1']), axis=1)

##### Function that calculates lat/long distance

In [592]:
from math import radians, cos, sin, asin, sqrt

def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance in kilometers between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles. Determines return value units.
    return c * r

##### Finding closest location matches

In [281]:
candidate_points = np.array(counterglow['Coord'])

matches = []

for index, row in southern_us.iterrows():
    target_point = np.array([row["LAT"], row["LONG"]])

    min_distance = float('inf')
    closest_point = None

    for point in candidate_points:
        distance = haversine(target_point[1], target_point[0], point[1], point[0])
        if distance < min_distance:
            min_distance = distance
            closest_point = point

    threshold = 0.3048 # 1000 feet in km

    if min_distance <= threshold:
        matches.append(closest_point)
    else:
        matches.append(0)

southern_us["Counterglow location matches"] = matches

In [285]:
southern_us[southern_us["Counterglow location matches"]!=0] # 626 matches! (out of 3234 in Iowa dataset)

Unnamed: 0,FID,ADDRESS,STATE,ZIP,LAT,LONG,CAFO_TYPE,CAFO_SUBTY,SIS_CODE,ANIMAL_CNT,...,AU,E_AU,MANURE_AMT,E_MANURE_A,T_MANURE_A,NOTE,Geo_note,x,y,Counterglow location matches
13,13,"4920 LA-145, Choudrant",LA,71227,32.505945,-92.537616,Polutry,POULTRY,SIS0290,,...,,,,,,Poultry prob = 0.996,,-10301240,3829909,"(32.504795, -92.53846)"
18,18,"421 Par Rd 308, Simsboro",LA,71275,32.530988,-92.855886,Polutry,POULTRY,SIS0295,,...,,,,,,Poultry prob = 0.9973,,-10336670,3833215,"(32.532471, -92.856804)"
19,19,"Unnamed Road, Simsboro",LA,71275,32.533685,-92.855882,Polutry,POULTRY,SIS0297,,...,,,,,,Poultry prob = 0.9567,,-10336669,3833571,"(32.532471, -92.856804)"
20,20,"10710 US-80, Simsboro",LA,71275,32.533662,-92.836777,Polutry,POULTRY,SIS0299,,...,,,,,,Poultry prob = 0.9663,,-10334543,3833568,"(32.534241, -92.837662)"
21,21,"262 Traylor Rd, Simsboro",LA,71275,32.541741,-92.827210,Polutry,POULTRY,SIS0300,,...,,,,,,Poultry prob = 0.9833,,-10333478,3834635,"(32.540588, -92.826561)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3136,3179,1716 COUNTRYSIDE DR BLACKSTOCK,SC,29014,34.545504,-81.118967,Poultry,POULTRY (Turkey);,SIS0253,,...,,,0,11446,11446,0.59 lb per day-animal,,-9030122,4102288,"(34.545502, -81.118797)"
3138,3181,PO BOX 618 CAMDEN,SC,29128,34.100642,-80.516346,Poultry,POULTRY (Turkey);,SIS0253,,...,,,0,26550,26550,0.59 lb per day-animal,,-8963039,4042325,"(34.1005599, -80.5156885)"
3203,3246,368 E DOC GARRIS RD LANCASTER,SC,29067,34.475265,-80.618540,Poultry,POULTRY (Turkey);,SIS0253,,...,,,0,35400,35400,0.59 lb per day-animal,,-8974415,4092799,"(34.4755, -80.6196)"
3222,3268,307 DODGEN PLACE OZARK,AR,72949,35.485014,-93.818180,Poultry,POULTRY (Layer);,SIS0254,,...,,,0,,0,,,-10443793,4229990,"(35.485016, -93.818176)"


#### Matching Counterglow entries to Iowa (state by state)

Returns index of corresponding Counterglow row

In [291]:
match_df = pd.DataFrame(columns=['Name','Latitude','Longitude','Name/Location','Fuzzy Name/Location','Name Match','Location Match','No Match'])

In [295]:
match_df

Unnamed: 0,Name,Latitude,Longitude,Name/Location,Fuzzy Name/Location,Name Match,Location Match,No Match


In [289]:
iowa_ar.head()

Unnamed: 0,FID,NO,ID_1,ID_2,ID3,DATE_BEGIN,DATA_EXPIR,NAME,ADDRESS,STATE,...,ANIMAL_CNT,E_ANIMAL_C,AU,E_AU,MANURE_AMT,E_MANURE_A,T_MANURE_A,NOTE,x,y
0,0,6972,04-00204,4166-WR-6,,,,BENTON COUNTY FOODS/FEEMSTER,PO BOX 38 SILOAM SPRINGS,AR,...,,,,,0,,0,,-10525413,4324981
1,3,6984,08-00058,3841-WR-2,,,,CAL-MAINE FOODS,PO BOX 1480 GREEN FOREST,AR,...,,,,,0,,0,,-10401251,4346864
2,4,6991,11-00358,ARR155307,,,,PITCHER FARMS,1511 Greene 628 Road Paragould,AR,...,,,,,0,,0,,-10078312,4319064
3,5,6994,13-00009,ARR000015750,,,,TYSON FOODS-RISON HATCHERY,PO BOX 466 RISON,AR,...,,,,,0,,0,,-10262561,4023223
4,7,7014,17-01453,5318-W,,,,"Keith Smith Co., Inc.",130 K-Tech Lane Hot Springs,AR,...,,,,,0,,0,,-10366121,4099082


In [290]:
from fuzzywuzzy import fuzz

In [593]:
def cafo_match(counterglow, iowa_state):
    name, latitude, longitude, name_location, fuzzyname_location, name_match, location_match, no_match = ([] for i in range(8))
    for i, srow in iowa_state.iterrows():
        iowa_loc = (srow["LAT"], srow["LONG"])
        iowa_name = srow["NAME"].upper()

        name.append(iowa_name)
        latitude.append(iowa_loc[0])
        longitude.append(iowa_loc[1])
        name_location.append(None)
        fuzzyname_location.append(None)
        name_match.append(None)
        location_match.append(None)
        no_match.append(True)

        for j, crow in counterglow.iterrows():
            # check name match
            cg_name = crow["Name"]
            if type(cg_name)==str:
                cg_name = crow["Name"].upper()
            cg_loc = (crow["Lat"], crow["Lat.1"])
            if iowa_name == cg_name:
                nmatch = 1
            elif fuzz.token_sort_ratio(iowa_name, cg_name) > 85:
                nmatch = 2
            else:
                nmatch = 0
            
            # check location match - within 1000 feet
            dist = haversine(iowa_loc[1], iowa_loc[0], cg_loc[1], cg_loc[0])
            thresh = 0.3048
            if dist <= thresh:
                if nmatch == 1:
                    name_location[i] = j # index of corresponding entry in cg
                elif nmatch == 2:
                    fuzzyname_location[i] = j
                else: 
                    location_match[i] = j
            else: # dist > thresh
                if nmatch == 1 or nmatch == 2:
                    name_match[i] = j
            
    dictionary = {
        'Name': name,
        'Latitude': latitude,
        'Longitude': longitude,
        'Name/Location': name_location,
        'Fuzzy Name/Location': fuzzyname_location,
        'Name Match': name_match,
        'Location Match': location_match,
        'No Match': no_match
    }

    match_df = pd.DataFrame(dictionary)
    for i, drow in match_df.iterrows():
        if drow[["Name/Location", "Fuzzy Name/Location", "Name Match", "Location Match"]].notnull().any():
            match_df.loc[i, "No Match"] = False

    return match_df

In [459]:
ark = cafo_match(counterglow, iowa_ar)

#Common to both: 7
#Only in Iowa dataset: 10
#Only in Counterglow: 232

# 'Cal-Maine Foods',
#  'Todd Avery Poultry',
#  'Hindsville Research Farm',
#  'Khoi Nguyen Poultry Pads',
#  'Nguyen Poultry',
#  'U Of A Poultry Farm',
#  'Robert Squires Farm'],

In [460]:
ark

Unnamed: 0,Name,Latitude,Longitude,Name/Location,Fuzzy Name/Location,Name Match,Location Match,No Match
0,BENTON COUNTY FOODS/FEEMSTER,36.176832,-94.551391,,,,,True
1,CAL-MAINE FOODS,36.335347,-93.436019,,,3020.0,,False
2,PITCHER FARMS,36.133914,-90.535014,,,16522.0,,False
3,TYSON FOODS-RISON HATCHERY,33.958435,-92.190145,,,,,True
4,"KEITH SMITH CO., INC.",34.521777,-93.12044,,,,,True
5,CON AGRA FOODS-REFRIG FOODS,35.485014,-93.81818,,,,2843.0,False
6,WESLEY FARMS,36.263118,-90.292601,,,,,True
7,TODD AVERY POULTRY,33.811033,-93.459637,,,21113.0,,False
8,PILGRIMS PRIDE-NASHVILLE FEED,33.945669,-93.847129,,,,,True
9,HINDSVILLE RESEARCH FARM,36.154074,-94.154221,,,9249.0,,False


#### Mapping Overlaps with Folium

In [506]:
import folium

In [581]:
ark_map = folium.Map(location=[ark.Latitude.mean(), ark.Longitude.mean()], zoom_start=7, control_scale=True)

In [582]:
def add_points(state_map, state_df, color):
    for index, location_info in state_df.iterrows():
        folium.Marker([location_info["Latitude"], location_info["Longitude"]], popup=location_info["Name"], icon=folium.Icon(color=color)).add_to(state_map) 

In [583]:
add_points(ark_map, ark[ark["No Match"]==False], "green") # adding points that matched on both

In [584]:
add_points(ark_map, ark[ark["No Match"]], "blue") # adding points unique to Iowa

In [585]:
# adding points unique to Counterglow — need to drop from Counterglow df by index
non_null_values = []

for col in ["Name/Location", "Fuzzy Name/Location", "Name Match", "Location Match"]:
    non_null_values.extend(ark[col].dropna().tolist())

cg_unique = counterglow.copy()
cg_unique.rename(columns={"Lat": "Latitude", "Lat.1": "Longitude", "Business/company name": "Name"}, inplace=True)

for i in non_null_values:
    cg_unique.drop(i, inplace=True)

In [586]:
cg_unique.head()

Unnamed: 0,Name,Latitude,Longitude,Address,City,State,County,Description,Name.1,Postal address,...,Region,Facility name,Number of animals,Full address,Website URL,Postcode,Suburb/city,Contracted to,Farm Type,Coord
0,78,43.396729,-95.923149,"1757 Lily Avenue George 51237, United States",,IA,Lyon County,,,,...,,,,,,,,,Pigs (Meat),"(43.396729, -95.923149)"
1,84,42.92535,-96.429291,"1804 500th Street Hawarden 51023, United States",,IA,Sioux County,,,,...,,,,,,,,,Pigs (Meat),"(42.92535, -96.429291)"
2,87,40.658218,-92.410202,"21166 Mahogany Avenue Bloomfield 52537, United...",,IA,Davis County,,,,...,,,,,,,,,Pigs (Meat),"(40.658218, -92.410202)"
3,88,40.643219,-92.409889,"27268 Mahogany Avenue Bloomfield 52537, United...",,IA,Davis County,,,,...,,,,,,,,,Pigs (Meat),"(40.643219, -92.409889)"
4,89,40.660851,-92.421219,"21166 280th Street Bloomfield 52537, United St...",,IA,Davis County,,,,...,,,,,,,,,Pigs (Meat),"(40.660851, -92.421219)"


In [587]:
add_points(ark_map, cg_unique[cg_unique['State']=="AR"], "red")

In [588]:
ark_map

#### Running more states

In [604]:
path_ms = "../data/Poultry CAFOs in Mississippi.csv"
iowa_ms = pd.read_csv(path_ms)

In [598]:
al = cafo_match(counterglow, iowa_ms)

In [600]:
al_map = folium.Map(location=[al.Latitude.mean(), al.Longitude.mean()], zoom_start=7, control_scale=True) 

In [601]:
add_points(al_map, al[al["No Match"]==False], "green") # adding points that matched on both

In [602]:
add_points(al_map, al[al["No Match"]], "blue") # adding points unique to Iowa

In [626]:
# adding points unique to Counterglow — need to drop from Counterglow df by index

non_null_values = []

for col in ["Name/Location", "Fuzzy Name/Location", "Name Match", "Location Match"]:
    non_null_values.extend(al[col].dropna().tolist())

non_null_values = pd.Series(non_null_values).drop_duplicates().tolist()

In [627]:
cg_unique = counterglow.copy()
cg_unique.rename(columns={"Lat": "Latitude", "Lat.1": "Longitude", "Business/company name": "Name"}, inplace=True)

for i in non_null_values:
    cg_unique.drop(i, inplace=True)

cg_unique.head()

Unnamed: 0,Name,Latitude,Longitude,Address,City,State,County,Description,Name.1,Postal address,...,Region,Facility name,Number of animals,Full address,Website URL,Postcode,Suburb/city,Contracted to,Farm Type,Coord
0,78,43.396729,-95.923149,"1757 Lily Avenue George 51237, United States",,IA,Lyon County,,,,...,,,,,,,,,Pigs (Meat),"(43.396729, -95.923149)"
1,84,42.92535,-96.429291,"1804 500th Street Hawarden 51023, United States",,IA,Sioux County,,,,...,,,,,,,,,Pigs (Meat),"(42.92535, -96.429291)"
2,87,40.658218,-92.410202,"21166 Mahogany Avenue Bloomfield 52537, United States",,IA,Davis County,,,,...,,,,,,,,,Pigs (Meat),"(40.658218, -92.410202)"
3,88,40.643219,-92.409889,"27268 Mahogany Avenue Bloomfield 52537, United States",,IA,Davis County,,,,...,,,,,,,,,Pigs (Meat),"(40.643219, -92.409889)"
4,89,40.660851,-92.421219,"21166 280th Street Bloomfield 52537, United States",,IA,Davis County,,,,...,,,,,,,,,Pigs (Meat),"(40.660851, -92.421219)"


In [628]:
add_points(al_map, cg_unique[cg_unique['State']=="AL"], "red")

In [629]:
al_map