In [241]:
import pandas as pd
import geopandas as gpd
from pathlib import Path

### FSIS

In [242]:
FSIS_PATH = Path("../data/raw/MPI_Directory_by_Establishment_Name_29_04_24.csv")
df_fsis = pd.read_csv(FSIS_PATH, dtype={"duns_number": str})
df_fsis = df_fsis.dropna(subset=["activities"])
df_fsis = df_fsis[df_fsis.activities.str.lower().str.contains("poultry slaughter")]
df_fsis = df_fsis[df_fsis['size'] == "Large"]
df_fsis["duns_number"] = df_fsis["duns_number"].str.replace("-", "")

In [243]:
gdf_fsis = gpd.GeoDataFrame(df_fsis, geometry=gpd.points_from_xy(df_fsis.longitude, df_fsis.latitude))
gdf_fsis.set_crs(epsg=4326, inplace=True).head(3)

Unnamed: 0,establishment_id,establishment_number,establishment_name,duns_number,street,city,state,zip,phone,grant_date,activities,dbas,district,circuit,size,latitude,longitude,county,fips_code,geometry
98,2351,P7927,"AMICK FARMS, LLC",,274 NEALSON STREET,HURLOCK,MD,21643,(410) 943-3989,2021-09-07,Poultry Processing; Poultry Slaughter,,80,8004,Large,38.634688,-75.857209,Dorchester County,24019.0,POINT (-75.85721 38.63469)
160,3356,M4653A+P4653A+V4653A,"Agri Star Meat and Poultry, LLC",,220 West Street,Postville,IA,52162,(563) 864-7811,2019-06-21,Certification - Export; Meat Processing; Meat ...,Aaron's Beef; Agriprocessors; Iowa Best Beef; ...,25,2529,Large,43.087656,-91.581162,Allamakee County,19005.0,POINT (-91.58116 43.08766)
231,2260,P935,Allen Harim LLC,,18752 Harbeson Road,Harbeson,DE,19951,(302) 684-1640,2021-03-30,Poultry Processing; Poultry Slaughter,,80,8004,Large,38.720021,-75.288278,Sussex County,10005.0,POINT (-75.28828 38.72002)


In [244]:
gdf_fsis.shape

(156, 20)

In [245]:
gdf_fsis['duns_number'].unique()

array([nan, '035499086', '122262280', '071374854', '062980412',
       '156939969', '799910448', '034130328', '007334170', '794724146',
       '937853083', '005213962', '058671124', '607042454', '138275198',
       '797613275'], dtype=object)

### NETS

In [246]:
df_nets = pd.read_csv(
        "../data/raw/nets/NETSData2022_RAFI(WithAddresses).txt",
        sep="\t",
        encoding="latin-1",
        dtype={"DunsNumber": str},
        low_memory=False,
    )
df_nets_naics = pd.read_csv(
        "../data/raw/nets/NAICS2022_RAFI.csv",
        dtype={"DunsNumber": str},
        low_memory=False,
    )
df_nets = pd.merge(df_nets, df_nets_naics, on="DunsNumber", how="left")

In [247]:
df_nets.shape

(71032, 455)

In [248]:
for col in df_nets.columns:
    print(col)

DunsNumber
Company
TradeName
Address
City
State
ZipCode
ZIP4
Officer
Title
Area
Phone
Region
HQDuns
HQCompany
HQTradeName
HQAddress
HQCity
HQState
HQZipCode
HQZIP4
HQOfficer
HQTitle
HQArea
HQPhone
Subsidiary
Related
Kids
CBSA
FipsCounty
CityCode
Latitude
Longitude
LevelCode
EstCat
Emp90
EmpC90
Emp91
EmpC91
Emp92
EmpC92
Emp93
EmpC93
Emp94
EmpC94
Emp95
EmpC95
Emp96
EmpC96
Emp97
EmpC97
Emp98
EmpC98
Emp99
EmpC99
Emp00
EmpC00
Emp01
EmpC01
Emp02
EmpC02
Emp03
EmpC03
Emp04
EmpC04
Emp05
EmpC05
Emp06
EmpC06
Emp07
EmpC07
Emp08
EmpC08
Emp09
EmpC09
Emp10
EmpC10
Emp11
EmpC11
Emp12
EmpC12
Emp13
EmpC13
Emp14
EmpC14
Emp15
EmpC15
Emp16
EmpC16
Emp17
EmpC17
Emp18
EmpC18
Emp19
EmpC19
Emp20
EmpC20
Emp21
EmpC21
Emp22
EmpC22
EmpHere
EmpHereC
SizeCat
SIC2
SIC3
SIC4
SIC6
SIC8
SIC8_2
SIC8_3
SIC8_4
SIC8_5
SIC8_6
SICChange
SIC90
SIC91
SIC92
SIC93
SIC94
SIC95
SIC96
SIC97
SIC98
SIC99
SIC00
SIC01
SIC02
SIC03
SIC04
SIC05
SIC06
SIC07
SIC08
SIC09
SIC10
SIC11
SIC12
SIC13
SIC14
SIC15
SIC16
SIC17
SIC18
SIC19
SIC20
SIC21
SI

In [249]:
gdf_nets = gpd.GeoDataFrame(df_nets, geometry=gpd.points_from_xy(-df_nets.Longitude, df_nets.Latitude))
gdf_nets.set_crs(epsg=4326, inplace=True).head(3)

Unnamed: 0,DunsNumber,Company,TradeName,Address,City,State,ZipCode,ZIP4,Officer,Title,...,NAICS14,NAICS15,NAICS16,NAICS17,NAICS18,NAICS19,NAICS20,NAICS21,NAICS22,geometry
0,1663876,BROCKS BRONCO BALLS ...,...,5772 E IRONWOOD BLF ...,CAVE CREEK,AZ,85331,7708,LEE PALLO,OWNER,...,,,,,,,,,,POINT (-111.95680 33.86040)
1,2344232,WATSONS QULTY TURKEY PDTS INC ...,...,641 STATE RTE 168 ...,BLACKWOOD,NJ,8012,0,ALBERT O WATSON,PRESIDENT,...,,,,,,,,,,POINT (-75.05970 39.79120)
2,2842160,GETZ LAND AND CATTLE ...,...,5673 COUNTY ROAD H ...,GOVE,KS,67736,6024,,,...,,,,,,,,,,POINT (-100.37820 38.80050)


In [250]:
gdf_fsis = gdf_fsis.to_crs(epsg=9822)
gdf_nets = gdf_nets.to_crs(epsg=9822)
gdf_fsis['buffered'] = gdf_fsis.geometry.buffer(1000)

In [251]:
from fuzzywuzzy import fuzz

In [252]:
PARENT_CORPS = {
    "House of Raeford Farms of LA": "Raeford Farms Louisiana",
    "Mar-Jac Poultry-AL": "MARSHALL DURBIN FOOD CORP",
    "Mar-Jac Poultry-MS": "MARSHALL DURBIN FOOD CORP",
    "Perdue Foods, LLC": "PERDUE FARMS INC"
}

In [253]:
sindex_nets = gdf_nets.sindex

matches = []
no_spatial_match = []
no_string_match = []
no_string_match_multiple = []

for index, row in gdf_fsis.iterrows():
    matched = False
    possible_matches_index = list(sindex_nets.intersection(row['buffered'].bounds))
    possible_matches = gdf_nets.iloc[possible_matches_index]
    print(len(possible_matches))
    spatial_matches = possible_matches[possible_matches.geometry.intersects(row['buffered'])] # [['Company', 'Address', 'City', 'HQDuns']]
    if len(spatial_matches) == 0:
        unmatched_dict = {
                'DUNS': row['duns_number'],
                'FSIS Company': row['establishment_name'],
                'DBAs': row['dbas'],
                'FSIS Address': row['street'],
                'FSIS City': row['city'],
            }
        no_spatial_match.append(unmatched_dict)
        continue

    match_info = []  # Collect information on all potential matches
    spatial_matches_info = []

    for _, match in spatial_matches.iterrows():
        company_match = fuzz.token_sort_ratio(row["establishment_name"].upper(), match["Company"].upper()) > 70
        address_match = fuzz.token_sort_ratio(row["street"].upper(), match["Address"].upper()) > 70
        alt_name_match = False
        if row['establishment_name'] in PARENT_CORPS:
            alt_name_match = fuzz.token_sort_ratio(PARENT_CORPS.get(row['establishment_name'], "").upper(), match["Company"].upper()) > 70

        if company_match or address_match or alt_name_match:
            # print("Record matched!")
            extended_row = row.to_dict()
            extended_row.update({
                'Matched_Company': match['Company'],
                'Matched_Address': match['Address'],
                'Matched_City': match['City'],
                'HQDuns': match['HQDuns'],
                'HQ Company': match['HQCompany'],
                'Sales Last Year': match['SalesHere'],
                'Company_Match_Score': company_match,
                'Address_Match_Score': address_match,
            })
            matches.append(extended_row)
            matched = True
            break # TODO: check multiple matches later
        else:
            spatial_matches_info.append({
                'DUNS': row['duns_number'],
                'FSIS Company': row['establishment_name'],
                'DBAs': row['dbas'],
                'Matched_Company': match['Company'],
                'FSIS Address': row['street'],
                'Matched_Address': match['Address'],
                'FSIS City': row['city'],
                'Matched_City': match['City'],
            })

    if not matched:
        no_string_match.append({
            'DUNS': row['duns_number'],
            'FSIS Company': row['establishment_name'],
            'DBAs': row['dbas'],
            'FSIS Address': row['street'],
            'FSIS City': row['city'],
            'FSIS State': row['state']
        })
        no_string_match_multiple.extend(spatial_matches_info)  # Append as dictionary for uniform format

# Convert to DataFrame for easier review and manipulation
df_matches = pd.DataFrame(matches)
df_no_spatial = pd.DataFrame(no_spatial_match)
df_no_string = pd.DataFrame(no_string_match)
df_no_string_multiple = pd.DataFrame(no_string_match_multiple)

1
3
1
2
2
6
0
5
2
3
3
8
2
5
3
2
4
1
1
2
1
2
1
4
5
2
3
1
3
9
4
5
2
1
0
2
1
1
1
1
2
3
3
3
5
2
2
3
2
2
1
2
7
11
1
1
2
10
3
1
1
3
2
0
0
3
0
4
1
3
0
1
3
1
1
1
3
2
2
2
1
3
1
9
3
2
3
8
1
5
1
3
8
5
0
1
2
1
6
1
0
4
0
3
0
1
1
1
0
1
1
1
6
0
2
1
9
10
2
2
3
0
1
2
0
3
3
4
4
1
1
3
2
1
1
3
2
3
1
1
1
2
1
3
4
2
7
1
2
1
1
1
1
3
1
7


In [254]:
len(df_matches), len(gdf_fsis)

(132, 156)

In [255]:
len(df_no_string), len(no_spatial_match), len(no_string_match_multiple)

(8, 16, 33)

In [256]:
COL_ORDER = [
    "establishment_id", "establishment_number", "establishment_name", "duns_number", 
    "street", "city", "state", "zip", "Matched_Company", "Matched_Address", "Matched_City", 
    "Company_Match_Score", "Address_Match_Score", "phone", "grant_date", "activities", "dbas", 
    "district", "circuit", "size", "latitude", "longitude", "county", "fips_code", "geometry", 
    "buffered"
]

In [257]:
df_matches[COL_ORDER].to_csv("matches.csv", index=False)
df_no_spatial.to_csv("no_spatial_match.csv", index=False)
df_no_string.to_csv("no_string_match.csv", index=False)
df_no_string_multiple.to_csv("no_string_match_multiple.csv", index=False)

### Merge On DUNS Number

In [258]:
df_unmatched = pd.concat([df_no_spatial, df_no_string], sort=True)

In [259]:
df_unmatched.columns

Index(['DBAs', 'DUNS', 'FSIS Address', 'FSIS City', 'FSIS Company',
       'FSIS State'],
      dtype='object')

In [260]:
df_duns = pd.merge(
    df_unmatched, df_nets, left_on="DUNS", right_on="DunsNumber", how="inner"
)

In [261]:
df_duns

Unnamed: 0,DBAs,DUNS,FSIS Address,FSIS City,FSIS Company,FSIS State,DunsNumber,Company,TradeName,Address,...,NAICS13,NAICS14,NAICS15,NAICS16,NAICS17,NAICS18,NAICS19,NAICS20,NAICS21,NAICS22
0,"Arkie Chicken; Brennan Packing Company, Inc.; ...",58671124,5322 South Park Drive,Broken Bow,"Tyson Foods, Inc",,58671124,TYSON FOODS INC ...,TYSON ...,HWY 259 S ...,...,,,,,,,,,,
1,"Covington Farms; Sanderson Farms, Inc.; Sander...",797613275,700 McDonald Avenue,Albertville,Wayne Farms LLC,,797613275,WAYNE FARMS LLC ...,...,4110 CONTINENTAL DR ...,...,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0
2,,62980412,3672 S. Keller Rd.,Vincennes,"Farbest Foods, Inc.",IN,62980412,FARBEST FOODS INC ...,...,1155 W 12TH AVE STE B ...,...,,,,,,,,,,
3,Forester Farmers Market; George's Farmers Mark...,799910448,1810 St. Louis Street,Batesville,"Ozark Mountain Poultry, Inc.",AR,799910448,OZARK MOUNTAIN POULTRY INC ...,OMP FOODS ...,750 W EASY ST ...,...,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0


In [262]:
df_duns.to_csv("matched_duns.csv", index=False)

In [263]:
# TODO: Fix this
df_duns.groupby(by='HQDuns')['Sales Last Year'].sum().reset_index()

KeyError: 'Column not found: Sales Last Year'

In [None]:
len(df_matches['HQDuns'].unique())

47

In [None]:
df_matches['Sales Last Year'].sort_values()

9      5.500000e+04
27     6.238100e+04
2      9.615700e+04
44     1.330740e+05
64     1.500000e+05
           ...     
46     4.030785e+08
88     4.191156e+08
19     7.078526e+08
103    8.365224e+08
114    1.076559e+09
Name: Sales Last Year, Length: 132, dtype: float64

In [None]:
avg_sales = df_matches.groupby(by='HQDuns').agg({
    'HQ Company': 'first',
    'Sales Last Year': 'mean'
}).reset_index()
avg_sales

Unnamed: 0,HQDuns,HQ Company,Sales Last Year
0,3017464,G AND H FORTY-NINERS INC ...,12000000.0
1,3199692,NASH JOHNSON & SONS FARMS INC ...,29779100.0
2,3492519,CAGLES INC ...,179276700.0
3,5092358,DARLING INGREDIENTS INC ...,3331000.0
4,5416698,COOPER HATCHERY INC ...,7425000.0
5,6147383,HORMEL FOODS CORPORATION ...,59586730.0
6,6249189,,137500.0
7,6343958,MOUNTAIRE CORPORATION ...,55333630.0
8,6903702,TYSON FOODS INC ...,216397100.0
9,6982409,DLISTED ...,42838660.0


In [None]:
# TODO: dlisted? No parent corp? Examine this example
corp2parent = {
    "Pilgrim's Pride Corporation": "7334170",
    # TODO: Multiple Perdues??: 54567780, 22666820
    "Peco Foods, Inc": "34130328",
    # "Tyson Foods, Inc": Multiple: 12319076, 6903702

}

In [None]:
avg_sales['HQ Company'].unique()

array(['G AND H FORTY-NINERS INC                          ',
       'NASH JOHNSON & SONS FARMS INC                     ',
       'CAGLES INC                                        ',
       'DARLING INGREDIENTS INC                           ',
       'COOPER HATCHERY INC                               ',
       'HORMEL FOODS CORPORATION                          ', None,
       'MOUNTAIRE CORPORATION                             ',
       'TYSON FOODS INC                                   ',
       'DLISTED                                           ',
       'CONAGRA FOODS INC                                 ',
       'PILGRIMS PRIDE CORPORATION                        ',
       'FOSTER POULTRY FARMS                              ',
       'KOCH FOODS OF GADSDEN LLC                         ',
       'PERDUE FARMS INC                                  ',
       'SIMMONS PREPARED FOODS INC                        ',
       'TIP TOP POULTRY INC                               ',
       'PECO FOODS

In [None]:
with pd.option_context("display.max_columns", None):
    display(df_duns[df_duns["DunsNumber"].notnull()].head(1))
    

Unnamed: 0,establishment_id,establishment_number,establishment_name,duns_number,street,city,state,zip,phone,grant_date,activities,dbas,district,circuit,size,latitude,longitude,county,fips_code,geometry,buffered,DunsNumber,Company,TradeName,Address,City,State,ZipCode,ZIP4,Officer,Title,Area,Phone,Region,HQDuns,HQCompany,HQTradeName,HQAddress,HQCity,HQState,HQZipCode,HQZIP4,HQOfficer,HQTitle,HQArea,HQPhone,Subsidiary,Related,Kids,CBSA,FipsCounty,CityCode,Latitude,Longitude,LevelCode,EstCat,Emp90,EmpC90,Emp91,EmpC91,Emp92,EmpC92,Emp93,EmpC93,Emp94,EmpC94,Emp95,EmpC95,Emp96,EmpC96,Emp97,EmpC97,Emp98,EmpC98,Emp99,EmpC99,Emp00,EmpC00,Emp01,EmpC01,Emp02,EmpC02,Emp03,EmpC03,Emp04,EmpC04,Emp05,EmpC05,Emp06,EmpC06,Emp07,EmpC07,Emp08,EmpC08,Emp09,EmpC09,Emp10,EmpC10,Emp11,EmpC11,Emp12,EmpC12,Emp13,EmpC13,Emp14,EmpC14,Emp15,EmpC15,Emp16,EmpC16,Emp17,EmpC17,Emp18,EmpC18,Emp19,EmpC19,Emp20,EmpC20,Emp21,EmpC21,Emp22,EmpC22,EmpHere,EmpHereC,SizeCat,SIC2,SIC3,SIC4,SIC6,SIC8,SIC8_2,SIC8_3,SIC8_4,SIC8_5,SIC8_6,SICChange,SIC90,SIC91,SIC92,SIC93,SIC94,SIC95,SIC96,SIC97,SIC98,SIC99,SIC00,SIC01,SIC02,SIC03,SIC04,SIC05,SIC06,SIC07,SIC08,SIC09,SIC10,SIC11,SIC12,SIC13,SIC14,SIC15,SIC16,SIC17,SIC18,SIC19,SIC20,SIC21,SIC22,Industry,IndustryGroup,HQDuns90,HQDuns91,HQDuns92,HQDuns93,HQDuns94,HQDuns95,HQDuns96,HQDuns97,HQDuns98,HQDuns99,HQDuns00,HQDuns01,HQDuns02,HQDuns03,HQDuns04,HQDuns05,HQDuns06,HQDuns07,HQDuns08,HQDuns09,HQDuns10,HQDuns11,HQDuns12,HQDuns13,HQDuns14,HQDuns15,HQDuns16,HQDuns17,HQDuns18,HQDuns19,HQDuns20,HQDuns21,HQDuns22,HQDunsChange,FIPS90,FIPS91,FIPS92,FIPS93,FIPS94,FIPS95,FIPS96,FIPS97,FIPS98,FIPS99,FIPS00,FIPS01,FIPS02,FIPS03,FIPS04,FIPS05,FIPS06,FIPS07,FIPS08,FIPS09,FIPS10,FIPS11,FIPS12,FIPS13,FIPS14,FIPS15,FIPS16,FIPS17,FIPS18,FIPS19,FIPS20,FIPS21,FIPS22,FipsChange,OutofBis,YearStart,PayDexMin90,PayDexMax90,PayDexMin91,PayDexMax91,PayDexMin92,PayDexMax92,PayDexMin93,PayDexMax93,PayDexMin94,PayDexMax94,PayDexMin95,PayDexMax95,PayDexMin96,PayDexMax96,PayDexMin97,PayDexMax97,PayDexMin98,PayDexMax98,PayDexMin99,PayDexMax99,PayDexMin00,PayDexMax00,PayDexMin01,PayDexMax01,PayDexMin02,PayDexMax02,PayDexMin03,PayDexMax03,PayDexMin04,PayDexMax04,PayDexMin05,PayDexMax05,PayDexMin06,PayDexMax06,PayDexMin07,PayDexMax07,PayDexMin08,PayDexMax08,PayDexMin09,PayDexMax09,PayDexMin10,PayDexMax10,PayDexMin11,PayDexMax11,PayDexMin12,PayDexMax12,PayDexMin13,PayDexMax13,PayDexMin14,PayDexMax14,PayDexMin15,PayDexMax15,PayDexMin16,PayDexMax16,PayDexMin17,PayDexMax17,PayDexMin18,PayDexMax18,PayDexMin19,PayDexMax19,PayDexMin20,PayDexMax20,PayDexMin21,PayDexMax21,PayDexMin22,PayDexMax22,DnBRating90,DnBRating91,DnBRating92,DnBRating93,DnBRating94,DnBRating95,DnBRating96,DnBRating97,DnBRating98,DnBRating99,DnBRating00,DnBRating01,DnBRating02,DnBRating03,DnBRating04,DnBRating05,DnBRating06,DnBRating07,DnBRating08,DnBRating09,DnBRating10,DnBRating11,DnBRating12,DnBRating13,DnBRating14,DnBRating15,DnBRating16,DnBRating17,DnBRating18,DnBRating19,DnBRating20,DnBRating21,DnBRating22,Sales90,SalesC90,Sales91,SalesC91,Sales92,SalesC92,Sales93,SalesC93,Sales94,SalesC94,Sales95,SalesC95,Sales96,SalesC96,Sales97,SalesC97,Sales98,SalesC98,Sales99,SalesC99,Sales00,SalesC00,Sales01,SalesC01,Sales02,SalesC02,Sales03,SalesC03,Sales04,SalesC04,Sales05,SalesC05,Sales06,SalesC06,Sales07,SalesC07,Sales08,SalesC08,Sales09,SalesC09,Sales10,SalesC10,Sales11,SalesC11,Sales12,SalesC12,Sales13,SalesC13,Sales14,SalesC14,Sales15,SalesC15,Sales16,SalesC16,Sales17,SalesC17,Sales18,SalesC18,Sales19,SalesC19,Sales20,SalesC20,Sales21,SalesC21,Sales22,SalesC22,SalesHere,SalesHereC,SalesGrowth,SalesGrowthPeer,MoveYears,LastMove,MoveSIC4,OriginCity,OriginState,OriginZIP,DestCity,DestState,DestZIP,MoveEmp,EmpC,MoveSales,MoveSalesC,PubPriv,LegalStat,ForeignOwn,ImpExpInd,GovtContra,Minority,GenderCEO,WomenOwned,Relocate,MoveOften,Cottage,FirstYear,LastYear,Address_First,City_First,State_First,ZipCode_First,CBSA_First,FipsCounty_First,CityCode_First,NAICS90,NAICS91,NAICS92,NAICS93,NAICS94,NAICS95,NAICS96,NAICS97,NAICS98,NAICS99,NAICS00,NAICS01,NAICS02,NAICS03,NAICS04,NAICS05,NAICS06,NAICS07,NAICS08,NAICS09,NAICS10,NAICS11,NAICS12,NAICS13,NAICS14,NAICS15,NAICS16,NAICS17,NAICS18,NAICS19,NAICS20,NAICS21,NAICS22
0,3850,P165S+V165S,Bachoco OK Foods,35499086,3921 Reed Lane,Fort Smith,AR,72904,(800) 643-2506,2021-03-24,Certification - Export; Poultry Processing; Po...,Bachoco; Bachoco USA,35,3505,Large,35.42356,-94.385296,Sebastian County,5131.0,POINT (-5401392.376756892 5006953.750900381),POLYGON ((-5400392.376756892 5006953.750900381...,35499086,O K FOODS INC ...,BACHOCO ...,4601 N 6TH ST ...,FORT SMITH,AR,72904,2208,TRENT GOINS,CHIEF EXECUTIVE OFFICER,479,7834186,"Fort Smith, AR-OK ...",35499086,O K FOODS INC ...,BACHOCO ...,4601 N 6TH ST ...,FORT SMITH,AR,72904.0,2208.0,TRENT GOINS,CHIEF EXECUTIVE OFFICER,479,7834186,Y,14,15,22900,5131,53082,35.4241,94.3831,D,Headquarters,6.0,0.0,6.0,0.0,6.0,0.0,3.0,0.0,3.0,0.0,70.0,0.0,70.0,0.0,2700.0,0.0,3400.0,1.0,3400.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250.0,0.0,250,0,5_250to499,20,201,2015,201506,20150601,58120000.0,20110000.0,,,,No,20150608.0,20150608.0,20150608.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,20150601.0,"Poultry, processed, nsk",Meat Products,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,6338057.0,35499086.0,35499086.0,35499086.0,35499086.0,35499086.0,35499086.0,Yes,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,5131.0,No,,1930,69.0,73.0,59.0,69.0,59.0,71.0,68.0,77.0,68.0,78.0,77.0,79.0,75.0,78.0,75.0,78.0,76.0,79.0,71.0,76.0,72.0,77.0,72.0,77.0,73.0,78.0,77.0,79.0,78.0,80.0,70.0,78.0,77.0,79.0,75.0,78.0,75.0,76.0,76.0,77.0,78.0,79.0,78.0,79.0,79.0,79.0,79.0,79.0,77.0,79.0,75.0,77.0,75.0,76.0,73.0,78.0,73.0,76.0,76.0,77.0,74.0,76.0,67.0,77.0,61.0,72.0,--,--,--,--,--,--,1R2,1R2,1R2,--,1R2,1R2,1R2,1R3,1R3,1R3,1R3,1R3,1R3,1R3,1R3,1R3,--,--,--,--,1R3,1R3,1R2,1R2,1R2,1R2,1R2,598800.0,3.0,599400.0,3.0,598800.0,3.0,299400.0,3.0,299400.0,3.0,6986000.0,3.0,6986000.0,3.0,269460000.0,3.0,350863000.0,3.0,358020000.0,3.0,26400000.0,3.0,26400000.0,3.0,24950000.0,3.0,24950000.0,3.0,24950000.0,3.0,24950000.0,3.0,24950000.0,3.0,24950000.0,3.0,24950000.0,3.0,24950000.0,3.0,24900000.0,3.0,24850000.0,3.0,24800000.0,3.0,30225000.0,3.0,30325000.0,3.0,30175000.0,3.0,30175000.0,3.0,28947500.0,3.0,28947250.0,3.0,32500000.0,3.0,34091000.0,3.0,31250000.0,3.0,62337606.0,3.0,62337606.0,3.0,1.0,1.0,19981994,1998.0,2015.0,FORT SMITH ...,AR,72902.0,FORT SMITH ...,AR,72904.0,3400.0,1.0,350863000.0,3.0,N,I,Y,B,Y,,M,,1,Y,,1989,2022,READ LANE AND 6TH STREET ...,FORT SMITH,AR,72901,22900,5131,53082,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [None]:
df_duns

Unnamed: 0,establishment_id,establishment_number,establishment_name,duns_number,street,city,state,zip,phone,grant_date,...,NAICS13,NAICS14,NAICS15,NAICS16,NAICS17,NAICS18,NAICS19,NAICS20,NAICS21,NAICS22
0,3850,P165S+V165S,Bachoco OK Foods,35499086,3921 Reed Lane,Fort Smith,AR,72904,(800) 643-2506,2021-03-24,...,,,,,,,,,,
1,124228,P45131,"Farbest Foods, Inc.",62980412,3672 S. Keller Rd.,Vincennes,IN,47591,(812) 683-4200,2015-03-18,...,,,,,,,,,,
2,4567,P1307,Mar-Jac Poultry-AL,156939969,3301 3rd Avenue P. O. Box 931,Jasper,AL,35501,(205) 387-1441,2022-05-02,...,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0
3,126027,P46091,"Ozark Mountain Poultry, Inc.",799910448,1810 St. Louis Street,Batesville,AR,72501,(870) 569-2549,2020-01-08,...,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0
4,4530,P18557,"Sanderson Farms, Inc.",937853083,4039 River Ridge Road,Summit,MS,39666,(601) 684-9375,2019-08-01,...,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0
5,3879,P7085+V7085,"Tyson Foods, Inc",58671124,5322 South Park Drive,Broken Bow,OK,74728,(580) 584-9191,2022-01-26,...,,,,,,,,,,
6,4495,P1317+V1317,Wayne Farms LLC,797613275,700 McDonald Avenue,Albertville,AL,35950,(256) 878-3404,2021-04-21,...,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0,311615.0


In [None]:
df_duns.to_csv("duns.csv", index=False)