In [22]:
import geopandas
import pandas as pd
import numpy as np
from shapely.ops import unary_union

In [23]:
# Load the lot polygons; get_part_geometry looks geometries up in this frame.
gdf = geopandas.read_file("lots.geojson")

In [24]:
# Load the purchase records that the following cells annotate with geometries.
df = pd.read_csv('cook_county_cleaned3.csv')

In [25]:
# Preview the first purchase records.
df.head()

Unnamed: 0.1,Unnamed: 0,Cleaned Name,Purchaser,Residence,Social Status,Aliquot Parts or Lot,Section Number,Township,Range,Meridian,...,Total Price,Type of Sale,Date of Purchase,Volume,Page,Shapefile Object ID,Voided,Lot Matches,Name Matches,Duplicate
0,0,ABBE CHRISTIAN,ABBE CHRISTIAN,UNKNOWN,A,LOT2NWNE,16.0,36N,13E,3.0,...,160.08,SC,09/01/1852,819,64,8570.0,False,,0,False
1,1,ABBOTT ALLISON,ABBOTT ALLISON,UNKNOWN,A,LOT1SW,6.0,40N,13E,3.0,...,99.65,FD,02/17/1841,687,191,4828.0,False,,1,False
2,2,ABBOTT JOSEPH,ABBOTT JOSEPH,UNKNOWN,,NWSW,3.0,37N,12E,3.0,...,360.0,CN,05/09/1853,L7A,180,7367.0,False,2.0,2,False
3,3,ABBOTT SAMUEL S,ABBOTT SAMUEL,UNKNOWN,,SW,14.0,40N,13E,3.0,...,200.0,FD,11/27/1838,687,194,5097.0,False,,3;4,False
4,4,ABBOTT SAMUEL S,ABBOTT SAMUEL S,UNKNOWN,,SE,15.0,40N,13E,3.0,...,200.0,FD,11/17/1838,687,194,5100.0,False,,3;4,False


In [26]:
# Preview the first lot polygons.
gdf.head()

Unnamed: 0,TWPNUM,RNGNUM,SECTION,PART,INDIAN_BOUNDARY,SECTION_OBJECTID,MERIDIAN,RNGDIR,TWPDIR,geometry
0,42,9,5,SWSW,,3397,3,E,N,"POLYGON ((-88.21927 42.13920, -88.21925 42.142..."
1,42,9,5,SWSE,,3397,3,E,N,"POLYGON ((-88.21441 42.14294, -88.20955 42.142..."
2,42,9,5,SESW,,3397,3,E,N,"POLYGON ((-88.20955 42.14294, -88.20468 42.142..."
3,42,9,5,SESE,,3397,3,E,N,"POLYGON ((-88.20468 42.14294, -88.19983 42.142..."
4,42,9,5,SWNW,,3397,3,E,N,"POLYGON ((-88.21923 42.14671, -88.21441 42.146..."


In [27]:
# Cast the township / range / section keys to int in a single assignment so
# they compare equal to the ints that get_part_geometry parses from each row.
gdf[['TWPNUM', 'RNGNUM', "SECTION"]] = gdf[['TWPNUM', 'RNGNUM', "SECTION"]].astype(int)

In [28]:
# Derive the 'IBL' flag that get_part_geometry compares against
# gdf['INDIAN_BOUNDARY']: 'N' when the lot description contains 'NIBL',
# 'S' when it contains 'SIBL', '' otherwise.
# The default is written first so the cell gives the same result on every run.
df['IBL'] = ''
# na=False keeps rows with a missing description out of the mask instead of
# making .loc raise on a NaN-containing mask; regex=False matches literally.
df.loc[df['Aliquot Parts or Lot'].str.contains('NIBL', regex=False, na=False), "IBL"] = 'N'
df.loc[df['Aliquot Parts or Lot'].str.contains('SIBL', regex=False, na=False), "IBL"] = 'S'

In [29]:
# Substrings removed from a lot description, applied in list order. 'VOID' is
# listed ahead of 'VO' and 'V', so the longest marker is consumed first.
to_strip = ['2', '4', 'VOID', 'VO', "V", 'PRA', 'FR', 'TB', "MA", "TE", 'NIBL', 'SIBL']

# Scratch check of the stripping loop on a made-up value.
test = "TESTVOIDVOTB"
for string in to_strip:
    test = test.replace(string, "")

print(test)

ST


In [30]:
def strip_common_strings(lot):
    """Return ``lot`` with every substring listed in ``to_strip`` removed.

    The markers are removed one after another in the order they appear in the
    module-level ``to_strip`` list, each pass working on the result of the
    previous one.

    Args:
        lot: Lot description string.

    Returns:
        The description with all listed substrings deleted.
    """
    stripped = lot
    for marker in to_strip:
        stripped = stripped.replace(marker, "")
    return stripped

In [31]:
# Strip the common markers from every lot description, element by element.
new_lots = df['Aliquot Parts or Lot'].map(strip_common_strings)

In [32]:
# Inspect the stripped lot descriptions.
new_lots

0         LOTNWNE
1          LOT1SW
2            NWSW
3              SW
4              SE
           ...   
12280        NWNE
12281        NENE
12282        NWNE
12283    LOT1SESW
12284     ELOT1NW
Name: Aliquot Parts or Lot, Length: 12285, dtype: object

In [33]:
# Keep the stripped description next to the original; get_parts reads it as row['part'].
df['part'] = new_lots

In [34]:
class TooManyMatches(Exception):
    """Raised by get_part_geometry when a lot lookup finds more than one polygon."""

class NoMatch(Exception):
    """Raised by get_part_geometry when a lot lookup finds no polygon."""

In [35]:
def get_part_geometry(row, lot):
    """Look up the single polygon in ``gdf`` for one part of a purchase row.

    The township and range numbers are parsed from ``row['Township']`` and
    ``row['Range']`` with their last character dropped (e.g. ``'36N'`` -> 36),
    and the section from ``row['Section Number']``. Only those numbers, the
    part code and the Indian-boundary flag are compared; the direction letters
    and the meridian are not part of the lookup.

    Args:
        row: Purchase record providing 'Township', 'Range', 'Section Number'
            and 'IBL'.
        lot: Part code compared against ``gdf['PART']``.

    Returns:
        The geometry of the one matching row of ``gdf``.

    Raises:
        NoMatch: No row of ``gdf`` matches.
        TooManyMatches: More than one row matches; the row and part code are
            printed before raising.
    """
    township_num = int(row['Township'][:-1])
    range_num = int(row["Range"][:-1])
    section_num = int(row['Section Number'])

    is_match = (
        (gdf['TWPNUM'] == township_num)
        & (gdf['RNGNUM'] == range_num)
        & (gdf['SECTION'] == section_num)
        & (gdf["PART"] == lot)
        & (gdf['INDIAN_BOUNDARY'] == row['IBL'])
    )
    matches = gdf.loc[is_match, 'geometry'].values

    match_count = len(matches)
    if match_count == 0:
        raise NoMatch

    if match_count > 1:
        # Show the offending record before failing, as an aid to fixing the data.
        print(row)
        print(lot)
        raise TooManyMatches

    return matches[0]
    

def get_parts(row):
    """Resolve a purchase row's lot description to one geometry, or None.

    ``row['part']`` is read as an optional prefix followed by a quarter
    section (its last two characters). Lookups use the quarter first and the
    quarter-quarter second, e.g. ``'NWSW'`` is looked up as ``'SWNW'``.

    Supported descriptions:
        * 'SEC' / 'SECMA' in ``row['Aliquot Parts or Lot']``: the whole
          section, i.e. the union of all sixteen quarter-quarters.
        * One character (N/E/S/W): a half section, the union of the eight
          quarter-quarters in the two quarters on that side.
        * Two characters (a quarter): the union of its four quarter-quarters.
        * Three characters (direction + quarter): half of a quarter, the union
          of two quarter-quarters.
        * Four characters (quarter + quarter): that single quarter-quarter.

    Args:
        row: Purchase record providing 'part', 'Aliquot Parts or Lot' and the
            fields read by get_part_geometry.

    Returns:
        A geometry (the result of ``unary_union`` for multi-part
        descriptions), or None when the description is not one of the
        supported forms or any required part has no match.

    Raises:
        TooManyMatches: Propagated from get_part_geometry when a part matches
            more than one polygon.
    """
    legit_sections = ['NW', "NE", "SW", "SE"]
    legit_directions = {'N': ['NE', "NW"], "E": ['NE', "SE"], "S": ['SE', 'SW'], "W": ['NW', 'SW']}
    part = row['part']

    def union_of(quarters, sixteenths):
        # Merge every quarter + quarter-quarter combination into one geometry.
        return unary_union([
            get_part_geometry(row, quarter + sixteenth)
            for quarter in quarters
            for sixteenth in sixteenths
        ])

    try:
        # Whole sections are tested first: 'SEC' and 'SECMA' both strip down to
        # the three-character 'SEC', which the quarter-section checks below
        # would reject before this case could ever be considered.
        if row['Aliquot Parts or Lot'] in ['SEC', 'SECMA']:
            return union_of(legit_sections, legit_sections)

        if len(part) == 1:
            quarters = legit_directions.get(part)
            if not quarters:
                # A single character that is not N/E/S/W cannot be resolved.
                return None
            return union_of(quarters, legit_sections)

        if not 2 <= len(part) <= 4:
            return None

        quarter = part[-2:]
        if quarter not in legit_sections:
            return None

        if len(part) == 4:
            sixteenth = part[:-2]
            if sixteenth not in legit_sections:
                return None
            return get_part_geometry(row, quarter + sixteenth)

        if len(part) == 3:
            sixteenths = legit_directions.get(part[0])
            if not sixteenths:
                return None
            return union_of([quarter], sixteenths)

        # Two characters: the whole quarter section.
        return union_of([quarter], legit_sections)

    except NoMatch:
        # One missing part leaves the whole description unresolved.
        return None
    

In [36]:
# Resolve every purchase row to a geometry; get_parts yields None for rows it cannot resolve.
geometries = df.apply(get_parts, axis=1)

  result[:] = values


In [37]:
# Attach the resolved geometries to the purchase records.
df['geometry'] = geometries

In [38]:
# index=False: the default RangeIndex carries no information, and writing it
# adds one more unnamed column each time the file is saved and read back
# (the input already arrived with such an 'Unnamed: 0' column).
df.to_csv("cook_county_with_geometries.csv", index=False)

In [39]:
# Total number of purchase records.
len(df)

12285

In [40]:
# Number of purchase records that were resolved to a geometry.
int(df['geometry'].notna().sum())

7035

In [41]:
# Column dtypes after the IBL, part and geometry columns were added.
df.dtypes

Unnamed: 0                int64
Cleaned Name             object
Purchaser                object
Residence                object
Social Status            object
Aliquot Parts or Lot     object
Section Number          float64
Township                 object
Range                    object
Meridian                float64
County of Purchase       object
Acres                    object
Price per Acre           object
Total Price              object
Type of Sale             object
Date of Purchase         object
Volume                   object
Page                      int64
Shapefile Object ID     float64
Voided                     bool
Lot Matches              object
Name Matches             object
Duplicate                  bool
IBL                      object
part                     object
geometry                 object
dtype: object

In [42]:
# Name the geometry column explicitly and carry over the CRS of the source
# lots: the plain DataFrame holds none, so the frame would otherwise be
# built and exported without a CRS.
newgdf = geopandas.GeoDataFrame(df, geometry='geometry', crs=gdf.crs)

newgdf.to_file("cook_county_with_geometries.geojson", driver="GeoJSON")