In [1]:
import pandas as pd
import geopandas as gpd
import contextily as cx
import matplotlib.pyplot as plt


In [2]:
# Read the raw Kern County well records and preview the first row.
oil = pd.read_csv(filepath_or_buffer='data/kern_county_oil.csv')
oil.head(n=1)

Unnamed: 0,LeaseName,WellDesign,WellStatus,WellType,WellTypeLa,OperatorNa,Place,CountyName,Latitude,Longitude,GISSource,isDirectio,WellSymbol
0,Southern Pacific,Southern Pacific 11H,Canceled,OG,Oil & Gas,California Resources Production Corporation,Kern County,Kern,35.401291,-118.860405,Operator,N,CanceledOG


In [3]:
# Keep only the status, type-label, and coordinate columns used by the later cells.
oilsubset = oil.loc[:, ['WellStatus', 'WellTypeLa', 'Latitude', 'Longitude']]
oilsubset.head()

Unnamed: 0,WellStatus,WellTypeLa,Latitude,Longitude
0,Canceled,Oil & Gas,35.401291,-118.860405
1,Canceled,Oil & Gas,35.397495,-118.855888
2,Canceled,Oil & Gas,35.404583,-118.850471
3,Canceled,Oil & Gas,35.40361,-118.849937
4,Canceled,Oil & Gas,35.405106,-118.856483


In [4]:
# Exclude rows whose WellStatus is 'Canceled'.
oilsubset = oilsubset.loc[oilsubset['WellStatus'].ne('Canceled')]

In [5]:
# Exclude rows whose WellStatus is 'Plugged'.
oilsubset = oilsubset.loc[oilsubset['WellStatus'].ne('Plugged')]

In [6]:
# Exclude rows whose WellStatus is 'Unknown'.
oilsubset = oilsubset.loc[oilsubset['WellStatus'].ne('Unknown')]

In [7]:
# Tally how many of the remaining wells fall under each WellStatus value;
# the result has one row per status with the tally in a 'WellCount' column.
oil_count = (
    oilsubset
    .value_counts(subset='WellStatus')
    .reset_index(name='WellCount')
)

In [8]:
# Attach the per-status well count to every well row (left join on WellStatus).
# Any WellCount column left over from a previous run of this cell is dropped first,
# so re-running the cell does not produce WellCount_x / WellCount_y duplicates.
oilsubset = (
    oilsubset
    .drop(columns='WellCount', errors='ignore')
    .merge(oil_count, how='left', on='WellStatus')
)
oilsubset

Unnamed: 0,WellStatus,WellTypeLa,Latitude,Longitude,WellCount
0,Idle,Oil & Gas,35.402748,-118.857201,27930
1,Idle,Oil & Gas,35.397060,-118.860001,27930
2,Idle,Oil & Gas,35.393887,-118.860725,27930
3,Idle,Oil & Gas,35.397015,-118.858070,27930
4,Idle,Oil & Gas,35.395214,-118.860275,27930
...,...,...,...,...,...
76372,New,Oil & Gas,35.084312,-119.239343,2251
76373,New,Oil & Gas,35.084136,-119.238809,2251
76374,New,Oil & Gas,35.047459,-119.182383,2251
76375,New,Oil & Gas,35.068882,-119.242922,2251


In [9]:
# Promote the well table to a GeoDataFrame: one point per well, built from the
# Longitude (x) and Latitude (y) columns and tagged as EPSG:4326.
gdfoil = gpd.GeoDataFrame(
    data=oilsubset,
    geometry=gpd.points_from_xy(x=oilsubset['Longitude'], y=oilsubset['Latitude']),
    crs="EPSG:4326",
)
gdfoil

Unnamed: 0,WellStatus,WellTypeLa,Latitude,Longitude,WellCount,geometry
0,Idle,Oil & Gas,35.402748,-118.857201,27930,POINT (-118.85720 35.40275)
1,Idle,Oil & Gas,35.397060,-118.860001,27930,POINT (-118.86000 35.39706)
2,Idle,Oil & Gas,35.393887,-118.860725,27930,POINT (-118.86073 35.39389)
3,Idle,Oil & Gas,35.397015,-118.858070,27930,POINT (-118.85807 35.39701)
4,Idle,Oil & Gas,35.395214,-118.860275,27930,POINT (-118.86028 35.39521)
...,...,...,...,...,...,...
76372,New,Oil & Gas,35.084312,-119.239343,2251,POINT (-119.23934 35.08431)
76373,New,Oil & Gas,35.084136,-119.238809,2251,POINT (-119.23881 35.08414)
76374,New,Oil & Gas,35.047459,-119.182383,2251,POINT (-119.18238 35.04746)
76375,New,Oil & Gas,35.068882,-119.242922,2251,POINT (-119.24292 35.06888)


In [10]:
# Load the census layer from the local GeoJSON file.
census = gpd.read_file(filename='data/kern_income.geojson')


In [11]:
# Only the geography identifier and its shape are needed for the spatial join.
census = census.loc[:, ['geoid', 'geometry']]

In [None]:
# Slimmer variant of the census/well join, to see whether it lowers memory use.
# geopandas has no top-level `join` function; spatial joins go through `gpd.sjoin`,
# and both inputs must carry geometry. The well points are therefore built inline
# from the Longitude/Latitude columns (EPSG:4326, the same CRS used for gdfoil).
# The raw coordinate columns are left out of the right-hand side: the inner join
# keeps only the census (left) geometry, so they would just add weight.
oilcensus2 = gpd.sjoin(
    census,
    gpd.GeoDataFrame(
        oilsubset.drop(columns=['Latitude', 'Longitude']),
        geometry=gpd.points_from_xy(oilsubset['Longitude'], oilsubset['Latitude']),
        crs="EPSG:4326",
    ),
)
oilcensus2.head(1)

In [12]:
# Spatially join the well points onto the census geometries (inner join, which is
# the sjoin default); each output row pairs one census geometry with one matching well.
oilcensus = gpd.sjoin(left_df=census, right_df=gdfoil, how='inner')
oilcensus.head(n=1)

Unnamed: 0,geoid,geometry,index_right,WellStatus,WellTypeLa,Latitude,Longitude,WellCount
0,05000US06029,"MULTIPOLYGON (((-119.91366 35.43926, -119.9232...",63762,Active,Oil & Gas,35.033882,-119.37056,46049


In [None]:
# Save the oilcensus2 join result as GeoJSON so the merged, subsetted data can be
# reloaded later instead of being rebuilt in memory.
# This is the experimental variant, exported to check whether it reduces memory use.
oilcensus2.to_file(
    filename='oilcensus2.geojson',
    driver='GeoJSON',
)

In [13]:
# Sanity check: (rows, columns) of the joined frame.
# NOTE(review): the recorded row count is roughly twice the number of wells in
# oilsubset, which suggests most wells match more than one census geometry. The
# first census row (geoid 05000US06029) looks like a county-wide polygon that
# would contain every well -- TODO confirm, and consider dropping it before the join.
oilcensus.shape

(152754, 8)

In [14]:
# Save the merged, subsetted join result as GeoJSON so it can be reloaded later
# instead of being recomputed and held in memory.
oilcensus.to_file(
    filename='oilcensus.geojson',
    driver='GeoJSON',
)

In [None]:
# Choropleth of the joined census geometries, shaded by the WellCount column.
# NOTE(review): WellCount is the dataset-wide number of wells sharing a row's
# WellStatus (see the value_counts cell), not a per-geography count -- confirm
# this is the variable the map is meant to show.
fig, ax = plt.subplots(figsize=(12, 12))

oilcensus.plot(ax=ax, column='WellCount', legend=True, cmap='Reds')
ax.set_title('Kern County census geographies shaded by WellCount')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

# plt.show() rather than fig.show(): Figure.show() needs a GUI backend and only
# emits a UserWarning under a non-GUI backend such as the notebook inline one.
plt.show()