In [1]:
# In this notebook we explore the flood risk map of England, postcode by postcode

In [2]:
# Kaggle page of the "open flood risk by postcode" dataset, kept for reference;
# the cells shown below read a local CSV rather than fetching from this URL.
data_url = 'https://www.kaggle.com/getthedata/open-flood-risk-by-postcode/home'

In [3]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [4]:
import folium
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [5]:
# Load the raw postcode table. The file has no header row, so columns come back
# numbered 0-9.
#
# keep_default_na=False: the file marks "no data" with the literal strings '\N'
# and 'None'. Later cells rely on those staying plain strings (e.g. the
# comparison against 'None' when filtering on flood probability). Newer pandas
# releases include 'None' in the default NA markers and would silently turn it
# into NaN, so automatic NA detection is switched off to keep the result the
# same on every pandas version.
forig = pd.read_csv(
    'open_flood_risk_by_postcode.csv',
    header=None,
    keep_default_na=False,
)

forig.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,TR23 0PR,\N,,\N,\N,\N,87897,15021,49.953605,-6.352647
1,TR23 0WA,\N,,\N,\N,\N,87903,15077,49.954111,-6.352609
2,TR22 0PL,\N,,\N,\N,\N,88364,8478,49.895171,-6.340873
3,TR24 0QE,\N,,\N,\N,\N,88849,15298,49.956586,-6.339638
4,TR24 0QL,\N,,\N,\N,\N,88857,15392,49.957433,-6.339603


In [6]:
# Descriptive names for the ten positional columns, in file order.
column_names = [
    'postcode', 'fid', 'prob', 'suitability', 'pubdate',
    'insurancerisk', 'easting', 'northing', 'lat', 'lon',
]

# Rename in place: position -> name for the columns, and str() applied to every
# row label (so the index holds strings from here on).
forig.rename(
    index=str,
    columns=dict(enumerate(column_names)),
    inplace=True,
)

forig.head()

Unnamed: 0,postcode,fid,prob,suitability,pubdate,insurancerisk,easting,northing,lat,lon
0,TR23 0PR,\N,,\N,\N,\N,87897,15021,49.953605,-6.352647
1,TR23 0WA,\N,,\N,\N,\N,87903,15077,49.954111,-6.352609
2,TR22 0PL,\N,,\N,\N,\N,88364,8478,49.895171,-6.340873
3,TR24 0QE,\N,,\N,\N,\N,88849,15298,49.956586,-6.339638
4,TR24 0QL,\N,,\N,\N,\N,88857,15392,49.957433,-6.339603


Dataset exploration

In [7]:
# Report the overall size of the table.
n_rows, n_cols = forig.shape
print(f'Dataset is {n_rows} rows by {n_cols} columns')

Dataset is 1443995 rows by 10 columns


In [8]:
forig.info()
# info() reports zero nulls, but that is misleading: missing values are stored
# as literal placeholder strings ('\N', and 'None' in the prob column - see the
# value_counts() cells below), so pandas counts them as ordinary non-null
# objects. These placeholders still need to be handled before analysis.

<class 'pandas.core.frame.DataFrame'>
Index: 1443995 entries, 0 to 1443994
Data columns (total 10 columns):
postcode         1443995 non-null object
fid              1443995 non-null object
prob             1443995 non-null object
suitability      1443995 non-null object
pubdate          1443995 non-null object
insurancerisk    1443995 non-null object
easting          1443995 non-null int64
northing         1443995 non-null int64
lat              1443995 non-null float64
lon              1443995 non-null float64
dtypes: float64(2), int64(2), object(6)
memory usage: 121.2+ MB


In [9]:
# Summary statistics; only the numeric columns (easting, northing, lat, lon)
# are included because the other columns hold strings.
forig.describe()

Unnamed: 0,easting,northing,lat,lon
count,1443995.0,1443995.0,1443995.0,1443995.0
mean,451407.2,274950.6,52.36327,-1.252836
std,87680.43,130774.9,1.180242,1.2687
min,87897.0,8478.0,49.89517,-6.352647
25%,389957.5,173230.0,51.44678,-2.150858
50%,446315.0,252458.0,52.15679,-1.314461
75%,523439.5,389856.0,53.4028,-0.214226
max,655448.0,656014.0,55.79742,1.760443


Column exploration

In [10]:
# fid: the EA's ID for an area - show the ten most frequent values
forig['fid'].value_counts().head(10)

\N         1321988
1407444       8149
1407419       3626
1407454       1604
143204        1368
1407476       1077
1407466        952
1023362        952
560313         940
558494         757
Name: fid, dtype: int64

In [11]:
forig['prob'].value_counts()
# prob: likelihood of flooding, banded as
#   High     - greater than 3.3%
#   Medium   - at most 3.3% and greater than 1%
#   Low      - at most 1% and greater than 0.1%
#   Very Low - at most 0.1%
# 'None' marks postcodes with no flood-likelihood rating.

None        1321988
Low           80001
Medium        24928
High          12305
Very Low       4773
Name: prob, dtype: int64

In [12]:
forig['suitability'].value_counts()
# suitability: the geographic scale at which the flood likelihood is suitable
# for use (from 'National to County' down to 'Property (inc. internal)')

\N                           1321988
County to Town                 70851
Town to Street                 29771
National to County             13257
Street to Parcels of land       8117
Property (inc. internal)          11
Name: suitability, dtype: int64

In [13]:
# pubdate: date (financial quarter) of publication - ten most frequent values
forig['pubdate'].value_counts().head(10)

\N            1321988
2013-12-22      71618
2009-08-01      18761
2011-08-04       8071
2010-02-01       6050
2011-10-20       4529
2013-12-02       3641
2011-05-12       3592
2013-08-08       1716
2013-06-20       1222
Name: pubdate, dtype: int64

In [14]:
forig['insurancerisk'].value_counts()
# insurancerisk: 'Yes' flags a significant risk of flooding,
# i.e. a 1 in 75 chance of flooding in any year

\N     1321988
No      101603
Yes      20404
Name: insurancerisk, dtype: int64

Visualize map borders

In [15]:
def _extreme_point(frame, column, pick_max):
    """Return (lat, lon) of the row where `column` is largest or smallest.

    Parameters
    ----------
    frame : DataFrame with 'lat' and 'lon' columns and unique row labels.
    column : name of the column to take the extreme of.
    pick_max : True for the maximum, False for the minimum.

    Returns
    -------
    tuple of two plain Python floats, (lat, lon). If several rows share the
    extreme value, the first one in row order is used.
    """
    values = frame[column]
    label = values.idxmax() if pick_max else values.idxmin()
    # Select by column label rather than by position so the lookup does not
    # depend on the column order of the frame.
    lat, lon = frame.loc[label, ['lat', 'lon']]
    return (float(lat), float(lon))


# Northern-, southern-, eastern- and western-most postcodes in the dataset.
maxlat = _extreme_point(forig, 'lat', pick_max=True)
minlat = _extreme_point(forig, 'lat', pick_max=False)
maxlon = _extreme_point(forig, 'lon', pick_max=True)
minlon = _extreme_point(forig, 'lon', pick_max=False)

print(maxlat, minlat, maxlon, minlon)

(55.797415, -2.030216) (49.895171000000005, -6.340873) (52.479231000000006, 1.760443) (49.953604999999996, -6.352647)


In [16]:
# Base map used to check the geographic extent of the dataset.
map_border = folium.Map(
    location=[51.4571, -0.1231],
    tiles='Stamen Terrain',
    zoom_start=4,
)

# One marker per extreme point; the popup names which extreme it is.
extreme_points = (
    ('Maxlat', maxlat),
    ('Minlat', minlat),
    ('Maxlon', maxlon),
    ('Minlon', minlon),
)
for label, point in extreme_points:
    folium.Marker(point, popup=label).add_to(map_border)

map_border

# confirmed that border is UK

Visualize high flood risk areas

In [17]:
# Coordinates (lat, lon) of every postcode whose flood probability is 'High'.
is_high = forig['prob'] == 'High'
fhigh = forig.loc[is_high, ['lat', 'lon']]

print(fhigh.shape)

fhigh.head(2)

(12305, 2)


Unnamed: 0,lat,lon
251,50.128392,-5.676455
261,50.128465,-5.673858


In [18]:
# compile high flood risk coordinates

# Only the first MAX_HIGH_POINTS rows are plotted, so slice the frame before
# converting instead of building a tuple for every row and discarding most.
MAX_HIGH_POINTS = 200

# Each element is a plain (lat, lon) tuple, ready to pass to folium.
list_high = list(
    fhigh[['lat', 'lon']]
    .head(MAX_HIGH_POINTS)
    .itertuples(index=False, name=None)
)

len(list_high)

200

In [19]:
# Map for the sampled high-risk postcodes.
map_high = folium.Map(
    location=[50.1593, -5.2460],
    tiles='Stamen Terrain',
    zoom_start=8,
)

# Every high-risk point is drawn as an unfilled crimson circle.
circle_style = dict(radius=50, color='crimson', fill=False)
for point in list_high:
    folium.Circle(location=point, **circle_style).add_to(map_high)

# Clicking the map pops up the latitude/longitude of the clicked spot.
map_high.add_child(folium.LatLngPopup())

map_high

FID Exploration

In [20]:
# Fresh map for marking out a rectangular study area.
map_border = folium.Map(
    location=[50.4855, -3.8013],
    tiles='Stamen Terrain',
    zoom_start=6,
)

# Corners of the rectangle, each as (lat, lon).
nwdot = (50.6000, -4.3300)
swdot = (50.1200, -4.3300)
nedot = (50.6000, -3.1500)
sedot = (50.1200, -3.1500)

corners = (
    ('nwdot', nwdot),
    ('swdot', swdot),
    ('nedot', nedot),
    ('sedot', sedot),
)
for corner_name, corner in corners:
    folium.Marker(corner, popup=corner_name).add_to(map_border)

# Clicking the map pops up the latitude/longitude of the clicked spot.
map_border.add_child(folium.LatLngPopup())

map_border

# cutting out an area to understand FIDs

In [21]:
# Quick reminder of the column layout before filtering on lat/lon/prob.
forig.head(2)

Unnamed: 0,postcode,fid,prob,suitability,pubdate,insurancerisk,easting,northing,lat,lon
0,TR23 0PR,\N,,\N,\N,\N,87897,15021,49.953605,-6.352647
1,TR23 0WA,\N,,\N,\N,\N,87903,15077,49.954111,-6.352609


In [22]:
# Bounding box of the study area (inclusive on all four sides).
LAT_MIN, LAT_MAX = 50.1200, 50.6000
LON_MIN, LON_MAX = -4.3300, -3.1500

# Compute the "inside the box" mask once and reuse it for every risk level,
# so the bounds cannot drift apart between the three selections.
in_study_area = (
    forig['lat'].between(LAT_MIN, LAT_MAX)
    & forig['lon'].between(LON_MIN, LON_MAX)
)

# One frame per flood-probability level within the study area.
ffidh = forig[in_study_area & (forig['prob'] == 'High')]
ffidm = forig[in_study_area & (forig['prob'] == 'Medium')]
ffidl = forig[in_study_area & (forig['prob'] == 'Low')]

print(f'ffidh has {ffidh.shape[0]} rows')
print(f'ffidm has {ffidm.shape[0]} rows')
print(f'ffidl has {ffidl.shape[0]} rows')

ffidh has 318 rows
ffidm has 394 rows
ffidl has 467 rows


In [23]:
def _latlon_pairs(frame):
    """Return the rows of `frame` as a list of plain (lat, lon) tuples.

    Columns are selected by label, so the result does not depend on where
    'lat' and 'lon' sit in the frame.
    """
    return list(frame[['lat', 'lon']].itertuples(index=False, name=None))


# Coordinate-only views of the high / medium / low risk selections.
ffidhc = ffidh[['lat', 'lon']]
ffidmc = ffidm[['lat', 'lon']]
ffidlc = ffidl[['lat', 'lon']]

list_fidh = _latlon_pairs(ffidhc)
list_fidm = _latlon_pairs(ffidmc)
list_fidl = _latlon_pairs(ffidlc)

# Show the first few points of each list as a sanity check.
print(f'ffidh: {list_fidh[:3]}')
print(f'ffidm: {list_fidm[:3]}')
print(f'ffidl: {list_fidl[:3]}')

ffidh: [(50.397983, -4.316582), (50.397928, -4.316392), (50.398099, -4.3163860000000005)]
ffidm: [(50.405605, -4.2352360000000004), (50.346318, -4.220025), (50.347179, -4.218744)]
ffidl: [(50.504709999999996, -4.315042), (50.504523999999996, -4.314455000000001), (50.504729, -4.314098)]


In [24]:
# Visualizing flood risk with FID designated area
# Flood risk level varies by color

map_fid = folium.Map(
    location=[50.4855, -3.8013],
    tiles='Stamen Toner',
    zoom_start=9,
)

# (points, colour) per risk level, drawn in the order high -> medium -> low.
risk_layers = (
    (list_fidh, 'crimson'),
    (list_fidm, 'orange'),
    (list_fidl, 'yellow'),
)

for points, shade in risk_layers:
    for point in points:
        folium.Circle(
            location=point,
            radius=50,
            color=shade,
            fill=False,
        ).add_to(map_fid)

map_fid

Re-examine and save dataset

In [25]:
# Look at the full table once more before filtering and saving it.
forig.head()

Unnamed: 0,postcode,fid,prob,suitability,pubdate,insurancerisk,easting,northing,lat,lon
0,TR23 0PR,\N,,\N,\N,\N,87897,15021,49.953605,-6.352647
1,TR23 0WA,\N,,\N,\N,\N,87903,15077,49.954111,-6.352609
2,TR22 0PL,\N,,\N,\N,\N,88364,8478,49.895171,-6.340873
3,TR24 0QE,\N,,\N,\N,\N,88849,15298,49.956586,-6.339638
4,TR24 0QL,\N,,\N,\N,\N,88857,15392,49.957433,-6.339603


In [26]:
# Every row should describe a distinct postcode. Report both outcomes so that
# a silent cell can never be mistaken for a passed check.
if forig['postcode'].is_unique:
    print('All postcodes are unique')
else:
    # duplicated() flags every occurrence after the first of each postcode.
    n_repeats = int(forig['postcode'].duplicated().sum())
    print(f'{n_repeats} rows repeat an earlier postcode')

All postcodes are unique


In [27]:
# removing non-flood risk data points

# Postcodes without a flood-risk rating carry the placeholder 'None' in `prob`.
# Depending on how the CSV was parsed, that placeholder is either the literal
# string 'None' or a missing value (NaN). A bare `!= 'None'` test is True for
# NaN and would keep every row, so both encodings are excluded explicitly.
has_risk = forig['prob'].notna() & (forig['prob'] != 'None')
fedit = forig[has_risk]

print(fedit.shape)

fedit['prob'].value_counts()

(122007, 10)


Low         80001
Medium      24928
High        12305
Very Low     4773
Name: prob, dtype: int64

In [28]:
# save fedit for part 02

try:
    # mode='x' is exclusive creation: the write fails if the file is already
    # there, so an earlier export is never overwritten.
    fedit.to_csv('englandfloodrisk.csv', mode='x')
    print('File saved')

except FileExistsError:
    # Only the "already exported" case is expected. Any other failure
    # (permissions, disk space, a bad path) propagates instead of being
    # reported as "File exists".
    print('File exists')

File exists


In [29]:
# In the next section I will attempt a cluster analysis of flood risk