## Workbench for analysing sinkhole population data at the Dead Sea

### read data

In [38]:
import pandas as pd
import numpy as np

# --- Western shore, digitised from Avni et al 2016 -------------------------
# zeelim      : all holes at Ze'elim
# DS_west_all : all western holes except Ze'elim
# Neither file has a header row, hence header=None.
zeelim = pd.read_csv('../data/DeadSeaWestHolesZeelim.csv', header=None)
DS_west_all = pd.read_csv('../data/DeadSeaWestHolesTotal.csv', header=None)

# --- Ghor Al-Haditha (GAH), my own work ------------------------------------
# Tab-delimited text file, so read_csv is used with sep="\t"
# (pd.read_fwf would be the alternative if the file were fixed-width).
GAH = pd.read_csv('../data/SH_total.txt', sep="\t", header=None)

# Label the columns. The two western series are cumulative counts, whereas
# GAH records the holes in that year rather than a cumulative number.
zeelim.columns = DS_west_all.columns = ["year", "cum_no_holes"]
GAH.columns = ["year", "holes"]

# Preview the first rows of each series.
print('Ze\'elim:', zeelim.head(), sep='\n')
print('Western Total:', DS_west_all.head(), sep='\n')
print('Ghor Al-Haditha:', GAH.head(), sep='\n')

Ze'elim:
          year  cum_no_holes
0  1999.966755      0.418240
1  2002.759309     10.740664
2  2003.573803     20.655086
3  2004.900266     25.448267
4  2005.877660     26.568447
Western Total:
          year  cum_no_holes
0  1980.000000      0.000000
1  1996.848404    223.470550
2  1999.966755    414.739580
3  2002.014628    752.411946
4  2002.736037    851.536665
Ghor Al-Haditha:
   year  holes
0  1985      7
1  1986      2
2  1992     19
3  1994     37
4  1995      6


### clean data

In [66]:
# Snap the digitised Ze'elim and western-total values to whole numbers.
# DataFrame.round() keeps the float dtype, so years display as e.g. 2000.0.
zeelim, DS_west_all = zeelim.round(), DS_west_all.round()

# reverse cumulative nature of western data to get number of holes per row
def reverseCum(df):
    """Convert a cumulative hole count into per-row hole counts.

    Reads ``df['cum_no_holes']`` and stores the result in ``df['holes']``
    (created or overwritten), so ``df`` is modified in place. The first row
    keeps its cumulative value; every later row holds the difference from
    the row directly above it.

    Rows are processed in positional order, so the frame's index labels are
    irrelevant: a filtered or re-sorted frame works without ``reset_index``.
    An empty frame gets an empty ``holes`` column and returns ``[]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a numeric ``cum_no_holes`` column.

    Returns
    -------
    list
        The per-row counts, one entry per row of ``df``.
    """
    cumulative = df['cum_no_holes'].to_numpy()
    # prepend=0 makes the first difference equal the first cumulative value,
    # so the output has the same length as the input.
    holes = np.diff(cumulative, prepend=0)
    df['holes'] = holes
    return list(holes)

# Derive per-row hole counts for both western series. reverseCum also adds a
# 'holes' column to each frame; the returned lists are kept as output1/output2.
output1, output2 = [reverseCum(frame) for frame in (zeelim, DS_west_all)]

# Preview the frames with the new 'holes' column.
print('Ze\'elim:', zeelim.head(), sep='\n')
print('Western Total:', DS_west_all.head(), sep='\n')

# TODO: resample and interpolate the data as appropriate

# TODO: create columns for cumulative and non-cumulative holes as appropriate


Ze'elim:
     year  cum_no_holes  holes
0  2000.0           0.0    0.0
1  2003.0          11.0   11.0
2  2004.0          21.0   10.0
3  2005.0          25.0    4.0
4  2006.0          27.0    2.0
Western Total:
     year  cum_no_holes  holes
0  1980.0           0.0    0.0
1  1997.0         223.0  223.0
2  2000.0         415.0  192.0
3  2002.0         752.0  337.0
4  2003.0         852.0  100.0


### plot data