# running mordecai via elasticsearch

Set up Mordecai by following the instructions at https://github.com/ryerson-ggl/mordecai-geoparser-on-windows

Within `mordecai-geoparser-on-windows`:
1. Activate the `mordecai-2.0.1.post1` environment.
2. Run the Docker container (open Docker Desktop and run `geonames_index` under Containers/Apps).
3. Confirm the container is running: `docker ps -f name=geonames_index`
4. Open http://localhost:9200/_cat/indices?v to check the index.
5. The health status should be yellow; if nothing is shown, consider changing internet service.
6. Run Jupyter Notebook from within the `mordecai-2.0.1.post1` environment.

# import data

In [1]:
import warnings
warnings.filterwarnings('ignore')  # silence every warning for the rest of the session

import pandas as pd
pd.set_option('display.max_columns', None)  # None = never truncate columns
pd.set_option('display.max_rows', 1000)     # show up to 1000 rows before truncating

# Drop the first five parsed rows, then promote the next row to column header.
# NOTE(review): presumably those rows are export preamble -- verify against the raw file.
d = (
    pd.read_csv("raw/emdat_public_2021_08_08_query_uid-lkltYs.csv")
    .iloc[5:]
    .reset_index(drop=True)
)
d.columns = d.iloc[0]
d = d.drop(index=0).reset_index(drop=True)

# Keep only records that have a Location value (removed 69 rows without one).
d2 = d.dropna(subset=["Location"]).reset_index(drop=True)

# Disaster number plus its free-text location description.
loca = d2.loc[:, ["Dis No", "Location"]]

In [2]:
# Preview the (Dis No, Location) pairs selected above.
loca

Unnamed: 0,Dis No,Location
0,2000-0372-CHN,"Quanzhou, Zhangzhou, Xiamen districts (Fujian ..."
1,2000-9186-AFG,"Kandahar, Hilmand, Nimroz, Zabul, Uruzgan prov..."
2,2000-0373-BGD,"Bakalia, Kotwali, Chandgaon, Pahartali, Hathaz..."
3,2000-0484-BTN,"Pasakha area (Sampheling district, Chhukha pro..."
4,2000-0677-CHN,"Xuwen area (Zhanjiang district, Guandong provi..."
...,...,...
3420,2020-0216-YEM,"Hadhramaut, Shabwa, Al Mahrah"
3421,2020-0314-YEM,"Marib, Dhale, Abyan, Hadhramaut, Ibb, Hodeidah..."
3422,2020-0319-YEM,"Lahij, Ibb, Shabwah, Abyan and Sana'a governor..."
3423,2021-0449-YEM,"Marib,Taizz, al-Mahrah, Hadramawt, Shabwa, Aby..."


# geoparse

In [3]:
# Create the Mordecai geoparser instance used by the geoparse loop below.
# NOTE(review): presumably requires the geonames_index container from the setup
# steps to be running -- confirm before executing.
from mordecai import Geoparser
geo = Geoparser()

Models path: C:\Users\TzeMin\miniconda3\envs\mordecai-2.0.1.post1\lib\site-packages\mordecai\models\


In [None]:
# Geoparse every Location string and collect one record per place name that
# Mordecai resolved to coordinates:
#   [Dis No, matched word, ISO3 country code, longitude, latitude]
dataf = []
for disno, text in loca[["Dis No", "Location"]].itertuples(index=False, name=None):
    # The loop variable is `entity`, not `d`: reusing `d` would overwrite the
    # DataFrame `d` loaded in the import cell.
    for entity in geo.geoparse(text):
        geo_info = entity.get("geo")
        if not geo_info:
            # No "geo" entry (or an empty one) for this entity; skip it.
            continue
        dataf.append([
            disno,
            entity["word"],
            geo_info["country_code3"],
            geo_info["lon"],
            geo_info["lat"],
        ])

In [7]:
# Tabulate the collected records: one row per resolved place name.
df = pd.DataFrame(
    data=dataf,
    columns=["Dis No", "Locat", "CountryCode", "Lon", "Lat"],
)
df

Unnamed: 0,Dis No,Locat,CountryCode,Lon,Lat
0,2000-0372-CHN,Quanzhou,CHN,118.58583,24.91389
1,2000-0372-CHN,Zhangzhou,CHN,117.65556,24.51333
2,2000-0372-CHN,Xiamen,CHN,118.08187,24.47979
3,2000-0372-CHN,Fujian Sheng,CHN,118,26.25
4,2000-0372-CHN,Jiangxi Sheng,CHN,115.66667,27.66667
...,...,...,...,...,...
11330,2020-0314-YEM,Ibb,YEM,44.16667,14
11331,2021-0449-YEM,Marib,YEM,45.32581,15.46253
11332,2021-0449-YEM,Hadramawt,YEM,49,16
11333,2021-0449-YEM,Shabwa,YEM,47,15


In [8]:
# Save the geoparsed rows to disk, without the index column.
df.to_csv("processed/results.csv", index=False)

# finalise data

In [10]:
# Reload the saved geoparse results, then left-join the EM-DAT records in `d2`
# onto them by disaster number, so every geoparsed row is kept.
df = pd.read_csv("processed/results.csv")
d3 = pd.merge(df, d2, how="left", on="Dis No").reset_index(drop=True)
d3

Unnamed: 0,Dis No,Locat,CountryCode,Lon,Lat,Year,Seq,Glide,Disaster Group,Disaster Subgroup,Disaster Type,Disaster Subtype,Disaster Subsubtype,Event Name,Country,ISO,Region,Continent,Location,Origin,Associated Dis,Associated Dis2,OFDA Response,Appeal,Declaration,Aid Contribution,Dis Mag Value,Dis Mag Scale,Latitude,Longitude,Local Time,River Basin,Start Year,Start Month,Start Day,End Year,End Month,End Day,Total Deaths,No Injured,No Affected,No Homeless,Total Affected,Reconstruction Costs ('000 US$),Insured Damages ('000 US$),Total Damages ('000 US$),CPI,Adm Level,Admin1 Code,Admin2 Code,Geo Locations
0,2000-0372-CHN,Quanzhou,CHN,118.58583,24.91389,2000,0372,,Natural,Hydrological,Flood,Riverine flood,,,China,CHN,Eastern Asia,Asia,"Quanzhou, Zhangzhou, Xiamen districts (Fujian ...",Monsoonal rain,Rain,"Slide (land, mud, snow, rock)",,,,,76000,Km2,,,,,2000,6,17,2000,6,30,43,10,,31000,31010,,,19000,67.35575898,1;2,914,13006;13009;13010,"Jiangxi Sheng (Adm1). Quanzhou, Xiamen, Zhangz..."
1,2000-0372-CHN,Zhangzhou,CHN,117.65556,24.51333,2000,0372,,Natural,Hydrological,Flood,Riverine flood,,,China,CHN,Eastern Asia,Asia,"Quanzhou, Zhangzhou, Xiamen districts (Fujian ...",Monsoonal rain,Rain,"Slide (land, mud, snow, rock)",,,,,76000,Km2,,,,,2000,6,17,2000,6,30,43,10,,31000,31010,,,19000,67.35575898,1;2,914,13006;13009;13010,"Jiangxi Sheng (Adm1). Quanzhou, Xiamen, Zhangz..."
2,2000-0372-CHN,Xiamen,CHN,118.08187,24.47979,2000,0372,,Natural,Hydrological,Flood,Riverine flood,,,China,CHN,Eastern Asia,Asia,"Quanzhou, Zhangzhou, Xiamen districts (Fujian ...",Monsoonal rain,Rain,"Slide (land, mud, snow, rock)",,,,,76000,Km2,,,,,2000,6,17,2000,6,30,43,10,,31000,31010,,,19000,67.35575898,1;2,914,13006;13009;13010,"Jiangxi Sheng (Adm1). Quanzhou, Xiamen, Zhangz..."
3,2000-0372-CHN,Fujian Sheng,CHN,118.00000,26.25000,2000,0372,,Natural,Hydrological,Flood,Riverine flood,,,China,CHN,Eastern Asia,Asia,"Quanzhou, Zhangzhou, Xiamen districts (Fujian ...",Monsoonal rain,Rain,"Slide (land, mud, snow, rock)",,,,,76000,Km2,,,,,2000,6,17,2000,6,30,43,10,,31000,31010,,,19000,67.35575898,1;2,914,13006;13009;13010,"Jiangxi Sheng (Adm1). Quanzhou, Xiamen, Zhangz..."
4,2000-0372-CHN,Jiangxi Sheng,CHN,115.66667,27.66667,2000,0372,,Natural,Hydrological,Flood,Riverine flood,,,China,CHN,Eastern Asia,Asia,"Quanzhou, Zhangzhou, Xiamen districts (Fujian ...",Monsoonal rain,Rain,"Slide (land, mud, snow, rock)",,,,,76000,Km2,,,,,2000,6,17,2000,6,30,43,10,,31000,31010,,,19000,67.35575898,1;2,914,13006;13009;13010,"Jiangxi Sheng (Adm1). Quanzhou, Xiamen, Zhangz..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11330,2020-0314-YEM,Ibb,YEM,44.16667,14.00000,2020,0314,,Natural,Hydrological,Flood,,,,Yemen,YEM,Western Asia,Asia,"Marib, Dhale, Abyan, Hadhramaut, Ibb, Hodeidah...",Heavy rains,,,,,,,,Km2,,,,,2020,8,1,2020,8,5,20,6,9000,,9006,,,,,1,3407;3408;3411;3418;3419;3421;144970,,"Abyan, Al Dhale'e, Al Hudaydah, Hadramaut, Haj..."
11331,2021-0449-YEM,Marib,YEM,45.32581,15.46253,2021,0449,,Natural,Hydrological,Flood,,,,Yemen,YEM,Western Asia,Asia,"Marib,Taizz, al-Mahrah, Hadramawt, Shabwa, Aby...",Heavy rains,,,,,,,,Km2,,,,,2021,7,16,2021,7,28,14,,1000,,1000,,,,,,,,
11332,2021-0449-YEM,Hadramawt,YEM,49.00000,16.00000,2021,0449,,Natural,Hydrological,Flood,,,,Yemen,YEM,Western Asia,Asia,"Marib,Taizz, al-Mahrah, Hadramawt, Shabwa, Aby...",Heavy rains,,,,,,,,Km2,,,,,2021,7,16,2021,7,28,14,,1000,,1000,,,,,,,,
11333,2021-0449-YEM,Shabwa,YEM,47.00000,15.00000,2021,0449,,Natural,Hydrological,Flood,,,,Yemen,YEM,Western Asia,Asia,"Marib,Taizz, al-Mahrah, Hadramawt, Shabwa, Aby...",Heavy rains,,,,,,,,Km2,,,,,2021,7,16,2021,7,28,14,,1000,,1000,,,,,,,,


In [11]:
# Write the merged table (geoparsed places plus EM-DAT columns) to disk, without the index column.
d3.to_csv("processed/combined_results.csv", index=False)