## Header 
Author : Amina Matt and Yichen Wang  
Date created : 20.12.2021  
Date last modified : 20.12.2021  
Python version : 3.8  
Description : Text processing of the CARICOM Compilation Archive (CCA) https://louverture.ch/cca/ 

### Librairies

In [228]:
# -*- coding: utf-8 -*-
import pandas as pd
import json
import math #for isnan
from pandas.io.json import json_normalize

### Initialization 

In [229]:
#PATHS
DATA_FOLDER = './data/'
caricom_sample = DATA_FOLDER +'Caricom_Archive_Sample_Schema1.txt'
caricom = DATA_FOLDER +'Caricom_Archive.txt'

### Load 

In [230]:
df = pd.read_pickle("./caricom_with_geoid.pkl")
#df.iloc[60:80]

### Functions

In [231]:
def add_coordinates(col_lat,col_lon,or_lat,or_lon):
    '''
    Describe: function that create a geojson with data from dataframe
    '''
    geojson_structure['geometry']['coordinates'] =  [[col_lat, col_lon], [or_lat, or_lon]]
    return geojson_structure

In [232]:
def add_person(person_name):
    '''
    Describe: function that create a geojson with data from dataframe
    '''
    geojson_structure['properties']['person'] =  person_name
    return geojson_structure

In [239]:
df.head(1)

Unnamed: 0,person,date,origin,colonial_Location,confidence_date,confidence_person,confidence_origin,whole_entry,col_loc_geonameid,col_loc_geo_name,col_capital,col_latitude,col_longitude,origin_as_found,origin_loc_geonameid,origin_geo_name,origin_latitude,origin_longitude
0,Arthur Thellusson,,Geneva,Antigua and Barbuda,100.0,25.0,33.333333,"=> Arthur Thellusson, son of Lord Rendlesham a...",3576396,Antigua and Barbuda,St. John's,17.12096,-61.84329,Geneva,2660650.0,Genève,46.20222,6.14569


### Dataframe processing

In [233]:
# Create a list in which new geojson are added for each dataframe entry

# empty list
geojson_with_coo_list = []

# loop on dataframe
for i in range(len(df)):
    entry = df.iloc[i]
    
    #define structure 
    geojson_structure ={"type": "Feature",
     "properties": {
         "person": '',
         "date": '',
         "origin": '',
     },
     "geometry":
         { "type": "LineString", 
          "coordinates": []
         }
    }
    # get entry values
    or_lat = entry['origin_latitude']
    or_lon = entry['origin_longitude']
    col_lat = entry['col_latitude']
    col_lon = entry['col_longitude']
    person_name = entry['person']
    
    # no lines if NaN values
    if math.isnan(or_lat) or  math.isnan(or_lon) or  math.isnan(col_lat) or  math.isnan(col_lon) :
        continue
    else :   
        # create geojson with coordinates
        geojson_with_coo = add_coordinates(or_lon,or_lat,col_lon,col_lat)
        geojson_with_coo = add_person(person_name)

        # add to list
        geojson_with_coo_list.append(geojson_with_coo)

In [234]:
#geojson_with_coo_list

### JSON for the collection

In [235]:
overall_json = {"type": "FeatureCollection","features": []}
overall_json['features']= geojson_with_coo_list
#overall_json

### Dump GeoJSON

In [236]:
a_file = open("lines.json", "w")
a_file = json.dump(overall_json, a_file) 

## Debugging

In [237]:
df.head()

Unnamed: 0,person,date,origin,colonial_Location,confidence_date,confidence_person,confidence_origin,whole_entry,col_loc_geonameid,col_loc_geo_name,col_capital,col_latitude,col_longitude,origin_as_found,origin_loc_geonameid,origin_geo_name,origin_latitude,origin_longitude
0,Arthur Thellusson,,Geneva,Antigua and Barbuda,100.0,25.0,33.333333,"=> Arthur Thellusson, son of Lord Rendlesham a...",3576396,Antigua and Barbuda,St. John's,17.12096,-61.84329,Geneva,2660650.0,Genève,46.20222,6.14569
1,Jamaica Kincaid,,Geneva,Antigua and Barbuda,100.0,100.0,100.0,"=> In her book A Small Place (1988), the Antig...",3576396,Antigua and Barbuda,St. John's,17.12096,-61.84329,Geneva,2660650.0,Genève,46.20222,6.14569
2,Henry Peschier,1781.0,Geneva,Antigua and Barbuda,50.0,25.0,25.0,"=> In 1781, Henry Peschier (1741-1791) from a ...",3576396,Antigua and Barbuda,St. John's,17.12096,-61.84329,Geneva,2660650.0,Genève,46.20222,6.14569
3,Peter Thelluson,1767.0,Geneva,Barbados,100.0,100.0,33.333333,"=> In 1767, Peter Thelluson (1737-1797), a Swi...",3374084,Barbados,Bridgetown,13.10732,-59.62021,Geneva,2660650.0,Genève,46.20222,6.14569
4,JeanAntoine Bertrand,,Geneva,Dominica,25.0,25.0,12.5,=> Jean-Antoine Bertrand (1726-1780) from the ...,3575830,Dominica,Roseau,15.30174,-61.38808,Geneva,2660650.0,Genève,46.20222,6.14569


In [238]:
df['origin_geo_name'].unique()

array(['Genève', 'Zürich', 'Bern', 'Sankt Gallen', 'Schaffhausen',
       'Basel', 'Vevey', 'Lausanne', 'Aarau', 'Luzern',
       'Yverdon-les-Bains', 'Winterthur', 'Biel/Bienne', 'Fribourg',
       'Chur', 'Zug'], dtype=object)

In [223]:
col_loc_geo_name

KeyError: 'geo_origin'

In [196]:
df[df['col_capital']=='Kingston']['col']

Unnamed: 0,person,date,origin,colonial_Location,confidence_date,confidence_person,confidence_origin,whole_entry,col_loc_geonameid,col_loc_geo_name,col_capital,col_latitude,col_longitude,origin_as_found,origin_loc_geonameid,origin_geo_name,origin_latitude,origin_longitude
21,Johann Bernhard Rtzer,born,,Jamaica,33.333333,33.333333,50.0,=> Johann Bernhard Rätzer (born 1726) was a go...,3489940,Jamaica,Kingston,44.22976,-76.48098,,2661552.0,Bern,,
22,John Campbell,1796–1862,,Jamaica,100.0,100.0,25.0,"=> John Campbell, 2nd Marquess of Breadalbane ...",3489940,Jamaica,Kingston,44.22976,-76.48098,,2661552.0,Bern,,
23,George Thompson,1804-1878,,Jamaica,100.0,100.0,100.0,=> English abolitionist George Thompson (1804-...,3489940,Jamaica,Kingston,44.22976,-76.48098,,2661552.0,Bern,,
56,Hans Rudolf Zeller,1639-1700,Zurich,Jamaica,100.0,33.333333,50.0,=> Hans Rudolf Zeller (1639-1700) and Hans Hei...,3489940,Jamaica,Kingston,44.22976,-76.48098,Zurich,2657900.0,Zürich,47.36667,8.55
57,Caspar Landolt von Oehrlingen,,Zurich,Jamaica,100.0,50.0,20.0,=> Caspar Landolt von Oehrlingen from the Cant...,3489940,Jamaica,Kingston,44.22976,-76.48098,Zurich,2657900.0,Zürich,47.36667,8.55
136,Robert Raillard,1660–1691,Ble,Jamaica,100.0,100.0,100.0,=> One Robert Raillard (1660–1691)‏‎ from Bâle...,3489940,Jamaica,Kingston,44.22976,-76.48098,,2661552.0,Bern,,
137,Jeremias Mller,,Ble,Jamaica,100.0,100.0,25.0,=> Medical doctor Jeremias Müller from Bâle ow...,3489940,Jamaica,Kingston,44.22976,-76.48098,,2661552.0,Bern,,
148,Auguste Forel,1848–1931,Morges,Jamaica,100.0,100.0,20.0,=> Auguste Forel (1848–1931) from Morges in th...,3489940,Jamaica,Kingston,44.22976,-76.48098,,2661552.0,Bern,,
251,Samuel Mller,,Jamaica,Jamaica,100.0,50.0,14.285714,"=> 1719-1734, the city state of Berne as well ...",3489940,Jamaica,Kingston,44.22976,-76.48098,Jamaica,5122520.0,Jamaica,40.69149,-73.80569
252,Pierre Cottier,,Rougement,Jamaica,33.333333,100.0,33.333333,=> Pierre Cottier from Rougement (Canton of Be...,3489940,Jamaica,Kingston,44.22976,-76.48098,,2661552.0,Bern,,
