## Header 
Author : Amina Matt and Yichen Wang  
Date created : 20.12.2021  
Date last modified : 20.12.2021  
Python version : 3.8  
Description : Text processing of the CARICOM Compilation Archive (CCA) https://louverture.ch/cca/ 

### Librairies

In [33]:
# -*- coding: utf-8 -*-
import pandas as pd
import json
import math #for isnan
from pandas.io.json import json_normalize

### Initialization 

In [34]:
#PATHS
DATA_FOLDER = './data/'
caricom_sample = DATA_FOLDER +'Caricom_Archive_Sample_Schema1.txt'
caricom = DATA_FOLDER +'Caricom_Archive.txt'

### Load 

In [35]:
df = pd.read_pickle("./caricom_with_geoid.pkl")
#df.iloc[60:80]

In [36]:
df.activities

0                                 [slave owner]
1                                          None
2               [plantation owner, slave owner]
3                                 [slave owner]
4      [trading, slave owner, plantation owner]
                         ...                   
320                               [slave owner]
321                          [racist, military]
322                               [slave owner]
323                                        None
324                                  [military]
Name: activities, Length: 325, dtype: object

### Functions

In [37]:
def add_coordinates(col_lat,col_lon,or_lat,or_lon):
    '''
    Describe: function that create a geojson with data from dataframe
    '''
    geojson_structure['geometry']['coordinates'] =  [[col_lat, col_lon], [or_lat, or_lon]]
    return geojson_structure

In [38]:
def add_person(person_name):
    '''
    Describe: function that create a geojson with data from dataframe
    '''
    geojson_structure['properties']['person'] =  person_name
    return geojson_structure

In [39]:
df.head(1)

Unnamed: 0,person,date,origin,colonial_Location,confidence_date,confidence_person,confidence_origin,activities,whole_entry,col_loc_geonameid,col_loc_geo_name,col_capital,col_latitude,col_longitude,origin_as_found,origin_loc_geonameid,origin_geo_name,origin_latitude,origin_longitude
0,Arthur Thellusson,,Geneva,Antigua and Barbuda,100.0,25.0,33.333333,[slave owner],"=> Arthur Thellusson, son of Lord Rendlesham a...",3576396,Antigua and Barbuda,St. John's,17.12096,-61.84329,Geneva,2660650.0,Genève,46.20222,6.14569


### Dataframe processing

In [40]:
# Create a list in which new geojson are added for each dataframe entry

# empty list
geojson_with_coo_list = []

# loop on dataframe
for i in range(len(df)):
    entry = df.iloc[i]
    
    #define structure 
    geojson_structure ={"type": "Feature",
     "properties": {
         "person": '',
         "date": '',
         "origin": '',
         "colonial_location":'',
         "activities":'',
         "full_entry":'',
     },
     "geometry":
         { "type": "LineString", 
          "coordinates": []
         }
    }
    # get entry values
    or_lat = entry['origin_latitude']
    or_lon = entry['origin_longitude']
    col_lat = entry['col_latitude']
    col_lon = entry['col_longitude']
    person_name = entry['person']
    
    # no lines if NaN values
    if math.isnan(or_lat) or  math.isnan(or_lon) or  math.isnan(col_lat) or  math.isnan(col_lon) :
        continue
    else :   
        # create geojson with coordinates
        geojson_with_coo = add_coordinates(or_lon,or_lat,col_lon,col_lat)
        geojson_with_coo = add_person(person_name)
        geojson_structure['properties']['date'] =  entry['date']
        geojson_structure['properties']['origin'] =  entry['origin']
        geojson_structure['properties']['activities'] =  entry['activities']
        geojson_structure['properties']['full_entry'] =  entry['whole_entry']
        geojson_structure['properties']['colonial_location'] =  entry['colonial_Location']

        # add to list
        geojson_with_coo_list.append(geojson_with_coo)

In [41]:
geojson_with_coo_list

[{'type': 'Feature',
  'properties': {'person': 'Arthur Thellusson',
   'date': None,
   'origin': 'Geneva',
   'colonial_location': 'Antigua and Barbuda',
   'activities': ['slave owner'],
   'full_entry': '=> Arthur Thellusson, son of Lord Rendlesham and grandson of Peter Thellusson, born into a Geneva banking family, who had bought the original Brodsworth Hall estate in South Yorkshire (GB) in 1790, married the daughter of Antigua slave owner Sir Christopher Bethell-Codrington. '},
  'geometry': {'type': 'LineString',
   'coordinates': [[6.14569, 46.20222], [-61.84329, 17.12096]]}},
 {'type': 'Feature',
  'properties': {'person': 'Jamaica Kincaid',
   'date': None,
   'origin': 'Geneva',
   'colonial_location': 'Antigua and Barbuda',
   'activities': None,
   'full_entry': '=> In her book A Small Place (1988), the Antiguan writer Jamaica Kincaid indicts the Antiguan government, the tourist industry and Antigua’s British colonial legacy by saying: '},
  'geometry': {'type': 'LineStri

### JSON for the collection

In [42]:
overall_json = {"type": "FeatureCollection","features": []}
overall_json['features']= geojson_with_coo_list
#overall_json

### Dump GeoJSON

In [43]:
a_file = open("lines.json", "w")
a_file = json.dump(overall_json, a_file) 

## Debugging

In [25]:
df.colonial_Location.isnull().sum()

0

In [238]:
df['origin_geo_name'].unique()

array(['Genève', 'Zürich', 'Bern', 'Sankt Gallen', 'Schaffhausen',
       'Basel', 'Vevey', 'Lausanne', 'Aarau', 'Luzern',
       'Yverdon-les-Bains', 'Winterthur', 'Biel/Bienne', 'Fribourg',
       'Chur', 'Zug'], dtype=object)

In [223]:
col_loc_geo_name

KeyError: 'geo_origin'

In [196]:
df[df['col_capital']=='Kingston']['col']

Unnamed: 0,person,date,origin,colonial_Location,confidence_date,confidence_person,confidence_origin,whole_entry,col_loc_geonameid,col_loc_geo_name,col_capital,col_latitude,col_longitude,origin_as_found,origin_loc_geonameid,origin_geo_name,origin_latitude,origin_longitude
21,Johann Bernhard Rtzer,born,,Jamaica,33.333333,33.333333,50.0,=> Johann Bernhard Rätzer (born 1726) was a go...,3489940,Jamaica,Kingston,44.22976,-76.48098,,2661552.0,Bern,,
22,John Campbell,1796–1862,,Jamaica,100.0,100.0,25.0,"=> John Campbell, 2nd Marquess of Breadalbane ...",3489940,Jamaica,Kingston,44.22976,-76.48098,,2661552.0,Bern,,
23,George Thompson,1804-1878,,Jamaica,100.0,100.0,100.0,=> English abolitionist George Thompson (1804-...,3489940,Jamaica,Kingston,44.22976,-76.48098,,2661552.0,Bern,,
56,Hans Rudolf Zeller,1639-1700,Zurich,Jamaica,100.0,33.333333,50.0,=> Hans Rudolf Zeller (1639-1700) and Hans Hei...,3489940,Jamaica,Kingston,44.22976,-76.48098,Zurich,2657900.0,Zürich,47.36667,8.55
57,Caspar Landolt von Oehrlingen,,Zurich,Jamaica,100.0,50.0,20.0,=> Caspar Landolt von Oehrlingen from the Cant...,3489940,Jamaica,Kingston,44.22976,-76.48098,Zurich,2657900.0,Zürich,47.36667,8.55
136,Robert Raillard,1660–1691,Ble,Jamaica,100.0,100.0,100.0,=> One Robert Raillard (1660–1691)‏‎ from Bâle...,3489940,Jamaica,Kingston,44.22976,-76.48098,,2661552.0,Bern,,
137,Jeremias Mller,,Ble,Jamaica,100.0,100.0,25.0,=> Medical doctor Jeremias Müller from Bâle ow...,3489940,Jamaica,Kingston,44.22976,-76.48098,,2661552.0,Bern,,
148,Auguste Forel,1848–1931,Morges,Jamaica,100.0,100.0,20.0,=> Auguste Forel (1848–1931) from Morges in th...,3489940,Jamaica,Kingston,44.22976,-76.48098,,2661552.0,Bern,,
251,Samuel Mller,,Jamaica,Jamaica,100.0,50.0,14.285714,"=> 1719-1734, the city state of Berne as well ...",3489940,Jamaica,Kingston,44.22976,-76.48098,Jamaica,5122520.0,Jamaica,40.69149,-73.80569
252,Pierre Cottier,,Rougement,Jamaica,33.333333,100.0,33.333333,=> Pierre Cottier from Rougement (Canton of Be...,3489940,Jamaica,Kingston,44.22976,-76.48098,,2661552.0,Bern,,
