In [13]:
# Helper functions to convert latlng
# from https://stackoverflow.com/questions/33997361/how-to-convert-degree-minute-second-to-degree-decimal

import re

def dms2dd(direction, degrees, minutes, seconds):
    dd = float(degrees) + float(minutes)/60 + float(seconds)/(60*60);
    if direction == 'W' or direction == 'S':
        dd *= -1
    return dd;

def dd2dms(deg):
    d = int(deg)
    md = abs(deg - d) * 60
    m = int(md)
    sd = (md - m) * 60
    return [d, m, sd]

def parse_dms(dms):
    parts = re.split('[º°\'",.]+', dms)
    lat = dms2dd(parts[0], parts[1], parts[2], parts[3])

    return (lat)

In [14]:
import csv, re

def parse_csv (infile, outfile):

    with open(infile) as csvfile:
        reader = csv.reader(csvfile)

        next(reader, None)

        outlist = []

        for row in reader:
            if row[0].isnumeric():
                buffer = {
                    "name_indo": "",
                    "name_hanzi": "",
                    "name_pinyin": "",
                    "neighbourhood": "",
                    "gps_lat": "",
                    "gps_lon": "",
                    "address": "",
                    "ref_no": ""
                }
                
                buffer["name_indo"] = row[1].strip()
                buffer["name_hanzi"] = row[2].strip()
                lat = row[3].strip() + "," + row[4].strip() + "." + row[5].strip()
                lat = lat.replace(" ", ",")
                lat = lat.replace(",,", ",")

                buffer["gps_lat"] = parse_dms(lat)

                address = row[6]
                
                buffer["ref_no"] = row[7]
            else:
                buffer["neighbourhood"] = row[1].strip()
                buffer["name_pinyin"] = row[2].strip()
                
                lon = row[3].strip() + "," + row[4].strip() + "." + row[5].strip()
                lon = lon.replace(" ", ",")
                lon = lon.replace(",,", ",")

                buffer["gps_lon"] = parse_dms(lon)

                address = address.strip() + ", "+ row[6].strip()
                buffer["address"] = address

                outlist.append(buffer)

        keys = outlist[0].keys()
        with open(outfile, 'w', newline='', encoding='utf-8')  as output_file:
            dict_writer = csv.DictWriter(output_file, keys)
            dict_writer.writeheader()
            dict_writer.writerows(outlist)

In [15]:
parse_csv("raw_barat.csv", "1_West.csv")
parse_csv("raw_tengah.csv", "1_Central.csv")
parse_csv("raw_selatan.csv", "1_South.csv")
parse_csv("raw_timur.csv", "1_East.csv")
parse_csv("raw_utara.csv", "1_North.csv")

['1', 'VIHARA TRIDHARMA BUMI RAYA', '山口洋中央伯公廟', 'N 00º', "54' 34", '28"', 'Jl. Sejahtera No.1', '1']


NameError: name 'parse_dms' is not defined

In [26]:
# Compile changes into a single csv

import csv, json

array = [
    ["1_West.csv", "west"],
    ["1_Central.csv", "central"],
    ["1_South.csv", "south"],
    ["1_East.csv", "east"],
    ["1_North.csv", "north"]
]

outlist = []

for entry in array:
    with open(entry[0]) as csvfile:
        reader = csv.reader(csvfile)
        
        idnum = 0
        
        next(reader, None)
        
        for line in reader:
            id = entry[1][0] + str(idnum)
            outlist.append({
                "name_indo": line[0],
                "name_hanzi": line[1],
                "name_pinyin": line[2],
                "neighbourhood": line[3],
                "gps_lat": line[4],
                "gps_lon": line[5],
                "address": line[6],
                "ref_no": line[7],
                "region": entry[1],
                "id": id
            })
            
            idnum += 1

outlist2 = []
for line in outlist:
    outlist2.append({
        "name": line["name_indo"] + line["neighbourhood"],
        "name_chinese": line["name_hanzi"] + " " + line["name_pinyin"],
        "id": line["id"]  ,
        "address": line["address"]
    })
print(outlist2)
    
keys = outlist[0].keys()
with open("2_combined.csv", 'w', newline='', encoding='utf-8')  as output_file:
            dict_writer = csv.DictWriter(output_file, keys)
            dict_writer.writeheader()
            dict_writer.writerows(outlist)

with open("2_combined.json", 'w', newline='', encoding='utf-8') as output_file:
    json.dump(outlist2 , output_file)

[{'name': 'VIHARA TRIDHARMA BUMI RAYAPUSAT KOTA SINGKAWANG', 'name_chinese': '山口洋中央伯公廟 Shan Kou Yang Zhong Yang Bo Gong Miao', 'id': 'w0', 'address': 'Jl. Sejahtera No.1, RT 48/06 Kelurahan Melayu'}, {'name': 'VIHARA TRIDHARMA ADI MOKKHAJL. P. DIPONEGORO', 'name_chinese': '華嶽宮/華光帝(華都街) Hua Yue Gong / Huang Guang Di', 'id': 'w1', 'address': 'Jl. P. Diponegoro No.1, RT 02/01 Kelurahan Pasiran'}, {'name': 'VIHARA TRIDHARMA KALAMA KWANTIJL. P. DIPONEGORO', 'name_chinese': '協天官/關帝(華都街) Xie Tian Gong / Guan Di', 'id': 'w2', 'address': 'Jl. P. Diponegoro No.1, RT 02/01 Kelurahan Pasiran'}, {'name': 'CETIYA FA SAN THAI CIONGJL. P. DIPONEGORO', 'name_chinese': '華山大将廟(吧西蘭) Hua Shan Da Jiang Miao', 'id': 'w3', 'address': 'Jl. P. Diponegoro Gg. Tujuh Belas, RT 56/18 Kelurahan Pasiran'}, {'name': 'VIHARA DHARMA BUDDHA MAITREYAJL. SM. TSJAFIOEDDIN', 'name_chinese': '彌勒大道慧發佛堂 Mi Le Da Dao Hui Fa Fo Tang', 'id': 'w4', 'address': 'Jl. SM. Tsjafioeddin No.32, RT 14/06 Kelurahan Melayu'}, {'name': 'CETIY

In [27]:
def csv_to_geojson(infile, outfile):

    import csv, json
    from geojson import Feature, FeatureCollection, Point

    features = []
    with open(infile, newline='') as csvfile:
        
        reader = csv.reader(csvfile, delimiter=',')
        next(reader, None)
        
        for name_indo, name_hanzi, name_pinyin, neighbourhood, gps_lat, gps_lon, address, ref_no,region, id in reader:
            latitude, longitude = map(float, (gps_lat, gps_lon))
                        
            features.append(
                Feature(
                    geometry = Point((longitude, latitude)),
                    properties = {
                        'name_indo': name_indo,
                        'name_hanzi': name_hanzi,
                        'name_pinyin': name_pinyin,
                        'neighbourhood': neighbourhood,
                        'gps_lat': gps_lat,
                        'gps_lon': gps_lon,
                        'address': address,
                        "ref_no": ref_no,
                        "region": region,
                        "id": id
                    }
                )
            )
            
        collection = FeatureCollection(features)
        
        with open(outfile, "w") as f:
            f.write('%s' % collection)

In [28]:
csv_to_geojson("2_combined.csv", "3_combined.geojson")