In [30]:
import pandas as pd
# Census CSV column name -> human-readable description.
# The keys select (and order) the columns returned by loadCSV_new, and the
# descriptions of every column except STATE and NAME are emitted as rdfs:label
# values for the observable properties, so the text here ends up in the graph.
header_dic = {
    'STATE': "State FIPS code",
    'NAME': "State name",
    'POPESTIMATE2019': "7/1/2019 resident total population estimate",
    'NPOPCHG_2019': "Numeric change in resident total population 7/1/2018 to 7/1/2019",
    'BIRTHS2019': 'Births in period 7/1/2018 to 6/30/2019',
    'DEATHS2019': 'Deaths in period 7/1/2018 to 6/30/2019',
    'NATURALINC2019': 'Natural increase in period 7/1/2018 to 6/30/2019',
    # Fixed typo: the label previously read "7/1/2018t o 6/30/2019".
    'INTERNATIONALMIG2019': 'Net international migration in period 7/1/2018 to 6/30/2019',
    'DOMESTICMIG2019': 'Net domestic migration in period 7/1/2018 to 6/30/2019',
    'NETMIG2019': 'Net migration in period 7/1/2018 to 6/30/2019',
    'RESIDUAL2019': 'Residual for period 7/1/2018 to 6/30/2019',
    'RBIRTH2019': 'Birth rate in period 7/1/2018 to 6/30/2019',
    'RDEATH2019': 'Death rate in period 7/1/2018 to 6/30/2019',
    'RNATURALINC2019': 'Natural increase rate in period 7/1/2018 to 6/30/2019',
    'RINTERNATIONALMIG2019': 'Net international migration rate in period 7/1/2018 to 6/30/2019',
    'RDOMESTICMIG2019': 'Net domestic migration rate in period 7/1/2018 to 6/30/2019',
    'RNETMIG2019': 'Net migration rate in period 7/1/2018 to 6/30/2019',
}

def loadCSV_new(fileName):
    """Read a census CSV and return the ``header_dic`` columns as row lists.

    Parameters
    ----------
    fileName
        Anything ``pandas.read_csv`` accepts as its first argument.

    Returns
    -------
    list of list
        One list per CSV row, holding the values of the columns named by the
        keys of ``header_dic``, in that key order. The ``STATE`` value is the
        original value rendered through ``"{:02}".format``.
    """
    wanted_columns = header_dic.keys()

    frame = pd.read_csv(fileName)
    # Render STATE through "{:02}" so the code becomes a 2-digit FIPS string.
    frame['STATE'] = frame['STATE'].map("{:02}".format)
    selected = frame[wanted_columns]

    return [list(record) for record in selected.values]

In [31]:
from rdflib.namespace import CSVW, DC, DCAT, DCTERMS, DOAP, FOAF, ODRL2, ORG, OWL, \
                           PROF, PROV, RDF, RDFS, SDO, SH, SKOS, SOSA, SSN, TIME, \
                           VOID, XMLNS, XSD
from rdflib import Namespace
from rdflib import Graph
from rdflib import URIRef, BNode, Literal
#from datetime import datetime
import datetime as datetime
import os
from glob import glob
import sys

# Bind SOSA to its W3C IRI explicitly instead of aliasing rdflib's SSN constant.
# The previous `SOSA = SSN` made every SOSA.* term (observedProperty,
# hasFeatureOfInterest, phenomenonTime, hasMember, ...) and the `sosa` prefix
# resolve to whatever IRI the installed rdflib assigns to SSN.
# NOTE(review): that alias only produces sosa IRIs on rdflib builds whose
# SOSA/SSN constants are swapped (believed to be 5.0.0 -- confirm); on other
# releases it emits http://www.w3.org/ns/ssn/ terms under the `sosa` prefix.
SOSA = Namespace("http://www.w3.org/ns/sosa/")

# Project namespaces for the COVID linked-data graph.
covid = Namespace("http://covid.geog.ucsb.edu/lod/ontology/")
covid_research = Namespace("http://covid.geog.ucsb.edu/lod/research/")
covid_forecast = Namespace("http://covid.geog.ucsb.edu/lod/prediction/")
covid_target = Namespace("http://covid.geog.ucsb.edu/lod/target/")
covid_instant = Namespace("http://covid.geog.ucsb.edu/lod/instant/")
covid_place = Namespace("http://covid.geog.ucsb.edu/lod/place/")
covid_target_type = Namespace("http://covid.geog.ucsb.edu/lod/target-type/")

covid_model = Namespace("http://covid.geog.ucsb.edu/lod/model/")
covid_method = Namespace("http://covid.geog.ucsb.edu/lod/method/")
covid_method_type = Namespace("http://covid.geog.ucsb.edu/lod/methodtype/")
covid_assumption_type = Namespace("http://covid.geog.ucsb.edu/lod/assumptiontype/")
covid_obs_property = Namespace("http://covid.geog.ucsb.edu/lod/observedproperty/")
covid_assumption = Namespace("http://covid.geog.ucsb.edu/lod/assumption/")

# Namespaces currently disabled; kept for reference.
#covid_assumption_social_distancing = Namespace("http://covid.geog.ucsb.edu/lod/assumption-social-distancing/")
#covid_assumption_hospitalization_rate = Namespace("http://covid.geog.ucsb.edu/lod/assumption-hospitalization-rate/")
#covid_contributor = Namespace("http://covid.geog.ucsb.edu/lod/contributor/")
#covid_organization = Namespace("http://covid.geog.ucsb.edu/lod/organization/")
covid_owner = Namespace("http://covid.geog.ucsb.edu/lod/owner/")
covid_license = Namespace("http://covid.geog.ucsb.edu/lod/license/")
covid_modelDesignation = Namespace("http://covid.geog.ucsb.edu/lod/modelDesignation/")
covid_fundingResource = Namespace("http://covid.geog.ucsb.edu/lod/fundingResource/")
covid_groundTruth = Namespace("http://covid.geog.ucsb.edu/lod/groundTruth/")

# External vocabularies.
GEO = Namespace("http://www.opengis.net/ont/geosparql#")

WD = Namespace("http://www.wikidata.org/entity/")
WDT = Namespace("http://www.wikidata.org/prop/direct/")


In [32]:
def triplify_obs_instances(obs_property, point_value, graph, subject):
    """Describe one observation node: its observed property and its value.

    Parameters
    ----------
    obs_property
        Local name looked up in the ``census_obs_property`` namespace.
    point_value
        Value converted with ``float()`` and stored as a literal.
    graph
        Object whose ``add`` method receives the two triples.
    subject
        Node the triples are about.
    """
    property_node = census_obs_property[obs_property]
    graph.add((subject, SOSA.observedProperty, property_node))

    # The predicate could alternatively be sosa:hasSimpleResult.
    value_literal = Literal(float(point_value))
    graph.add((subject, covid['point'], value_literal))

def triplify_obs_collection(place, time, members, graph, subject):
    """Describe a statistics collection node and link it to its members.

    Parameters
    ----------
    place
        Local name looked up in the ``covid_place`` namespace (feature of interest).
    time
        Local name looked up in the ``covid_instant`` namespace (phenomenon time).
    members
        Iterable of nodes, each attached to ``subject`` via ``hasMember``.
    graph
        Object whose ``add`` method receives the triples.
    subject
        Node representing the collection.
    """
    collection_class = census['StatsCollection']
    graph.add((subject, RDF.type, collection_class))

    feature_node = covid_place[place]
    graph.add((subject, SOSA.hasFeatureOfInterest, feature_node))

    instant_node = covid_instant[time]
    graph.add((subject, SOSA.phenomenonTime, instant_node))

    for observation in members:
        graph.add((subject, SOSA.hasMember, observation))

def triplify_obs_property(obs_property, label, graph):
    """Declare an observable property and give it a label.

    Parameters
    ----------
    obs_property
        Local name looked up in the ``census_obs_property`` namespace.
    label
        Text stored as the property's ``rdfs:label`` literal.
    graph
        Object whose ``add`` method receives the two triples.
    """
    property_node = census_obs_property[obs_property]

    graph.add((property_node, RDF.type, SOSA.ObservableProperty))

    label_literal = Literal(label)
    graph.add((property_node, RDFS.label, label_literal))

In [38]:
# Location of the census CSV. Set the CENSUS_CSV_PATH environment variable to
# point at a local copy; the fallback is the original hard-coded path, which is
# an absolute, user-specific location and will not exist on other machines.
CENSUS_CSV_PATH = os.environ.get(
    'CENSUS_CSV_PATH',
    '/Users/rui/Desktop/NSF-RAPID/OW-Analysis/US Census/nst-est2019-alldata.csv',
)
census_data = loadCSV_new(CENSUS_CSV_PATH)

In [46]:
# Build the 2019 state-level census graph and write it to census2019.ttl.
#
# `covid`, `covid_instant` and `covid_place` are reused from the namespace cell
# (identical definitions) rather than being re-declared here.
census = Namespace("http://econ.geog.ucsb.edu/lod/census/")
census_obs_property = Namespace("http://covid.geog.ucsb.edu/lod/census_obs_property/")
census_stat = Namespace("http://covid.geog.ucsb.edu/lod/census_stat/")
census_stat_collection = Namespace("http://covid.geog.ucsb.edu/lod/census_stat_collection/")

# Census column -> slug used in the observation IRI
# ("census-stat-<slug>-<state FIPS>"). Keying by column name replaces the
# previous per-statistic variables that were addressed by positional index.
STAT_IRI_SLUGS = {
    'POPESTIMATE2019': 'pop-est',
    'NPOPCHG_2019': 'pop-change',
    'BIRTHS2019': 'birth',
    'DEATHS2019': 'death',
    'NATURALINC2019': 'natural-inc',
    'INTERNATIONALMIG2019': 'intel-migration',
    'DOMESTICMIG2019': 'domestic-migration',
    'NETMIG2019': 'net-migration',
    'RESIDUAL2019': 'residual',
    'RBIRTH2019': 'birth-rate',
    'RDEATH2019': 'death-rate',
    'RNATURALINC2019': 'natural-inc-rate',
    'RINTERNATIONALMIG2019': 'intel-migration-rate',
    'RDOMESTICMIG2019': 'domestic-migration-rate',
    'RNETMIG2019': 'net-migration-rate',
}

# Columns that identify the row rather than carry a statistic.
ID_COLUMNS = ('STATE', 'NAME')

# Local name (in covid_instant) of the phenomenon time for every collection.
CENSUS_INSTANT = '2019'

census_g = Graph()

for prefix, namespace in (
    ('covid', covid),
    ('covid-place', covid_place),
    ('covid-instant', covid_instant),
    ('census-obs-property', census_obs_property),
    ('census-obs', census_stat),
    ('census-obs-collection', census_stat_collection),
    ('sosa', SOSA),
    ('census', census),
):
    census_g.bind(prefix, namespace)

# Each row of census_data holds its values in header_dic key order, so zipping
# with the keys gives access by column name.
column_names = list(header_dic)

for item in census_data:
    record = dict(zip(column_names, item))
    state_fips = record['STATE']

    # Rows with FIPS "00" are skipped (presumably national/regional aggregates
    # rather than states -- confirm against the CSV layout).
    if state_fips == "00":
        continue

    state_name = record['NAME']

    # One collection node per state, plus one observation node per statistic.
    collection_node = census_stat_collection["census-stat-collection-" + state_fips]
    observation_nodes = {
        column: census_stat["census-stat-%s-%s" % (slug, state_fips)]
        for column, slug in STAT_IRI_SLUGS.items()
    }

    # Type, feature of interest, phenomenon time and hasMember links.
    triplify_obs_collection(
        state_fips,
        CENSUS_INSTANT,
        list(observation_nodes.values()),
        census_g,
        collection_node,
    )
    census_g.add((collection_node, SOSA.madeBySensor, census['CensusBureau']))
    collection_label = "State level census statistics made in 2019 by the U.S. Census Bureau at %s" % (state_name)
    census_g.add((collection_node, RDFS.label, Literal(collection_label)))

    # Observed property + point value for every statistic of this state.
    for column, observation_node in observation_nodes.items():
        triplify_obs_instances(column, record[column], census_g, observation_node)

# Declare each statistic column as an observable property, labelled with its
# description from header_dic.
for key in header_dic:
    if key not in ID_COLUMNS:
        triplify_obs_property(key, header_dic[key], census_g)

census_g.serialize(destination="census2019.ttl", format='turtle')