In [229]:
# import libraries
from rdflib import Graph,Literal,RDF,URIRef
from rdflib.namespace import FOAF,XSD,RDFS
from rdflib import Namespace
import pandas as pd
import random

In [57]:
g = Graph()

In [58]:
KG_SDM = Namespace("http://kg_sdm.org/")
g.bind("kg_sdm",KG_SDM) # the uri will be repliced

## Adding person sublasses

In [59]:
# Author subclass Person
# Author: school
# Person: name
# Create a table of names and schools which will be later sliced to Author, Editor etc.

In [60]:
people_names = pd.read_csv('data_kg_sdm/authors.csv')
people_names = people_names.drop(columns='ID')
people_names.rename(columns = {'name':'person_name'}, inplace=True)

In [61]:
people_names

Unnamed: 0,person_name
0,Gautam A.
1,Crandall J.W.
2,Goodrich M.A.
3,de Moura Oliveira P.B.
4,Hedengren J.D.
...,...
2167,Allen B.L.
2168,Anapalli S.
2169,Blackshaw R.E.
2170,Lyon D.J.


In [62]:
schools = pd.read_csv('data_kg_sdm/schools.csv')
schools = schools.drop(columns='ID')
schools.rename(columns = {'name':'school_name'}, inplace=True)

In [63]:
academics = people_names.loc[:schools.size-1]
academics['school_name'] = schools

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  academics['school_name'] = schools


In [64]:
academics

Unnamed: 0,person_name,school_name
0,Gautam A.,"Computer Science Department, Brigham Young Uni..."
1,Crandall J.W.,"INESC-TEC Technology and Science, Campus da FE..."
2,Goodrich M.A.,"Department of Engineering, University of Trás..."
3,de Moura Oliveira P.B.,"Department of Chemical Engineering, Brigham Y..."
4,Hedengren J.D.,"Task Committee Secretary and Lead Engineer, Op..."
...,...,...
1456,Hedges D.M.,United States Department of Agriculture-Agric...
1457,Steffensen S.C.,United States Department of Agriculture-Agric...
1458,Puleo C.,"Agriculture and Agri-Food Canada, Lethbridge,..."
1459,Galligan C.,"Department of Crop and Soil Sciences, Washing..."


In [65]:
# We suppose 60% are Authors, 20% Reviewsrs, 10% Chair or Editor

In [66]:
total = len(academics)
nb_auth = int(total * 0.6)
nb_rev = int(total * 0.2)
nb_chair_editor = int(total * 0.1)

In [67]:
authors = academics.loc[:nb_auth]
reviewers = academics.loc[nb_auth : nb_auth + nb_rev]
chair = academics.loc[nb_auth + nb_rev : nb_auth + nb_rev + nb_chair_editor ]
editor = academics.loc[ nb_auth + nb_rev + nb_chair_editor :nb_auth + nb_rev + nb_chair_editor + nb_chair_editor]

In [68]:
editor.head()

Unnamed: 0,person_name,school_name
1314,Choudhury D.,"Department of Architecture, Building and Plan..."
1315,Chanda A.,"ISISE, University of Minho, Department of Civ..."
1316,Ruchti T.,"Utah State University, ENGR 419J, Logan, UT 8..."
1317,Upchurch W.,"Naval Undersea Warfare Center, Newport, RI, U..."
1318,Lee J.H.,"Pennsylvania State University, State College,..."


In [77]:
import string

In [225]:
def URLparse(url:str):
    url=url.replace("\'","_").replace("\"","_")
    for i in string.punctuation:
        url = url.replace(i,"_")
    url = url.replace(" ","_")
    return url

In [70]:
person_type_list = [authors,reviewers,chair,editor]
person_sub_type_ls = ["Author","Reviewers","Chair","Editor"]

In [71]:
Person = URIRef("http://kg_sdm.org/Person")

for i,person_tp in enumerate(person_type_list):
    preson_sub_type = URIRef(f"http://kg_sdm.org/{person_sub_type_ls[i]}")
    
    for name, school in zip(person_tp['person_name'], person_tp['school_name']):
    #   editor_node = URIRef(f"http://kg_sdm.org/{URLparse(name)}")
        parsed_name = URLparse(name)
        person_node = URIRef(f"http://kg_sdm.org/{parsed_name}")
        name_lit = Literal(str(name))
        school_lit = Literal(str(school))

        # add subclass type
        g.add((person_node, RDF.type, preson_sub_type))
        # add school
        g.add((person_node, KG_SDM.school, school_lit))
        # add name of person
        g.add((person_node, FOAF.name, name_lit))

## Adding paper submitted in Venue 

In [207]:
articles = pd.read_csv('data_kg_sdm/articles.csv')
articles = articles.loc[:len(authors)-1]
articles

Unnamed: 0,ID,title,volume,DOI
0,1,Self-assessment of Proficiency of Intelligent ...,1210 AISC,10.1007/978-3-030-51758-8_15
1,2,Bridging theory to practice: Feedforward and c...,695 LNEE,10.1007/978-3-030-58653-9_3
2,3,Development of sediment management guidelines ...,146,10.1061/(ASCE)HY.1943-7900.0001822
3,4,Structural design space exploration using prin...,20,10.1115/1.4047428
4,5,Religion-focused dating apps: A Q methodology ...,55,10.1016/j.tele.2020.101448
...,...,...,...,...
872,873,Highly compressible origami bellows for microg...,,10.2514/6.2017-5341
873,874,Interspecific hybridization for upland cotton ...,,10.1201/9781315369259
874,875,Thermophysical properties of thin fibers via p...,112,10.1016/j.ijheatmasstransfer.2017.05.046
875,876,Formability of magnesium alloy AZ31B from room...,Part F8,10.1007/978-3-319-52392-7_91


In [208]:
publications = pd.read_csv('data_kg_sdm/publications.csv')
publications

Unnamed: 0,ID,name,Type
0,1,Advances in Intelligent Systems and Computing,Conference Paper
1,2,Lecture Notes in Electrical Engineering,Conference Paper
2,3,Journal of Hydraulic Engineering,Journal
3,4,Journal of Computing and Information Science i...,Journal
4,5,Telematics and Informatics,Conference Paper
...,...,...,...
399,400,CAD Computer Aided Design,Conference Paper
400,401,International Symposium on Advances in Computa...,Conference Paper
401,402,AIAA SPACE and Astronautics Forum and Expositi...,Conference Paper
402,403,IEEE Transactions on Control of Network Systems,Conference Paper


In [209]:
# import random
# article_publisher_link = pd.read_csv('data_kg_sdm/article_published_by.csv')
# random.seed(0)
# publisher_ID = [random.randint(1, 404) for i in range(len(article_publisher_link))]
# publisher_ID
# article_publisher_link['publisher_ID'] = publisher_ID
# # article_publisher_link.drop(columns=article_publisher_link.columns[0], axis=1, inplace=True)
# article_publisher_link.to_csv('data_kg_sdm/article_published_by.csv',index=False)

In [211]:
article_publisher_link = pd.read_csv('data_kg_sdm/article_published_by.csv')
article_publisher_link

Unnamed: 0,article_ID,publisher_ID,year
0,1,198,2020
1,2,389,2020
2,3,216,2020
3,4,21,2018
4,5,133,2018
...,...,...,...
875,876,360,2017
876,877,229,2015
877,878,205,2015
878,879,95,2016


In [212]:
article_pub = pd.merge(articles, article_publisher_link, left_on='ID', right_on='article_ID')
articles_publishedin = pd.merge(article_pub, publications, left_on='publisher_ID', right_on='ID',how='left')
articles_publishedin

Unnamed: 0,ID_x,title,volume,DOI,article_ID,publisher_ID,year,ID_y,name,Type
0,1,Self-assessment of Proficiency of Intelligent ...,1210 AISC,10.1007/978-3-030-51758-8_15,1,198,2020,198,International Conference on Transportation and...,Conference Paper
1,2,Bridging theory to practice: Feedforward and c...,695 LNEE,10.1007/978-3-030-58653-9_3,2,389,2020,389,AAAI Workshop - Technical Report,Conference Paper
2,3,Development of sediment management guidelines ...,146,10.1061/(ASCE)HY.1943-7900.0001822,3,216,2020,216,Advances in Heat Transfer,Conference Paper
3,4,Structural design space exploration using prin...,20,10.1115/1.4047428,4,21,2018,21,Proceedings - 2020 IEEE 21st International Con...,Conference Paper
4,5,Religion-focused dating apps: A Q methodology ...,55,10.1016/j.tele.2020.101448,5,133,2018,133,IEEE Radiation Effects Data Workshop,Conference Paper
...,...,...,...,...,...,...,...,...,...,...
872,873,Highly compressible origami bellows for microg...,,10.2514/6.2017-5341,873,311,2016,311,Proceedings of the 20th International Conferen...,Conference Paper
873,874,Interspecific hybridization for upland cotton ...,,10.1201/9781315369259,874,353,2015,353,International Journal of Human Computer Studies,Journal
874,875,Thermophysical properties of thin fibers via p...,112,10.1016/j.ijheatmasstransfer.2017.05.046,875,79,2016,79,Proceedings - Annual Reliability and Maintaina...,Conference Paper
875,876,Formability of magnesium alloy AZ31B from room...,Part F8,10.1007/978-3-319-52392-7_91,876,360,2017,360,Proceedings - 2017 IEEE 17th International Con...,Conference Paper


In [214]:
total = len(articles_publishedin)
nb_joun = int(total * 0.2)
nb_workshop = int(total * 0.3)
nb_symposium = int(total * 0.5)

In [215]:
journals = articles_publishedin.loc[:nb_joun]
workshops = articles_publishedin.loc[nb_joun : nb_joun + nb_workshop]
symposium = articles_publishedin.loc[nb_joun + nb_workshop : ]

In [221]:
journals

Unnamed: 0,ID_x,title,volume,DOI,article_ID,publisher_ID,year,ID_y,name,Type
0,1,Self-assessment of Proficiency of Intelligent ...,1210 AISC,10.1007/978-3-030-51758-8_15,1,198,2020,198,International Conference on Transportation and...,Conference Paper
1,2,Bridging theory to practice: Feedforward and c...,695 LNEE,10.1007/978-3-030-58653-9_3,2,389,2020,389,AAAI Workshop - Technical Report,Conference Paper
2,3,Development of sediment management guidelines ...,146,10.1061/(ASCE)HY.1943-7900.0001822,3,216,2020,216,Advances in Heat Transfer,Conference Paper
3,4,Structural design space exploration using prin...,20,10.1115/1.4047428,4,21,2018,21,Proceedings - 2020 IEEE 21st International Con...,Conference Paper
4,5,Religion-focused dating apps: A Q methodology ...,55,10.1016/j.tele.2020.101448,5,133,2018,133,IEEE Radiation Effects Data Workshop,Conference Paper
...,...,...,...,...,...,...,...,...,...,...
171,172,Free Space Excitation in Optofluidic Devices f...,,10.1109/IPCon.2019.8908404,172,326,2019,326,Proceedings of the International Conference on...,Conference Paper
172,173,Multiplexed Detection of Single Antibiotic Dru...,,10.1109/IPCon.2019.8908388,173,97,2017,97,IEEE Aerospace and Electronic Systems Magazine,Conference Paper
173,174,Three-Dimensional Hydrodynamic Focusing Design...,,10.1109/IPCon.2019.8908414,174,311,2018,311,Proceedings of the 20th International Conferen...,Conference Paper
174,175,Transparency: Transitioning From Human–Machine...,13,10.1177/1555343419842776,175,295,2017,295,Fire Safety Journal,Journal


In [220]:
venue_type_list = [journals,workshops,symposium]
venue_sub_type_ls = ["Journal","Workshop","Symposium"]

In [226]:
for i,venue_tp in enumerate(venue_type_list):

    venue_type = venue_sub_type_ls[i]
    venue_sub_type = URIRef(f"http://kg_sdm.org/{venue_type}")

    for index,row in venue_tp.iterrows():
        year = row['year']
        year_literal = Literal(int(year))
        submissiontitle = row['title']
        confname = row['name']
        
        # parsing conference
        conf_title = URLparse(confname)
        conf_node = URIRef(f"http://kg_sdm.org/venue/{conf_title}")
        venue_lit = Literal(str(conf_title))

        # parsing submission
        sub_title = URLparse(submissiontitle)
        sub_node = URIRef(f"http://kg_sdm.org/Submission/{sub_title}")

        
        # parsing for publication
        # assuming only half of the articles got accepted
        if index%2 == 0:
            if venue_type=='Journal':
                Pub_title = conf_title+'_volume_'+str(random.randint(1, 5))
                Pub_title_lit = Literal(str(Pub_title))
                
                pub_node = URIRef(f"http://kg_sdm.org/Publication/{Publication_title}")
                g.add(pub_node, RDF.type, KG_SDM.Volume)
                g.add(sub_node,KG_SDM.published_in,pub_node)
                g.add(pub_node, KG_SDM.publication_title,Pub_title_lit)
                g.add(pub_node, KG_SDM.publication_year,year_literal)

            # if (venue_type=='Workshop' or venue_type=='Symposium')
            else:
                Pub_title = conf_title+'_proceddings_'
                Pub_title_lit = Literal(str(Pub_title))
                
                pub_node = URIRef(f"http://kg_sdm.org/Publication/{Publication_title}")
                g.add(pub_node, RDF.type, KG_SDM.Proceddings)
                g.add(sub_node,KG_SDM.published_in,pub_node)
                g.add(pub_node, KG_SDM.publication_title,Pub_title_lit)
                g.add(pub_node, KG_SDM.publication_year,year_literal)



        # add subclass type
        g.add((conf_node, RDF.type, venue_sub_type))
        # add name of venue
        g.add((conf_node, KG_SDM.venue_title, venue_lit))

        # connect conference and submission
        g.add((sub_node,KG_SDM.submitted_to,conf_node))

In [227]:
print(g.serialize('abox.ttl', format="ttl"))

[a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'Memory']].


In [228]:
print(g.serialize())

@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix kg_sdm: <http://kg_sdm.org/> .

kg_sdm:Aarsnes_U_J_ a kg_sdm:Chair,
        kg_sdm:Reviewers ;
    kg_sdm:school " Cardiff University Brain Research Imaging Centre (CUBRIC), Cardiff University, Cardiff, United Kingdom" ;
    foaf:name "Aarsnes U.J." .

kg_sdm:Abaimov_S_ a kg_sdm:Author ;
    kg_sdm:school " Department of Biology, Utah Valley University, SB 242M, Orem, UT  84058, United States" ;
    foaf:name "Abaimov S." .

kg_sdm:Abildskov_T_J_ a kg_sdm:Editor ;
    kg_sdm:school " Department of Physics and Astronomy, KU Leuven, Celestijnenlaan 200D, Heverlee, B-3001, Belgium" ;
    foaf:name "Abildskov T.J." .

kg_sdm:Abliz_D_ a kg_sdm:Author ;
    kg_sdm:school " Department of Instructional Psychology and Technology, Brigham Young University, 150-H MCKB, Provo, UT  84602, United States" ;
    foaf:name "Abliz D." .

kg_sdm:Abut_F_ a kg_sdm:Reviewers ;
    kg_sdm:school " Marriott School of Management, Brigham Young Univ., 730 TNR