In [1]:
# import libraries
from rdflib import Graph,Literal,RDF,URIRef
from rdflib.namespace import FOAF,XSD,RDFS
from rdflib import Namespace
import pandas as pd
import string
import random
import numpy as np

In [2]:
g = Graph()

In [3]:
KG_SDM = Namespace("http://kg_sdm.org/")
g.bind("kg_sdm",KG_SDM) # the uri will be repliced

In [4]:
def save_rdf_file(g,filename,rdf_format='ttl'):
    g.serialize(filename+'.'+rdf_format,format= rdf_format)

# adding Persons and subclasses

In [5]:
people_names = pd.read_csv('data_kg_sdm/authors.csv')
# people_names = people_names.drop(columns='ID')
people_names.rename(columns = {'name':'person_name'}, inplace=True)
people_names.head()

Unnamed: 0,ID,person_name
0,57218202833,Gautam A.
1,7004904337,Crandall J.W.
2,7005513246,Goodrich M.A.
3,6508306234,de Moura Oliveira P.B.
4,9277159100,Hedengren J.D.


In [6]:
author_school = pd.read_csv('data_kg_sdm/author_belongs_school.csv')
author_school['author_ID'] = author_school['author_ID'].astype(int)

author_school.head()

Unnamed: 0,author_ID,org_ID
0,57218202833,1
1,7004904337,1
2,7005513246,1
3,6508306234,2
4,9277159100,3


In [7]:
schools = pd.read_csv('data_kg_sdm/schools.csv')
# schools = schools.drop(columns='ID')
schools.rename(columns = {'name':'school_name'}, inplace=True)
schools.head()

Unnamed: 0,ID,school_name
0,1,"Computer Science Department, Brigham Young Uni..."
1,2,"INESC-TEC Technology and Science, Campus da FE..."
2,3,"Department of Engineering, University of Trás..."
3,4,"Department of Chemical Engineering, Brigham Y..."
4,5,"Task Committee Secretary and Lead Engineer, Op..."


In [8]:
academics = pd.merge(schools, author_school, left_on='ID', right_on='org_ID')
academics = pd.merge(academics, people_names, left_on='author_ID', right_on='ID')
# academics.drop(columns=['ID_x', 'author_ID', 'org_ID', 'ID_y'], inplace=True)

In [9]:
# We suppose 60% are Authors, 20% Reviewsrs, 10% Chair or Editor

In [10]:
total = len(academics)
nb_auth = int(total * 0.6)
nb_rev = int(total * 0.2)
nb_chair_editor = int(total * 0.1)

In [11]:
authors = academics.loc[:nb_auth]
reviewers = academics.loc[nb_auth : nb_auth + nb_rev]
chair = academics.loc[nb_auth + nb_rev : nb_auth + nb_rev + nb_chair_editor ]
editor = academics.loc[ nb_auth + nb_rev + nb_chair_editor :nb_auth + nb_rev + nb_chair_editor + nb_chair_editor]

In [12]:
authors.head()

Unnamed: 0,ID_x,school_name,author_ID,org_ID,ID_y,person_name
0,1,"Computer Science Department, Brigham Young Uni...",57218202833,1,57218202833,Gautam A.
1,1,"Computer Science Department, Brigham Young Uni...",7004904337,1,7004904337,Crandall J.W.
2,1,"Computer Science Department, Brigham Young Uni...",7005513246,1,7005513246,Goodrich M.A.
3,1,"Computer Science Department, Brigham Young Uni...",55078951500,1,55078951500,Bodily P.M.
4,1,"Computer Science Department, Brigham Young Uni...",57215280005,1,57215280005,Jacobsen A.J.


In [13]:


def URLparse(url:str):
    url=url.replace("\'","_").replace("\"","_")
    for i in string.punctuation:
        url = url.replace(i,"_")
    url = url.replace(" ","_")
    return url



In [14]:
person_type_list = [authors,reviewers,chair,editor]
person_sub_type_ls = ["Author","Reviewers","Chair","Editor"]

In [15]:


Person = URIRef("http://kg_sdm.org/Person")

for i,person_tp in enumerate(person_type_list):
    preson_sub_type = URIRef(f"http://kg_sdm.org/{person_sub_type_ls[i]}")
    
    for name, school in zip(person_tp['person_name'], person_tp['school_name']):
    #   editor_node = URIRef(f"http://kg_sdm.org/{URLparse(name)}")
        parsed_name = URLparse(name)
        person_node = URIRef(f"http://kg_sdm.org/Person/{parsed_name}")
        name_lit = Literal(str(name))
        school_lit = Literal(str(school))

        # add subclass type
        g.add((person_node, RDF.type, preson_sub_type))
        # add school
        g.add((person_node, KG_SDM.school, school_lit))
        # add name of person
        g.add((person_node, FOAF.name, name_lit))



In [16]:
# print(g.serialize('person_links.ttl',format= rdf_format))
save_rdf_file(g,'person_links','ttl')

# Submission

In [17]:
author_article = pd.read_csv('data_kg_sdm/author_written_article.csv')
author_article['author_ID'] = author_article['author_ID'].astype(int)
author_article.head()

Unnamed: 0,author_ID,article_ID
0,57218202833,1
1,7004904337,1
2,7005513246,1
3,6508306234,2
4,9277159100,2


In [18]:
articles = pd.read_csv('data_kg_sdm/articles.csv')

In [19]:
paper_type = ['Demo','FullPaper', 'Poster', 'Short']
keywords = ['ML', 'NLP', 'Database', 'Graph']

In [20]:
articles['year'] = [ random.randint(2000,2022) for i in range(len(articles))]
articles['type'] = [random.choice(paper_type) for i in range(len(articles))]
articles['keyword'] = [random.choice(keywords) for i in range(len(articles))]
articles['accepted'] = ""
articles['accepted'].loc[:500] = True
articles['accepted'].loc[500:] = False

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  articles['accepted'].loc[:500] = True
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  articles['accepted'].loc[500:] = False


In [21]:
articles.head()

Unnamed: 0,ID,title,volume,DOI,year,type,keyword,accepted
0,1,Self-assessment of Proficiency of Intelligent ...,1210 AISC,10.1007/978-3-030-51758-8_15,2013,Poster,NLP,True
1,2,Bridging theory to practice: Feedforward and c...,695 LNEE,10.1007/978-3-030-58653-9_3,2002,Demo,Graph,True
2,3,Development of sediment management guidelines ...,146,10.1061/(ASCE)HY.1943-7900.0001822,2007,FullPaper,ML,True
3,4,Structural design space exploration using prin...,20,10.1115/1.4047428,2015,Short,NLP,True
4,5,Religion-focused dating apps: A Q methodology ...,55,10.1016/j.tele.2020.101448,2000,FullPaper,ML,True


In [22]:
# authors['person_name']

In [23]:
# article_pub

In [24]:
authors

Unnamed: 0,ID_x,school_name,author_ID,org_ID,ID_y,person_name
0,1,"Computer Science Department, Brigham Young Uni...",57218202833,1,57218202833,Gautam A.
1,1,"Computer Science Department, Brigham Young Uni...",7004904337,1,7004904337,Crandall J.W.
2,1,"Computer Science Department, Brigham Young Uni...",7005513246,1,7005513246,Goodrich M.A.
3,1,"Computer Science Department, Brigham Young Uni...",55078951500,1,55078951500,Bodily P.M.
4,1,"Computer Science Department, Brigham Young Uni...",57215280005,1,57215280005,Jacobsen A.J.
...,...,...,...,...,...,...
1299,562,"Department of Chemistry and Biochemistry, Brig...",57211500187,562,57211500187,Membreno K.
1300,563,"Department of Physics and Astronomy, Brigham ...",57209912956,563,57209912956,Avval T.G.
1301,563,"Department of Physics and Astronomy, Brigham ...",57211503118,563,57211503118,Carver V.
1302,563,"Department of Physics and Astronomy, Brigham ...",7006120837,563,7006120837,Allred D.D.


#### join articles and authors

In [25]:
article_pub = pd.merge(articles, author_article, left_on='ID', right_on='article_ID')
articles_publishedin = pd.merge(article_pub, authors, left_on='author_ID', right_on='author_ID')
articles_publishedin = articles_publishedin[['title', 'year', 'type', 'keyword', 'accepted', 'person_name', 'school_name']]
articles_publishedin.dropna(inplace=True)

# TODO: ACCEPTED

In [26]:
len(articles_publishedin)

2849

In [27]:
for a in articles.itertuples():
    print(a)
    break

Pandas(Index=0, ID=1, title='Self-assessment of Proficiency of Intelligent Systems: Challenges and Opportunities', volume='1210 AISC', DOI='10.1007/978-3-030-51758-8_15', year=2013, type='Poster', keyword='NLP', accepted=True)


In [28]:
for _, article_title, year, paper_type, keyword, accepted, author_name, school_name in articles_publishedin.itertuples():
    # get the author node
    author_node = URIRef(f"http://kg_sdm.org/Person/{URLparse(author_name)}")
    
    # create the submission onde
    submission_node = URIRef(f"http://kg_sdm.org/Submission/{URLparse(article_title)}")
    g.add((submission_node, RDF.type, KG_SDM.Submission))
    
    # author wrote a paper
    g.add((author_node, KG_SDM.writes, submission_node))
                            
    
    # data for submission
    paper_title_lit = Literal(str(article_title))
    paper_year_lit = Literal(int(year))
    g.add((submission_node, KG_SDM.paper_title, paper_title_lit))
    g.add((submission_node, KG_SDM.paper_year, paper_year_lit))
                            
    # keywords
    # create the node
    # NLP rdf type Keyword
    keyword_node = URIRef(f"http://kg_sdm.org/{keyword}")
#     g.add((keyword_node, RDF.type, KG_SDM.Keyword))
    # connect with submission
    # Submisson a NLP
    g.add((submission_node, KG_SDM.related_to, keyword_node))

    # paper type
    # create paper type
    paper_node = URIRef(f"http://kg_sdm.org/{URLparse(paper_type)}")
#     g.add((paper_node, RDF.type, KG_SDM.PaperType))
    g.add((submission_node, KG_SDM.of_type, paper_node))
    
    
    

# DecisionProcess

In [29]:
reviewers.head()

Unnamed: 0,ID_x,school_name,author_ID,org_ID,ID_y,person_name
1303,565,"Brigham Young University, Chemistry and Bioch...",57209530205,565,57209530205,Hooper K.
1304,566,"Department of Mechanical Engineering, Brigham ...",57215116223,566,57215116223,Baker N.F.
1305,566,"Department of Mechanical Engineering, Brigham ...",57188658434,566,57188658434,Thomas J.J.
1306,566,"Department of Mechanical Engineering, Brigham ...",36682113800,566,36682113800,Dykes K.
1307,567,"National Renewable Energy Laboratory, Nationa...",57191952713,567,57191952713,Stanley A.P.J.


In [30]:
reviewProcess = articles_publishedin.drop_duplicates(subset=['title'])

In [31]:
reviewProcess.head()

Unnamed: 0,title,year,type,keyword,accepted,person_name,school_name
0,Self-assessment of Proficiency of Intelligent ...,2013,Poster,NLP,True,Gautam A.,"Computer Science Department, Brigham Young Uni..."
2,Moderating operator influence in human-swarm s...,2021,FullPaper,NLP,True,Crandall J.W.,"Computer Science Department, Brigham Young Uni..."
3,Cooperating in long-term relationships with ti...,2021,FullPaper,Graph,True,Crandall J.W.,"Computer Science Department, Brigham Young Uni..."
4,Information design in crowdfunding under thres...,2005,FullPaper,NLP,False,Crandall J.W.,"Computer Science Department, Brigham Young Uni..."
7,Intent-based robotic path-replanning: When to ...,2001,Short,Database,True,Goodrich M.A.,"Computer Science Department, Brigham Young Uni..."


In [32]:
list(reviewers.sample(random.randint(2,4))['person_name'].values)

['Hartman B.', 'Durfee D.S.', 'Downing J.M.', 'Wang Y.']

In [33]:
reviews = []
comments = []
rejacc = []

# for i in range(len(decision)):
#     c=[]
#     l=[]
#     for j in range(randint(2,4)):
#         r.append(''.join(choice(string.ascii_letters))
#     c.append(reviewers.sample(len(r))['person_name'])
#     print(c,r)

for i in range(len(reviewProcess)): # for every row
    # create N reviewers and comments
    N = random.randint(2,4)
    rev_list = list(reviewers.sample(N)['person_name'])
    acc = [random.random()>0.5 for i in range(N)]
    com_list = []
    for j in range(N):
        comment = ''.join((random.choice('abcdefghijklmnopqrstuvwxyz') for i in range(5)))
        com_list.append(comment)
    reviews.append(rev_list)
    comments.append(com_list)
    rejacc.append(acc)
    
reviewProcess['reviewer_name'] = reviews
reviewProcess['comment'] = comments
reviewProcess['decision'] = rejacc

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reviewProcess['reviewer_name'] = reviews
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reviewProcess['comment'] = comments
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reviewProcess['decision'] = rejacc


In [34]:
reviewProcess.head()

Unnamed: 0,title,year,type,keyword,accepted,person_name,school_name,reviewer_name,comment,decision
0,Self-assessment of Proficiency of Intelligent ...,2013,Poster,NLP,True,Gautam A.,"Computer Science Department, Brigham Young Uni...","[Slezak T.J., Whitehead N.]","[ynspl, gpflv]","[True, True]"
2,Moderating operator influence in human-swarm s...,2021,FullPaper,NLP,True,Crandall J.W.,"Computer Science Department, Brigham Young Uni...","[Bjoraker G.L., Liu W.]","[mkmus, suuge]","[False, False]"
3,Cooperating in long-term relationships with ti...,2021,FullPaper,Graph,True,Crandall J.W.,"Computer Science Department, Brigham Young Uni...","[Moss N.D., Samelson R.]","[prsqw, rnxfz]","[False, False]"
4,Information design in crowdfunding under thres...,2005,FullPaper,NLP,False,Crandall J.W.,"Computer Science Department, Brigham Young Uni...","[Palomba E., Aarsnes U.J.]","[vzyde, tppue]","[True, False]"
7,Intent-based robotic path-replanning: When to ...,2001,Short,Database,True,Goodrich M.A.,"Computer Science Department, Brigham Young Uni...","[Taylor B.A., Blonquist J., Sowards J.B.]","[vdjea, qopat, awfqn]","[True, True, True]"


In [35]:
reviewProcess.apply(pd.Series.explode)

Unnamed: 0,title,year,type,keyword,accepted,person_name,school_name,reviewer_name,comment,decision
0,Self-assessment of Proficiency of Intelligent ...,2013,Poster,NLP,True,Gautam A.,"Computer Science Department, Brigham Young Uni...",Slezak T.J.,ynspl,True
0,Self-assessment of Proficiency of Intelligent ...,2013,Poster,NLP,True,Gautam A.,"Computer Science Department, Brigham Young Uni...",Whitehead N.,gpflv,True
2,Moderating operator influence in human-swarm s...,2021,FullPaper,NLP,True,Crandall J.W.,"Computer Science Department, Brigham Young Uni...",Bjoraker G.L.,mkmus,False
2,Moderating operator influence in human-swarm s...,2021,FullPaper,NLP,True,Crandall J.W.,"Computer Science Department, Brigham Young Uni...",Liu W.,suuge,False
3,Cooperating in long-term relationships with ti...,2021,FullPaper,Graph,True,Crandall J.W.,"Computer Science Department, Brigham Young Uni...",Moss N.D.,prsqw,False
...,...,...,...,...,...,...,...,...,...,...
2812,Introducing second-Year students to concept-Ba...,2020,FullPaper,Graph,False,Richard F.R.Y.,"Brigham Young University, United States",Lim H.,ayobl,True
2816,Semi-empirical model for fire spread in chamis...,2019,Demo,Database,False,Fletcher M.E.,"Department of Chemical Engineering, Brigham Yo...",Veletsianos G.,tbssr,True
2816,Semi-empirical model for fire spread in chamis...,2019,Demo,Database,False,Fletcher M.E.,"Department of Chemical Engineering, Brigham Yo...",Farnham T.L.,tavjj,True
2840,Ash deposition during advanced oxy-coal combus...,2003,FullPaper,Graph,False,Wang Y.,"Department of Chemical Engineering, University...",Ding S.,leuep,False


In [36]:
for a in reviewProcess.itertuples():
    print(a)
    break

Pandas(Index=0, title='Self-assessment of Proficiency of Intelligent Systems: Challenges and Opportunities', year=2013, type='Poster', keyword='NLP', accepted=True, person_name='Gautam A.', school_name='Computer Science Department, Brigham Young University, Provo, UT  84604, United States', reviewer_name=['Slezak T.J.', 'Whitehead N.'], comment=['ynspl', 'gpflv'], decision=[True, True])


In [37]:
for _, article_name, year, _, _, _, _, _, reviewer_name, comment, accepted in reviewProcess.itertuples():
    # get the submission node
    submission_node = URIRef(f"http://kg_sdm.org/Submission/{URLparse(article_title)}")

    # create the reviewProcess node
    review_process = URIRef(f"http://kg_sdm.org/DecisionProcess/{URLparse(author_name+'_'+article_name)}")
    g.add((review_process, RDF.type, KG_SDM.DecisionProcess))
    
    g.add((submission_node, KG_SDM.goes_through, review_process))
    
    # get the reviewer node
    reviewer_node = URIRef(f"http://kg_sdm.org/Person/{URLparse(author_name)}")
#     already added before
#     g.add((reviewer_node, RDF.type, KG_SDM.Reviewer))
    g.add((reviewer_node, KG_SDM.participates_in, review_process))
    
    comment_lit = Literal(str(comment))
    
    decision_lit = Literal(bool(accepted))
    
    g.add((review_process, KG_SDM.comment, comment_lit))
    g.add((review_process, KG_SDM.decision, decision_lit))
    


In [38]:
#     # get the author node
#     # create the submission onde
#     submission_node = URIRef(f"http://kg_sdm.org/Submission/{URLparse(article_title)}")
    
#     # author wrote a paper
#     g.add((author_node, KG_SDM.written, submission_node))
                                                   
#     # data for submission
#     paper_title_lit = Literal(str(article_title))
#     paper_year_lit = Literal(int(year))
#     g.add((submission_node, KG_SDM.paper_title, paper_title_lit))
#     g.add((submission_node, KG_SDM.paper_year, paper_year_lit))
                            

# Adding paper submitted in Venue

In [39]:
articles = pd.read_csv('data_kg_sdm/articles.csv')
articles = articles.loc[:len(authors)-1]
articles['accepted'] = ""
articles['accepted'].loc[:500] = True
articles['accepted'].loc[500:] = False
articles.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  articles['accepted'].loc[:500] = True
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  articles['accepted'].loc[500:] = False


Unnamed: 0,ID,title,volume,DOI,accepted
0,1,Self-assessment of Proficiency of Intelligent ...,1210 AISC,10.1007/978-3-030-51758-8_15,True
1,2,Bridging theory to practice: Feedforward and c...,695 LNEE,10.1007/978-3-030-58653-9_3,True
2,3,Development of sediment management guidelines ...,146,10.1061/(ASCE)HY.1943-7900.0001822,True
3,4,Structural design space exploration using prin...,20,10.1115/1.4047428,True
4,5,Religion-focused dating apps: A Q methodology ...,55,10.1016/j.tele.2020.101448,True


In [40]:
publications = pd.read_csv('data_kg_sdm/publications.csv')
publications

Unnamed: 0,ID,name,Type
0,1,Advances in Intelligent Systems and Computing,Conference Paper
1,2,Lecture Notes in Electrical Engineering,Conference Paper
2,3,Journal of Hydraulic Engineering,Journal
3,4,Journal of Computing and Information Science i...,Journal
4,5,Telematics and Informatics,Conference Paper
...,...,...,...
399,400,CAD Computer Aided Design,Conference Paper
400,401,International Symposium on Advances in Computa...,Conference Paper
401,402,AIAA SPACE and Astronautics Forum and Expositi...,Conference Paper
402,403,IEEE Transactions on Control of Network Systems,Conference Paper


In [41]:
article_publisher_link = pd.read_csv('data_kg_sdm/article_published_by.csv')
article_publisher_link

Unnamed: 0,article_ID,publisher_ID,year
0,1,198,2020
1,2,389,2020
2,3,216,2020
3,4,21,2018
4,5,133,2018
...,...,...,...
875,876,360,2017
876,877,229,2015
877,878,205,2015
878,879,95,2016


In [42]:
article_pub = pd.merge(articles, article_publisher_link, left_on='ID', right_on='article_ID')
articles_publishedin = pd.merge(article_pub, publications, left_on='publisher_ID', right_on='ID',how='left')
articles_publishedin

Unnamed: 0,ID_x,title,volume,DOI,accepted,article_ID,publisher_ID,year,ID_y,name,Type
0,1,Self-assessment of Proficiency of Intelligent ...,1210 AISC,10.1007/978-3-030-51758-8_15,True,1,198,2020,198,International Conference on Transportation and...,Conference Paper
1,2,Bridging theory to practice: Feedforward and c...,695 LNEE,10.1007/978-3-030-58653-9_3,True,2,389,2020,389,AAAI Workshop - Technical Report,Conference Paper
2,3,Development of sediment management guidelines ...,146,10.1061/(ASCE)HY.1943-7900.0001822,True,3,216,2020,216,Advances in Heat Transfer,Conference Paper
3,4,Structural design space exploration using prin...,20,10.1115/1.4047428,True,4,21,2018,21,Proceedings - 2020 IEEE 21st International Con...,Conference Paper
4,5,Religion-focused dating apps: A Q methodology ...,55,10.1016/j.tele.2020.101448,True,5,133,2018,133,IEEE Radiation Effects Data Workshop,Conference Paper
...,...,...,...,...,...,...,...,...,...,...,...
875,876,Formability of magnesium alloy AZ31B from room...,Part F8,10.1007/978-3-319-52392-7_91,False,876,360,2017,360,Proceedings - 2017 IEEE 17th International Con...,Conference Paper
876,877,A summary of data-aided equalizer experiments ...,,,False,877,229,2015,229,Science Robotics,Conference Paper
877,878,Joining Dissimilar Material Using Friction Sti...,,10.1007/978-3-319-52383-5_16,False,878,205,2015,205,"Journal of Guidance, Control, and Dynamics",Journal
878,879,A minimal realization technique for the dynami...,4,10.1109/TCNS.2015.2498468,False,879,95,2016,95,Geotechnical Special Publication,Conference Paper


In [43]:
total = len(articles_publishedin)
nb_joun = int(total * 0.2)
nb_workshop = int(total * 0.3)
nb_symposium = int(total * 0.5)

In [44]:
journals = articles_publishedin.loc[:nb_joun]
workshops = articles_publishedin.loc[nb_joun : nb_joun + nb_workshop]
symposium = articles_publishedin.loc[nb_joun + nb_workshop : ]

In [45]:
journals.head()

Unnamed: 0,ID_x,title,volume,DOI,accepted,article_ID,publisher_ID,year,ID_y,name,Type
0,1,Self-assessment of Proficiency of Intelligent ...,1210 AISC,10.1007/978-3-030-51758-8_15,True,1,198,2020,198,International Conference on Transportation and...,Conference Paper
1,2,Bridging theory to practice: Feedforward and c...,695 LNEE,10.1007/978-3-030-58653-9_3,True,2,389,2020,389,AAAI Workshop - Technical Report,Conference Paper
2,3,Development of sediment management guidelines ...,146,10.1061/(ASCE)HY.1943-7900.0001822,True,3,216,2020,216,Advances in Heat Transfer,Conference Paper
3,4,Structural design space exploration using prin...,20,10.1115/1.4047428,True,4,21,2018,21,Proceedings - 2020 IEEE 21st International Con...,Conference Paper
4,5,Religion-focused dating apps: A Q methodology ...,55,10.1016/j.tele.2020.101448,True,5,133,2018,133,IEEE Radiation Effects Data Workshop,Conference Paper


In [46]:
venue_type_list = [journals,workshops,symposium]
venue_sub_type_ls = ["Journal","Workshop","Symposium"]

In [70]:
for i,venue_tp in enumerate(venue_type_list):
    
    
    print(venue_sub_type_ls[i])
    venue_type = venue_sub_type_ls[i]
    venue_sub_type = URIRef(f"http://kg_sdm.org/{venue_type}")
    venue_tp.dropna(inplace=True)
    
    for index,row in venue_tp.iterrows():
        year = row['year']
        year_literal = Literal(int(year))
        submissiontitle = row['title']
        confname = row['name']
        decision = row['accepted']
        
        # parsing conference
        conf_title = URLparse(confname)
        conf_node = URIRef(f"http://kg_sdm.org/venue/{conf_title}")
        venue_lit = Literal(str(conf_title))

        # parsing submission
        sub_title = URLparse(submissiontitle)
        sub_node = URIRef(f"http://kg_sdm.org/Submission/{sub_title}")

        
        # parsing for publication
        # assuming only half of the articles got accepted
        if decision:
            if venue_type=='Journal':
                Pub_title = conf_title+'_volume_'+str(random.randint(1, 5))
                Pub_title_lit = Literal(str(Pub_title))
                
                pub_node = URIRef(f"http://kg_sdm.org/Publication/{Pub_title}")
                g.add((pub_node, RDF.type, KG_SDM.Volume))
                g.add((sub_node,KG_SDM.published_in,pub_node))
                g.add((pub_node, KG_SDM.publication_title,Pub_title_lit))
                g.add((pub_node, KG_SDM.publication_year,year_literal))

            # if (venue_type=='Workshop' or venue_type=='Symposium')
            else:
                Pub_title = conf_title+'_proceddings_'
                Pub_title_lit = Literal(str(Pub_title))
                
                pub_node = URIRef(f"http://kg_sdm.org/Publication/{Pub_title}")
                g.add((pub_node, RDF.type, KG_SDM.Proceddings))
                g.add((sub_node,KG_SDM.published_in,pub_node))
                g.add((pub_node, KG_SDM.publication_title,Pub_title_lit))
                g.add((pub_node, KG_SDM.publication_year,year_literal))



        # add subclass type
        g.add((conf_node, RDF.type, venue_sub_type))
        # add name of venue
        g.add((conf_node, KG_SDM.venue_title, venue_lit))

        # connect conference and submission
        g.add((sub_node,KG_SDM.submitted_to,conf_node))



Journal
Workshop
Symposium


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  venue_tp.dropna(inplace=True)


# Adding handlers

In [48]:
chair

Unnamed: 0,ID_x,school_name,author_ID,org_ID,ID_y,person_name
1737,989,"Department of Physics and Astronomy, Brigham Y...",57204435650,989,57204435650,Laughlin E.
1738,989,"Department of Physics and Astronomy, Brigham Y...",57204415715,989,57204415715,Howe L.
1739,991,Department of Civil and Environmental Engineer...,57193610705,991,57193610705,Talbot M.
1740,994,"Aerion Technologies Corporation, Mechanical En...",15833172400,994,15833172400,Rajnarayan D.
1741,996,"New Product Development, Intuitive Surgical I...",55916540700,996,55916540700,Grames C.
...,...,...,...,...,...,...
1950,1201,"Department of Statistics, Brigham Young Unive...",57193016069,1201,57193016069,Tracy J.B.
1951,1203,"Human and Computer Interaction Group, Departm...",13605498800,1203,13605498800,Verbert K.
1952,1204,Department of Mechanical and Aerospace Engine...,57203230997,1204,57203230997,Ferguson S.M.
1953,1205,"Dipartimento di Fisica, Università degli Studi...",57191031931,1205,57191031931,Brusati M.


In [49]:
editor

Unnamed: 0,ID_x,school_name,author_ID,org_ID,ID_y,person_name
1954,1205,"Dipartimento di Fisica, Università degli Studi...",6701695863,1205,6701695863,Citterio M.
1955,1206,"INFN Milano, via G. Celoria, 16, Milano, 2013...",37098602100,1206,37098602100,Camplani A.
1956,1206,"INFN Milano, via G. Celoria, 16, Milano, 2013...",7006631262,1206,7006631262,Lazzaroni M.
1957,1207,NSF Center for High Performance Reconfigurabl...,56404157200,1207,56404157200,Takai H.
1958,1208,"Brookhaven National Laboratory, UptonNY, Unit...",56424241200,1208,56424241200,Chen H.
...,...,...,...,...,...,...
2167,1457,United States Department of Agriculture-Agric...,7401698226,1457,7401698226,Allen B.L.
2168,1458,United States Department of Agriculture-Agric...,8702836000,1458,8702836000,Anapalli S.
2169,1459,"Agriculture and Agri-Food Canada, Lethbridge,...",7005306331,1459,7005306331,Blackshaw R.E.
2170,1460,"Department of Crop and Soil Sciences, Washing...",7102554886,1460,7102554886,Lyon D.J.


In [50]:
journals['editors'] = editor.iloc[:len(journals)]['person_name'].values
journals.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  journals['editors'] = editor.iloc[:len(journals)]['person_name'].values


Unnamed: 0,ID_x,title,volume,DOI,accepted,article_ID,publisher_ID,year,ID_y,name,Type,editors
0,1,Self-assessment of Proficiency of Intelligent ...,1210 AISC,10.1007/978-3-030-51758-8_15,True,1,198,2020,198,International Conference on Transportation and...,Conference Paper,Citterio M.
1,2,Bridging theory to practice: Feedforward and c...,695 LNEE,10.1007/978-3-030-58653-9_3,True,2,389,2020,389,AAAI Workshop - Technical Report,Conference Paper,Camplani A.
2,3,Development of sediment management guidelines ...,146,10.1061/(ASCE)HY.1943-7900.0001822,True,3,216,2020,216,Advances in Heat Transfer,Conference Paper,Lazzaroni M.
3,4,Structural design space exploration using prin...,20,10.1115/1.4047428,True,4,21,2018,21,Proceedings - 2020 IEEE 21st International Con...,Conference Paper,Takai H.
4,5,Religion-focused dating apps: A Q methodology ...,55,10.1016/j.tele.2020.101448,True,5,133,2018,133,IEEE Radiation Effects Data Workshop,Conference Paper,Chen H.


In [51]:
conferences = workshops.append(workshops)

  conferences = workshops.append(workshops)


In [52]:
conferences

Unnamed: 0,ID_x,title,volume,DOI,accepted,article_ID,publisher_ID,year,ID_y,name,Type
176,177,An APMonitor Temperature Lab PID Control Exper...,2019-September,10.1109/ETFA.2019.8869247,True,177,201,2018,201,Proceedings of the International Telemetering ...,Conference Paper
177,178,Design of a Clinical Decision Support System f...,23,10.1109/JBHI.2018.2877595,True,178,47,2017,47,Electronics (Switzerland),Conference Paper
178,179,Structure descriptor based on just noticeable ...,58,10.1364/AO.58.006504,True,179,190,2018,190,Journal of Aircraft,Journal
179,180,3D hydrodynamic flow focusing-based micromixer...,293,10.1016/j.snb.2019.05.026,True,180,60,2017,60,International Journal of Machine Tools and Man...,Journal
180,181,Thermal and hydrothermal stability of pure and...,284,10.1016/j.micromeso.2019.04.005,True,181,19,2018,19,JOM,Conference Paper
...,...,...,...,...,...,...,...,...,...,...,...
436,437,Identification and Prioritization of Critical ...,14,10.1080/15578771.2017.1372535,True,437,37,2018,37,Journal of Science Education and Technology,Journal
437,438,The emerging impact of cell-free chemical bios...,53,10.1016/j.copbio.2017.12.019,True,438,155,2017,155,IEEE International Conference on Communications,Conference Paper
438,439,Efficiency of Generalized Regular k-point grids,153,10.1016/j.commatsci.2018.06.031,True,439,207,2016,207,Biomicrofluidics,Conference Paper
439,440,Rapid Operational Access and Maneuver Support ...,15,10.1177/1548512918772028,True,440,169,2016,169,Construction and Building Materials,Conference Paper


In [53]:
chair_names = list(chair['person_name'].values)
chair_names

['Laughlin E.',
 'Howe L.',
 'Talbot M.',
 'Rajnarayan D.',
 'Grames C.',
 'Dillon P.B.',
 'Eixenberger J.G.',
 'Jensen D.W.',
 'Machanzi T.',
 'Wald M.O.',
 'Richey M.C.',
 'Campbell C.',
 'Hansen N.',
 'Evans S.',
 'Cobos D.',
 'Campbell A.',
 'Hopkins B.',
 'Campbell E.',
 'Russell D.N.',
 'Ishimwe E.',
 'Coffman R.A.',
 'Lopez J.S.',
 'Vera-Grunauer X.',
 'Salvatierra G.',
 'Leslie Youd T.',
 'Error B.M.',
 'Bush W.S.',
 'Briggs F.',
 'Sloan C.',
 'Crawford D.C.',
 'Freedman D.',
 'Szymanski R.',
 'Hopkins E.',
 'Abraham Martin R.',
 'Heiner B.K.',
 'Micah Downing J.',
 'McKinley R.L.',
 'Vaugh A.B.',
 'Hales Swift S.',
 'Reichman B.',
 'Harker B.M.',
 'Cook M.R.',
 'Hwang J.T.',
 'Migliaccio M.',
 'Heron M.L.',
 'High G.',
 'Brower D.',
 'Witherow K.',
 'Wilson J.C.',
 'Shen W.',
 'Lopes C.V.',
 'Yan K.',
 'Riding K.A.',
 'Brueseke M.',
 'Risovi-Hendrickson W.',
 'Peterman R.J.',
 'Mosavi H.',
 'Amoroso S.',
 'Milana G.',
 'Lusvardi C.',
 'Monaco P.',
 'Morales C.',
 'Ledezma C.',

In [54]:
conf_chairs = [chair_names[random.randint(0,len(chair_names)-1)]  for i in range(len(conferences))]
conf_chairs

['Durdle C.A.',
 'Sun D.',
 'Akamine M.',
 'Migliaccio M.',
 'Coombs D.',
 'Mosavi H.',
 'Vallée T.',
 'Brower D.',
 'Villalon-Reina J.E.',
 'Jafek B.',
 'Lopes R.',
 'Lee K.',
 'Morales C.',
 'Sanchez D.G.',
 'Reichman B.',
 'Risovi-Hendrickson W.',
 'Simon B.',
 'Hendershott J.',
 'Gudbrandsen M.',
 'Tamini A.',
 'Kirk R.',
 'Jonas R.K.',
 'Han J.',
 'Le Gall A.',
 'Folkman S.',
 'Riding K.A.',
 'White S.',
 'Simon B.',
 'Jalbrzikowski M.',
 'Pisano D.J.',
 'Luna A.',
 'Bertels C.',
 'Lusvardi C.',
 'Jagadeesh C.',
 'Ching C.R.K.',
 'Rochas L.',
 'Kates W.R.',
 'Chen Y.-F.',
 'Seaman C.H.',
 'Poussin H.',
 'Folkman S.',
 'Ahmad W.',
 'Jalbrzikowski M.',
 'Hayes A.',
 'Pingel N.',
 'Watts-Englert J.',
 'Lopez J.S.',
 'Johnson C.',
 'Bertrand R.',
 'Seaman C.H.',
 'Black R.A.',
 'Harker B.M.',
 'Risovi-Hendrickson W.',
 'Van Amelsvoort T.',
 'Shen W.',
 'Johnson C.',
 'High G.',
 'Ruparel K.',
 'Brusati M.',
 'Campbell L.E.',
 'Kipp R.M.',
 'Wall S.',
 'Clark M.R.',
 'Howe L.',
 'Vera-

In [55]:
conferences['chair'] = conf_chairs

In [56]:
conferences = conferences.drop_duplicates(subset=['name'])
conferences

Unnamed: 0,ID_x,title,volume,DOI,accepted,article_ID,publisher_ID,year,ID_y,name,Type,chair
176,177,An APMonitor Temperature Lab PID Control Exper...,2019-September,10.1109/ETFA.2019.8869247,True,177,201,2018,201,Proceedings of the International Telemetering ...,Conference Paper,Durdle C.A.
177,178,Design of a Clinical Decision Support System f...,23,10.1109/JBHI.2018.2877595,True,178,47,2017,47,Electronics (Switzerland),Conference Paper,Sun D.
178,179,Structure descriptor based on just noticeable ...,58,10.1364/AO.58.006504,True,179,190,2018,190,Journal of Aircraft,Journal,Akamine M.
179,180,3D hydrodynamic flow focusing-based micromixer...,293,10.1016/j.snb.2019.05.026,True,180,60,2017,60,International Journal of Machine Tools and Man...,Journal,Migliaccio M.
180,181,Thermal and hydrothermal stability of pure and...,284,10.1016/j.micromeso.2019.04.005,True,181,19,2018,19,JOM,Conference Paper,Coombs D.
...,...,...,...,...,...,...,...,...,...,...,...,...
434,435,Cryptanalysis of Lempel-Ziv Compressed and Enc...,2018-August,10.1109/ISWCS.2018.8491196,True,435,268,2018,268,ACM-BCB 2018 - Proceedings of the 2018 ACM Int...,Conference Paper,Hopkins B.
435,436,Erratum: Hardware and software improvements to...,57,10.1364/AO.57.008528,True,436,374,2017,374,Structural and Multidisciplinary Optimization,Conference Paper,Paillou P.
438,439,Efficiency of Generalized Regular k-point grids,153,10.1016/j.commatsci.2018.06.031,True,439,207,2016,207,Biomicrofluidics,Conference Paper,Haber J.M.
439,440,Rapid Operational Access and Maneuver Support ...,15,10.1177/1548512918772028,True,440,169,2016,169,Construction and Building Materials,Conference Paper,Tamini A.


In [57]:
for index,row in conferences.iterrows():
    authorname = row['chair']
    confname = row['name']

    # parsing conference
    conf_title = URLparse(confname)
    conf_node = URIRef(f"http://kg_sdm.org/venue/{conf_title}")
    venue_lit = Literal(str(conf_title))

    # parsing authors
    per_title = URLparse(authorname)
    per_node = URIRef(f"http://kg_sdm.org/Person/{per_title}")


    g.add((per_node, KG_SDM.handles, conf_node))

In [58]:
for index,row in journals.iterrows():
    authorname = row['editors']
    confname = row['name']

    # parsing conference
    conf_title = URLparse(confname)
    conf_node = URIRef(f"http://kg_sdm.org/venue/{conf_title}")
    venue_lit = Literal(str(conf_title))

    # parsing authors
    per_title = URLparse(authorname)
    per_node = URIRef(f"http://kg_sdm.org/Person/{per_title}")


    g.add((per_node, KG_SDM.handles, conf_node))

In [59]:
save_rdf_file(g,"abox",rdf_format='ttl')

In [62]:
authors.drop_duplicates()

Unnamed: 0,ID_x,school_name,author_ID,org_ID,ID_y,person_name
0,1,"Computer Science Department, Brigham Young Uni...",57218202833,1,57218202833,Gautam A.
1,1,"Computer Science Department, Brigham Young Uni...",7004904337,1,7004904337,Crandall J.W.
2,1,"Computer Science Department, Brigham Young Uni...",7005513246,1,7005513246,Goodrich M.A.
3,1,"Computer Science Department, Brigham Young Uni...",55078951500,1,55078951500,Bodily P.M.
4,1,"Computer Science Department, Brigham Young Uni...",57215280005,1,57215280005,Jacobsen A.J.
...,...,...,...,...,...,...
1299,562,"Department of Chemistry and Biochemistry, Brig...",57211500187,562,57211500187,Membreno K.
1300,563,"Department of Physics and Astronomy, Brigham ...",57209912956,563,57209912956,Avval T.G.
1301,563,"Department of Physics and Astronomy, Brigham ...",57211503118,563,57211503118,Carver V.
1302,563,"Department of Physics and Astronomy, Brigham ...",7006120837,563,7006120837,Allred D.D.
