### Adding precedingEvent relation in Reactome curator project 

This code imports an `.rtpj` project file from the Reactome curator tool and adds new precedingEvent instances to reactions, based on curator-reviewed information from shared Google spreadsheets.

Details of this process are documented [here](https://docs.google.com/document/d/1BljDy1GrGZmGfdZUheZ1ANJwXwC1lPCLrEmPF3OP6bQ/edit).

In [None]:
def precedingEvent_Adder(XML_Root, first_stID, first_class, second_stID):
    """Record the first reaction as a precedingEvent of the second reaction.

    Every ``instance`` element below ``XML_Root`` is inspected. For an
    instance whose ``DB_ID`` attribute equals ``second_stID``, a child
    ``<attribute name="precedingEvent" class=... referTo=...>`` is appended
    and the instance is flagged with ``isDirty="true"``, unless it already
    carries a ``precedingEvent`` attribute that refers to ``first_stID``.

    Elements that lack ``DB_ID``, ``name`` or ``referTo`` are skipped instead
    of raising ``KeyError``.

    Args:
        XML_Root: ElementTree element to search; it is modified in place.
        first_stID: Identifier of the preceding reaction, written to (and
            compared against) the ``referTo`` attribute.
        first_class: Value written to the ``class`` attribute of the new
            element.
        second_stID: ``DB_ID`` of the instance that receives the relation.

    Returns:
        None. A message is printed when no instance matches ``second_stID``.
    """
    found = False
    for instance in XML_Root.iter('instance'):
        db_id = instance.get('DB_ID')
        if db_id is None or db_id != second_stID:
            continue
        found = True

        # Is the first reaction already listed as a precedingEvent here?
        already_linked = any(
            attr.get('name') == 'precedingEvent'
            and attr.get('referTo') == first_stID
            for attr in instance.iter('attribute')
        )
        if already_linked:
            # Nothing to add; stop scanning the project.
            break

        # Attributes are set in the order name, class, referTo.
        ET.SubElement(
            instance,
            'attribute',
            {
                'name': 'precedingEvent',
                'class': first_class,
                'referTo': first_stID,
            },
        )
        # Flag the instance as modified.
        instance.set('isDirty', "true")

    if not found:
        print(second_stID,"not found in project!")

In [None]:
import os
import xml.etree.ElementTree as ET

# Resolve the data directory relative to the notebook's location.
notebook_path = os.path.abspath("precedingEvent_Curation_Semi_Automation.ipynb")
# os.path handles the platform's separator; the empty last component keeps a
# trailing separator because other cells build file names as `path + name`.
path = os.path.join(os.path.dirname(notebook_path), 'Reaction_Connectivity', '')

# Parse the curator-tool project file and keep its root element for editing.
InputProject = 'Complete_ImmuneSystem_Input.rtpj'
MyTree = ET.parse(os.path.join(path, InputProject))
MyRoot = MyTree.getroot()

In [None]:
# Optional inspection of the loaded project XML.
# MyRoot is serialised with ElementTree and re-parsed with minidom so the
# document can be pretty-printed; uncomment the print below to dump it.
from xml.dom import minidom

dom=minidom.parseString(ET.tostring(MyRoot))
#print(dom.toprettyxml())

In [None]:
import cypher

# Database connection URL.
# NOTE(review): the user name and password are embedded in this URL.
Con="http://neo4j:reactome@localhost:7474/db/data" #database connection

# Query returning, for every Homo sapiens ReactionLikeEvent, its stId
# (as EventID) and its schemaClass (as Class).
ReaClass_ID_CQ="""
MATCH (ev:ReactionLikeEvent{speciesName:"Homo sapiens"})
RETURN DISTINCT ev.stId AS EventID, ev.schemaClass AS Class
""" 
# Run the query and build a lookup dict: EventID -> Class.
ReaClassMap_DF=cypher.run(ReaClass_ID_CQ,conn=Con).get_dataframe()
ReaClass_Dict=ReaClassMap_DF.set_index('EventID')['Class'].to_dict()

In [23]:
import os

import gspread
import pandas as pd
from oauth2client.service_account import ServiceAccountCredentials

# Resolve the data directory relative to the notebook's location.
# os.path handles the platform's separator; the empty last component keeps a
# trailing separator because file names are appended with `path + name`.
notebook_path = os.path.abspath("precedingEvent_Curation_Semi_Automation.ipynb")
path = os.path.join(os.path.dirname(notebook_path), 'Reaction_Connectivity', '')

# define the scope
scope = ['https://spreadsheets.google.com/feeds','https://www.googleapis.com/auth/drive']

# load the service-account credentials from the key file in the data directory
creds = ServiceAccountCredentials.from_json_keyfile_name(path+'Reactome Connectivity-f994dc05dc97.json', scope)

# authorize the client
client = gspread.authorize(creds)

# open the spreadsheet by title and take its first worksheet
sheet = client.open('ImmuneSystem_Missing_precedingEvents')
sheet_instance = sheet.get_worksheet(0)

# Handy checks while exploring the sheet:
#sheet_instance.col_count
#sheet_instance.cell(col=3,row=2)

# fetch all records of the worksheet
records_data = sheet_instance.get_all_records()

# view the data
#records_data

# build a dataframe from the fetched records
records_df = pd.DataFrame(records_data)

# view top records
#records_df.head()

# view top records
#records_df.head()

In [None]:
# Test subset: keep only rows marked 'Approved', then narrow them down to the
# single Event value that occurs most often among those rows.
Approved_Records = records_df.loc[records_df['Approved/Rejected'].eq('Approved')]
Max_Freq_PrEv = Approved_Records['Event'].value_counts().index[0]
Approved_Records = Approved_Records.loc[Approved_Records['Event'].eq(Max_Freq_PrEv)]

In [None]:
# Display the current contents of Approved_Records for inspection.
Approved_Records

In [None]:
# To process every approved row rather than the current selection, re-enable:
#Approved_Records=records_df[records_df['Approved/Rejected']=='Approved']

# Add one precedingEvent relation per row of Approved_Records.
for index, row in Approved_Records.iterrows():
    #print(row['precedingEvent'],row['Event'])
    # The part after 'R-HSA-' is what gets matched against DB_ID / referTo.
    preceding_id = row['precedingEvent'].split('R-HSA-')[1]
    # The class lookup uses the full identifier, prefix included.
    preceding_class = ReaClass_Dict[row['precedingEvent']]
    event_id = row['Event'].split('R-HSA-')[1]
    precedingEvent_Adder(MyRoot, preceding_id, preceding_class, event_id)

In [None]:
from xml.dom import minidom

# Serialise the edited tree and run it through minidom for indentation.
xmlstr = minidom.parseString(ET.tostring(MyRoot,'utf-8')).toprettyxml(indent=" ",newl='')
# toprettyxml() without an encoding returns text whose XML declaration names
# no encoding, so readers will assume UTF-8. Write the file as UTF-8
# explicitly instead of relying on the platform's default encoding.
with open(path+"Complete_ImmuneSystem_Output.rtpj", "w", encoding="utf-8") as f:
    f.write(xmlstr)

In [None]:
# Number of rows in Approved_Records.
len(Approved_Records)
# Number of distinct Event values among those rows.
len(set(Approved_Records['Event']))