## Discription

### Background
Circos plotの描画の処理内で行われている結合をこちらのファイルで行うことを目的に作成しました

In [1]:
import collections
import csv
import datetime
import os
import pandas as pd
import numpy as np
from collections import Counter

In [2]:
# output directory
organism1_vs_organism2 = "HR" # human vs rice
direction = "down"
now = datetime.datetime.now()
circos_dir = f'../data/circos_{direction}_{organism1_vs_organism2}_{now.strftime("%y%m")}'
no_annotation = f'../data/{circos_dir}/no_annotation_{direction}'
os.makedirs(circos_dir, exist_ok=True)
os.makedirs(no_annotation, exist_ok=True)

## HUMAN_DOWN

1. Create a TSV file to plot genes with high HN-score on the Circos plot

2. Extract GOSlim corresponding to the list of up-regulated genes

In [3]:
# 1. Create a TSV file to plot genes with high HN-score on the Circos plot
#organism = "human" (H)
df_h_score = pd.read_csv(f"../data/human_annotation/human_position_{direction}.tsv", sep='\t')
df_h_score_subset = df_h_score[['Gene stable ID', # create a subset
                        'Chromosome/scaffold name', 
                        'start2 (bp)',
                        'end2 (bp)',
                        'HN-score(HN5)']].copy()
df_h_score_subset['Chromosome/scaffold name'] = 'H_' + df_h_score_subset['Chromosome/scaffold name'].astype(str)
df_h_score_subset['organism'] = 'human'# assign the tag

df_h_score_subset.to_csv(f"../data/{circos_dir}/human_position_{direction}_assign.tsv", sep='\t', index=False)

display(df_h_score_subset)

Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start2 (bp),end2 (bp),HN-score(HN5),organism
0,ENSG00000170473,H_12,55901413,55932618,-14,human
1,ENSG00000184675,H_X,64185117,64205708,-14,human
2,ENSG00000161653,H_17,44004622,44009068,-14,human
3,ENSG00000100281,H_22,35257452,35295807,-14,human
4,ENSG00000132773,H_1,45340052,45343973,-14,human
...,...,...,...,...,...,...
199,ENSG00000102921,H_16,48538726,48620148,-41,human
200,ENSG00000179922,H_19,55620741,55624566,-42,human
201,ENSG00000162526,H_1,32351521,32364312,-43,human
202,ENSG00000196196,H_9,35906202,35907136,-52,human


In [4]:
# 2. Extract GOSlim corresponding to the list of up-regulated genes
goslim_all_human = pd.read_csv('../data/biomart_goslim/biomart_human_goslim_R110_domain.tsv', sep='\t',
                               dtype={'Gene stable ID': 'object'}, 
                               low_memory=False)
columns_of_interest_human = ['Gene stable ID', 
                             'GOSlim GOA Accession(s)', 
                             'GOSlim GOA Description', 
                             'GOSlim_domain']
goslim_all_human_filtered = goslim_all_human[columns_of_interest_human]

extract_df_h_goslim = pd.merge(
    df_h_score_subset, 
    goslim_all_human_filtered, 
    on='Gene stable ID', 
    how='inner'
    )
extract_df_h_goslim = extract_df_h_goslim.drop_duplicates(subset=['Gene stable ID',
                                                                  'GOSlim GOA Accession(s)', 
                                                                  'GOSlim GOA Description'], keep='first').copy()

# Extract genes with no annotations
extract_df_h_goslim_with_indicator = pd.merge(
    df_h_score_subset, 
    goslim_all_human_filtered,
    left_on='Gene stable ID',
    right_on='Gene stable ID',
    how='left',
    indicator=True # add indicator column to show the difference
)

# extract_df_h_goslim_with_indicator
left_only_rows = extract_df_h_goslim_with_indicator[extract_df_h_goslim_with_indicator['_merge'] == 'left_only']

extract_df_h_goslim.to_csv(f"../data/{circos_dir}/human_goslim_{direction}.tsv", sep='\t', index=False)
left_only_rows.to_csv(f"../data/{no_annotation}/human_position_down_no_annotation.tsv", sep='\t', index=False)

display(goslim_all_human_filtered, extract_df_h_goslim, left_only_rows)

Unnamed: 0,Gene stable ID,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain
0,ENSG00000243485,GO:0031047,gene silencing by RNA,biological_process
1,ENSG00000284332,GO:0031047,gene silencing by RNA,biological_process
2,ENSG00000186092,GO:0023052,signaling,biological_process
3,ENSG00000186092,GO:0060089,molecular transducer activity,molecular_function
4,ENSG00000186092,GO:0005886,plasma membrane,cellular_component
...,...,...,...,...
193923,ENSG00000292372,GO:0007010,cytoskeleton organization,biological_process
193924,ENSG00000292372,GO:0008092,cytoskeletal protein binding,molecular_function
193925,ENSG00000292372,GO:0031410,cytoplasmic vesicle,cellular_component
193926,ENSG00000292372,GO:0043226,organelle,cellular_component


Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start2 (bp),end2 (bp),HN-score(HN5),organism,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain
0,ENSG00000170473,H_12,55901413,55932618,-14,human,GO:0043226,organelle,cellular_component
1,ENSG00000170473,H_12,55901413,55932618,-14,human,GO:0005634,nucleus,cellular_component
2,ENSG00000170473,H_12,55901413,55932618,-14,human,GO:0003723,RNA binding,molecular_function
3,ENSG00000170473,H_12,55901413,55932618,-14,human,GO:0016071,mRNA metabolic process,biological_process
4,ENSG00000170473,H_12,55901413,55932618,-14,human,GO:0005654,nucleoplasm,cellular_component
...,...,...,...,...,...,...,...,...,...
1666,ENSG00000124575,H_6,26234212,26234987,-71,human,GO:0005634,nucleus,cellular_component
1667,ENSG00000124575,H_6,26234212,26234987,-71,human,GO:0003723,RNA binding,molecular_function
1668,ENSG00000124575,H_6,26234212,26234987,-71,human,GO:0006310,DNA recombination,biological_process
1669,ENSG00000124575,H_6,26234212,26234987,-71,human,GO:0006351,DNA-templated transcription,biological_process


Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start2 (bp),end2 (bp),HN-score(HN5),organism,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,_merge
406,ENSG00000176092,H_1,26321698,26360080,-15,human,,,,left_only
439,ENSG00000182359,H_11,106051098,106077459,-15,human,,,,left_only
475,ENSG00000234409,H_22,20148416,20151055,-15,human,,,,left_only
503,ENSG00000186118,H_1,46668855,46673594,-15,human,,,,left_only
621,ENSG00000125089,H_4,8182072,8241803,-16,human,,,,left_only
674,ENSG00000187808,H_X,119758588,119760202,-16,human,,,,left_only
743,ENSG00000167183,H_17,47951967,47957883,-17,human,,,,left_only
749,ENSG00000175449,H_5,95646777,95684773,-17,human,,,,left_only
885,ENSG00000179023,H_1,18480930,18485974,-18,human,,,,left_only
936,ENSG00000179476,H_14,44897275,44907257,-19,human,,,,left_only


## RICE_down

1. Create a TSV file to plot genes with high HN-score on the Circos plot

2. Extract GOSlim corresponding to the list of up-regulated genes

In [5]:
# 1. Create a TSV file to plot genes with high HN-score on the Circos plot
df_r_score = pd.read_csv(f'../data/rice_annotation/rice_position_{direction}.tsv', sep='\t')
df_r_score_subset = df_r_score[['Gene stable ID', # create a subset
                        'Chromosome/scaffold name', 
                        'start1 (bp)',
                        'end1 (bp)',
                        'HN-score(HN5)']].copy()
df_r_score_subset['Chromosome/scaffold name'] = 'R_' + df_r_score_subset['Chromosome/scaffold name'].astype(str)
df_r_score_subset['organism'] = 'rice'# assign the tag

df_r_score_subset.to_csv(f"../data/{circos_dir}/rice_position_{direction}_assign.tsv", sep='\t', index=False)

display(df_r_score_subset)

Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start1 (bp),end1 (bp),HN-score(HN5),organism
0,Os01g0136300,R_1,1955181,1955914,-40,rice
1,Os05g0588225,R_5,29307170,29307793,-40,rice
2,Os03g0358800,R_3,13915940,13917546,-40,rice
3,Os06g0157900,R_6,2947702,2948085,-40,rice
4,Os11g0439600,R_11,14390405,14398122,-40,rice
...,...,...,...,...,...,...
379,Os01g0952800,R_1,41971444,41978093,-173,rice
380,Os07g0142100,R_7,2175193,2175719,-178,rice
381,Os03g0307200,R_3,10926469,10927729,-182,rice
382,Os07g0142200,R_7,2176824,2177640,-189,rice


In [6]:
# 2. Extract GOSlim corresponding to the list of up-regulated genes
goslim_all_rice = pd.read_csv('../data/biomart_goslim/biomart_rice_goslim_R56_domain.tsv', sep='\t',
                               dtype={'Gene stable ID': 'object'}, 
                               low_memory=False)
columns_of_interest_rice = ['Gene stable ID',
                            'GOSlim GOA Accession(s)', 
                            'GOSlim GOA Description', 
                            'GOSlim_domain']
goslim_all_rice_filtered = goslim_all_rice[columns_of_interest_rice]

extract_df_r_goslim = pd.merge(
    df_r_score_subset, 
    goslim_all_rice_filtered, 
    on='Gene stable ID', 
    how='inner'
    )

extract_df_r_goslim = extract_df_r_goslim.drop_duplicates(subset=['Gene stable ID',
                                                                    'GOSlim GOA Accession(s)', 
                                                                    'GOSlim GOA Description'], keep='first').copy()

# Extract genes with no annotations
extract_df_r_goslim_with_indicator = pd.merge(
    df_r_score_subset, 
    goslim_all_rice_filtered,
    left_on='Gene stable ID',
    right_on='Gene stable ID',
    how='left',
    indicator=True # add indicator column to show the difference
)

# extract_df_r_goslim_with_indicator
left_only_rows = extract_df_r_goslim_with_indicator[extract_df_r_goslim_with_indicator['_merge'] == 'left_only']

extract_df_r_goslim.to_csv(f"../data/{circos_dir}/rice_goslim_{direction}.tsv", sep='\t', index=False)
left_only_rows.to_csv(f"../data/{no_annotation}/rice_position_{direction}_no_annotation.tsv", sep='\t', index=False)

display(goslim_all_rice_filtered, extract_df_r_goslim, left_only_rows)

Unnamed: 0,Gene stable ID,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain
0,Os01g0100100,GO:0006810,transport,biological_process
1,Os01g0100100,GO:0008150,biological_process,biological_process
2,Os01g0100100,GO:0009987,cellular process,biological_process
3,Os01g0100100,GO:0003674,molecular_function,molecular_function
4,Os01g0100100,GO:0030234,enzyme regulator activity,molecular_function
...,...,...,...,...
219494,gene-rps19,GO:0003674,molecular_function,molecular_function
219495,gene-rps19,GO:0005198,structural molecule activity,molecular_function
219496,gene-rps19,GO:0005488,binding,molecular_function
219497,gene-rps19,GO:0003723,RNA binding,molecular_function


Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start1 (bp),end1 (bp),HN-score(HN5),organism,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain
0,Os03g0358800,R_3,13915940,13917546,-40,rice,GO:0005575,cellular_component,cellular_component
1,Os03g0358800,R_3,13915940,13917546,-40,rice,GO:0005622,intracellular anatomical structure,cellular_component
2,Os03g0358800,R_3,13915940,13917546,-40,rice,GO:0003674,molecular_function,molecular_function
3,Os03g0358800,R_3,13915940,13917546,-40,rice,GO:0003824,catalytic activity,molecular_function
4,Os03g0358800,R_3,13915940,13917546,-40,rice,GO:0016740,transferase activity,molecular_function
...,...,...,...,...,...,...,...,...,...
2028,Os03g0307300,R_3,10929507,10930895,-196,rice,GO:0016740,transferase activity,molecular_function
2029,Os03g0307300,R_3,10929507,10930895,-196,rice,GO:0008150,biological_process,biological_process
2030,Os03g0307300,R_3,10929507,10930895,-196,rice,GO:0009987,cellular process,biological_process
2031,Os03g0307300,R_3,10929507,10930895,-196,rice,GO:0008152,metabolic process,biological_process


Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start1 (bp),end1 (bp),HN-score(HN5),organism,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,_merge
0,Os01g0136300,R_1,1955181,1955914,-40,rice,,,,left_only
1,Os05g0588225,R_5,29307170,29307793,-40,rice,,,,left_only
24,Os07g0600200,R_7,24487821,24488449,-40,rice,,,,left_only
25,Os08g0178650,R_8,4597056,4597381,-40,rice,,,,left_only
32,Os02g0565150,R_2,21457944,21458344,-40,rice,,,,left_only
...,...,...,...,...,...,...,...,...,...,...
2124,Os01g0647200,R_1,26086062,26088337,-165,rice,,,,left_only
2132,Os02g0594166,R_2,23008659,23011091,-170,rice,,,,left_only
2133,Os01g0608101,R_1,24004176,24006793,-171,rice,,,,left_only
2157,Os07g0142100,R_7,2175193,2175719,-178,rice,,,,left_only


## COMBINED_UP( HUMAN_UP + RICE_UP )

1. chromosome position + gene position + HN-score

2. GOslim terms corresponding to the list of up-regulated genes

In [7]:
# 1. chromosome position + gene position + HN-score
combined_df_score = pd.concat([df_h_score_subset, 
                               df_r_score_subset])
combined_df_score.reset_index(drop=True, inplace=True)
combined_df_score['Gene start (bp)'] = combined_df_score['start1 (bp)'].combine_first(combined_df_score['start2 (bp)'])
combined_df_score['Gene end (bp)'] = combined_df_score['end1 (bp)'].combine_first(combined_df_score['end2 (bp)'])
combined_df_score.drop(['start1 (bp)', 'start2 (bp)', 'end1 (bp)', 'end2 (bp)'], axis=1, inplace=True)

combined_df_score.to_csv(f"../data/{circos_dir}/combined_position_{direction}.tsv", sep='\t', index=False)

display(combined_df_score)

Unnamed: 0,Gene stable ID,Chromosome/scaffold name,HN-score(HN5),organism,Gene start (bp),Gene end (bp)
0,ENSG00000170473,H_12,-14,human,55901413.0,55932618.0
1,ENSG00000184675,H_X,-14,human,64185117.0,64205708.0
2,ENSG00000161653,H_17,-14,human,44004622.0,44009068.0
3,ENSG00000100281,H_22,-14,human,35257452.0,35295807.0
4,ENSG00000132773,H_1,-14,human,45340052.0,45343973.0
...,...,...,...,...,...,...
583,Os01g0952800,R_1,-173,rice,41971444.0,41978093.0
584,Os07g0142100,R_7,-178,rice,2175193.0,2175719.0
585,Os03g0307200,R_3,-182,rice,10926469.0,10927729.0
586,Os07g0142200,R_7,-189,rice,2176824.0,2177640.0


In [8]:
merged_df_goslim = pd.merge(
    extract_df_h_goslim, 
    extract_df_r_goslim, 
    left_on=['GOSlim GOA Accession(s)', 'GOSlim GOA Description', 'GOSlim_domain'],
    right_on=['GOSlim GOA Accession(s)', 'GOSlim GOA Description', 'GOSlim_domain'],
    how='inner',
    suffixes=('_human', '_rice')
)

merged_df_goslim = merged_df_goslim[['GOSlim GOA Accession(s)',
                                     'GOSlim GOA Description',
                                     'GOSlim_domain',
                                     'Gene stable ID_rice',
                                     'Gene stable ID_human',
                                     'Chromosome/scaffold name_rice',
                                     'start1 (bp)',
                                     'end1 (bp)',
                                     'Chromosome/scaffold name_human',
                                     'start2 (bp)',
                                     'end2 (bp)',
                                     'HN-score(HN5)_rice',
                                     'HN-score(HN5)_human']]

# Remove duplicate rows based on specific columns
merged_df_goslim.drop_duplicates(inplace=True)

display(merged_df_goslim)

Unnamed: 0,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,Gene stable ID_rice,Gene stable ID_human,Chromosome/scaffold name_rice,start1 (bp),end1 (bp),Chromosome/scaffold name_human,start2 (bp),end2 (bp),HN-score(HN5)_rice,HN-score(HN5)_human
0,GO:0005634,nucleus,cellular_component,Os02g0204400,ENSG00000170473,R_2,5835504,5838694,H_12,55901413,55932618,-41,-14
1,GO:0005634,nucleus,cellular_component,Os01g0772100,ENSG00000170473,R_1,32592931,32593952,H_12,55901413,55932618,-42,-14
2,GO:0005634,nucleus,cellular_component,Os03g0634400,ENSG00000170473,R_3,24226372,24227930,H_12,55901413,55932618,-43,-14
3,GO:0005634,nucleus,cellular_component,Os03g0617500,ENSG00000170473,R_3,23410223,23412753,H_12,55901413,55932618,-44,-14
4,GO:0005634,nucleus,cellular_component,Os03g0162200,ENSG00000170473,R_3,3371375,3372344,H_12,55901413,55932618,-45,-14
...,...,...,...,...,...,...,...,...,...,...,...,...,...
18457,GO:0030312,external encapsulating structure,cellular_component,Os02g0783000,ENSG00000111186,R_2,33221378,33223595,H_12,1529891,1647212,-45,-17
18458,GO:0030312,external encapsulating structure,cellular_component,Os06g0193200,ENSG00000111186,R_6,4700512,4703042,H_12,1529891,1647212,-45,-17
18459,GO:0030312,external encapsulating structure,cellular_component,Os09g0472900,ENSG00000111186,R_9,18067436,18068411,H_12,1529891,1647212,-47,-17
18460,GO:0030312,external encapsulating structure,cellular_component,Os01g0284500,ENSG00000111186,R_1,10167946,10168997,H_12,1529891,1647212,-52,-17


In [9]:
# for debugging purpose
unique_accessions_r = set(extract_df_r_goslim['GOSlim GOA Accession(s)'].unique())
unique_accessions_h = set(extract_df_h_goslim['GOSlim GOA Accession(s)'].unique())
common_accessions = unique_accessions_r.intersection(unique_accessions_h)
unique_accessions_merged = set(merged_df_goslim['GOSlim GOA Accession(s)'].unique())
missing_accessions = common_accessions.difference(unique_accessions_merged)
duplicates = merged_df_goslim.duplicated(subset=['GOSlim GOA Accession(s)', 'Gene stable ID_rice', 'Gene stable ID_human'], keep=False)
unique_combinations = not duplicates.any()
print(f"Number of common accessions: {len(common_accessions)}")
print(f"Number of missing accessions: {len(missing_accessions)}")
print(f"Missing accessions: {missing_accessions}")
print(f"各'Gene stable ID_rice'と'Gene stable ID_human'の組み合わせはユニークですか？: {unique_combinations}")

#Count the occurrences of each 'GOSlim GOA Accession(s)'
accession_counter = Counter(merged_df_goslim['GOSlim GOA Accession(s)'])

#Sort the DataFrame based on the counts
merged_df_goslim['count'] = merged_df_goslim['GOSlim GOA Accession(s)'].map(accession_counter)
merged_df_goslim_sorted = merged_df_goslim.sort_values('count', ascending=False).drop('count', axis=1)
merged_df_goslim_sorted.reset_index(drop=True, inplace=True)
merged_df_goslim_sorted.to_csv(f"../data/{circos_dir}/combined_goslim_{direction}.tsv", sep='\t', index=False)

display(merged_df_goslim_sorted)

Number of common accessions: 26
Number of missing accessions: 0
Missing accessions: set()
各'Gene stable ID_rice'と'Gene stable ID_human'の組み合わせはユニークですか？: True


Unnamed: 0,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,Gene stable ID_rice,Gene stable ID_human,Chromosome/scaffold name_rice,start1 (bp),end1 (bp),Chromosome/scaffold name_human,start2 (bp),end2 (bp),HN-score(HN5)_rice,HN-score(HN5)_human
0,GO:0003824,catalytic activity,molecular_function,Os02g0504000,ENSG00000142731,R_2,17880882,17886285,H_4,127880893,127899224,-84,-17
1,GO:0003824,catalytic activity,molecular_function,Os02g0121700,ENSG00000151117,R_2,1145837,1151052,H_11,18693122,18704785,-59,-16
2,GO:0003824,catalytic activity,molecular_function,Os02g0571100,ENSG00000151117,R_2,21841092,21847545,H_11,18693122,18704785,-53,-16
3,GO:0003824,catalytic activity,molecular_function,Os05g0360400,ENSG00000151117,R_5,17175702,17176974,H_11,18693122,18704785,-54,-16
4,GO:0003824,catalytic activity,molecular_function,Os11g0641500,ENSG00000151117,R_11,25423701,25426045,H_11,18693122,18704785,-54,-16
...,...,...,...,...,...,...,...,...,...,...,...,...,...
18457,GO:0006091,generation of precursor metabolites and energy,biological_process,Os06g0718400,ENSG00000213689,R_6,30528045,30529070,H_3,48465811,48467645,-89,-28
18458,GO:0030312,external encapsulating structure,cellular_component,Os03g0651800,ENSG00000111186,R_3,25315697,25317065,H_12,1529891,1647212,-68,-17
18459,GO:0005635,nuclear envelope,cellular_component,Os12g0236100,ENSG00000213689,R_12,7441389,7446142,H_3,48465811,48467645,-81,-28
18460,GO:0005635,nuclear envelope,cellular_component,Os12g0236100,ENSG00000205808,R_12,7441389,7446142,H_9,4662294,4665258,-81,-18


## Merge GOslim terms and line enrichment results

In [11]:
enrichment = pd.read_csv(f'../data/line_enrichment_{direction}_HR_2311/goslim_correspondence_q_values_{direction}_HR/goslim_correspondence_fold_enrichment_p_q_{direction}.tsv', sep='\t')

merged_df_goslim_sorted["GOSlim"] = merged_df_goslim_sorted['GOSlim GOA Accession(s)'] + ": " + merged_df_goslim_sorted['GOSlim GOA Description']

merged_df_goslim_sorted = merged_df_goslim_sorted[['GOSlim',
                                                    'GOSlim GOA Accession(s)',
                                                    'GOSlim GOA Description',
                                                    'GOSlim_domain',
                                                    'Gene stable ID_rice',
                                                    'Gene stable ID_human',
                                                    'Chromosome/scaffold name_rice',
                                                    'start1 (bp)',
                                                    'end1 (bp)',
                                                    'Chromosome/scaffold name_human',
                                                    'start2 (bp)',
                                                    'end2 (bp)',
                                                    'HN-score(HN5)_rice',
                                                    'HN-score(HN5)_human']]


merged_df_goslim_enrichment = pd.merge(
    merged_df_goslim_sorted,
    enrichment[['GOSlim', f'counts_{direction}', 'fold_enrichment', 'p_value', 'q_value']],
    on='GOSlim',
    how='left'
)
merged_df_goslim_enrichment.sort_values('fold_enrichment', ascending=False, inplace=True)
merged_df_goslim_enrichment.reset_index(drop=True, inplace=True)
merged_df_goslim_enrichment.to_csv(f"../data/{circos_dir}/combined_goslim_enrichment_{direction}.tsv", sep='\t', index=False)

display(enrichment, merged_df_goslim_sorted, merged_df_goslim_enrichment)

Unnamed: 0,GOSlim,GOSlim_domain,counts_down,counts_all,down_ratio,all_ratio,fold_enrichment,p_value,q_value
0,GO:0005764: lysosome,cellular_component,21,45237,0.001137,0.00022,5.178879,2.405986e-09,1.168622e-08
1,GO:0008289: lipid binding,molecular_function,64,257535,0.003467,0.00125,2.772388,1.956956e-12,1.33073e-11
2,GO:0005975: carbohydrate metabolic process,biological_process,153,676139,0.008287,0.003283,2.524444,1.642043e-23,1.395737e-22
3,GO:0005576: extracellular region,cellular_component,870,3845282,0.047124,0.01867,2.524069,2.0461569999999998e-127,3.478467e-126
4,GO:0006629: lipid metabolic process,biological_process,204,1517670,0.01105,0.007369,1.499557,2.974814e-08,1.264296e-07
5,GO:0005773: vacuole,cellular_component,54,422166,0.002925,0.00205,1.426989,0.007697182,0.02617042
6,GO:0016787: hydrolase activity,molecular_function,1107,8674587,0.059961,0.042117,1.423669,3.160887e-30,3.582339e-29
7,GO:0003824: catalytic activity,molecular_function,7998,62834292,0.433214,0.305076,1.420021,2.079914e-294,7.071706999999999e-293
8,GO:0003677: DNA binding,molecular_function,672,5819761,0.036399,0.028256,1.288173,8.590193e-11,4.867776e-10
9,GO:0016740: transferase activity,molecular_function,1080,10856023,0.058499,0.052709,1.109847,0.0002787134,0.001052917


Unnamed: 0,GOSlim,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,Gene stable ID_rice,Gene stable ID_human,Chromosome/scaffold name_rice,start1 (bp),end1 (bp),Chromosome/scaffold name_human,start2 (bp),end2 (bp),HN-score(HN5)_rice,HN-score(HN5)_human
0,GO:0003824: catalytic activity,GO:0003824,catalytic activity,molecular_function,Os02g0504000,ENSG00000142731,R_2,17880882,17886285,H_4,127880893,127899224,-84,-17
1,GO:0003824: catalytic activity,GO:0003824,catalytic activity,molecular_function,Os02g0121700,ENSG00000151117,R_2,1145837,1151052,H_11,18693122,18704785,-59,-16
2,GO:0003824: catalytic activity,GO:0003824,catalytic activity,molecular_function,Os02g0571100,ENSG00000151117,R_2,21841092,21847545,H_11,18693122,18704785,-53,-16
3,GO:0003824: catalytic activity,GO:0003824,catalytic activity,molecular_function,Os05g0360400,ENSG00000151117,R_5,17175702,17176974,H_11,18693122,18704785,-54,-16
4,GO:0003824: catalytic activity,GO:0003824,catalytic activity,molecular_function,Os11g0641500,ENSG00000151117,R_11,25423701,25426045,H_11,18693122,18704785,-54,-16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18457,GO:0006091: generation of precursor metabolite...,GO:0006091,generation of precursor metabolites and energy,biological_process,Os06g0718400,ENSG00000213689,R_6,30528045,30529070,H_3,48465811,48467645,-89,-28
18458,GO:0030312: external encapsulating structure,GO:0030312,external encapsulating structure,cellular_component,Os03g0651800,ENSG00000111186,R_3,25315697,25317065,H_12,1529891,1647212,-68,-17
18459,GO:0005635: nuclear envelope,GO:0005635,nuclear envelope,cellular_component,Os12g0236100,ENSG00000213689,R_12,7441389,7446142,H_3,48465811,48467645,-81,-28
18460,GO:0005635: nuclear envelope,GO:0005635,nuclear envelope,cellular_component,Os12g0236100,ENSG00000205808,R_12,7441389,7446142,H_9,4662294,4665258,-81,-18


Unnamed: 0,GOSlim,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,Gene stable ID_rice,Gene stable ID_human,Chromosome/scaffold name_rice,start1 (bp),end1 (bp),Chromosome/scaffold name_human,start2 (bp),end2 (bp),HN-score(HN5)_rice,HN-score(HN5)_human,counts_down,fold_enrichment,p_value,q_value
0,GO:0005764: lysosome,GO:0005764,lysosome,cellular_component,Os01g0613500,ENSG00000125538,R_1,24343518,24345172,H_2,112829751,112836816,-75,-28,21,5.178879,2.405986e-09,1.168622e-08
1,GO:0005764: lysosome,GO:0005764,lysosome,cellular_component,Os01g0971400,ENSG00000240857,R_1,42855657,42857462,H_2,18554723,18560679,-41,-15,21,5.178879,2.405986e-09,1.168622e-08
2,GO:0005764: lysosome,GO:0005764,lysosome,cellular_component,Os02g0715000,ENSG00000068001,R_2,29667743,29669572,H_3,50317790,50322782,-41,-22,21,5.178879,2.405986e-09,1.168622e-08
3,GO:0005764: lysosome,GO:0005764,lysosome,cellular_component,Os01g0613500,ENSG00000170961,R_1,24343518,24345172,H_8,121612116,121641440,-75,-24,21,5.178879,2.405986e-09,1.168622e-08
4,GO:0005764: lysosome,GO:0005764,lysosome,cellular_component,Os01g0613500,ENSG00000166189,R_1,24343518,24345172,H_10,102065349,102068036,-75,-36,21,5.178879,2.405986e-09,1.168622e-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18457,GO:0005783: endoplasmic reticulum,GO:0005783,endoplasmic reticulum,cellular_component,Os05g0482400,ENSG00000166189,R_5,23728568,23738372,H_10,102065349,102068036,-49,-36,13,0.075565,1.000000e+00,1.000000e+00
18458,GO:0005783: endoplasmic reticulum,GO:0005783,endoplasmic reticulum,cellular_component,Os05g0482400,ENSG00000111186,R_5,23728568,23738372,H_12,1529891,1647212,-49,-17,13,0.075565,1.000000e+00,1.000000e+00
18459,GO:0005783: endoplasmic reticulum,GO:0005783,endoplasmic reticulum,cellular_component,Os05g0482400,ENSG00000160678,R_5,23728568,23738372,H_1,153627926,153632039,-49,-15,13,0.075565,1.000000e+00,1.000000e+00
18460,GO:0005783: endoplasmic reticulum,GO:0005783,endoplasmic reticulum,cellular_component,Os05g0482400,ENSG00000147168,R_5,23728568,23738372,H_X,71107404,71112108,-49,-15,13,0.075565,1.000000e+00,1.000000e+00
