## discription

Circos plotの描画の処理内で行われている結合をこちらのファイルで行うことを目的に作成しました

In [1]:
import collections
import csv
import datetime
import os
import pandas as pd
import numpy as np
from collections import Counter

In [2]:
# output directory
direction = "UP"
now = datetime.datetime.now()
plot_up = f'../data/circos_hr_{direction}_{now.strftime("%y%m")}'
no_annotation = f'../data/{plot_up}/no_annotation_{direction}'
os.makedirs(plot_up, exist_ok=True)
os.makedirs(no_annotation, exist_ok=True)

## HUMAN_UP

1. Create a TSV file to plot genes with high HN-score on the Circos plot

2. Extract GOSlim corresponding to the list of up-regulated genes

In [3]:
# 1. Create a TSV file to plot genes with high HN-score on the Circos plot
#organism = "human"
df_h_score = pd.read_csv("../data/human_annotation/human_position_up.tsv", sep='\t')
df_h_score_subset = df_h_score[['Gene stable ID', # create a subset
                        'Chromosome/scaffold name', 
                        'start2 (bp)',
                        'end2 (bp)',
                        'HN-score(HN5)']].copy()
df_h_score_subset['Chromosome/scaffold name'] = 'H_' + df_h_score_subset['Chromosome/scaffold name'].astype(str)
df_h_score_subset['organism'] = 'human'# assign the tag

df_h_score_subset.to_csv(f"../data/{plot_up}/human_position_up_assign.tsv", sep='\t', index=False)

display(df_h_score_subset)

Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start2 (bp),end2 (bp),HN-score(HN5),organism
0,ENSG00000173110,H_1,161524540,161526894,245,human
1,ENSG00000204389,H_6,31815543,31817946,239,human
2,ENSG00000204388,H_6,31827738,31830254,231,human
3,ENSG00000132002,H_19,14514769,14560391,210,human
4,ENSG00000151929,H_10,119651380,119677819,181,human
...,...,...,...,...,...,...
195,ENSG00000187049,H_11,61392393,61398866,51,human
196,ENSG00000176381,H_6,166305300,166308448,51,human
197,ENSG00000171931,H_17,18744026,18779349,51,human
198,ENSG00000125998,H_20,35285731,35292425,51,human


In [4]:
# 2. Extract GOSlim corresponding to the list of up-regulated genes
goslim_all_human = pd.read_csv('../data/biomart_goslim/biomart_human_goslim_R110_domain.tsv', sep='\t',
                               dtype={'Gene stable ID': 'object'}, 
                               low_memory=False)
columns_of_interest_human = ['Gene stable ID', 
                             'GOSlim GOA Accession(s)', 
                             'GOSlim GOA Description', 
                             'GOSlim_domain']
goslim_all_human_filtered = goslim_all_human[columns_of_interest_human]

extract_df_h_goslim = pd.merge(
    df_h_score_subset, 
    goslim_all_human_filtered, 
    on='Gene stable ID', 
    how='inner'
    )
extract_df_h_goslim = extract_df_h_goslim.drop_duplicates(subset=['Gene stable ID',
                                                                  'GOSlim GOA Accession(s)', 
                                                                  'GOSlim GOA Description'], keep='first').copy()

# Extract genes with no annotations
extract_df_h_goslim_with_indicator = pd.merge(
    df_h_score_subset, 
    goslim_all_human_filtered,
    left_on='Gene stable ID',
    right_on='Gene stable ID',
    how='left',
    indicator=True # add indicator column to show the difference
)

# extract_df_h_goslim_with_indicator
left_only_rows = extract_df_h_goslim_with_indicator[extract_df_h_goslim_with_indicator['_merge'] == 'left_only']

extract_df_h_goslim.to_csv(f"../data/{plot_up}/human_goslim_{direction}.tsv", sep='\t', index=False)
left_only_rows.to_csv(f"../data/{no_annotation}/human_position_up_no_annotation.tsv", sep='\t', index=False)

display(goslim_all_human_filtered)
display(extract_df_h_goslim)
display(left_only_rows)

Unnamed: 0,Gene stable ID,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain
0,ENSG00000243485,GO:0031047,gene silencing by RNA,biological_process
1,ENSG00000284332,GO:0031047,gene silencing by RNA,biological_process
2,ENSG00000186092,GO:0023052,signaling,biological_process
3,ENSG00000186092,GO:0060089,molecular transducer activity,molecular_function
4,ENSG00000186092,GO:0005886,plasma membrane,cellular_component
...,...,...,...,...
193923,ENSG00000292372,GO:0007010,cytoskeleton organization,biological_process
193924,ENSG00000292372,GO:0008092,cytoskeletal protein binding,molecular_function
193925,ENSG00000292372,GO:0031410,cytoplasmic vesicle,cellular_component
193926,ENSG00000292372,GO:0043226,organelle,cellular_component


Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start2 (bp),end2 (bp),HN-score(HN5),organism,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain
0,ENSG00000173110,H_1,161524540,161526894,245,human,GO:0140657,ATP-dependent activity,molecular_function
1,ENSG00000173110,H_1,161524540,161526894,245,human,GO:0044183,protein folding chaperone,molecular_function
2,ENSG00000173110,H_1,161524540,161526894,245,human,GO:0043226,organelle,cellular_component
3,ENSG00000173110,H_1,161524540,161526894,245,human,GO:0005856,cytoskeleton,cellular_component
4,ENSG00000173110,H_1,161524540,161526894,245,human,GO:0005815,microtubule organizing center,cellular_component
...,...,...,...,...,...,...,...,...,...
1722,ENSG00000107742,H_10,72059034,72089032,51,human,GO:0007155,cell adhesion,biological_process
1723,ENSG00000107742,H_10,72059034,72089032,51,human,GO:1901135,carbohydrate derivative metabolic process,biological_process
1724,ENSG00000107742,H_10,72059034,72089032,51,human,GO:0036211,protein modification process,biological_process
1725,ENSG00000107742,H_10,72059034,72089032,51,human,GO:0006790,sulfur compound metabolic process,biological_process


Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start2 (bp),end2 (bp),HN-score(HN5),organism,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,_merge
251,ENSG00000182308,H_4,41981756,41986465,141,human,,,,left_only
323,ENSG00000189030,H_1,156298624,156299307,121,human,,,,left_only
375,ENSG00000176714,H_2,27625638,27629012,113,human,,,,left_only
376,ENSG00000181016,H_7,112480853,112491062,113,human,,,,left_only
435,ENSG00000187944,H_2,196804417,196809355,100,human,,,,left_only
698,ENSG00000268975,H_19,40771648,40796943,80,human,,,,left_only
707,ENSG00000249693,H_4,56530606,56603507,79,human,,,,left_only
747,ENSG00000197520,H_1,222737202,222751004,75,human,,,,left_only
801,ENSG00000187479,H_11,43942637,43943878,72,human,,,,left_only
854,ENSG00000241322,H_17,15565483,15619704,71,human,,,,left_only


## RICE_UP

1. Create a TSV file to plot genes with high HN-score on the Circos plot

2. Extract GOSlim corresponding to the list of up-regulated genes

In [5]:
# 1. Create a TSV file to plot genes with high HN-score on the Circos plot
df_r_score = pd.read_csv('../data/rice_annotation/rice_position_up.tsv', sep='\t')
df_r_score_subset = df_r_score[['Gene stable ID', # create a subset
                        'Chromosome/scaffold name', 
                        'start1 (bp)',
                        'end1 (bp)',
                        'HN-score(HN5)']].copy()
df_r_score_subset['Chromosome/scaffold name'] = 'R_' + df_r_score_subset['Chromosome/scaffold name'].astype(str)
df_r_score_subset['organism'] = 'rice'# assign the tag

df_r_score_subset.to_csv(f"../data/{plot_up}/rice_position_up_assign.tsv", sep='\t', index=False)

display(df_r_score_subset)

Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start1 (bp),end1 (bp),HN-score(HN5),organism
0,Os04g0107900,R_4,483234,485978,253,rice
1,Os01g0136100,R_1,1948773,1949587,246,rice
2,Os02g0259900,R_2,9021454,9023102,238,rice
3,Os03g0245800,R_3,7697015,7698027,237,rice
4,Os03g0277300,R_3,9411494,9416082,236,rice
...,...,...,...,...,...,...
384,Os03g0820400,R_3,34427704,34428391,42,rice
385,Os10g0328600,R_10,9212944,9216458,42,rice
386,Os01g0971800,R_1,42874273,42875515,42,rice
387,Os07g0621600,R_7,25691906,25692138,42,rice


In [6]:
# 2. Extract GOSlim corresponding to the list of up-regulated genes
goslim_all_rice = pd.read_csv('../data/biomart_goslim/biomart_rice_goslim_R56_domain.tsv', sep='\t',
                               dtype={'Gene stable ID': 'object'}, 
                               low_memory=False)
columns_of_interest_rice = ['Gene stable ID',
                            'GOSlim GOA Accession(s)', 
                            'GOSlim GOA Description', 
                            'GOSlim_domain']
goslim_all_rice_filtered = goslim_all_rice[columns_of_interest_rice]

extract_df_r_goslim = pd.merge(
    df_r_score_subset, 
    goslim_all_rice_filtered, 
    on='Gene stable ID', 
    how='inner'
    )

extract_df_r_goslim = extract_df_r_goslim.drop_duplicates(subset=['Gene stable ID',
                                                                    'GOSlim GOA Accession(s)', 
                                                                    'GOSlim GOA Description'], keep='first').copy()

# Extract genes with no annotations
extract_df_r_goslim_with_indicator = pd.merge(
    df_r_score_subset, 
    goslim_all_rice_filtered,
    left_on='Gene stable ID',
    right_on='Gene stable ID',
    how='left',
    indicator=True # add indicator column to show the difference
)

# extract_df_r_goslim_with_indicator
left_only_rows = extract_df_r_goslim_with_indicator[extract_df_r_goslim_with_indicator['_merge'] == 'left_only']

extract_df_r_goslim.to_csv(f"../data/{plot_up}/rice_goslim_{direction}.tsv", sep='\t', index=False)
left_only_rows.to_csv(f"../data/{no_annotation}/rice_position_up_no_annotation.tsv", sep='\t', index=False)

display(goslim_all_rice_filtered)
display(extract_df_r_goslim)
display(left_only_rows)

Unnamed: 0,Gene stable ID,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain
0,Os01g0100100,GO:0006810,transport,biological_process
1,Os01g0100100,GO:0008150,biological_process,biological_process
2,Os01g0100100,GO:0009987,cellular process,biological_process
3,Os01g0100100,GO:0003674,molecular_function,molecular_function
4,Os01g0100100,GO:0030234,enzyme regulator activity,molecular_function
...,...,...,...,...
219494,gene-rps19,GO:0003674,molecular_function,molecular_function
219495,gene-rps19,GO:0005198,structural molecule activity,molecular_function
219496,gene-rps19,GO:0005488,binding,molecular_function
219497,gene-rps19,GO:0003723,RNA binding,molecular_function


Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start1 (bp),end1 (bp),HN-score(HN5),organism,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain
0,Os04g0107900,R_4,483234,485978,253,rice,GO:0003674,molecular_function,molecular_function
1,Os04g0107900,R_4,483234,485978,253,rice,GO:0005515,protein binding,molecular_function
2,Os04g0107900,R_4,483234,485978,253,rice,GO:0005488,binding,molecular_function
3,Os04g0107900,R_4,483234,485978,253,rice,GO:0008150,biological_process,biological_process
4,Os04g0107900,R_4,483234,485978,253,rice,GO:0006950,response to stress,biological_process
...,...,...,...,...,...,...,...,...,...
2522,Os01g0971800,R_1,42874273,42875515,42,rice,GO:0008152,metabolic process,biological_process
2523,Os01g0971800,R_1,42874273,42875515,42,rice,GO:0009058,biosynthetic process,biological_process
2524,Os01g0971800,R_1,42874273,42875515,42,rice,GO:0006139,nucleobase-containing compound metabolic process,biological_process
2525,Os01g0971800,R_1,42874273,42875515,42,rice,GO:0003700,DNA-binding transcription factor activity,molecular_function


Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start1 (bp),end1 (bp),HN-score(HN5),organism,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,_merge
172,Os04g0108101,R_4,485272,485663,205,rice,,,,left_only
173,Os01g0184050,R_1,4449056,4449806,205,rice,,,,left_only
229,Os05g0296800,R_5,13197199,13199560,166,rice,,,,left_only
243,Os11g0533400,R_11,19430389,19431158,165,rice,,,,left_only
274,Os04g0578550,R_4,29163356,29165144,158,rice,,,,left_only
...,...,...,...,...,...,...,...,...,...,...
2588,Os11g0506700,R_11,18069286,18071596,42,rice,,,,left_only
2589,Os03g0254250,R_3,8134007,8134226,42,rice,,,,left_only
2642,Os10g0328600,R_10,9212944,9216458,42,rice,,,,left_only
2657,Os07g0621600,R_7,25691906,25692138,42,rice,,,,left_only


## COMBINED_UP( HUMAN_UP + RICE_UP )

1. chromosome position + gene position + HN-score

2. GOslim terms corresponding to the list of up-regulated genes

In [7]:
# 1. chromosome position + gene position + HN-score
combined_df_score = pd.concat([df_h_score_subset, 
                               df_r_score_subset])
combined_df_score.reset_index(drop=True, inplace=True)
combined_df_score['Gene start (bp)'] = combined_df_score['start1 (bp)'].combine_first(combined_df_score['start2 (bp)'])
combined_df_score['Gene end (bp)'] = combined_df_score['end1 (bp)'].combine_first(combined_df_score['end2 (bp)'])
combined_df_score.drop(['start1 (bp)', 'start2 (bp)', 'end1 (bp)', 'end2 (bp)'], axis=1, inplace=True)

combined_df_score.to_csv(f"../data/{plot_up}/combined_position_up.tsv", sep='\t', index=False)

display(combined_df_score)

Unnamed: 0,Gene stable ID,Chromosome/scaffold name,HN-score(HN5),organism,Gene start (bp),Gene end (bp)
0,ENSG00000173110,H_1,245,human,161524540.0,161526894.0
1,ENSG00000204389,H_6,239,human,31815543.0,31817946.0
2,ENSG00000204388,H_6,231,human,31827738.0,31830254.0
3,ENSG00000132002,H_19,210,human,14514769.0,14560391.0
4,ENSG00000151929,H_10,181,human,119651380.0,119677819.0
...,...,...,...,...,...,...
584,Os03g0820400,R_3,42,rice,34427704.0,34428391.0
585,Os10g0328600,R_10,42,rice,9212944.0,9216458.0
586,Os01g0971800,R_1,42,rice,42874273.0,42875515.0
587,Os07g0621600,R_7,42,rice,25691906.0,25692138.0


In [8]:
merged_df_goslim = pd.merge(
    extract_df_h_goslim, 
    extract_df_r_goslim, 
    left_on=['GOSlim GOA Accession(s)', 'GOSlim GOA Description', 'GOSlim_domain'],
    right_on=['GOSlim GOA Accession(s)', 'GOSlim GOA Description', 'GOSlim_domain'],
    how='inner',
    suffixes=('_human', '_rice')
)

merged_df_goslim = merged_df_goslim[['GOSlim GOA Accession(s)',
                                     'GOSlim GOA Description',
                                     'GOSlim_domain',
                                     'Gene stable ID_rice',
                                     'Gene stable ID_human',
                                     'Chromosome/scaffold name_rice',
                                     'start1 (bp)',
                                     'end1 (bp)',
                                     'Chromosome/scaffold name_human',
                                     'start2 (bp)',
                                     'end2 (bp)',
                                     'HN-score(HN5)_rice',
                                     'HN-score(HN5)_human']]

# Remove duplicate rows based on specific columns
merged_df_goslim.drop_duplicates(inplace=True)

display(merged_df_goslim)

Unnamed: 0,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,Gene stable ID_rice,Gene stable ID_human,Chromosome/scaffold name_rice,start1 (bp),end1 (bp),Chromosome/scaffold name_human,start2 (bp),end2 (bp),HN-score(HN5)_rice,HN-score(HN5)_human
0,GO:0005856,cytoskeleton,cellular_component,Os01g0257300,ENSG00000173110,R_1,8586157,8586941,H_1,161524540,161526894,45,245
1,GO:0005856,cytoskeleton,cellular_component,Os01g0257300,ENSG00000204389,R_1,8586157,8586941,H_6,31815543,31817946,45,239
2,GO:0005856,cytoskeleton,cellular_component,Os01g0257300,ENSG00000204388,R_1,8586157,8586941,H_6,31827738,31830254,45,231
3,GO:0005856,cytoskeleton,cellular_component,Os01g0257300,ENSG00000151929,R_1,8586157,8586941,H_10,119651380,119677819,45,181
4,GO:0005856,cytoskeleton,cellular_component,Os01g0257300,ENSG00000109846,R_1,8586157,8586941,H_11,111908564,111923722,45,173
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15167,GO:0008289,lipid binding,molecular_function,Os04g0438600,ENSG00000174226,R_4,21812159,21816137,H_8,100572889,100663415,56,51
15168,GO:0008289,lipid binding,molecular_function,Os08g0546300,ENSG00000174226,R_8,27364165,27364816,H_8,100572889,100663415,54,51
15169,GO:0008289,lipid binding,molecular_function,Os01g0849000,ENSG00000174226,R_1,36484020,36484818,H_8,100572889,100663415,49,51
15170,GO:0008289,lipid binding,molecular_function,Os07g0659600,ENSG00000174226,R_7,27800556,27802446,H_8,100572889,100663415,43,51


In [9]:
# for debugging purpose
unique_accessions_r = set(extract_df_r_goslim['GOSlim GOA Accession(s)'].unique())
unique_accessions_h = set(extract_df_h_goslim['GOSlim GOA Accession(s)'].unique())
common_accessions = unique_accessions_r.intersection(unique_accessions_h)
unique_accessions_merged = set(merged_df_goslim['GOSlim GOA Accession(s)'].unique())
missing_accessions = common_accessions.difference(unique_accessions_merged)
duplicates = merged_df_goslim.duplicated(subset=['GOSlim GOA Accession(s)', 'Gene stable ID_rice', 'Gene stable ID_human'], keep=False)
unique_combinations = not duplicates.any()
print(f"Number of common accessions: {len(common_accessions)}")
print(f"Number of missing accessions: {len(missing_accessions)}")
print(f"Missing accessions: {missing_accessions}")
print(f"各'Gene stable ID_rice'と'Gene stable ID_human'の組み合わせはユニークですか？: {unique_combinations}")

#Count the occurrences of each 'GOSlim GOA Accession(s)'
accession_counter = Counter(merged_df_goslim['GOSlim GOA Accession(s)'])

#Sort the DataFrame based on the counts
merged_df_goslim['count'] = merged_df_goslim['GOSlim GOA Accession(s)'].map(accession_counter)
merged_df_goslim_sorted = merged_df_goslim.sort_values('count', ascending=False).drop('count', axis=1)
merged_df_goslim_sorted.reset_index(drop=True, inplace=True)
merged_df_goslim_sorted.to_csv(f"../data/{plot_up}/combined_goslim_{direction}.tsv", sep='\t', index=False)

display(merged_df_goslim_sorted)

Number of common accessions: 29
Number of missing accessions: 0
Missing accessions: set()
各'Gene stable ID_rice'と'Gene stable ID_human'の組み合わせはユニークですか？: True


Unnamed: 0,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,Gene stable ID_rice,Gene stable ID_human,Chromosome/scaffold name_rice,start1 (bp),end1 (bp),Chromosome/scaffold name_human,start2 (bp),end2 (bp),HN-score(HN5)_rice,HN-score(HN5)_human
0,GO:0003824,catalytic activity,molecular_function,Os04g0167875,ENSG00000178297,R_4,4644736,4644996,H_19,2360238,2426261,57,72
1,GO:0003824,catalytic activity,molecular_function,Os05g0135400,ENSG00000143199,R_5,2073613,2075302,H_1,167809386,167914215,61,97
2,GO:0003824,catalytic activity,molecular_function,Os02g0164000,ENSG00000143199,R_2,3441185,3446850,H_1,167809386,167914215,48,97
3,GO:0003824,catalytic activity,molecular_function,Os08g0473900,ENSG00000143199,R_8,23341289,23343299,H_1,167809386,167914215,48,97
4,GO:0003824,catalytic activity,molecular_function,Os09g0315700,ENSG00000143199,R_9,8692296,8697399,H_1,167809386,167914215,49,97
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15167,GO:0005198,structural molecule activity,molecular_function,Os01g0105800,ENSG00000184697,R_1,306871,308842,H_16,3014712,3020071,53,65
15168,GO:0005777,peroxisome,cellular_component,Os07g0529000,ENSG00000165507,R_7,20691213,20693521,H_10,44970981,44978809,59,70
15169,GO:0005777,peroxisome,cellular_component,Os06g0253100,ENSG00000165507,R_6,7940956,7941680,H_10,44970981,44978809,129,70
15170,GO:0045182,translation regulator activity,molecular_function,Os05g0373900,ENSG00000183655,R_5,18016742,18020353,H_15,85759326,85794925,42,74


## Merge GOslim terms and line enrichment results

In [13]:
enrichment = pd.read_csv('../data/goslim_correspondence_q_values/goslim_correspondence_fold_enrichment_p_q.tsv', sep='\t')

merged_df_goslim_sorted["GOSlim"] = merged_df_goslim_sorted['GOSlim GOA Accession(s)'] + ": " + merged_df_goslim_sorted['GOSlim GOA Description']

merged_df_goslim_sorted = merged_df_goslim_sorted[['GOSlim',
                                                    'GOSlim GOA Accession(s)',
                                                    'GOSlim GOA Description',
                                                    'GOSlim_domain',
                                                    'Gene stable ID_rice',
                                                    'Gene stable ID_human',
                                                    'Chromosome/scaffold name_rice',
                                                    'start1 (bp)',
                                                    'end1 (bp)',
                                                    'Chromosome/scaffold name_human',
                                                    'start2 (bp)',
                                                    'end2 (bp)',
                                                    'HN-score(HN5)_rice',
                                                    'HN-score(HN5)_human']]


merged_df_goslim_enrichment = pd.merge(
    merged_df_goslim_sorted,
    enrichment[['GOSlim', 'fold_enrichment', 'p_value', 'q_value']],
    on='GOSlim',
    how='left'
)
merged_df_goslim_enrichment.sort_values('fold_enrichment', ascending=False, inplace=True)
merged_df_goslim_enrichment.reset_index(drop=True, inplace=True)
merged_df_goslim_enrichment.to_csv(f"../data/{plot_up}/combined_goslim_enrichment_{direction}.tsv", sep='\t', index=False)

display(enrichment)
display(merged_df_goslim_sorted)
display(merged_df_goslim_enrichment)

Unnamed: 0,GOSlim,counts_up,counts_all,up_ratio,all_ratio,fold_enrichment,p_value,q_value
0,GO:0005783: endoplasmic reticulum,588,1919256,0.038756,0.009318,4.159016,2.539107e-175,8.632965e-174
1,GO:0006091: generation of precursor metabolite...,55,265631,0.003625,0.00129,2.810801,3.9668e-11,1.68589e-10
2,GO:0008289: lipid binding,49,257535,0.00323,0.00125,2.58289,6.223319e-09,2.351032e-08
3,GO:0140110: transcription regulator activity,396,2256826,0.026101,0.010957,2.382008,1.88955e-52,1.606117e-51
4,GO:0005829: cytosol,1248,7811118,0.082257,0.037925,2.16894,8.805413e-137,1.49692e-135
5,GO:0005576: extracellular region,600,3845282,0.039547,0.01867,2.118211,9.704598000000001e-62,1.099854e-60
6,GO:0005739: mitochondrion,280,1951236,0.018455,0.009474,1.948024,3.221263e-24,1.5646140000000003e-23
7,GO:0005975: carbohydrate metabolic process,84,676139,0.005537,0.003283,1.686512,5.992973e-06,1.852373e-05
8,GO:0048856: anatomical structure development,819,7216908,0.053981,0.03504,1.540561,2.844564e-32,1.934304e-31
9,GO:0005777: peroxisome,2,18172,0.000132,8.8e-05,1.494078,0.3868057,1.0


Unnamed: 0,GOSlim,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,Gene stable ID_rice,Gene stable ID_human,Chromosome/scaffold name_rice,start1 (bp),end1 (bp),Chromosome/scaffold name_human,start2 (bp),end2 (bp),HN-score(HN5)_rice,HN-score(HN5)_human
0,GO:0003824: catalytic activity,GO:0003824,catalytic activity,molecular_function,Os04g0167875,ENSG00000178297,R_4,4644736,4644996,H_19,2360238,2426261,57,72
1,GO:0003824: catalytic activity,GO:0003824,catalytic activity,molecular_function,Os05g0135400,ENSG00000143199,R_5,2073613,2075302,H_1,167809386,167914215,61,97
2,GO:0003824: catalytic activity,GO:0003824,catalytic activity,molecular_function,Os02g0164000,ENSG00000143199,R_2,3441185,3446850,H_1,167809386,167914215,48,97
3,GO:0003824: catalytic activity,GO:0003824,catalytic activity,molecular_function,Os08g0473900,ENSG00000143199,R_8,23341289,23343299,H_1,167809386,167914215,48,97
4,GO:0003824: catalytic activity,GO:0003824,catalytic activity,molecular_function,Os09g0315700,ENSG00000143199,R_9,8692296,8697399,H_1,167809386,167914215,49,97
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15167,GO:0005198: structural molecule activity,GO:0005198,structural molecule activity,molecular_function,Os01g0105800,ENSG00000184697,R_1,306871,308842,H_16,3014712,3020071,53,65
15168,GO:0005777: peroxisome,GO:0005777,peroxisome,cellular_component,Os07g0529000,ENSG00000165507,R_7,20691213,20693521,H_10,44970981,44978809,59,70
15169,GO:0005777: peroxisome,GO:0005777,peroxisome,cellular_component,Os06g0253100,ENSG00000165507,R_6,7940956,7941680,H_10,44970981,44978809,129,70
15170,GO:0045182: translation regulator activity,GO:0045182,translation regulator activity,molecular_function,Os05g0373900,ENSG00000183655,R_5,18016742,18020353,H_15,85759326,85794925,42,74


Unnamed: 0,GOSlim,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,Gene stable ID_rice,Gene stable ID_human,Chromosome/scaffold name_rice,start1 (bp),end1 (bp),Chromosome/scaffold name_human,start2 (bp),end2 (bp),HN-score(HN5)_rice,HN-score(HN5)_human,fold_enrichment,p_value,q_value
0,GO:0005783: endoplasmic reticulum,GO:0005783,endoplasmic reticulum,cellular_component,Os03g0822700,ENSG00000160963,R_3,34536280,34539296,H_7,101362875,101559024,50,67,4.159016,2.539107e-175,8.632965e-174
1,GO:0005783: endoplasmic reticulum,GO:0005783,endoplasmic reticulum,cellular_component,Os03g0263700,ENSG00000170345,R_3,8668092,8671028,H_14,75278826,75282230,48,165,4.159016,2.539107e-175,8.632965e-174
2,GO:0005783: endoplasmic reticulum,GO:0005783,endoplasmic reticulum,cellular_component,Os06g0593100,ENSG00000204388,R_6,23303671,23306966,H_6,31827738,31830254,48,231,4.159016,2.539107e-175,8.632965e-174
3,GO:0005783: endoplasmic reticulum,GO:0005783,endoplasmic reticulum,cellular_component,Os08g0442200,ENSG00000204388,R_8,21549721,21551326,H_6,31827738,31830254,48,231,4.159016,2.539107e-175,8.632965e-174
4,GO:0005783: endoplasmic reticulum,GO:0005783,endoplasmic reticulum,cellular_component,Os03g0263700,ENSG00000204388,R_3,8668092,8671028,H_6,31827738,31830254,48,231,4.159016,2.539107e-175,8.632965e-174
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15167,GO:0005198: structural molecule activity,GO:0005198,structural molecule activity,molecular_function,Os01g0105800,ENSG00000109846,R_1,306871,308842,H_11,111908564,111923722,53,173,0.250500,1.000000e+00,1.000000e+00
15168,GO:0005198: structural molecule activity,GO:0005198,structural molecule activity,molecular_function,Os03g0276700,ENSG00000109846,R_3,9375984,9377688,H_11,111908564,111923722,99,173,0.250500,1.000000e+00,1.000000e+00
15169,GO:0005198: structural molecule activity,GO:0005198,structural molecule activity,molecular_function,Os03g0276700,ENSG00000112110,R_3,9375984,9377688,H_6,159789812,159798436,99,63,0.250500,1.000000e+00,1.000000e+00
15170,GO:0005198: structural molecule activity,GO:0005198,structural molecule activity,molecular_function,Os01g0105800,ENSG00000112110,R_1,306871,308842,H_6,159789812,159798436,53,63,0.250500,1.000000e+00,1.000000e+00
