## Description

### Background
Circos plotの描画の処理内で行われている結合をこちらのファイルで行うことを目的に作成しました

In [1]:
import collections
import csv
import datetime
import os
import pandas as pd
import numpy as np
from collections import Counter

In [2]:
# Output directories for this run.
organism1_vs_organism2 = "MR"  # mouse (M) vs rice (R)
direction = "up"
now = datetime.datetime.now()  # year/month (YYMM) of the run is embedded in the directory name

# NOTE(review): circos_dir already starts with '../data/', yet no_annotation (and the
# to_csv paths in the cells below) prepend '../data/' once more. The resulting
# '../data/../data/...' paths resolve to the same directory as long as '../data'
# exists, so the values are kept as they are here; cleaning this up requires
# changing every cell that builds a path from these two variables.
circos_dir = f'../data/circos_{direction}_{organism1_vs_organism2}_{now.strftime("%y%m")}'
no_annotation = f'../data/{circos_dir}/no_annotation_{direction}'

# Create both directories; existing ones are left untouched.
for output_dir in (circos_dir, no_annotation):
    os.makedirs(output_dir, exist_ok=True)

## MOUSE_UP

1. Create a TSV file to plot genes with high HN-score on the Circos plot

2. Extract GOSlim corresponding to the list of up-regulated genes

In [3]:
# 1. Create a TSV file to plot genes with high HN-score on the Circos plot
# organism = "mouse" (M)
df_m_score = pd.read_csv(f"../data/mouse_annotation/mouse_position_{direction}.tsv", sep='\t')

# Keep only the columns needed below, prefix every chromosome/scaffold name
# with 'M_' and tag each row with its organism.
df_m_score_subset = df_m_score[['Gene stable ID',
                                'Chromosome/scaffold name',
                                'start2 (bp)',
                                'end2 (bp)',
                                'HN-score(HN5)']].assign(
    **{'Chromosome/scaffold name': lambda frame: 'M_' + frame['Chromosome/scaffold name'].astype(str)},
    organism='mouse',
)

# NOTE(review): the output file is named 'human_position_...' although it holds
# mouse data; the name is left unchanged because other code may read it — confirm
# before renaming.
df_m_score_subset.to_csv(f"../data/{circos_dir}/human_position_{direction}_assign.tsv", sep='\t', index=False)

display(df_m_score_subset)

Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start2 (bp),end2 (bp),HN-score(HN5),organism
0,ENSMUSG00000090877,M_17,35175412,35178214,149,mouse
1,ENSMUSG00000091971,M_17,35188166,35191132,137,mouse
2,ENSMUSG00000004951,M_5,135916773,135918417,111,mouse
3,ENSMUSG00000092609,M_17,35188888,35191112,88,mouse
4,ENSMUSG00000026628,M_1,190902493,190950236,78,mouse
...,...,...,...,...,...,...
243,ENSMUSG00000037887,M_7,141633227,141649580,13,mouse
244,ENSMUSG00000035775,M_11,99319229,99328976,13,mouse
245,ENSMUSG00000054034,M_X,135101697,135104625,13,mouse
246,ENSMUSG00000098715,M_6,48695541,48729578,13,mouse


In [4]:
# 2. Extract GOSlim corresponding to the list of up-regulated genes
goslim_all_mouse = pd.read_csv('../data/biomart_goslim/biomart_mouse_goslim_R110_domain.tsv', sep='\t',
                               dtype={'Gene stable ID': 'object'},
                               low_memory=False)
columns_of_interest_mouse = ['Gene stable ID',
                             'GOSlim GOA Accession(s)',
                             'GOSlim GOA Description',
                             'GOSlim_domain']
goslim_all_mouse_filtered = goslim_all_mouse[columns_of_interest_mouse]

# Attach GOSlim terms to the scored mouse genes (only genes that have at least
# one GOSlim row survive the inner join), then keep one row per
# (gene, accession, description) combination.
extract_df_m_goslim = pd.merge(
    df_m_score_subset,
    goslim_all_mouse_filtered,
    on='Gene stable ID',
    how='inner'
    )
extract_df_m_goslim = extract_df_m_goslim.drop_duplicates(subset=['Gene stable ID',
                                                                  'GOSlim GOA Accession(s)',
                                                                  'GOSlim GOA Description'], keep='first').copy()

# Extract genes with no annotations: a left join with indicator=True marks rows
# that found no match in the GOSlim table as 'left_only' in the '_merge' column.
# (The variable keeps its historical '_h_' name although the data is mouse.)
extract_df_h_goslim_with_indicator = pd.merge(
    df_m_score_subset,
    goslim_all_mouse_filtered,
    on='Gene stable ID',
    how='left',
    indicator=True # add indicator column to show the difference
)

left_only_rows = extract_df_h_goslim_with_indicator[extract_df_h_goslim_with_indicator['_merge'] == 'left_only']

# NOTE(review): both files carry a 'human_' prefix although they contain mouse
# data; the prefix is kept because other code may read these names — confirm
# before renaming.
extract_df_m_goslim.to_csv(f"../data/{circos_dir}/human_goslim_{direction}.tsv", sep='\t', index=False)
# Fix: the file name used to hard-code 'down' regardless of `direction`; it now
# follows `direction`, like the corresponding rice file.
left_only_rows.to_csv(f"../data/{no_annotation}/human_position_{direction}_no_annotation.tsv", sep='\t', index=False)

display(goslim_all_mouse_filtered, extract_df_m_goslim, left_only_rows)

Unnamed: 0,Gene stable ID,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain
0,ENSMUSG00000064336,GO:0060090,molecular adaptor activity,molecular_function
1,ENSMUSG00000064336,GO:0003723,RNA binding,molecular_function
2,ENSMUSG00000064336,GO:0043226,organelle,cellular_component
3,ENSMUSG00000064336,GO:0005739,mitochondrion,cellular_component
4,ENSMUSG00000064337,GO:0005198,structural molecule activity,molecular_function
...,...,...,...,...
170875,ENSMUSG00000015335,GO:0036211,protein modification process,biological_process
170876,ENSMUSG00000015335,GO:0023052,signaling,biological_process
170877,ENSMUSG00000015335,GO:0048856,anatomical structure development,biological_process
170878,ENSMUSG00000015335,GO:0034330,cell junction organization,biological_process


Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start2 (bp),end2 (bp),HN-score(HN5),organism,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain
0,ENSMUSG00000090877,M_17,35175412,35178214,149,mouse,GO:0140657,ATP-dependent activity,molecular_function
1,ENSMUSG00000090877,M_17,35175412,35178214,149,mouse,GO:0044183,protein folding chaperone,molecular_function
2,ENSMUSG00000090877,M_17,35175412,35178214,149,mouse,GO:0043226,organelle,cellular_component
3,ENSMUSG00000090877,M_17,35175412,35178214,149,mouse,GO:0005739,mitochondrion,cellular_component
4,ENSMUSG00000090877,M_17,35175412,35178214,149,mouse,GO:0012501,programmed cell death,biological_process
...,...,...,...,...,...,...,...,...,...
2137,ENSMUSG00000035775,M_11,99319229,99328976,13,mouse,GO:0048856,anatomical structure development,biological_process
2138,ENSMUSG00000035775,M_11,99319229,99328976,13,mouse,GO:0030154,cell differentiation,biological_process
2139,ENSMUSG00000054034,M_X,135101697,135104625,13,mouse,GO:0043226,organelle,cellular_component
2140,ENSMUSG00000054034,M_X,135101697,135104625,13,mouse,GO:0005634,nucleus,cellular_component


Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start2 (bp),end2 (bp),HN-score(HN5),organism,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,_merge
66,ENSMUSG00000092609,M_17,35188888,35191112,88,mouse,,,,left_only
290,ENSMUSG00000118491,M_19,12741024,12773490,32,mouse,,,,left_only
303,ENSMUSG00000068349,M_15,74685301,74690664,30,mouse,,,,left_only
319,ENSMUSG00000079304,M_6,128352419,128362107,29,mouse,,,,left_only
520,ENSMUSG00000068600,M_15,74690920,74706720,24,mouse,,,,left_only
527,ENSMUSG00000115463,M_15,58761147,58809208,23,mouse,,,,left_only
619,ENSMUSG00000087610,M_3,96484300,96487979,22,mouse,,,,left_only
736,ENSMUSG00000062563,M_12,24715832,24731812,20,mouse,,,,left_only
902,ENSMUSG00000050671,M_12,87325412,87346479,19,mouse,,,,left_only
985,ENSMUSG00000053522,M_7,28563278,28565709,18,mouse,,,,left_only


## RICE_UP

1. Create a TSV file to plot genes with high HN-score on the Circos plot

2. Extract GOSlim corresponding to the list of up-regulated genes

In [5]:
# 1. Create a TSV file to plot genes with high HN-score on the Circos plot
# organism = "rice" (R)
df_r_score = pd.read_csv(f'../data/rice_annotation/rice_position_{direction}.tsv', sep='\t')

# Keep only the columns needed below, prefix every chromosome/scaffold name
# with 'R_' and tag each row with its organism.
df_r_score_subset = df_r_score[['Gene stable ID',
                                'Chromosome/scaffold name',
                                'start1 (bp)',
                                'end1 (bp)',
                                'HN-score(HN5)']].assign(
    **{'Chromosome/scaffold name': lambda frame: 'R_' + frame['Chromosome/scaffold name'].astype(str)},
    organism='rice',
)

df_r_score_subset.to_csv(f"../data/{circos_dir}/rice_position_{direction}_assign.tsv", sep='\t', index=False)

display(df_r_score_subset)

Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start1 (bp),end1 (bp),HN-score(HN5),organism
0,Os04g0107900,R_4,483234,485978,253,rice
1,Os01g0136100,R_1,1948773,1949587,246,rice
2,Os02g0259900,R_2,9021454,9023102,238,rice
3,Os03g0245800,R_3,7697015,7698027,237,rice
4,Os03g0277300,R_3,9411494,9416082,236,rice
...,...,...,...,...,...,...
384,Os03g0820400,R_3,34427704,34428391,42,rice
385,Os10g0328600,R_10,9212944,9216458,42,rice
386,Os01g0971800,R_1,42874273,42875515,42,rice
387,Os07g0621600,R_7,25691906,25692138,42,rice


In [6]:
# 2. Extract GOSlim corresponding to the list of up-regulated genes
goslim_all_rice = pd.read_csv(
    '../data/biomart_goslim/biomart_rice_goslim_R56_domain.tsv',
    sep='\t',
    dtype={'Gene stable ID': 'object'},
    low_memory=False,
)
columns_of_interest_rice = [
    'Gene stable ID',
    'GOSlim GOA Accession(s)',
    'GOSlim GOA Description',
    'GOSlim_domain',
]
goslim_all_rice_filtered = goslim_all_rice[columns_of_interest_rice]

# Inner join: scored rice genes that have at least one GOSlim row, reduced to one
# row per (gene, accession, description) combination.
extract_df_r_goslim = (
    df_r_score_subset
    .merge(goslim_all_rice_filtered, on='Gene stable ID', how='inner')
    .drop_duplicates(
        subset=['Gene stable ID', 'GOSlim GOA Accession(s)', 'GOSlim GOA Description'],
        keep='first',
    )
    .copy()
)

# Extract genes with no annotations: the left join's '_merge' indicator column is
# 'left_only' for genes without any match in the GOSlim table.
extract_df_r_goslim_with_indicator = df_r_score_subset.merge(
    goslim_all_rice_filtered,
    on='Gene stable ID',
    how='left',
    indicator=True,
)
left_only_rows = extract_df_r_goslim_with_indicator.loc[
    lambda frame: frame['_merge'] == 'left_only'
]

extract_df_r_goslim.to_csv(f"../data/{circos_dir}/rice_goslim_{direction}.tsv", sep='\t', index=False)
left_only_rows.to_csv(f"../data/{no_annotation}/rice_position_{direction}_no_annotation.tsv", sep='\t', index=False)

display(goslim_all_rice_filtered, extract_df_r_goslim, left_only_rows)

Unnamed: 0,Gene stable ID,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain
0,Os01g0100100,GO:0006810,transport,biological_process
1,Os01g0100100,GO:0008150,biological_process,biological_process
2,Os01g0100100,GO:0009987,cellular process,biological_process
3,Os01g0100100,GO:0003674,molecular_function,molecular_function
4,Os01g0100100,GO:0030234,enzyme regulator activity,molecular_function
...,...,...,...,...
219494,gene-rps19,GO:0003674,molecular_function,molecular_function
219495,gene-rps19,GO:0005198,structural molecule activity,molecular_function
219496,gene-rps19,GO:0005488,binding,molecular_function
219497,gene-rps19,GO:0003723,RNA binding,molecular_function


Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start1 (bp),end1 (bp),HN-score(HN5),organism,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain
0,Os04g0107900,R_4,483234,485978,253,rice,GO:0003674,molecular_function,molecular_function
1,Os04g0107900,R_4,483234,485978,253,rice,GO:0005515,protein binding,molecular_function
2,Os04g0107900,R_4,483234,485978,253,rice,GO:0005488,binding,molecular_function
3,Os04g0107900,R_4,483234,485978,253,rice,GO:0008150,biological_process,biological_process
4,Os04g0107900,R_4,483234,485978,253,rice,GO:0006950,response to stress,biological_process
...,...,...,...,...,...,...,...,...,...
2522,Os01g0971800,R_1,42874273,42875515,42,rice,GO:0008152,metabolic process,biological_process
2523,Os01g0971800,R_1,42874273,42875515,42,rice,GO:0009058,biosynthetic process,biological_process
2524,Os01g0971800,R_1,42874273,42875515,42,rice,GO:0006139,nucleobase-containing compound metabolic process,biological_process
2525,Os01g0971800,R_1,42874273,42875515,42,rice,GO:0003700,DNA-binding transcription factor activity,molecular_function


Unnamed: 0,Gene stable ID,Chromosome/scaffold name,start1 (bp),end1 (bp),HN-score(HN5),organism,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,_merge
172,Os04g0108101,R_4,485272,485663,205,rice,,,,left_only
173,Os01g0184050,R_1,4449056,4449806,205,rice,,,,left_only
229,Os05g0296800,R_5,13197199,13199560,166,rice,,,,left_only
243,Os11g0533400,R_11,19430389,19431158,165,rice,,,,left_only
274,Os04g0578550,R_4,29163356,29165144,158,rice,,,,left_only
...,...,...,...,...,...,...,...,...,...,...
2588,Os11g0506700,R_11,18069286,18071596,42,rice,,,,left_only
2589,Os03g0254250,R_3,8134007,8134226,42,rice,,,,left_only
2642,Os10g0328600,R_10,9212944,9216458,42,rice,,,,left_only
2657,Os07g0621600,R_7,25691906,25692138,42,rice,,,,left_only


## COMBINED_UP ( MOUSE_UP + RICE_UP )

1. chromosome position + gene position + HN-score

2. GOslim terms corresponding to the list of up-regulated genes

In [7]:
# 1. chromosome position + gene position + HN-score
# Stack the mouse rows on top of the rice rows with a fresh 0..n-1 index. Mouse
# rows carry 'start2/end2 (bp)' and rice rows 'start1/end1 (bp)', so after the
# concat each row has values in only one of the two column pairs.
# NOTE(review): the missing values introduced by the concat make the coordinate
# columns floats (e.g. 35175412.0) in the combined table and in the TSV.
stacked_scores = pd.concat([df_m_score_subset, df_r_score_subset], ignore_index=True)

# Collapse the species-specific coordinate columns into one shared pair
# (rice value first, mouse value where the rice one is missing), then drop them.
combined_df_score = stacked_scores.assign(**{
    'Gene start (bp)': stacked_scores['start1 (bp)'].combine_first(stacked_scores['start2 (bp)']),
    'Gene end (bp)': stacked_scores['end1 (bp)'].combine_first(stacked_scores['end2 (bp)']),
}).drop(columns=['start1 (bp)', 'start2 (bp)', 'end1 (bp)', 'end2 (bp)'])

combined_df_score.to_csv(f"../data/{circos_dir}/combined_position_{direction}.tsv", sep='\t', index=False)

display(combined_df_score)

Unnamed: 0,Gene stable ID,Chromosome/scaffold name,HN-score(HN5),organism,Gene start (bp),Gene end (bp)
0,ENSMUSG00000090877,M_17,149,mouse,35175412.0,35178214.0
1,ENSMUSG00000091971,M_17,137,mouse,35188166.0,35191132.0
2,ENSMUSG00000004951,M_5,111,mouse,135916773.0,135918417.0
3,ENSMUSG00000092609,M_17,88,mouse,35188888.0,35191112.0
4,ENSMUSG00000026628,M_1,78,mouse,190902493.0,190950236.0
...,...,...,...,...,...,...
632,Os03g0820400,R_3,42,rice,34427704.0,34428391.0
633,Os10g0328600,R_10,42,rice,9212944.0,9216458.0
634,Os01g0971800,R_1,42,rice,42874273.0,42875515.0
635,Os07g0621600,R_7,42,rice,25691906.0,25692138.0


In [8]:
# Pair every mouse gene with every rice gene that shares the same GOSlim term
# (inner join on accession, description and domain). Columns present on both
# sides receive the '_mouse' / '_rice' suffixes.
goslim_key_columns = ['GOSlim GOA Accession(s)', 'GOSlim GOA Description', 'GOSlim_domain']
gene_pair_columns = ['Gene stable ID_rice',
                     'Gene stable ID_mouse',
                     'Chromosome/scaffold name_rice',
                     'start1 (bp)',
                     'end1 (bp)',
                     'Chromosome/scaffold name_mouse',
                     'start2 (bp)',
                     'end2 (bp)',
                     'HN-score(HN5)_rice',
                     'HN-score(HN5)_mouse']

merged_df_goslim = extract_df_m_goslim.merge(
    extract_df_r_goslim,
    on=goslim_key_columns,
    how='inner',
    suffixes=('_mouse', '_rice'),
)

# Keep the GOSlim keys followed by the per-species gene columns, then remove rows
# that are identical across all of the selected columns.
merged_df_goslim = merged_df_goslim[goslim_key_columns + gene_pair_columns].drop_duplicates()

display(merged_df_goslim)

Unnamed: 0,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,Gene stable ID_rice,Gene stable ID_mouse,Chromosome/scaffold name_rice,start1 (bp),end1 (bp),Chromosome/scaffold name_mouse,start2 (bp),end2 (bp),HN-score(HN5)_rice,HN-score(HN5)_mouse
0,GO:0005739,mitochondrion,cellular_component,Os07g0467200,ENSMUSG00000090877,R_7,16600103,16601979,M_17,35175412,35178214,103,149
1,GO:0005739,mitochondrion,cellular_component,Os03g0276700,ENSMUSG00000090877,R_3,9375984,9377688,M_17,35175412,35178214,99,149
2,GO:0005739,mitochondrion,cellular_component,Os02g0758000,ENSMUSG00000090877,R_2,31920854,31922068,M_17,35175412,35178214,98,149
3,GO:0005739,mitochondrion,cellular_component,Os03g0776900,ENSMUSG00000090877,R_3,32208931,32210830,M_17,35175412,35178214,72,149
4,GO:0005739,mitochondrion,cellular_component,Os02g0181900,ENSMUSG00000090877,R_2,4567630,4573049,M_17,35175412,35178214,71,149
...,...,...,...,...,...,...,...,...,...,...,...,...,...
16724,GO:0005773,vacuole,cellular_component,Os09g0491772,ENSMUSG00000031957,R_9,18987999,18991156,M_8,112413151,112417642,51,13
16725,GO:0005773,vacuole,cellular_component,Os08g0442200,ENSMUSG00000031957,R_8,21549721,21551326,M_8,112413151,112417642,48,13
16726,GO:0005777,peroxisome,cellular_component,Os06g0253100,ENSMUSG00000070985,R_6,7940956,7941680,M_4,49447105,49473912,129,14
16727,GO:0005777,peroxisome,cellular_component,Os07g0529000,ENSMUSG00000070985,R_7,20691213,20693521,M_4,49447105,49473912,59,14


In [9]:
# for debugging purpose: check that every GOSlim accession shared by the rice and
# mouse tables is present in the merged table, and that each
# (accession, rice gene, mouse gene) combination occurs only once.
unique_accessions_r = set(extract_df_r_goslim['GOSlim GOA Accession(s)'].unique())
unique_accessions_h = set(extract_df_m_goslim['GOSlim GOA Accession(s)'].unique())  # mouse accessions (historical '_h' name)
common_accessions = unique_accessions_r.intersection(unique_accessions_h)
unique_accessions_merged = set(merged_df_goslim['GOSlim GOA Accession(s)'].unique())
missing_accessions = common_accessions.difference(unique_accessions_merged)
duplicates = merged_df_goslim.duplicated(subset=['GOSlim GOA Accession(s)', 'Gene stable ID_rice', 'Gene stable ID_mouse'], keep=False)
unique_combinations = not duplicates.any()
print(f"Number of common accessions: {len(common_accessions)}")
print(f"Number of missing accessions: {len(missing_accessions)}")
print(f"Missing accessions: {missing_accessions}")
print(f"各'Gene stable ID_rice'と'Gene stable ID_mouse'の組み合わせはユニークですか？: {unique_combinations}")

# Count the occurrences of each 'GOSlim GOA Accession(s)'
accession_counter = Counter(merged_df_goslim['GOSlim GOA Accession(s)'])

# Sort the rows so that the most frequent accessions come first. The helper
# 'count' column is added to a derived frame via assign() instead of being
# written into merged_df_goslim, so the frame displayed by the previous cell is
# not modified by running this one.
merged_df_goslim_sorted = (
    merged_df_goslim
    .assign(count=merged_df_goslim['GOSlim GOA Accession(s)'].map(accession_counter))
    .sort_values('count', ascending=False)
    .drop('count', axis=1)
    .reset_index(drop=True)
)
merged_df_goslim_sorted.to_csv(f"../data/{circos_dir}/combined_goslim_{direction}.tsv", sep='\t', index=False)

display(merged_df_goslim_sorted)

Number of common accessions: 28
Number of missing accessions: 0
Missing accessions: set()
各'Gene stable ID_rice'と'Gene stable ID_mouse'の組み合わせはユニークですか？: True


Unnamed: 0,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,Gene stable ID_rice,Gene stable ID_mouse,Chromosome/scaffold name_rice,start1 (bp),end1 (bp),Chromosome/scaffold name_mouse,start2 (bp),end2 (bp),HN-score(HN5)_rice,HN-score(HN5)_mouse
0,GO:0005634,nucleus,cellular_component,Os03g0268600,ENSMUSG00000042745,R_3,8921424,8923437,M_2,152578171,152579330,59,14
1,GO:0005634,nucleus,cellular_component,Os03g0701100,ENSMUSG00000020427,R_3,28138905,28141758,M_11,7156086,7163923,103,16
2,GO:0005634,nucleus,cellular_component,Os03g0131800,ENSMUSG00000020427,R_3,1815345,1816503,M_11,7156086,7163923,65,16
3,GO:0005634,nucleus,cellular_component,Os08g0546800,ENSMUSG00000020427,R_8,27382872,27384470,M_11,7156086,7163923,73,16
4,GO:0005634,nucleus,cellular_component,Os03g0710500,ENSMUSG00000020427,R_3,28647086,28649570,M_11,7156086,7163923,74,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...
16724,GO:0005730,nucleolus,cellular_component,Os06g0592500,ENSMUSG00000005483,R_6,23293383,23294073,M_8,84334822,84339282,118,71
16725,GO:0005730,nucleolus,cellular_component,Os06g0592500,ENSMUSG00000026628,R_6,23293383,23294073,M_1,190902493,190950236,118,78
16726,GO:0005777,peroxisome,cellular_component,Os06g0253100,ENSMUSG00000070985,R_6,7940956,7941680,M_4,49447105,49473912,129,14
16727,GO:0005777,peroxisome,cellular_component,Os07g0529000,ENSMUSG00000070985,R_7,20691213,20693521,M_4,49447105,49473912,59,14


## Merge GOslim terms and line enrichment results

In [14]:
# Load the line-enrichment table (one row per GOSlim term).
# NOTE(review): the 'MR_2312' part of the path is hard-coded and does not follow
# `organism1_vs_organism2` or the current date — confirm this is intended.
enrichment = pd.read_csv(
    f'../data/line_enrichment_{direction}_MR_2312/goslim_correspondence_q_values_{direction}_MR/goslim_correspondence_fold_enrichment_p_q_{direction}.tsv', sep='\t')

# Build the "<accession>: <description>" label used as the join key and place
# it in front of the existing columns.
goslim_label = merged_df_goslim_sorted['GOSlim GOA Accession(s)'] + ": " + merged_df_goslim_sorted['GOSlim GOA Description']
goslim_table_columns = ['GOSlim',
                        'GOSlim GOA Accession(s)',
                        'GOSlim GOA Description',
                        'GOSlim_domain',
                        'Gene stable ID_rice',
                        'Gene stable ID_mouse',
                        'Chromosome/scaffold name_rice',
                        'start1 (bp)',
                        'end1 (bp)',
                        'Chromosome/scaffold name_mouse',
                        'start2 (bp)',
                        'end2 (bp)',
                        'HN-score(HN5)_rice',
                        'HN-score(HN5)_mouse']
merged_df_goslim_sorted = merged_df_goslim_sorted.assign(GOSlim=goslim_label)[goslim_table_columns]

# Attach the enrichment statistics to every gene pair (left join: pairs whose
# term is absent from the enrichment table are kept), then order the rows by
# fold enrichment, highest first.
enrichment_columns = ['GOSlim', f'counts_{direction}', 'fold_enrichment', 'p_value', 'q_value']
merged_df_goslim_enrichment = (
    merged_df_goslim_sorted
    .merge(enrichment[enrichment_columns], on='GOSlim', how='left')
    .sort_values('fold_enrichment', ascending=False)
    .reset_index(drop=True)
)
merged_df_goslim_enrichment.to_csv(f"../data/{circos_dir}/combined_goslim_enrichment_{direction}.tsv", sep='\t', index=False)

display(enrichment, merged_df_goslim_sorted, merged_df_goslim_enrichment)

Unnamed: 0,GOSlim,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,counts_up,counts_all,up_ratio,all_ratio,fold_enrichment,p_value,q_value
0,GO:0008289: lipid binding,GO:0008289,lipid binding,molecular_function,147,257535,0.008787,0.001406,6.247552,1.084662e-65,7.3757e-65
1,GO:0006091: generation of precursor metabolite...,GO:0006091,generation of precursor metabolites and energy,biological_process,77,243045,0.004603,0.001327,3.46763,9.441558999999999e-20,4.5859e-19
2,GO:0005783: endoplasmic reticulum,GO:0005783,endoplasmic reticulum,cellular_component,504,1609508,0.030127,0.00879,3.427411,1.2658560000000001e-118,1.434636e-117
3,GO:0005576: extracellular region,GO:0005576,extracellular region,cellular_component,610,2069023,0.036464,0.0113,3.226958,2.927792e-132,4.977246e-131
4,GO:0005829: cytosol,GO:0005829,cytosol,cellular_component,1326,5489678,0.079264,0.029981,2.643781,1.4136230000000001e-213,4.806317e-212
5,GO:0005773: vacuole,GO:0005773,vacuole,cellular_component,64,270510,0.003826,0.001477,2.589557,3.137256e-11,1.066667e-10
6,GO:0140110: transcription regulator activity,GO:0140110,transcription regulator activity,molecular_function,396,1948470,0.023671,0.010641,2.224489,1.2409370000000001e-45,7.031977e-45
7,GO:0005975: carbohydrate metabolic process,GO:0005975,carbohydrate metabolic process,biological_process,120,655182,0.007173,0.003578,2.004694,4.773265e-12,1.803234e-11
8,GO:0048856: anatomical structure development,GO:0048856,anatomical structure development,biological_process,1222,7211128,0.073047,0.039383,1.854799,5.85788e-90,4.9791979999999996e-89
9,GO:0005739: mitochondrion,GO:0005739,mitochondrion,cellular_component,322,2075124,0.019248,0.011333,1.698402,8.449760999999999e-19,3.591148e-18


Unnamed: 0,GOSlim,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,Gene stable ID_rice,Gene stable ID_mouse,Chromosome/scaffold name_rice,start1 (bp),end1 (bp),Chromosome/scaffold name_mouse,start2 (bp),end2 (bp),HN-score(HN5)_rice,HN-score(HN5)_mouse
0,GO:0005634: nucleus,GO:0005634,nucleus,cellular_component,Os03g0268600,ENSMUSG00000042745,R_3,8921424,8923437,M_2,152578171,152579330,59,14
1,GO:0005634: nucleus,GO:0005634,nucleus,cellular_component,Os03g0701100,ENSMUSG00000020427,R_3,28138905,28141758,M_11,7156086,7163923,103,16
2,GO:0005634: nucleus,GO:0005634,nucleus,cellular_component,Os03g0131800,ENSMUSG00000020427,R_3,1815345,1816503,M_11,7156086,7163923,65,16
3,GO:0005634: nucleus,GO:0005634,nucleus,cellular_component,Os08g0546800,ENSMUSG00000020427,R_8,27382872,27384470,M_11,7156086,7163923,73,16
4,GO:0005634: nucleus,GO:0005634,nucleus,cellular_component,Os03g0710500,ENSMUSG00000020427,R_3,28647086,28649570,M_11,7156086,7163923,74,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16724,GO:0005730: nucleolus,GO:0005730,nucleolus,cellular_component,Os06g0592500,ENSMUSG00000005483,R_6,23293383,23294073,M_8,84334822,84339282,118,71
16725,GO:0005730: nucleolus,GO:0005730,nucleolus,cellular_component,Os06g0592500,ENSMUSG00000026628,R_6,23293383,23294073,M_1,190902493,190950236,118,78
16726,GO:0005777: peroxisome,GO:0005777,peroxisome,cellular_component,Os06g0253100,ENSMUSG00000070985,R_6,7940956,7941680,M_4,49447105,49473912,129,14
16727,GO:0005777: peroxisome,GO:0005777,peroxisome,cellular_component,Os07g0529000,ENSMUSG00000070985,R_7,20691213,20693521,M_4,49447105,49473912,59,14


Unnamed: 0,GOSlim,GOSlim GOA Accession(s),GOSlim GOA Description,GOSlim_domain,Gene stable ID_rice,Gene stable ID_mouse,Chromosome/scaffold name_rice,start1 (bp),end1 (bp),Chromosome/scaffold name_mouse,start2 (bp),end2 (bp),HN-score(HN5)_rice,HN-score(HN5)_mouse,counts_up,fold_enrichment,p_value,q_value
0,GO:0008289: lipid binding,GO:0008289,lipid binding,molecular_function,Os07g0659600,ENSMUSG00000020437,R_7,27800556,27802446,M_11,6456548,6470965,43,29,147,6.247552,1.084662e-65,7.375700e-65
1,GO:0008289: lipid binding,GO:0008289,lipid binding,molecular_function,Os07g0659600,ENSMUSG00000020178,R_7,27800556,27802446,M_10,75152711,75170618,43,17,147,6.247552,1.084662e-65,7.375700e-65
2,GO:0008289: lipid binding,GO:0008289,lipid binding,molecular_function,Os03g0111300,ENSMUSG00000057933,R_3,652323,652798,M_9,78238300,78263070,76,13,147,6.247552,1.084662e-65,7.375700e-65
3,GO:0008289: lipid binding,GO:0008289,lipid binding,molecular_function,Os07g0659600,ENSMUSG00000037379,R_7,27800556,27802446,M_5,33355528,33375799,43,14,147,6.247552,1.084662e-65,7.375700e-65
4,GO:0008289: lipid binding,GO:0008289,lipid binding,molecular_function,Os01g0849000,ENSMUSG00000037379,R_1,36484020,36484818,M_5,33355528,33375799,49,14,147,6.247552,1.084662e-65,7.375700e-65
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16724,GO:0003723: RNA binding,GO:0003723,RNA binding,molecular_function,Os02g0622500,ENSMUSG00000096520,R_2,24783201,24786542,M_Y,3771673,3783267,75,15,50,0.178163,1.000000e+00,1.000000e+00
16725,GO:0003723: RNA binding,GO:0003723,RNA binding,molecular_function,Os01g0719400,ENSMUSG00000053113,R_1,29995196,29999996,M_11,117856905,117860873,51,17,50,0.178163,1.000000e+00,1.000000e+00
16726,GO:0003723: RNA binding,GO:0003723,RNA binding,molecular_function,Os10g0540800,ENSMUSG00000096520,R_10,21091977,21096586,M_Y,3771673,3783267,60,15,50,0.178163,1.000000e+00,1.000000e+00
16727,GO:0003723: RNA binding,GO:0003723,RNA binding,molecular_function,Os02g0622500,ENSMUSG00000053113,R_2,24783201,24786542,M_11,117856905,117860873,75,17,50,0.178163,1.000000e+00,1.000000e+00
