In [None]:
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@文件        :023.GO enrichment for vertebrate.ipynb
@说明        :plot the GO enrichment for vertebrate and lancelet
@时间        :2023/08/31 16:08:59
@作者        :Wu Baosheng
@版本        :1.0
'''

In [1]:
import pandas as pd
import os,re,sys
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import scanpy as sc
# Font type 42 (TrueType) for PDF/PS output instead of matplotlib's default Type 3.
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
# Relative paths used by the following cells (Excel inputs, exported PDFs) resolve
# against this directory until the next os.chdir call.
# NOTE(review): machine-specific absolute path — adjust when running elsewhere.
os.chdir('H:/002.singcell/01.liver/10.mutileSpecies/05.SAMap/017. HVGs')

In [41]:
# Load the lancelet and vertebrate enrichment tables in a single pass over the workbook:
# passing a list of sheet names makes read_excel return a {sheet_name: DataFrame} dict.
enrichment_sheets = pd.read_excel(
    'enrichment_GO.xlsx',
    sheet_name=['lancelet', 'vertebrate'],
    header=0,
    index_col=None,
    engine="openpyxl",
)

# Keep only GO (BP/CC/MF) and KEGG rows. `.copy()` detaches each filtered table from the
# raw sheet so that later column edits do not operate on a slice.
kept_sources = ['GO:BP', 'GO:CC', 'GO:MF', 'KEGG']
lancelet = enrichment_sheets['lancelet']
lancelet = lancelet[lancelet['source'].isin(kept_sources)].copy()
vertebrate = enrichment_sheets['vertebrate']
vertebrate = vertebrate[vertebrate['source'].isin(kept_sources)].copy()

In [42]:
# Label rows that have no value in 'functions' as 'others'.
# The column is reassigned rather than filled with `inplace=True` on a column selection:
# that chained in-place form does not update the parent DataFrame under pandas
# Copy-on-Write and warns on sliced frames.
lancelet = lancelet.assign(functions=lancelet['functions'].fillna('others'))
vertebrate = vertebrate.assign(functions=vertebrate['functions'].fillna('others'))

In [43]:
# Display the filtered vertebrate enrichment table for inspection.
vertebrate

Unnamed: 0,source,term_name,functions,term_id,adjusted_p_value,negative_log10_of_adjusted_p_value,term_size,query_size,intersection_size,effective_domain_size
0,GO:MF,extracellular matrix structural constituent,others,GO:0005201,4.596350e-31,30.337587,174.0,714.0,52.0,20195.0
1,GO:MF,signaling receptor binding,others,GO:0005102,5.249741e-22,21.279862,1564.0,714.0,139.0,20195.0
2,GO:MF,calcium ion binding,others,GO:0005509,3.046992e-19,18.516129,727.0,714.0,84.0,20195.0
3,GO:MF,growth factor binding,others,GO:0019838,1.831258e-18,17.737250,132.0,714.0,35.0,20195.0
4,GO:MF,protein binding,protein,GO:0005515,1.076316e-16,15.968060,14881.0,714.0,623.0,20195.0
...,...,...,...,...,...,...,...,...,...,...
930,KEGG,Calcium signaling pathway,others,KEGG:04020,1.207722e-02,1.918033,239.0,377.0,25.0,8161.0
931,KEGG,Vascular smooth muscle contraction,others,KEGG:04270,1.580168e-02,1.801297,134.0,377.0,17.0,8161.0
932,KEGG,Hypertrophic cardiomyopathy,others,KEGG:05410,2.650557e-02,1.576663,90.0,377.0,13.0,8161.0
933,KEGG,Dilated cardiomyopathy,others,KEGG:05414,4.566851e-02,1.340383,95.0,377.0,13.0,8161.0


In [44]:
# List the distinct 'functions' labels; each one needs an entry in the colour map used for plotting.
vertebrate['functions'].unique()

array(['others', 'protein', 'carbohydrate', 'coagulation', 'lipid',
       'insulin', 'endothelial', 'immune', 'nitrogen', 'phosphate',
       'endoplasmic reticulum'], dtype=object)

In [6]:
# Colour (hex) assigned to each 'functions' label; passed to plotly as color_discrete_map.
color_pettle = {
    'others': '#c8c9ca',
    'protein': '#a6cee3',
    'carbohydrate': '#1f78b4',
    'coagulation': '#b2df8a',
    'lipid': '#33a02c',
    'insulin': '#fb9a99',
    'endothelial': '#e31a1c',
    'immune': '#fdbf6f',
    'nitrogen': '#ff7f00',
    'phosphate': '#cab2d6',
    'endoplasmic reticulum': '#6a3d9a',
}

In [65]:
# Bubble plot of the vertebrate terms: x = -log10(adjusted p-value),
# y and marker size = intersection_size, colour = 'functions' label.
fig = px.scatter(
    vertebrate[vertebrate['intersection_size'] < 120],  # only rows with intersection_size below 120
    x='negative_log10_of_adjusted_p_value',
    y='intersection_size',
    color='functions',
    size='intersection_size',
    size_max=20,
    color_discrete_map=color_pettle,
)
fig.update_layout(
    height=600,  # figure height: 600 px
    width=800,   # figure width: 800 px
)

fig.show()
fig.write_image('./vertebrat_enrichment.pdf')

In [69]:
# Bubble plot of the lancelet terms: x = -log10(adjusted p-value),
# y and marker size = intersection_size, colour = 'functions' label.
fig = px.scatter(
    lancelet[lancelet['intersection_size'] < 120],  # only rows with intersection_size below 120
    x='negative_log10_of_adjusted_p_value',
    y='intersection_size',
    color='functions',
    size='intersection_size',
    size_max=20,
    color_discrete_map=color_pettle,
)
fig.update_layout(
    height=600,                  # figure height: 600 px
    width=800,                   # figure width: 800 px
    xaxis=dict(range=[0, 60]),   # fix the x-axis to 0-60
    yaxis=dict(range=[0, 120]),  # fix the y-axis to 0-120
)
fig.show()
fig.write_image('./lancelet_enrichment.pdf')

In [55]:
# Preview seaborn's "Set1" palette; the result is only displayed, not stored.
sns.color_palette("Set1")

In [64]:
# Load the three gene-set enrichment tables in a single pass over the workbook:
# passing a list of sheet names makes read_excel return a {sheet_name: DataFrame} dict.
gene_set_sheets = pd.read_excel(
    'enrichment_GO-1.xlsx',
    sheet_name=['331-recruitment', '343-vertebrate 2R enrichment', 'ancient-genes'],
    header=0,
    index_col=None,
    engine="openpyxl",
)

# Keep only GO rows (BP/CC/MF); KEGG is not retained for these tables.
# `.copy()` detaches each filtered table from the raw sheet so that later column edits
# do not operate on a slice.
go_sources = ['GO:BP', 'GO:CC', 'GO:MF']
recruiment = gene_set_sheets['331-recruitment']
recruiment = recruiment[recruiment['source'].isin(go_sources)].copy()
wgd = gene_set_sheets['343-vertebrate 2R enrichment']
wgd = wgd[wgd['source'].isin(go_sources)].copy()
ancient = gene_set_sheets['ancient-genes']
ancient = ancient[ancient['source'].isin(go_sources)].copy()

In [65]:
# Label rows that have no value in 'functions' as 'others'.
# The column is reassigned rather than filled with `inplace=True` on a column selection:
# that chained in-place form does not update the parent DataFrame under pandas
# Copy-on-Write and warns on sliced frames.
recruiment = recruiment.assign(functions=recruiment['functions'].fillna('others'))
wgd = wgd.assign(functions=wgd['functions'].fillna('others'))
ancient = ancient.assign(functions=ancient['functions'].fillna('others'))

In [66]:
# Keep only rows whose 'functions' label is something other than 'others'.
recruiment = recruiment.loc[recruiment['functions'].ne('others')]
wgd = wgd.loc[wgd['functions'].ne('others')]
ancient = ancient.loc[ancient['functions'].ne('others')]

In [77]:
# Order rows by 'functions' label, then by ascending -log10(adjusted p-value).
# sort_values returns a new frame that is rebound to the same name; this avoids
# mutating a filtered slice in place and keeps the cell safe to re-run.
sort_keys = ['functions', 'negative_log10_of_adjusted_p_value']
recruiment = recruiment.sort_values(by=sort_keys, ascending=[True, True])
wgd = wgd.sort_values(by=sort_keys, ascending=[True, True])
ancient = ancient.sort_values(by=sort_keys, ascending=[True, True])

In [78]:
# Horizontal bar chart for the recruitment table: one bar per term_id,
# bar length = -log10(adjusted p-value), colour = 'functions' label.
# px.bar's `width` argument is the figure width in pixels (not the bar thickness), so the
# figure size is set once in update_layout below. If a different bar thickness is wanted,
# fig.update_traces(width=...) is the place to set it.
# NOTE(review): the output file is prefixed '397' while the sheet loaded for this table
# is named '331-recruitment' — confirm which count is intended.
fig = px.bar(
    recruiment[recruiment['intersection_size'] < 120],  # only rows with intersection_size below 120
    x='negative_log10_of_adjusted_p_value',
    y='term_id',
    color='functions',
    color_discrete_map=color_pettle,
)
fig.update_layout(
    width=600,    # figure width: 600 px
    height=1200,  # figure height: 1200 px
    xaxis=dict(range=[0, 40]),  # fix the x-axis to 0-40
    # tickmode='linear' with dtick=1 requests a tick at every step along the term_id axis
    yaxis=dict(tickmode='linear', dtick=1),
    font=dict(family="Arial", size=8),
)
fig.show()
fig.write_image('./397_recruit_enrichment.pdf')

In [79]:
# Horizontal bar chart for the 2R WGD table: one bar per term_id,
# bar length = -log10(adjusted p-value), colour = 'functions' label.
# px.bar's `width` argument is the figure width in pixels (not the bar thickness), so the
# figure size is set once in update_layout below. If a different bar thickness is wanted,
# fig.update_traces(width=...) is the place to set it.
# NOTE(review): the output file is prefixed '342' while the sheet loaded for this table
# is named '343-vertebrate 2R enrichment' — confirm which count is intended.
fig = px.bar(
    wgd[wgd['intersection_size'] < 120],  # only rows with intersection_size below 120
    x='negative_log10_of_adjusted_p_value',
    y='term_id',
    color='functions',
    color_discrete_map=color_pettle,
)
fig.update_layout(
    width=600,    # figure width: 600 px
    height=1200,  # figure height: 1200 px
    xaxis=dict(range=[0, 40]),  # fix the x-axis to 0-40
    # tickmode='linear' with dtick=1 requests a tick at every step along the term_id axis
    yaxis=dict(tickmode='linear', dtick=1),
    font=dict(family="Arial", size=8),
)
fig.show()
fig.write_image('./342_2R_wgd_genes_enrichment.pdf')

In [80]:
# Horizontal bar chart for the ancient-genes table: one bar per term_id,
# bar length = -log10(adjusted p-value), colour = 'functions' label.
# px.bar's `width` argument is the figure width in pixels (not the bar thickness), so the
# figure size is set once in update_layout below. If a different bar thickness is wanted,
# fig.update_traces(width=...) is the place to set it.
fig = px.bar(
    ancient[ancient['intersection_size'] < 120],  # only rows with intersection_size below 120
    x='negative_log10_of_adjusted_p_value',
    y='term_id',
    color='functions',
    color_discrete_map=color_pettle,
)
fig.update_layout(
    width=600,    # figure width: 600 px
    height=1200,  # figure height: 1200 px
    xaxis=dict(range=[0, 40]),  # fix the x-axis to 0-40
    # tickmode='linear' with dtick=1 requests a tick at every step along the term_id axis
    yaxis=dict(tickmode='linear', dtick=1),
    font=dict(family="Arial", size=8),
)
fig.show()
fig.write_image('./65_ancient_genes_enrichment.pdf')


In [82]:
def readH5ad(adata_path):
    """Load an .h5ad file and return it restricted to its highly variable genes.

    The matrix stored in ``layers['counts']`` becomes ``X``, is normalised to a
    total of 1e6 per cell (with ``exclude_highly_expressed=True``), log1p
    transformed, and the top 2000 highly variable genes (flavor ``'seurat'``)
    are selected.

    Parameters
    ----------
    adata_path
        Path to an .h5ad file that contains a ``'counts'`` layer.

    Returns
    -------
    AnnData
        An independent copy holding only the highly variable genes.

    Raises
    ------
    KeyError
        If the file has no ``'counts'`` layer.
    """
    adata = sc.read_h5ad(adata_path)
    # Copy the layer: assigning it directly lets X and layers['counts'] share one
    # matrix, and the normalisation / log1p steps below may modify X in place,
    # which would overwrite the stored raw counts as well.
    adata.X = adata.layers['counts'].copy()
    sc.pp.normalize_total(adata, target_sum=1e6, exclude_highly_expressed=True)
    sc.pp.log1p(adata)
    sc.pp.highly_variable_genes(adata, flavor='seurat', n_top_genes=2000, inplace=True)
    # Subsetting yields a view of the full object; return a real copy so callers
    # can modify the result without going through the view.
    return adata[:, adata.var['highly_variable']].copy()

In [83]:
# Load the lamprey counts file through readH5ad (normalised, log1p-transformed, HVG subset).
# NOTE(review): machine-specific absolute path.
lamprey=readH5ad('H:/002.singcell/01.liver/10.mutileSpecies/05.SAMap/01.h5ad_counts/count_new/lamprey_counts.h5ad')

In [2]:
import scanpy as sc
import os,re,sys
import pandas as pd
import numpy as np
import scanpy.external as sce
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
# Font type 42 (TrueType) for PDF/PS output; repeats the setting made at the top of the notebook.
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
# Disabled display options, kept for reference:
# sns.set(style='white', font_scale=1)
# %config InlineBackend.figure_format = 'svg'

In [3]:
# Load the dogshark AnnData object from disk.
dogshark=sc.read('H:/002.singcell/01.liver/03.dogshark/05.HIFIgenome/dogshark.h5ad')

In [12]:
# Switch the working directory; relative paths in the following cells resolve against it.
os.chdir('H:/002.singcell/01.liver/10.mutileSpecies/05.SAMap/18.recruitment')
# Set the scanpy figure dpi to 100.
sc.set_figure_params(dpi=100)

In [None]:
import matplotlib.pyplot as plt

# Continuous colormap blending from grey (#cccccc) to red (#e31a1c);
# the later UMAP cells reuse this `cmap`.
cmap = sns.blend_palette(['#cccccc', '#e31a1c'], as_cmap=True)

# Dogshark UMAP coloured by FLT1, KDR and FLT4, with vmax fixed at 10.
sc.pl.umap(
    dogshark,
    color=['FLT1', 'KDR', 'FLT4'],
    cmap=cmap,
    vmax=10,
    size=10,
    frameon=False,
    legend_loc='on data',
    legend_fontsize=10,
    save='_dogshark.png',
)

In [18]:
# Load the bichir AnnData object from disk.
bichir=sc.read('H:/002.singcell/01.liver/08.bichir/03.NCBI/bichir.h5ad')

In [None]:
# Bichir UMAP coloured by FLT1, KDR and FLT4, with vmax fixed at 10.
sc.pl.umap(
    bichir,
    color=['FLT1', 'KDR', 'FLT4'],
    cmap=cmap,
    vmax=10,
    size=10,
    frameon=False,
    legend_loc='on data',
    legend_fontsize=10,
    save='_bichir.png',
)

In [21]:
# Load the integrated frog AnnData object from disk.
frog=sc.read('H:/002.singcell/01.liver/05.frog/frog.integrate.h5ad')

In [None]:
# Frog UMAP coloured by A1CF, KDR and FLT4, with vmax fixed at 10.
# NOTE(review): the first gene is A1CF here, whereas most other species plots use FLT1 — confirm this is intended.
sc.pl.umap(
    frog,
    color=['A1CF', 'KDR', 'FLT4'],
    cmap=cmap,
    vmax=10,
    size=10,
    frameon=False,
    legend_loc='on data',
    legend_fontsize=10,
    save='_frog.png',
)

In [28]:
# Load the integrated human AnnData object from disk.
human=sc.read('H:/002.singcell/01.liver/07.Human/human.integrate.h5ad')

In [None]:
# Human UMAP coloured by FLT1, KDR and FLT4, with vmax fixed at 10.
sc.pl.umap(
    human,
    color=['FLT1', 'KDR', 'FLT4'],
    cmap=cmap,
    vmax=10,
    size=10,
    frameon=False,
    legend_loc='on data',
    legend_fontsize=10,
    save='_human.png',
)

In [30]:
# Load the integrated lamprey AnnData object from disk.
# This rebinds `lamprey`, which an earlier cell assigned from readH5ad.
lamprey=sc.read('H:/002.singcell/01.liver/02.lampery/lamprey.integrate.h5ad')

In [None]:
# Lamprey UMAP coloured by HARS1, KDR and FLT4, with vmax fixed at 10.
# NOTE(review): the first gene is HARS1 here, whereas most other species plots use FLT1 — confirm this is intended.
sc.pl.umap(
    lamprey,
    color=['HARS1', 'KDR', 'FLT4'],
    cmap=cmap,
    vmax=10,
    size=10,
    frameon=False,
    legend_loc='on data',
    legend_fontsize=10,
    save='_lamprey.png',
)

In [34]:
# Load the integrated lancelet AnnData object from disk.
# This rebinds `lancelet`, which earlier cells used for the enrichment DataFrame.
lancelet=sc.read('H:/002.singcell/01.liver/01.lancelet/01.withoutSoupX/lancelet_integrate.h5ad')

In [None]:
# Lancelet UMAP coloured by FLT1, FGFR1 and the feature named '118419931', with vmax fixed at 10.
# Point size is 1 here (the other species plots use 10).
sc.pl.umap(
    lancelet,
    color=['FLT1', 'FGFR1', '118419931'],
    cmap=cmap,
    vmax=10,
    size=1,
    frameon=False,
    legend_loc='on data',
    legend_fontsize=10,
    save='_lancelet.png',
)

In [69]:
# Load the integrated lungfish AnnData object from disk.
lungfish =sc.read('H:/002.singcell/01.liver/04.lungfish/lungfish.integrate.h5ad')

In [None]:
# Lungfish UMAP coloured by FLT1, KDR and FLT4, with vmax fixed at 10.
sc.pl.umap(
    lungfish,
    color=['FLT1', 'KDR', 'FLT4'],
    cmap=cmap,
    vmax=10,
    size=10,
    frameon=False,
    legend_loc='on data',
    legend_fontsize=10,
    save='_lungfish.png',
)

In [None]:
# Lungfish UMAP coloured by FLT1 only, saved with a PDF suffix.
sc.pl.umap(
    lungfish,
    color=['FLT1'],
    cmap=cmap,
    vmax=10,
    size=10,
    frameon=False,
    legend_loc='on data',
    legend_fontsize=10,
    save='_lungfish2.pdf',
)

In [None]:
### sort gff
# Sort the records of a headerless, tab-separated GFF file by sequence name, then by
# start and end coordinate, and write the result back to the same path.
gff_columns = ['chr', 'source', 'type', 'start', 'end', 'score', 'strand', 'phase', 'attributes']

# pandas has no `pd.read`; delimited text is parsed with `pd.read_csv`.
# NOTE(review): no `comment=` option is set, so lines starting with '#' would be parsed
# as ordinary rows — confirm the input contains none.
df = pd.read_csv('./lampery.gff', sep='\t', header=None, index_col=None, names=gff_columns)
df = df.sort_values(by=['chr', 'start', 'end'], ascending=[True, True, True])

# Overwrites the input file. header=False / index=False state explicitly that neither
# the column names nor the row index are written.
df.to_csv('./lampery.gff', sep='\t', header=False, index=False)