### Author: yangshichen
### 注意：脚本仅供参考，使用前请仔细阅读

### 加载包

In [1]:
import os
import glob
import scanpy as sc
import anndata as ad
import numpy as np
import pandas as pd
import seaborn as sb
import seaborn as sns
from matplotlib.pyplot import rc_context
import matplotlib.pyplot as plt
from scipy.io import mmread
from scipy.sparse import csr_matrix
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Logging verbosity levels: errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 3
sc.logging.print_header()
sc.settings.set_figure_params(dpi=100, frameon=False)
# Set the worker count on the public settings instance. Assigning to the class
# attribute `sc._settings.ScanpyConfig.n_jobs` (as this cell did before) replaces
# the `n_jobs` property on the class itself and relies on a private module.
sc.settings.n_jobs = 70

scanpy==1.9.3 anndata==0.8.0 numpy==1.21.6 scipy==1.7.3 pandas==1.3.5 scikit-learn==1.0.2 statsmodels==0.13.2 python-igraph==0.10.8 pynndescent==0.5.13


In [3]:
# Palette of 30 hex colours (presumably one per cluster, going by the name).
cluster_cols = [
    "#DC050C", "#FB8072", "#1965B0", "#7BAFDE", "#882E72", "#B17BA6",
    "#FF7F00", "#FDB462", "#E7298A", "#E78AC3", "#33A02C", "#B2DF8A",
    "#55A1B1", "#8DD3C7", "#A6761D", "#E6AB02", "#7570B3", "#BEAED4",
    "#666666", "#999999", "#aa8282", "#d4b7b7", "#8600bf", "#ba5ce3",
    "#808000", "#aeae5c", "#1e90ff", "#00bfff", "#56ff0d", "#ffff00",
]

### 数据提取

In [18]:
# Load the annotated PBMC AnnData object from disk; the bare `adata` below
# displays its summary (dimensions and available obs/var/uns/obsm/layers keys).
adata = sc.read("/media/AnalysisDisk2/Yangshichen/0_HIV_RNA/HIV-pbmc/pbmc_celltype.h5ad")
adata

AnnData object with n_obs × n_vars = 2744009 × 21679
    obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'doublet_score', 'doublet', 'batch', 'sample', 'stage', 'experiments', 'age', 'total_counts_rp', 'pct_counts_rp', 'total_counts_hb', 'pct_counts_hb', 'total_counts_ncRNA', 'pct_counts_ncRNA', 'total_counts_LOC', 'pct_counts_LOC', 'total_counts_erccs', 'pct_counts_erccs', 'celltype_L1', 'phase_ordered', 'celltype_L3', 'celltype_L1_5', 'celltype_L2'
    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'mean', 'std'
    uns: 'age_colors', 'celltype_L2_colors', 'experiments_colors', 'hvg', 'log1p', 'neighbors', 'pca', 'phase_ordered_colors', 'stage_colors', 'umap'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
    varm: 'PCs'
    layers: 'counts'
    obsp: 'connectivities', 'distances'

In [24]:
# Level-3 cell types retained for this analysis.
selected_celltypes = ['CD4_Naive_T-CCR7','CD4_Tcm-IFIT3','CD4_Th1-GZMK','CD4_Treg-FOXP3',
                      'CD8_CTL-GZMB','NKT-NCR1',
                      'NK_bright-XCL1','Mature_NK_dim-FCGR3A',
                      'cMono-CD14','ncMono-FCGR3A','cDC2-CD1C',
                      'Aptypical_Memory_B-ITGAX']
# Keep only cells of the selected types that belong to the 'INRs' stage.
keep_cells = adata.obs['celltype_L3'].isin(selected_celltypes) & adata.obs['stage'].isin(['INRs'])
# Take an explicit copy instead of a (chained) view: later cells write score
# columns into `subdata.obs`, which would otherwise trigger an implicit
# copy-on-write, and a view keeps the full parent object alive even after
# `del adata`.
subdata = adata[keep_cells].copy()
subdata

View of AnnData object with n_obs × n_vars = 376099 × 21679
    obs: 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'doublet_score', 'doublet', 'batch', 'sample', 'stage', 'experiments', 'age', 'total_counts_rp', 'pct_counts_rp', 'total_counts_hb', 'pct_counts_hb', 'total_counts_ncRNA', 'pct_counts_ncRNA', 'total_counts_LOC', 'pct_counts_LOC', 'total_counts_erccs', 'pct_counts_erccs', 'celltype_L1', 'phase_ordered', 'celltype_L3', 'celltype_L1_5', 'celltype_L2'
    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'mean', 'std'
    uns: 'age_colors', 'celltype_L2_colors', 'experiments_colors', 'hvg', 'log1p', 'neighbors', 'pca', 'phase_ordered_colors', 'stage_colors', 'umap'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
    varm: 'PCs'
    layers: 'counts'
    obsp: 'connectivities', 'distances'

In [25]:
# Number of retained cells per level-3 cell type.
subdata.obs['celltype_L3'].value_counts()

CD8_CTL-GZMB                110373
NKT-NCR1                     83973
cMono-CD14                   72616
Mature_NK_dim-FCGR3A         40455
CD4_Naive_T-CCR7             21297
ncMono-FCGR3A                18599
NK_bright-XCL1                7123
cDC2-CD1C                     5832
CD4_Treg-FOXP3                5385
CD4_Th1-GZMK                  5335
CD4_Tcm-IFIT3                 2652
Aptypical_Memory_B-ITGAX      2459
Name: celltype_L3, dtype: int64

In [26]:
# Drop the notebook's reference to the full dataset. Memory is only reclaimed
# once no other object still refers to it (an AnnData view of `adata` would).
del adata

#### 基因

In [27]:
# Genes whose mean expression per sample and cell type is exported below.
genes = [
    'NFKBIA', 'TNFAIP3', 'STAT1', 'DUSP1', 'XAF1',
    'IFI44L', 'ISG15', 'MX1', 'OAS1', 'S100A9',
    'CXCL8', 'IFITM3', 'IL1B', 'NR4A2', 'IFI30',
]

In [29]:
# Restrict the expression matrix to the genes of interest.
adata_sub = subdata[:, genes]

# Cells x genes table of the main matrix, averaged within each
# (sample, cell type) group.
# NOTE(review): the displayed means contain negative values, so the main matrix
# appears to hold scaled data rather than counts - confirm this is intended.
expression_table = adata_sub.to_df()
grouping_keys = [adata_sub.obs["sample"], adata_sub.obs["celltype_L3"]]
mean_expr = expression_table.groupby(grouping_keys).mean()
mean_expr

Unnamed: 0_level_0,gene_id,NFKBIA,TNFAIP3,STAT1,DUSP1,XAF1,IFI44L,ISG15,MX1,OAS1,S100A9,CXCL8,IFITM3,IL1B,NR4A2,IFI30
sample,celltype_L3,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
PD-H201,Aptypical_Memory_B-ITGAX,0.833531,-0.254777,-0.156377,0.901096,-0.024188,0.201257,-0.078881,0.478866,0.393136,-0.149591,-0.156967,-0.264516,-0.097062,2.312644,0.743929
PD-H201,CD4_Naive_T-CCR7,0.371418,0.956498,0.231434,0.174950,0.150244,0.236106,0.038843,0.111567,-0.094873,-0.188185,-0.136781,-0.080016,-0.132017,0.515810,-0.323075
PD-H201,CD4_Tcm-IFIT3,0.740856,1.434940,1.010490,0.968417,0.664179,1.178573,1.435343,1.657381,0.876014,-0.054307,-0.156967,0.307510,-0.170849,1.145844,-0.220889
PD-H201,CD4_Th1-GZMK,1.045024,1.680073,0.677775,1.213249,0.213342,0.498589,0.210928,0.033209,-0.082039,-0.226257,-0.156967,-0.098016,-0.041544,1.572081,-0.307724
PD-H201,CD4_Treg-FOXP3,0.290329,0.953683,0.677762,1.134511,0.235060,0.106439,0.295296,0.439322,0.979216,-0.321683,-0.156967,-0.108249,-0.036958,0.783501,-0.312960
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PD-H289,NKT-NCR1,-0.667271,-0.614711,-0.019175,-0.811548,0.254476,-0.198231,-0.000727,-0.030709,0.100455,-0.452246,-0.156967,0.000772,-0.170849,-0.436992,-0.251638
PD-H289,NK_bright-XCL1,-0.125728,-0.426851,0.210231,-0.212733,0.411970,0.004112,0.054498,-0.050173,0.014685,-0.472834,-0.156967,0.936535,-0.170849,-0.442006,-0.360221
PD-H289,cDC2-CD1C,-0.304221,-0.702310,-0.060165,-0.368599,0.723503,0.004958,-0.030369,0.137471,0.058768,0.877298,-0.115080,-0.032227,0.344117,-0.065096,1.492741
PD-H289,cMono-CD14,-0.305764,-0.688962,-0.016234,-0.617008,1.041899,0.135306,-0.046583,0.203666,-0.089750,2.164000,-0.099774,-0.176570,0.094182,-0.256069,0.348524


In [31]:
# Persist the per-gene group means; index=True keeps the (sample, celltype_L3)
# group index as leading columns of the CSV.
mean_expr.to_csv('/media/AnalysisDisk2/Yangshichen/0_HIV_RNA/INRs/数据/genescore.csv', index=True)

#### 通路

In [32]:
def load_gene_set_file(file_path):
    """Read one gene-set text file.

    The first line is taken as the gene-set name; every following non-blank
    line is one gene symbol. Surrounding whitespace is stripped, and blank
    lines (e.g. a trailing newline at the end of the file) are skipped so
    that no empty-string "gene" ends up in the list.

    Parameters
    ----------
    file_path : str
        Path of the text file to read.

    Returns
    -------
    tuple
        ``(gene_set_name, gene_list)``: the name as a str and the genes as a
        list of str, in file order.
    """
    with open(file_path, 'r') as file:
        gene_set_name = file.readline().strip()
        gene_list = [line.strip() for line in file if line.strip()]
    return gene_set_name, gene_list


# Mapping of gene-set name -> list of gene symbols, one entry per *.txt file.
gene_sets = {}
folder_path = '/media/AnalysisDisk2/Yangshichen/0_HIV_RNA/INRs/数据/pathway/'
# glob returns matches in arbitrary order; sort so the gene sets (and the
# score columns derived from them) come out in a reproducible order.
txt_files = sorted(glob.glob(os.path.join(folder_path, '*.txt')))
for file_path in txt_files:
    gene_set_name, gene_list = load_gene_set_file(file_path)
    gene_sets[gene_set_name] = gene_list

In [33]:
# Score every loaded gene set on the subset; each call stores its result as an
# obs column named after the gene set.
for gene_set_name, gene_list in gene_sets.items():
    sc.tl.score_genes(
        subdata,
        gene_list=gene_list,
        score_name=gene_set_name,
    )

computing score 'GOBP_INFLAMMATORY_RESPONSE'
    finished: added
    'GOBP_INFLAMMATORY_RESPONSE', score of gene set (adata.obs).
    1157 total control genes are used. (0:15:51)
computing score 'HALLMARK_APOPTOSIS'
    finished: added
    'HALLMARK_APOPTOSIS', score of gene set (adata.obs).
    1096 total control genes are used. (0:00:28)
computing score 'HALLMARK_INTERFERON_GAMMA_RESPONSE'
    finished: added
    'HALLMARK_INTERFERON_GAMMA_RESPONSE', score of gene set (adata.obs).
    995 total control genes are used. (0:00:40)
computing score 'HALLMARK_TNFA_SIGNALING_VIA_NFKB'
    finished: added
    'HALLMARK_TNFA_SIGNALING_VIA_NFKB', score of gene set (adata.obs).
    1039 total control genes are used. (0:00:08)
computing score 'Module_Activation'
    finished: added
    'Module_Activation', score of gene set (adata.obs).
    349 total control genes are used. (0:00:07)


In [34]:
# Gene-set names as a plain list of strings (these are the score column names).
gene_set_names = list(map(str, gene_sets))
gene_set_names

['GOBP_INFLAMMATORY_RESPONSE',
 'HALLMARK_APOPTOSIS',
 'HALLMARK_INTERFERON_GAMMA_RESPONSE',
 'HALLMARK_TNFA_SIGNALING_VIA_NFKB',
 'Module_Activation']

In [35]:
# Per-cell table holding the grouping keys followed by one column per gene-set score.
score_table_columns = ["sample", "celltype_L3", *gene_set_names]
gene_set_df = subdata.obs[score_table_columns]
gene_set_df

Unnamed: 0_level_0,sample,celltype_L3,GOBP_INFLAMMATORY_RESPONSE,HALLMARK_APOPTOSIS,HALLMARK_INTERFERON_GAMMA_RESPONSE,HALLMARK_TNFA_SIGNALING_VIA_NFKB,Module_Activation
cellbarcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
PD-H214-1-CELL98_N1,PD-H214,Aptypical_Memory_B-ITGAX,0.049152,0.266420,0.449499,0.245041,1.123951
PD-H214-1-CELL226_N2,PD-H214,Aptypical_Memory_B-ITGAX,-0.037740,0.095684,0.308991,0.390797,1.189758
PD-H214-1-CELL241_N2,PD-H214,Aptypical_Memory_B-ITGAX,0.061577,0.261756,0.399363,0.256501,1.614679
PD-H214-1-CELL328_N2,PD-H214,Aptypical_Memory_B-ITGAX,0.039787,0.149042,0.434981,0.558054,1.012030
PD-H214-1-CELL509_N4,PD-H214,Aptypical_Memory_B-ITGAX,-0.016201,0.226748,0.494304,0.503742,1.610318
...,...,...,...,...,...,...,...
PD-H264-2-CELL10487_N1,PD-H264,Mature_NK_dim-FCGR3A,0.102440,0.122817,0.303869,0.032498,0.002844
PD-H264-2-CELL11345_N1,PD-H264,Mature_NK_dim-FCGR3A,0.051894,0.210756,0.463105,0.200207,0.015804
PD-H265-1-CELL1521_N2,PD-H265,Mature_NK_dim-FCGR3A,0.073243,0.110803,0.389919,-0.040620,-0.041338
PD-H265-1-CELL4348_N1,PD-H265,Mature_NK_dim-FCGR3A,0.041072,0.302505,0.398605,0.130260,-0.186057


In [36]:
# Average each gene-set score within every (sample, cell type) group, then
# turn the group keys back into ordinary columns.
scores_by_group = gene_set_df.groupby(['sample', 'celltype_L3'])
mean_expr = scores_by_group.mean(numeric_only=True)
mean_expr = mean_expr.reset_index()
mean_expr

Unnamed: 0,sample,celltype_L3,GOBP_INFLAMMATORY_RESPONSE,HALLMARK_APOPTOSIS,HALLMARK_INTERFERON_GAMMA_RESPONSE,HALLMARK_TNFA_SIGNALING_VIA_NFKB,Module_Activation
0,PD-H201,Aptypical_Memory_B-ITGAX,0.020751,0.193867,0.400405,0.415400,1.414027
1,PD-H201,CD4_Naive_T-CCR7,-0.009544,0.162159,0.258957,0.196262,-0.096521
2,PD-H201,CD4_Tcm-IFIT3,0.051373,0.274987,0.506853,0.304682,0.027822
3,PD-H201,CD4_Th1-GZMK,0.025986,0.273874,0.397487,0.382809,0.285075
4,PD-H201,CD4_Treg-FOXP3,0.014794,0.250748,0.360237,0.289095,1.090625
...,...,...,...,...,...,...,...
511,PD-H289,NKT-NCR1,0.028425,0.164808,0.339574,0.056561,0.063441
512,PD-H289,NK_bright-XCL1,0.050324,0.161488,0.370088,0.073563,-0.057590
513,PD-H289,cDC2-CD1C,0.114515,0.222115,0.358233,0.135326,0.902650
514,PD-H289,cMono-CD14,0.091816,0.174365,0.304365,0.157479,0.127865


In [37]:
# Persist the per-group gene-set score means; the row index is a plain counter
# after reset_index(), so it is not written.
mean_expr.to_csv('/media/AnalysisDisk2/Yangshichen/0_HIV_RNA/INRs/数据/pathwayscore.csv', index=False)

#### RNA细胞比例

In [4]:
# Load the per-sample, per-cell-type table of cell counts and proportions
# (level-3 annotation) and display it.
tmp = pd.read_csv('/media/AnalysisDisk2/Yangshichen/0_HIV_RNA/INRs/数据/scRNA_proportions_L3.csv')
tmp

Unnamed: 0,sample,celltype_L3,count,total_count,proportion
0,HD-H134,Aptypical_Memory_B-ITGAX,76,46020,0.001651
1,HD-H134,CD4_Naive_T-CCR7,7030,46020,0.152760
2,HD-H134,CD4_Naive_T-SOX4,604,46020,0.013125
3,HD-H134,CD4_Tcm-CXCR5,1071,46020,0.023272
4,HD-H134,CD4_Tcm-GPR183,719,46020,0.015624
...,...,...,...,...,...
8231,PD-H308,ncMono-FCGR3A,260,10767,0.024148
8232,PD-H308,ncMono-IFI44L,110,10767,0.010216
8233,PD-H308,ncMono-IFIT1,30,10767,0.002786
8234,PD-H308,pDC-AXL,1,10767,0.000093


In [9]:
# Cell types to keep in the proportion table.
kept_celltypes = [
    'CD4_Naive_T-CCR7', 'CD4_Tcm-IFIT3', 'CD4_Th1-GZMK', 'CD4_Treg-FOXP3',
    'CD8_CTL-GZMB', 'NKT-NCR1', 'NK_bright-XCL1', 'Mature_NK_dim-FCGR3A',
    'cMono-CD14', 'ncMono-FCGR3A', 'cDC2-CD1C', 'Aptypical_Memory_B-ITGAX',
]
# Sample IDs to keep in the proportion table.
kept_samples = [
    'PD-H201', 'PD-H202', 'PD-H204', 'PD-H207', 'PD-H210', 'PD-H212',
    'PD-H214', 'PD-H218', 'PD-H219', 'PD-H220', 'PD-H221', 'PD-H222',
    'PD-H223', 'PD-H224', 'PD-H225', 'PD-H228', 'PD-H229', 'PD-H230',
    'PD-H231', 'PD-H232', 'PD-H233', 'PD-H237', 'PD-H238', 'PD-H240',
    'PD-H241', 'PD-H242', 'PD-H246', 'PD-H247', 'PD-H255', 'PD-H258',
    'PD-H260', 'PD-H263', 'PD-H264', 'PD-H265', 'PD-H266', 'PD-H276',
    'PD-H277', 'PD-H278', 'PD-H281', 'PD-H285', 'PD-H286', 'PD-H288',
    'PD-H289',
]
# Filter by cell type first, then by sample.
subtmp = tmp.loc[tmp['celltype_L3'].isin(kept_celltypes)]
subtmp = subtmp.loc[subtmp['sample'].isin(kept_samples)]
subtmp

Unnamed: 0,sample,celltype_L3,count,total_count,proportion
3074,PD-H201,Aptypical_Memory_B-ITGAX,36,15587,0.002310
3075,PD-H201,CD4_Naive_T-CCR7,378,15587,0.024251
3079,PD-H201,CD4_Tcm-IFIT3,16,15587,0.001026
3083,PD-H201,CD4_Th1-GZMK,40,15587,0.002566
3088,PD-H201,CD4_Treg-FOXP3,51,15587,0.003272
...,...,...,...,...,...
7451,PD-H289,NKT-NCR1,730,16623,0.043915
7452,PD-H289,NK_bright-XCL1,85,16623,0.005113
7469,PD-H289,cDC2-CD1C,122,16623,0.007339
7470,PD-H289,cMono-CD14,1127,16623,0.067798


In [10]:
# Persist the filtered proportion table without the row index.
subtmp.to_csv('/media/AnalysisDisk2/Yangshichen/0_HIV_RNA/INRs/数据/rnacellratio.csv', index=False)

#### 临床细胞数：CD4、CD4/CD8

In [12]:
# Load the clinical sample sheet (one row per sample) and display it.
# Note: this rebinds `tmp`, which previously held the proportion table.
tmp = pd.read_excel('/media/AnalysisDisk2/Yangshichen/0_HIV_RNA/INRs/数据/sample_information.xlsx')
tmp

Unnamed: 0,Sample,Group,Gender,Age,Treat time,Baseline viral load (cp/mL),Viral load (cp/mL),Baseline CD4 count (/μL),CD4 count (/μL),CD8 count(/μL),CD4/CD8 Ratio,ART regimen,INSTI
0,HD-H134,HDs,M,53,——,——,——,——,579,319,1.815047,——,——
1,HD-H135,HDs,M,26,——,——,——,——,595,754,0.789125,——,——
2,HD-H136,HDs,M,28,——,——,——,——,1035,428,2.418224,——,——
3,HD-H137,HDs,M,31,——,——,——,——,852,779,1.09371,——,——
4,HD-H138,HDs,M,32,——,——,——,——,667,1040,0.641346,——,——
...,...,...,...,...,...,...,...,...,...,...,...,...,...
138,PD-H302,IRs,M,37,5,6690,0,7,800,2148,0.372439,AZT/3TC+EFV,No
139,PD-H305,IRs,M,36,5,4445,0,533,728,507,1.435897,3TC+EFV+TDF,No
140,PD-H306,IRs,M,31,5,0,0,386,502,491,1.022403,BIC/FTC/TAF,Yes
141,PD-H307,IRs,M,60,6,86100,0,270,542,286,1.895105,BIC/FTC/TAF,Yes


In [13]:
# Keep only the rows whose Group is 'INRs'.
is_inr_row = tmp['Group'].isin(['INRs'])
subtmp = tmp.loc[is_inr_row]
subtmp

Unnamed: 0,Sample,Group,Gender,Age,Treat time,Baseline viral load (cp/mL),Viral load (cp/mL),Baseline CD4 count (/μL),CD4 count (/μL),CD8 count(/μL),CD4/CD8 Ratio,ART regimen,INSTI
53,PD-H201,INRs,M,38,5,97000.0,25.1,34,206,700,0.294286,BIC/FTC/TAF,Yes
54,PD-H202,INRs,M,76,6,0.0,0.0,56,128,338,0.378698,3TC+EFV+TDF,No
56,PD-H204,INRs,M,55,13,0.0,0.0,122,252,652,0.386503,AZT/3TC+NVP,No
58,PD-H207,INRs,M,61,9,250.0,0.0,1,269,512,0.525391,3TC+EFV+TDF,No
61,PD-H210,INRs,M,36,10,20.0,0.0,56,240,1010,0.237624,BIC/FTC/TAF,Yes
63,PD-H212,INRs,M,46,4,0.0,0.0,32,151,569,0.265378,3TC+EFV+TDF,No
65,PD-H214,INRs,M,41,9,0.0,0.0,182,282,552,0.51087,BIC/FTC/TAF,Yes
67,PD-H218,INRs,M,70,9,0.0,0.0,29,175,320,0.546875,3TC+EFV+TDF,No
68,PD-H219,INRs,M,37,7,0.0,0.0,13,208,881,0.236095,BIC/FTC/TAF,Yes
69,PD-H220,INRs,M,60,5,0.0,0.0,12,124,374,0.331551,BIC/FTC/TAF,Yes


In [14]:
# Persist the INRs-only clinical table without the row index.
subtmp.to_csv('/media/AnalysisDisk2/Yangshichen/0_HIV_RNA/INRs/数据/clinicalinformation.csv', index=False)

### 计算相关性和差异

#### 数据合并

In [4]:
# Reload the per-gene means per (sample, celltype_L3) from genescore.csv.
tmp1 = pd.read_csv('/media/AnalysisDisk2/Yangshichen/0_HIV_RNA/INRs/数据/genescore.csv')
tmp1

Unnamed: 0,sample,celltype_L3,NFKBIA,TNFAIP3,STAT1,DUSP1,XAF1,IFI44L,ISG15,MX1,OAS1,S100A9,CXCL8,IFITM3,IL1B,NR4A2,IFI30
0,PD-H201,Aptypical_Memory_B-ITGAX,0.833531,-0.254777,-0.156377,0.901096,-0.024188,0.201257,-0.078881,0.478866,0.393136,-0.149591,-0.156967,-0.264516,-0.097062,2.312644,0.743929
1,PD-H201,CD4_Naive_T-CCR7,0.371418,0.956498,0.231434,0.174950,0.150244,0.236106,0.038843,0.111567,-0.094873,-0.188185,-0.136781,-0.080016,-0.132017,0.515810,-0.323075
2,PD-H201,CD4_Tcm-IFIT3,0.740856,1.434940,1.010490,0.968417,0.664179,1.178573,1.435343,1.657381,0.876014,-0.054307,-0.156967,0.307510,-0.170849,1.145844,-0.220889
3,PD-H201,CD4_Th1-GZMK,1.045024,1.680073,0.677775,1.213249,0.213342,0.498589,0.210928,0.033209,-0.082039,-0.226257,-0.156967,-0.098016,-0.041544,1.572081,-0.307724
4,PD-H201,CD4_Treg-FOXP3,0.290329,0.953683,0.677762,1.134511,0.235060,0.106439,0.295296,0.439322,0.979216,-0.321683,-0.156967,-0.108249,-0.036958,0.783501,-0.312960
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
511,PD-H289,NKT-NCR1,-0.667271,-0.614711,-0.019175,-0.811547,0.254476,-0.198231,-0.000727,-0.030709,0.100455,-0.452246,-0.156967,0.000772,-0.170849,-0.436992,-0.251638
512,PD-H289,NK_bright-XCL1,-0.125728,-0.426851,0.210231,-0.212733,0.411970,0.004112,0.054498,-0.050173,0.014685,-0.472834,-0.156967,0.936535,-0.170849,-0.442006,-0.360221
513,PD-H289,cDC2-CD1C,-0.304221,-0.702310,-0.060165,-0.368599,0.723503,0.004958,-0.030369,0.137471,0.058768,0.877298,-0.115080,-0.032227,0.344117,-0.065096,1.492741
514,PD-H289,cMono-CD14,-0.305764,-0.688962,-0.016234,-0.617008,1.041899,0.135306,-0.046583,0.203666,-0.089750,2.164000,-0.099774,-0.176570,0.094182,-0.256069,0.348524


In [6]:
# Reload the gene-set score means per (sample, celltype_L3) from pathwayscore.csv.
tmp2 = pd.read_csv('/media/AnalysisDisk2/Yangshichen/0_HIV_RNA/INRs/数据/pathwayscore.csv')
tmp2

Unnamed: 0,sample,celltype_L3,GOBP_INFLAMMATORY_RESPONSE,HALLMARK_APOPTOSIS,HALLMARK_INTERFERON_GAMMA_RESPONSE,HALLMARK_TNFA_SIGNALING_VIA_NFKB,Module_Activation
0,PD-H201,Aptypical_Memory_B-ITGAX,0.020751,0.193867,0.400405,0.415400,1.414027
1,PD-H201,CD4_Naive_T-CCR7,-0.009544,0.162159,0.258957,0.196262,-0.096521
2,PD-H201,CD4_Tcm-IFIT3,0.051373,0.274987,0.506853,0.304682,0.027822
3,PD-H201,CD4_Th1-GZMK,0.025986,0.273874,0.397487,0.382809,0.285075
4,PD-H201,CD4_Treg-FOXP3,0.014794,0.250748,0.360237,0.289095,1.090625
...,...,...,...,...,...,...,...
511,PD-H289,NKT-NCR1,0.028425,0.164808,0.339574,0.056561,0.063441
512,PD-H289,NK_bright-XCL1,0.050324,0.161488,0.370088,0.073563,-0.057590
513,PD-H289,cDC2-CD1C,0.114515,0.222115,0.358233,0.135326,0.902650
514,PD-H289,cMono-CD14,0.091816,0.174365,0.304365,0.157479,0.127865


In [8]:
# Reload the filtered cell-count / proportion table from rnacellratio.csv.
tmp3 = pd.read_csv('/media/AnalysisDisk2/Yangshichen/0_HIV_RNA/INRs/数据/rnacellratio.csv')
tmp3

Unnamed: 0,sample,celltype_L3,count,total_count,proportion
0,PD-H201,Aptypical_Memory_B-ITGAX,36,15587,0.002310
1,PD-H201,CD4_Naive_T-CCR7,378,15587,0.024251
2,PD-H201,CD4_Tcm-IFIT3,16,15587,0.001026
3,PD-H201,CD4_Th1-GZMK,40,15587,0.002566
4,PD-H201,CD4_Treg-FOXP3,51,15587,0.003272
...,...,...,...,...,...
511,PD-H289,NKT-NCR1,730,16623,0.043915
512,PD-H289,NK_bright-XCL1,85,16623,0.005113
513,PD-H289,cDC2-CD1C,122,16623,0.007339
514,PD-H289,cMono-CD14,1127,16623,0.067798


In [9]:
# Reload the INRs-only clinical table from clinicalinformation.csv.
# A stray, unclosed `colnames(` line (an R idiom, not a Python function) used
# to sit here and made the whole cell a SyntaxError; it has been removed.
# NOTE(review): this table's key column is `Sample` (capital S) while the
# other three tables use `sample` - align the names before merging.
tmp4 = pd.read_csv('/media/AnalysisDisk2/Yangshichen/0_HIV_RNA/INRs/数据/clinicalinformation.csv')
tmp4

Unnamed: 0,Sample,Group,Gender,Age,Treat time,Baseline viral load (cp/mL),Viral load (cp/mL),Baseline CD4 count (/μL),CD4 count (/μL),CD8 count(/μL),CD4/CD8 Ratio,ART regimen,INSTI
0,PD-H201,INRs,M,38,5,97000.0,25.1,34,206,700,0.294286,BIC/FTC/TAF,Yes
1,PD-H202,INRs,M,76,6,0.0,0.0,56,128,338,0.378698,3TC+EFV+TDF,No
2,PD-H204,INRs,M,55,13,0.0,0.0,122,252,652,0.386503,AZT/3TC+NVP,No
3,PD-H207,INRs,M,61,9,250.0,0.0,1,269,512,0.525391,3TC+EFV+TDF,No
4,PD-H210,INRs,M,36,10,20.0,0.0,56,240,1010,0.237624,BIC/FTC/TAF,Yes
5,PD-H212,INRs,M,46,4,0.0,0.0,32,151,569,0.265378,3TC+EFV+TDF,No
6,PD-H214,INRs,M,41,9,0.0,0.0,182,282,552,0.51087,BIC/FTC/TAF,Yes
7,PD-H218,INRs,M,70,9,0.0,0.0,29,175,320,0.546875,3TC+EFV+TDF,No
8,PD-H219,INRs,M,37,7,0.0,0.0,13,208,881,0.236095,BIC/FTC/TAF,Yes
9,PD-H220,INRs,M,60,5,0.0,0.0,12,124,374,0.331551,BIC/FTC/TAF,Yes
