### Cell type proportions by condition

**Objective**
Quantify and compare cell-type composition across conditions (IA, HC, PI, PHC)
using the annotated clustered object.

*Does infection lead to changes in immune cell composition?*

**Input**
- Annotated PBMC object from Notebook 05
- Sample metadata (patient-level)

**Methods**
- Compute cell type proportions per patient
- Compare proportions between IA vs HC and PI vs PHC

**Output**
- Table of cell type proportions per patient
- Figure showing proportions by condition

Cell-type proportions were computed by normalizing the number of cells of each cell type to the total number of cells within each condition. This enables direct comparison of immune composition across groups while acknowledging that cell counts may be affected by technical and biological variability.

In [14]:
import scanpy as sc
import pandas as pd

# Load annotated object (has cell_type)
adata = sc.read_h5ad("results/adata_annotated.h5ad")

# Load condition metadata object
adata_cond = sc.read_h5ad("results/adata_with_condition_raw.h5ad")
print("adata (annotated) n_obs:", adata.n_obs)
print("adata_cond (with condition) n_obs:", adata_cond.n_obs)

common = adata.obs_names.intersection(adata_cond.obs_names)
print("Common cells:", len(common))
print("Annotated-only:", adata.n_obs - len(common))
print("Cond-only:", adata_cond.n_obs - len(common))

# Transfer condition information (same obs_names)
common = adata.obs_names.intersection(adata_cond.obs_names)
adata.obs["condition"] = pd.NA
adata.obs.loc[common, "condition"] = adata_cond.obs.loc[common, "condition"].values

# Sanity check
print("Has cell_type?", "cell_type" in adata.obs.columns)
print("Has condition?", "condition" in adata.obs.columns)

# Save final object for NB06+
adata.write("results/adata_final.h5ad")

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")


adata (annotated) n_obs: 103202
adata_cond (with condition) n_obs: 103202
Common cells: 100203
Annotated-only: 2999
Cond-only: 2999
Has cell_type? True
Has condition? True


In [15]:
#Setup and load annotated object 
import os
import scanpy as sc
import pandas as pd

PROJECT_ROOT = "/mnt/c/Users/yasmi/OneDrive/Desktop/Mini-Projets/scRNA_Influenza_Patients"
os.chdir(PROJECT_ROOT)

adata = sc.read_h5ad("results/adata_annotated.h5ad")
print("Has condition?", "condition" in adata.obs.columns)
print("Has cell_type?", "cell_type" in adata.obs.columns)
print("Conditions:", adata.obs["condition"].value_counts())
print("Cell types:", adata.obs["cell_type"].value_counts())

Has condition? True
Has cell_type? True
Conditions: condition
PI     25531
HC     22084
IC     20058
PHC    18516
IA     17013
Name: count, dtype: int64
Cell types: cell_type
Myeloid     44448
NK          36164
B           12275
T            9539
DC            485
Platelet      291
Name: count, dtype: int64


  utils.warn_names_duplicates("obs")


In [16]:
# Minimal sanity checks 

# Check for missing annotations
print("Missing condition:", adata.obs["condition"].isna().sum())
print("Missing cell_type:", adata.obs["cell_type"].isna().sum())

# Optional: check clusters per condition
print(pd.crosstab(adata.obs["condition"], adata.obs["leiden"]).shape)


Missing condition: 0
Missing cell_type: 0
(5, 12)


In [17]:
# Optional check â€” used during annotation validation
# pm = pd.read_csv("results/panel_mean_by_cluster.csv", index_col=0)
# pm.loc["12"]

#import pandas as pd

#pm = pd.read_csv("results/panel_mean_by_cluster.csv", index_col=0)
#print(pm.loc[12])

In [18]:
# Cell counts per condition x cell type  

counts = (
    adata.obs
    .groupby(["condition", "cell_type"])
    .size()
    .reset_index(name="n_cells")
)

totals = (
    adata.obs
    .groupby("condition")
    .size()
    .reset_index(name="total_cells")
)

props = counts.merge(totals, on="condition")
props["proportion"] = props["n_cells"] / props["total_cells"]

props.sort_values(["condition", "proportion"], ascending=[True, False]).head(20)

  .groupby(["condition", "cell_type"])
  .groupby("condition")


Unnamed: 0,condition,cell_type,n_cells,total_cells,proportion
3,HC,NK,14365,22084,0.650471
2,HC,Myeloid,4769,22084,0.215948
5,HC,T,1900,22084,0.086035
0,HC,B,855,22084,0.038716
1,HC,DC,119,22084,0.005389
4,HC,Platelet,76,22084,0.003441
8,IA,Myeloid,11084,17013,0.651502
6,IA,B,2769,17013,0.162758
9,IA,NK,2312,17013,0.135896
11,IA,T,722,17013,0.042438


In [19]:
# Pivot table (easy to read)
props_wide = props.pivot_table(
    index="condition",
    columns="cell_type",
    values="proportion",
    fill_value=0
)

props_wide

  props_wide = props.pivot_table(


cell_type,B,DC,Myeloid,NK,Platelet,T
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
HC,0.038716,0.005389,0.215948,0.650471,0.003441,0.086035
IA,0.162758,0.00335,0.651502,0.135896,0.004056,0.042438
IC,0.147771,0.003839,0.417988,0.254512,0.001645,0.174245
PHC,0.030784,0.005185,0.135882,0.695075,0.003294,0.12978
PI,0.200423,0.005327,0.693079,0.059222,0.002037,0.039912


In [20]:
# Save outputs 

props.to_csv("results/cell_type_proportions_by_condition_long.csv", index=False)
props_wide.to_csv("results/cell_type_proportions_by_condition_wide.csv")

print("Saved:",
      "results/cell_type_proportions_by_condition_long.csv",
      "results/cell_type_proportions_by_condition_wide.csv")


Saved: results/cell_type_proportions_by_condition_long.csv results/cell_type_proportions_by_condition_wide.csv


In [21]:
#Global cell type proportions
global_props = (
    adata.obs["cell_type"]
    .value_counts(normalize=True)
    .mul(100)
    .round(2)
)

print("Global immune cell composition (%)")
print(global_props)

Global immune cell composition (%)
cell_type
Myeloid     43.07
NK          35.04
B           11.89
T            9.24
DC           0.47
Platelet     0.28
Name: proportion, dtype: float64


In [22]:
print(adata.obs_names[:10])

Index(['AACACACAGAACGCTAGTCCGAGATGT', 'AACACACAGAAGGTGGTACCGAGATGT',
       'AACACACAGAAGGTGGTACTGGTTGAG', 'AACACACAGAAGGTGGTACTGTACACA',
       'AACACACAGAAGGTGGTAGAGACCTAG', 'AACACACAGAATCCGGTGAGCACATGC',
       'AACACACAGACAACAGGTCACACCTCA', 'AACACACAGACAACAGGTGCGAGCTTA',
       'AACACACAGACCAGGTCACCGAGATGT', 'AACACACAGACCAGGTCACGTCAACTG'],
      dtype='object')


In [23]:
#Check condition metadata
# Inspect condition labels
print(adata.obs.columns)
print(adata.obs["condition"].value_counts())

Index(['gsm', 'sample_id', 'condition', 'replicate', 'batch', 'leiden',
       'cell_type'],
      dtype='object')
condition
PI     25531
HC     22084
IC     20058
PHC    18516
IA     17013
Name: count, dtype: int64


In [24]:
#Proportions by condition 

# Counts per condition and cell type
counts = (
    adata.obs
    .groupby(["condition", "cell_type"])
    .size()
    .reset_index(name="n_cells")
)

# Total cells per condition
totals = (
    adata.obs
    .groupby("condition")
    .size()
    .reset_index(name="total_cells")
)

# Merge and compute proportions
props = counts.merge(totals, on="condition")
props["proportion"] = props["n_cells"] / props["total_cells"]

props


  .groupby(["condition", "cell_type"])
  .groupby("condition")


Unnamed: 0,condition,cell_type,n_cells,total_cells,proportion
0,HC,B,855,22084,0.038716
1,HC,DC,119,22084,0.005389
2,HC,Myeloid,4769,22084,0.215948
3,HC,NK,14365,22084,0.650471
4,HC,Platelet,76,22084,0.003441
5,HC,T,1900,22084,0.086035
6,IA,B,2769,17013,0.162758
7,IA,DC,57,17013,0.00335
8,IA,Myeloid,11084,17013,0.651502
9,IA,NK,2312,17013,0.135896


In [27]:
import pandas as pd

# Count cells per condition x cell type
counts = (
    adata.obs
    .dropna(subset=["condition", "cell_type"])
    .groupby(["condition", "cell_type"])
    .size()
    .reset_index(name="n_cells")
)

# Total cells per condition
totals = (
    adata.obs
    .dropna(subset=["condition"])
    .groupby("condition")
    .size()
    .reset_index(name="total_cells")
)

# Proportions
props = counts.merge(totals, on="condition")
props["proportion"] = props["n_cells"] / props["total_cells"]

# Wide table (easy to read)
props_wide = props.pivot_table(
    index="condition",
    columns="cell_type",
    values="proportion",
    fill_value=0
)

print(props_wide)

# Save
props.to_csv("results/cell_type_proportions_by_condition_long.csv", index=False)
props_wide.to_csv("results/cell_type_proportions_by_condition_wide.csv")
print("Saved proportions tables in results/.")


cell_type         B        DC   Myeloid        NK  Platelet         T
condition                                                            
HC         0.038716  0.005389  0.215948  0.650471  0.003441  0.086035
IA         0.162758  0.003350  0.651502  0.135896  0.004056  0.042438
IC         0.147771  0.003839  0.417988  0.254512  0.001645  0.174245
PHC        0.030784  0.005185  0.135882  0.695075  0.003294  0.129780
PI         0.200423  0.005327  0.693079  0.059222  0.002037  0.039912
Saved proportions tables in results/.


  .groupby(["condition", "cell_type"])
  .groupby("condition")
  props_wide = props.pivot_table(
