In [1]:
import os
import numpy  as np
import pandas as pd
from scipy.stats     import ranksums
from statsmodels.stats.multitest import multipletests
import matplotlib.pyplot as plt

import scanpy as sc
import pyreadr

In [17]:
# Input locations. The three data files live in the same directory, so the
# shared prefixes are factored out; the resulting strings are unchanged.
_BRAY_ROOT = "/labs/Aguiar/SSPA_BRAY/BRay"
_AJM_TIMECOURSE_DIR = f"{_BRAY_ROOT}/BRAY_AJM2/2_Data/2_SingleCellData/2_AJM_Parse_Timecourse"

# Raw and normalized expression matrices (.rds) plus the matching metadata table (.csv).
ajm_file_path_raw = f"{_AJM_TIMECOURSE_DIR}/GEX_TC_LPSonly_Bcellonly_filt_raw_2024-02-05.rds"
ajm_file_path_norm = f"{_AJM_TIMECOURSE_DIR}/GEX_TC_LPSonly_Bcellonly_filt_norm_2024-02-09.rds"
ajm_metadata_path = f"{_AJM_TIMECOURSE_DIR}/meta_TC_LPSonly_Bcellonly_filt_raw_2024-02-05.csv"

# R script that is executed via Rscript in a later cell.
r_script_path = f"{_BRAY_ROOT}/VariationalInference/rds_to_anndata.R"


In [18]:
import subprocess
import scipy.sparse as sp

In [19]:
# Run the R script with Rscript, capturing its stdout/stderr as text.
# check=True turns a non-zero exit status into a CalledProcessError.
# NOTE(review): the script is invoked without arguments, so it presumably
# hard-codes its own input/output paths -- confirm against the R source.
try:
    result = subprocess.run(
        ["Rscript", r_script_path],
        capture_output=True,
        text=True,
        check=True,
    )
except subprocess.CalledProcessError as err:
    # The exception message only reports the command and exit status; the
    # captured streams would otherwise be lost, so show them before re-raising.
    print(f"R conversion failed with exit status {err.returncode}")
    print("--- stdout ---")
    print(err.stdout)
    print("--- stderr ---")
    print(err.stderr)
    raise

print("R conversion output:")
print(result.stdout)

R conversion output:



In [20]:
def _load_matrix_csvs(prefix, label):
    """Read the four CSV files describing one exported sparse matrix.

    Files are read from the current working directory:
    ``{prefix}_matrix_sparse.csv``, ``{prefix}_matrix_rownames.csv``,
    ``{prefix}_matrix_colnames.csv`` and ``{prefix}_matrix_dims.csv``.

    Parameters
    ----------
    prefix : str
        File-name prefix (``"raw"`` or ``"norm"``).
    label : str
        Human-readable label used in the printed dimension summary.

    Returns
    -------
    tuple
        ``(sparse_data, row_names, col_names, matrix_dims, nrows, ncols)`` where
        ``sparse_data`` and ``matrix_dims`` are DataFrames, the name lists come
        from the ``row_names`` / ``col_names`` columns, and ``nrows`` / ``ncols``
        are the first entries of the ``rows`` / ``cols`` columns of the dims file.
    """
    sparse_data = pd.read_csv(f"{prefix}_matrix_sparse.csv")

    row_names = pd.read_csv(f"{prefix}_matrix_rownames.csv")["row_names"].tolist()
    col_names = pd.read_csv(f"{prefix}_matrix_colnames.csv")["col_names"].tolist()

    matrix_dims = pd.read_csv(f"{prefix}_matrix_dims.csv")
    nrows = matrix_dims["rows"].iloc[0]
    ncols = matrix_dims["cols"].iloc[0]
    print(f"{label} matrix dimensions from file: {nrows} x {ncols}")

    # A mismatch is only reported, not raised, so the notebook keeps running.
    if nrows != len(row_names) or ncols != len(col_names):
        print(f"WARNING: Dimension mismatch! Row names: {len(row_names)}, Column names: {len(col_names)}")

    return sparse_data, row_names, col_names, matrix_dims, nrows, ncols


# Raw matrix export.
(
    raw_sparse_data,
    raw_row_names,
    raw_col_names,
    raw_matrix_dims,
    raw_nrows,
    raw_ncols,
) = _load_matrix_csvs("raw", "Raw")

# Normalized matrix export.
(
    norm_sparse_data,
    norm_row_names,
    norm_col_names,
    norm_matrix_dims,
    norm_nrows,
    norm_ncols,
) = _load_matrix_csvs("norm", "Normalized")


Raw matrix dimensions from file: 57010 x 4036
Normalized matrix dimensions from file: 57010 x 4036


In [21]:
def _build_transposed_csr(sparse_data, nrows, ncols):
    """Assemble a sparse matrix from triplets and return its CSR transpose.

    Parameters
    ----------
    sparse_data : pandas.DataFrame
        Table with ``row``, ``col`` and ``value`` columns.
    nrows, ncols : int
        Shape of the matrix before transposition.

    Returns
    -------
    tuple
        ``(row_indices, col_indices, values, matrix)`` where the first three are
        the column arrays taken from ``sparse_data`` and ``matrix`` is the
        transposed matrix in CSR format, i.e. of shape ``(ncols, nrows)``.
    """
    row_indices = sparse_data["row"].values
    col_indices = sparse_data["col"].values
    values = sparse_data["value"].values

    # Printed so the index base can be eyeballed: indices are passed to
    # coo_matrix unchanged, so they are assumed to be 0-based -- TODO confirm
    # against the exporting script.
    print(f"Row indices range: {row_indices.min()} to {row_indices.max()}")
    print(f"Column indices range: {col_indices.min()} to {col_indices.max()}")

    print(f"Creating sparse matrix with shape: ({nrows}, {ncols}) and transposing to match AnnData format")

    coo = sp.coo_matrix((values, (row_indices, col_indices)), shape=(nrows, ncols))
    matrix = coo.transpose().tocsr()

    print(f"Transposed matrix shape: {matrix.shape}")
    return row_indices, col_indices, values, matrix


raw_row_indices, raw_col_indices, raw_values, raw_sparse_matrix = _build_transposed_csr(
    raw_sparse_data, raw_nrows, raw_ncols
)

norm_row_indices, norm_col_indices, norm_values, norm_sparse_matrix = _build_transposed_csr(
    norm_sparse_data, norm_nrows, norm_ncols
)

Row indices range: 0 to 57003
Column indices range: 0 to 4035
Creating sparse matrix with shape: (57010, 4036) and transposing to match AnnData format
Transposed matrix shape: (4036, 57010)
Row indices range: 0 to 57003
Column indices range: 0 to 4035
Creating sparse matrix with shape: (57010, 4036) and transposing to match AnnData format
Transposed matrix shape: (4036, 57010)


In [23]:
import anndata as ad

In [24]:
# Wrap each transposed sparse matrix in an AnnData container.
raw_ajm_adata = ad.AnnData(X=raw_sparse_matrix)
norm_ajm_adata = ad.AnnData(X=norm_sparse_matrix)

# The exported row names label the variables and the exported column names
# label the observations of the (transposed) matrix.
for _adata, _var_labels, _obs_labels in (
    (raw_ajm_adata, raw_row_names, raw_col_names),
    (norm_ajm_adata, norm_row_names, norm_col_names),
):
    _adata.var_names = _var_labels
    _adata.obs_names = _obs_labels

# De-duplicate observation and variable labels on both objects.
for _adata in (raw_ajm_adata, norm_ajm_adata):
    _adata.obs_names_make_unique()
    _adata.var_names_make_unique()

In [25]:
# Short aliases for the matrices and their axis labels on both AnnData objects.
ajm_raw_mat, ajm_norm_mat = raw_ajm_adata.X, norm_ajm_adata.X
raw_genes, raw_cells = raw_ajm_adata.var_names, raw_ajm_adata.obs_names
norm_genes, norm_cells = norm_ajm_adata.var_names, norm_ajm_adata.obs_names