In [2]:
!pip install cellxgene-census pandas  plotly

Collecting cellxgene-census
  Downloading cellxgene_census-1.17.0-py3-none-any.whl.metadata (5.2 kB)
Collecting tiledbsoma>=1.15.3 (from cellxgene-census)
  Downloading tiledbsoma-1.16.2-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (7.0 kB)
Collecting anndata (from cellxgene-census)
  Downloading anndata-0.11.4-py3-none-any.whl.metadata (9.3 kB)
Collecting s3fs>=2021.06.1 (from cellxgene-census)
  Downloading s3fs-2025.5.0-py3-none-any.whl.metadata (1.9 kB)
Collecting aiobotocore<3.0.0,>=2.5.4 (from s3fs>=2021.06.1->cellxgene-census)
  Downloading aiobotocore-2.22.0-py3-none-any.whl.metadata (24 kB)
Collecting fsspec==2025.5.0 (from s3fs>=2021.06.1->cellxgene-census)
  Downloading fsspec-2025.5.0-py3-none-any.whl.metadata (11 kB)
Collecting scanpy>=1.9.2 (from tiledbsoma>=1.15.3->cellxgene-census)
  Downloading scanpy-1.11.1-py3-none-any.whl.metadata (9.9 kB)
Collecting somacore==1.0.28 (from tiledbsoma>=1.15.3->cellxgene-census)
  Downloading somacore-1.0.28-py3-none-any.whl.metadat

In [4]:
import cellxgene_census
import pandas as pd
import plotly.express as px

In [9]:

# Census release to open; pinned to a fixed version string.
CENSUS_VERSION = "2025-01-30"

# Open a handle to the pinned Census release.
# On failure, report the error and re-raise it: the cells below require
# `census`, so swallowing the exception would only defer the failure to an
# unrelated-looking NameError further down.
try:
    census = cellxgene_census.open_soma(census_version=CENSUS_VERSION)
except Exception as e:
    print(f"An error occurred while opening : {e}")
    raise
else:
    print(f"Successfully opened census version: {CENSUS_VERSION}")

# Only reached when the census was opened successfully.
print("operations completed.")

Successfully opened census version: 2025-01-30
operations completed.


In [6]:

# One predicate per metadata column; each is a complete clause on its own.
disease_filter = "disease == 'invasive lobular breast carcinoma'"
assay_filter = "assay == '10x gene expression flex'"
# Inside the filter text the boolean literal is spelled `True`.
primary_data_filter = "is_primary_data == True"

# AND the individual clauses together into a single filter expression.
value_filter_string = " and ".join(
    (disease_filter, assay_filter, primary_data_filter)
)

print(f"Using filter: {value_filter_string}")

Using filter: disease == 'invasive lobular breast carcinoma' and assay == '10x gene expression flex' and is_primary_data == True


In [7]:

# Metadata columns to fetch for every matching cell.
column_names_list = ["cell_type", "disease", "assay", "is_primary_data"]

print("Querying cell metadata...")

# Fetch per-cell metadata for human cells that satisfy the filter expression,
# restricted to the columns listed above.
cell_metadata = cellxgene_census.get_obs(
    census,
    "Homo sapiens",
    value_filter=value_filter_string,
    column_names=column_names_list,
)

print(f"Retrieved {len(cell_metadata)} cells matching the criteria.")

Querying cell metadata...
Retrieved 3937 cells matching the criteria.


In [8]:


# Count cells per annotated cell type; value_counts() sorts by descending count.
cell_type_distribution = cell_metadata['cell_type'].value_counts()

# Print the full table (pandas truncates long Series when printing).
print("\nFull Cell Type Distribution for the specified criteria:")
print(cell_type_distribution)

# Maximum number of cell types to show in the bar chart.
N = 20

# The table above also contains labels with a count of 0 (labels that no
# retrieved cell carries). Drop them before taking the top N so the chart never
# shows empty bars when fewer than N cell types were actually observed.
observed_cell_types = cell_type_distribution[cell_type_distribution > 0]
top_cell_types = observed_cell_types.head(N)
n_shown = len(top_cell_types)  # may be smaller than N

if n_shown > 0:
    print(f"\nGenerating plot for the top {n_shown} cell types...")
    # Plotly expects a DataFrame with explicit columns, so turn the Series
    # (index = cell type, values = count) into two named columns.
    top_cell_types_df = top_cell_types.reset_index()
    top_cell_types_df.columns = ['Cell Type', 'Count']

    fig = px.bar(
        top_cell_types_df,
        x='Cell Type',
        y='Count',
        title=f'Top {n_shown} Cell Types for Invasive Lobular Breast Carcinoma (10x Gene Expression Flex, Primary Data)'
    )
    # Rotate the x-axis labels so long cell type names stay readable.
    fig.update_layout(xaxis_tickangle=-45)
    fig.show()
else:
    print("\nNo significant cell types found matching the criteria to plot.")




Full Cell Type Distribution for the specified criteria:
cell_type
malignant cell                                 944
plasma cell                                    714
fibroblast of lung                             536
cytotoxic T cell                               434
smooth muscle cell                             282
                                              ... 
enterocyte of epithelium of small intestine      0
enterocyte of epithelium proper of duodenum      0
enterocyte of epithelium proper of ileum         0
enterocyte of epithelium proper of jejunum       0
endothelial cell of venule                       0
Name: count, Length: 819, dtype: int64

Generating plot for the top 20 cell types...
