# Assess read depth by library

In [1]:
import os
import glob
import pandas as pd

# Define the directory containing the CSV files
directory = "./Metric_Summaries/"

# Find all CSV files in the directory
csv_files = glob.glob(os.path.join(directory, "*.csv"))

# Initialize an empty DataFrame to hold concatenated data
all_data = pd.DataFrame()

# Loop through each CSV file
for file in csv_files:
    try:
        # Extract the base filename without extension for labeling
        file_name = os.path.basename(file)
        sample_name = file_name.split("_metrics_summary.csv")[0]
        
        # Read the CSV file
        df = pd.read_csv(file)
        
        # Add a column for the sample name
        df['Sample'] = sample_name
        
        # Concatenate the current file's data with the main DataFrame
        all_data = pd.concat([all_data, df], ignore_index=True)
    except Exception as e:
        print(f"Error loading {file}: {e}")


print("All CSV files have been successfully loaded and concatenated!")


All CSV files have been successfully loaded and concatenated!


In [2]:
all_data

Unnamed: 0,Estimated Number of Cells,Mean Reads per Cell,Median Genes per Cell,Number of Reads,Valid Barcodes,Sequencing Saturation,Q30 Bases in Barcode,Q30 Bases in RNA Read,Q30 Bases in UMI,Reads Mapped to Genome,Reads Mapped Confidently to Genome,Reads Mapped Confidently to Intergenic Regions,Reads Mapped Confidently to Intronic Regions,Reads Mapped Confidently to Exonic Regions,Reads Mapped Confidently to Transcriptome,Reads Mapped Antisense to Gene,Fraction Reads in Cells,Total Genes Detected,Median UMI Counts per Cell,Sample
0,5431,40464,1557,219761632,96.3%,53.3%,94.5%,89.8%,94.2%,89.0%,87.8%,7.0%,0.9%,79.9%,63.4%,16.7%,36.8%,45246,2581,L50
1,2106,64575,1850,135994718,96.4%,79.1%,94.4%,89.8%,94.1%,90.4%,89.3%,8.8%,0.8%,79.7%,68.4%,11.3%,55.0%,41195,3595,L52
2,7106,50451,1406,358503201,96.3%,67.0%,94.6%,90.2%,94.3%,86.4%,85.2%,9.4%,0.9%,74.8%,64.1%,10.7%,32.6%,43154,2338,L43
3,64818,5032,687,326178258,96.2%,61.3%,94.7%,90.3%,94.4%,87.5%,86.3%,7.8%,0.9%,77.6%,69.6%,7.9%,91.6%,46992,905,L34
4,6613,57344,2065,379213893,95.6%,62.1%,94.4%,89.9%,94.0%,88.4%,87.1%,6.7%,0.9%,79.5%,64.3%,15.2%,38.6%,45474,3774,L51
5,4694,70040,2334,328766264,96.3%,60.3%,94.7%,90.4%,94.4%,90.2%,89.1%,7.3%,1.0%,80.8%,65.1%,15.8%,35.7%,45514,4622,L37
6,7044,53243,1533,375046218,95.6%,66.8%,94.4%,89.4%,93.9%,86.7%,85.5%,6.7%,0.8%,78.0%,59.0%,19.0%,41.3%,48909,2530,L54
7,27659,11967,560,330983702,96.0%,66.3%,94.5%,90.2%,94.2%,81.5%,80.4%,11.7%,0.8%,67.9%,60.5%,7.3%,57.3%,44032,722,L31
8,5114,62408,1896,319156698,95.7%,66.6%,94.4%,89.5%,94.1%,90.0%,88.9%,6.5%,1.1%,81.3%,65.8%,15.6%,35.1%,42556,3563,L30
9,28583,14342,643,409937700,96.5%,70.3%,94.4%,90.1%,94.1%,84.7%,83.5%,10.5%,0.9%,72.0%,62.7%,9.3%,51.8%,45724,832,L47


In [3]:
export_data = all_data[['Sample','Number of Reads']]

In [4]:
# Ensure the 'Number of Reads' column is numeric (remove commas)
all_data['Number of Reads'] = all_data['Number of Reads'].str.replace(',', '').astype(float)

# Calculate mean and median for 'Number of Reads'
mean_reads = all_data['Number of Reads'].mean()
median_reads = all_data['Number of Reads'].median()

# Create a new DataFrame with the calculated statistics
statistics = pd.DataFrame({
    'Sample': ['Mean', 'Median'],
    'Number of Reads': [mean_reads, median_reads]
})

# Append the statistics to the original DataFrame
export_data = pd.concat([all_data[['Sample', 'Number of Reads']], statistics], ignore_index=True)

# Convert 'Number of Reads' to strings without scientific notation
export_data['Number of Reads'] = export_data['Number of Reads'].apply(lambda x: f"{x:.0f}")

# Export the DataFrame to a tab-delimited file without index, avoiding scientific notation
output_file = "Read_Counts.txt"
export_data.to_csv(output_file, sep='\t', index=False, float_format='%.0f')

In [5]:
export_data

Unnamed: 0,Sample,Number of Reads
0,L50,219761632
1,L52,135994718
2,L43,358503201
3,L34,326178258
4,L51,379213893
5,L37,328766264
6,L54,375046218
7,L31,330983702
8,L30,319156698
9,L47,409937700
