In [1]:
import os
import pandas as pd

In [4]:

# Root directory of the dataset; expected layout is <folder>/<split>/<class>/<files>.
folder = "../chest_xray"

# Number of files found per split and per class. Entries stay at 0 when the
# corresponding directory is missing (a warning is printed instead of failing).
class_counts = {"train": {"NORMAL": 0, "PNEUMONIA": 0},
                "test": {"NORMAL": 0, "PNEUMONIA": 0},
                "val": {"NORMAL": 0, "PNEUMONIA": 0}}

for split in class_counts:
    folder_path = os.path.join(folder, split)

    if not os.path.exists(folder_path):
        print(f"Warning: Folder '{folder_path}' does not exist.")
        continue

    for class_name in class_counts[split]:
        class_folder_path = os.path.join(folder_path, class_name)

        if os.path.exists(class_folder_path):
            # Count regular files only; nested directories are ignored.
            class_counts[split][class_name] = sum(
                1
                for entry in os.listdir(class_folder_path)
                if os.path.isfile(os.path.join(class_folder_path, entry))
            )
        else:
            print(f"Warning: Folder '{class_folder_path}' does not exist.")

# Grand totals across all splits.
# The loop iterates over the values only: binding the split name to `folder`
# here would overwrite the dataset root defined at the top of this cell and
# leave stale state behind for any later (re-)execution.
totals = {"NORMAL": 0, "PNEUMONIA": 0, "Total": 0}
for counts in class_counts.values():
    for class_name, count in counts.items():
        totals[class_name] += count
        totals["Total"] += count

# One row per split, one column per class, plus a per-row total.
df = pd.DataFrame(class_counts).T
df["Total"] = df["NORMAL"] + df["PNEUMONIA"]
totals_df = pd.DataFrame([totals], index=["Total"])

# Append the grand-total row below the per-split rows.
result_df = pd.concat([df, totals_df])

print(result_df)


       NORMAL  PNEUMONIA  Total
train    1258       3427   4685
test      167        418    585
val       158        428    586
Total    1583       4273   5856


In [8]:
import os
from PIL import Image
import torch
from torchvision import transforms
from tqdm import tqdm  # For showing progress bar

# Replace this with the path to your dataset folder
main_folder_path = '../chest_xray'

# Define image transform to convert to tensor
transform = transforms.Compose([
    transforms.ToTensor()  # Converts to tensor and scales to [0, 1]
])

# Running totals used to derive the global mean and standard deviation.
# They are kept as float64 tensors: with float32 accumulators, adding each
# image's sum to an ever-growing total drops low-order bits, and the
# E[x^2] - E[x]^2 formula below is sensitive to exactly that kind of error.
sum_pixel_values = torch.zeros((), dtype=torch.float64)
sum_squared_pixel_values = torch.zeros((), dtype=torch.float64)
total_pixels = 0

# Loop through all subfolders and images
# NOTE(review): statistics are pooled over train, test and val; if they are
# meant for input normalisation, confirm that including test/val is intended.
for split_name in ["train", "test", "val"]:
    folder_path = os.path.join(main_folder_path, split_name)

    for class_name in ["NORMAL", "PNEUMONIA"]:
        class_folder_path = os.path.join(folder_path, class_name)

        if not os.path.exists(class_folder_path):
            # Same warning as the counting cell, instead of a silent skip.
            print(f"Warning: Folder '{class_folder_path}' does not exist.")
            continue

        # Only regular files are opened (matches the counting cell); sorting
        # makes the processing order independent of the file system.
        image_names = sorted(
            entry for entry in os.listdir(class_folder_path)
            if os.path.isfile(os.path.join(class_folder_path, entry))
        )

        for image_name in tqdm(image_names, desc=f"Processing {split_name}/{class_name}"):
            image_path = os.path.join(class_folder_path, image_name)

            # Open the image; the context manager closes the file handle.
            with Image.open(image_path) as img:
                # Convert to tensor, then promote to float64 before reducing
                img_tensor = transform(img).double()

                # Accumulate pixel values and squared pixel values
                sum_pixel_values += img_tensor.sum()
                sum_squared_pixel_values += (img_tensor ** 2).sum()
                total_pixels += img_tensor.numel()  # Total number of pixels

if total_pixels == 0:
    # Without this guard the division below would yield NaN with no hint
    # about the actual cause.
    raise RuntimeError(
        f"No images found under '{main_folder_path}'; cannot compute mean/std."
    )

# Compute mean and std
mean = sum_pixel_values / total_pixels
# Rounding can push the variance marginally below zero; clamp so sqrt() never
# returns NaN.
std = (sum_squared_pixel_values / total_pixels - mean ** 2).clamp(min=0).sqrt()

# Print the results
print(f"Mean: {mean.item()}")
print(f"Standard Deviation: {std.item()}")


Processing train/NORMAL: 100%|██████████| 1258/1258 [00:13<00:00, 95.72it/s] 
Processing train/PNEUMONIA: 100%|██████████| 3427/3427 [00:11<00:00, 286.03it/s]
Processing test/NORMAL: 100%|██████████| 167/167 [00:01<00:00, 97.65it/s] 
Processing test/PNEUMONIA: 100%|██████████| 418/418 [00:01<00:00, 262.57it/s]
Processing val/NORMAL: 100%|██████████| 158/158 [00:01<00:00, 108.17it/s]
Processing val/PNEUMONIA: 100%|██████████| 428/428 [00:01<00:00, 298.95it/s]

Mean: 0.488149493932724
Standard Deviation: 0.24423697590827942



