In [None]:
import math
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image

In [None]:
class DataLoader:
    """Load every readable image of a folder as a fixed-size numpy array.

    Parameters
    ----------
    data_path : str
        Folder whose entries are passed to ``Image.open``.
    img_size : tuple
        Target size handed to ``img.resize`` for every image.
    color_mode : str
        ``"grayscale"`` converts images to mode ``"L"``; any other value
        converts them to ``"RGB"``.
    """

    def __init__(self, data_path, img_size=(64, 64), color_mode="rgb"):
        #store the path, image size, and color mode
        self.data_path = data_path
        self.img_size = img_size
        self.color_mode = color_mode
        #filled by load_images(); the three sequences are index-aligned
        self.images = []
        self.filenames = []
        self.original_sizes = []

    def load_images(self):
        """Read, convert and resize every image found in ``data_path``.

        Entries that raise while being opened, converted or resized are
        reported with ``print`` and skipped. Each call starts from scratch,
        so calling the method again re-reads the folder instead of
        accumulating results.

        Returns
        -------
        tuple
            ``(images, filenames, original_sizes)`` where ``images`` is a
            numpy array built from the per-image arrays, ``filenames`` is the
            list of loaded file names and ``original_sizes`` is the list of
            ``(width, height)`` tuples read before resizing.
        """
        target_mode = "L" if self.color_mode == "grayscale" else "RGB"
        loaded_images, loaded_names, loaded_sizes = [], [], []
        #sorted(): os.listdir() returns entries in arbitrary order
        for filename in sorted(os.listdir(self.data_path)):
            file_path = os.path.join(self.data_path, filename)
            try:
                with Image.open(file_path) as img:
                    #remember the size before the image is resized
                    original_width, original_height = img.size
                    resized = img.convert(target_mode).resize(self.img_size)
                    img_array = np.array(resized)
            except Exception as error:
                #best effort: one unreadable entry must not abort the whole load
                print("Skipping file:", filename, "because of error:", error)
                continue
            #record all three values only after every step succeeded, so the
            #lists can never get out of step with each other
            loaded_images.append(img_array)
            loaded_names.append(filename)
            loaded_sizes.append((original_width, original_height))
        #convert list of images to numpy array for easier processing
        self.images = np.array(loaded_images)
        self.filenames = loaded_names
        self.original_sizes = loaded_sizes
        print(f"Loaded {len(self.images)} images.")
        return self.images, self.filenames, self.original_sizes

In [None]:
class FeatureExtractor:
    """Derive simple intensity and shape features from image arrays.

    Parameters
    ----------
    images : sequence of numpy arrays
        2-D (grayscale) or 3-D (rows, cols, channels) arrays; for 3-D arrays
        channels 0-2 are used.
    filenames : list
        One name per image, in the same order as ``images``.
    original_sizes : list of (width, height)
        Size of each image before resizing; used for the aspect ratio only.
    thumb_width, thumb_height : int
        Size of each tile in the mosaic produced by ``build_mosaic``.
    area_threshold : int or float
        Grayscale value a pixel must exceed to count towards ``area`` and the
        centroid.
    """

    #column order of the DataFrame returned by extract_features()
    FEATURE_COLUMNS = (
        "filename",
        "brightness",
        "brightness (%)",
        "mean_intensity",
        "std_intensity",
        "contrast",
        "aspect_ratio",
        "area",
        "centroid_x",
        "centroid_y",
        "area_interpretation",
        "centroid_interpretation",
    )

    def __init__(self, images, filenames, original_sizes, thumb_width=100, thumb_height=100,
                 area_threshold=30):
        self.images = images #store numpy arrays of images
        self.filenames = filenames
        self.original_sizes = original_sizes
        self.thumb_width = thumb_width  #thumbnail width and height for the mosaic
        self.thumb_height = thumb_height
        self.area_threshold = area_threshold

    @staticmethod
    def _to_grayscale(img):
        """Return a 2-D intensity array (channel average for 3-D input)."""
        return np.mean(img, axis=2) if img.ndim == 3 else img

    @staticmethod
    def _intensity_stats(img):
        """Return (mean, std, contrast); for non-2-D input each is averaged over channels 0-2."""
        planes = [img] if img.ndim == 2 else [img[:, :, ch] for ch in range(3)]
        means = [float(np.mean(plane)) for plane in planes]
        stds = [float(np.std(plane)) for plane in planes]
        #convert to float before subtracting so integer dtypes cannot wrap around
        contrasts = [float(np.max(plane)) - float(np.min(plane)) for plane in planes]
        count = len(planes)
        return sum(means) / count, sum(stds) / count, sum(contrasts) / count

    @staticmethod
    def _infer_img_size(images):
        """Return (width, height) of the first image, or (64, 64) when there is none."""
        if images is not None and len(images) > 0:
            first = np.asarray(images[0])
            if first.ndim >= 2:
                return first.shape[1], first.shape[0]
        return 64, 64

    def calculate_brightness(self, img):
        """Calculate brightness of an image (numpy array).

        Returns the mean of the grayscale image as a float; 3-D input is
        converted to grayscale by averaging over the channel axis.
        """
        return float(np.mean(FeatureExtractor._to_grayscale(img)))

    def extract_features(self):
        """Compute one row of features per image.

        Returns
        -------
        pandas.DataFrame
            One row per image with the columns listed in ``FEATURE_COLUMNS``.
            ``centroid_x`` / ``centroid_y`` are NaN when no pixel exceeds
            ``area_threshold``. The frame is also stored as
            ``self.features_df`` for use in ``build_mosaic``. With no images
            an empty frame with the same columns is returned.
        """
        features = []
        for idx, img in enumerate(self.images):
            img = np.asarray(img)
            #brightness and its 0-100 percentage form
            brightness = self.calculate_brightness(img)
            brightness_pers = round((brightness / 255) * 100, 2)
            mean_intensity, std_intensity, contrast = self._intensity_stats(img)

            #aspect ratio comes from the size before resizing
            original_width, original_height = self.original_sizes[idx]
            if original_height:
                aspect_ratio = original_width / original_height
            else:
                aspect_ratio = float("nan")

            #Area: count of pixels above the brightness threshold
            mask = FeatureExtractor._to_grayscale(img) > self.area_threshold
            area = int(np.count_nonzero(mask))
            #Centroid: mean position of those pixels (rows = y, cols = x)
            if area == 0:
                centroid_x = centroid_y = float("nan")
            else:
                rows, cols = np.nonzero(mask)
                centroid_x = float(np.mean(cols))
                centroid_y = float(np.mean(rows))

            #interpret the centroid against this image's own size (width, height)
            centroid_text = self.interpret_centroid(
                {"centroid_x": centroid_x, "centroid_y": centroid_y},
                img_size=(img.shape[1], img.shape[0]),
            )
            features.append({
                "filename": self.filenames[idx],
                "brightness": brightness,
                "brightness (%)": brightness_pers,
                "mean_intensity": mean_intensity,
                "std_intensity": std_intensity,
                "contrast": contrast,
                "aspect_ratio": aspect_ratio,
                "area": area,
                "centroid_x": centroid_x,
                "centroid_y": centroid_y,
                "area_interpretation": self.interpret_area(area),
                "centroid_interpretation": centroid_text,
            })

        #explicit columns keep the schema intact even when there are no images
        features_df = pd.DataFrame(features, columns=list(self.FEATURE_COLUMNS))
        self.features_df = features_df  #save features for use in build_mosaic
        return features_df

    def interpret_area(self, area):
        """Map an area value to a short text: >1000 large, >300 moderate, otherwise small."""
        if area > 1000:
            return "Large area (may be a big, bright, or nearby galaxy)"
        elif area > 300:
            return "Moderate area (may be a typical galaxy)"
        else:
            return "Small area (may be a compact, faint, or distant galaxy)"

    def interpret_centroid(self, row, img_size=None):
        """Describe how far the bright-region centroid is from the image center.

        Parameters
        ----------
        row : mapping
            Anything indexable by ``"centroid_x"`` and ``"centroid_y"``
            (a DataFrame row or a dict).
        img_size : tuple (width, height), optional
            Image size used to locate the center. When omitted it is taken
            from the first stored image, falling back to ``(64, 64)``.

        Returns
        -------
        str
            ``"No bright region detected"`` when either coordinate is missing
            (None or NaN); otherwise a text chosen by the distance to the
            center (< 5, < 15, or larger).
        """
        if img_size is None:
            img_size = FeatureExtractor._infer_img_size(getattr(self, "images", None))
        centroid_x = row["centroid_x"]
        centroid_y = row["centroid_y"]
        #pandas stores a missing centroid as NaN, so an "is None" test is not enough
        if pd.isna(centroid_x) or pd.isna(centroid_y):
            return "No bright region detected"
        #calculate distance from centroid to image center
        center_x = img_size[0] / 2
        center_y = img_size[1] / 2
        dist = ((centroid_x - center_x) ** 2 + (centroid_y - center_y) ** 2) ** 0.5
        if dist < 5:
            return "Galaxy is well-centered in the image"
        elif dist < 15:
            return "Galaxy is slightly off-center"
        else:
            return "Galaxy is far from the image center"

    def build_mosaic(self):
        """Tile all images, ordered from lowest to highest brightness, on one canvas.

        Returns
        -------
        PIL image created with ``Image.new("RGB", ...)`` holding one
        ``thumb_width`` x ``thumb_height`` tile per image, filled row by row.

        Raises
        ------
        ValueError
            If ``extract_features()`` has not been run yet, or if there are
            no images to place.
        """
        if not hasattr(self, "features_df"):
            raise ValueError("First run extract_features()")
        num_images = len(self.features_df)
        if num_images == 0:
            raise ValueError("No images available to build a mosaic")

        #sort images by brightness; a stable sort keeps ties in their loaded order
        order = np.argsort(self.features_df["brightness"].to_numpy(), kind="stable")
        sorted_images = [self.images[i] for i in order]

        #roughly square grid: floor(sqrt(n)) columns, as many rows as needed
        cols = int(math.sqrt(num_images))
        rows = math.ceil(num_images / cols)

        #create empty canvas
        mosaic = Image.new("RGB", (cols * self.thumb_width, rows * self.thumb_height))

        #paste images into the mosaic, left to right, top to bottom
        for i, img_array in enumerate(sorted_images):
            tile = Image.fromarray(img_array).resize((self.thumb_width, self.thumb_height))
            x = (i % cols) * self.thumb_width
            y = (i // cols) * self.thumb_height
            mosaic.paste(tile, (x, y))

        return mosaic

In [None]:
class DataAnalyzer:
    """Turn the per-image intensity statistics into coarse text labels."""

    def __init__(self, features_df):
        self.features_df = features_df

    def analyze_features(self):
        """Label every image by its mean intensity and intensity spread.

        Reads the ``mean_intensity``, ``std_intensity`` and ``filename``
        columns of ``self.features_df`` and returns a new DataFrame with the
        columns ``filename``, ``apparent_luminosity`` (the mean intensity),
        ``std_overall``, ``type_guess`` and ``interpretation``.
        """
        frame = self.features_df
        means = frame["mean_intensity"].tolist()
        spreads = frame["std_intensity"].tolist()
        names = frame["filename"].tolist()

        def describe(mean):
            #brightness label from the mean intensity alone
            if mean < 20:
                return "Dim galaxy"
            if mean < 60:
                return "Moderately bright galaxy"
            return "Bright galaxy"

        def guess_type(mean, std):
            #type label from the combination of spread and mean
            if std < 35 and mean > 90:
                return "Likely Elliptical"
            if std >= 35 and mean <= 90:
                return "Likely Spiral"
            return "Uncertain/Other"

        pairs = list(zip(means, spreads))
        return pd.DataFrame({
            "filename": names,
            #apparent luminosity is just the mean intensity
            "apparent_luminosity": [mean for mean, _ in pairs],
            "std_overall": spreads,
            "type_guess": [guess_type(mean, std) for mean, std in pairs],
            "interpretation": [describe(mean) for mean, _ in pairs],
        })

In [None]:
class Visualizer:
    """Draw the summary charts for the feature table and the analysis table."""

    def __init__(self, features_df, analysis_df):
        self.features_df, self.analysis_df = features_df, analysis_df

    @staticmethod
    def _annotate(ax, title, xlabel, ylabel):
        """Put the title and both axis labels on ``ax``."""
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)

    def plot_histogram_mean_intensity(self):
        """Histogram (20 bins) of the ``mean_intensity`` column."""
        _, ax = plt.subplots(figsize=(8, 5))
        ax.hist(self.features_df["mean_intensity"], bins=20, color="skyblue", edgecolor="black")
        self._annotate(ax, "Histogram of Mean Intensity", "Mean Intensity", "Number of Images")
        ax.grid(True)
        plt.show()

    def plot_scatter_area_vs_luminosity(self):
        """Scatter of feature ``area`` against analysis ``apparent_luminosity``."""
        _, ax = plt.subplots(figsize=(8, 5))
        ax.scatter(self.features_df["area"], self.analysis_df["apparent_luminosity"], alpha=0.7)
        self._annotate(
            ax,
            "Area vs. Apparent Luminosity",
            "Area (pixels above threshold)",
            "Apparent Luminosity (mean intensity)",
        )
        ax.grid(True)
        plt.show()

    def plot_boxplot_std_by_type(self):
        """Box plot of ``std_overall`` grouped by ``type_guess``."""
        _, ax = plt.subplots(figsize=(8, 5))
        sns.boxplot(x=self.analysis_df["type_guess"], y=self.analysis_df["std_overall"], ax=ax)
        self._annotate(
            ax,
            "Boxplot of Std. Deviation by Galaxy Type Guess",
            "Type Guess",
            "Standard Deviation (overall)",
        )
        ax.grid(True)
        plt.show()

    def plot_bar_area_interpretation(self):
        """Bar chart of how often each ``area_interpretation`` value occurs."""
        _, ax = plt.subplots(figsize=(8, 5))
        category_counts = self.features_df["area_interpretation"].value_counts()
        category_counts.plot(kind="bar", color="lightgreen", edgecolor="black", ax=ax)
        self._annotate(
            ax,
            "Count of Area Interpretation Categories",
            "Area Interpretation",
            "Number of Images",
        )
        ax.grid(axis="y")
        plt.show()

    def plot_line_mean_intensity(self):
        """Line plot of ``mean_intensity`` in row order."""
        _, ax = plt.subplots(figsize=(10, 5))
        ax.plot(self.features_df["mean_intensity"].values, marker="o", linestyle="-", color="purple")
        self._annotate(ax, "Mean Intensity Across Images (by index)", "Image Index", "Mean Intensity")
        ax.grid(True)
        plt.show()

    def display_mosaic(self, mosaic):
        """Show ``mosaic`` without axes; the figure is 15 wide and keeps the mosaic's aspect."""
        figure_height = 15 * mosaic.height / mosaic.width
        _, ax = plt.subplots(figsize=(15, figure_height))
        ax.imshow(mosaic)
        ax.axis("off")
        ax.set_title("Galaxy Photomosaic Sorted by Brightness")
        plt.show()

    def show_all(self, mosaic):
        """Render the five charts in their fixed order, then the mosaic."""
        charts = (
            self.plot_histogram_mean_intensity,
            self.plot_scatter_area_vs_luminosity,
            self.plot_boxplot_std_by_type,
            self.plot_bar_area_interpretation,
            self.plot_line_mean_intensity,
        )
        for draw in charts:
            draw()
        self.display_mosaic(mosaic)

#Driver: load the images, extract features, analyze them, show the combined
#table, then build the mosaic and draw every chart.
if __name__ == "__main__":
    #folder containing the images (the value below looks like a placeholder - TODO set a real path)
    data_path = r"C:\...galaxy_images"
    #create a DataLoader that resizes every image to 64x64 and converts it to RGB
    loader = DataLoader(data_path, img_size=(64, 64), color_mode="rgb")
    #load images together with their filenames and pre-resize sizes
    images, filenames, original_sizes = loader.load_images()
    #create a FeatureExtractor; the thumbnail size is used by build_mosaic()
    extractor = FeatureExtractor(images, filenames, original_sizes, thumb_width=100, thumb_height=100)
    #extract features (one row per image)
    features_df = extractor.extract_features()
    print("Feature columns:", features_df.columns.tolist())
    #create a DataAnalyzer object
    analyzer = DataAnalyzer(features_df)
    #analyze features
    result_df = analyzer.analyze_features()
    print("Result columns:", result_df.columns.tolist())
    #merge the feature table and the analysis table on "filename"
    all_features_df = features_df.merge(result_df, on="filename", how="outer")
    #sort by brightness in ascending order (lowest brightness first)
    all_features_df = all_features_df.sort_values(by="brightness", ascending=True)
    #NOTE(review): display() is not imported in this file - presumably the IPython/Jupyter builtin
    display(all_features_df)
    
    #build the mosaic using FeatureExtractor's built-in method
    mosaic = extractor.build_mosaic()
    #visualize the data
    visualizer = Visualizer(features_df, result_df)
    visualizer.show_all(mosaic) #show_all() forwards the mosaic to display_mosaic()

In [None]:
#©Vardan Grigoryan