In [None]:
import glob
import sqlite3

import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

In [None]:
# Notebook-wide configuration values
PROCESSED_IMAGES_DIR = "data/images_processed/"  # folder scanned for the pre-processed *.jpg images
RANDOM_STATE = 32  # shared seed for the train/test split and the Dropout layer

## Import the data and split it into training and testing sets

In [None]:
# Load the whole galaxy_data table from the SQLite database.
# try/finally guarantees the connection is released even when the query
# fails (sqlite3's own context manager only manages transactions; it does
# not close the connection).
connection = sqlite3.connect("data/galaxy_data.sqlite")
try:
    df_import = pd.read_sql("SELECT * from galaxy_data", connection)
finally:
    connection.close()

# Per-row label used to stratify the train/test split.
# to_list() is called on the Series itself: `.values` is a NumPy array,
# which offers tolist() but has no to_list() and would raise AttributeError.
stratify_data = df_import["class_reduced"].to_list()

# Model targets: every column that remains once the non-target columns
# listed below are removed.
y_output_data = df_import.drop(
    columns=["objid", "sample", "asset_id", "dr7objid", "ra", "dec", "gz2_class", "class_reduced"]
)
y_output_data.shape

In [None]:
# Load every processed image into a single NumPy array.
# glob returns paths in arbitrary, filesystem-dependent order; sorting makes
# the sample order (and therefore the seeded split) reproducible.
# NOTE(review): nothing in this cell ties image i to row i of the label table
# read from SQLite -- confirm the file order really matches the row order (or
# join on an id taken from the file name) before trusting the training pairs.
image_files = sorted(glob.glob(PROCESSED_IMAGES_DIR + "*.jpg"))

images = []
for img_path in image_files:
    # images should already be the correct size and grayscale
    # The context manager releases the file handle as soon as pixels are read.
    with Image.open(img_path) as img:
        # float32 halves the memory of the float64 array that dividing an
        # integer image by 255.0 would otherwise produce.
        img_array = np.asarray(img, dtype=np.float32)

    # Normalize pixel values to the range [0, 1] (assumes 8-bit pixel data)
    images.append(img_array / 255.0)

X_images_array = np.array(images)
X_images_array.shape

In [None]:
# Partition images and targets into train / test subsets in a single call.
# The seed pins the shuffle; stratify_data supplies the labels to stratify on.
split_options = {"random_state": RANDOM_STATE, "stratify": stratify_data}
X_train, X_test, y_train, y_test = train_test_split(
    X_images_array, y_output_data, **split_options
)

## Create the Model
- Convolution Layers
- Pooling
- Dropout
- Dense Layers
- 37 outputs

In [None]:
# Input geometry is read off the first training sample; its first dimension is
# reused for both height and width, so square single-channel images are assumed.
IMG_SIZE = X_train[0].shape[0]
INPUT_SHAPE = (IMG_SIZE, IMG_SIZE, 1)

# Whole network declared as one ordered layer list.
model = Sequential([
    # Feature extractor: three conv + max-pool stages (32 -> 64 -> 128 filters)
    Conv2D(32, (3, 3), activation='relu', input_shape=INPUT_SHAPE),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    # Collapse the feature maps into one vector per sample
    Flatten(),

    # Fully connected head; seeded 20% dropout follows the first dense layer
    Dense(256, activation='relu'),
    Dropout(0.2, seed=RANDOM_STATE),
    Dense(64, activation='relu'),

    # Output layer: 37 units with softmax.
    # NOTE(review): softmax forces the 37 outputs to sum to 1 -- confirm each
    # target row is a single distribution over all 37 columns.
    Dense(37, activation='softmax'),
])

# Adam optimizer, categorical cross-entropy loss, accuracy reported as metric
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Show the layer-by-layer summary
model.summary()

In [None]:
# Weight table used to give extra weight to rare categories.
# Each row is (label, weight); a row's position in the table becomes its
# integer key in class_weights. Every weight is currently 1.
# NOTE(review): the labels are informational only -- confirm that their order
# matches the column order of the training targets.
_LABEL_WEIGHTS = [
    ("t01_smooth", 1),
    ("t01_features", 1),
    ("t01_star_artifact", 1),
    ("t02_edge_on_yes", 1),
    ("t02_edge_on_no", 1),
    ("t03_bar_yes", 1),
    ("t03_bar_no", 1),
    ("t04_spiral_yes", 1),
    ("t04_spiral_no", 1),
    ("t05_bulge_prominence_no_bulge", 1),
    ("t05_bulge_prominence_just", 1),
    ("t05_bulge_prominence_obvious", 1),
    ("t05_bulge_prominence_dominant", 1),
    ("t06_odd_yes", 1),
    ("t06_odd_no", 1),
    ("t07_rounded_completely_round", 1),
    ("t07_rounded_in_between", 1),
    ("t07_rounded_cigar_shaped", 1),
    ("t08_odd_feature_ring", 1),
    ("t08_odd_feature_lens_or_arc", 1),
    ("t08_odd_feature_disturbed", 1),
    ("t08_odd_feature_irregular", 1),
    ("t08_odd_feature_other", 1),
    ("t08_odd_feature_merger", 1),
    ("t08_odd_feature_dust_lane", 1),
    ("t09_bulge_shape_rounded", 1),
    ("t09_bulge_shape_boxy", 1),
    ("t09_bulge_shape_no_bulge", 1),
    ("t10_arms_winding_tight", 1),
    ("t10_arms_winding_medium", 1),
    ("t10_arms_winding_loose", 1),
    ("t11_arms_number_1", 1),
    ("t11_arms_number_2", 1),
    ("t11_arms_number_3", 1),
    ("t11_arms_number_4", 1),
    ("t11_arms_number_more_than_4", 1),
    ("t11_arms_number_cant_tell", 1),
]

# Integer index -> weight mapping, in table order
class_weights = {
    index: weight for index, (_label, weight) in enumerate(_LABEL_WEIGHTS)
}

In [None]:
# Fit the network on the training split: 10 epochs, batches of 1,000 samples,
# with the class-weight table applied. The call is left as the cell's last
# expression so its return value is still displayed.
fit_options = {"class_weight": class_weights, "epochs": 10, "batch_size": 1_000}
model.fit(X_train, y_train, **fit_options)