In [1]:
from google.colab import userdata
import os

# Get the GitHub token from Colab Secrets
github_token = userdata.get('GITHUB_TOKEN')

# Replace with your GitHub username and repository name
github_username = 'zainakhalil'
repository_name = 'Ursa-Space-1A'

# Construct the clone URL with the token
clone_url = f'https://{github_token}@github.com/{github_username}/{repository_name}.git'

# Clone the repository
!git clone {clone_url}

fatal: destination path 'Ursa-Space-1A' already exists and is not an empty directory.


### 1. Load and preprocess the SAR imagery data

This step involves loading the SAR imagery data, which is expected to be in a format suitable for image processing. We'll assume the data is organized into training, validation, and test sets, and that each sample includes both HH and HV polarization channels. The data will be preprocessed by normalizing the pixel values and resizing the images to a consistent size.

In [2]:
import numpy as np
import tensorflow as tf
import pandas as pd
import ast # Import ast for literal_eval (keeping for now but might not be needed)

# Define the image size
IMG_HEIGHT = 75 # Based on the sample data dimensions
IMG_WIDTH = 75  # Based on the sample data dimensions

# Function to parse the string representation of the array and convert to numpy
def parse_array_string(array_string):
    try:
        # Remove brackets and split by any whitespace that separates numbers
        # Using a regex to split by one or more whitespace characters
        import re
        numbers_str_list = re.split(r'\s+', array_string.strip().replace('[', '').replace(']', '').replace('...', '')) # Remove brackets and '...', split by one or more spaces

        # Convert to float, ignoring any resulting empty strings from split
        all_numbers = [float(num) for num in numbers_str_list if num]

        # Check if the total number of elements is correct for reshaping
        expected_elements = IMG_HEIGHT * IMG_WIDTH
        if len(all_numbers) == expected_elements:
            # Reshape the flattened list of numbers into the expected 2D array
            return np.array(all_numbers).reshape(IMG_HEIGHT, IMG_WIDTH)
        else:
            print(f"Skipping band during parsing due to incorrect total number of elements: {len(all_numbers)} instead of {expected_elements}")
            return np.array([]) # Return an empty array if the total number of elements is incorrect
    except Exception as e:
        print(f"Error parsing array string: {e} for string: {array_string[:100]}...") # Print error and a snippet of the string
        return np.array([]) # Return empty array on parsing error


# Define the paths to your CSV files
train_x_path = '/content/Ursa-Space-1A/Datasets/X_train.csv'
train_y_path = '/content/Ursa-Space-1A/Datasets/y_train.csv'
val_x_path = '/content/Ursa-Space-1A/Datasets/X_val.csv'
val_y_path = '/content/Ursa-Space-1A/Datasets/y_val.csv'
# Assuming you also have test data in CSVs
# test_x_path = '/content/Ursa-Space-1A/Datasets/X_test.csv'
# test_y_path = '/content/Ursa-Space-1A/Datasets/y_test.csv'


# Load data from CSV files
try:
    X_train_df = pd.read_csv(train_x_path)
    y_train_df = pd.read_csv(train_y_path)
    X_val_df = pd.read_csv(val_x_path)
    y_val_df = pd.read_csv(val_y_path) # Corrected typo here
    # X_test_df = pd.read_csv(test_x_path)
    # y_test_df = pd.read_csv(y_test_path) # Corrected typo here

    print("Training data loaded successfully.")
    print("X_train shape:", X_train_df.shape)
    print("y_train shape:", y_train_df.shape)
    print("X_val shape:", X_val_df.shape)
    print("y_val shape:", y_val_df.shape)


    # Process the image data and labels
    def process_data(X_df, y_df):
        # Parse the string arrays and combine channels
        images = []
        # Keep track of the original indices of the rows that are successfully processed
        processed_indices = []
        for index, row in X_df.iterrows():
            band_1_array = parse_array_string(row['band_1'])
            band_2_array = parse_array_string(row['band_2'])

            # print(f"Row {index}: band_1_array shape: {band_1_array.shape}, band_2_array shape: {band_2_array.shape}")


            # Check if both bands were parsed correctly and have the expected dimensions
            if band_1_array.shape == (IMG_HEIGHT, IMG_WIDTH) and band_2_array.shape == (IMG_HEIGHT, IMG_WIDTH):
                 # Normalize and combine channels
                 # Assuming pixel values are not necessarily 0-255,
                 # using a simpler normalization or leaving as is for now.
                 # You might need to adjust normalization based on your data range.
                 combined_img_array = np.stack([band_1_array, band_2_array], axis=-1) # Removed / 255.0
                 images.append(combined_img_array)
                 processed_indices.append(index) # Store the original index of the processed row
            else:
                 # The parse_array_string function will print a message if a band is skipped
                 # print(f"Skipping row {index} during process_data due to incorrect band dimensions after parsing.")
                 pass # Suppress this print to reduce output verbosity and focus on parsing errors


        # Convert list of images to numpy array
        images = np.array(images)

        # Extract labels for the rows that were successfully processed
        labels = y_df.loc[processed_indices, 'is_iceberg'].values # Assuming 'is_iceberg' is the label column

        return images, labels

    print("Processing training data...")
    train_data, train_labels = process_data(X_train_df, y_train_df)
    print("Training data processed.")
    print("Processed training data shape:", train_data.shape)
    print("Processed training labels shape:", train_labels.shape)


    print("Processing validation data...")
    validation_data, validation_labels = process_data(X_val_df, y_val_df)
    print("Validation data processed.")
    print("Processed validation data shape:", validation_data.shape)
    print("Processed validation labels shape:", validation_labels.shape)

    # If you have test data, process it similarly:
    # print("Processing test data...")
    # test_data, test_labels = process_data(X_test_df, y_test_df)
    # print("Test data processed.")
    # print("Processed test data shape:", test_data.shape)
    # print("Processed test labels shape:", test_data.shape)


except FileNotFoundError as e:
    print(f"Error loading data: {e}. Please check the file paths.")
except Exception as e:
    print(f"An error occurred while loading data: {e}")


# Now train_data, train_labels, validation_data, and validation_labels are ready for model training.

Training data loaded successfully.
X_train shape: (1176, 4)
y_train shape: (1176, 1)
X_val shape: (295, 4)
y_val shape: (295, 1)
Processing training data...
Skipping band during parsing due to incorrect total number of elements: 36 instead of 5625
Skipping band during parsing due to incorrect total number of elements: 36 instead of 5625
Skipping band during parsing due to incorrect total number of elements: 36 instead of 5625
Skipping band during parsing due to incorrect total number of elements: 36 instead of 5625
Skipping band during parsing due to incorrect total number of elements: 36 instead of 5625
Skipping band during parsing due to incorrect total number of elements: 36 instead of 5625
Skipping band during parsing due to incorrect total number of elements: 36 instead of 5625
Skipping band during parsing due to incorrect total number of elements: 36 instead of 5625
Skipping band during parsing due to incorrect total number of elements: 36 instead of 5625
Skipping band during par

### 2. Build a simple CNN model

Here, we'll define a basic Convolutional Neural Network (CNN) model using TensorFlow/Keras. This model will take the combined HH and HV channel images as input.

In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Define the number of classes (vessels and icebergs)
NUM_CLASSES = 2 # Assuming binary classification

# Build the CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 2)), # Input shape includes 2 channels
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(NUM_CLASSES, activation='sigmoid') # Use 'sigmoid' for binary classification
])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### 3. Compile the model

In this step, we'll compile the CNN model by specifying the optimizer, loss function, and metrics. For binary classification, 'binary_crossentropy' is a common loss function, and 'adam' is a popular optimizer. We'll also monitor accuracy.

In [4]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

### 4. Train the CNN model

Now, we'll train the model using the training and validation datasets. The `fit()` function will be used for this purpose. You'll need to replace the placeholder data with your actual loaded and preprocessed data.

In [5]:
# Navigate to your repository directory
# Replace 'Ursa-Space-1A' with the actual name of your cloned directory if it's different
%cd /content/Ursa-Space-1A/

/content/Ursa-Space-1A


In [15]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [18]:
# Add all changes
# You can also specify individual files like git add your_notebook_name.ipynb
!git add .

In [None]:
# Replace 'Your_Notebook_Name.ipynb' with the actual name of your notebook file
!cp "/content/drive/MyDrive/Colab Notebooks/Tripynb" "/content/Ursa-Space-1A/"

In [7]:
# Commit your changes
!git commit -m "Train Simple CNN"

On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean


In [8]:
!git config --global user.email "zkhal4@uic.edu"
!git config --global user.name "zainakhalil"

In [9]:
# Push to the remote repository
# 'origin' is the default remote name, and 'main' or 'master' is typically the branch name
!git push origin main
# If your branch is named 'master', use:
# !git push origin master

Everything up-to-date


In [10]:
# Placeholder for training the model (replace with your actual training data)
print("Shape of train_data:", train_data.shape)
print("Shape of train_labels:", train_labels.shape)
print("Shape of validation_data:", validation_data.shape)
print("Shape of validation_labels:", validation_labels.shape)

history = model.fit(train_data, train_labels,
                    epochs=10, # You can adjust the number of epochs
                    validation_data=(validation_data, validation_labels))

Shape of train_data: (0,)
Shape of train_labels: (0,)
Shape of validation_data: (0,)
Shape of validation_labels: (0,)
Epoch 1/10


ValueError: Exception encountered when calling Sequential.call().

[1mInvalid input shape for input Tensor("data:0", shape=(32,), dtype=float32). Expected shape (None, 75, 75, 2), but input has incompatible shape (32,)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(32,), dtype=float32)
  • training=True
  • mask=None
  • kwargs=<class 'inspect._empty'>

### 5. Evaluate the model

After training, we'll evaluate the model's performance on the test dataset to see how well it generalizes to unseen data.

In [None]:
# Placeholder for evaluating the model (replace with your actual test data)
# loss, accuracy = model.evaluate(test_data, test_labels)
# print(f'Test loss: {loss}')
# print(f'Test accuracy: {accuracy}')

### 6. Make predictions

Once the model is evaluated, you can use it to make predictions on new SAR imagery.

In [None]:
# Placeholder for making predictions (replace with your actual new data)
# predictions = model.predict(new_data)
# print(predictions)

### 7. Add comments

Comments have been added to the code to explain each step and the purpose of different parts of the code.

### 8. Finish task

This concludes the initial setup for training a simple CNN for SAR imagery classification. You can now run the code, load your data, train the model, and experiment with different architectures or fine-tuning techniques.