## Extracting Image Features

This section extracts texture features and local features from the images. A short description of each feature family follows:

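Haralick Texture Features: Statistics such as energy, contrast, correlation, and entropy that describe the texture of an image. In the code below, energy, contrast, and correlation are computed from a gray-level co-occurrence matrix (GLCM), entropy is the Shannon entropy of the grayscale image, and the mean and standard deviation of the pixel values are recorded as well.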

Local Features: SIFT (Scale-Invariant Feature Transform) and SURF (Speeded-Up Robust Features) detect and describe distinctive local features in images. Only SIFT is used below, and only the number of detected keypoints per image is kept as a feature.

In [49]:
import json
import os

import cv2
import numpy as np  # linear algebra
import pandas as pd
import skimage.feature as skf
import skimage.filters as skg
from scipy.stats import entropy
from skimage.measure import shannon_entropy
from sklearn.cluster import KMeans

pd.set_option('display.max_rows', None)  # Display all rows
pd.set_option('display.max_columns', None)  # Display all columns


### Extracting texture features 



In [None]:
def extract_texture_features(image_path):
    """Compute texture and intensity statistics for a single image file.

    Parameters
    ----------
    image_path : str
        Path of the image to load with ``cv2.imread``.

    Returns
    -------
    tuple
        ``(energy, contrast, correlation, entropy, mean, std_dev)``.
        ``energy``, ``contrast`` and ``correlation`` are properties of the
        gray-level co-occurrence matrix of the grayscale image (distance 1,
        angle 0, 256 levels, symmetric, normalised). ``entropy`` is the
        Shannon entropy of the grayscale image. ``mean`` and ``std_dev`` are
        computed over all values of the image as loaded (before the
        grayscale conversion).

    Raises
    ------
    ValueError
        If the file cannot be read as an image.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports an unreadable/missing file by returning None;
        # fail here with the offending path instead of deep inside cvtColor.
        raise ValueError(f"Could not read image: {image_path!r}")

    # Convert the image to grayscale
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Co-occurrence matrix for horizontally adjacent pixels
    glcm = skf.graycomatrix(img_gray, [1], [0], levels=256, symmetric=True, normed=True)
    energy = skf.graycoprops(glcm, 'energy')[0, 0]
    contrast = skf.graycoprops(glcm, 'contrast')[0, 0]
    correlation = skf.graycoprops(glcm, 'correlation')[0, 0]

    # Named image_entropy so the `entropy` imported from scipy.stats is not shadowed.
    image_entropy = shannon_entropy(img_gray)

    mean = np.mean(img)
    std_dev = np.std(img)

    return energy, contrast, correlation, image_entropy, mean, std_dev



# Column layout shared by the training and testing texture tables
TEXTURE_COLUMNS = ['PetID', 'Energy', 'Contrast', 'Correlation', 'Entropy', 'Mean', 'Std_Dev']


def build_texture_frame(img_dir):
    """Extract texture features for every ``.jpg`` file in ``img_dir``.

    Rows are collected in a list and converted to a DataFrame once. This
    avoids the quadratic cost of growing a DataFrame with ``pd.concat`` inside
    the loop, and gives every row its own index label (0..n-1) instead of all
    rows sharing the label 0.

    Parameters
    ----------
    img_dir : str
        Directory containing the images.

    Returns
    -------
    pandas.DataFrame
        One row per image with the columns in ``TEXTURE_COLUMNS``; empty (but
        with those columns) when the directory holds no ``.jpg`` files.
    """
    records = []
    for filename in os.listdir(img_dir):
        if not filename.endswith('.jpg'):
            continue
        features = extract_texture_features(os.path.join(img_dir, filename))
        # PetID is the file name without its '.jpg' suffix
        records.append((filename[:-4], *features))
    return pd.DataFrame(records, columns=TEXTURE_COLUMNS)


# Texture features of the training images
df_train_img = build_texture_frame(train_img_path)
df_train_img.to_csv('df_train_img_texture.csv')

# Texture features of the testing images
df_test_img = build_texture_frame(test_img_path)
df_test_img.to_csv('df_test_img_texture.csv')

### Extracting local features



In [None]:
def extract_keypoints(file_path):
    """Count the SIFT keypoints detected in an image file.

    Parameters
    ----------
    file_path : str
        Path of the image; it is loaded as grayscale.

    Returns
    -------
    int
        Number of keypoints found by a default-configured SIFT detector.

    Raises
    ------
    ValueError
        If the file cannot be read as an image.
    """
    image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread returns None for a missing/corrupt file; report the path
        # explicitly rather than letting the detector fail (or report 0).
        raise ValueError(f"Could not read image: {file_path!r}")

    # Initialize the SIFT detector
    sift = cv2.SIFT_create()

    # Only the keypoint count is needed, so skip descriptor computation:
    # detect() finds the same keypoints as detectAndCompute() without
    # building the descriptors that were previously discarded.
    keypoints = sift.detect(image, None)

    return len(keypoints)


def extract_and_update(row, img_dir=None):
    """Return the SIFT keypoint count for the image that belongs to ``row``.

    Parameters
    ----------
    row : pandas.Series
        A DataFrame row with a ``'PetID'`` entry; the image file is
        ``<img_dir>/<PetID>.jpg``.
    img_dir : str, optional
        Directory holding the images. When omitted, ``test_img_path`` is used,
        which matches the one-argument version this helper replaces (the last
        of the two earlier definitions pointed at the test images).

    Returns
    -------
    int
        Number of keypoints reported by ``extract_keypoints``.
    """
    if img_dir is None:
        img_dir = test_img_path
    file_path = os.path.join(img_dir, f"{row['PetID']}.jpg")
    return extract_keypoints(file_path)


# Keypoint counts for the training images. DataFrame.apply forwards the extra
# keyword argument (img_dir) to extract_and_update for every row.
df_train_img['num_checkpoints'] = df_train_img.apply(extract_and_update, axis=1, img_dir=train_img_path)
df_train_img.to_csv('train_img_keypoints.csv')

# Keypoint counts for the testing images
df_test_img['num_checkpoints'] = df_test_img.apply(extract_and_update, axis=1, img_dir=test_img_path)
df_test_img.to_csv('test_img_keypoints.csv')


First, combine the image feature DataFrames produced so far.

In [19]:
# Reload the feature tables that the extraction cells above wrote to disk,
# so this part of the notebook can run without repeating the extraction.
train_texture, test_texture = (
    pd.read_csv(csv_name)
    for csv_name in ('df_train_img_texture.csv', 'df_test_img_texture.csv')
)
train_keypoints, test_keypoints = (
    pd.read_csv(csv_name)
    for csv_name in ('train_img_keypoints.csv', 'test_img_keypoints.csv')
)

In [20]:
# Sanity check: (rows, columns) of the reloaded test texture table
test_texture.shape

(14465, 8)

In [30]:
# Inspect the column names of the reloaded training texture table
train_texture.columns

Index(['Unnamed: 0', 'PetID', 'Energy', 'Contrast', 'Correlation', 'Entropy',
       'Mean', 'Std_Dev'],
      dtype='object')

In [31]:
# Inspect the column names of the reloaded training keypoint table
train_keypoints.columns

Index(['Unnamed: 0', 'PetID', 'Energy', 'Contrast', 'Correlation', 'Entropy',
       'Mean', 'Std_Dev', 'mean_h', 'mean_s', 'mean_v', 'variance_h',
       'variance_s', 'variance_v', 'num_checkpoints'],
      dtype='object')

In [32]:
# Stack the training rows on top of the testing rows under a fresh 0..n-1 index
final_keypoints = pd.concat(
    (train_keypoints, test_keypoints),
    sort=False,
    ignore_index=True,
)
final_keypoints.head()

Unnamed: 0.1,Unnamed: 0,PetID,Energy,Contrast,Correlation,Entropy,Mean,Std_Dev,mean_h,mean_s,mean_v,variance_h,variance_s,variance_v,num_checkpoints
0,0,cf8d949f9-2,0.035198,205.034612,0.957454,7.365074,133.539156,52.617205,,,,,,,371
1,0,61d4dc56b-12,0.037368,96.525711,0.983005,7.061479,88.483483,56.524143,,,,,,,187
2,0,53923463d-9,0.036733,29.713199,0.995834,6.920583,88.724871,60.340616,,,,,,,251
3,0,0173c456c-8,0.02071,108.01062,0.984549,7.500592,87.555181,61.033249,,,,,,,722
4,0,fa7c7d1be-3,0.023931,106.188246,0.982613,7.396937,121.323948,56.922695,,,,,,,486


In [33]:
# Columns to drop
columns_to_drop = ['mean_h', 'mean_s', 'mean_v', 'variance_h', 'variance_s', 'variance_v', 'Unnamed: 0']

# Drop specified columns
final_keypoints = final_keypoints.drop(columns=columns_to_drop, errors='ignore')

# Print the resulting DataFrame
final_keypoints.head()


Unnamed: 0,PetID,Energy,Contrast,Correlation,Entropy,Mean,Std_Dev,num_checkpoints
0,cf8d949f9-2,0.035198,205.034612,0.957454,7.365074,133.539156,52.617205,371
1,61d4dc56b-12,0.037368,96.525711,0.983005,7.061479,88.483483,56.524143,187
2,53923463d-9,0.036733,29.713199,0.995834,6.920583,88.724871,60.340616,251
3,0173c456c-8,0.02071,108.01062,0.984549,7.500592,87.555181,61.033249,722
4,fa7c7d1be-3,0.023931,106.188246,0.982613,7.396937,121.323948,56.922695,486


In [51]:
# Persist the combined image features. The DataFrame index is written as well,
# so reading this file back with pd.read_csv yields an extra 'Unnamed: 0' column.
final_keypoints.to_csv('image_features.csv')