In [27]:
import pandas as pd
df = pd.read_excel('Dataset (2).xlsx')


In [28]:
# Filter out 'unknown' race instances
df = df[~df['Race'].isin(['Unknown'])]
del df['Gender']
print(df)

                     Age       Race  CatsInHouse  \
0       Less than 1 year     Birman            3   
1       Less than 1 year     Birman            1   
2             2-10 years   European            4   
3       Less than 1 year   European            1   
4              1-2 years     Birman            2   
...                  ...        ...          ...   
3138          2-10 years    Persian            1   
3139    Less than 1 year  MaineCoon            3   
3140  More than 10 years      Other            1   
3141    Less than 1 year     Bengal            1   
3142    Less than 1 year     Bengal            5   

                            HousingType       Zone  TimeOutside  \
0             Apartment without balcony      Urban            0   
1     Apartment with balcony or terrace      Urban            0   
2                House in a subdivision      Urban            0   
3                House in a subdivision      Rural            2   
4                 Individual house zone 

In [29]:
# Calculate the mean of numeric values
numeric_mask = df['NaturalAreasAbundance'] != 'Unknown'
mean_value = pd.to_numeric(df[numeric_mask]['NaturalAreasAbundance']).mean()

# Replace 'Unknown' with the mean value and round to nearest integer
df['NaturalAreasAbundance'] = df['NaturalAreasAbundance'].replace('Unknown', mean_value)
df['NaturalAreasAbundance'] = df['NaturalAreasAbundance'].astype(float).round().astype(int)
print(df.info())

<class 'pandas.core.frame.DataFrame'>
Index: 3063 entries, 0 to 3142
Data columns (total 25 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   Age                    3063 non-null   object
 1   Race                   3063 non-null   object
 2   CatsInHouse            3063 non-null   int64 
 3   HousingType            3063 non-null   object
 4   Zone                   3063 non-null   object
 5   TimeOutside            3063 non-null   int64 
 6   TimeWithOwner          3063 non-null   int64 
 7   Shy                    3063 non-null   int64 
 8   Calm                   3063 non-null   int64 
 9   Skittish               3063 non-null   int64 
 10  Intelligent            3063 non-null   int64 
 11  Vigilant               3063 non-null   int64 
 12  Tenacious              3063 non-null   int64 
 13  Affectionate           3063 non-null   int64 
 14  Friendly               3063 non-null   int64 
 15  Loner                  306

In [30]:
from sklearn.model_selection import train_test_split
import pandas as pd

def create_stratified_split(df, stratify_col='Race', test_size=0.1, random_state=42):
    # Create the train/test split while maintaining the same proportions of Race
    train_df, test_df = train_test_split(
        df,
        test_size=test_size,
        random_state=random_state,
        stratify=df[stratify_col]
    )
    
    # Verify the proportions
    print("\nRace proportions in original dataset:")
    print(df[stratify_col].value_counts(normalize=True))
    
    print("\nRace proportions in larger split (90%):")
    print(train_df[stratify_col].value_counts(normalize=True))
    
    print("\nRace proportions in smaller split (10%):")
    print(test_df[stratify_col].value_counts(normalize=True))
    
    return train_df, test_df

df, test_df = create_stratified_split(df)

# Print the sizes of the splits
print(f"\nSplit sizes:")
print(f"Larger split (90%): {len(df)} rows")
print(f"Smaller split (10%): {len(test_df)} rows")


Race proportions in original dataset:
Race
European            0.333660
NoBreed             0.157689
Bengal              0.078028
Ragdoll             0.070846
MaineCoon           0.064643
Birman              0.062684
Persian             0.062684
BritishShorthair    0.054195
Other               0.044074
Sphynx              0.024812
Siamese             0.018936
Chartreux           0.010121
TurkishAngora       0.009141
Savannah            0.008488
Name: proportion, dtype: float64

Race proportions in larger split (90%):
Race
European            0.333817
NoBreed             0.157837
Bengal              0.078012
Ragdoll             0.070755
MaineCoon           0.064586
Persian             0.062772
Birman              0.062772
BritishShorthair    0.054064
Other               0.044267
Sphynx              0.024673
Siamese             0.018868
Chartreux           0.010160
TurkishAngora       0.009071
Savannah            0.008345
Name: proportion, dtype: float64

Race proportions in smaller spl

In [31]:
import pandas as pd

def transform_dataset(df):
    """
    Transform the dataset by:
    1. Mapping age values to numeric
    2. One-hot encoding HousingType, Zone, and Race

    Args:
    df (pandas.DataFrame): Input DataFrame

    Returns:
    pandas.DataFrame: Transformed DataFrame
    """
    # Create a copy of the DataFrame
    df_transformed = df.copy()

    # Age mapping
    age_mapping = {
        'Less than 1 year': 0.5,
        '1-2 years': 1.5,
        '2-10 years': 6,
        'More than 10 years': 12
    }

    # Apply age mapping
    df_transformed['Age'] = df_transformed['Age'].map(age_mapping)

    # One-hot encode categorical variables
    categorical_columns = ['HousingType', 'Zone', 'Race']

    # Create one-hot encoded columns
    for column in categorical_columns:
        one_hot = pd.get_dummies(df_transformed[column], prefix=column)

        # Add one-hot encoded columns to the transformed DataFrame
        df_transformed = pd.concat([df_transformed, one_hot], axis=1)

        # Drop the original categorical column
        df_transformed = df_transformed.drop(column, axis=1)

    return df_transformed

# Example usage:
df = transform_dataset(df)
test_df = transform_dataset(test_df)

df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2756 entries, 1053 to 1321
Data columns (total 43 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   Age                                            2756 non-null   float64
 1   CatsInHouse                                    2756 non-null   int64  
 2   TimeOutside                                    2756 non-null   int64  
 3   TimeWithOwner                                  2756 non-null   int64  
 4   Shy                                            2756 non-null   int64  
 5   Calm                                           2756 non-null   int64  
 6   Skittish                                       2756 non-null   int64  
 7   Intelligent                                    2756 non-null   int64  
 8   Vigilant                                       2756 non-null   int64  
 9   Tenacious                                      2756 no

In [32]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Define a distinct color palette for cat breeds
DISTINCT_COLORS = {
    'Bengal': '#FF0000',         # Red
    'Birman': '#00FF00',         # Green
    'BritishShorthair': '#0000FF', # Blue
    'Chartreux': '#FFA500',      # Orange
    'European': '#800080',       # Purple
    'MaineCoon': '#FFD700',      # Gold
    'NoBreed': '#4B0082',        # Indigo
    'Other': '#808080',          # Gray
    'Persian': '#FF1493',        # Deep Pink
    'Ragdoll': '#00FFFF',        # Cyan
    'Savannah': '#FF4500',       # Orange Red
    'Siamese': '#9400D3',        # Violet
    'Sphynx': '#32CD32',         # Lime Green
    'TurkishAngora': '#FF8C00'   # Dark Orange
}

# Function to prepare the data
def prepare_data(df):
    # Separate features and target
    # Exclude all Race_ columns as they are our target
    feature_cols = [col for col in df.columns if not col.startswith('Race_')]
    X = df[feature_cols]
    
    # Create target column (single label for each cat)
    race_cols = [col for col in df.columns if col.startswith('Race_')]
    y = df[race_cols].idxmax(axis=1).str.replace('Race_', '')
    
    # Scale the features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    return X_scaled, y

# Function to create PCA visualization
def create_pca_plots(X_scaled, y):
    # 2D PCA
    pca_2d = PCA(n_components=2)
    X_pca_2d = pca_2d.fit_transform(X_scaled)
    
    # 3D PCA
    pca_3d = PCA(n_components=3)
    X_pca_3d = pca_3d.fit_transform(X_scaled)
    
    # Calculate explained variance ratios
    var_ratio_2d = pca_2d.explained_variance_ratio_
    var_ratio_3d = pca_3d.explained_variance_ratio_
    
    # Create subplot with 2D and 3D PCA
    fig = make_subplots(
        rows=1, cols=2,
        specs=[[{'type': 'xy'}, {'type': 'scene'}]],
        subplot_titles=(
            f'2D PCA (Variance explained: {var_ratio_2d[0]:.1%}, {var_ratio_2d[1]:.1%})',
            f'3D PCA (Variance explained: {var_ratio_3d[0]:.1%}, {var_ratio_3d[1]:.1%}, {var_ratio_3d[2]:.1%})'
        )
    )
    
    # Create color array
    colors = [DISTINCT_COLORS[breed] for breed in y]
    
    # 2D PCA plot
    for breed in DISTINCT_COLORS.keys():
        mask = y == breed
        if np.any(mask):
            fig.add_trace(
                go.Scatter(
                    x=X_pca_2d[mask, 0],
                    y=X_pca_2d[mask, 1],
                    mode='markers',
                    marker=dict(size=8, color=DISTINCT_COLORS[breed]),
                    name=breed,
                    text=y[mask],
                    showlegend=True
                ),
                row=1, col=1
            )
    
    # 3D PCA plot
    for breed in DISTINCT_COLORS.keys():
        mask = y == breed
        if np.any(mask):
            fig.add_trace(
                go.Scatter3d(
                    x=X_pca_3d[mask, 0],
                    y=X_pca_3d[mask, 1],
                    z=X_pca_3d[mask, 2],
                    mode='markers',
                    marker=dict(size=4, color=DISTINCT_COLORS[breed]),
                    name=breed,
                    text=y[mask],
                    showlegend=False
                ),
                row=1, col=2
            )
    
    # Update layout
    fig.update_layout(
        title='PCA Visualization of Cat Breeds',
        height=800,
        width=1600,
        legend=dict(
            itemsizing='constant',
            title=dict(text='Cat Breeds'),
            bgcolor='rgba(255, 255, 255, 0.8)'
        ),
        template='plotly_white'
    )
    
    return fig

# Function to create t-SNE visualization
def create_tsne_plots(X_scaled, y):
    # 2D t-SNE
    tsne_2d = TSNE(n_components=2, random_state=42)
    X_tsne_2d = tsne_2d.fit_transform(X_scaled)
    
    # 3D t-SNE
    tsne_3d = TSNE(n_components=3, random_state=42)
    X_tsne_3d = tsne_3d.fit_transform(X_scaled)
    
    # Create subplot with 2D and 3D t-SNE
    fig = make_subplots(
        rows=1, cols=2,
        specs=[[{'type': 'xy'}, {'type': 'scene'}]],
        subplot_titles=('2D t-SNE', '3D t-SNE')
    )
    
    # 2D t-SNE plot
    for breed in DISTINCT_COLORS.keys():
        mask = y == breed
        if np.any(mask):
            fig.add_trace(
                go.Scatter(
                    x=X_tsne_2d[mask, 0],
                    y=X_tsne_2d[mask, 1],
                    mode='markers',
                    marker=dict(size=8, color=DISTINCT_COLORS[breed]),
                    name=breed,
                    text=y[mask],
                    showlegend=True
                ),
                row=1, col=1
            )
    
    # 3D t-SNE plot
    for breed in DISTINCT_COLORS.keys():
        mask = y == breed
        if np.any(mask):
            fig.add_trace(
                go.Scatter3d(
                    x=X_tsne_3d[mask, 0],
                    y=X_tsne_3d[mask, 1],
                    z=X_tsne_3d[mask, 2],
                    mode='markers',
                    marker=dict(size=4, color=DISTINCT_COLORS[breed]),
                    name=breed,
                    text=y[mask],
                    showlegend=False
                ),
                row=1, col=2
            )
    
    # Update layout
    fig.update_layout(
        title='t-SNE Visualization of Cat Breeds',
        height=800,
        width=1600,
        legend=dict(
            itemsizing='constant',
            title=dict(text='Cat Breeds'),
            bgcolor='rgba(255, 255, 255, 0.8)'
        ),
        template='plotly_white'
    )
    
    return fig

# Main execution
def visualize_cat_data(df):
    # Prepare the data
    X_scaled, y = prepare_data(df)
    
    # Create PCA plots
    pca_fig = create_pca_plots(X_scaled, y)
    
    # Create t-SNE plots
    tsne_fig = create_tsne_plots(X_scaled, y)
    
    return pca_fig, tsne_fig

# Example usage:
pca_fig, tsne_fig = visualize_cat_data(df)
pca_fig.show()
tsne_fig.show()

In [49]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf

# Function to create the model with L2 regularization
def create_model(input_dim, output_dim, l2_lambda=0.005):
    model = Sequential([
        Dense(128, activation='relu', kernel_regularizer=l2(l2_lambda), input_dim=input_dim),
        Dropout(0.3),
        Dense(64, activation='relu', kernel_regularizer=l2(l2_lambda)),
        Dropout(0.2),
        Dense(64, activation='relu', kernel_regularizer=l2(l2_lambda)),
        Dense(64, activation='relu', kernel_regularizer=l2(l2_lambda)),
        Dense(64, activation='relu', kernel_regularizer=l2(l2_lambda)),

        Dense(output_dim, activation='softmax')
    ])
    
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

def train_and_evaluate_model(X, y, n_splits=6):
    # Convert the target variables to numerical format
    race_columns = [col for col in y.columns if col.startswith('Race_')]
    y_encoded = y[race_columns].values
    
    # Initialize StandardScaler
    scaler = StandardScaler()
    
    # Initialize StratifiedKFold
    # We'll use the argmax of y for stratification since we need a 1D array
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    
    # Initialize lists to store results
    val_accuracies = []
    test_accuracies = []
    best_model = None
    best_val_accuracy = 0
    
    # Early stopping callback
    early_stopping = EarlyStopping(
        monitor='val_accuracy',
        patience=30,
        restore_best_weights=True
    )
    
    # Perform k-fold cross-validation
    for fold, (train_idx, val_idx) in enumerate(skf.split(X, np.argmax(y_encoded, axis=1)), 1):
        print(f"\nFold {fold}/{n_splits}")
        
        # Split data
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y_encoded[train_idx], y_encoded[val_idx]
        
        # Scale features
        X_train_scaled = scaler.fit_transform(X_train)
        X_val_scaled = scaler.transform(X_val)
        
        # Create and train model
        model = create_model(X_train.shape[1], y_encoded.shape[1])
        
        history = model.fit(
            X_train_scaled, y_train,
            epochs=100,
            batch_size=32,
            validation_data=(X_val_scaled, y_val),
            callbacks=[early_stopping],
            verbose=2
        )
        
        # Evaluate model
        val_accuracy = max(history.history['val_accuracy'])
        val_accuracies.append(val_accuracy)
        
        print(f"Validation Accuracy: {val_accuracy:.4f}")
        
        # Save best model
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            best_model = tf.keras.models.clone_model(model)
            best_model.set_weights(model.get_weights())
    
    print("\nCross-validation results:")
    print(f"Mean validation accuracy: {np.mean(val_accuracies):.4f}")
    print(f"Standard deviation: {np.std(val_accuracies):.4f}")
    
    return best_model, val_accuracies

# Prepare the data
def prepare_data(data):
    # Separate features and target
    race_columns = [col for col in data.columns if col.startswith('Race_')]
    X = data.drop(columns=race_columns)
    y = data[race_columns]
    
    return X, y

# Main execution
def main(data):
    # Prepare data
    X, y = prepare_data(data)
    
    # Train and evaluate model
    best_model, accuracies = train_and_evaluate_model(X, y)
    
    return best_model, accuracies

# Example usage
    # Load your data here
best_model, accuracies = main(df)


Fold 1/6
Epoch 1/100



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



72/72 - 4s - 51ms/step - accuracy: 0.3210 - loss: 3.5804 - val_accuracy: 0.3326 - val_loss: 3.1954
Epoch 2/100
72/72 - 0s - 3ms/step - accuracy: 0.3345 - loss: 2.9702 - val_accuracy: 0.3326 - val_loss: 2.8191
Epoch 3/100
72/72 - 0s - 3ms/step - accuracy: 0.3341 - loss: 2.6771 - val_accuracy: 0.3326 - val_loss: 2.5959
Epoch 4/100
72/72 - 0s - 3ms/step - accuracy: 0.3341 - loss: 2.4854 - val_accuracy: 0.3326 - val_loss: 2.4515
Epoch 5/100
72/72 - 0s - 3ms/step - accuracy: 0.3341 - loss: 2.3750 - val_accuracy: 0.3326 - val_loss: 2.3667
Epoch 6/100
72/72 - 0s - 3ms/step - accuracy: 0.3349 - loss: 2.2966 - val_accuracy: 0.3326 - val_loss: 2.3065
Epoch 7/100
72/72 - 0s - 3ms/step - accuracy: 0.3354 - loss: 2.2473 - val_accuracy: 0.3413 - val_loss: 2.2617
Epoch 8/100
72/72 - 0s - 3ms/step - accuracy: 0.3332 - loss: 2.2057 - val_accuracy: 0.3304 - val_loss: 2.2429
Epoch 9/100
72/72 - 0s - 3ms/step - accuracy: 0.3367 - loss: 2.1903 - val_accuracy: 0.3283 - val_loss: 2.2216
Epoch 10/100
72/72 - 


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



72/72 - 3s - 42ms/step - accuracy: 0.3323 - loss: 3.5739 - val_accuracy: 0.3326 - val_loss: 3.2156
Epoch 2/100
72/72 - 0s - 3ms/step - accuracy: 0.3341 - loss: 3.0467 - val_accuracy: 0.3326 - val_loss: 2.8478
Epoch 3/100
72/72 - 0s - 3ms/step - accuracy: 0.3341 - loss: 2.7463 - val_accuracy: 0.3326 - val_loss: 2.6168
Epoch 4/100
72/72 - 0s - 3ms/step - accuracy: 0.3458 - loss: 2.5471 - val_accuracy: 0.3457 - val_loss: 2.4637
Epoch 5/100
72/72 - 0s - 3ms/step - accuracy: 0.3463 - loss: 2.4225 - val_accuracy: 0.3522 - val_loss: 2.3642
Epoch 6/100
72/72 - 0s - 3ms/step - accuracy: 0.3510 - loss: 2.3245 - val_accuracy: 0.3565 - val_loss: 2.2973
Epoch 7/100
72/72 - 0s - 3ms/step - accuracy: 0.3467 - loss: 2.2670 - val_accuracy: 0.3413 - val_loss: 2.2568
Epoch 8/100
72/72 - 0s - 3ms/step - accuracy: 0.3554 - loss: 2.2105 - val_accuracy: 0.3478 - val_loss: 2.2227
Epoch 9/100
72/72 - 0s - 3ms/step - accuracy: 0.3541 - loss: 2.1899 - val_accuracy: 0.3478 - val_loss: 2.1935
Epoch 10/100
72/72 - 


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



72/72 - 3s - 44ms/step - accuracy: 0.2987 - loss: 3.6031 - val_accuracy: 0.3333 - val_loss: 3.1919
Epoch 2/100
72/72 - 0s - 3ms/step - accuracy: 0.3378 - loss: 3.0078 - val_accuracy: 0.3442 - val_loss: 2.8495
Epoch 3/100
72/72 - 0s - 3ms/step - accuracy: 0.3435 - loss: 2.7112 - val_accuracy: 0.3508 - val_loss: 2.6345
Epoch 4/100
72/72 - 0s - 3ms/step - accuracy: 0.3522 - loss: 2.5269 - val_accuracy: 0.3573 - val_loss: 2.4739
Epoch 5/100
72/72 - 0s - 3ms/step - accuracy: 0.3492 - loss: 2.3964 - val_accuracy: 0.3529 - val_loss: 2.3821
Epoch 6/100
72/72 - 0s - 3ms/step - accuracy: 0.3478 - loss: 2.3236 - val_accuracy: 0.3464 - val_loss: 2.3412
Epoch 7/100
72/72 - 0s - 3ms/step - accuracy: 0.3548 - loss: 2.2494 - val_accuracy: 0.3660 - val_loss: 2.2800
Epoch 8/100
72/72 - 0s - 3ms/step - accuracy: 0.3496 - loss: 2.2189 - val_accuracy: 0.3508 - val_loss: 2.2370
Epoch 9/100
72/72 - 0s - 3ms/step - accuracy: 0.3557 - loss: 2.1686 - val_accuracy: 0.3508 - val_loss: 2.2185
Epoch 10/100
72/72 - 


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



72/72 - 3s - 43ms/step - accuracy: 0.3235 - loss: 3.5758 - val_accuracy: 0.3333 - val_loss: 3.2396
Epoch 2/100
72/72 - 0s - 3ms/step - accuracy: 0.3339 - loss: 3.0566 - val_accuracy: 0.3333 - val_loss: 2.8673
Epoch 3/100
72/72 - 0s - 3ms/step - accuracy: 0.3335 - loss: 2.7621 - val_accuracy: 0.3333 - val_loss: 2.6479
Epoch 4/100
72/72 - 0s - 3ms/step - accuracy: 0.3418 - loss: 2.5691 - val_accuracy: 0.3420 - val_loss: 2.4919
Epoch 5/100
72/72 - 0s - 3ms/step - accuracy: 0.3500 - loss: 2.4392 - val_accuracy: 0.3529 - val_loss: 2.3841
Epoch 6/100
72/72 - 0s - 3ms/step - accuracy: 0.3518 - loss: 2.3512 - val_accuracy: 0.3377 - val_loss: 2.3145
Epoch 7/100
72/72 - 0s - 3ms/step - accuracy: 0.3566 - loss: 2.2901 - val_accuracy: 0.3399 - val_loss: 2.2727
Epoch 8/100
72/72 - 0s - 3ms/step - accuracy: 0.3579 - loss: 2.2344 - val_accuracy: 0.3442 - val_loss: 2.2312
Epoch 9/100
72/72 - 0s - 3ms/step - accuracy: 0.3587 - loss: 2.2020 - val_accuracy: 0.3508 - val_loss: 2.1953
Epoch 10/100
72/72 - 


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



72/72 - 3s - 42ms/step - accuracy: 0.3165 - loss: 3.5692 - val_accuracy: 0.3355 - val_loss: 3.1809
Epoch 2/100
72/72 - 0s - 3ms/step - accuracy: 0.3326 - loss: 2.9967 - val_accuracy: 0.3355 - val_loss: 2.8201
Epoch 3/100
72/72 - 0s - 3ms/step - accuracy: 0.3335 - loss: 2.7024 - val_accuracy: 0.3355 - val_loss: 2.5909
Epoch 4/100
72/72 - 0s - 3ms/step - accuracy: 0.3343 - loss: 2.5108 - val_accuracy: 0.3442 - val_loss: 2.4545
Epoch 5/100
72/72 - 0s - 3ms/step - accuracy: 0.3535 - loss: 2.3903 - val_accuracy: 0.3529 - val_loss: 2.3606
Epoch 6/100
72/72 - 0s - 3ms/step - accuracy: 0.3474 - loss: 2.3169 - val_accuracy: 0.3442 - val_loss: 2.3117
Epoch 7/100
72/72 - 0s - 3ms/step - accuracy: 0.3605 - loss: 2.2482 - val_accuracy: 0.3508 - val_loss: 2.2643
Epoch 8/100
72/72 - 0s - 3ms/step - accuracy: 0.3557 - loss: 2.2004 - val_accuracy: 0.3508 - val_loss: 2.2445
Epoch 9/100
72/72 - 0s - 3ms/step - accuracy: 0.3505 - loss: 2.1735 - val_accuracy: 0.3486 - val_loss: 2.2250
Epoch 10/100
72/72 - 


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



72/72 - 3s - 43ms/step - accuracy: 0.3078 - loss: 3.5771 - val_accuracy: 0.3355 - val_loss: 3.1886
Epoch 2/100
72/72 - 0s - 3ms/step - accuracy: 0.3335 - loss: 3.0008 - val_accuracy: 0.3355 - val_loss: 2.8237
Epoch 3/100
72/72 - 0s - 3ms/step - accuracy: 0.3335 - loss: 2.7019 - val_accuracy: 0.3508 - val_loss: 2.5958
Epoch 4/100
72/72 - 0s - 3ms/step - accuracy: 0.3457 - loss: 2.5171 - val_accuracy: 0.3551 - val_loss: 2.4735
Epoch 5/100
72/72 - 0s - 3ms/step - accuracy: 0.3448 - loss: 2.3994 - val_accuracy: 0.3638 - val_loss: 2.3572
Epoch 6/100
72/72 - 0s - 3ms/step - accuracy: 0.3535 - loss: 2.3182 - val_accuracy: 0.3595 - val_loss: 2.2892
Epoch 7/100
72/72 - 0s - 3ms/step - accuracy: 0.3557 - loss: 2.2512 - val_accuracy: 0.3682 - val_loss: 2.2447
Epoch 8/100
72/72 - 0s - 3ms/step - accuracy: 0.3474 - loss: 2.2134 - val_accuracy: 0.3638 - val_loss: 2.2125
Epoch 9/100
72/72 - 0s - 3ms/step - accuracy: 0.3509 - loss: 2.1804 - val_accuracy: 0.3704 - val_loss: 2.1858
Epoch 10/100
72/72 - 

In [43]:
def evaluate_test_set(model, scaler, test_df):
    """
    Evaluate the trained model on a test dataset.
    
    Parameters:
    model: Trained tensorflow model
    scaler: Fitted StandardScaler
    test_df: pandas DataFrame containing test data
    
    Returns:
    dict: Dictionary containing test metrics and predictions
    """
    # Prepare test data using the same preparation function
    X_test, y_test = prepare_data(test_df)
    
    # Scale the test features using the same scaler
    X_test_scaled = scaler.transform(X_test)
    
    # Get predictions
    predictions = model.predict(X_test_scaled)
    
    # Evaluate model on test set
    test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)
    
    # Get predicted classes (assuming 0.5 threshold for binary classification)
    predicted_classes = (predictions >= 0.5).astype(int)
    
    # Calculate additional metrics per class
    results = {
        'test_accuracy': test_accuracy,
        'test_loss': test_loss,
        'predictions': predictions,
        'predicted_classes': predicted_classes,
        'true_labels': y_test
    }
    
    return results

# Example usage:
# Assuming you have your trained model, scaler, and test_df ready
test_results = evaluate_test_set(model, scaler, test_df)

# Print test metrics
print(f"\nTest Set Results:")
print(f"Test Accuracy: {test_results['test_accuracy']:.4f}")
print(f"Test Loss: {test_results['test_loss']:.4f}")

# Print accuracy per race category
race_columns = [col for col in test_df.columns if col.startswith('Race_')]
for i, race in enumerate(race_columns):
    correct_predictions = (test_results['predicted_classes'][:, i] == 
                         test_results['true_labels'][race]).mean()
    print(f"Accuracy for {race}: {correct_predictions:.4f}")
    
test_results = evaluate_test_set(model, scaler, test_df)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step

Test Set Results:
Test Accuracy: 0.3779
Test Loss: 0.2258
Accuracy for Race_Bengal: 0.9218
Accuracy for Race_Birman: 0.9381
Accuracy for Race_BritishShorthair: 0.9446
Accuracy for Race_Chartreux: 0.9902
Accuracy for Race_European: 0.7068
Accuracy for Race_MaineCoon: 0.9349
Accuracy for Race_NoBreed: 0.8436
Accuracy for Race_Other: 0.9577
Accuracy for Race_Persian: 0.9381
Accuracy for Race_Ragdoll: 0.9283
Accuracy for Race_Savannah: 0.9902
Accuracy for Race_Siamese: 0.9805
Accuracy for Race_Sphynx: 0.9739
Accuracy for Race_TurkishAngora: 0.9902
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
