In [70]:
# Import the necessary libraries 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Load your dataset
data = pd.read_csv('clean_split_data.csv')

# Initialize the LabelEncoder
le = LabelEncoder()

# Fit the encoder to the 'breed' column and transform it
data['breed_encoded'] = le.fit_transform(data['breed'])
data.drop('breed', axis=1, inplace=True)
data.replace([np.inf, -np.inf], np.nan, inplace=True)
data= data.fillna(0).astype(int)

#To see the mapping of breed to breed_encoded
breed_mapping = dict(zip(le.classes_, le.transform(le.classes_)))

# Separate target variable and features
X = data.drop('breed_encoded', axis=1)  
y = data['breed_encoded']

# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=123)

# Using a linear kernel for simplicity; you can choose the one that fits your case
model = SVC(kernel='linear', C=1.0, random_state=42)
model.fit(X_train, y_train)

predictions = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, predictions))

# First, let's suppose we have feature names in our dataset
feature_names = X.columns

# Get the weight of each feature
weights = model.coef_[0]

# Rank features based on their weights
# Using argsort() for indexing, [::-1] to reverse for descending order
indices = np.argsort(weights)[::-1]

predicted_encoded_breed = 5  
original_breed_name = le.inverse_transform([predicted_encoded_breed])[0]  # Using inverse_transform

print(f"The original breed name for the encoded breed '{predicted_encoded_breed}' is: '{original_breed_name}'.")

# Print the feature ranking with feature names
print("Feature ranking:")
for f in range(X.shape[1]):
    # Using the feature_names array to access feature names via their indices
    feature_name = feature_names[indices[f]]
    print(f"{f + 1}. Feature '{feature_name}' (Weight: {weights[indices[f]]})")


# First, assume we recover feature names from your dataset
feature_names = X.columns

# Get the absolute weight of each feature across all classes
weights = np.sum(np.abs(model.coef_), axis=0)

# Rank features based on their absolute weights
indices = np.argsort(weights)[::-1]

# Print the top feature ranking common to predicting all breeds
print("Top feature ranking (common to predicting all breeds):")
for f in range(X.shape[1]):
    print(f"{f + 1}. Feature '{feature_names[indices[f]]}' (Importance: {weights[indices[f]]})")

Accuracy: 1.0
The original breed name for the encoded breed '5' is: 'american english coonhound'.
Feature ranking:
1. Feature 'tendency_to_bark_or_howl' (Weight: 0.06265928754222089)
2. Feature 'grooming_required' (Weight: 0.05698572439392033)
3. Feature 'category_companion' (Weight: 0.055208839980648204)
4. Feature 'intelligence_category' (Weight: 0.04863402542765306)
5. Feature 'hips' (Weight: 0.04390942863089128)
6. Feature 'age' (Weight: 0.00013784036702132324)
7. Feature 'gender' (Weight: 4.6331362080209715e-09)
8. Feature 'potential_for_mouthiness' (Weight: 3.469446951953614e-18)
9. Feature 'osteopathy' (Weight: 0.0)
10. Feature 'longevity' (Weight: 0.0)
11. Feature 'tolerates_being_alone' (Weight: 0.0)
12. Feature 'kid_friendly' (Weight: 0.0)
13. Feature 'category_sporting' (Weight: 0.0)
14. Feature 'spine' (Weight: 0.0)
15. Feature 'category_herding' (Weight: 0.0)
16. Feature 'cleft_palate' (Weight: 0.0)
17. Feature 'patella' (Weight: 0.0)
18. Feature 'metabolic' (Weight: 0.0)
