# In [18]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Load the dataset and drop the six `category_*` columns in a single step.
df = pd.read_csv('clean_split_data.csv').drop(
    columns=[
        'category_companion',
        'category_herding',
        'category_hound',
        'category_sporting',
        'category_terrier',
        'category_working',
    ]
)

# Alternate breed names mapped to the spelling used in the FCI group lists
# below (key = name to rewrite, value = name to use instead).
replacements = {
    'spinone italiano': 'italian spinone',
    'labrador': 'labrador retriever',
    'english toy spaniel': 'king charles spaniel',
}
df['breed'] = df['breed'].replace(to_replace=replacements)


# ---------------------FCI Group Mappings---------------------
# Breed names per FCI group.  The ORDER of the keys matters: the group number
# assigned to a breed is the position of its key in this dict ('Unclassified'
# is 0, 'Group 1' is 1, ... 'Group 10' is 10), so keys must not be reordered.
#
# Every breed must appear in exactly one list.  When the lookup table is built
# from this dict a later occurrence silently overrides an earlier one, so a
# duplicate hides which group is actually in effect.
fci_groups = {
    # Mixes are not officially recognized, so they get their own bucket.
    'Unclassified': ['mix medium', 'mix small', 'mix large'],

    'Group 1': [
        'australian shepherd', 'australian cattle dog', 'bearded collie',
        'belgian tervuren', 'border collie', 'bouvier des flandres',
        'briard', 'german shepherd', 'old english sheepdog',
        'polish lowland sheepdog', 'cardigan welsh corgi',
        'pembroke welsh corgi', 'shetland sheepdog',
    ],

    # 'portuguese water dog' used to be listed here as well as in Group 8;
    # the Group 8 entry was the one that took effect, so only that one is kept.
    # NOTE(review): the FCI nomenclature appears to list the Komondor under
    # Group 1 (sheepdogs) -- confirm whether Group 2 is intended.
    'Group 2': [
        'affenpinscher', 'anatolian shepherd dog', 'boxer', 'bullmastiff',
        'dobermann', 'german pinscher', 'great dane', 'rottweiler',
        'bernese mountain dog', 'giant schnauzer', 'great pyrenees',
        'greater swiss mountain dog', 'leonberger', 'komondor', 'mastiff',
        'miniature schnauzer', 'newfoundland', 'saint bernard',
        'tibetan mastiff',
    ],

    'Group 3': [
        'airedale terrier', 'australian terrier', 'bedlington terrier',
        'border terrier', 'bull terrier', 'cairn terrier', 'cesky terrier',
        'dandie dinmont terrier', 'english toy terrier',
        'glen of imaal terrier', 'irish terrier', 'kerry blue terrier',
        'manchester terrier', 'miniature bull terrier', 'norfolk terrier',
        'scottish terrier', 'sealyham terrier', 'silky terrier',
        'skye terrier', 'smooth fox terrier', 'soft coated wheaten terrier',
        'staffordshire bull terrier', 'west highland white terrier',
        'wire fox terrier', 'yorkshire terrier',
    ],

    'Group 4': ['dachshund'],

    'Group 5': [
        'akita', 'alaskan malamute', 'basenji', 'canaan dog', 'chow chow',
        'finnish lapphund', 'siberian husky', 'samoyed', 'shiba inu',
        'norwegian buhund', 'pharaoh hound', 'pomeranian',
        'swedish vallhund',
    ],

    'Group 6': [
        'american english coonhound', 'basset hound', 'beagle', 'bloodhound',
        'otterhound', 'petit basset griffon vendeen', 'rhodesian ridgeback',
    ],

    # 'brittany', 'pointer' and 'gordon setter' were each listed twice; the
    # repeats are removed.
    'Group 7': [
        'brittany', 'pointer', 'english setter', 'gordon setter',
        'irish setter', 'irish red and white setter', 'italian spinone',
        'vizsla', 'german shorthaired pointer', 'german wirehaired pointer',
        'weimaraner', 'wirehaired pointing griffon',
    ],

    # 'labrador' is kept as an alias next to 'labrador retriever' so the
    # short name still resolves to Group 8 if it reaches the lookup.
    'Group 8': [
        'chesapeake bay retriever', 'clumber spaniel', 'cocker spaniel',
        'curly coated retriever', 'english springer spaniel',
        'field spaniel', 'flat coated retriever', 'golden retriever',
        'irish water spaniel', 'labrador retriever', 'labrador',
        'nova scotia duck tolling retriever', 'portuguese water dog',
        'sussex spaniel', 'welsh springer spaniel',
    ],

    'Group 9': [
        'boston terrier', 'cavalier king charles spaniel', 'chihuahua',
        'chinese crested', 'french bulldog', 'japanese chin',
        'king charles spaniel', 'maltese', 'papillon', 'pekingese', 'pug',
        'shih tzu', 'brussels griffon', 'havanese',
    ],

    'Group 10': [
        'afghan hound', 'borzoi', 'greyhound', 'irish wolfhound',
        'italian greyhound', 'saluki', 'scottish deerhound', 'whippet',
    ],
}

# Descriptive name for each numeric group code.  The dict is built by
# position, so an entry's index in the tuple is its code (noted on the right).
category_labels = dict(enumerate((
    'Unclassified',                                   # 0
    'Sheepdogs and Cattle Dogs',                      # 1
    'Pinschers and Schnauzers – Molossoid Breeds',    # 2
    'Terriers',                                       # 3
    'Dachshunds',                                     # 4
    'Spitz and Primitive Types',                      # 5
    'Scenthounds and Related Breeds',                 # 6
    'Pointing Dogs',                                  # 7
    'Retrievers – Flushing Dogs – Water Dogs',        # 8
    'Companion and Toy Dogs',                         # 9
    'Sighthounds',                                    # 10
)))

# Flatten fci_groups into a breed -> group-number lookup.  The group number is
# the position of the group's key in fci_groups (first key is 0).  If a breed
# is listed under more than one group, the last listing wins.
breed_dict = {
    name: position
    for position, members in enumerate(fci_groups.values())
    for name in members
}

# Give every row its numeric group code.  `Series.map` produces NaN for any
# breed that is not a key of `breed_dict` (and for missing breed values);
# `fillna(0)` then files those rows under 0, the same code the 'Unclassified'
# mixes receive, before the column is cast to int.
df['breed_cat'] = df['breed'].map(breed_dict).fillna(0).astype(int)

# --------- Drop breed, export CSV -------------------------

# The numeric `breed_cat` code stands in for the raw breed name from here on.
# A text-label column is deliberately not materialised; use `category_labels`
# to translate a code into its group name when one is needed.
df = df.drop(columns=['breed'])

# Column layout of the exported table, listed left to right.
new_order = [
    'breed_cat', 'gender', 'age', 'longevity', 'size',
    'grooming_required', 'intelligence_category', 'sensitivity_level',
    'tolerates_being_alone', 'tolerates_cold_weather',
    'tolerates_hot_weather', 'kid_friendly', 'dog_friendly',
    'stranger_friendly', 'potential_for_mouthiness', 'prey_drive',
    'tendency_to_bark_or_howl', 'wanderlust_potential', 'exercise_needs',
    'energy_level',
    'allergies', 'bleeding', 'bloat', 'breathing', 'cleft_palate', 'dental',
    'elbows', 'eyes', 'heart', 'hips', 'kidney', 'liver', 'metabolic',
    'neurological', 'none', 'osteopathy', 'patella', 'respiratory', 'skin',
    'spine', 'thyroid', 'urinary',
    'yearly_final_cost',
]

# Rearrange the columns into the layout above.  Note that reindex is
# label-based: a name in `new_order` that is absent from `df` becomes an
# all-NaN column, and a column of `df` that is not listed is left out.
df_cat = df.reindex(new_order, axis='columns')

# Write the reordered table to disk without the row index.
df_cat.to_csv('breedcat_data.csv', index=False)

