# DS-SF-23 | Exploratory Data Analysis on Kaggle Dataset | Shelter Animal Outcomes

Data source: [Kaggle – Shelter Animal Outcomes](https://www.kaggle.com/c/shelter-animal-outcomes)

## CODE TO GET YOU STARTED

In [1]:
import os
import numpy as np
import pandas as pd
import csv
import re

# Keep rendered frames compact: cap rows and columns at 10 and use HTML tables.
pd.options.display.max_rows = 10
pd.options.display.notebook_repr_html = True
pd.options.display.max_columns = 10

In [3]:
# Load the shelter outcomes file from ../datasets and key the rows by AnimalID.
df = pd.read_csv(
    os.path.join('..', 'datasets', 'shelter-animal-outcomes.csv.gz'),
    index_col='AnimalID'
)

In [4]:
# Preview the frame; display.max_rows is 10, so pandas elides the middle rows.
df

Unnamed: 0_level_0,Name,DateTime,OutcomeType,OutcomeSubtype,AnimalType,SexuponOutcome,AgeuponOutcome,Breed,Color
AnimalID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
A671945,Hambone,2014-02-12 18:22:00,Return_to_owner,,Dog,Neutered Male,1 year,Shetland Sheepdog Mix,Brown/White
A656520,Emily,2013-10-13 12:44:00,Euthanasia,Suffering,Cat,Spayed Female,1 year,Domestic Shorthair Mix,Cream Tabby
A686464,Pearce,2015-01-31 12:28:00,Adoption,Foster,Dog,Neutered Male,2 years,Pit Bull Mix,Blue/White
A683430,,2014-07-11 19:09:00,Transfer,Partner,Cat,Intact Male,3 weeks,Domestic Shorthair Mix,Blue Cream
A667013,,2013-11-15 12:52:00,Transfer,Partner,Dog,Neutered Male,2 years,Lhasa Apso/Miniature Poodle,Tan
...,...,...,...,...,...,...,...,...,...
A702446,,2015-05-14 11:56:00,Transfer,Partner,Cat,Intact Male,1 month,Domestic Shorthair Mix,Brown Tabby/White
A718934,,2016-01-20 18:59:00,Transfer,SCRP,Cat,Spayed Female,3 months,Domestic Shorthair Mix,Brown Tabby
A698128,Zeus,2015-03-09 13:33:00,Adoption,,Dog,Neutered Male,4 years,Old English Bulldog Mix,White/Tan
A677478,,2014-04-27 12:22:00,Transfer,Partner,Cat,Intact Male,4 weeks,Domestic Shorthair Mix,Black


In [5]:
df.columns

Index([u'Name', u'DateTime', u'OutcomeType', u'OutcomeSubtype', u'AnimalType',
       u'SexuponOutcome', u'AgeuponOutcome', u'Breed', u'Color'],
      dtype='object')

In [7]:
# Number of distinct names, counting missing (NaN) as one value.
df.Name.nunique(dropna=False)

6375

In [8]:
df.Name.value_counts()

Max            136
Bella          135
Charlie        107
Daisy          106
Lucy            94
              ... 
Sally Jessy      1
Nelda            1
Hobie Cat        1
Madi             1
Sumi             1
Name: Name, dtype: int64

In [9]:
df.AnimalType.unique()

array(['Dog', 'Cat'], dtype=object)

In [10]:
df.AnimalType.value_counts()

Dog    15595
Cat    11134
Name: AnimalType, dtype: int64

In [11]:
df.OutcomeType.unique()

array(['Return_to_owner', 'Euthanasia', 'Adoption', 'Transfer', 'Died'], dtype=object)

In [13]:
df.OutcomeType.value_counts()

Adoption           10769
Transfer            9422
Return_to_owner     4786
Euthanasia          1555
Died                 197
Name: OutcomeType, dtype: int64

In [14]:
df.OutcomeSubtype.value_counts()

Partner                7816
Foster                 1800
SCRP                   1599
Suffering              1002
Aggressive              320
                       ... 
Enroute                   8
Court/Investigation       6
At Vet                    4
In Surgery                3
Barn                      2
Name: OutcomeSubtype, dtype: int64

In [15]:
# Tally each sex/sterilization label. value_counts() leaves NaN out by default,
# so rows with a missing label are not counted here.
df.SexuponOutcome.value_counts()

Neutered Male    9779
Spayed Female    8820
Intact Male      3525
Intact Female    3511
Unknown          1093
Name: SexuponOutcome, dtype: int64

In [16]:
df.SexuponOutcome.unique()

array(['Neutered Male', 'Spayed Female', 'Intact Male', 'Intact Female',
       'Unknown', nan], dtype=object)

In [17]:
'Male' in 'Neutered Male'

True

In [18]:
'Female' in 'Neutered Male'

False

In [24]:
# Boolean mask: True where the sex label contains 'Male'.
# The .str accessor with na=False maps missing labels to False, so this cell
# works whether or not NaN labels have been filled with 'Unknown' yet.
# The match is case-sensitive and literal (regex=False), so labels such as
# 'Spayed Female' are not flagged.
is_male = df.SexuponOutcome.str.contains('Male', regex=False, na=False)

In [21]:
# Treat a missing sex label the same as an explicit 'Unknown'.
# fillna does this in one vectorized call rather than a per-row Python lambda.
df['SexuponOutcome'] = df['SexuponOutcome'].fillna('Unknown')

In [22]:
df.SexuponOutcome.unique()

array(['Neutered Male', 'Spayed Female', 'Intact Male', 'Intact Female',
       'Unknown'], dtype=object)

In [25]:
is_male

AnimalID
A671945     True
A656520    False
A686464     True
A683430     True
A667013     True
           ...  
A702446     True
A718934    False
A698128     True
A677478     True
A706629     True
Name: SexuponOutcome, dtype: bool

In [26]:
# Boolean mask: True where the sex label contains 'Female'.
# na=False maps any missing label to False instead of raising, and
# regex=False makes this a plain case-sensitive substring test.
is_female = df.SexuponOutcome.str.contains('Female', regex=False, na=False)

In [27]:
is_female

AnimalID
A671945    False
A656520     True
A686464    False
A683430    False
A667013    False
           ...  
A702446    False
A718934     True
A698128    False
A677478    False
A706629    False
Name: SexuponOutcome, dtype: bool

In [28]:
# Create (or reset) the simplified 'Sex' column with 'Unknown' in every row;
# later cells overwrite the rows selected by the is_male / is_female masks.
df['Sex'] = 'Unknown'

In [29]:
df.Sex

AnimalID
A671945    Unknown
A656520    Unknown
A686464    Unknown
A683430    Unknown
A667013    Unknown
            ...   
A702446    Unknown
A718934    Unknown
A698128    Unknown
A677478    Unknown
A706629    Unknown
Name: Sex, dtype: object

In [33]:
df.Sex.value_counts()

Unknown    13425
Male       13304
Name: Sex, dtype: int64

In [34]:
# Overwrite 'Sex' with 'Male' wherever the is_male mask is True; other rows keep their value.
df['Sex'] = df['Sex'].mask(is_male, 'Male')

In [35]:
df.Sex.value_counts()

Unknown    13425
Male       13304
Name: Sex, dtype: int64

In [36]:
# Overwrite 'Sex' with 'Female' wherever the is_female mask is True; other rows keep their value.
df['Sex'] = df['Sex'].mask(is_female, 'Female')

In [37]:
df.Sex.value_counts()

Male       13304
Female     12331
Unknown     1094
Name: Sex, dtype: int64

In [38]:
# List the distinct raw age strings to see which units and spellings need parsing.
df.AgeuponOutcome.unique()

array(['1 year', '2 years', '3 weeks', '1 month', '5 months', '4 years',
       '3 months', '2 weeks', '2 months', '10 months', '6 months',
       '5 years', '7 years', '3 years', '4 months', '12 years', '9 years',
       '6 years', '1 weeks', '11 years', '4 weeks', '7 months', '8 years',
       '11 months', '4 days', '9 months', '8 months', '15 years',
       '10 years', '1 week', '0 years', '14 years', '3 days', '6 days',
       '5 days', '5 weeks', '2 days', '16 years', '1 day', '13 years', nan,
       '17 years', '18 years', '19 years', '20 years'], dtype=object)

In [45]:
a = '12 year'

In [50]:
a = '7 months'

In [51]:
a

'7 months'

In [52]:
import re #regular expressions

In [59]:
# Split an age string such as '7 months' into its count and its unit.
# Group 1 is the run of digits; group 2 is the following run of non-space
# characters and must end in a character other than 's', so a plural unit
# like 'months' is captured as 'month'.
match = re.compile(r'(\d+) (\S+[^s])').search(a)

In [60]:
int(match.group(1))

7

In [63]:
match.group(2)

'month'

In [65]:
# Number of distinct breed strings, counting missing (NaN) as one value.
df.Breed.nunique(dropna=False)

1380

In [66]:
df.Breed

AnimalID
A671945          Shetland Sheepdog Mix
A656520         Domestic Shorthair Mix
A686464                   Pit Bull Mix
A683430         Domestic Shorthair Mix
A667013    Lhasa Apso/Miniature Poodle
                      ...             
A702446         Domestic Shorthair Mix
A718934         Domestic Shorthair Mix
A698128        Old English Bulldog Mix
A677478         Domestic Shorthair Mix
A706629         Domestic Shorthair Mix
Name: Breed, dtype: object

In [67]:
# Number of distinct color strings, counting missing (NaN) as one value.
df.Color.nunique(dropna=False)

366

In [68]:
df.Color.value_counts()

Black/White                 2824
Black                       2292
Brown Tabby                 1635
Brown Tabby/White            940
White                        931
                            ... 
Blue Merle/Cream               1
Blue Tabby/Tan                 1
Brown Brindle/Blue Cream       1
Blue Cream/Buff                1
Blue Cream/Blue Tabby          1
Name: Color, dtype: int64

## TODO: How to clean up color data (e.g. Black/White, White/Black, Black, White)