# Data Importing/Formatting

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the squirrel census CSV that sits next to this notebook.
df = pd.read_csv(filepath_or_buffer='./squirrel_census.csv')
# Preview the first three rows to sanity-check the load.
df.head(n=3)

Unnamed: 0,long,lat,unique_squirrel_id,hectare,shift,date,hectare_squirrel_number,age,primary_fur_color,highlight_fur_color,...,approaches,indifferent,runs_from,other_interactions,lat_long,zip_codes,community_districts,borough_boundaries,city_council_districts,police_precincts
0,-73.956134,40.794082,37F-PM-1014-03,37F,PM,10142018,3,,,,...,False,False,False,,POINT (-73.9561344937861 40.7940823884086),,19,4,19,13
1,-73.957044,40.794851,37E-PM-1006-03,37E,PM,10062018,3,Adult,Gray,Cinnamon,...,False,False,True,me,POINT (-73.9570437717691 40.794850940803904),,19,4,19,13
2,-73.976831,40.766718,2E-AM-1010-03,02E,AM,10102018,3,Adult,Cinnamon,,...,False,True,False,,POINT (-73.9768311751004 40.76671780725581),,19,4,19,13


In [3]:
# Get rid of unnecessary features.
# `columns=` states the intent directly instead of a positional list + axis=1.
df = df.drop(columns=[
    'lat_long',
    'highlight_fur_color',
    'combination_of_primary_and_highlight_color',
    'color_notes',
    'specific_location',
    'above_ground_sighter_measurement',
    'other_activities',
    'other_interactions',
    'zip_codes',
    'community_districts',
    'borough_boundaries',
    'city_council_districts',
    'police_precincts',
])

# Changes 'False' and 'True' to 0 and 1, respectively.
# Only columns whose dtype is bool are converted. Multiplying the whole frame
# by 1 also touched the text and float columns and only left the text intact
# because `str * 1` returns the same string.
bool_columns = df.select_dtypes(include='bool').columns
df = df.astype({column: int for column in bool_columns})

print('Shape: ',df.shape)
df.head()

Shape:  (3023, 23)


Unnamed: 0,long,lat,unique_squirrel_id,hectare,shift,date,hectare_squirrel_number,age,primary_fur_color,location,...,eating,foraging,kuks,quaas,moans,tail_flags,tail_twitches,approaches,indifferent,runs_from
0,-73.956134,40.794082,37F-PM-1014-03,37F,PM,10142018,3,,,,...,0,0,0,0,0,0,0,0,0,0
1,-73.957044,40.794851,37E-PM-1006-03,37E,PM,10062018,3,Adult,Gray,Ground Plane,...,0,0,0,0,0,0,0,0,0,1
2,-73.976831,40.766718,2E-AM-1010-03,02E,AM,10102018,3,Adult,Cinnamon,Above Ground,...,0,0,0,0,0,0,0,0,1,0
3,-73.975725,40.769703,5D-PM-1018-05,05D,PM,10182018,5,Juvenile,Gray,Above Ground,...,0,0,0,0,0,0,0,0,0,1
4,-73.959313,40.797533,39B-AM-1018-01,39B,AM,10182018,1,,,Above Ground,...,0,0,1,0,0,0,0,0,0,0


In [4]:
# Cast the identifier and label columns to pandas' categorical dtype in a
# single astype call (one column -> dtype mapping instead of a loop).
df = df.astype(dict.fromkeys(
    ['unique_squirrel_id', 'hectare', 'shift', 'age', 'primary_fur_color', 'location'],
    'category',
))

# Show the resulting dtype of every column.
df.dtypes

long                        float64
lat                         float64
unique_squirrel_id         category
hectare                    category
shift                      category
date                          int64
hectare_squirrel_number       int64
age                        category
primary_fur_color          category
location                   category
running                       int32
chasing                       int32
climbing                      int32
eating                        int32
foraging                      int32
kuks                          int32
quaas                         int32
moans                         int32
tail_flags                    int32
tail_twitches                 int32
approaches                    int32
indifferent                   int32
runs_from                     int32
dtype: object

In [5]:
# Convert squirrel ID to integer: replace each ID with the integer code of
# its category.
df['unique_squirrel_id'] = df['unique_squirrel_id'].astype('category').cat.codes

df.head()

Unnamed: 0,long,lat,unique_squirrel_id,hectare,shift,date,hectare_squirrel_number,age,primary_fur_color,location,...,eating,foraging,kuks,quaas,moans,tail_flags,tail_twitches,approaches,indifferent,runs_from
0,-73.956134,40.794082,1912,37F,PM,10142018,3,,,,...,0,0,0,0,0,0,0,0,0,0
1,-73.957044,40.794851,1902,37E,PM,10062018,3,Adult,Gray,Ground Plane,...,0,0,0,0,0,0,0,0,0,1
2,-73.976831,40.766718,1297,02E,AM,10102018,3,Adult,Cinnamon,Above Ground,...,0,0,0,0,0,0,0,0,1,0
3,-73.975725,40.769703,2565,05D,PM,10182018,5,Juvenile,Gray,Above Ground,...,0,0,0,0,0,0,0,0,0,1
4,-73.959313,40.797533,2045,39B,AM,10182018,1,,,Above Ground,...,0,0,1,0,0,0,0,0,0,0


In [6]:
# Count how many sightings fall into each location category.
df['location'].value_counts()

Ground Plane    2116
Above Ground     843
Name: location, dtype: int64