# Problem Set 5: Using Pandas
Follow the instructions below.

### Importing the package

In [1]:
#Import pandas
import pandas as pd

### 1. Loading and exploring the data
► Q1.1 Import the `EleTaiOneWeek.csv` file (located in the data folder) into a pandas dataframe.
  * Be sure the `CalcID` field is imported as a string 
  * Be sure the `StartDate`, `EndDate`, and `Fixtime` fields are imported as date fields

In [3]:
#Read the one-week CSV from the data folder: CalcID is kept as text and the
# three date fields are parsed as datetimes while loading
df = pd.read_csv(
    '../data/EleTaiOneWeek.csv',
    parse_dates=['StartDate','EndDate','Fixtime'],
    dtype={'CalcID':'str'},
)

---
► Q1.2 Display the first 5 records in the dataframe

In [None]:
#Preview the top of the dataframe (the first 5 records)
df.head(n=5)

---
► Q1.3 Display the data types of each column

In [None]:
#Display the data type of each column; use this to check that the import
# options took effect (CalcID as text, the three date fields as datetimes)
df.dtypes

---
► Q1.4 List each unique value in the `MovDataID` column

In [None]:
#Return the distinct values found in the MovDataID column
df['MovDataID'].unique()

---
► Q1.5 How many unique dates are in the `Fixtime` field?

In [None]:
#Count the distinct values in the `Fixtime` column. Fixtime holds full
# timestamps, so this is the number of distinct date-times, not calendar days.
df['Fixtime'].nunique()

---
### 2. Subsetting data

► Q2.1 Extract values in the `X` column into a variable called `xCoords`

In [None]:
#Pull the X column out of the dataframe as a series named xCoords
xCoords = df.loc[:, 'X']

---
► Q2.2 Display the last 10 values in the `xCoords` series

In [None]:
#Show the final ten entries of the xCoords series
xCoords.tail(10)

---
► Q2.3 Create a dataframe of just the `X` and `Y` columns and show the head (first 5 rows) of the dataframe

In [None]:
#Copy the X and Y columns into their own dataframe, then preview its first 5 rows
dfXY = df.loc[:, ['X','Y']]
dfXY.head(n=5)

---
► Q2.4 Create a Boolean mask (`dfGeorgeMask`) for all records where `MovDataID` is "George"

In [6]:
#Boolean series: True for each record whose MovDataID is "George"
dfGeorgeMask = df['MovDataID'].eq('George')

---
► Q2.5 Select all records where the MovDataID equals "George" into a dataframe called `dfGeorge`
 * *Feel free to use the Boolean mask above or use other techniques.*

In [7]:
#Keep only the rows flagged True in the George mask
dfGeorge = df.loc[dfGeorgeMask]

---
### 3. Answering questions with the data
► Q3.1 If the 'Y' column represents latitude, what's the southernmost location George has been seen?

In [8]:
#The smallest Y (latitude) value among George's records is his southernmost fix
dfGeorge.loc[:, 'Y'].min()

6.2032

---
► Q3.2 How many times has George been seen north of 6.23°N?

In [11]:
#Keep only George's records whose Y (latitude) value is greater than 6.23
dfNorth = dfGeorge.query("Y > 6.23")
#count() reports the number of non-null values in each column, so the answer is
# the count shown for any fully populated column
dfNorth.count()

MovDataID    28
CalcID       28
StartDate    28
EndDate      28
Fixtime      28
X            28
Y            28
Z             0
dtype: int64

---
► Q3.3 How many times has George been seen north of 6.23°N *and* west of 7.62°W (i.e., `X` < -7.62)?

In [10]:
#Subset George's records to those with Y above 6.23 and X below -7.62; inside
# query(), `&` is evaluated with the precedence of `and`, so no parentheses are needed
dfNW = dfGeorge.query("Y > 6.23 & X < -7.62")
#The number of rows in the subset is the number of sightings
dfNW.shape[0]

18

► Q3.4 What's the *earliest* "Fixtime" date George has been seen north of 6.23°N and west of 7.62°W (i.e., `X` < -7.62)?

In [12]:
#The minimum Fixtime in the north-west subset is the earliest sighting there
dfNW.Fixtime.min()

Timestamp('2017-04-10 04:12:32')

---
### 4. Grouping/aggregating data
► Q4.1 Group the dataframe of all elephants on the `MovDataID` attribute into a "groupBy" object called `grpName`.

In [13]:
#Group every record in the full dataframe by its MovDataID value
grpName = df.groupby(by='MovDataID')

► Q4.2 List the count of observations for each elephant

In [23]:
#Tally the MovDataID entries within each group: one count per elephant
grpName.MovDataID.count()

MovDataID
Anika         35
Blanche      146
Cami         139
Claudine     134
Dave         138
DjeDje       138
Frank        134
George       132
Laura        143
Mahmahdou    132
Marie         34
Omar         144
Sylvie       144
Tapa         134
Yves         134
Zamba        146
Name: MovDataID, dtype: int64

### 5. Transforming/pivoting data

In [59]:
#Derive an `Hour` column from the hour component of each Fixtime timestamp
df['Hour'] = df.Fixtime.dt.hour


In [64]:
#Pivot to one row per elephant and one column per hour of the day, with each
# cell holding the mean X value; `values` is passed as a list, so the result's
# columns carry an extra top level labelled X
df.pivot_table(
    values=['X'],
    index='MovDataID',
    columns='Hour',
    aggfunc='mean',
)

Unnamed: 0_level_0,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Hour,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
MovDataID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Anika,-7.897617,,,-7.895108,-7.896067,,,-7.892625,-7.898033,,...,,-7.893646,-7.893678,,,-7.894971,-7.897067,,,-7.895379
Blanche,-7.873469,-7.873723,-7.872239,-7.872672,-7.872683,-7.872319,-7.871022,-9.425011,-7.868287,-7.868488,...,-7.869611,-7.869874,-7.869888,-7.870777,-7.871547,-7.872306,-7.873325,-7.873256,-7.872856,-7.873053
Cami,-7.622808,-7.622478,-7.621619,-7.621742,-7.620194,-7.619456,-7.61665,-7.616727,-7.623403,-7.626608,...,-7.62082,-7.620121,-7.625278,-7.621028,-7.618223,-7.622433,-7.622256,-7.622117,-7.622364,-7.622814
Claudine,-7.637256,-7.637183,-7.637028,-9.230853,-7.636483,-7.636908,-7.635754,-7.629583,-7.626593,-7.628872,...,-7.630879,-7.626114,-7.632083,-7.631042,-7.624217,-7.633407,-7.632183,-7.630992,-7.635556,-7.636997
Dave,-7.654708,-7.655944,-7.656908,-7.657453,-7.657542,-7.656787,-7.662283,-9.250336,-9.024014,-9.568483,...,-7.65905,-10.050071,-7.66399,-7.665569,-9.252431,-7.668387,-7.659573,-7.6585,-7.65725,-7.655656
DjeDje,-8.028353,-8.025607,-8.027111,-8.026622,-8.026208,-8.025158,-8.027197,-8.023425,-8.02176,-8.02355,...,-8.022769,-8.023093,-8.025844,-8.026228,-8.030517,-8.026147,-9.559361,-8.031803,-8.021989,-8.02898
Frank,-7.704722,-7.705581,-7.707642,-7.709597,-7.711458,-7.715622,-7.716243,-7.719188,-7.717592,-7.721187,...,-7.707737,-7.713538,-7.71221,-7.706289,-7.706069,-7.708642,-7.706047,-7.706553,-7.706525,-7.704908
George,-7.62375,-7.627146,-7.622225,-7.619872,-7.61692,-7.622063,-7.615133,-7.617722,-7.621558,-7.617397,...,-7.622636,-7.6213,-7.622736,-7.626864,-7.627163,-7.627711,-7.627678,-7.62566,-7.627303,-7.625267
Laura,-7.707322,-7.707686,-7.708442,-9.290272,-7.709714,-7.710043,-7.711707,-9.293739,-9.290117,-7.709867,...,-7.709579,-9.064619,-7.708864,-7.708111,-7.70927,-7.708837,-7.707725,-7.706519,-7.706325,-7.706847
Mahmahdou,-7.681069,-7.682161,-7.681386,-7.67656,-7.678558,-7.675353,-7.67576,-7.672333,-9.026693,-9.57349,...,-7.669005,-7.679312,-7.67483,-7.666344,-7.682506,-7.66699,-7.671922,-7.674744,-7.677169,-7.679531


In [44]:
#Recompute the `Hour` column (this repeats the assignment made in the earlier
# cell) and preview the result.
#NOTE(review): the saved output lists a `Day` column that no cell visible in
# this notebook creates - confirm whether a cell was deleted.
df['Hour'] = df.Fixtime.dt.hour
df.head(n=5)

Unnamed: 0,MovDataID,CalcID,StartDate,EndDate,Fixtime,X,Y,Z,Day,Hour
0,Cami,587,2017-04-09 07:33:40,2017-04-15 16:31:42,2017-04-09 08:14:57,-7.62665,6.24205,,2017-04-09,8
1,Cami,587,2017-04-09 07:33:40,2017-04-15 16:31:42,2017-04-09 10:13:55,-7.628533,6.2392,,2017-04-09,10
2,Cami,587,2017-04-09 07:33:40,2017-04-15 16:31:42,2017-04-09 11:14:33,-7.628367,6.238933,,2017-04-09,11
3,Cami,587,2017-04-09 07:33:40,2017-04-15 16:31:42,2017-04-09 12:14:07,-7.6292,6.2375,,2017-04-09,12
4,Cami,587,2017-04-09 07:33:40,2017-04-15 16:31:42,2017-04-09 13:13:56,-7.6298,6.2384,,2017-04-09,13


In [51]:
#Group every record by the hour of day stored in the `Hour` column
grpHour = df.groupby(by='Hour')

In [54]:
#Summarize the elephant names recorded in each hour (count, unique, top, freq)
grpHour.MovDataID.describe()

Unnamed: 0_level_0,count,unique,top,freq
Hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,85,15,Mahmahdou,6
1,86,15,Mahmahdou,6
2,84,14,Mahmahdou,6
3,87,15,Sylvie,6
4,78,15,Mahmahdou,6
5,80,15,Sylvie,6
6,71,14,Blanche,6
7,76,15,Sylvie,6
8,84,15,Mahmahdou,7
9,86,15,Sylvie,7


In [31]:
#Group the records on their exact `Fixtime` timestamp. `grpTime` is built in
# this cell so that it exists when the notebook is run top to bottom on a
# fresh kernel.
grpTime = df.groupby('Fixtime')
#Summarize the elephant names recorded at each timestamp (count, unique, top, freq)
grpTime['MovDataID'].describe()

Unnamed: 0_level_0,count,unique,top,freq
Fixtime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-04-09 08:00:32,1,1,Zamba,1
2017-04-09 08:01:04,1,1,Dave,1
2017-04-09 08:01:32,1,1,Sylvie,1
2017-04-09 08:01:33,1,1,Laura,1
2017-04-09 08:12:27,1,1,George,1
2017-04-09 08:12:28,1,1,Yves,1
2017-04-09 08:12:53,1,1,Claudine,1
2017-04-09 08:13:09,1,1,Frank,1
2017-04-09 08:13:21,1,1,Tapa,1
2017-04-09 08:14:26,1,1,Blanche,1
