# pandas

## Getting Started


In [40]:
#Importing the package
import pandas as pd

#Load the chopstick experiment results from the CSV file
data = pd.read_csv("chopstick-effectiveness.csv")

#Make sure every column holds numbers; pd.to_numeric raises if a value cannot be parsed
data = data.apply(pd.to_numeric)

In [41]:
#Display the DataFrame that was just loaded (a bare name on the last line of a cell is rendered as a table)
data

Unnamed: 0,Food Pinching Efficiency,Individual,Chopstick Length
0,19.55,1,180
1,27.24,2,180
2,28.76,3,180
3,31.19,4,180
4,21.91,5,180
5,27.62,6,180
6,29.46,7,180
7,26.35,8,180
8,26.69,9,180
9,30.22,10,180


In [42]:
#Shows only the first five rows of the data, which is enough to check the column names and values
data.head()

Unnamed: 0,Food Pinching Efficiency,Individual,Chopstick Length
0,19.55,1,180
1,27.24,2,180
2,28.76,3,180
3,31.19,4,180
4,21.91,5,180


## Cleaning/Organizing Data

In [43]:
#Option 1 for handling missing values: replace every missing entry with a dummy value (0).
#No rows are removed; note that the 0s will take part in any later statistics.
data = data.fillna(0) 

In [44]:
#Option 2 for handling missing values: drop every row that contains a missing entry.
#If the fillna(0) cell has already been run, no missing values remain and this drops nothing.
data = data.dropna() 

In [45]:
#Keep only the rows whose efficiency is strictly greater than 20
is_efficient = data["Food Pinching Efficiency"].gt(20)
data.loc[is_efficient]

Unnamed: 0,Food Pinching Efficiency,Individual,Chopstick Length
1,27.24,2,180
2,28.76,3,180
3,31.19,4,180
4,21.91,5,180
5,27.62,6,180
6,29.46,7,180
7,26.35,8,180
8,26.69,9,180
9,30.22,10,180
10,27.81,11,180


In [46]:
#Sorting by chopstick length (ascending); the sorted copy is only displayed, data itself is not reassigned
data.sort_values("Chopstick Length")

Unnamed: 0,Food Pinching Efficiency,Individual,Chopstick Length
0,19.55,1,180
30,27.97,31,180
29,28.29,30,180
28,14.47,29,180
27,21.48,28,180
26,28.98,27,180
25,26.67,26,180
24,17.94,25,180
23,22.82,24,180
21,22.66,22,180


In [47]:
#Sorting by the individual and showing the first five rows of the sorted result;
#data itself is not reassigned
data.sort_values("Individual").head()

Unnamed: 0,Food Pinching Efficiency,Individual,Chopstick Length
0,19.55,1,180
155,21.32,1,330
124,22.5,1,300
31,23.53,1,210
93,24.4,1,270


## Running Analyses

In [48]:
#Summary statistics (count, mean, std, min, quartiles, max) for every column
data.describe()

Unnamed: 0,Food Pinching Efficiency,Individual,Chopstick Length
count,186.0,186.0,186.0
mean,25.005591,16.0,255.0
std,4.039693,8.968413,51.37304
min,14.47,1.0,180.0
25%,22.54,8.0,210.0
50%,24.905,16.0,255.0
75%,27.9325,24.0,300.0
max,36.15,31.0,330.0


In [49]:
#Total of each column
data.sum()

Food Pinching Efficiency     4651.04
Individual                   2976.00
Chopstick Length            47430.00
dtype: float64

In [50]:
#Standard deviation of each column
data.std()

Food Pinching Efficiency     4.039693
Individual                   8.968413
Chopstick Length            51.373040
dtype: float64

In [51]:
#Mean of each column
data.mean()

Food Pinching Efficiency     25.005591
Individual                   16.000000
Chopstick Length            255.000000
dtype: float64

## So which length of chopsticks is most efficient?

In [52]:
#Get a list of unique lengths of chopsticks (in order of first appearance)
lengths = data["Chopstick Length"].unique()

#Average efficiency per chopstick length, computed in a single groupby pass
#instead of filtering the whole frame once per length.
mean_by_length = data.groupby("Chopstick Length", sort=False)["Food Pinching Efficiency"].mean()

#Print each length followed by its average efficiency.
#print(...) with a single argument behaves the same on Python 2 and Python 3,
#whereas the print statement is a SyntaxError on Python 3.
for length in lengths:
    print(length)
    print(mean_by_length.loc[length])
    print("==================\n")

180
24.9351612903

210
25.4838709677

240
26.3229032258

270
24.3238709677

300
24.9680645161

330
23.9996774194



### We can see that chopsticks of length 240 are the most efficient!

## But who was the best user?

In [53]:
#Get a list of unique individuals (in order of first appearance)
peeps = data["Individual"].unique()

#Average efficiency per individual, computed in a single groupby pass
#instead of filtering the whole frame once per person.
mean_by_person = data.groupby("Individual", sort=False)["Food Pinching Efficiency"].mean()

#efficiencies[i] is the average for peeps[i]; it stays a plain list so that
#list methods such as .index() keep working on it.
efficiencies = []

#Print each individual followed by their average efficiency.
#print(...) with a single argument behaves the same on Python 2 and Python 3,
#whereas the print statement is a SyntaxError on Python 3.
for person in peeps:
    avg = mean_by_person.loc[person]
    efficiencies.append(avg)
    print(person)
    print(avg)
    print("==================\n")

1
22.1066666667

2
26.455

3
29.1283333333

4
27.5566666667

5
21.235

6
28.0683333333

7
30.005

8
21.0083333333

9
31.0283333333

10
28.0683333333

11
24.9716666667

12
24.915

13
26.92

14
27.4033333333

15
22.9466666667

16
24.6583333333

17
27.4933333333

18
23.2216666667

19
29.265

20
20.0966666667

21
23.2233333333

22
22.4383333333

23
23.5733333333

24
25.0866666667

25
19.0416666667

26
23.3266666667

27
27.2766666667

28
19.835

29
17.4716666667

30
30.12

31
27.2283333333



In [54]:
#Position of the highest average in efficiencies (first one wins on a tie)
best_position = efficiencies.index(max(efficiencies))

#efficiencies[i] belongs to peeps[i], so look the ID up there rather than
#assuming "position + 1", which only holds when IDs are exactly 1..N in order.
#print(...) with a single argument works on both Python 2 and Python 3.
print(peeps[best_position])

9


## Person 9 can use chopsticks the best!