[*pandas*](http://pandas.pydata.org/) is a column-oriented data analysis API. It's a great tool for handling and analyzing input data, and many ML frameworks support *pandas* data structures as inputs.
Although a comprehensive introduction to the *pandas* API would span many pages, the core concepts are fairly straightforward, and we'll present them below. For a more complete reference, the [*pandas* docs site](http://pandas.pydata.org/pandas-docs/stable/index.html) contains extensive documentation and many tutorials.

In [1]:
import numpy as np
import pandas as pd

In [3]:
# Report the installed pandas version so the outputs below can be matched to it.
print('pandas version : {}'.format(pd.__version__))

pandas version : 0.23.0


In [4]:
# Load the admissions dataset into a DataFrame. The path is relative, so the
# CSV is looked up in the current working directory.
# NOTE: the `data.columns` output further down shows that two header names in
# this file carry a trailing space ('LOR ' and 'Chance of Admit '). The names
# are kept exactly as read, so later column lookups must include that space.
data = pd.read_csv('admission_data.csv')

In [6]:
# Preview the first rows of the dataset (head() shows five rows by default).
data.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


In [5]:
# Show five randomly chosen rows. No random_state is passed, so a different
# set of rows comes back on every run.
data.sample(5)

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
473,474,316,102,2,4.0,3.5,8.15,0,0.67
338,339,323,108,5,4.0,4.0,8.74,1,0.81
5,6,330,115,5,4.5,3.0,9.34,1,0.9
449,450,315,101,3,3.5,4.5,9.13,0,0.79
356,357,327,109,3,3.5,4.0,8.77,1,0.79


In [7]:
# List the dtype pandas assigned to each column when reading the CSV.
data.dtypes

Serial No.             int64
GRE Score              int64
TOEFL Score            int64
University Rating      int64
SOP                  float64
LOR                  float64
CGPA                 float64
Research               int64
Chance of Admit      float64
dtype: object

In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric
# column of the full dataset.
data.describe()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
count,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0,500.0
mean,250.5,316.472,107.192,3.114,3.374,3.484,8.57644,0.56,0.72174
std,144.481833,11.295148,6.081868,1.143512,0.991004,0.92545,0.604813,0.496884,0.14114
min,1.0,290.0,92.0,1.0,1.0,1.0,6.8,0.0,0.34
25%,125.75,308.0,103.0,2.0,2.5,3.0,8.1275,0.0,0.63
50%,250.5,317.0,107.0,3.0,3.5,3.5,8.56,1.0,0.72
75%,375.25,325.0,112.0,4.0,4.0,4.0,9.04,1.0,0.82
max,500.0,340.0,120.0,5.0,5.0,5.0,9.92,1.0,0.97


In [13]:
# Keep only the rows whose GRE score is strictly greater than 320.
high_gre_score_data = data.loc[data['GRE Score'].gt(320)]

In [14]:
# Summary statistics for the GRE > 320 subset, for comparison with the
# statistics of the full dataset.
high_gre_score_data.describe()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
count,197.0,197.0,197.0,197.0,197.0,197.0,197.0,197.0,197.0
mean,235.822335,327.77665,112.675127,3.989848,4.048223,3.997462,9.107157,0.903553,0.84203
std,145.733941,5.394712,4.076568,0.880572,0.706356,0.778371,0.399513,0.295955,0.091912
min,1.0,321.0,100.0,1.0,1.5,1.5,8.0,0.0,0.45
25%,112.0,324.0,110.0,3.0,3.5,3.5,8.84,1.0,0.79
50%,223.0,327.0,113.0,4.0,4.0,4.0,9.11,1.0,0.86
75%,362.0,331.0,116.0,5.0,4.5,4.5,9.38,1.0,0.92
max,500.0,340.0,120.0,5.0,5.0,5.0,9.92,1.0,0.97


In [20]:
# Keep only the rows that satisfy both conditions: GRE score strictly above
# 320 and TOEFL score strictly above 115. The two boolean masks are combined
# element-wise with `&`.
high_gre_toefl_score_data = data.loc[
    data['GRE Score'].gt(320) & data['TOEFL Score'].gt(115)
]

In [21]:
# Summary statistics for the subset with both GRE > 320 and TOEFL > 115.
high_gre_toefl_score_data.describe()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
count,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0,52.0
mean,220.0,333.25,117.730769,4.384615,4.528846,4.394231,9.454231,0.961538,0.922115
std,150.355787,3.8444,1.470166,0.745019,0.479107,0.588589,0.315491,0.194184,0.040261
min,1.0,326.0,116.0,2.0,3.0,3.0,8.0,0.0,0.78
25%,105.75,331.0,116.0,4.0,4.375,4.0,9.325,1.0,0.91
50%,191.0,333.0,118.0,4.5,4.5,4.5,9.48,1.0,0.93
75%,364.75,336.0,119.0,5.0,5.0,5.0,9.6525,1.0,0.9425
max,498.0,340.0,120.0,5.0,5.0,5.0,9.92,1.0,0.97


In [22]:
# List the column labels exactly as they were read from the CSV header; this
# makes stray whitespace in the names visible.
data.columns

Index(['Serial No.', 'GRE Score', 'TOEFL Score', 'University Rating', 'SOP',
       'LOR ', 'CGPA', 'Research', 'Chance of Admit '],
      dtype='object')

In [25]:
# First 15 values of the LOR column as a NumPy array. The trailing space in
# 'LOR ' is part of the column label and must be included in the lookup.
data['LOR '].iloc[:15].values

array([4.5, 4.5, 3.5, 2.5, 3. , 3. , 4. , 4. , 1.5, 3. , 4. , 4.5, 4.5,
       3. , 2. ])