# Data Selection in Series

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Float-valued Series keyed by the string labels 'a' through 'd'.
data = pd.Series({'a': 0.25, 'b': 0.5, 'c': 0.75, 'd': 1.0})
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [3]:
# Dictionary-style lookup: fetch the value stored under the label 'b'.
data['b']

0.5

In [4]:
# Membership is tested against the index labels (the "keys"), as with a dict.
'a' in data

True

In [5]:
# keys() returns the Series index.
data.keys()

Index(['a', 'b', 'c', 'd'], dtype='object')

In [6]:
# items() yields (label, value) pairs; materialize them as a list for display.
list(data.items())

[('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)]

In [7]:
# Assigning to a label that does not exist yet extends the Series in place,
# just like adding a new key to a dict.
data['e'] = 1.25
data

a    0.25
b    0.50
c    0.75
d    1.00
e    1.25
dtype: float64

In [9]:
# Slicing by explicit labels: the end label 'c' is INCLUDED in the result.
data['a':'c']

a    0.25
b    0.50
c    0.75
dtype: float64

In [10]:
# Slicing by integers on this string-labelled Series is positional, and the
# end position is EXCLUDED (positions 0 and 1 only).
data[0:2]

a    0.25
b    0.50
dtype: float64

In [11]:
# Boolean masking: keep only the values strictly between 0.3 and 0.8.
data[(data>0.3) & (data<0.8)]

b    0.50
c    0.75
dtype: float64

In [12]:
# Fancy indexing: a list of labels selects exactly those entries.
data[['a','c']]

a    0.25
c    0.75
dtype: float64

### Indexers: loc and iloc (the older ix indexer has been removed from pandas)

In [13]:
# String values keyed by the integer labels 1..3. The labels start at 1, so a
# label and a zero-based position never refer to the same element here.
data = pd.Series({1: 'a', 2: 'b', 3: 'c'})
data

1    a
2    b
3    c
dtype: object

In [14]:
# With an integer index, plain [] with a single integer uses the explicit
# label: this is the element labelled 1, not the element at position 1.
data[1]

'a'

In [15]:
# ...whereas an integer slice through plain [] is positional: positions 1 and 2
# (labels 2 and 3). This inconsistency is why loc/iloc are preferred below.
data[1:3]

2    b
3    c
dtype: object

In [16]:
# loc refers to the explicit index labels: the element labelled 1.
data.loc[1]

'a'

In [17]:
# Label-based slice with loc: both endpoints (labels 1 and 3) are included.
data.loc[1:3]

1    a
2    b
3    c
dtype: object

In [18]:
# iloc refers to the implicit zero-based position: position 1 holds 'b'.
data.iloc[1]

'b'

In [19]:
# Positional slice with iloc: positions 1 and 2; the end position 3 is excluded.
data.iloc[1:3]

2    b
3    c
dtype: object

# Data Selection in DataFrame

In [21]:
# Population per city, keyed by city name.
pop = pd.Series({
    'Ha Noi': 8053663,
    'Thanh Hoa': 3640128,
    'Nghe An': 3327791,
    'Da Nang': 1134310,
    'Hue': 1128620,
})

# Area per city, using the same city names as keys (units are not stated in
# this notebook -- presumably km^2; confirm against the data source).
area = pd.Series({
    'Ha Noi': 3358.9,
    'Thanh Hoa': 11130.2,
    'Nghe An': 16493.7,
    'Da Nang': 1284.9,
    'Hue': 5033.2,
})

In [22]:
# Combine the two Series into one DataFrame with columns 'area' and 'pop'.
# Note that this rebinds `data`, which previously held a Series.
data = pd.DataFrame({'area':area, 'pop':pop})

In [23]:
# Display the frame: one row per city, one column per source Series.
data

Unnamed: 0,area,pop
Ha Noi,3358.9,8053663
Thanh Hoa,11130.2,3640128
Nghe An,16493.7,3327791
Da Nang,1284.9,1134310
Hue,5033.2,1128620


In [24]:
# Dictionary-style column access: returns the 'area' column as a Series.
data['area']

Ha Noi        3358.9
Thanh Hoa    11130.2
Nghe An      16493.7
Da Nang       1284.9
Hue           5033.2
Name: area, dtype: float64

In [25]:
# Attribute-style column access: same column data as data['area'].
data.area

Ha Noi        3358.9
Thanh Hoa    11130.2
Nghe An      16493.7
Da Nang       1284.9
Hue           5033.2
Name: area, dtype: float64

In [26]:
# Attribute-style and dictionary-style access refer to the same column.
# Compare contents with .equals() rather than `is`: object identity only holds
# when pandas happens to hand back a cached Series, which is an implementation
# detail that is not stable across pandas versions.
data.area.equals(data['area'])

True

In [27]:
# Attribute access is shadowed when a column name collides with a DataFrame
# attribute: `data.pop` is the DataFrame.pop method, not the 'pop' column, so
# this is False. Use data['pop'] to reach the column.
data.pop is data['pop']

False

In [28]:
# Dictionary-style assignment adds a derived column in place:
# element-wise population divided by area.
data['density'] = data['pop'] / data['area']
data

Unnamed: 0,area,pop,density
Ha Noi,3358.9,8053663,2397.708476
Thanh Hoa,11130.2,3640128,327.049649
Nghe An,16493.7,3327791,201.761339
Da Nang,1284.9,1134310,882.800218
Hue,5033.2,1128620,224.235079


In [29]:
# The raw data as a two-dimensional NumPy array. The integer 'pop' column is
# shown as floats because all columns share a single dtype in the array.
data.values

array([[3.35890000e+03, 8.05366300e+06, 2.39770848e+03],
       [1.11302000e+04, 3.64012800e+06, 3.27049649e+02],
       [1.64937000e+04, 3.32779100e+06, 2.01761339e+02],
       [1.28490000e+03, 1.13431000e+06, 8.82800218e+02],
       [5.03320000e+03, 1.12862000e+06, 2.24235079e+02]])

In [30]:
# Transpose: swap rows and columns (cities become columns).
data.T

Unnamed: 0,Ha Noi,Thanh Hoa,Nghe An,Da Nang,Hue
area,3358.9,11130.2,16493.7,1284.9,5033.2
pop,8053663.0,3640128.0,3327791.0,1134310.0,1128620.0
density,2397.708,327.0496,201.7613,882.8002,224.2351


In [31]:
# A single integer index on the underlying array selects a row (here the first).
data.values[0]

array([3.35890000e+03, 8.05366300e+06, 2.39770848e+03])

In [32]:
# iloc: purely positional selection -- first three rows, first two columns.
data.iloc[:3,:2]

Unnamed: 0,area,pop
Ha Noi,3358.9,8053663
Thanh Hoa,11130.2,3640128
Nghe An,16493.7,3327791


In [34]:
# loc: label-based selection -- both slice endpoints ('Da Nang' and 'pop')
# are included in the result.
data.loc[:'Da Nang', :'pop']

Unnamed: 0,area,pop
Ha Noi,3358.9,8053663
Thanh Hoa,11130.2,3640128
Nghe An,16493.7,3327791
Da Nang,1284.9,1134310


In [37]:
# loc also accepts a boolean row mask combined with a list of column labels:
# rows with density above 500, restricted to the 'pop' and 'density' columns.
data.loc[data.density > 500, ['pop','density']]

Unnamed: 0,pop,density
Ha Noi,8053663,2397.708476
Da Nang,1134310,882.800218


In [38]:
# Indexers can also assign: overwrite one cell in place by position
# (row 0 is 'Ha Noi', column 2 is 'density'). This mutates `data`, so frames
# displayed by earlier cells no longer reflect its current contents.
data.iloc[0,2] = 99

In [39]:
# Display the frame again to confirm the overwritten density for 'Ha Noi'.
data

Unnamed: 0,area,pop,density
Ha Noi,3358.9,8053663,99.0
Thanh Hoa,11130.2,3640128,327.049649
Nghe An,16493.7,3327791,201.761339
Da Nang,1284.9,1134310,882.800218
Hue,5033.2,1128620,224.235079
