In [1]:
import numpy as np
import pandas as pd
from numpy.random import randn

## Dropping entries

In [2]:
# Small labelled Series (values 0..2, labels a..c) used by the drop examples.
ser1 = pd.Series(data=np.arange(3), index=list('abc'))
ser1

a    0
b    1
c    2
dtype: int64

In [3]:
# Remove the entry labelled 'b'; the result is displayed but not stored.
ser1.drop(labels='b')

a    0
c    2
dtype: int64

In [4]:
# Re-display ser1: the drop('b') call above returned a new Series, so 'b' is still present here.
ser1

a    0
b    1
c    2
dtype: int64

In [5]:
# Keep the result of the drop by binding it to a new name.
temp_ser = ser1.drop(labels='b')
temp_ser

a    0
c    2
dtype: int64

In [6]:
# The same idea for a DataFrame: a 3x3 grid of 0..8, rows labelled by city.
dframe1 = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['SF', 'LA', 'NY'],
    columns=['col1', 'col2', 'col3'],
)
dframe1

Unnamed: 0,col1,col2,col3
SF,0,1,2
LA,3,4,5
NY,6,7,8


In [7]:
# Drop the row labelled 'LA'; the result is displayed but not stored.
dframe1.drop(index='LA')

Unnamed: 0,col1,col2,col3
SF,0,1,2
NY,6,7,8


In [8]:
# Re-display dframe1: the 'LA' row is still there because drop() returned a new frame.
dframe1

Unnamed: 0,col1,col2,col3
SF,0,1,2
LA,3,4,5
NY,6,7,8


In [9]:
# drop() leaves the original DataFrame untouched, so to keep the result
# it has to be assigned to a name.
dframe2 = dframe1.drop(index='LA')
dframe2

Unnamed: 0,col1,col2,col3
SF,0,1,2
NY,6,7,8


In [10]:
# Dropping a column works the same way; `columns=` is the keyword form of
# passing the label with axis=1 (axis=0 would address rows).
dframe1.drop(columns='col2')

Unnamed: 0,col1,col3
SF,0,2
LA,3,5
NY,6,8


In [11]:
# Re-display dframe1: 'col2' is still present because the column drop was not assigned back.
dframe1

Unnamed: 0,col1,col2,col3
SF,0,1,2
LA,3,4,5
NY,6,7,8


## Selecting entries in a Series

In [12]:
# Build the doubled Series (0, 2, 4 labelled a..c) from the literal base values
# rather than from the current `ser1`, so re-running this cell always gives
# the same result instead of doubling the values again.
ser1 = pd.Series(np.arange(3), index=['a', 'b', 'c']) * 2
ser1

a    0
b    2
c    4
dtype: int64

In [13]:
# Look up a single value by its index label.
ser1.loc['b']

2

In [14]:
# Select by integer position. `.iloc` is used because a plain integer key in
# `ser1[...]` on a string-labelled Series is deprecated as a positional lookup
# in recent pandas and is treated as a label in later versions.
ser1.iloc[1]

2

In [15]:
# Positional slice covering the first three entries.
ser1.iloc[0:3]

a    0
b    2
c    4
dtype: int64

In [16]:
# Select several entries at once by passing a list of labels.
ser1.loc[['a', 'b']]

a    0
b    2
dtype: int64

In [17]:
# Boolean-mask selection: keep only the entries whose value exceeds 3.
ser1.loc[ser1.gt(3)]

c    4
dtype: int64

In [18]:
# Boolean-mask assignment: overwrite every value greater than 3 with 10.
# This modifies ser1 itself.
ser1.loc[ser1.gt(3)] = 10
ser1

a     0
b     2
c    10
dtype: int64

## Selecting values in DataFrames

In [19]:
# Show the DataFrame that the selection examples below operate on.
dframe1

Unnamed: 0,col1,col2,col3
SF,0,1,2
LA,3,4,5
NY,6,7,8


In [20]:
# Pull out a single column by name; the result is a Series.
dframe1.loc[:, 'col1']

SF    0
LA    3
NY    6
Name: col1, dtype: int64

In [21]:
# A list of column names selects several columns and returns a DataFrame.
dframe1.loc[:, ['col1', 'col3']]

Unnamed: 0,col1,col3
SF,0,2
LA,3,5
NY,6,8


In [22]:
# Row filter: keep the rows whose 'col3' value is at least 5.
dframe1.loc[dframe1['col3'].ge(5)]

Unnamed: 0,col1,col2,col3
LA,3,4,5
NY,6,7,8


In [23]:
# Element-wise comparison yields a DataFrame of booleans with the same labels.
dframe1.gt(4)

Unnamed: 0,col1,col2,col3
SF,False,False,False
LA,False,False,True
NY,True,True,True


In [24]:
# Select a row by integer position (0 is the first row); the result is a Series.
dframe1.iloc[0, :]

col1    0
col2    1
col3    2
Name: SF, dtype: int64

In [25]:
# Select rows by their index labels.
dframe1.loc[['SF', 'LA'], :]

Unnamed: 0,col1,col2,col3
SF,0,1,2
LA,3,4,5


In [26]:
# A one-element label list still returns a DataFrame (a single-row one).
dframe1.loc[['SF'], :]

Unnamed: 0,col1,col2,col3
SF,0,1,2


## Data Alignment

In [27]:
# Current state of ser1 (the value above 3 was replaced by 10 earlier); left operand for the alignment example.
ser1

a     0
b     2
c    10
dtype: int64

In [28]:
# Second Series with one extra label ('d') that ser1 does not have.
ser2 = pd.Series(data=[3, 4, 5, 6], index=list('abcd'))
ser2

a    3
b    4
c    5
d    6
dtype: int64

In [29]:
# Addition aligns on index labels; a label present in only one Series gives NaN.
ser1.add(ser2)

a     3.0
b     6.0
c    15.0
d     NaN
dtype: float64

In [30]:
# Show dframe1 again; it is the left operand of the DataFrame alignment example.
dframe1

Unnamed: 0,col1,col2,col3
SF,0,1,2
LA,3,4,5
NY,6,7,8


In [31]:
# A 4x4 frame of 0..15 with one extra row ('BA') and one extra column ('col4')
# compared with dframe1.
dframe2 = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['SF', 'LA', 'BA', 'NY'],
    columns=['col1', 'col2', 'col3', 'col4'],
)
dframe2

Unnamed: 0,col1,col2,col3,col4
SF,0,1,2,3
LA,4,5,6,7
BA,8,9,10,11
NY,12,13,14,15


In [32]:
# Frames are aligned on both row and column labels; cells without a partner become NaN.
dframe1.add(dframe2)

Unnamed: 0,col1,col2,col3,col4
BA,,,,
LA,7.0,9.0,11.0,
NY,18.0,20.0,22.0,
SF,0.0,2.0,4.0,


In [33]:
# Avoid the NaN results: fill_value=0 stands in for a cell that is missing from
# one of the two frames, so the sum is just the other frame's value.
dframe1.add(other=dframe2, fill_value=0)

Unnamed: 0,col1,col2,col3,col4
BA,8.0,9.0,10.0,11.0
LA,7.0,9.0,11.0,7.0
NY,18.0,20.0,22.0,15.0
SF,0.0,2.0,4.0,3.0


In [34]:
# Operations between a Series and a DataFrame: take the third row of dframe2
# (by position) as a Series indexed by the column names.
ser3 = dframe2.iloc[2, :]
ser3

col1     8
col2     9
col3    10
col4    11
Name: BA, dtype: int64

In [35]:
# Show dframe2 for reference before subtracting the row Series from it.
dframe2

Unnamed: 0,col1,col2,col3,col4
SF,0,1,2,3
LA,4,5,6,7
BA,8,9,10,11
NY,12,13,14,15


In [36]:
# The Series labels are matched against the column names and the subtraction
# is applied to every row.
dframe2.sub(ser3, axis='columns')

Unnamed: 0,col1,col2,col3,col4
SF,-8,-8,-8,-8
LA,-4,-4,-4,-4
BA,0,0,0,0
NY,4,4,4,4


## Rank and Sort

In [37]:
# Series whose labels are deliberately out of alphabetical order.
ser1 = pd.Series(data=range(4), index=list('CABD'))
ser1

C    0
A    1
B    2
D    3
dtype: int64

In [38]:
# Order the entries by label; a new Series is returned and ser1 keeps its order.
ser1.sort_index(ascending=True)

A    1
B    2
C    0
D    3
dtype: int64

In [39]:
# Keep the label-sorted copy under its own name.
ser2 = ser1.sort_index(ascending=True)
ser2

A    1
B    2
C    0
D    3
dtype: int64

In [40]:
# Order the entries by value (smallest first) instead of by label.
ser2.sort_values(ascending=True)

C    0
A    1
B    2
D    3
dtype: int64

In [41]:
# Ten draws from the standard normal distribution. NumPy's global generator is
# seeded first so the values, and the sort/rank results derived from them,
# are the same on every Restart & Run All.
np.random.seed(0)
ser2 = pd.Series(randn(10))
ser2

0    0.313538
1    1.222091
2   -1.109406
3   -1.103313
4   -0.540627
5    1.309283
6   -0.542026
7   -0.088496
8    1.445465
9   -2.045161
dtype: float64

In [42]:
# Sort the random draws from smallest to largest; the original labels travel with the values.
ser2.sort_values(ascending=True)

9   -2.045161
2   -1.109406
3   -1.103313
6   -0.542026
4   -0.540627
7   -0.088496
0    0.313538
1    1.222091
5    1.309283
8    1.445465
dtype: float64

In [43]:
# Rank each value within the Series: 1 for the smallest, counting upwards.
ser2.rank(method='average', ascending=True)

0     7.0
1     8.0
2     2.0
3     3.0
4     5.0
5     9.0
6     4.0
7     6.0
8    10.0
9     1.0
dtype: float64

In [44]:
# A second batch of ten standard-normal draws for the rank-after-sort example.
ser3 = pd.Series(data=randn(10))
ser3

0    1.309328
1   -0.835436
2    0.630514
3    2.114483
4    0.622227
5    0.599752
6   -0.666301
7    0.180932
8   -0.354425
9    1.662553
dtype: float64

In [45]:
# Ranks of the unsorted draws (1 = smallest value).
ser3.rank(method='average', ascending=True)

0     8.0
1     1.0
2     7.0
3    10.0
4     6.0
5     5.0
6     2.0
7     4.0
8     3.0
9     9.0
dtype: float64

In [46]:
# Replace ser3 with its value-sorted version (smallest first).
ser3 = ser3.sort_values(ascending=True)
ser3

1   -0.835436
6   -0.666301
8   -0.354425
7    0.180932
5    0.599752
4    0.622227
2    0.630514
0    1.309328
9    1.662553
3    2.114483
dtype: float64

In [47]:
# With the Series now sorted by value, the ranks come out in increasing order.
ser3.rank(method='average', ascending=True)

1     1.0
6     2.0
8     3.0
7     4.0
5     5.0
4     6.0
2     7.0
0     8.0
9     9.0
3    10.0
dtype: float64