In [2]:
import pandas as pd
import numpy as np

In [3]:
# Build the 5x3 demo frame from a seeded, local RandomState so that
# "Restart Kernel -> Run All" regenerates the same values without touching
# NumPy's global random state. The rendered outputs stored in this notebook
# predate the seed and will differ numerically until the notebook is re-run.
df = pd.DataFrame(np.random.RandomState(0).randn(5, 3),
                  index=['a', 'c', 'e', 'f', 'h'],
                  columns=['one', 'two', 'three'])

In [4]:
# Add a string column; the scalar 'bar' is repeated on every row.
df['four'] = 'bar'

In [5]:
# Boolean flag column: True where column 'one' is greater than 0.
df['five'] = df['one'] > 0

In [6]:
# Display df, now holding float, string and boolean columns.
df

Unnamed: 0,one,two,three,four,five
a,0.060292,-0.9276,0.853603,bar,True
c,0.560099,-0.346787,0.142385,bar,True
e,0.455387,-1.094195,-1.110477,bar,True
f,-1.044906,0.183499,-1.186992,bar,False
h,0.229114,0.572187,-0.196487,bar,True


In [7]:
# Conform df to the labels 'a'..'h'; labels absent from df ('b', 'd', 'g')
# become rows of missing values.
df2 = df.reindex(list('abcdefgh'))

In [8]:
# Display the reindexed frame; rows b, d and g are entirely missing.
df2

Unnamed: 0,one,two,three,four,five
a,0.060292,-0.9276,0.853603,bar,True
b,,,,,
c,0.560099,-0.346787,0.142385,bar,True
d,,,,,
e,0.455387,-1.094195,-1.110477,bar,True
f,-1.044906,0.183499,-1.186992,bar,False
g,,,,,
h,0.229114,0.572187,-0.196487,bar,True


In [9]:
# Select a single column as a Series; its missing entries are shown as NaN.
df2['one']

a    0.060292
b         NaN
c    0.560099
d         NaN
e    0.455387
f   -1.044906
g         NaN
h    0.229114
Name: one, dtype: float64

In [10]:
# Boolean mask of the missing entries in column 'one' (method form).
df2['one'].isnull()

a    False
b     True
c    False
d     True
e    False
f    False
g     True
h    False
Name: one, dtype: bool

In [11]:
# Boolean mask of the present (non-missing) entries in column 'four'
# (top-level function form).
pd.notnull(df2['four'])

a     True
b    False
c     True
d    False
e     True
f     True
g    False
h     True
Name: four, dtype: bool

In [12]:
# Element-wise missing-value mask for the whole frame.
df2.isnull()

Unnamed: 0,one,two,three,four,five
a,False,False,False,False,False
b,True,True,True,True,True
c,False,False,False,False,False
d,True,True,True,True,True
e,False,False,False,False,False
f,False,False,False,False,False
g,True,True,True,True,True
h,False,False,False,False,False


In [16]:
# Start over: rebind df2 to a copy of df (replacing the reindexed frame).
df2 = df.copy()

In [17]:
# Add a datetime column; the single Timestamp is repeated on every row.
df2['timestamp'] = pd.Timestamp('20160601')

In [18]:
# Display df2 with the new 'timestamp' column.
df2

Unnamed: 0,one,two,three,four,five,timestamp
a,0.060292,-0.9276,0.853603,bar,True,2016-06-01
c,0.560099,-0.346787,0.142385,bar,True,2016-06-01
e,0.455387,-1.094195,-1.110477,bar,True,2016-06-01
f,-1.044906,0.183499,-1.186992,bar,False,2016-06-01
h,0.229114,0.572187,-0.196487,bar,True,2016-06-01


In [19]:
# Blank out 'one' and 'timestamp' on rows a, c and h.
# Uses label-based .loc: the old .ix indexer was deprecated in pandas 0.20 and
# removed in 1.0. Every key here is a label, so .loc selects the same cells.
df2.loc[['a', 'c', 'h'], ['one', 'timestamp']] = np.nan

In [20]:
# Display df2 after the assignment: on rows a, c and h the float column shows
# NaN and the datetime column shows NaT.
df2

Unnamed: 0,one,two,three,four,five,timestamp
a,,-0.9276,0.853603,bar,True,NaT
c,,-0.346787,0.142385,bar,True,NaT
e,0.455387,-1.094195,-1.110477,bar,True,2016-06-01
f,-1.044906,0.183499,-1.186992,bar,False,2016-06-01
h,,0.572187,-0.196487,bar,True,NaT


In [21]:
# Count how many columns have each dtype, ordered by dtype name.
# DataFrame.get_dtype_counts() was deprecated in pandas 0.25 and removed in
# 1.0; counting the stringified dtypes and sorting by name gives the same
# listing on every pandas version.
df2.dtypes.astype(str).value_counts().sort_index()

bool              1
datetime64[ns]    1
float64           3
object            1
dtype: int64

In [22]:
# Integer Series holding 1, 2, 3 on the default 0..2 index.
s = pd.Series(list(range(1, 4)))

In [23]:
# Assign None into the numeric Series; the output below shows it stored as NaN
# with the Series now float64.
s.loc[0] = None

In [24]:
# Display s to see how the None was stored.
s

0    NaN
1    2.0
2    3.0
dtype: float64

In [25]:
# Rebind s to a Series of the strings 'a', 'b', 'c'.
s = pd.Series(list("abc"))

In [26]:
# Assign None into the string Series; the output below shows None kept as-is.
s.loc[0] = None

In [27]:
# Assign NaN into the same Series; the output below shows it kept as NaN,
# alongside the None at position 0.
s.loc[1] = np.nan

In [28]:
# Display s: position 0 holds None, position 1 holds NaN, dtype is object.
s

0    None
1     NaN
2       c
dtype: object

In [30]:
# Keep a copy of df under the name `a`.
# NOTE(review): `a` is not referenced in the cells visible here - confirm it is
# used later, otherwise this cell can be dropped.
a = df.copy()

In [32]:
# Sum of column 'one' (a single scalar).
df['one'].sum()

0.25998575831198156

In [33]:
# Row-wise mean over the numeric and boolean columns only.
# The axis is named explicitly, and numeric_only=True keeps the string column
# 'four' out of the reduction: pandas >= 2.0 no longer drops non-numeric
# columns silently and raises TypeError without it.
df.mean(axis=1, numeric_only=True)

a    0.246574
c    0.338924
e   -0.187321
f   -0.512100
h    0.401204
dtype: float64

In [34]:
# Cumulative sum down each column of the mixed-dtype frame. In the output
# below the strings in 'four' are concatenated and the booleans in 'five'
# accumulate as a running count.
df.cumsum()

Unnamed: 0,one,two,three,four,five
a,0.060292,-0.9276,0.853603,bar,True
c,0.620391,-1.27439,0.995988,barbar,2
e,1.07578,-2.36858,-0.114489,barbarbar,3
f,0.0308716,-2.18508,-1.30148,barbarbarbar,3
h,0.259986,-1.6129,-1.49797,barbarbarbarbar,4


In [35]:
# Group by the values of 'one' and average the remaining numeric/boolean
# columns. The columns are selected explicitly because the string column
# 'four' cannot be averaged: pandas >= 2.0 raises TypeError instead of
# silently dropping it.
df.groupby('one')[['two', 'three', 'five']].mean()

Unnamed: 0_level_0,two,three,five
one,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
-1.044906,0.183499,-1.186992,False
0.060292,-0.9276,0.853603,True
0.229114,0.572187,-0.196487,True
0.455387,-1.094195,-1.110477,True
0.560099,-0.346787,0.142385,True


In [36]:
# Re-display df2 (with its NaN / NaT entries) as the starting point for the
# fill examples.
df2

Unnamed: 0,one,two,three,four,five,timestamp
a,,-0.9276,0.853603,bar,True,NaT
c,,-0.346787,0.142385,bar,True,NaT
e,0.455387,-1.094195,-1.110477,bar,True,2016-06-01
f,-1.044906,0.183499,-1.186992,bar,False,2016-06-01
h,,0.572187,-0.196487,bar,True,NaT


In [37]:
# Fill every missing value with the scalar 0 (returns a new frame).
# NOTE(review): in the output below the missing timestamps come back as
# 1970-01-01, i.e. the 0 is applied to the datetime column as well - consider
# filling per column if that is not intended.
df2.fillna(0)

Unnamed: 0,one,two,three,four,five,timestamp
a,0.0,-0.9276,0.853603,bar,True,1970-01-01
c,0.0,-0.346787,0.142385,bar,True,1970-01-01
e,0.455387,-1.094195,-1.110477,bar,True,2016-06-01
f,-1.044906,0.183499,-1.186992,bar,False,2016-06-01
h,0.0,0.572187,-0.196487,bar,True,1970-01-01


In [38]:
# Fill missing entries of 'four' with the string 'missing'. The output below
# is all 'bar', i.e. this column has no missing values to replace here.
df2['four'].fillna('missing')

a    bar
c    bar
e    bar
f    bar
h    bar
Name: four, dtype: object

In [39]:
# Re-display df; it shows no missing values.
df

Unnamed: 0,one,two,three,four,five
a,0.060292,-0.9276,0.853603,bar,True
c,0.560099,-0.346787,0.142385,bar,True
e,0.455387,-1.094195,-1.110477,bar,True
f,-1.044906,0.183499,-1.186992,bar,False
h,0.229114,0.572187,-0.196487,bar,True


In [40]:
# Forward-fill: propagate the last valid value down each column.
# ffill() is the direct equivalent of fillna(method='pad'), whose `method`
# argument is deprecated since pandas 2.1. df has no missing values, so the
# result shown equals df.
df.ffill()

Unnamed: 0,one,two,three,four,five
a,0.060292,-0.9276,0.853603,bar,True
c,0.560099,-0.346787,0.142385,bar,True
e,0.455387,-1.094195,-1.110477,bar,True
f,-1.044906,0.183499,-1.186992,bar,False
h,0.229114,0.572187,-0.196487,bar,True


In [41]:
# 10x3 frame of standard-normal values with columns A, B, C, drawn from a
# seeded local RandomState so a fresh run rebuilds the same data. The rendered
# outputs stored in this notebook predate the seed and will differ numerically
# until the notebook is re-run.
dff = pd.DataFrame(np.random.RandomState(1).randn(10, 3), columns=list('ABC'))

In [42]:
# Punch holes into dff in place, by position (iloc slice ends are exclusive):
dff.iloc[3:5,0] = np.nan  # rows 3-4 of column A
dff.iloc[4:6,1] = np.nan  # rows 4-5 of column B
dff.iloc[5:8,2] = np.nan  # rows 5-7 of column C
# Display the frame with its missing values.
dff

Unnamed: 0,A,B,C
0,1.271355,-1.150795,-0.069327
1,0.16325,-0.885262,0.646302
2,0.818627,0.167846,0.548558
3,,-1.889239,1.049861
4,,,0.171938
5,0.636075,,
6,1.555896,-1.308266,
7,0.749882,1.117618,
8,-1.129328,-0.706977,-1.772163
9,-0.57306,-0.872514,-0.793924


In [43]:
# Fill each column's missing values with that column's own mean
# (dff.mean() yields one value per column; fillna matches them by column name).
dff.fillna(dff.mean())

Unnamed: 0,A,B,C
0,1.271355,-1.150795,-0.069327
1,0.16325,-0.885262,0.646302
2,0.818627,0.167846,0.548558
3,0.436587,-1.889239,1.049861
4,0.436587,-0.690949,0.171938
5,0.636075,-0.690949,-0.031251
6,1.555896,-1.308266,-0.031251
7,0.749882,1.117618,-0.031251
8,-1.129328,-0.706977,-1.772163
9,-0.57306,-0.872514,-0.793924


In [44]:
# Same idea, but only the label slice 'B' through 'C' of the column means is
# passed, so column A keeps its NaN values (see the output below).
dff.fillna(dff.mean()['B':'C'])

Unnamed: 0,A,B,C
0,1.271355,-1.150795,-0.069327
1,0.16325,-0.885262,0.646302
2,0.818627,0.167846,0.548558
3,,-1.889239,1.049861
4,,-0.690949,0.171938
5,0.636075,-0.690949,-0.031251
6,1.555896,-1.308266,-0.031251
7,0.749882,1.117618,-0.031251
8,-1.129328,-0.706977,-1.772163
9,-0.57306,-0.872514,-0.793924


In [45]:
# Mean imputation written with where(): keep each value that is present,
# otherwise take the column mean, aligned along the columns axis.
dff.where(dff.notnull(), dff.mean(), axis='columns')

Unnamed: 0,A,B,C
0,1.271355,-1.150795,-0.069327
1,0.16325,-0.885262,0.646302
2,0.818627,0.167846,0.548558
3,0.436587,-1.889239,1.049861
4,0.436587,-0.690949,0.171938
5,0.636075,-0.690949,-0.031251
6,1.555896,-1.308266,-0.031251
7,0.749882,1.117618,-0.031251
8,-1.129328,-0.706977,-1.772163
9,-0.57306,-0.872514,-0.793924
