In [1]:
import pandas as pd
import numpy as np

# SERIES

### Series-- 1-d array object which contains sequence of value and an associated array of data labels called index

In [2]:
ser = pd.Series(['a','b','c','d'])

In [3]:
ser

0    a
1    b
2    c
3    d
dtype: object

###  series with predefined index

In [4]:
ser1 = pd.Series([8,4,6,9], index= ['a','b','c','d'])

In [5]:
ser1

a    8
b    4
c    6
d    9
dtype: int64

###  selection of values with the help of index

In [6]:
ser1['a']

8

In [7]:
ser1[['a','d']]

a    8
d    9
dtype: int64

###  filtering with boolean array

In [8]:
ser1[ser1>3]

a    8
b    4
c    6
d    9
dtype: int64

In [9]:
ser1[ser1<=8]

a    8
b    4
c    6
dtype: int64

###  Scalar Multiplication

In [10]:
ser1*2

a    16
b     8
c    12
d    18
dtype: int64

###  Mathematical Function implication on series

In [11]:
np.exp(ser1)

a    2980.957987
b      54.598150
c     403.428793
d    8103.083928
dtype: float64

In [12]:
np.log(ser1)

a    2.079442
b    1.386294
c    1.791759
d    2.197225
dtype: float64

###  value check with the help of in operatorm

In [13]:
'b' in ser1

True

In [14]:
'e' in ser1

False

In [15]:
8 in ser1   # compatible with index value

False

In [16]:
2 in ser   # as this series has numeric index value

True

###  Creating Series with Dictionary

In [17]:
data = {'Name': 'Raj', 'key2': 20}

In [18]:
ser2 = pd.Series(data)

In [19]:
ser2

Name    Raj
key2     20
dtype: object

###  overwriting of index in existing series

In [20]:
student=['Name','Age','Qual']   # one or more values should be identical

In [21]:
ser2= pd.Series(data, index=student)

In [22]:
ser2

Name    Raj
Age     NaN
Qual    NaN
dtype: object

###  isnull() and notnull()

In [23]:
pd.isnull(ser2)

Name    False
Age      True
Qual     True
dtype: bool

In [24]:
pd.notnull(ser2)

Name     True
Age     False
Qual    False
dtype: bool

###  arithmatic operation

In [25]:
sdata = {'Hyd': 35000, 'Tel': 71000, 'Bih': 16000, 'Up': 5000}

In [26]:
obj = pd.Series(sdata)

In [27]:
obj

Hyd    35000
Tel    71000
Bih    16000
Up      5000
dtype: int64

In [28]:
states = ['WB', 'Hyd', 'Blore', 'Tel']

In [29]:
obj1 = pd.Series(sdata, index=states)

In [30]:
obj1

WB           NaN
Hyd      35000.0
Blore        NaN
Tel      71000.0
dtype: float64

In [31]:
obj+obj1

Bih           NaN
Blore         NaN
Hyd       70000.0
Tel      142000.0
Up            NaN
WB            NaN
dtype: float64

###  assigning name to series

In [32]:
obj1.name= 'state_pop'

In [33]:
obj1

WB           NaN
Hyd      35000.0
Blore        NaN
Tel      71000.0
Name: state_pop, dtype: float64

###  assigning name to index

In [34]:
obj1.index.name = 'States'

In [35]:
obj1

States
WB           NaN
Hyd      35000.0
Blore        NaN
Tel      71000.0
Name: state_pop, dtype: float64

###  index alteration by assignment

In [36]:
obj1.index = ['Bihar','Punjab','UP','MP']

In [37]:
obj1

Bihar         NaN
Punjab    35000.0
UP            NaN
MP        71000.0
Name: state_pop, dtype: float64

###  Droping entries from axis in Series

In [38]:
obj2= pd.Series(np.arange(6), index=list('abcdef'))

In [39]:
obj2

a    0
b    1
c    2
d    3
e    4
f    5
dtype: int32

In [40]:
new_obj = obj2.drop('b')

In [41]:
new_obj

a    0
c    2
d    3
e    4
f    5
dtype: int32

In [42]:
obj2.drop(['d','f'])

a    0
b    1
c    2
e    4
dtype: int32

In [43]:
obj2

a    0
b    1
c    2
d    3
e    4
f    5
dtype: int32

###  Indexing, Slicing and Filtering with Series

In [44]:
obj2['e']  #indexing

4

In [45]:
obj2[2:4]  # Slicing

c    2
d    3
dtype: int32

In [46]:
obj2[['b','e']]   #indexing

b    1
e    4
dtype: int32

In [47]:
obj2[[1,3]]     #indexing

b    1
d    3
dtype: int32

In [48]:
obj2['b':'e']   # Slicing with labels

b    1
c    2
d    3
e    4
dtype: int32

In [49]:
obj2[obj2<3]   #  Filtering

a    0
b    1
c    2
dtype: int32

###  modifying values

In [50]:
obj2['b':'c'] = 5

In [51]:
obj2

a    0
b    5
c    5
d    3
e    4
f    5
dtype: int32

# DataFrame

## Dataframe -- a group of series, where each series contains a column of data

### Dataframe from dictionary

In [52]:
data = {'name':['Raj','Mohan','Ramesh','Pinky'],
         'age':[20,21,25,19],
          'qual':['Grad','Engg','BSc','Bcom']}

In [53]:
df = pd.DataFrame(data)

In [54]:
df

Unnamed: 0,name,age,qual
0,Raj,20,Grad
1,Mohan,21,Engg
2,Ramesh,25,BSc
3,Pinky,19,Bcom


###  manipulation over column position

In [55]:
pd.DataFrame(data, columns=['age','name','qual'])

Unnamed: 0,age,name,qual
0,20,Raj,Grad
1,21,Mohan,Engg
2,25,Ramesh,BSc
3,19,Pinky,Bcom


In [56]:
df

Unnamed: 0,name,age,qual
0,Raj,20,Grad
1,Mohan,21,Engg
2,Ramesh,25,BSc
3,Pinky,19,Bcom


In [57]:
df1= pd.DataFrame(data, columns=['age','name','qual'])

In [58]:
df1

Unnamed: 0,age,name,qual
0,20,Raj,Grad
1,21,Mohan,Engg
2,25,Ramesh,BSc
3,19,Pinky,Bcom


### if you pass column name more than the value in dictionry

In [59]:
df2 = pd.DataFrame(data, columns=['name','age','qual','married'])

In [60]:
df2

Unnamed: 0,name,age,qual,married
0,Raj,20,Grad,
1,Mohan,21,Engg,
2,Ramesh,25,BSc,
3,Pinky,19,Bcom,


###  to check columns in dataframe

In [61]:
df2.columns

Index(['name', 'age', 'qual', 'married'], dtype='object')

###  to retrive column attribute

In [62]:
df2['age']    #index method

0    20
1    21
2    25
3    19
Name: age, dtype: int64

In [63]:
df2.age   # direct method

0    20
1    21
2    25
3    19
Name: age, dtype: int64

###  based on label i.e loc

In [64]:
df2.loc[1]  #index value is provided here

name       Mohan
age           21
qual        Engg
married      NaN
Name: 1, dtype: object

In [65]:
df2.loc[:,['age','name']]  # all rows along with age and name column

Unnamed: 0,age,name
0,20,Raj
1,21,Mohan
2,25,Ramesh
3,19,Pinky


In [66]:
df2.loc[:,'age']

0    20
1    21
2    25
3    19
Name: age, dtype: int64

In [67]:
df2.loc[[1,3]]

Unnamed: 0,name,age,qual,married
1,Mohan,21,Engg,
3,Pinky,19,Bcom,


In [68]:
df.loc[[1,3],['age','name']]

Unnamed: 0,age,name
1,21,Mohan
3,19,Pinky


###  based on position i.e iloc

In [69]:
df2.iloc[0:2,1:3]

Unnamed: 0,age,qual
0,20,Grad
1,21,Engg


In [70]:
df2.iloc[:,[3,1]]

Unnamed: 0,married,age
0,,20
1,,21
2,,25
3,,19


In [71]:
df2.iloc[[1,3],:]

Unnamed: 0,name,age,qual,married
1,Mohan,21,Engg,
3,Pinky,19,Bcom,


In [72]:
df2.iloc[[0,2],[0,3]]

Unnamed: 0,name,married
0,Raj,
2,Ramesh,


###  column modification with assignment

In [73]:
df2['married'] = 'No'

In [74]:
df2

Unnamed: 0,name,age,qual,married
0,Raj,20,Grad,No
1,Mohan,21,Engg,No
2,Ramesh,25,BSc,No
3,Pinky,19,Bcom,No


In [75]:
df2['income']= 200     

In [76]:
df2

Unnamed: 0,name,age,qual,married,income
0,Raj,20,Grad,No,200
1,Mohan,21,Engg,No,200
2,Ramesh,25,BSc,No,200
3,Pinky,19,Bcom,No,200


In [77]:
df2['profession']=['Intern','Student','Student','Intern']

In [78]:
df2

Unnamed: 0,name,age,qual,married,income,profession
0,Raj,20,Grad,No,200,Intern
1,Mohan,21,Engg,No,200,Student
2,Ramesh,25,BSc,No,200,Student
3,Pinky,19,Bcom,No,200,Intern


###  deletion of column from dataframe

In [79]:
del df2['income']

In [80]:
df2

Unnamed: 0,name,age,qual,married,profession
0,Raj,20,Grad,No,Intern
1,Mohan,21,Engg,No,Student
2,Ramesh,25,BSc,No,Student
3,Pinky,19,Bcom,No,Intern


###  Dictionary Creation with nested Dictionary

In [81]:
data = {'Pune':{2001:4, 2002:9},
         'Hyd':{2000:5,2001:7,2002:3}}

In [82]:
df3 = pd.DataFrame(data)   #outer key-- column  #inner key--index

In [83]:
df3

Unnamed: 0,Pune,Hyd
2001,4.0,7
2002,9.0,3
2000,,5


###  Transpose

In [84]:
df3.T

Unnamed: 0,2001,2002,2000
Pune,4.0,9.0,
Hyd,7.0,3.0,5.0


###  Index name

In [85]:
df3.index.name = 'Year'

###  Column Name

In [86]:
df3.columns.name = 'City'

In [87]:
df3

City,Pune,Hyd
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2001,4.0,7
2002,9.0,3
2000,,5


###  to check  values

In [88]:
df3.values

array([[ 4.,  7.],
       [ 9.,  3.],
       [nan,  5.]])

In [89]:
df3['Pune'].unique

<bound method Series.unique of Year
2001    4.0
2002    9.0
2000    NaN
Name: Pune, dtype: float64>

In [90]:
df3['Hyd'].unique()

array([7, 3, 5], dtype=int64)

# Reindexing

## Reindexing-- create a new object with the data conformed(adoption) to a new index

###  Reindexing with Series

In [91]:
a = pd.Series([1,2,3,4], index= list('abcd'))

In [92]:
a_rindex = a.reindex(list('acdeb'))

In [93]:
a

a    1
b    2
c    3
d    4
dtype: int64

In [94]:
a_rindex

a    1.0
c    3.0
d    4.0
e    NaN
b    2.0
dtype: float64

 ## for ordered data, like time series, we can do interpolation i.e filling of values when reindeing.
 ## we can do this by method -- ffill and bfill

In [95]:
colour = pd.Series(['red','blue','green'], index=[0,2,4])

In [96]:
colour1 = pd.Series(['red','blue','green'], index=[0,2,4])

In [97]:
colour

0      red
2     blue
4    green
dtype: object

###  ffill

In [98]:
colour = colour.reindex(range(6),method='ffill')

In [99]:
colour

0      red
1      red
2     blue
3     blue
4    green
5    green
dtype: object

In [100]:
colour1 = colour1.reindex(range(6), method='bfill')

In [101]:
colour1

0      red
1     blue
2     blue
3    green
4    green
5      NaN
dtype: object

###  Reindexing with DataFrame

###  in dataframe reindexing can alter either index or column or both

In [102]:
df = pd.DataFrame(np.arange(9).reshape(3,3), index= list('acb'),columns= ['Hyd','Pune','Blore'])

In [103]:
df

Unnamed: 0,Hyd,Pune,Blore
a,0,1,2
c,3,4,5
b,6,7,8


In [104]:
df1= df.reindex(list('abdc'))  # when only one sequence is pass it reindex the row/index

In [105]:
df1

Unnamed: 0,Hyd,Pune,Blore
a,0.0,1.0,2.0
b,6.0,7.0,8.0
d,,,
c,3.0,4.0,5.0


In [106]:
city = ['Hyd','Mumbai','Pune']

In [107]:
df2 = df.reindex(columns=city)   # columns can be reindex with columns keyword

In [108]:
df2

Unnamed: 0,Hyd,Mumbai,Pune
a,0,,1
c,3,,4
b,6,,7


In [109]:
df3= df.reindex(list('abdc'), columns=city)

In [110]:
df3

Unnamed: 0,Hyd,Mumbai,Pune
a,0.0,,1.0
b,6.0,,7.0
d,,,
c,3.0,,4.0


###  Droping entries from Axis with DataFrame

In [114]:
df = pd.DataFrame(np.arange(16).reshape(4,4), index= list('abcd'), columns = ['Hyd','Pune','Blore','Kolkata'])

In [115]:
df

Unnamed: 0,Hyd,Pune,Blore,Kolkata
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [116]:
df.drop('c')    # dropping by index value

Unnamed: 0,Hyd,Pune,Blore,Kolkata
a,0,1,2,3
b,4,5,6,7
d,12,13,14,15


In [117]:
df.drop('Blore', axis =1)    # dropping by columns

Unnamed: 0,Hyd,Pune,Kolkata
a,0,1,3
b,4,5,7
c,8,9,11
d,12,13,15


In [118]:
df

Unnamed: 0,Hyd,Pune,Blore,Kolkata
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [119]:
df.drop('Blore', axis =1, inplace = True)

In [120]:
df

Unnamed: 0,Hyd,Pune,Kolkata
a,0,1,3
b,4,5,7
c,8,9,11
d,12,13,15


###  Indexing, Selection and Filtering with Dataframe

In [121]:
data = pd.DataFrame(np.arange(16).reshape(4,4), index = list('abcd'), columns = ['one','two','three','four'])

In [122]:
data

Unnamed: 0,one,two,three,four
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [123]:
data['two']     #indexing

a     1
b     5
c     9
d    13
Name: two, dtype: int32

In [124]:
data[:2]     #slicing

Unnamed: 0,one,two,three,four
a,0,1,2,3
b,4,5,6,7


In [125]:
data[data['three']>5]     # Filtering

Unnamed: 0,one,two,three,four
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [126]:
data <5     #Boolean DataFrame

Unnamed: 0,one,two,three,four
a,True,True,True,True
b,True,False,False,False
c,False,False,False,False
d,False,False,False,False


In [127]:
data[data<5] = 0     #assinging values with boolean 

In [128]:
data

Unnamed: 0,one,two,three,four
a,0,0,0,0
b,0,5,6,7
c,8,9,10,11
d,12,13,14,15


# Integer Index

In [137]:
ser = pd.Series(np.arange(4))

In [138]:
ser

0    0
1    1
2    2
3    3
dtype: int32

In [139]:
ser[1]

1

In [141]:
ser[:2]

0    0
1    1
dtype: int32

In [142]:
ser.loc[:2]

0    0
1    1
2    2
dtype: int32

In [143]:
ser.iloc[-1]

3

In [144]:
ser.iloc[-3:-1]

1    1
2    2
dtype: int32

In [145]:
ser2 = pd.Series(np.arange(4), index=list('abcd'))

In [146]:
ser2[-1]

3

# Arithmetic and Data Alignment

###  with Series

In [147]:
s1 = pd.Series(np.arange(3), index = list('abc'))
s2 = pd.Series(np.arange(5), index = ['a','c','e','f','g'])

In [148]:
s1

a    0
b    1
c    2
dtype: int32

In [149]:
s2

a    0
c    1
e    2
f    3
g    4
dtype: int32

In [150]:
s1+s2

a    0.0
b    NaN
c    3.0
e    NaN
f    NaN
g    NaN
dtype: float64

###  with dataframe

In [151]:
df1 = pd.DataFrame(np.arange(9).reshape(3,3), columns = list('bcd'), index = ['one','two','three'])
df2 = pd.DataFrame(np.arange(12).reshape(4,3), columns = list('bde'), index = ['one','two','three','four'])

In [152]:
df1

Unnamed: 0,b,c,d
one,0,1,2
two,3,4,5
three,6,7,8


In [153]:
df2

Unnamed: 0,b,d,e
one,0,1,2
two,3,4,5
three,6,7,8
four,9,10,11


In [154]:
df1+df2

Unnamed: 0,b,c,d,e
four,,,,
one,0.0,,3.0,
three,12.0,,15.0,
two,6.0,,9.0,


In [155]:
df3 = pd.DataFrame({'A':[1,2]})
df4 = pd.DataFrame({'B':[3,4]})

In [156]:
df3

Unnamed: 0,A
0,1
1,2


In [157]:
df4

Unnamed: 0,B
0,3
1,4


In [158]:
df3+df4

Unnamed: 0,A,B
0,,
1,,


###  Arithmetic method with fill value

In [159]:
df5 = pd.DataFrame(np.arange(12).reshape(3,4), columns = list('bcde'))
df6 = pd.DataFrame(np.arange(20).reshape(4,5), columns = list('bdefc'))

In [160]:
df5

Unnamed: 0,b,c,d,e
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11


In [161]:
df6.loc[1,'b'] = np.nan

In [162]:
df6

Unnamed: 0,b,d,e,f,c
0,0.0,1,2,3,4
1,,6,7,8,9
2,10.0,11,12,13,14
3,15.0,16,17,18,19


In [163]:
df5+df6

Unnamed: 0,b,c,d,e,f
0,0.0,5.0,3.0,5.0,
1,,14.0,12.0,14.0,
2,18.0,23.0,21.0,23.0,
3,,,,,


In [164]:
df5.add(df6, fill_value=0)

Unnamed: 0,b,c,d,e,f
0,0.0,5.0,3.0,5.0,3.0
1,4.0,14.0,12.0,14.0,8.0
2,18.0,23.0,21.0,23.0,13.0
3,15.0,19.0,16.0,17.0,18.0


# Operation Between DataFrame and Series

In [165]:
df6 = pd.DataFrame(np.arange(12).reshape(4,3), columns= list('abc'), index = ['one','two','three','four'])

In [166]:
df6

Unnamed: 0,a,b,c
one,0,1,2
two,3,4,5
three,6,7,8
four,9,10,11


In [167]:
series = df6.iloc[0]    # complete one row value

In [168]:
series

a    0
b    1
c    2
Name: one, dtype: int32

In [169]:
df6-series   # broadcasting down the rows as index value matches

Unnamed: 0,a,b,c
one,0,0,0
two,3,3,3
three,6,6,6
four,9,9,9


In [170]:
series2 = pd.Series(range(3), index=list('afc'))  # if index value of series and column value of dataframe didn't match, object will be reindexed to form union

In [171]:
df6+series2

Unnamed: 0,a,b,c,f
one,0.0,,4.0,
two,3.0,,7.0,
three,6.0,,10.0,
four,9.0,,13.0,


###  Broadcasting over Column

In [172]:
series3 = df6['b']

In [173]:
series3    # complete one column value

one       1
two       4
three     7
four     10
Name: b, dtype: int32

In [174]:
df6.sub(series3, axis=0)

Unnamed: 0,a,b,c
one,-1,0,1
two,-1,0,1
three,-1,0,1
four,-1,0,1


# Function Application and Mapping

In [175]:
df7 = pd.DataFrame(np.random.randn(4,3), columns=list('bde'), index = ['one','two','three','four'])

In [176]:
df7

Unnamed: 0,b,d,e
one,-0.902534,-1.210747,0.028141
two,0.795487,1.127444,0.170395
three,0.047141,0.148038,-0.024307
four,1.252542,-0.753411,-1.68562


In [177]:
cols = ['b', 'd' , 'e']

In [178]:
df7[cols] = df7[cols].applymap(np.int64)


In [179]:
df7

Unnamed: 0,b,d,e
one,0,-1,0
two,0,1,0
three,0,0,0
four,1,0,-1


In [180]:
df7['b'] = df7.b.astype(float)

In [181]:
df7

Unnamed: 0,b,d,e
one,0.0,-1,0
two,0.0,1,0
three,0.0,0,0
four,1.0,0,-1


###  Absolute Function

In [182]:
np.abs(df7)

Unnamed: 0,b,d,e
one,0.0,1.0,0.0
two,0.0,1.0,0.0
three,0.0,0.0,0.0
four,1.0,0.0,1.0


###  applying function on one dimensional array to each columns or row

In [183]:
f= lambda x: x.max()-x.min()

In [184]:
df7.apply(f)

b    1.0
d    2.0
e    1.0
dtype: float64

In [185]:
df7.apply(f, axis= 1)

one      1.0
two      1.0
three    0.0
four     2.0
dtype: float64

In [186]:
def f(x):
    return pd.Series([x.min(),x.max()], index = ['min','max'])
df7.apply(f)                                                      # Series with multiple values

Unnamed: 0,b,d,e
min,0.0,-1,-1
max,1.0,1,0


###  Element wise Python function


In [191]:
d= lambda x: '%.2f'%x

In [192]:
df7.applymap(d)

Unnamed: 0,b,d,e
one,0.0,-1.0,0.0
two,0.0,1.0,0.0
three,0.0,0.0,0.0
four,1.0,0.0,-1.0


In [193]:
df7['e'].map(d)

one       0.00
two       0.00
three     0.00
four     -1.00
Name: e, dtype: object

In [194]:
def add_2(val):
    return val+2
df7.apply(add_2)

Unnamed: 0,b,d,e
one,2.0,1,2
two,2.0,3,2
three,2.0,2,2
four,3.0,2,1


# Sorting and Ranking

###  sorting series with index

In [195]:
obj = pd.Series( range(4), index=list('bdac'))

In [196]:
obj

b    0
d    1
a    2
c    3
dtype: int64

In [197]:
obj.sort_index()

a    2
b    0
c    3
d    1
dtype: int64

###  sorting series with values

In [198]:
obj1 = pd.Series([4,-8,7,-1])

In [199]:
obj1.sort_values()

1   -8
3   -1
0    4
2    7
dtype: int64

In [200]:
obj2 = pd.Series([4,-8,7,np.nan,-1, np.nan])

In [201]:
obj2.sort_values()

1   -8.0
4   -1.0
0    4.0
2    7.0
3    NaN
5    NaN
dtype: float64

###  Ranking in series

In [202]:
obj3 = pd.Series([7, -5, 7, 4, 2, 0, 4])

In [203]:
obj3.rank()

0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64

In [204]:
obj3.rank(method = 'first')

0    6.0
1    1.0
2    7.0
3    4.0
4    3.0
5    2.0
6    5.0
dtype: float64

###  sorting dataframe with index

In [205]:
df8 = pd.DataFrame(np.arange(16).reshape(4,4), index = ['two','four','one','three'], columns = list('dabc'))

In [206]:
df8

Unnamed: 0,d,a,b,c
two,0,1,2,3
four,4,5,6,7
one,8,9,10,11
three,12,13,14,15


In [207]:
df8.sort_index()

Unnamed: 0,d,a,b,c
four,4,5,6,7
one,8,9,10,11
three,12,13,14,15
two,0,1,2,3


In [208]:
df8.sort_index(axis=1)

Unnamed: 0,a,b,c,d
two,1,2,3,0
four,5,6,7,4
one,9,10,11,8
three,13,14,15,12


 ### sorting dataframe with values

In [209]:
df9 = pd.DataFrame({'b':[4,3],'a':[-9,11]})

In [210]:
df9

Unnamed: 0,b,a
0,4,-9
1,3,11


In [211]:
df9.sort_values(by='b')

Unnamed: 0,b,a
1,3,11
0,4,-9


In [212]:
df9.sort_values(by=['a','b'])

Unnamed: 0,b,a
0,4,-9
1,3,11


In [213]:
df9.sort_values(by=['b','a'])

Unnamed: 0,b,a
1,3,11
0,4,-9


###  Ranking in Dataframe

In [214]:
data = pd.DataFrame({'b': [4.3, 7, -3, 2], 'a': [0, 1, 0, 1], 'c': [-2, 5, 8, -2.5]})

In [215]:
data

Unnamed: 0,b,a,c
0,4.3,0,-2.0
1,7.0,1,5.0
2,-3.0,0,8.0
3,2.0,1,-2.5


In [216]:
data.rank(axis=1)

Unnamed: 0,b,a,c
0,3.0,2.0,1.0
1,3.0,1.0,2.0
2,1.0,2.0,3.0
3,3.0,2.0,1.0


In [217]:
data.rank()

Unnamed: 0,b,a,c
0,3.0,1.5,2.0
1,4.0,3.5,3.0
2,1.0,1.5,4.0
3,2.0,3.5,1.0


In [218]:
data.rank(method='first', axis = 1)

Unnamed: 0,b,a,c
0,3.0,2.0,1.0
1,3.0,1.0,2.0
2,1.0,2.0,3.0
3,3.0,2.0,1.0


# Summarizing and Computing Descriptive Statistics

In [219]:
df = pd.DataFrame([[1.4,np.nan],[7.1,-4.5],[np.nan,np.nan],[0.75,-1.3]], index = list('abcd'), columns= ['one','two'])

In [220]:
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [221]:
df.sum()

one    9.25
two   -5.80
dtype: float64

In [222]:
df.sum(axis=1)

a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64

In [223]:
df.mean(axis=1)

a    1.400
b    1.300
c      NaN
d   -0.275
dtype: float64

In [224]:
df.idxmax()

one    b
two    d
dtype: object

In [225]:
df.idxmin()

one    d
two    b
dtype: object

In [226]:
df.describe()

Unnamed: 0,one,two
count,3.0,2.0
mean,3.083333,-2.9
std,3.493685,2.262742
min,0.75,-4.5
25%,1.075,-3.7
50%,1.4,-2.9
75%,4.25,-2.1
max,7.1,-1.3


# Membership

### isin -- it performs a vectorize set membership check, useful in filtering a database down to a subject of values in a Series
### or columns in DataFrame

In [227]:
obj = pd.Series(['c','a','d','a','a','b','b','c','c'])

In [228]:
mask = obj.isin(['b','c'])

In [229]:
obj[mask]

0    c
5    b
6    b
7    c
8    c
dtype: object

In [230]:
data1 = pd.DataFrame({'Qu1': [1, 3, 4, 3, 4],'Qu2': [2, 3, 1, 2, 3],'Qu3': [1, 5, 2, 4, 4]}, index = list('abcde'))

In [231]:
data1

Unnamed: 0,Qu1,Qu2,Qu3
a,1,2,1
b,3,3,5
c,4,1,2
d,3,2,4
e,4,3,4


In [232]:
mask = data1.isin([3,2])

In [233]:
data1[mask]

Unnamed: 0,Qu1,Qu2,Qu3
a,,2.0,
b,3.0,3.0,
c,,,2.0
d,3.0,2.0,
e,,3.0,


In [234]:
mask1 = data1.isin([4])

In [235]:
data1[mask1]

Unnamed: 0,Qu1,Qu2,Qu3
a,,,
b,,,
c,4.0,,
d,,,4.0
e,4.0,,4.0
