# Pandas - DataFrames Part 2

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Seed NumPy's global random generator so the random numbers drawn later
# are repeatable when the notebook is run top to bottom on a fresh kernel.
np.random.seed(0)

In [15]:
# Build a 5x4 DataFrame of standard-normal draws.
# Rows are labelled A-E and columns W-Z; keywords make the role of each argument explicit.
df = pd.DataFrame(
    data=np.random.randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)

In [16]:
df

Unnamed: 0,W,X,Y,Z
A,-0.67246,-0.359553,-0.813146,-1.726283
B,0.177426,-0.401781,-1.630198,0.462782
C,-0.907298,0.051945,0.729091,0.128983
D,1.139401,-1.234826,0.402342,-0.68481
E,-0.870797,-0.57885,-0.311553,0.056165


### Conditional selection

In [17]:
# Element-wise comparison: the result is a boolean DataFrame with the same
# row and column labels as df (True where the value is greater than 0).
df>0

Unnamed: 0,W,X,Y,Z
A,False,False,False,False
B,True,False,False,True
C,False,True,True,True
D,True,False,True,False
E,False,False,False,True


#### Assigning the result of a condition to a variable

In [18]:
booldf = df>0

In [19]:
booldf

Unnamed: 0,W,X,Y,Z
A,False,False,False,False
B,True,False,False,True
C,False,True,True,True
D,True,False,True,False
E,False,False,False,True


In [20]:
df[booldf]
# Cells where the mask is True keep their original value;
# cells where the mask is False are replaced with NaN.

Unnamed: 0,W,X,Y,Z
A,,,,
B,0.177426,,,0.462782
C,,0.051945,0.729091,0.128983
D,1.139401,,0.402342,
E,,,,0.056165


In [21]:
df[df>0]

Unnamed: 0,W,X,Y,Z
A,,,,
B,0.177426,,,0.462782
C,,0.051945,0.729091,0.128983
D,1.139401,,0.402342,
E,,,,0.056165


In [22]:
df['W']>0

A    False
B     True
C    False
D     True
E    False
Name: W, dtype: bool

In [23]:
# A boolean Series used as the key filters rows: only rows where W > 0 are kept.
df[df['W']>0]

Unnamed: 0,W,X,Y,Z
B,0.177426,-0.401781,-1.630198,0.462782
D,1.139401,-1.234826,0.402342,-0.68481


In [25]:
df[df['Z']<0]

Unnamed: 0,W,X,Y,Z
A,-0.67246,-0.359553,-0.813146,-1.726283
D,1.139401,-1.234826,0.402342,-0.68481


In [26]:
resultdf = df[df['W']>0]

In [27]:
resultdf

Unnamed: 0,W,X,Y,Z
B,0.177426,-0.401781,-1.630198,0.462782
D,1.139401,-1.234826,0.402342,-0.68481


In [28]:
resultdf['W']

B    0.177426
D    1.139401
Name: W, dtype: float64

In [29]:
# Filter the rows where W > 0 first, then pull column X out of the filtered result.
df[df['W']>0]['X']

B   -0.401781
D   -1.234826
Name: X, dtype: float64

In [30]:
# Filter the rows where W > 0, then pass a list of column names
# to select several columns at once (in the order given).
df[df['W']>0][['Y','X']]

Unnamed: 0,Y,X
B,-1.630198,-0.401781
D,0.402342,-1.234826


In [32]:
# The filter-then-select operation written as separate, named steps.
boolser = df['W']>0   # boolean Series: True for rows where W is positive
result = df[boolser]  # keep only the rows where the mask is True
mycols = ['Y','X']    # columns to show, in this order
result[mycols]
# The only disadvantage of multiple steps: each variable you define takes up memory.

Unnamed: 0,Y,X
B,-1.630198,-0.401781
D,0.402342,-1.234826


#### Combining two conditions

In [33]:
# Demonstration of a common mistake: Python's `and` needs a single True/False
# from each operand, but each comparison here produces a whole boolean Series,
# so pandas raises ValueError ("truth value of a Series is ambiguous").
# The error is caught and printed so the notebook still runs from top to bottom.
try:
    df[(df['W']>0) and (df['Y']>1)]
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [34]:
# Remember: the `and` operator compares single boolean values and returns a single result,
# e.g. True and False -> False

In [35]:
# If we pass a whole Series of boolean values to `and`, it cannot reduce the Series to a single True/False, so it raises an error

In [37]:
df[(df['W']>0) & (df['Y']>1)]
# AND operation: `&` combines the two boolean Series element-wise,
# keeping only rows where both conditions are True.
# Each condition must be wrapped in parentheses.

Unnamed: 0,W,X,Y,Z


In [38]:
df[(df['W']>0) | (df['Y']>1)]
# OR operation: `|` combines the two boolean Series element-wise,
# keeping rows where at least one of the conditions is True.

Unnamed: 0,W,X,Y,Z
B,0.177426,-0.401781,-1.630198,0.462782
D,1.139401,-1.234826,0.402342,-0.68481


#### Resetting the index

In [39]:
df.reset_index()
# The index is reset to the default integer index (0, 1, 2, ...).
# The old index is kept as a regular column of the DataFrame, named 'index'.

Unnamed: 0,index,W,X,Y,Z
0,A,-0.67246,-0.359553,-0.813146,-1.726283
1,B,0.177426,-0.401781,-1.630198,0.462782
2,C,-0.907298,0.051945,0.729091,0.128983
3,D,1.139401,-1.234826,0.402342,-0.68481
4,E,-0.870797,-0.57885,-0.311553,0.056165


In [40]:
# This does not happen in place unless you explicitly ask for it with inplace=True

In [41]:
df

Unnamed: 0,W,X,Y,Z
A,-0.67246,-0.359553,-0.813146,-1.726283
B,0.177426,-0.401781,-1.630198,0.462782
C,-0.907298,0.051945,0.729091,0.128983
D,1.139401,-1.234826,0.402342,-0.68481
E,-0.870797,-0.57885,-0.311553,0.056165


In [42]:
# inplace=True modifies df itself instead of returning a new DataFrame.
# Note: this cell is not idempotent - running it again resets the index
# once more and adds yet another column to df.
df.reset_index(inplace = True)

In [43]:
df

Unnamed: 0,index,W,X,Y,Z
0,A,-0.67246,-0.359553,-0.813146,-1.726283
1,B,0.177426,-0.401781,-1.630198,0.462782
2,C,-0.907298,0.051945,0.729091,0.128983
3,D,1.139401,-1.234826,0.402342,-0.68481
4,E,-0.870797,-0.57885,-0.311553,0.056165


In [44]:
# State abbreviations to use as the new row labels, one per row of df.
newind = ['CA', 'NY', 'WY', 'OR', 'CO']

In [45]:
newind

['CA', 'NY', 'WY', 'OR', 'CO']

In [46]:
df['States'] = newind

In [47]:
df

Unnamed: 0,index,W,X,Y,Z,States
0,A,-0.67246,-0.359553,-0.813146,-1.726283,CA
1,B,0.177426,-0.401781,-1.630198,0.462782,NY
2,C,-0.907298,0.051945,0.729091,0.128983,WY
3,D,1.139401,-1.234826,0.402342,-0.68481,OR
4,E,-0.870797,-0.57885,-0.311553,0.056165,CO


In [48]:
# Use the 'States' column as the row index.
# Called without inplace=True (and without reassignment), this only returns
# a new DataFrame for display; df itself is left unchanged.
df.set_index('States')

Unnamed: 0_level_0,index,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CA,A,-0.67246,-0.359553,-0.813146,-1.726283
NY,B,0.177426,-0.401781,-1.630198,0.462782
WY,C,-0.907298,0.051945,0.729091,0.128983
OR,D,1.139401,-1.234826,0.402342,-0.68481
CO,E,-0.870797,-0.57885,-0.311553,0.056165
