# Pandas DataFrames

In [1]:
import numpy as np
import pandas as pd

In [2]:
from numpy.random import randn

In [3]:
# Seed NumPy's global random number generator so the random values
# generated below are the same on every run
np.random.seed(101)

### Create

***Creating DataFrame***

In [4]:
# Build a 5x4 frame of standard-normal samples:
# rows are labelled A-E and columns are labelled W-Z
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)

In [5]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [6]:
# Selecting a single column with [] yields a pandas Series
print(df['W'])
print(type(df['W']))

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64
<class 'pandas.core.series.Series'>


In [7]:
# The frame as a whole is a pandas DataFrame
print(type(df))

<class 'pandas.core.frame.DataFrame'>


 ***Access column***

In [8]:
# As with SQL's table.column notation, a column can be read as an attribute: dataFrame.ColumnName
# Note: avoid this form - a column name can clash with an existing DataFrame
# attribute or method, which makes the expression ambiguous
df.W

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [9]:
# Preferred way of accessing a column: bracket notation with the column label
df['W']

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [10]:
# Access multiple columns by passing a list of column labels; the result is a DataFrame
df[['W','Z']]

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


 ***Create a new Column***


In [11]:
# Assigning to a column label that does not exist yet adds a new column;
# here it holds the element-wise sum of columns W and Y
df['new'] = df['W'] + df['Y']

In [12]:
df['new']

A    3.614819
B   -0.196959
C   -1.489355
D   -0.744542
E    2.796762
Name: new, dtype: float64

### Drop

***Dropping a Column***

In [13]:
# drop() looks for the label in the row index by default (axis=0),
# so pass axis=1 to drop a column instead.
# Without axis=1 the call below would search the row labels for 'new' and raise a KeyError:
# df.drop('new')
df.drop('new',axis=1)

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [14]:
# The 'new' column is still present: drop() returned a new frame and left df unchanged.
# Pass inplace=True to modify df itself.
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [15]:
# inplace defaults to False so that data is not lost by accident;
# with inplace=True the column is removed from df itself
df.drop('new', axis=1, inplace=True)
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


***Drop a Row***

In [16]:
# Rows are dropped by index label. axis=0 is the default, so this is
# equivalent to df.drop('E', axis=0, inplace=True)
df.drop('E',inplace=True)

In [17]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


**Why is `axis` 0 or 1?**<br>
The convention comes from NumPy: `df.shape` returns a tuple `(rows, columns)`, and each axis is numbered by its position in that tuple. Rows are therefore axis 0 (`shape[0]`) and columns are axis 1 (`shape[1]`).

In [18]:
df.shape

(4, 4)

### Selecting

In [19]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


***Column Selecting***

In [20]:
df[['X','Z']]

Unnamed: 0,X,Z
A,0.628133,0.503826
B,-0.319318,0.605965
C,0.740122,-0.589001
D,-0.758872,0.955057


###### Rows Selecting

- First way: label-based selection with `.loc`

In [21]:
df.loc['C'] # Returns the row labelled 'C' as a Series indexed by the column names

W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64

- Second way: integer-position-based selection with `.iloc`

In [22]:
# Positions are zero-based: position 2 is the third row, which here is the row labelled 'C'
df.iloc[2]

W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64

- NumPy-style indexing: `[row, column]`

In [23]:
# A single row label and a single column label select one cell and return a scalar
df.loc['A' , 'Y' ]

0.9079694464765431

In [24]:
# Lists of labels select a sub-frame: rows A and B, columns W and Y
df.loc[['A','B'] , ['W','Y']]

Unnamed: 0,W,Y
A,2.70685,0.907969
B,0.651118,-0.848077
