## Pandas Tutorial

Pandas is an open source, BSD-licensed library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language.

Topics

- What is a DataFrame?
- What is a Series?
- Different operations in pandas

In [2]:
# Import libraries

In [4]:
import numpy as np
import pandas as pd

In [7]:
# Build a 5x4 demo frame: the integers 0..19 laid out row by row,
# with string labels r1..r5 on the rows and c1..c4 on the columns.
df = pd.DataFrame(
    data=np.arange(20).reshape(5, 4),
    index=[f'r{i}' for i in range(1, 6)],
    columns=[f'c{j}' for j in range(1, 5)],
)

In [8]:
df

Unnamed: 0,c1,c2,c3,c4
r1,0,1,2,3
r2,4,5,6,7
r3,8,9,10,11
r4,12,13,14,15
r5,16,17,18,19


In [9]:
# Save the frame as test.csv (relative path, so it lands in the working directory).
# With to_csv's defaults the row labels are written out as the first column.
df.to_csv(path_or_buf='test.csv')

##### Accessing elements

- .loc
- .iloc

In [12]:
df.loc['r1']

c1    0
c2    1
c3    2
c4    3
Name: r1, dtype: int32

In [13]:
type(df.loc['r1'])

pandas.core.series.Series

In [14]:
# iloc[rows, columns]: the slice before the comma picks rows, the one after
# it picks columns. A bare ":" on both sides keeps everything.
df.iloc[:, :]

Unnamed: 0,c1,c2,c3,c4
r1,0,1,2,3
r2,4,5,6,7
r3,8,9,10,11
r4,12,13,14,15
r5,16,17,18,19


In [15]:
# we just want first two rows

In [17]:
# Positional slice: rows 0 and 1 (the stop index 2 is excluded), every column.
df.iloc[:2, :]

Unnamed: 0,c1,c2,c3,c4
r1,0,1,2,3
r2,4,5,6,7


In [18]:
# we just want the first two rows and the first three columns

In [20]:
# Rows 0-1 and columns 0-2 by position; both slice stops are exclusive.
df.iloc[:2, :3]

Unnamed: 0,c1,c2,c3
r1,0,1,2
r2,4,5,6


In [21]:
type(df.iloc[0:2,0:3])

pandas.core.frame.DataFrame

In [22]:
df.iloc[0:2,0]

r1    0
r2    4
Name: c1, dtype: int32

In [23]:
type(df.iloc[0:2,0])

pandas.core.series.Series

In [24]:
# select all rows, and every column from the second one onward

In [25]:
df.iloc[:,1:]

Unnamed: 0,c2,c3,c4
r1,1,2,3
r2,5,6,7
r3,9,10,11
r4,13,14,15
r5,17,18,19


In [26]:
# converting dataframe into arrays

In [27]:
df.iloc[:,1:].values

array([[ 1,  2,  3],
       [ 5,  6,  7],
       [ 9, 10, 11],
       [13, 14, 15],
       [17, 18, 19]])

In [28]:
df.iloc[:,1:].values.shape

(5, 3)

In [29]:
# how to check null elements

In [30]:
df.isnull()

Unnamed: 0,c1,c2,c3,c4
r1,False,False,False,False
r2,False,False,False,False
r3,False,False,False,False
r4,False,False,False,False
r5,False,False,False,False


In [31]:
df.isnull().sum()

c1    0
c2    0
c3    0
c4    0
dtype: int64

In [32]:
# to know how many times each value occurs in a column

In [35]:
df['c2'].value_counts()

13    1
5     1
17    1
9     1
1     1
Name: c2, dtype: int64

In [36]:
# to know unique values

In [38]:
df['c2'].unique()

array([ 1,  5,  9, 13, 17], dtype=int64)

In [39]:
df['c1']

r1     0
r2     4
r3     8
r4    12
r5    16
Name: c1, dtype: int32

In [40]:
df['c2']

r1     1
r2     5
r3     9
r4    13
r5    17
Name: c2, dtype: int32

In [41]:
# if we want to select multiple columns

In [42]:
# Two labels separated by a comma are passed as ONE tuple key ('c1', 'c2').
# No column has that name, so pandas raises KeyError. The error is the point
# of this cell, so it is caught and shown instead of stopping a "Run All".
try:
    df['c1', 'c2']
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: ('c1', 'c2')

In [43]:
# we have to pass the column names as a list

In [44]:
df[['c1','c2']]

Unnamed: 0,c1,c2
r1,0,1
r2,4,5
r3,8,9
r4,12,13
r5,16,17
