# Pandas DataFrame: Basics

In [1]:
import pandas as pd
import numpy as np

pd.__version__, np.__version__

('0.25.3', '1.17.4')

### Initializing a DataFrame from a Dictionary

In [None]:
# Each key becomes a column label; each list holds that column's values
# from the first row to the last.
data = dict(
    vivek=[35, 'M', 'Trainer'],
    john=[22, 'M', 'Gardener'],
    jill=[19, 'F', 'Accountant'],
    ravi=[42, 'M', 'Sales'],
)

df1 = pd.DataFrame(data=data)

df1

In [None]:
df1.columns

In [None]:
df1.values

In [None]:
type(df1.values)

#### Changing the Index

There are many ways to do this; this is just one of them!

In [None]:
df1['characteristics'] = [ 'age', 'sex', 'profession']

df1

In [None]:
# Promote the 'characteristics' column to the row index. The result is
# rebound to df1 instead of using inplace=True, so the state change is an
# explicit assignment and the call stays chainable.
df1 = df1.set_index('characteristics')

df1

In [None]:
df1.columns.name = 'name'

df1

### From a Series

In [None]:
# Seed NumPy's global generator so the random values below (and every output
# derived from them) are identical on each Restart & Run All.
np.random.seed(0)

# The two Series have different lengths: 7 values and 8 values.
s1 = pd.Series(np.random.randint(0, 100, 7))

s2 = pd.Series(np.random.randint(0, 100, 8))

# side_by_side function from Wes McKinney, author of Pandas

def side_by_side(*objs, **kwds):
    """Print the reprs of several objects next to each other.

    Parameters
    ----------
    *objs
        Objects to display. Each object's ``repr`` is split into lines and
        laid out as one column of the printed text.
    **kwds
        ``space`` (int, default 4): number of blanks between columns.

    Returns
    -------
    None
        The joined text is written to standard output.
    """
    try:
        # Home of ``adjoin`` in current pandas releases.
        from pandas.io.formats.printing import adjoin
    except ImportError:
        # Fallback for old pandas releases that still exposed it here.
        from pandas.core.common import adjoin
    space = kwds.get('space', 4)
    reprs = [repr(obj).split('\n') for obj in objs]
    # Function-call form of print: the Python 2 print statement is a
    # SyntaxError on Python 3.
    print(adjoin(space, *reprs))
    
# Build a frame with one column per Series. s1 holds 7 values and s2 holds 8;
# pandas aligns them on their index, so the row missing from s1 shows as NaN.
df2 = pd.DataFrame(dict(s1=s1, s2=s2))

df2

### Even a single Series can be used

In [None]:
pd.DataFrame(s1)

### Head and Tail

In [None]:
df1.head(3)

In [None]:
df1.tail(3)

### Accessing Values in a DataFrame

In [None]:
df1

#### Fetch a Column

In [None]:
df1['john']

In [None]:
type(df1['john'])

#### Fetch Columns

In [None]:
df1

In [None]:
# Select the 2nd and 3rd columns by position. df1's column labels are strings,
# so df1[[1, 2]] would look for columns *labelled* 1 and 2 and raise a KeyError.
df1.iloc[:, [1, 2]]

### Fetch a Row or a Column by Label

#### Fetch Row by Label

`.xs` returns a cross-section (a row or a column) selected by label: http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.xs.html

In [None]:
df1.xs('age')

In [None]:
type(df1.xs('age'))

In [None]:
df1

#### Fetch Column by Label

In [None]:
df1.xs('vivek', axis=1)

### Fetch by Label, Falling Back to Integer Position

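http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.ix.html — note that `.ix` has been deprecated since pandas 0.20.0 in favour of the stricter `.loc` (label-based) and `.iloc` (position-based) indexers.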

In [None]:
# 3rd row, selected by integer position. df1's index holds string labels, so
# the deprecated .ix[2] fell back to a positional lookup; .iloc says so explicitly.
df1.iloc[2]

#### Fetch Rows

In [None]:
df1[1:3] # Fetch rows 1 and 2 (0 indexed)

#### .loc and .iloc

In [None]:
df1

In [None]:
df1.loc['age']

In [None]:
df1.loc[['age', 'profession']]

In [None]:
df2

In [None]:
df2.iloc[[2,4,7]]

#### Search by Index Label or Location with `.ix`

In [None]:
df2

In [None]:
# df2 has an integer index, so the deprecated .ix[4] was a *label* lookup;
# .loc makes that explicit.
df2.loc[4]

In [None]:
df1

In [None]:
# Label-based row lookup; .loc replaces the deprecated .ix.
df1.loc['sex']

#### Fetching using Row,Column Combinations

In [None]:
df1

In [None]:
# Get the value in the 2nd row ('sex') of the 'ravi' column, i.e. 'M':
# once purely by position, once with a positional row and a column label.
# The column position is looked up rather than hard-coded, so both
# expressions address the same cell whatever the column order is.

df1.iloc[1, df1.columns.get_loc('ravi')], df1.loc[df1.index[1], 'ravi']

In [None]:
# The same cell ('sex' row, 'ravi' column) fetched by position and by label.
# get_loc resolves the column's position so the two lookups cannot drift apart.
df1.iloc[1, df1.columns.get_loc('ravi')], df1.loc['sex', 'ravi']

In [None]:
# We want the rows at positions 1 and 2 (the slice end, 3, is excluded)
# and the columns 'john' and 'ravi'.
# df1.index[1:3] turns the positional slice into row labels for .loc.

df1.loc[df1.index[1:3], ['john', 'ravi']]

In [None]:
df1.loc[['sex', 'profession'], ['john', 'ravi']]

In [None]:
df1.iloc[1:3, 1:3]

In [None]:
# The selected columns do not need to be adjacent (or in their original order).
# Row at position 1 only, since the slice end is excluded.

df1.loc[df1.index[1:2], ['john', 'vivek']]

In [None]:
# Column positions can be sliced as well (second indexer, axis=1):
# rows at positions 0-1, columns at positions 1-3.

df1.iloc[0:2, 1:4]

In [None]:
# Same idea as the previous cell, with an explicit list of column positions
# (first and last column) instead of a slice.

df1.iloc[1:3, [0, 3]]

In [None]:
# Lists of row positions and column positions can be provided as well.

df1.iloc[[1, 2], [1, 2]]

#### Fast label based lookup with .at 

http://pandas.pydata.org/pandas-docs/version/0.17.0/generated/pandas.DataFrame.at.html

In [None]:
df1.at['sex', 'john']

In [None]:
df1.at['profession', 'ravi']

#### Fast position-based lookup with `.iat`

In [None]:
df1

In [None]:
df1.iat[2, 3]