## Import modules

In [1]:
import pandas as pd
import numpy as np

## Create a DataFrame from a NumPy ndarray

In [2]:
# Draw from a locally seeded generator so that "Restart & Run All" always
# produces the same 5x3 matrix of standard-normal samples.
# NOTE: outputs saved before this seed was added came from an unseeded draw;
# re-run the notebook to refresh them.
rng = np.random.RandomState(0)
m = rng.randn(5, 3)
m

array([[ 0.62583724,  0.2360453 ,  0.08741013],
       [-0.62205136,  0.18276292, -0.19120327],
       [ 0.01568882, -2.2932682 , -0.10354367],
       [-1.32542571, -1.16120576,  0.75243419],
       [ 0.71027579,  1.35679859, -0.59957155]])

In [3]:
# Wrap the ndarray without copying so that `df` and `m` share one buffer.
# copy=False is spelled out because the constructor's default for ndarray
# input is version-dependent (pandas with Copy-on-Write copies by default),
# and the data-sharing demonstration further down relies on a shared buffer.
df = pd.DataFrame(
    m,
    columns=['a', 'b', 'c'],
    index=['A', 'B', 'C', 'D', 'E'],
    copy=False,
)
df

Unnamed: 0,a,b,c
A,0.625837,0.236045,0.08741
B,-0.622051,0.182763,-0.191203
C,0.015689,-2.293268,-0.103544
D,-1.325426,-1.161206,0.752434
E,0.710276,1.356799,-0.599572


## Get information about the DataFrame

In [4]:
# Dimensions of the frame as a (rows, columns) tuple
df.shape

(5, 3)

In [5]:
# dtype of each column, returned as a Series indexed by column name
df.dtypes

a    float64
b    float64
c    float64
dtype: object

In [6]:
# Per-column summary statistics: count, mean, std, min, quartiles and max
df.describe()

Unnamed: 0,a,b,c
count,5.0,5.0,5.0
mean,-0.119135,-0.335773,-0.010895
std,0.86156,1.411894,0.494967
min,-1.325426,-2.293268,-0.599572
25%,-0.622051,-1.161206,-0.191203
50%,0.015689,0.182763,-0.103544
75%,0.625837,0.236045,0.08741
max,0.710276,1.356799,0.752434


## Data can be shared between a DataFrame and the ndarray it was built from

In [None]:
# Write through the ndarray, then display the frame: if `df` is a view on
# `m`, the new value appears at row 'A', column 'a'.
m[0, 0] = 10.0
df

## Accessing data in DataFrames

In [7]:
# Single element by position: row 0, column 0
df.iloc[0, 0]

0.62583723874686059

In [8]:
# The same element addressed by label: row 'A', column 'a'
df.loc['A', 'a']

0.62583723874686059

In [9]:
# Column 0 by position, returned as a Series
df.iloc[:, 0]

A    0.625837
B   -0.622051
C    0.015689
D   -1.325426
E    0.710276
Name: a, dtype: float64

In [None]:
# Two label-based spellings of column 'a'. Only a cell's final expression is
# echoed automatically, so the first result is shown explicitly with
# display() (available as a built-in inside IPython/Jupyter kernels).
display(df['a'])
df.loc[:, 'a']

In [None]:
# Columns 0 and 1 by position (a positional slice excludes its end)
df.iloc[:, 0:2]

In [None]:
# Columns 'a' and 'b' by label: first with a list of names, then with a label
# slice (a label slice includes its end point). display() shows the first
# result, which would otherwise be discarded because only the last
# expression of a cell is echoed.
display(df[['a', 'b']])
df.loc[:, 'a':'b']

In [None]:
# Non-adjacent columns 0 and 2, picked with a list of integer positions
df.iloc[:, [0, 2]]

In [None]:
# Columns 'a' and 'c' picked with a list of labels
df.loc[:, ['a', 'c']]

In [None]:
# Row 0 by position, returned as a Series indexed by column name
df.iloc[0, :]

In [None]:
# Row 'A' by label
df.loc['A', :]

In [None]:
# Rows 0 and 1 by position (a positional slice excludes its end)
df.iloc[0:2, :]

In [None]:
# Rows 'A' through 'B' by label (a label slice includes its end point)
df.loc['A':'B', :]

In [None]:
# Non-adjacent rows 0 and 2, picked with a list of integer positions
df.iloc[[0, 2], :]

In [None]:
# Rows 'A' and 'C' picked with a list of labels
df.loc[['A', 'C'], :]

## Modifying data in DataFrames

In [None]:
# Overwrite the element at row 0, column 0 by position, then read it back
df.iloc[0, 0] = 100.0
df.iloc[0, 0]

In [None]:
# Overwrite by label, then read back by position: ('A', 'a') and (0, 0)
# address the same cell of the frame
df.loc['A', 'a'] = 200.0
df.iloc[0, 0]

In [None]:
# Replace every value in column 0 with the 1-D float array [0., 1., 2., 3., 4.]
df.iloc[:, 0] = np.arange(5, dtype=np.float64)
df

In [None]:
# Assign one scalar to column 1; it is broadcast to every row
df.iloc[:, 1] = 200.0
df

## Some methods create new DataFrames

In [None]:
# assign() returns a new frame carrying an extra column 'd' = |c|
df.assign(d=df['c'].abs())

In [None]:
# df itself has not changed: assign() returned a new frame, and its result
# was never bound back to the name df
df

## Creating a DataFrame from a dictionary

In [None]:
# Build a frame from a dict that maps column name -> column values. Columns
# need not share a dtype: 'a' and 'c' are slices of m, 'b' holds strings.
# NOTE: m may no longer hold its original values here - it was written to
# directly earlier in the notebook, and possibly through df if the two share
# data.
df2 = pd.DataFrame(
    {
        'a': m[:, 0],
        'b': ['ava', 'bill', 'carol', 'dory', 'ed'],
        'c': m[:, 2],
    },
    index=['A', 'B', 'C', 'D', 'E'],
)
df2

In [None]:
# On a mixed-dtype frame describe() summarises only the numeric columns by
# default, so the string column 'b' is left out of the result
df2.describe()

## Reading and writing DataFrames to files

In [None]:
# Write df2 to data.csv (path is relative to the working directory); the row
# index is written as the first column
df2.to_csv('data.csv')

In [None]:
# Read the file back, using its first column as the row index
df3 = pd.read_csv('data.csv', index_col=0)
df3

In [None]:
# |c| plus the offsets 0., 1., 2., 3., 4. added element-wise by position
newcol = df3['c'].abs() + np.arange(5, dtype=np.float64)
newcol

In [None]:
# Attach newcol as column 'd' in a new frame bound to df4; df3 is not rebound
df4 = df3.assign(d=newcol)
df4

In [None]:
# Save the extended frame (including column 'd') to data2.csv
df4.to_csv('data2.csv')