In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
## create a Series by passing a list of values, letting pandas create a default integer index

s = pd.Series([1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [7]:
## create a DataFrame by passing a numpy array, with a datetime index and labeled columns

dates = pd.date_range('20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [9]:
df = pd.DataFrame(np.random.randn(6, 4),
                 index = dates,
                 columns = list('ABCD'))
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.345314,-1.213064,-0.607966,-0.851094
2013-01-02,-0.204905,1.475389,0.341369,-0.313155
2013-01-03,1.27842,1.442819,-0.41922,0.531115
2013-01-04,0.61014,-0.15942,0.613996,0.352854
2013-01-05,-0.695667,0.320412,0.95472,-1.062654
2013-01-06,-0.405611,-0.249739,0.241814,0.896514


In [11]:
## create a DataFrame by passing a dict of objects that can be converted to series-like

df2 = pd.DataFrame({
    'A': 1,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='float32'),
    'E': pd.Categorical(['test', 'train', 'test', 'train']),
    'F': 'foo'
})

df2

Unnamed: 0,A,B,C,D,E,F
0,1,2013-01-02,1.0,3.0,test,foo
1,1,2013-01-02,1.0,3.0,train,foo
2,1,2013-01-02,1.0,3.0,test,foo
3,1,2013-01-02,1.0,3.0,train,foo


In [12]:
df2.dtypes

A             int64
B    datetime64[ns]
C           float32
D           float32
E          category
F            object
dtype: object

In [13]:
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,-0.345314,-1.213064,-0.607966,-0.851094
2013-01-02,-0.204905,1.475389,0.341369,-0.313155
2013-01-03,1.27842,1.442819,-0.41922,0.531115
2013-01-04,0.61014,-0.15942,0.613996,0.352854
2013-01-05,-0.695667,0.320412,0.95472,-1.062654


In [15]:
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,0.61014,-0.15942,0.613996,0.352854
2013-01-05,-0.695667,0.320412,0.95472,-1.062654
2013-01-06,-0.405611,-0.249739,0.241814,0.896514


In [16]:
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [17]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [18]:
df.values

array([[-0.34531375, -1.21306383, -0.60796627, -0.85109412],
       [-0.20490493,  1.47538916,  0.34136942, -0.31315529],
       [ 1.27842018,  1.44281867, -0.41921963,  0.53111522],
       [ 0.61013986, -0.15942046,  0.61399577,  0.35285425],
       [-0.69566686,  0.32041158,  0.95472026, -1.06265424],
       [-0.40561131, -0.24973876,  0.24181444,  0.89651368]])

In [19]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.039511,0.269399,0.187452,-0.074403
std,0.749261,1.047379,0.599671,0.791046
min,-0.695667,-1.213064,-0.607966,-1.062654
25%,-0.390537,-0.227159,-0.253961,-0.716609
50%,-0.275109,0.080496,0.291592,0.019849
75%,0.406379,1.162217,0.545839,0.48655
max,1.27842,1.475389,0.95472,0.896514


In [20]:
df.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,-0.345314,-0.204905,1.27842,0.61014,-0.695667,-0.405611
B,-1.213064,1.475389,1.442819,-0.15942,0.320412,-0.249739
C,-0.607966,0.341369,-0.41922,0.613996,0.95472,0.241814
D,-0.851094,-0.313155,0.531115,0.352854,-1.062654,0.896514


In [21]:
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,-0.851094,-0.607966,-1.213064,-0.345314
2013-01-02,-0.313155,0.341369,1.475389,-0.204905
2013-01-03,0.531115,-0.41922,1.442819,1.27842
2013-01-04,0.352854,0.613996,-0.15942,0.61014
2013-01-05,-1.062654,0.95472,0.320412,-0.695667
2013-01-06,0.896514,0.241814,-0.249739,-0.405611


In [22]:
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-01,-0.345314,-1.213064,-0.607966,-0.851094
2013-01-06,-0.405611,-0.249739,0.241814,0.896514
2013-01-04,0.61014,-0.15942,0.613996,0.352854
2013-01-05,-0.695667,0.320412,0.95472,-1.062654
2013-01-03,1.27842,1.442819,-0.41922,0.531115
2013-01-02,-0.204905,1.475389,0.341369,-0.313155


In [23]:
## Secletion, .at, .iat, .loc, .iloc, .ix

df['A']

2013-01-01   -0.345314
2013-01-02   -0.204905
2013-01-03    1.278420
2013-01-04    0.610140
2013-01-05   -0.695667
2013-01-06   -0.405611
Freq: D, Name: A, dtype: float64

In [None]:
## Missing data: np.nan 

Unnamed: 0,A,B,C,D
2013-01-01,-0.690627,-2.426128,-1.215933,-1.702188
2013-01-02,-0.40981,2.950778,0.682739,-0.626311
2013-01-03,2.55684,2.885637,-0.838439,1.06223
2013-01-04,1.22028,-0.318841,1.227992,0.705708
2013-01-05,-1.391334,0.640823,1.909441,-2.125308
2013-01-06,-0.811223,-0.499478,0.483629,1.793027
