In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Object Creation

#### Series

In [None]:
# Build a Series from a plain Python list; pandas supplies a default integer
# index, and the NaN entry marks a missing value.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
print(s)

#### Creating a DataFrame by passing a NumPy array, with a datetime index and labeled columns

In [None]:
# Seed NumPy's global generator so the random values drawn here (and by the
# later cells that call np.random) are identical on every Restart & Run All.
np.random.seed(0)

# Six consecutive calendar days, used as the row index of the frame below.
dates = pd.date_range('20130101', periods=6)
print(dates)
print("dates[0] == {}".format(dates[0]))
print("type(dates[0]) == {}".format(type(dates[0])))

# 6x4 block of standard-normal samples, labelled by date (rows) and A-D (columns).
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))
print(df)

#### Creating a DataFrame by passing a dict of objects that can be converted to a series-like structure

In [None]:
# Each dict entry becomes one column. Scalar entries ('A', 'B', 'F') are
# repeated to match the four-element array-like entries ('C', 'D', 'E').
df2 = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
        'D': np.array([3, 3, 3, 3], dtype='int32'),
        'E': pd.Categorical(["test", "train", "test", "train"]),
        'F': 'foo',
    }
)

print(df2)
print(df2.dtypes)

## Viewing Data

In [None]:
# Print, in order: the first rows (head's default count), the last three rows,
# the row index, and the column labels.
for view in (df.head(), df.tail(3), df.index, df.columns):
    print(view)

In [None]:
# Show the type of the object returned by .values, then display the object itself.
df_values = df.values
print(type(df_values))
df_values

In [None]:
# Transposed view of the frame: rows become columns and vice versa.
df.transpose()

#### Sorting by an axis

In [None]:
# Order the columns by their labels, descending.
df.sort_index(axis='columns', ascending=False)

In [None]:
# Order the rows by the values in column 'B' (ascending is the default).
df.sort_values('B')

## Missing data

In [None]:
# np.NaN and np.NAN are legacy spellings of np.nan that were removed in
# NumPy 2.0. Look them up defensively so this cell still runs there: where an
# alias exists, show that it is the very same object as np.nan; otherwise
# report that it is gone instead of raising AttributeError.
for alias in ("NaN", "NAN"):
    if hasattr(np, alias):
        print(np.nan is getattr(np, alias))
    else:
        print("np.{} is not available in NumPy {}".format(alias, np.__version__))

In [None]:
# Work on a deep copy so df2 itself is left untouched, then blank out a
# single cell (row label 0, column 'F') with NaN.
df_ = df2.copy(deep=True)
df_.loc[0, 'F'] = np.nan
df_

In [None]:
# Compare the Python type stored in the overwritten cell (row 0) with that of
# an untouched cell (row 1), then list the column dtypes.
for row_label in (0, 1):
    print(type(df_.at[row_label, 'F']))
print(df_.dtypes)

## Reshaping (stacking/unstacking)

In [None]:
# Pair the outer and inner labels position by position to get the
# (first, second) tuples that make up the two-level row index.
first_labels = ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux']
second_labels = ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']
tuples = list(zip(first_labels, second_labels))

index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])

# 8x2 frame of random values; the column axis itself is named 'third'.
df = pd.DataFrame(
    np.random.randn(8, 2),
    index=index,
    columns=['A', 'B'],
)
df.columns.name = 'third'

df

In [None]:
# Pivot the innermost column level (the default, level=-1) into the row index.
df_stacked = df.stack(level=-1)
df_stacked

In [None]:
# Move the innermost index level (the default, level=-1) back to the columns.
df_stacked.unstack(level=-1)

In [None]:
# Move the outermost index level (position 0) to the columns instead.
df_stacked.unstack(0)

In [None]:
# Unstack two index levels at once, given by position: level 1, then level 0.
df_stacked.unstack([1, 0])

## Time Series (resampling)

In [None]:
# 600 timestamps one second apart. The lowercase 's' alias is used because the
# uppercase 'S' spelling is deprecated in pandas 2.2 and later.
rng = pd.date_range('1/1/2012', periods=600, freq='s')

# One random integer in [0, 500) per timestamp.
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
ts.head()

In [None]:
# Group the per-second series into 5-minute bins once and reuse the resampler
# for every aggregation below.
resampled = ts.resample('5Min')

print(ts.sum())
# The label promises the *type* of the resampler, so print type(...) rather
# than the resampler object itself.
print("type(ts.resample('5Min')) -> \n{}".format(type(resampled)))
print("ts.resample('5Min').sum() -> \n{}".format(resampled.sum()))
print("ts.resample('5Min').mean() -> \n{}".format(resampled.mean()))
print("ts.resample('5Min').max() -> \n{}".format(resampled.max()))
print("ts.resample('5Min').min() -> \n{}".format(resampled.min()))

In [None]:
# Five timestamps at month-start ('MS') frequency, beginning 1 Jan 2012,
# each paired with one standard-normal sample.
rng = pd.date_range(start='1/1/2012', periods=5, freq='MS')
ts = pd.Series(data=np.random.randn(len(rng)), index=rng)
ts

## Categoricals

In [None]:
# Small frame of ids and letter grades; 'grade' is the same data as
# 'raw_grade', converted to the categorical dtype.
df = pd.DataFrame(
    {
        "id": [1, 2, 3, 4, 5, 6],
        "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e'],
    }
)
df["grade"] = df["raw_grade"].astype("category")
df["grade"]

In [None]:
# Display the .cat accessor object of the categorical 'grade' column.
df.loc[:, "grade"].cat