In [None]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Combining DataFrames

In [None]:
# Every other label (R, T, V) holds a missing value.
ser1 = Series(
    [2, np.nan, 4, np.nan, 6, np.nan],
    index=['Q', 'R', 'S', 'T', 'U', 'V'],
)
ser1

In [None]:
# Fully populated float series (0.0 .. len(ser1)-1) on the same labels as ser1.
ser2 = Series(np.arange(len(ser1), dtype=np.float64), index=list('QRSTUV'))
ser2

#### Two different ways to make a series based on `ser1` while using `ser2` to replace its null values.

In [None]:
# Way 1: element-wise choice; np.where returns a plain ndarray, so the labels are lost.
np.where(ser1.isnull(), ser2, ser1)

In [None]:
# Same selection, wrapped back into a Series that carries ser1's labels.
Series(np.where(ser1.isnull(), ser2, ser1), index=ser1.index)

In [None]:
# Way 2: combine_first keeps ser1's values and takes ser2's value wherever ser1 is null.
ser1.combine_first(ser2)

#### The same idea for dataframes: make a dataframe based on `dframe_odds` while using `dframe_evens` to replace its null values.

In [None]:
nan = np.nan  # short alias that keeps the literal tables readable

# Nulls alternate by row: X is missing exactly where Y and Z are populated.
dframe_odds = DataFrame(dict(
    X=[1., nan, 3., nan],
    Y=[nan, 5., nan, 7.],
    Z=[nan, 9., nan, 11.],
))

In [None]:
# Five rows and only the X and Y columns; `nan` is the np.nan alias defined earlier.
dframe_evens = DataFrame(dict(
    X=[2., 4., nan, 6., 8.],
    Y=[nan, 10., 12., 14., 16.],
))

In [None]:
# Display the frame whose values take priority in the combination.
dframe_odds

In [None]:
# Display the frame that supplies the fill-in values.
dframe_evens

In [None]:
# Null cells of dframe_odds are filled from the same location in dframe_evens;
# the result covers the union of both frames' rows and columns.
dframe_odds.combine_first(dframe_evens)

# Reshaping

In [None]:
# Building each axis with pd.Index lets us set the axis name at construction time.
city_index = pd.Index(['LA', 'SF'], name='city')
letter_columns = pd.Index(['A', 'B', 'C', 'D'], name='letter')

dframe1 = DataFrame(
    np.arange(8).reshape(2, 4),
    index=city_index,
    columns=letter_columns,
)
dframe1

In [None]:
# stack() moves the column labels ('letter') into an inner index level under 'city'.
dframe1_st = dframe1.stack()
dframe1_st

In [None]:
# Check what kind of object stacking the frame produced.
type(dframe1_st)

In [None]:
# With no argument, unstack() pivots the innermost index level back into columns.
dframe1_st.unstack()

In [None]:
# Pivot the 'city' level into columns instead; 'letter' remains as the row index.
dframe1_st.unstack(level='city')

In [None]:
# Pivot the 'letter' level into columns, selected by name rather than by position.
dframe1_st.unstack(level='letter')

In [None]:
# Two series with partially overlapping labels: only X and Y appear in both.
ser1 = Series([0, 1, 2], index=list('QXY'))
ser2 = Series([4, 5, 6], index=list('XYZ'))

In [None]:
# Concatenate end to end; `keys` adds an outer index level ('Alpha' / 'Beta') that
# records which input each value came from. Despite the name `dframe`, the inputs
# are Series concatenated along the index, so check the result type below.
dframe = pd.concat([ser1, ser2], keys = ['Alpha', 'Beta'])
dframe

In [None]:
# Inspect what kind of object the concatenation produced.
type(dframe)

In [None]:
# Move the outer level (the 'Alpha' / 'Beta' keys) into columns. Labels present in
# only one of the inputs (Q and Z) have no partner value and show up as NaN.
dframe = dframe.unstack(level = 0)
dframe

In [None]:
# With the legacy stack implementation, the NaN entries are dropped by default.
# NOTE(review): pandas >= 2.1 offers future_stack=True, which keeps them — confirm
# against the installed pandas version.
dframe.stack()

In [None]:
# dropna=False keeps the NaN entries in the stacked result.
# NOTE(review): `dropna` belongs to the legacy stack implementation — confirm it is
# still accepted before upgrading pandas.
dframe.stack(dropna=False)

# Pivoting

`DataFrame.pivot(index=None, columns=None, values=None)`
Reshape data (produce a “pivot” table) based on column values. Uses unique values from index / columns to form axes of the resulting DataFrame.


Parameters:	
index : string or object, optional
    Column name to use to make new frame’s index. If None, uses existing index.
columns : string or object
    Column name to use to make new frame’s columns
values : string or object, optional
    Column name to use for populating new frame’s values. If not specified, all remaining columns will be used and the result will have hierarchically indexed columns.

In [None]:
# Every (sex, degree) pair occurs exactly once in this frame.
df1 = pd.DataFrame(data={
    'sex': ['male'] * 2 + ['female'] * 3 + ['male'],
    'degree': list('ABCABC'),
    'age': [20, 21, 22, 20, 20, 21],
    'count': [2, 2, 3, 4, 2, 4],
})
df1

In [None]:
# Rows come from 'sex', columns from 'degree', cell values from 'count'.
# DataFrame.pivot takes keyword-only arguments in pandas 2.x, so name them explicitly.
df1.pivot(index='sex', columns='degree', values='count')

In [None]:
# Without `values`, every remaining column is pivoted and the result gets
# hierarchical columns. Arguments are passed by keyword because DataFrame.pivot is
# keyword-only in pandas 2.x.
df1.pivot(index='sex', columns='degree')

#### Use pivot_table if there are duplicates

In [None]:
# Same layout as df1, plus three extra rows that repeat existing (sex, degree) pairs.
df2 = pd.DataFrame(data={
    'sex': ['male'] * 2 + ['female'] * 3 + ['male'] * 3 + ['female'],
    'degree': list('ABC' * 3),
    'age': [20, 21, 22, 20, 20, 21, 21, 20, 20],
    'count': [2, 2, 3, 4, 2, 4, 1, 2, 3],
})
df2

In [None]:
# df2.pivot(index='sex', columns='degree') would raise an error here: several rows
# share the same (sex, degree) pair, and pivot() cannot put two values in one cell.
# pivot_table() aggregates the duplicates instead (mean by default; pass an
# explicit aggfunc such as 'sum' to total them).
df2.pivot_table(index = 'sex', columns='degree')

# Duplicates in DataFrames

In [None]:
# Small frame with repeated rows: (A, 2) occurs twice and (B, 3) occurs twice.
dframe = DataFrame({
    'key1': list('AABBB'),
    'key2': [2] * 3 + [3] * 2,
})
dframe

In [None]:
# Boolean mask: True for each row that repeats an earlier row across all columns.
dframe.duplicated()

In [None]:
# Returns a new frame without the repeated rows; dframe itself is not modified.
dframe.drop_duplicates()

In [None]:
# Judge duplicates on 'key1' only; the first occurrence of each key is kept by default.
dframe.drop_duplicates(subset=['key1'])

In [None]:
# Display the frame again: the drop_duplicates calls above returned new frames.
dframe

In [None]:
# Same de-duplication on 'key1', but keep the last occurrence of each key.
dframe.drop_duplicates(subset=['key1'], keep='last')

# Mapping

In [None]:
# City names with their populations, expressed in thousands.
cities = ['Rasht', 'Tehran', 'Bam']
populations = [3000, 16000, 100]

dframe = DataFrame({'city': cities, 'population(*1000)': populations})
dframe

In [None]:
# Look up each city's state through a dict and attach the result as a new column.
state_map = {"Rasht": "Gilan", "Tehran": "Tehran", "Bam": "Kerman"}
dframe = dframe.assign(state=dframe['city'].map(state_map))
dframe

# Replace

In [None]:
# The values 1..4 repeated twice, so every value has two occurrences to replace.
ser1 = Series([1, 2, 3, 4, 1, 2, 3, 4])
ser1

In [None]:
# Replace a single value: every 1 becomes NaN.
ser1.replace(to_replace=1, value=np.nan)

In [None]:
# Replace several values with one value: every 1 and every 3 becomes NaN.
ser1.replace(to_replace=[1, 3], value=np.nan)

In [None]:
# Pairwise replacement: 1 -> 100 and 3 -> 300.
ser1.replace(to_replace=[1, 3], value=[100, 300])

In [None]:
# A dict spells out each old -> new pair: 1 -> NaN and 2 -> 200.
ser1.replace(to_replace={1: np.nan, 2: 200})

# Renaming Index

In [None]:
# 3x4 grid of the integers 0..11, labelled by city (rows) and letter (columns).
dframe = DataFrame(
    np.arange(12).reshape(3, 4),
    index=['Rasht', 'Qom', 'Tehran'],
    columns=list('ABCD'),
)
dframe

In [None]:
# Index.map returns a new Index with the function applied to each label;
# the frame keeps its original index until the result is assigned back.
dframe.index.map(str.upper)

In [None]:
# Assign the upper-cased labels back so the frame's row index actually changes.
dframe.index = dframe.index.str.upper()
dframe

In [None]:
# Same approach for the column labels, this time lower-casing them.
dframe.columns = dframe.columns.str.lower()
dframe.columns

In [None]:
def myMap(input):
    """Return `input` with the literal suffix ",,," appended."""
    suffix = ",,,"
    return input + suffix
# Any callable works with Index.map; the result is not assigned, so the frame's
# columns stay as they are.
dframe.columns.map(myMap)

In [None]:
# Display the frame to confirm that the unassigned columns.map(myMap) call left it unchanged.
dframe

In [None]:
# rename() accepts a callable per axis: title-case the row labels, upper-case the columns.
# Reassigning the returned frame (instead of inplace=True) keeps the step explicit.
dframe = dframe.rename(index=str.title, columns=str.upper)
dframe

In [None]:
# User-defined callables work too. The result is only displayed: it is neither
# assigned nor applied in place, so dframe keeps its labels.
dframe.rename(index = myMap, columns = myMap)

In [None]:
# A dict renames only the listed labels and leaves every other label untouched.
# Reassigning the returned frame (instead of inplace=True) keeps the step explicit.
dframe = dframe.rename(index={"Tehran": "Tehran, capital"}, columns={"A": "Alpha"})
dframe

# Binning

In [None]:
# Sample years, and bin edges at every decade boundary from 1960 through 2020.
years = [1990, 1991, 1992, 2008, 2012, 2015, 1987, 1969, 2013, 2008, 1999]
decade_bins = list(range(1960, 2021, 10))

In [None]:
# Assign each year to the interval between consecutive bin edges that contains it.
decade_cats = pd.cut(years, bins=decade_bins)
decade_cats

In [None]:
# The distinct intervals that pd.cut built from decade_bins.
decade_cats.categories

In [None]:
# Inspect what kind of object pd.cut returned for a plain list input.
type(decade_cats)

In [None]:
# Count how many years fall in each decade bin, most frequent first.
# The top-level pd.value_counts() is deprecated; wrapping the categorical in a
# Series and calling .value_counts() gives the same result.
Series(decade_cats).value_counts()

In [None]:
# An integer `bins` asks pd.cut for that many equal-width bins over the range of
# `years`; `labels` names the two bins instead of showing the intervals.
pd.cut(years, bins=2, labels=['First half', 'Second half'])

# Outliers

In [None]:
# Seed the global NumPy RNG so that Restart & Run All reproduces the same numbers.
np.random.seed(42)

# 1000 rows x 4 columns of standard-normal samples.
dframe = DataFrame(np.random.randn(1000, 4))
dframe.head(n=3)

In [None]:
# Last rows of the frame (tail() shows five by default).
dframe.tail()

In [None]:
# Per-column summary statistics; check min / max here to spot extreme values.
dframe.describe()

In [None]:
# Take the column labelled 0 to look for outliers in a single column first.
col = dframe[0]
col.head()

In [None]:
# Entries of the column whose absolute value exceeds 3.
col[col.abs() > 3]

In [None]:
# Rows in which at least one column has an absolute value above 3.
dframe[(dframe.abs() > 3).any(axis=1)]

In [None]:
# Cap outliers in place: each value with |x| > 3 is overwritten by 3 carrying the
# original sign (np.sign gives -1 or +1), i.e. -3 or +3.
dframe[np.abs(dframe) > 3] = 3 * np.sign(dframe)

In [None]:
# Summary statistics again; after the capping step min / max lie within [-3, 3].
dframe.describe()

In [None]:
# Alternative treatment: mark values with |x| > 3 as NaN, then drop every row that
# contains a NaN.
# NOTE(review): when run after the capping cell above, no value exceeds 3 any more,
# so the mask is empty and nothing is dropped. To demonstrate this approach, apply
# it to a frame that still holds the un-capped samples.
dframe[np.abs(dframe) > 3] = np.nan
dframe.dropna(inplace=True)
dframe.describe()

# Permutation

#### Sampling without replacement

In [None]:
# 4x4 grid of the integers 0..15, so every row is easy to recognise after shuffling.
dframe = DataFrame(np.arange(16).reshape(4, -1))
dframe

In [None]:
# Random ordering of the positions 0..3; each position appears exactly once.
blender = np.random.permutation(4)
blender

In [None]:
# take() selects rows by position, so this returns the rows in the shuffled order.
dframe.take(blender)

#### Sampling with replacement

In [None]:
# Ten random positions into `box`; positions may repeat, which is what makes this
# sampling *with* replacement.
box = np.array(["Red", "white", "Black"])
shaker = np.random.randint(len(box), size=10)
shaker

In [None]:
# Pick the item at each sampled position.
hand_grabs = box[shaker]
hand_grabs