In [1]:
# More of this on intermediate python
import pandas as pd
# Load the sales table, using its 'month' column as the row index.
df = pd.read_csv(filepath_or_buffer='sales.csv', index_col='month')
df

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,47,12.0,17
Feb,110,50.0,31
Mar,221,89.0,72
Apr,77,87.0,20
May,132,,52
Jun,205,60.0,55


## USE DATAFRAME VECTORIZED METHODS

In [2]:
# Whole dozens of eggs per month: floor-divide the egg counts by 12
# using the Series method rather than an operator.
df['dozens_of_eggs'] = df['eggs'].floordiv(12)
df

Unnamed: 0_level_0,eggs,salt,spam,dozens_of_eggs
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Jan,47,12.0,17,3
Feb,110,50.0,31,9
Mar,221,89.0,72,18
Apr,77,87.0,20,6
May,132,,52,11
Jun,205,60.0,55,17


## USE NUMPY VECTORIZED FUNCTIONS (fastest)

In [3]:
# NumPy is imported in this cell, where it is first used.
import numpy as np
# Apply NumPy's floor-division function to the whole DataFrame in one call:
# every column (including the already-converted dozens_of_eggs) is divided by 12.
# The result is only displayed, not assigned, so df itself is not modified.
np.floor_divide(df, 12) # Convert to dozens unit

  


Unnamed: 0_level_0,eggs,salt,spam,dozens_of_eggs
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Jan,3.0,1.0,1.0,0.0
Feb,9.0,4.0,2.0,0.0
Mar,18.0,7.0,6.0,1.0
Apr,6.0,7.0,1.0,0.0
May,11.0,,4.0,0.0
Jun,17.0,5.0,4.0,1.0


## USE PLAIN PYTHON FUNCTIONS

In [4]:
def dozens(n):
    """Return how many whole dozens fit into ``n``.

    The value is floor-divided by 12, so this works for plain numbers and
    for any object that supports ``//`` (it is applied to DataFrame columns
    via ``df.apply`` in this notebook).
    """
    items_per_dozen = 12
    return n // items_per_dozen
# Two equivalent ways to run plain Python code over each column of df.
# Only the last expression of a cell is rendered, so the first result is
# computed and then discarded; neither call modifies df.
df.apply(dozens)  # named function: convert to dozens unit

df.apply(lambda column: column // 12)  # same conversion as an anonymous function

Unnamed: 0_level_0,eggs,salt,spam,dozens_of_eggs
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Jan,3,1.0,1,0
Feb,9,4.0,2,0
Mar,18,7.0,6,1
Apr,6,7.0,1,0
May,11,,4,0
Jun,17,5.0,4,1


## TRANSFORM INDEX

In [5]:
# Upper-case every index label with the vectorized .str accessor.
# The index keeps its name ('month') after this assignment.
df.index = df.index.str.upper()
df

Unnamed: 0_level_0,eggs,salt,spam,dozens_of_eggs
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
JAN,47,12.0,17,3
FEB,110,50.0,31,9
MAR,221,89.0,72,18
APR,77,87.0,20,6
MAY,132,,52,11
JUN,205,60.0,55,17


In [6]:
# Lower-case every index label by mapping a Python callable over the index.
df.index = df.index.map(lambda label: label.lower())
df

Unnamed: 0_level_0,eggs,salt,spam,dozens_of_eggs
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
jan,47,12.0,17,3
feb,110,50.0,31,9
mar,221,89.0,72,18
apr,77,87.0,20,6
may,132,,52,11
jun,205,60.0,55,17


In [7]:
# Upper-case once more, this time with a plain list comprehension.
# Assigning a bare list builds a fresh index, so the index name is dropped.
df.index = [label.upper() for label in df.index]
df

Unnamed: 0,eggs,salt,spam,dozens_of_eggs
JAN,47,12.0,17,3
FEB,110,50.0,31,9
MAR,221,89.0,72,18
APR,77,87.0,20,6
MAY,132,,52,11
JUN,205,60.0,55,17


In [8]:
# Label both axes: give the column axis and the row index a name.
df.columns.name = 'PRODUCTS'
df.index.name = 'MONTHS'
df

PRODUCTS,eggs,salt,spam,dozens_of_eggs
MONTHS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
JAN,47,12.0,17,3
FEB,110,50.0,31,9
MAR,221,89.0,72,18
APR,77,87.0,20,6
MAY,132,,52,11
JUN,205,60.0,55,17


## CREATE A NEW COLUMN FROM OTHER COLUMNS

In [9]:
# Derive a column from two existing ones. The addition is element-wise, so a
# row with a missing salt value gets a missing result as well.
df['salty_eggs'] = df['salt'] + df['dozens_of_eggs']
df

PRODUCTS,eggs,salt,spam,dozens_of_eggs,salty_eggs
MONTHS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
JAN,47,12.0,17,3,15.0
FEB,110,50.0,31,9,59.0
MAR,221,89.0,72,18,107.0
APR,77,87.0,20,6,93.0
MAY,132,,52,11,
JUN,205,60.0,55,17,77.0


## USE MAP WITH A DICTIONARY

In [11]:
# Load the election turnout table and print a summary of its columns and dtypes.
election = pd.read_csv(filepath_or_buffer='pennsylvania2012_turnout.csv')
election.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67 entries, 0 to 66
Data columns (total 9 columns):
county     67 non-null object
state      67 non-null object
total      67 non-null int64
Obama      67 non-null float64
Romney     67 non-null float64
winner     67 non-null object
voters     67 non-null int64
turnout    67 non-null float64
margin     67 non-null float64
dtypes: float64(4), int64(2), object(3)
memory usage: 4.8+ KB


In [12]:
# Lookup table translating each winning candidate into a color.
red_vs_blue = dict(Obama="blue", Romney="red")

# Translate the 'winner' column through the lookup table into a new 'color' column.
election['color'] = election['winner'].map(red_vs_blue)

# Show the first five rows as plain text.
print(election.head())

      county state   total      Obama     Romney  winner  voters    turnout  \
0      Adams    PA   41973  35.482334  63.112001  Romney   61156  68.632677   
1  Allegheny    PA  614671  56.640219  42.185820   Obama  924351  66.497575   
2  Armstrong    PA   28322  30.696985  67.901278  Romney   42147  67.198140   
3     Beaver    PA   80015  46.032619  52.637630  Romney  115157  69.483401   
4    Bedford    PA   21444  22.057452  76.986570  Romney   32189  66.619031   

      margin color  
0  27.629667   red  
1  14.454399  blue  
2  37.204293   red  
3   6.605012   red  
4  54.929118   red  
