In [1]:
# Suppose we want to grade on a curve, with letter grades distributed as follows:
# A: 20%, B: 30%, C: 30%, D: 10%, F: 10%

import pandas as pd

# One row per student: [exam1 score, exam2 score].
grades_df = pd.DataFrame(
    [
        [43, 24],  # Andre
        [81, 63],  # Barry
        [78, 56],  # Chris
        [75, 56],  # Dan
        [89, 67],  # Emilio
        [70, 51],  # Fred
        [91, 79],  # Greta
        [65, 46],  # Humbert
        [98, 72],  # Ivan
        [87, 60],  # James
    ],
    index=['Andre', 'Barry', 'Chris', 'Dan', 'Emilio',
           'Fred', 'Greta', 'Humbert', 'Ivan', 'James'],
    columns=['exam1', 'exam2'],
)

In [2]:
def convert_grades_curve(exam_grades):
    '''
    Assign curved letter grades to a collection of exam scores.

    Scores are ranked by quantile with pandas' built-in qcut():
    the bottom 10% receive 'F', the next 10% 'D', the next 30% 'C',
    the next 30% 'B', and the top 20% 'A'.

    qcut() reference:
    https://pandas.pydata.org/docs/reference/api/pandas.qcut.html
    '''
    # Cumulative quantile boundaries; each consecutive pair of edges
    # delimits the band for the label at the same position.
    quantile_edges = [0, 0.1, 0.2, 0.5, 0.8, 1]
    letter_grades = ['F', 'D', 'C', 'B', 'A']
    return pd.qcut(exam_grades, q=quantile_edges, labels=letter_grades)

In [3]:
# qcut() operates on a list, array, or Series, so the function can be
# called directly on a single column of the DataFrame. The output is
# the curved letter grade of each student for exam1.
convert_grades_curve(grades_df['exam1'])

Andre      F
Barry      B
Chris      C
Dan        C
Emilio     B
Fred       C
Greta      A
Humbert    D
Ivan       A
James      B
Name: exam1, dtype: category
Categories (5, object): [F < D < C < B < A]

In [4]:
# qcut() does not work on DataFrames, but DataFrame.apply() calls the
# function once per column, so each exam is curved separately.
grades_df.apply(convert_grades_curve)

Unnamed: 0,exam1,exam2
Andre,F,F
Barry,B,B
Chris,C,C
Dan,C,C
Emilio,B,B
Fred,C,C
Greta,A,A
Humbert,D,D
Ivan,A,A
James,B,B


In [8]:
def standardize(df):
    '''
    Standardize each column of the given DataFrame.

    To standardize a variable, convert each value to the number of
    standard deviations it is above or below the mean of its column.
    The standard deviation is the pandas default for .std(), i.e. the
    sample standard deviation (ddof=1).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns support .mean() and .std().

    Returns
    -------
    The result of df.apply(): one standardized column per input column.
    '''
    # The per-column formula is written inline so this cell does not rely
    # on a helper defined in a different cell; it therefore still works
    # when the notebook is run top to bottom on a fresh kernel.
    return df.apply(lambda column: (column - column.mean()) / column.std())

# Display the standardized scores for both exam columns.
standardize(grades_df)

Unnamed: 0,exam1,exam2
Andre,-2.196525,-2.186335
Barry,0.208891,0.366571
Chris,0.01899,-0.091643
Dan,-0.170911,-0.091643
Emilio,0.715295,0.628408
Fred,-0.487413,-0.418938
Greta,0.841896,1.413917
Humbert,-0.803916,-0.746234
Ivan,1.284999,0.955703
James,0.588694,0.170194


In [7]:
# Building the standardize() function above, step by step

# First, build a function that standardizes a single column
def standardize_column(column):
    '''
    Express every value of one column as its distance from the column
    mean, measured in units of the column's standard deviation.
    '''
    deviations = column - column.mean()
    return deviations / column.std()

# Try the helper on a single column: exam1 scores in standard deviations from the mean.
standardize_column(grades_df['exam1'])

Andre     -2.196525
Barry      0.208891
Chris      0.018990
Dan       -0.170911
Emilio     0.715295
Fred      -0.487413
Greta      0.841896
Humbert   -0.803916
Ivan       1.284999
James      0.588694
Name: exam1, dtype: float64

In [9]:
# The solution can also be written with a nested function:
def standardize(df):
    '''
    Standardize each column of df: every value becomes the number of
    standard deviations it lies above or below its column's mean.
    '''
    def to_standard_units(values):
        # Helper applied to one column at a time by df.apply() below.
        center = values.mean()
        spread = values.std()
        return (values - center) / spread

    return df.apply(to_standard_units)

# Display the standardized scores produced by the nested-function version.
standardize(grades_df)

Unnamed: 0,exam1,exam2
Andre,-2.196525,-2.186335
Barry,0.208891,0.366571
Chris,0.01899,-0.091643
Dan,-0.170911,-0.091643
Emilio,0.715295,0.628408
Fred,-0.487413,-0.418938
Greta,0.841896,1.413917
Humbert,-0.803916,-0.746234
Ivan,1.284999,0.955703
James,0.588694,0.170194
