In [None]:
# Step-by-step version: load the data, keep rows where col2 is negative,
# demean col1, then take the per-key standard deviation of the demeaned column.
df = load_data()
# .copy() makes df2 an independent frame, so the column assignment below does
# not write to a slice of df (avoids pandas' SettingWithCopyWarning).
df2 = df[df['col2'] < 0].copy()
df2['col1_demeaned'] = df2['col1'] - df2['col1'].mean()
result = df2.groupby('key').col1_demeaned.std()

In [None]:
# Usual non-functional way: take a copy of df, then add column 'k' to that
# copy with an assignment statement.
df2 = df.copy()
df2['k'] = v

In [None]:
# Functional assign way: assign returns a new object with column 'k' set to v
# instead of modifying df in place, so it can be used inside a method chain.
df2 = df.assign(k=v)

In [None]:
# Add the demeaned column with assign, then chain straight into the group-wise
# standard deviation. col1 is demeaned by its own mean, matching the
# step-by-step version that subtracts df2['col1'].mean().
result = (df2.assign(col1_demeaned=df2.col1 - df2.col1.mean())
          .groupby('key')
          .col1_demeaned.std())

In [1]:
#One thing to keep in mind when doing method chaining is that you may need to refer to temporary objects. In the
#preceding example, we cannot refer to the result of load_data until it has been assigned to the temporary variable df.
#To help with this, assign and many other pandas functions accept function-like arguments, also known as callables.

In [2]:
#To show callables in action, consider a fragment of the example from before:

In [5]:
# Fragment of the earlier step-by-step example: the result of load_data() is
# bound to df first so that the boolean filter can refer to it by name.
df = load_data()
df2 = df[df['col2'] < 0]
# This can be rewritten as a single chained expression using a callable:

In [None]:
# Index the result of load_data() directly with a callable instead of naming it first.
df = (load_data()
      [lambda x: x.col2 < 0])
# Here, the result of load_data is not assigned to a variable, so the function passed into []
# is then bound to the object at that stage of the method chain.

# We can continue, then, and write the entire sequence as a single chained expression.
# load_data must be called (load_data()) before it is indexed, and the filter
# keeps rows with col2 < 0, the same condition as the step-by-step version.
result = (load_data()
          [lambda x: x.col2 < 0]
          # The callable receives the filtered frame, so col1 is demeaned
          # using the mean of the rows that survived the filter.
          .assign(col1_demeaned=lambda x: x.col1 - x.col1.mean())
          .groupby('key')
          .col1_demeaned.std())

# The pipe Method

In [6]:
#You can accomplish a lot with built-in pandas functions and the approaches to method chaining with
#callables that we just looked at. However, sometimes you need to use your own functions or functions
#from third-party libraries. This is where the pipe method comes in.

In [None]:
# Sequential style: each function's result is stored in an intermediate
# variable and passed as the first argument to the next call.
a = f(df, arg1=v1)
b = g(a, v2, arg3=v3) 
c = h(b, arg4=v4)

In [None]:
# When using functions that accept and return Series or DataFrame objects, you can rewrite this using calls to pipe.
# Each .pipe(func, ...) call passes the current object as func's first argument.
result = (df.pipe(f, arg1=v1)
            .pipe(g, v2, arg3=v3)
            .pipe(h, arg4=v4))  # h, mirroring c = h(b, arg4=v4) in the sequential form

In [7]:
#The statements f(df) and df.pipe(f) are equivalent, but pipe makes chained invocation easier.

In [None]:
def group_demean(df, by, cols):
    """Return a copy of ``df`` with the listed columns demeaned within groups.

    Parameters
    ----------
    df : DataFrame
        Input frame; it is not modified.
    by : label or list of labels
        Grouping key(s), passed straight to ``df.groupby``.
    cols : iterable of column labels
        Columns from which the per-group mean is subtracted.

    Returns
    -------
    DataFrame
        Copy of ``df`` in which each column in ``cols`` has been replaced by
        its value minus the mean of that column within the row's group.
    """
    grouped = df.groupby(by)
    demeaned = df.copy()
    for col in cols:
        # transform('mean') broadcasts each group's mean back to the rows of
        # that group, so the subtraction lines up with df's index.
        group_means = grouped[col].transform('mean')
        demeaned[col] = df[col] - group_means
    return demeaned

In [None]:
# Then it is possible to write:
# pipe passes the filtered frame as group_demean's first argument (df), with
# ['key1', 'key2'] as `by` and ['col1'] as `cols`.
result = (df[df.col1 < 0]
          .pipe(group_demean, ['key1', 'key2'], ['col1']))
