## Advanced Python Functions

### Groupby method

In [9]:
import pandas as pd

# create a simple dataframe with stock, person and sales
data = { 'Company': ['GOOG', 'GOOG', 'MSFT', 'MSFT', 'FB', 'FB'],
            'Person': ['Sam', 'Charlie', 'Amy', 'Vanessa', 'Carl', 'Sarah'],
            'Sales': [200, 120, 340, 124, 243, 350]}

company_df = pd.DataFrame(data) 

# group by company
byCompany = company_df.groupby('Company')

for key, item in byCompany:
    print("\n Group Key: {}".format(key))
    print(byCompany.get_group(key), "\n\n")



 Group Key: FB
  Company Person  Sales
4      FB   Carl    243
5      FB  Sarah    350 



 Group Key: GOOG
  Company   Person  Sales
0    GOOG      Sam    200
1    GOOG  Charlie    120 



 Group Key: MSFT
  Company   Person  Sales
2    MSFT      Amy    340
3    MSFT  Vanessa    124 




In [10]:
type(byCompany) # pandas.core.groupby.generic.DataFrameGroupBy

pandas.core.groupby.generic.DataFrameGroupBy

In [11]:
grouped = company_df.groupby('Company').sum()
print(grouped)

             Person  Sales
Company                   
FB        CarlSarah    593
GOOG     SamCharlie    320
MSFT     AmyVanessa    464


### Apply method

In [6]:
import numpy as np
import pandas as pd

df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
                   'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
                   'C': np.random.randn(8),
                   'D': np.random.randn(8)})

print(df)

# define a funtion
def get_sum(row):
    return row.sum()

# Apply the function
df['sum'] = df[['C', 'D']].apply(get_sum, axis=1)
print(df)

     A      B         C         D
0  foo    one -1.970511 -2.685626
1  bar    one  1.704706 -0.506112
2  foo    two  0.692032  1.162363
3  bar  three  0.346966  1.337943
4  foo    two -0.632235  1.282138
5  bar    two -1.508743 -0.688584
6  foo    one -0.355343  1.992399
7  foo  three  0.045312  0.412502
     A      B         C         D       sum
0  foo    one -1.970511 -2.685626 -4.656137
1  bar    one  1.704706 -0.506112  1.198594
2  foo    two  0.692032  1.162363  1.854395
3  bar  three  0.346966  1.337943  1.684909
4  foo    two -0.632235  1.282138  0.649903
5  bar    two -1.508743 -0.688584 -2.197327
6  foo    one -0.355343  1.992399  1.637056
7  foo  three  0.045312  0.412502  0.457814


In [17]:
# Maximum sales of each company using groupby and apply

print(company_df.groupby('Company').apply(lambda x: x['Sales'].max(), include_groups=False))

Company
FB      350
GOOG    200
MSFT    340
dtype: int64
