## Environment Initialization

In [1]:
import numpy
import pandas as pd
import collections

## Creating example data set.

In [3]:
# Example records: one entry per chef, keyed by column name.
x = dict(
    first=["Alex", "Gordon", "Marcus", "Emeril"],
    last=["Guarnaschelli", "Ramsay", "Samuelsson", "Lagasse"],
    age=[47, 53, 48, 60],
)
# Each dict key becomes a column; rows get the default 0..3 integer index.
df = pd.DataFrame(x)
df

Unnamed: 0,first,last,age
0,Alex,Guarnaschelli,47
1,Gordon,Ramsay,53
2,Marcus,Samuelsson,48
3,Emeril,Lagasse,60


## Merging two columns.

In [4]:
# Element-wise string concatenation: "<first>_<last>" for every row,
# stored as a new column on the same frame.
df["full_name"] = (df["first"] + "_") + df["last"]
df

Unnamed: 0,first,last,age,full_name
0,Alex,Guarnaschelli,47,Alex_Guarnaschelli
1,Gordon,Ramsay,53,Gordon_Ramsay
2,Marcus,Samuelsson,48,Marcus_Samuelsson
3,Emeril,Lagasse,60,Emeril_Lagasse


### Deleting the original name columns if necessary

In [5]:
# Remove the source name columns in a single call. errors="ignore" makes this a
# no-op for columns that are already gone, so the cell is safe to re-run, and
# rebinding `df` replaces the in-place mutation.
df = df.drop(columns=["first", "last"], errors="ignore")
df

Unnamed: 0,age,full_name
0,47,Alex_Guarnaschelli
1,53,Gordon_Ramsay
2,48,Marcus_Samuelsson
3,60,Emeril_Lagasse


## Replacing age with speed, as if each chef reported their speed multiple times

In [7]:
# Same chefs as before, but with repeated entries (Alex and Marcus appear twice)
# and a `speed` measurement in place of `age`.
x2 = dict(
    first=["Alex", "Gordon", "Marcus", "Emeril", "Alex", "Marcus"],
    last=["Guarnaschelli", "Ramsay", "Samuelsson", "Lagasse", "Guarnaschelli", "Samuelsson"],
    speed=[47, 53, 48, 60, 10, 14],
)
# Each dict key becomes a column; rows get the default 0..5 integer index.
df2 = pd.DataFrame(x2)
df2

Unnamed: 0,first,last,speed
0,Alex,Guarnaschelli,47
1,Gordon,Ramsay,53
2,Marcus,Samuelsson,48
3,Emeril,Lagasse,60
4,Alex,Guarnaschelli,10
5,Marcus,Samuelsson,14


### Again, combining their names and dropping the original columns

In [8]:
# Build the combined name only while both source columns still exist, so the
# cell can be re-run after they have been dropped without raising a KeyError.
if {"first", "last"}.issubset(df2.columns):
    df2["full_name"] = df2["first"] + "_" + df2["last"]
# errors="ignore" skips columns that are already gone; rebinding `df2`
# replaces the two guarded in-place drops.
df2 = df2.drop(columns=["first", "last"], errors="ignore")
df2

Unnamed: 0,speed,full_name
0,47,Alex_Guarnaschelli
1,53,Gordon_Ramsay
2,48,Marcus_Samuelsson
3,60,Emeril_Lagasse
4,10,Alex_Guarnaschelli
5,14,Marcus_Samuelsson


### Using a Counter to count how many times each full_name occurs

In [9]:
# Tally how many rows carry each full_name.
myCounter = collections.Counter(df2["full_name"])
# A plain loop prints one line per name. Printing is a side effect, so no
# throwaway list has to be built (and assigned) just to keep the cell from
# echoing a list of None values.
for key, value in myCounter.items():
    print(f"There are {value} {key}'s")

There are 2 Alex_Guarnaschelli's
There are 1 Gordon_Ramsay's
There are 2 Marcus_Samuelsson's
There are 1 Emeril_Lagasse's


### Alternatively, using the groupby method of a data frame.

In [10]:
# Count the non-null values in every remaining column, per full_name group.
df2.groupby(by="full_name").count()

Unnamed: 0_level_0,speed
full_name,Unnamed: 1_level_1
Alex_Guarnaschelli,2
Emeril_Lagasse,1
Gordon_Ramsay,1
Marcus_Samuelsson,2


### And making it pretty

In [11]:
# Named aggregation in tuple form: output column `count` is the "count"
# aggregate of the `speed` column within each full_name group.
df2.groupby(by="full_name").agg(count=("speed", "count"))

Unnamed: 0_level_0,count
full_name,Unnamed: 1_level_1
Alex_Guarnaschelli,2
Emeril_Lagasse,1
Gordon_Ramsay,1
Marcus_Samuelsson,2


# Question 3

### Using groupby

In [12]:
# Keep the GroupBy object around so the cells below can reuse it.
grouped = df2.groupby(by="full_name")

### Showing a specific group's table

In [13]:
# Pull out the rows belonging to a single group, selected by its key value.
grouped.get_group(name="Alex_Guarnaschelli")

Unnamed: 0,speed,full_name
0,47,Alex_Guarnaschelli
4,10,Alex_Guarnaschelli


### Showing mean, max, and min aggregates

In [14]:
# Per-group mean, requested through the string alias of the aggregation.
grouped.agg("mean")

Unnamed: 0_level_0,speed
full_name,Unnamed: 1_level_1
Alex_Guarnaschelli,28.5
Emeril_Lagasse,60.0
Gordon_Ramsay,53.0
Marcus_Samuelsson,31.0


In [15]:
# Per-group maximum, requested through the string alias of the aggregation.
grouped.agg("max")

Unnamed: 0_level_0,speed
full_name,Unnamed: 1_level_1
Alex_Guarnaschelli,47
Emeril_Lagasse,60
Gordon_Ramsay,53
Marcus_Samuelsson,48


In [16]:
# Per-group minimum, requested through the string alias of the aggregation.
grouped.agg("min")

Unnamed: 0_level_0,speed
full_name,Unnamed: 1_level_1
Alex_Guarnaschelli,10
Emeril_Lagasse,60
Gordon_Ramsay,53
Marcus_Samuelsson,14


### Naming the columns

In [107]:
# Named aggregation: output column `avg_speed` is the mean of `speed` per group.
# The string alias "mean" is used instead of numpy.mean: it gives the same
# values, while passing the NumPy callable emits a FutureWarning on pandas >= 2.1.
grouped.agg(avg_speed=pd.NamedAgg(column="speed", aggfunc="mean"))

Unnamed: 0_level_0,avg_speed
full_name,Unnamed: 1_level_1
Alex_Guarnaschelli,28.5
Emeril_Lagasse,60.0
Gordon_Ramsay,53.0
Marcus_Samuelsson,31.0


In [104]:
# Named aggregation: output column `max_speed` is the maximum of `speed` per group.
# The string alias "max" is used instead of numpy.max: it gives the same
# values, while passing the NumPy callable emits a FutureWarning on pandas >= 2.1.
grouped.agg(max_speed=pd.NamedAgg(column="speed", aggfunc="max"))

Unnamed: 0_level_0,max_speed
full_name,Unnamed: 1_level_1
Alex_Guarnaschelli,47
Emeril_Lagasse,60
Gordon_Ramsay,53
Marcus_Samuelsson,48


In [105]:
# Named aggregation: output column `min_speed` is the minimum of `speed` per group.
# The string alias "min" is used instead of numpy.min: it gives the same
# values, while passing the NumPy callable emits a FutureWarning on pandas >= 2.1.
grouped.agg(min_speed=pd.NamedAgg(column="speed", aggfunc="min"))

Unnamed: 0_level_0,min_speed
full_name,Unnamed: 1_level_1
Alex_Guarnaschelli,10
Emeril_Lagasse,60
Gordon_Ramsay,53
Marcus_Samuelsson,14
