<center><h1>Apply, Map and Applymap in Pandas</h1></center>

In [1]:
# Import the pandas library and assign it the alias 'pd'
import pandas as pd

import warnings
# Suppress FutureWarning messages
warnings.simplefilter(action='ignore', category=FutureWarning)

In [6]:
# Build the sample DataFrame 'df', written one squad per row. Columns:
#   Region  - region where revenue and cost are recorded
#   Team    - team within the region
#   Squad   - squad within the team
#   Revenue - revenue generated by the squad
#   Cost    - cost incurred by the squad
df = pd.DataFrame(
    [
        ('North', 'Republicans', 'A', 7500, 5200),
        ('West', 'Republicans', 'B', 5500, 5100),
        ('East', 'Republicans', 'C', 2750, 4400),
        ('South', 'Republicans', 'D', 6400, 5300),
        ('North', 'Democrats', 'E', 2300, 1250),
        ('West', 'Democrats', 'F', 3750, 1300),
        ('East', 'Democrats', 'G', 1900, 2100),
        ('South', 'Democrats', 'H', 575, 50),
    ],
    columns=["Region", "Team", "Squad", "Revenue", "Cost"],
)


##### Use apply() to alter values along an axis of a **dataframe**, or the values of a **series**, by applying a function

In [7]:
# Label each row 'Profit' when its 'Revenue' is strictly greater than its 'Cost',
# and 'Loss' otherwise (so a break-even row is labelled 'Loss').
# axis='columns' makes apply() call the lambda once per row instead of once per column.
df['Profit'] = df.apply(
    lambda row: 'Profit' if row['Revenue'] > row['Cost'] else 'Loss',
    axis='columns',
)

# Display the updated DataFrame 'df', now including the 'Profit' column
df


Unnamed: 0,Region,Team,Squad,Revenue,Cost,Profit
0,North,Republicans,A,7500,5200,Profit
1,West,Republicans,B,5500,5100,Profit
2,East,Republicans,C,2750,4400,Loss
3,South,Republicans,D,6400,5300,Profit
4,North,Democrats,E,2300,1250,Profit
5,West,Democrats,F,3750,1300,Profit
6,East,Democrats,G,1900,2100,Loss
7,South,Democrats,H,575,50,Profit


##### Use map() to substitute each value in a **series**, using either a function, dictionary, or series.

In [8]:
# Lookup table 'team_map': each key is a team name and each value is the
# color associated with that team ('Republicans' -> 'Red', 'Democrats' -> 'Blue')
team_map = dict(Republicans="Red", Democrats="Blue")


In [9]:
# Look up every value of the 'Team' column in the 'team_map' dictionary
# and store the resulting colors in a new 'Team Color' column
df["Team Color"] = df["Team"].map(team_map)

# Show 'df' with the new column
df

Unnamed: 0,Region,Team,Squad,Revenue,Cost,Profit,Team Color
0,North,Republicans,A,7500,5200,Profit,Red
1,West,Republicans,B,5500,5100,Profit,Red
2,East,Republicans,C,2750,4400,Loss,Red
3,South,Republicans,D,6400,5300,Profit,Red
4,North,Democrats,E,2300,1250,Profit,Blue
5,West,Democrats,F,3750,1300,Profit,Blue
6,East,Democrats,G,1900,2100,Loss,Blue
7,South,Democrats,H,575,50,Profit,Blue


##### Use applymap() to apply a function to each element in your **dataframe** (pandas 2.1 and later deprecate `applymap()` and rename it to `DataFrame.map()`)

In [10]:
# Replace every element of 'df' with the length of its string representation.
# The element-wise method is named DataFrame.map in pandas >= 2.1, where
# DataFrame.applymap is deprecated; older releases only provide applymap().
# Pick whichever one the installed pandas offers so the cell runs on both.
apply_elementwise = df.map if hasattr(pd.DataFrame, 'map') else df.applymap
apply_elementwise(lambda x: len(str(x)))


Unnamed: 0,Region,Team,Squad,Revenue,Cost,Profit,Team Color
0,5,11,1,4,4,6,3
1,4,11,1,4,4,6,3
2,4,11,1,4,4,4,3
3,5,11,1,4,4,6,3
4,5,9,1,4,4,6,4
5,4,9,1,4,4,6,4
6,4,9,1,4,4,4,4
7,5,9,1,3,2,6,4


### If all else fails, use a for loop.

In [12]:
# Total revenue of each region, computed once up front instead of
# re-filtering the whole DataFrame for every row inside the loop
region_totals = df.groupby('Region')['Revenue'].sum()

# List that collects, for each row, its revenue divided by the total
# revenue of the row's region
new_col = []

# Walk the rows by value rather than by integer label, so the loop does not
# rely on 'df' having the default 0..n-1 index.
# Assumes every row has a non-missing 'Region' (groupby drops missing keys).
for region, revenue in zip(df['Region'], df['Revenue']):
    # Share of the region's revenue contributed by the current row
    new_col.append(revenue / region_totals[region])


In [14]:
# Attach the ratios collected in the list 'new_col' to 'df'
# as the column 'Revenue Share of Region'
df['Revenue Share of Region'] = new_col

# Display the rows ordered by 'Region'. The sorted result is only shown here:
# because it is not assigned back to 'df' (or to a new variable), 'df' itself
# keeps its original row order.
df.sort_values('Region')


Unnamed: 0,Region,Team,Squad,Revenue,Cost,Profit,Team Color,Revenue Share of Region
2,East,Republicans,C,2750,4400,Loss,Red,0.591398
6,East,Democrats,G,1900,2100,Loss,Blue,0.408602
0,North,Republicans,A,7500,5200,Profit,Red,0.765306
4,North,Democrats,E,2300,1250,Profit,Blue,0.234694
3,South,Republicans,D,6400,5300,Profit,Red,0.917563
7,South,Democrats,H,575,50,Profit,Blue,0.082437
1,West,Republicans,B,5500,5100,Profit,Red,0.594595
5,West,Democrats,F,3750,1300,Profit,Blue,0.405405
