# <span style="color:darkblue"> Lecture 8d: Local/Global and Apply </span>

<font size = "5">



## <span style="color:darkblue"> I. Import Libraries </span>

In [1]:
# the "pandas" library is for manipulating datasets

import pandas as pd
import numpy as np


## <span style="color:darkblue"> II. Local/Global Variables </span>

<font size="5"> 

Most of the variables we've defined so far are "global"

- Stored in working environment
- Can be referenced in other parts of the notebook



<font size = "5">
Example:

In [2]:
# Two global variables: a string and an integer
message_hello = "hello"
number3 = 3

In [3]:
# Both variables were defined in an earlier cell.
# They can be used here because they are global.
print(message_hello + " world")
print(number3 * 2)

hello world
6


<font size = "5">

Any "global" variable can be referenced inside functions

- However, this can lead to mistakes
- Preferably, include **all** the inputs as parameters

<font size = "5">

$f(x,y,z) = x + y + z$

In [4]:
# Recommended example: every input is an explicit parameter
def fn_add_recommended(x,y,z):
    """Return x + y + z, using only the values passed as arguments."""
    total = x + y + z
    return total

print(fn_add_recommended(x = 1, y = 2, z = 5))
print(fn_add_recommended(x = 1, y = 2, z = 10))


8
13


In [5]:
# Example that runs (but not recommended)
# "z" is not a parameter and is never assigned inside the function,
# so Python looks it up in the working environment (global scope)
# every time the function is called
def fn_add_notrecommended(x,y):
    """Return x + y + z, where z is read from the global scope."""
    return(x + y + z)

# The same call gives different results depending on the global "z"
z = 5
print(fn_add_notrecommended(x = 1, y = 2))
z = 10
print(fn_add_notrecommended(x = 1, y = 2))



8
13


<font size ="5">

Variables defined inside functions are "local"

- Stored "temporarily" while running
- Includes: Parameters + Intermediate variables


<font size = "5">

Local variables supersede global variables

In [6]:
# This is an example where we define a quadratic function
# (x,y) are both local variables of the function
#
# When we call the function, only the arguments matter.
# Any intermediate value inside the function (such as "y") is temporary
# and does not overwrite a global variable with the same name.

def fn_square(x):
    """Return the square of x, storing it first in the local variable y."""
    y = x**2
    return(y)

# Global variables that share their names with the function's local variables
x = 5
y = -5

print(fn_square(x = 1))

# The global x and y are unchanged by the call
print(x)
print(y)


1
5
-5


<font size = "5">

Local variables are **not** stored in the working environment

In [7]:
# The following code assigns the global variables x and y.
# The x and y used inside "fn_square" are local to the function,
# so calling it does not change the global values.

x = 5
y = 4

print("Example 1:")
print(fn_square(x = 10))
print(x)
print(y)

print("Example 2:")
print(fn_square(x = 20))
print(x)
print(y)


Example 1:
100
5
4
Example 2:
400
5
4


<font size = "5">

To permanently modify a global variable from inside a function, use the "global" keyword

In [8]:
def modify_x():
    """Add 5 to the global variable x. Nothing is returned."""
    # "global x" makes the assignment below act on the global x
    # instead of creating a new local variable
    global x
    x = x + 5

x = 1
# Now, running the function will permanently increase x by 5.
modify_x()
print(x)

6


In [9]:
# Each additional call adds another 5 to the global x
modify_x()
print(x)

11


<font size = "5">

Try it yourself:

- What happens if we run "modify_x" twice?
- What happens if we add "global y" inside "fn_square"?

In [10]:
# Write your own code here






25
625


## <span style="color:darkblue"> III. Operations over data frames (apply/map) </span>


<font size = "5">

Create an empty data frame

In [11]:
# Start from a data frame with no rows and no columns
data = pd.DataFrame()

In [12]:
# The data frame has no columns and no rows yet
print(data)

Empty DataFrame
Columns: []
Index: []


<font size = "5">

Add variables

In [13]:
# The following are lists with values for different individuals
# "age" is the number of years
# "num_underage_siblings" is the total number of underage siblings
# "num_adult_siblings" is the total number of adult siblings
#
# Each list has one entry per individual (5 in total), in the same order.
# Assigning a list to data["name"] creates a new column called "name".

data["age"] = [18,29,15,32,6]
data["num_underage_siblings"] = [0,0,1,1,0]
data["num_adult_siblings"] = [1,0,0,1,0]


In [14]:
# Writing the data frame on the last line of a cell displays it as a table
data

Unnamed: 0,age,num_underage_siblings,num_adult_siblings
0,18,0,1
1,29,0,0
2,15,1,0
3,32,1,1
4,6,0,0


<font size = "5">

Define functions

In [15]:
# The first two functions return True/False depending on age constraints
# The third function returns the sum of two numbers
# The fourth function returns a string with the age bracket

# True when age is 18 or more
fn_iseligible_vote = lambda age: age >= 18

# True when age is at least 20 and below 30
fn_istwenties = lambda age: (age >= 20) & (age < 30)

# Sum of the two inputs
fn_sum = lambda x,y: x + y

def fn_agebracket(age):
    """Classify a single age into an age bracket.

    Parameters
    ----------
    age : number
        One individual's age (a single value, not a whole column).

    Returns
    -------
    str
        "Adult" if age >= 18, "Adolescent" if 10 <= age < 18,
        and "Child" otherwise.
    """
    # "if" tests one value at a time, so the two conditions are combined
    # with a chained comparison rather than the element-wise "&" operator
    if age >= 18:
        status = "Adult"
    elif 10 <= age < 18:
        status = "Adolescent"
    else:
        status = "Child"
    return status


<font size = "5">
Applying functions with one argument: <br>

```python
 apply(myfunction)
 ```
 - Takes a dataframe series (a column vector) as an input
 - Computes function separately for each individual


In [16]:
# The function "apply" will extract each element and return the function value
# It is similar to running a "for-loop" over each element
# The results are stored as new columns of "data"

data["can_vote"]    = data["age"].apply(fn_iseligible_vote)
data["in_twenties"] = data["age"].apply(fn_istwenties)
data["age_bracket"] = data["age"].apply(fn_agebracket)


# NOTE: The following code also works:
# data["can_vote"]    = data["age"].apply(lambda age: age >= 18)
# data["in_twenties"] = data["age"].apply(lambda age: (age >= 20) & (age < 30))

display(data)


Unnamed: 0,age,num_underage_siblings,num_adult_siblings,can_vote,in_twenties,age_bracket
0,18,0,1,True,False,Adult
1,29,0,0,True,True,Adult
2,15,1,0,False,False,Adolescent
3,32,1,1,True,False,Adult
4,6,0,0,False,False,Child


<font size = "5">

Creating a new variable

In [17]:
# The function can be written inline as a lambda.
# Assigning to a column name that does not exist yet creates the column.
data['new_var'] = data['age'].apply(lambda age: age >= 18)
data

Unnamed: 0,age,num_underage_siblings,num_adult_siblings,can_vote,in_twenties,age_bracket,new_var
0,18,0,1,True,False,Adult,True
1,29,0,0,True,True,Adult,True
2,15,1,0,False,False,Adolescent,False
3,32,1,1,True,False,Adult,True
4,6,0,0,False,False,Child,False


<font size = "5">

Dropping an existing variable

In [18]:
# drop() returns a data frame without the listed columns,
# so the result is assigned back to "data" to keep the change.
# NOTE: running this cell twice raises an error, because "new_var"
# no longer exists after the first run.
data = data.drop(columns=['new_var'])
data

Unnamed: 0,age,num_underage_siblings,num_adult_siblings,can_vote,in_twenties,age_bracket
0,18,0,1,True,False,Adult
1,29,0,0,True,True,Adult
2,15,1,0,False,False,Adolescent
3,32,1,1,True,False,Adult
4,6,0,0,False,False,Child
