In [None]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

## Comparison ##

Let's look at some basic comparisons:

In [None]:
3 > 1

In [None]:
3 < 1

In [None]:
3 == 3

What does `3 <= 3` evaluate to?

In [None]:
# ...

What does `3 < 3` evaluate to?

In [None]:
# ...

What does `3 != 3` evaluate to?

In [None]:
# ...

Comparisons return a value of `True` or `False`, which are *Boolean* values:

In [None]:
type(True)

In [None]:
type(False)

In [None]:
type(3 > 1)

In [None]:
# The values True and False are case-sensitive
true

In [None]:
# True and False are not strings!

In [None]:
'True' == True

We can also compare some non-numeric values, like strings:

In [None]:
'Rat' == 'Mouse'

In [None]:
# Why is this true? (Strings are compared alphabetically, character by character.)
'Capybara' > 'Beaver'

When checking for equality, we use two equal signs `==` instead of one equal sign `=`:

In [None]:
3 = 4

Remember that `=` is already used in Python to assign values to variables, so `3 = 4` will try to overwrite the value of `3`. Fortunately, Python does not allow this!

In [None]:
3 == 4

We can compare the values of ints and floats:

In [None]:
3 == 3.0

In [None]:
3 == 3.0000000001

Python has limited numerical precision:

In [None]:
3 == 3.0000000000000001
# Floating-point precision: a Python float keeps only about 15-17 significant
# decimal digits, so 3.0000000000000001 is stored as exactly 3.0

**Question:** let's define some variables:

In [None]:
x = 14
y = 3

What does `x > 15` return? How about `x > y`?

In [None]:
# ...

## Combining comparisons

We can also check several comparisons simultaneously:

In [None]:
12 < x < 20

In [None]:
10 < x-y < 13

In [None]:
1 < 2 < 3 < 4 <= 5 <= 6 != 7

In [None]:
# The expression is True if and only if every single comparison is True.
# Why is this expression False?
1 < 2 < 3 < 4 <= 5 <= 6 != 7 > 8

## Comparisons with arrays

In [None]:
# Compare with a single value...
make_array(1, 2, 3, 4, 5) >= 3

In [None]:
# ...or compare multiple values
x = make_array(1, 2, 3, 4, 5)
y = make_array(5, 4, 3, 2, 1)
x == y

In [None]:
# We can only compare two arrays with the same length:
x > make_array(3, 4, 5)

Summing an array or list of `bool` values will count the number of `True` values:

In [None]:
sum(make_array(True, True, False, False, False))

In [None]:
True + True + True + False

In [None]:
sum(x == y)

**Question:** how many capybaras are in this array of rodents?

In [None]:
rodents = make_array('capybara', 'guinea pig', 'beaver', 'mouse', 'rat', 'guinea pig', 'mouse', 
           'guinea pig', 'hamster', 'hamster', 'guinea pig', 'beaver', 'mouse', 'hamster', 
           'mouse', 'guinea pig', 'guinea pig', 'hamster', 'mouse', 'capybara', 'capybara', 
           'mouse', 'marmot', 'capybara', 'mouse', 'marmot', 'capybara', 'guinea pig', 'beaver', 
           'beaver', 'marmot', 'marmot', 'capybara', 'hamster', 'beaver', 'hamster', 'hamster', 
           'capybara', 'beaver', 'hamster', 'mouse', 'capybara', 'guinea pig', 'capybara', 'capybara', 
           'mouse', 'marmot', 'beaver', 'rat', 'guinea pig', 'guinea pig', 'hamster')

In [None]:
# ...

## `If` Statements

`If` statements are often used in functions to change behavior based on the value of the arguments. Here is a simple example:

In [None]:
def sign(x):
    """
    Describe the sign of a number.

    Returns the string "positive" when x is greater than 0, "negative" when
    x is less than 0, and "zero" in every other case.
    """
    # Each test returns immediately, so later lines only run when the
    # earlier comparisons were False.
    if x > 0:
        return "positive"
    if x < 0:
        return "negative"
    return "zero"

In [None]:
sign(1)

In [None]:
sign(-1)

In [None]:
sign(0)

Here is another example:

In [None]:
def federal_income_tax(income):
    """
    Calculate 2022 federal income tax for single filers.

    income is the taxable income in dollars. The return value is the tax
    owed in dollars; a negative income owes 0.0.

    Each branch below handles one marginal tax bracket. The first number is
    the total tax owed on all of the lower brackets, and the second term
    taxes only the income above the bracket's lower edge at that bracket's
    rate. The upper edge tested in one branch must therefore equal the lower
    edge subtracted in the next branch.
    """
    if income < 0:
        return 0.0
    elif income < 10275:
        return 0.1 * income
    elif income < 41775:
        return 1027.50 + 0.12 * (income - 10275)
    elif income < 89075:
        return 4807.50 + 0.22 * (income - 41775)
    elif income < 170050:
        return 15213.50 + 0.24 * (income - 89075)
    elif income < 215950:
        return 34647.50 + 0.32 * (income - 170050)
    elif income < 539900:
        return 49335.50 + 0.35 * (income - 215950)
    else:
        return 162718 + 0.37 * (income - 539900)

In [None]:
# Use the apply method to calculate taxes for taxable incomes from $0 up to
# (but not including) $600000, in steps of $20
incomes_array = np.arange(0, 600000, 20)
# Start a table with one row per taxable income
incomes_table = Table().with_column('taxable income', incomes_array)
# apply calls federal_income_tax once for each value in the 'taxable income'
# column and returns the results as an array, which becomes the second column
incomes_table = incomes_table.with_column(
    'federal income tax',
    incomes_table.apply(federal_income_tax, 'taxable income'))
# Preview the first 5 rows
incomes_table.show(5)

# Plot federal income tax against taxable income (taxable income on the horizontal axis)
incomes_table.plot('taxable income')

Here is a complicated example:

In [None]:
def rock_paper_scissors(player1, player2):
    """
    Decide the result of one round of rock-paper-scissors.

    Each player should provide one of the strings 'rock', 'paper', or
    'scissors'. The return value is a message that names the winner,
    announces a draw, or reports which player gave an invalid value.
    Player 1's move is checked before player 2's, so if both moves are
    invalid the message is about player 1.
    """
    valid_plays = ('rock', 'paper', 'scissors')

    # Reject unrecognized moves first, starting with player 1
    if player1 not in valid_plays:
        return 'Player 1 gave an invalid value "' + str(player1) + '"'
    if player2 not in valid_plays:
        return 'Player 2 gave an invalid value "' + str(player2) + '"'

    # From here on both moves are valid, so identical moves mean a draw
    if player1 == player2:
        return "It's a draw!"

    # Each move (key) defeats exactly one other move (value)
    beats = {'rock': 'scissors', 'paper': 'rock', 'scissors': 'paper'}
    if beats[player1] == player2:
        return "Player 1 wins!"

    # The moves differ and player 1 did not win, so player 2 did
    return "Player 2 wins!"

In [None]:
rock_paper_scissors('rock', 'paper')

In [None]:
rock_paper_scissors('scissors', 'paper')

In [None]:
rock_paper_scissors('paper', 'chainsaw')

Make sure to indent properly!

In [None]:
# This cell is broken on purpose: running it raises an IndentationError,
# because each return statement must be indented beneath the if/elif/else
# line that it belongs to.
def bad_sign(x):
    if x > 0:
    return 'positive' # everything to be executed when x > 0 should be indented
    elif x < 0:
    return 'negative' # this line is also missing an indent
    else:
    return 'zero' # this line is also missing an indent

**Discussion Question:** suppose we want to bin a dataset by placing values into the bins $[0, 1)$, $[1, 2)$, and $[2, 3]$. For example, the number 1 would belong to the $[1, 2)$ bin. Write a function called `bin_data` that takes a single argument `x`. If `x` belongs to one of the three bins, return the left edge of that bin (e.g., if `x = 0.5`, we would return `0`). Otherwise, return the string `"None"`.

In [None]:
# ...

## `For` Loops

`for` loops allow us to iterate over arrays or lists, performing a set of instructions for each value in the sequence:

In [None]:
pet_array = make_array('mouse', 'capybara', 'marmot')

for pet in pet_array:
    message = 'I love my ' + pet
    print(message)

In [None]:
pet = pet_array.item(0)
message = 'I love my ' + pet
print(message)

pet = pet_array.item(1)
message = 'I love my ' + pet
print(message)

pet = pet_array.item(2)
message = 'I love my ' + pet
print(message)

Here's another example, which iterates over a list instead of an array:

In [None]:
for number in [1, 2, 3, 4, 5, 6]:
    if number < 3:
        print('The number ' + str(number) + ' is less than 3')

**Question:** how can I print out every integer between 5 and 10 (inclusive)?

In [None]:
# ...

## Appending Arrays

Usually when we use `for` loops, we would like to calculate and store some value each iteration---not just print it out. The `np.append` function will let us add a value (or values) to an existing array.

In [None]:
# Define two arrays
first = np.arange(4)
second = np.arange(10, 17)

In [None]:
first

In [None]:
second

In [None]:
# Add a single value to the first array
np.append(first, 4)

In [None]:
# Append doesn't modify the array itself! 
# It returns a new array with the extra value added.
first

In [None]:
# We can also use append to combine two arrays together
np.append(first, second)

We can use the `np.append` function inside of a `for` loop to construct an array one element at a time. For example, suppose we want to calculate the absolute values of some numbers:

In [None]:
numbers = make_array(-1.5, 3.5, 2.5, 0.5, -2.0, -4.0)

We can first create an empty array using `make_array()`, and then use a `for` loop to append the absolute values one at a time.

In [None]:
abs_vals = make_array() # Create an empty array 
for number in numbers: # Iterate over each number in the array
    abs_vals = np.append(abs_vals, abs(number)) # Append the absolute value to the abs_vals array
    
abs_vals

**Question:** Below is a list of taxable incomes. Using a `for` loop, create an array called `taxes` that contains the amount of federal income tax each of these individuals must pay.

In [None]:
# ...

## Warming Up with Simulations: A Preview of Next Week

We can use the `np.random.choice` function to randomly select entries from an array. Let's take a look:

In [None]:
mornings = make_array('wake up', 'sleep in')
mornings

In [None]:
np.random.choice(mornings)

In [None]:
# Make one random choice for each day of the week
np.random.choice(mornings, 7)

In [None]:
sum(np.random.choice(mornings, 7) == 'wake up')

In [None]:
morning_week = np.random.choice(mornings, 7)
morning_week

In [None]:
morning_week

In [None]:
def roll_a_die(d):
    """
    Roll a die with d sides.

    d should be a positive integer. The return value is one of the faces
    1, 2, ..., d, chosen at random with every face equally likely.
    """
    # np.arange leaves out its stop value, so stop at d + 1 to include face d
    die_faces = np.arange(1, d + 1)
    return np.random.choice(die_faces)

In [None]:
roll_a_die(20)

### Example: Simulating Rock-Paper-Scissors

Simulate 100 games of rock-paper-scissors, with each player making random plays each round:

In [None]:
num_games = 100
plays = ['rock', 'paper', 'scissors']
player1_moves = np.random.choice(plays, num_games) # randomly pick player 1's moves
player2_moves = np.random.choice(plays, num_games) # randomly pick player 2's moves

player1_moves

Let's use the `rock_paper_scissors` function to determine which player wins in each of these games. There are two ways we can approach this. One is to put all the relevant data into a table, and use the `apply` method:

In [None]:
# APPROACH 1: Use the apply method
# Put both players' moves into a table, with one row per game
rps_games = Table().with_columns(
    'Player 1 Move', player1_moves,
    'Player 2 Move', player2_moves)
# apply calls rock_paper_scissors once per row, passing that row's
# 'Player 1 Move' as player1 and 'Player 2 Move' as player2, and returns
# an array containing the outcome of every game
outcomes_apply = rps_games.apply(rock_paper_scissors, 'Player 1 Move', 'Player 2 Move')

Another approach is to use a `for` loop.

In [None]:
# APPROACH 2: Use a for loop
outcomes_for = make_array() # Create an empty array to collect the outcomes
for game_number in np.arange(num_games): # Iterate over game_number = 0, 1, 2, ..., num_games - 1
    
    # Use .item to look up the entries of player1_moves and player2_moves at the position game_number
    player1_move = player1_moves.item(game_number) 
    player2_move = player2_moves.item(game_number)
    
    # Evaluate the game
    outcome = rock_paper_scissors(player1_move, player2_move)
    
    # Append the outcome to the array. np.append returns a new array rather than
    # modifying outcomes_for, so we assign the result back to outcomes_for.
    outcomes_for = np.append(outcomes_for, outcome)

We now have two arrays, `outcomes_apply` and `outcomes_for`, which should contain the same information. Let's check that the entries of these arrays are identical:

In [None]:
# Out of the 100 entries in each of these arrays, how many are equal?
sum(outcomes_apply == outcomes_for)

Let's see the distribution of outcomes over 100 games:

In [None]:
rps_games = rps_games.with_column('Outcome', outcomes_apply)
rps_games

In [None]:
rps_games.group('Outcome').barh('Outcome')

Is this roughly what you would expect?

### Example: Simulating Coin Tosses

Suppose that we toss a coin 100 times. How many times do we end up with heads?

In [None]:
coin = make_array('heads', 'tails')

In [None]:
sum(np.random.choice(coin, 100) == 'heads')

The number of heads can vary quite widely. Let's repeat this experiment many times and examine the distribution of the number of heads in each trial.

In [None]:
def num_heads():
    """
    Toss the coin 100 times at random and return how many tosses were heads.
    """
    tosses = np.random.choice(coin, 100)
    # Comparing the array to 'heads' gives one True/False value per toss;
    # summing those values counts the True entries.
    is_heads = tosses == 'heads'
    return sum(is_heads)

In [None]:
# Decide how many times you want to repeat the experiment
repetitions = 10000

In [None]:
# Simulate the outcomes: repeat the coin-tossing experiment `repetitions` times,
# recording the number of heads from each repetition
outcomes = make_array() # Start with an empty array
for i in np.arange(repetitions):
    # np.append returns a new array, so assign the result back to outcomes
    outcomes = np.append(outcomes, num_heads())

In [None]:
# Plot a histogram with the distribution of outcomes
heads = Table().with_column('Heads', outcomes)
heads.hist(bins = np.arange(29.5, 70.6))

In [None]:
np.mean(outcomes)