In [1]:
from __future__ import print_function, division

# Python Workshop

This workshop will review Python fundamentals 
and prepare you for Galvanize's DSI.

# Topics

### Day 2

Morning:

* sets
* dictionaries
* efficiency

Afternoon:

1. Discuss morning solutions:
    * View student solutions & official solutions.
    * Why such a dramatic increase in speed for each of the four functions?
2. Prep for afternoon exercise:
    * Understand the data.
    * Read through the exercise text.
    * Brainstorm the 4 functions you'll need to write.

# Sets -- the same as sets in mathematics (like dictionaries with keys but no values)

## Set methods

Some common set methods are
 * `add`
 * `union`
 * `intersection`
 * `difference`
 * `update`
 * `issubset`
 * `issuperset`
 * `copy`

In [2]:
# Build a set incrementally: start from an empty set and add one element at a time.
groceries = set()
groceries.add('carrots')
groceries.add('figs')
groceries.add('popcorn')

In [3]:
groceries2 = {'popcorn', 'carrots', 'figs'}

In [4]:
groceries3 = set(('popcorn', 'carrots', 'figs'))

In [6]:
groceries == groceries2 == groceries3

True

In [6]:
groceries

{'carrots', 'figs', 'popcorn'}

In [7]:
'figs' in groceries

True

In [9]:
'Skylar' in groceries

False

In [8]:
# Iterating over a set visits each element once. A set has no defined order,
# so the sequence printed need not match the order the elements were added.
for element in groceries:
    print(element)

popcorn
carrots
figs


# Dictionaries

A dictionary is a set where each element has an associated value.

## Dictionary methods

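Dictionaries have many methods; some of the most common are listed below (these are the Python 2 names; in Python 3, `has_key` is replaced by the `in` operator, and `iterkeys`/`itervalues`/`iteritems` by `keys`/`values`/`items`)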
 * `has_key`
 * `get`
 * `iterkeys`
 * `itervalues`
 * `iteritems`
 * `copy`

In [9]:
# Build a dictionary incrementally: start empty and assign a value to each key.
prices = {}
prices['banana'] = 1
prices['steak'] = 10
prices['ice cream'] = 5

In [10]:
prices2 = {'steak': 10, 'banana': 1, 'ice cream': 5}

In [11]:
prices == prices2

True

In [12]:
prices

{'banana': 1, 'ice cream': 5, 'steak': 10}

In [13]:
'banana' in prices

True

In [14]:
print(prices['banana'])

1


In [15]:
'Skylar' in prices

False

In [16]:
# Indexing a dictionary with a missing key raises KeyError. Catch and display
# the error so this demonstration does not halt a "Run All" of the notebook.
try:
    print(prices['Skylar'])
except KeyError as err:
    # str(err) is the repr of the missing key, so this prints: KeyError: 'Skylar'
    print('KeyError:', err)

KeyError: 'Skylar'

In [18]:
for key, value in prices.iteritems():
    print(key, '->', value)
    
# Do not use prices.items() here: in Python 2 it builds a full list of
# (key, value) pairs up front, whereas iteritems() yields them one at a time.

steak -> 10
banana -> 1
ice cream -> 5


Note the parallels between some of the types we've learned:

| type     | builtin        |         | comprehension |
|----------|----------------|---------|---------|
| `str`    | `str(1.0)`     | `'1.0'` | |
| `tuple`  | `tuple('abc')` | `('a','b','c')` | `(x for x in 'abc')` |
| `list`   | `list('abc')`  | `['a','b','c']` | `[x for x in 'abc']` |
| `set`    | `set('abc')`   | `{'a','b','c'}` | `{x for x in 'abc'}` |
| `dict`   | `dict([('a',1),('b',2)])`   | `{'a':1,'b':2}` | `{x:i for i,x in enumerate('ab')}`|

!!! __(polls 1-3)__ !!!

# Hashing

* a 'hash' function computes an integer for the given object
* dictionaries and sets use hashing for fast inserts, removes, and lookups

BOARDWORK HERE!

In [31]:
# hash() maps a hashable object (here a string, an int and a tuple) to an integer.
print(hash("Skylar"))
print(hash(7))  # in the output below, the int 7 hashes to itself
print(hash((1, 3, 'bob')))

-7627367430384417774
7
-8771069815892485235


In [36]:
# Reduce each hash modulo p, giving a value in range(p).
# NOTE(review): presumably p stands for a hash-table size and the remainder
# for the slot index -- confirm against the boardwork.
p = 113

print(hash("Skylar") % p)
print(hash(7) % p)
print(hash((1, 3, 'bob')) % p)

86
7
105


Possible break for morning

# Variations on Dictionaries

* defaultdict
* Counter

### defaultdict

See: https://docs.python.org/2/library/collections.html

In [38]:
from collections import defaultdict

# defaultdict(int) calls int() to create the value the first time a missing key is read.
d_int = defaultdict(int)
d_int[1] = 25
print(d_int[1])  # explicitly stored value -> 25
print(d_int[2])  # missing key -> default int() == 0

25
0


In [40]:
# The factory passed to defaultdict decides the default value for missing keys:
# float() -> 0.0, str() -> '' (empty string), list() -> [].
d_float = defaultdict(float)
print('Default float:', d_float['some_key'])

d_str = defaultdict(str)
print('Default string:', d_str['some_key'])

d_list = defaultdict(list)
print('Default list:', d_list['some_key'])

Default float: 0.0
Default string: 
Default list: []


### Why defaultdict?

In [42]:
# Split the sentence on whitespace into a list of words.
document = "Skylar walks to the gym then walks home to eat and sleep".split()

# Common pattern: test for the key first, then either increment or initialise.
word_counts = {}
for word in document:
    if word in word_counts:
        word_counts[word] += 1
    else:
        word_counts[word] = 1
print(word_counts)

# Better if you use a defaultdict! A missing word starts at int() == 0,
# so the membership test disappears.
word_counts_2 = defaultdict(int)
for word in document:
    word_counts_2[word] += 1
print(word_counts_2)

# Same? A defaultdict compares equal to a plain dict holding the same items.
word_counts == word_counts_2

{'and': 1, 'then': 1, 'gym': 1, 'Skylar': 1, 'to': 2, 'sleep': 1, 'walks': 2, 'home': 1, 'the': 1, 'eat': 1}
defaultdict(<type 'int'>, {'and': 1, 'then': 1, 'gym': 1, 'Skylar': 1, 'to': 2, 'sleep': 1, 'walks': 2, 'home': 1, 'the': 1, 'eat': 1})


True

### Counter

In [49]:
from collections import Counter

letters = ['c', 'a', 'a', 'b', 'b', 'c','d']
# `Counter` (capital C) is the class from collections; `counter` (lowercase) is our instance.
counter = Counter(letters) # note the difference in capitalization!
print(counter)

Counter({'a': 2, 'c': 2, 'b': 2, 'd': 1})


In [50]:
# Counters have a 'most_common' method that returns (element, count) pairs,
# highest count first.

print(counter.most_common())

# Passing n limits the result to the n most common pairs.
# Elements with equal counts are ordered arbitrarily:
print(counter.most_common(2))
print(counter.most_common(1))

[('a', 2), ('c', 2), ('b', 2), ('d', 1)]
[('a', 2), ('c', 2)]
[('a', 2)]


In [53]:
# Can two numbers drawn (with replacement) from a list sum to a given value?

list_ = [3, 5, 7, 9]

# make_sum(list_, 4) ==> False   (the smallest possible sum is 3 + 3 = 6)
# make_sum(list_, 8) ==> True    (3 + 5)


In [54]:
# Method 1

from itertools import combinations_with_replacement

def make_sum1(numbers, target):
    """Return True if two values drawn (with replacement) from `numbers` sum to `target`.

    Brute force: every pair produced by combinations_with_replacement is
    summed and compared, stopping at the first match.
    """
    return any(
        sum(pair) == target
        for pair in combinations_with_replacement(numbers, 2)
    )


In [55]:
# Method 2

def make_sum2(numbers, target):
    """Return True if two values drawn (with replacement) from `numbers` sum to `target`.

    For each candidate, looks for its complement (target - candidate) directly
    in `numbers`, using whatever membership test that container provides.
    """
    return any((target - candidate) in numbers for candidate in numbers)


In [56]:
# Method 3

def make_sum3(numbers, target):
    """Return True if two values drawn (with replacement) from `numbers` sum to `target`.

    Same complement search as make_sum2, but `numbers` is first converted to
    a set so that both the iteration and the membership test run on the set.
    """
    unique_values = set(numbers)
    return any((target - value) in unique_values for value in unique_values)


In [57]:
import random

# Fixed seed so the benchmark inputs are identical on every "Run All".
RANDOM_SEED = 42

number_of_samples = 1000   # how many (list, target) pairs to generate
list_range = (1, 1000)     # inclusive bounds for list entries and for targets
list_length = 100          # number of entries in each list

# Dedicated generator: reproducible without reseeding the global `random` state.
rng = random.Random(RANDOM_SEED)

# Each sample is a (list of random ints, random target) pair fed to make_sum*.
samples = []

# range (not xrange) keeps the cell runnable on Python 2 and 3; the sizes are
# small enough that materialising the range in Python 2 does not matter.
for s in range(number_of_samples):
    list_ = [rng.randint(*list_range) for i in range(list_length)]
    target = rng.randint(*list_range)
    samples.append((list_, target))


In [58]:
def test_make_sum(samples, make_sum):
    for numbers, target in samples:
        make_sum(numbers, target)

In [59]:
time test_make_sum(samples, make_sum1)

CPU times: user 474 ms, sys: 6.79 ms, total: 481 ms
Wall time: 485 ms


In [60]:
time test_make_sum(samples, make_sum2)

CPU times: user 58.9 ms, sys: 1.5 ms, total: 60.4 ms
Wall time: 60.8 ms


In [61]:
time test_make_sum(samples, make_sum3)

CPU times: user 11.5 ms, sys: 1.55 ms, total: 13.1 ms
Wall time: 17.1 ms
