In [None]:
from __future__ import print_function, division

# Python Workshop

This workshop will review Python fundamentals 
and prepare you for Galvanize's DSI.

## Review Day 1

Workflow:

* Common commands
* What types did we cover yesterday? Which are mutable and immutable?
* What is an iterable? Generator? (Black Jack example, tell me if it's good or not) 
* What is a module? Why do we use it (what is the name when we import it?)
* Docstrings - what are they? How do we access them?
* String formatting, how many ways? What is the correct way :) 
* File opening/closing - what should we always be using? 

Morning Challenge -- using only your command line (and Atom), create two Python files, each containing one simple function: one file calls its function inside an `if __name__ == '__main__':` block, and the other just calls its function at module level.

Furthermore, print the `__name__` of each file and import one from the other; notice what happens.

In [4]:
import pandas as pd

In [5]:
def test():
    """
    Placeholder function used to demonstrate docstrings.

    params:
        none

    returns:
        None
    """
    return None

In [None]:
test()

In [3]:
demo_lst = [1,2,3]

demo_lst.append('a')

print(demo_lst)

[1, 2, 3, 'a']


# Topics

### Day 2

Morning:

* sets
* dictionaries
* efficiency

Afternoon:

1. Discuss morning solutions:
    * View student solutions & official solutions.
    * Why such a dramatic increase in speed for each of the four functions?
2. Prep for afternoon exercise:
    * Understand the data.
    * Read through the exercise text.
    * Brainstorm the 4 functions you'll need to write.

# Sets -- same as sets in mathematics (valueless dictionaries)

## Set methods

Some common set methods are
 * `add`
 * `union`
 * `intersection`
 * `difference`
 * `update`
 * `issubset`
 * `issuperset`
 * `copy`

In [6]:
groceries = set()
groceries.add('carrots')
groceries.add('figs')
groceries.add('popcorn')

In [7]:
groceries2 = {'popcorn', 'carrots', 'figs'}

In [8]:
groceries3 = set(('popcorn', 'carrots', 'figs'))

In [9]:
#Union
whole_food = {'fancy hummus', 'carrots'}
groceries.union(whole_food)

{'carrots', 'fancy hummus', 'figs', 'popcorn'}

In [10]:
groceries.union(whole_food)

{'carrots', 'fancy hummus', 'figs', 'popcorn'}

In [11]:
groceries.update(whole_food)

In [12]:
groceries

{'carrots', 'fancy hummus', 'figs', 'popcorn'}

In [13]:
'figs' in groceries

True

In [14]:
'Skylar' in groceries

False

In [15]:
for element in groceries:
    print(element)

carrots
fancy hummus
popcorn
figs


# Dictionaries

A dictionary is a set where each element has an associated value.

## Dictionary methods

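Dictionaries have many methods; some of the most common are
 * `has_key` (Python 2 only; in Python 3 use `key in d`)
 * `get`
 * `iterkeys` (Python 2 only; in Python 3 use `keys`)
 * `itervalues` (Python 2 only; in Python 3 use `values`)
 * `iteritems` (Python 2 only; in Python 3 use `items`)
 * `copy`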

In [17]:
prices = {}
prices['banana'] = 1
prices['steak'] = 10
prices['ice cream'] = 5

In [18]:
prices2 = {'steak': 10, 
           'banana': 1, 
           'ice cream': 5}

In [23]:
prices['test'] = 10

In [24]:
print(id(prices))
print(id(prices2))


4650032080
4650030784


In [25]:
prices == prices2

False

In [None]:
prices

In [26]:
'banana' in prices

True

In [27]:
print(prices['banana'])

1


In [28]:
'Skylar' in prices

False

In [29]:
prices['Skylar']

KeyError: 'Skylar'

In [30]:
# Looking up a missing key raises KeyError; handle exactly that error.
try:
    prices['Skylar']
except KeyError:
    # A broad `except Exception` would also hide unrelated bugs
    # (for example a NameError caused by a typo).
    print('not in there error')

not in there error


In [31]:
if 'Skylar' in prices:
    print('here')
else:
    print('not here')

not here


In [33]:
for key, value in prices.items():
    print(key, '->', value)

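# Python 3: prices.items() returns a lightweight view, so it is fine here.
# Python 2: prefer prices.iteritems(); there prices.items() builds a full list.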

banana -> 1
steak -> 10
ice cream -> 5
test -> 10


Note the parallels between some of the types we've learned:

| type     | builtin        | literal | comprehension |
|----------|----------------|---------|---------|
| `str`    | `str(1.0)`     | `'1.0'` | |
| `tuple`  | `tuple('abc')` | `('a','b','c')` | `tuple(x for x in 'abc')` (a bare `(x for x in 'abc')` is a generator, not a tuple) |
| `list`   | `list('abc')`  | `['a','b','c']` | `[x for x in 'abc']` |
| `set`    | `set('abc')`   | `{'a','b','c'}` | `{x for x in 'abc'}` |
| `dict`   | `dict([('a',1),('b',2)])`   | `{'a':1,'b':2}` | `{x:i for i,x in enumerate('ab')}`|

!!! __(polls 1-3)__ !!!

# Hashing

* a 'hash' function computes an integer for the given object
* dictionaries and sets use hashing for fast inserts, removes, and lookups

BOARDWORK HERE!

In [34]:
print(hash("Skylar"))
#print(hash(7))
#print(hash((1, 3, 'bob')))

-3339144261158719130


In [None]:
p = 113

print(hash("Skylar") % p)
print(hash(7) % p)
print(hash((1, 3, 'bob')) % p)

In [35]:
names = ['me', 'you']

print(''.join(names))

meyou


Possible break for morning

# Variations on Dictionaries

* defaultdict
* Counter

### defaultdict

See: https://docs.python.org/3/library/collections.html

In [46]:
from collections import defaultdict

words = 'testing one two three'

count_dict = defaultdict(int)

for char in words:
    count_dict[char] += 1
#     if char in count_dict:
#         count_dict[char] += 1
#     else:
#         count_dict[char] = 1
print(count_dict)

defaultdict(<class 'int'>, {'t': 4, 'e': 4, 's': 1, 'i': 1, 'n': 2, 'g': 1, ' ': 3, 'o': 2, 'w': 1, 'h': 1, 'r': 1})


In [42]:
from collections import defaultdict

d_int = defaultdict(int)
d_int['a'] = 25
d_int['testing']
#print(d_int)
# print(d_int[2])

for k, v in d_int.items():
    if v:
        print(k, v)

a 25


In [24]:
d_int

defaultdict(int, {1: 'a', 2: 0})

In [31]:
# defaultdict(factory) calls factory() to build the value for a missing key,
# so each lookup below inserts and returns that type's "empty" value.
d_float = defaultdict(float)
print('Default float:', d_float['some_key'])

d_str = defaultdict(str)
print('Default string:', d_str['some_key'])

d_list = defaultdict(list)
print('Default list:', d_list['some_key'])


# The default float 0.0 is falsy, so this branch is skipped. Test truthiness
# directly instead of comparing with `== True`.
if d_float['some_key']:
    print('Default float is truthy')

('Default float:', 0.0)
('Default string:', '')
('Default list:', [])


### Why defaultdict?

In [32]:
document = "Skylar walks to the gym then walks home to eat and sleep".split()

# Common pattern:
word_counts = {}
for word in document:
    if word in word_counts:
        word_counts[word] += 1
    else:
        word_counts[word] = 1
print(word_counts)

# Better if you use a defaultdict!
word_counts_2 = defaultdict(int)
for word in document:
    word_counts_2[word] += 1
print(word_counts_2)

# Same?
word_counts == word_counts_2

{'and': 1, 'then': 1, 'gym': 1, 'to': 2, 'Skylar': 1, 'sleep': 1, 'walks': 2, 'home': 1, 'the': 1, 'eat': 1}
defaultdict(<type 'int'>, {'and': 1, 'then': 1, 'gym': 1, 'to': 2, 'Skylar': 1, 'sleep': 1, 'walks': 2, 'home': 1, 'the': 1, 'eat': 1})


True

### Counter

In [47]:
from collections import Counter

letters = ['c', 'a', 'a', 'b', 'B', 'c','d']
counter = Counter(letters) # note the difference in capitalization!
print(counter)

Counter({'c': 2, 'a': 2, 'b': 1, 'B': 1, 'd': 1})


In [42]:
# Counters have a 'most_common' method
print(counter.most_common())

# Elements with equal counts keep first-encountered order in Python 3.7+
# (in older versions their relative order is arbitrary):
print(counter.most_common(2))
print(counter.most_common(1))

[('a', 2), ('c', 2), ('b', 2), ('d', 1)]
[('a', 2), ('c', 2)]
[('a', 2)]


In [43]:
# Can two numbers drawn (with replacement) from a list sum to a given value?

list_ = [3, 5, 7, 9]

# make_sum(list_, 4) ==> False
# make_sum(list_, 8) ==> True


In [48]:
# Method 1

from itertools import combinations_with_replacement

def make_sum1(numbers, target):
    """
    Brute force: check every unordered pair (repeats allowed) for the target sum.

    params:
        numbers (iterable of numbers): candidate values
        target (number): desired sum of two values

    returns:
        bool: True if some pair drawn with replacement sums to target
    """
    return any(
        sum(pair) == target
        for pair in combinations_with_replacement(numbers, 2)
    )


In [49]:
# Method 2

def make_sum2(numbers, target):
    """
    For each value, look for its complement (target - value) in `numbers`.

    The membership test runs against `numbers` as given, so with a list each
    lookup is a linear scan.

    params:
        numbers (collection of numbers): candidate values
        target (number): desired sum of two values

    returns:
        bool: True if two values (possibly the same one twice) sum to target
    """
    return any((target - candidate) in numbers for candidate in numbers)


In [50]:
# Method 3

def make_sum3(numbers, target):
    """
    Same complement search as method 2, but against a set built from `numbers`.

    params:
        numbers (iterable of hashable numbers): candidate values
        target (number): desired sum of two values

    returns:
        bool: True if two values (possibly the same one twice) sum to target
    """
    unique_values = set(numbers)
    return any((target - value) in unique_values for value in unique_values)


In [52]:
import random

# Benchmark inputs for the make_sum* functions: `number_of_samples` pairs of
# (random list, random target), every value drawn from `list_range`.
number_of_samples = 1000
list_range = (1, 1000)  # inclusive bounds passed to randint
list_length = 100
sample_seed = 42  # fixed seed so a fresh kernel rebuilds identical samples

# A dedicated generator makes the samples reproducible without reseeding the
# module-level `random` state.
sample_rng = random.Random(sample_seed)

samples = []

for s in range(number_of_samples):
    list_ = [sample_rng.randint(*list_range) for i in range(list_length)]
    target = sample_rng.randint(*list_range)
    samples.append((list_, target))


In [53]:
def test_make_sum(samples, make_sum):
    for numbers, target in samples:
        make_sum(numbers, target)

In [54]:
# Explicit %time magic: works even if automagic is off or a `time` variable exists.
%time test_make_sum(samples, make_sum1)

CPU times: user 329 ms, sys: 3.53 ms, total: 333 ms
Wall time: 334 ms


In [55]:
# Explicit %time magic: works even if automagic is off or a `time` variable exists.
%time test_make_sum(samples, make_sum2)

CPU times: user 38.9 ms, sys: 2.18 ms, total: 41.1 ms
Wall time: 40.3 ms


In [56]:
# Explicit %time magic: works even if automagic is off or a `time` variable exists.
%time test_make_sum(samples, make_sum3)

CPU times: user 8.63 ms, sys: 471 µs, total: 9.1 ms
Wall time: 8.79 ms


In [77]:
##Quick demo about the perils of not copying objects

test = [1,2,3]
test3 = [1,2,3]
test2 = test

In [78]:
def double_ints(lst):
    """
    Return a new list holding each number in `lst` multiplied by two.

    The input is never modified: results are built in a fresh list, so any
    other name bound to the caller's list is unaffected.

    params:
        lst (iterable of numbers): values to double

    returns:
        list: the doubled values, in the original order
    """
    # Multiply by two (doubling); `** 2` would square instead.
    return [num * 2 for num in lst]

In [79]:
double_ints(test)
print(test)

[1, 2, 3]


In [80]:
test2

[1, 2, 3]

In [70]:
def cap_str(string):
    """
    Return an upper-cased copy of `string`.

    Strings are immutable, so `str.upper()` cannot change the caller's
    variable; the new string is returned and the caller must use it.

    params:
        string (str): text to convert

    returns:
        str: `string` converted to upper case
    """
    return string.upper()
    
my_str = 'skylar'
cap_str(my_str)
print(my_str)

skylar


In [67]:
test3 == test

True

In [58]:
test is test2

True

In [59]:
test == test2

True

In [60]:
id(test)

4653224328

In [61]:
id(test2)

4653224328

In [62]:
test2[0] = 100

In [63]:
test is test2

True

In [64]:
test

[100, 2, 3]

In [None]:
def create_where_clause(where_items):
    temp_where = where_items 
    temp_where