# Advanced Python Workshop - Lecture Notes
May 24, 2013


Let's start with some warm-up problems.
<http://anandology.com/apy/slides/python-warmup.html>

In [9]:
# problem 1
x = 1
y = x
x = 2
x,y

(2, 1)

In [10]:
# problem 2
x = [1, 2]
y = [x, 5]
x.append(3)
y

[[1, 2, 3], 5]

## Functions

In [11]:
def square(x):
    """Return ``x`` multiplied by itself."""
    squared = x * x
    return squared
square(4)

16

In [12]:
f = square
f(4)

16

In [13]:
def fxy(f, x, y):
    """Apply ``f`` to ``x`` and then to ``y`` and return the sum of both results."""
    first = f(x)
    second = f(y)
    return first + second

fxy(square, 3, 4)

25

In [14]:
f = lambda x: x*x
f(3)

9

In [16]:
fxy(lambda x: x*x*x, 3, 4)

91

In [None]:
x = ['python', 'perl', 
     'java', 'c', 
     'haskell', 'ruby']
sorted(x)

In [None]:
sorted(x, key=len)

**Default Arguments**

In [None]:
def inc(x, amount=1):
    """Return ``x`` increased by ``amount`` (1 when ``amount`` is omitted)."""
    result = x + amount
    return result

inc(5)

In [None]:
inc(5, 4)

In [None]:
inc(x=5, amount=4)

Let's find out when the default value is computed.

In [None]:
def f(x):
    """Announce the call on stdout and return ``x`` unchanged."""
    print("f is called with", x)
    return x

# The top-level prints bracket the ``def`` below so the output shows exactly
# when the default expression ``f(1)`` gets evaluated.
print("before defining inc")

# ``f(1)`` is evaluated once, while this ``def`` statement executes -- not on
# each call -- so f's message appears between "before" and "after".
def inc(x, amount=f(1)):
    """Return ``x + amount``; ``amount`` defaults to what ``f(1)`` returned at definition time."""
    return x + amount

print("after defining inc")
# This call omits ``amount``, so the stored default is reused and f is not
# called again.
print(inc(5))

# Iterators and Generators

In [None]:
for a in [1, 2, 3, 4]: 
    print(a)

In [None]:
for a in (1, 2, 3, 4):
    print(a)

In [None]:
for k in {"a": 1, "b": 2}: 
    print(k)

In [None]:
for c in "hello":
    print(c)

In [None]:
",".join(["a", "b", "c"])

In [None]:
",".join({"a": 1, "b": 2})

In [None]:
",".join("hello")

In [None]:
max([1, 2, 3, 4])

In [None]:
max("hello")

In [None]:
max({"a": 1, "b": 2})

Let's try to understand how iteration works.

In [None]:
x = iter([1, 2, 3, 4])

In [None]:
x.next()

In [None]:
x.next()

In [None]:
x.next()

In [None]:
x.next()

In [None]:
x.next()

In [None]:
x = iter("abc")
x.next()

In [None]:
x.next()

In [None]:
x.next()

In [None]:
x.next()

In [None]:
class yrange:
    """Iterator over the integers ``0, 1, ..., n - 1``.

    The object is its own iterator (``__iter__`` returns ``self``), so it can
    be consumed only once: after it is exhausted, iterating it again yields
    nothing.
    """

    def __init__(self, n):
        self.i = 0  # next value to hand out
        self.n = n  # exclusive upper bound

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next integer; raise ``StopIteration`` once ``n`` is reached."""
        i = self.i
        if i < self.n:
            self.i = i + 1
            return i
        raise StopIteration()

    # Python 2 looks up ``next`` while Python 3 looks up ``__next__``;
    # providing both lets ``for`` loops and explicit ``y.next()`` calls work
    # on either version.
    next = __next__
            
y = yrange(5)
for a in y:
    print(a)

Let's see how a `for` loop works behind the scenes.
 
    for a in x:
        print a

This translates into the following `while` loop.

    it = iter(x)
    while True:
        try:
            a = it.next()
        except StopIteration:
            break
        print a

In [None]:
# [1, 2, 3, 4] is an iterable object.
# x = iter([1, 2, 3, 4]) gives an iterator.
# next() method can be called on an iterator.

y = yrange(5)
print(list(y))
print(list(y))

In [None]:
class zrange:
    """Iterable counterpart of ``yrange``: every iteration gets a new ``yrange(n)``."""

    def __init__(self, n):
        self.n = n

    def __iter__(self):
        # A fresh iterator is built on each call, so the same zrange object
        # can be looped over more than once.
        fresh = yrange(self.n)
        return fresh
    
z = zrange(5)
print(list(z))
print(list(z))

## Generators

In [None]:
def yrange(n):
    """Generate the integers from 0 up to, but not including, ``n``."""
    current = 0
    while True:
        if not current < n:
            return
        yield current
        current += 1

y = yrange(3)
print(y.next())

In [None]:
y.next()

In [None]:
y.next()

In [None]:
y.next()

In [None]:
def f():
    """Generator that announces each stage of its execution.

    Calling ``f()`` runs none of the body; each message is printed only when
    the generator is resumed past that point.  Yields 1, then 2.
    """
    # Single-argument print(...) produces the same output whether print is a
    # statement (Python 2) or a function (Python 3).
    print("begin f")
    yield 1
    print("after yielding 1")
    yield 2
    print("end")

a = f()
print(a)

In [None]:
a.next()

In [None]:
a.next()

In [None]:
a.next()

In [None]:
max(yrange(4))

In [None]:
sum(yrange(4))

In [None]:
def squares(numbers):
    """Lazily yield the square of every item of ``numbers``."""
    for value in numbers:
        squared = value * value
        yield squared
        
sum(squares(xrange(1000000)))

In [None]:
%%file a.txt
1
2
3
4
5

In [None]:
def toint(strings):
    """Lazily convert every item of ``strings`` with ``int``."""
    for text in strings:
        number = int(text)
        yield number
        
sum(toint(open("a.txt")))

In [None]:
sum(squares(toint(open("a.txt"))))

In [None]:
# the regular way is
result = 0
for line in open("a.txt"):
    n = int(line)
    result += n
result

**Problem:** Write a function `joiniters`, that takes 2 iterators and returns a combined iterator.

    print sum(joiniters([1, 2, 3], [4, 5, 6]))
    for a in joiniters([1, 2, 3], "hello"):
        print a

    print list(joiniters(iter([1, 2]), iter([3, 4])))


**Problem:** Write a function `iterappend` that takes 2 arguments, an iterator and a value, and returns a new iterator containing all the elements of the given iterator followed by the given value.

    >>> list(iterappend([1, 2], 3))
    [1, 2, 3]

In [None]:
# Solution to joiniters
def joiniters(x, y):
    """Yield every item of ``x`` and then every item of ``y``."""
    for source in (x, y):
        for item in source:
            yield item
        
# solution to iterappend
def iterappend(x, end):
    """Return an iterator over the items of ``x`` followed by the single value ``end``."""
    tail = [end]
    return joiniters(x, tail)

sum(iterappend([1, 2], 3))

## Quick Introduction to List Comprehensions

In [None]:
x = range(10)

In [None]:
[a*a for a in x]

In [None]:
[a*a for a in x if a % 2 == 0]

In [None]:
%%file square.py
def square(x):
    return x*x
def cube(x):
    return x*x*x

In [None]:
# Find all lines containing function definitions
[line for line in open("square.py") if line.startswith("def")]

In [None]:
# find all function names

In [None]:
[line.split("(")[0][len("def "):] for line in open("square.py") 
                    if line.startswith("def")]

In [None]:
%%file a.csv
a,b,c
1,2,3
1,4,9
1,8,27

In [None]:
[line.strip("\n").split(",") for line in open("a.csv")]

In [None]:
def squares(values):
    """Return a list holding the square of each item of ``values``."""
    result = []
    for value in values:
        result.append(value * value)
    return result
sum(squares(xrange(1000000)))

## Generator Expressions

In [None]:
squares_list = [x*x for x in xrange(1000000)]

In [None]:
squares_gen = (x*x for x in xrange(1000000))

In [None]:
squares_gen

In [None]:
sum(squares_gen)

In [None]:
sum((x*x for x in xrange(1000000)))

In [None]:
sum(x*x for x in xrange(1000000))

**Problem:** Write a function `squares` that takes an iterable over numbers as argument and returns an iterator over their squares. Use generator expressions for doing this.

    print sum(squares(xrange(1000)))

#### Example: Reading multiple files

In [None]:
def grep(pattern, fileobj):
    """Return a lazy iterator over the lines of ``fileobj`` that contain ``pattern``."""
    matches = (text for text in fileobj if pattern in text)
    return matches

In [None]:
def printlines(lines):
    """Print each item of ``lines`` after stripping newline characters from both ends."""
    for text in lines:
        stripped = text.strip("\n")
        print(stripped)

In [None]:
fileobj = open("square.py")
lines = grep("def", fileobj)
printlines(lines)

Let's make the program search in multiple files instead of just a single one.

In [None]:
%%file hello.py
def hello(name):
    print("hello", name)

In [None]:
def joiniters(x, y):
    """Yield all items of ``x`` first, then all items of ``y``."""
    for source in (x, y):
        for item in source:
            yield item
        
fileobj1 = open("square.py")
fileobj2 = open("hello.py")
lines = joiniters(fileobj1, fileobj2)
lines = grep("def", lines)
printlines(lines)

Let's extract that into a useful function.

In [None]:
def readfiles(filenames):
    """Reads all files and returns iterator over lines.

    Files are opened one at a time, only when the iteration reaches them.
    Each file is closed as soon as its last line has been yielded (or when
    the generator is closed early), instead of being left open until garbage
    collection.
    """
    for filename in filenames:
        with open(filename) as fileobj:
            for line in fileobj:
                yield line

In [None]:
lines = readfiles(["square.py", "hello.py"])
lines = grep("def", lines)
printlines(lines)

Let's say some files are compressed using gzip and we want our program to read them as well.

In [None]:
!gzip hello.py

In [None]:
!ls *.gz

In [None]:
import gzip
def xopen(filename):
    """Open ``filename`` for reading text, decompressing ``.gz`` files on the fly.

    Returns an open file object; the caller is responsible for closing it.
    """
    if filename.endswith(".gz"):
        # gzip.open defaults to binary mode. Request text mode explicitly so
        # both branches produce the same kind of lines and a substring test
        # such as ``"def" in line`` works for compressed files too.
        return gzip.open(filename, "rt")
    return open(filename)
    
def readfiles(filenames):
    """Reads all files and returns iterator over lines.

    Every file is opened through ``xopen`` when the iteration reaches it and
    is closed as soon as its last line has been yielded (or when the
    generator is closed early), rather than being left open.
    """
    for filename in filenames:
        with xopen(filename) as fileobj:
            for line in fileobj:
                yield line
            
lines = readfiles(["square.py", "hello.py.gz"])
lines = grep("def", lines)
printlines(lines)

**Problem:** Write a function `countiter` to count number of elements in an iterator.
    
    >>> countiter(xrange(100))
    100
    >>> countiter(x for x in xrange(100) if x % 2 == 0)
    50

**Problem:** Write a function `linecount` to count the number of lines in a given file.

    print linecount("square.py")

**Problem:** Write a function `wordcount` to count number of words in a file.

    print wordcount("square.py")

In [None]:
# countiter solution
def countiter(it):
    """Consume ``it`` and return how many items it produced."""
    count = 0
    # enumerate(..., 1) leaves the 1-based position of the last item in
    # ``count``; for an empty iterable the loop never runs and 0 is kept.
    for count, _ in enumerate(it, 1):
        pass
    return count

def countiter(it):
    """Consume ``it`` and return the number of items, by summing one per item."""
    ones = (1 for _ in it)
    return sum(ones)

#### The itertools module

In [None]:
import itertools

list(itertools.chain([1, 2, 3, 4], [5, 6]))

In [None]:
for a, b in itertools.izip("hello", "world"):
    print(a, b)

 **Problem:** Implement `izip` function.

In [None]:
x = itertools.izip("hello", "world")
x.next()

**Problem:** Implement a function `numbers` that generate an infinite sequence of numbers starting from 0.

    >>> n = numbers()
    >>> n.next()
    0
    >>> n.next()
    1
    >>> n.next()
    2    

In [None]:
# solution to izip

def izip(x, y):
    """Lazily yield pairs ``(a, b)`` taken in lockstep from ``x`` and ``y``.

    Iteration stops as soon as either input is exhausted.
    """
    x = iter(x)
    y = iter(y)
    while True:
        # End the generator explicitly: under PEP 479 (Python 3.7+) a
        # StopIteration that escapes a generator body is turned into a
        # RuntimeError.  The next() builtin is used instead of the Python 2
        # only ``.next()`` method.
        try:
            pair = next(x), next(y)
        except StopIteration:
            return
        yield pair
    
for a, b in izip([1, 2, 3], "hello"):
    print(a, b)

In [None]:
for i, c in enumerate("hello"):
    print(i, c)

In [None]:
def myenumerate(it):
    """Pair every item of ``it`` with a running index obtained from ``numbers()``."""
    counter = numbers()
    return izip(counter, it)

def numbers():
    """Generate the endless sequence 0, 1, 2, ..."""
    current = 0
    while True:
        yield current
        current = current + 1

for i, c in myenumerate("hello"):
    print(i, c)

# Functional Programming

## Recursion

In [None]:
def exp(x, n):
    """Compute ``x`` to the power ``n`` by plain recursion, tracing each call.

    ``n`` must be a non-negative integer.  One line ``exp <x> <n>`` is
    printed per call, so the output shows the n + 1 recursive steps.

    Raises:
        ValueError: if ``n`` is negative (the recursion would never reach 0).
    """
    if n < 0:
        raise ValueError("n must be a non-negative integer")
    # A single pre-formatted argument prints identically whether print is a
    # statement (Python 2) or a function (Python 3).
    print("exp {0} {1}".format(x, n))
    if n == 0:
        return 1
    return x * exp(x, n - 1)
    
exp(2, 10)

In [None]:
def fast_exp(x, n):
    """Compute ``x`` to the power ``n`` by repeated squaring, tracing each call.

    ``n`` must be a non-negative integer.  An even exponent is halved while
    the base is squared, so far fewer calls are needed than with one
    multiplication per step.

    Raises:
        ValueError: if ``n`` is negative (the recursion would never reach 0).
    """
    if n < 0:
        raise ValueError("n must be a non-negative integer")
    # A single pre-formatted argument prints identically whether print is a
    # statement (Python 2) or a function (Python 3).
    print("fast_exp {0} {1}".format(x, n))
    if n == 0:
        return 1
    if n % 2 == 0:
        # Floor division keeps the exponent an int; ``/`` would turn it into
        # a float on Python 3.
        return fast_exp(x * x, n // 2)
    return x * fast_exp(x, n - 1)

fast_exp(2, 100)

**Problem:** Write a function `product` to compute the product of 2 numbers, using `+` and `-` operators only.

**Example: Flatten list**

In [None]:
def flatten_list(x, result=None):
    """Return the non-list items of the nested list ``x`` as one flat list.

        >>> flatten_list([[1, 2], [3, 4, [5]]])
        [1, 2, 3, 4, 5]

    ``result`` is the accumulator shared by the recursive calls; when given,
    items are appended to it and that same list is returned.
    """
    if result is None:
        result = []

    for item in x:
        if not isinstance(item, list):
            result.append(item)
            continue
        # Nested list: let the recursive call append into the same accumulator.
        flatten_list(item, result)
    return result

print(flatten_list([1, 2, 3]))
print(flatten_list([[1, 2], [3, 4, [5]]]))

**Problem:** Write a function `flatten_dict` to flatten a nested dictionary by joining the keys with `.` character.

    >>> flatten_dict({'a': 1, 'b': {'x': 2, 'y': 3}, 'c': 4})
    {'a': 1, 'b.x': 2, 'b.y': 3, 'c': 4}

In [None]:
def flatten_dict(d, result=None, prefix=None):
    """Flatten the nested dictionary ``d``, joining nested keys with ``.``.

        >>> flatten_dict({'a': 1, 'b': {'x': 2}}) == {'a': 1, 'b.x': 2}
        True

    ``result`` is the accumulator shared by the recursive calls and
    ``prefix`` is the dotted key path of the dictionary being visited
    (``None`` at the top level).
    """
    if result is None:
        result = {}

    for name, value in d.items():
        key = name if prefix is None else prefix + "." + name
        if not isinstance(value, dict):
            result[key] = value
            continue
        # Nested dictionary: recurse with the extended key path.
        flatten_dict(value, result, prefix=key)
    return result

flatten_dict({'a': 1, 'b': {'x': 2, 'y': 3, 'z': {'p': 5}}, 'c': 4})

#### Example: JSON Encode

In [None]:
def json_encode(data):
    """Encode ``data`` as a JSON string.

    Supported values are ``None``, booleans, ints, floats, strings, lists
    and dictionaries, nested to any depth.  Dictionary keys are converted
    with ``str`` because JSON object keys must be strings.  In strings,
    the backslash, the double quote, newline, carriage return and tab are
    escaped; other characters are emitted unchanged.

    Raises:
        TypeError: if ``data`` (or a nested value) has an unsupported type.
    """
    if data is None:
        return "null"
    # bool has to be tested before int because bool is a subclass of int.
    if isinstance(data, bool):
        return "true" if data else "false"
    if isinstance(data, (int, float)):
        return str(data)
    if isinstance(data, str):
        escaped = data.replace("\\", "\\\\")  # backslash first, so later escapes are not doubled
        for char, replacement in (('"', '\\"'), ("\n", "\\n"), ("\r", "\\r"), ("\t", "\\t")):
            escaped = escaped.replace(char, replacement)
        return '"' + escaped + '"'
    if isinstance(data, list):
        return "[" + ", ".join(json_encode(d) for d in data) + "]"
    if isinstance(data, dict):
        members = [json_encode(str(k)) + ": " + json_encode(v)
                   for k, v in data.items()]
        return "{" + ", ".join(members) + "}"
    raise TypeError("cannot JSON-encode object of type " + type(data).__name__)
        
print(json_encode(True))
print(json_encode(1.234))
print(json_encode([1, 2, 3, True, "hello", [3, 4]]))
print(json_encode({"a": [1, True], "b": {"name": "hello"}}))
# {"a": [1, true], "b": {"name": "hello"}}

### Higher Order Functions

#### Example: Tracing Function Calls

In [None]:

indent = 0  # current nesting depth of traced calls, shared by all traced functions
def trace(f):
    """Decorator that prints an indented call tree for the one-argument function ``f``.

    Every call prints one line (``|-- <name> <arg>``, prefixed with one
    ``| `` per level of nesting) before delegating to ``f``.
    """
    import functools

    @functools.wraps(f)  # keep f's name/docstring on the wrapper
    def g(n):
        global indent
        # One pre-built string prints identically whether print is a
        # statement (Python 2) or a function (Python 3).
        print("| " * indent + "|-- " + f.__name__ + " " + str(n))
        indent += 1
        try:
            return f(n)
        finally:
            # Restore the depth even when f raises, so later traces are not
            # left permanently shifted to the right.
            indent -= 1
    return g

def memoize(f):
    """Decorator that caches the results of the one-argument function ``f``.

    Each distinct argument is passed to ``f`` only once; repeated calls with
    the same argument return the stored result.
    """
    results = {}

    def g(n):
        if n in results:
            return results[n]
        value = f(n)
        results[n] = value
        return value
    return g

import time
#fib = trace(fib)
#fib = memoize(fib)

@memoize
@trace
def fib(n):
    """Return the n-th Fibonacci number, with ``fib(0) == fib(1) == 1``."""
    if n == 0 or n == 1:
        return 1
    previous = fib(n-1)
    before_previous = fib(n-2)
    return previous + before_previous

# Time one top-level call to fib.
t0 = time.time()
# print(...) with a single argument behaves the same whether print is a
# statement (Python 2) or a function (Python 3).
print(fib(5))
t1 = time.time()
print("took %f seconds" % (t1-t0))

In [None]:
def profile(f):
    """Decorator that reports how long each call to ``f`` takes.

    The wrapper forwards all arguments to ``f``, prints
    ``took <seconds> seconds`` when the call finishes (also when it raises),
    and returns whatever ``f`` returned.
    """
    import functools
    import time

    @functools.wraps(f)  # keep f's name/docstring on the wrapper
    def g(*args, **kwargs):
        t0 = time.time()
        try:
            return f(*args, **kwargs)
        finally:
            print("took %f seconds" % (time.time() - t0))
    return g
    
def timepass():
    """Burn CPU time: multiply every pair (i, j) with i < 100000 and j < 100, discarding the results."""
    for outer in range(100000):
        for inner in range(100):
            x = outer * inner

timepass = profile(timepass)
timepass()