## Functions as Arguments

In [1]:
sent = ['Take', 'care', 'of', 'the', 'sense', ',', 'and', 'the',
        'sounds', 'will', 'take', 'care', 'of', 'themselves', '.']

In [2]:
def extract_property(prop, words=None):
    """Apply ``prop`` to every word and return the results as a list.

    Parameters
    ----------
    prop : callable
        Function called once per word, e.g. ``len``.
    words : iterable, optional
        Words to process. When omitted, the notebook-level ``sent`` list is
        used, which keeps existing ``extract_property(prop)`` calls working.

    Returns
    -------
    list
        ``prop(word)`` for each word, in input order.
    """
    # Only fall back to the global when no sequence was supplied; an explicit
    # empty list is respected (hence ``is None`` rather than a truthiness test).
    if words is None:
        words = sent
    return [prop(word) for word in words]

In [3]:
extract_property(len)

[4, 4, 2, 3, 5, 1, 3, 3, 6, 4, 4, 4, 2, 10, 1]

In [4]:
def last_letter(word):
    """Return the final element of ``word`` (its last character for a string)."""
    final_index = len(word) - 1
    return word[final_index]
extract_property(last_letter)

['e', 'e', 'f', 'e', 'e', ',', 'd', 'e', 's', 'l', 'e', 'e', 'f', 's', '.']

In [5]:
extract_property(lambda w: w[-1])

['e', 'e', 'f', 'e', 'e', ',', 'd', 'e', 's', 'l', 'e', 'e', 'f', 's', '.']

In [7]:
print(sorted(sent))

[',', '.', 'Take', 'and', 'care', 'care', 'of', 'of', 'sense', 'sounds', 'take', 'the', 'the', 'themselves', 'will']


## Accumulative Functions

In [11]:
def search1(substring, words):
    """Return a list of the items in ``words`` that contain ``substring``.

    The whole result list is built before anything is returned.
    """
    return [candidate for candidate in words if substring in candidate]

def search2(substring, words):
    """Yield, one at a time, the items in ``words`` that contain ``substring``.

    This is a generator function: matches are produced as the caller iterates,
    rather than being collected into a list first.
    """
    matching = (candidate for candidate in words if substring in candidate)
    yield from matching
            
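# search2 is a generator function:
# 1. The caller calls search2, which returns a generator without running the body yet.
# 2. On each iteration the body runs until it reaches a yield statement, then pauses.
# 3. The caller uses the yielded word.
# 4. The caller asks for the next item, and the function resumes until the next yield.
# 5. This repeats until the loop over words is exhausted.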

In [15]:
import nltk
for item in search1('zz', nltk.corpus.brown.words()):
    print(item, end=" ")

Grizzlies' fizzled Rizzuto huzzahs dazzler jazz Pezza Pezza Pezza embezzling embezzlement pizza jazz Ozzie nozzle drizzly puzzle puzzle dazzling Sizzling guzzle puzzles dazzling jazz jazz Jazz jazz Jazz jazz jazz Jazz jazz jazz jazz Jazz jazz dizzy jazz Jazz puzzler jazz jazzmen jazz jazz Jazz Jazz Jazz jazz Jazz jazz jazz jazz Jazz jazz jazz jazz jazz jazz jazz jazz jazz jazz Jazz Jazz jazz jazz nozzles nozzle puzzle buzz puzzle blizzard blizzard sizzling puzzled puzzle puzzle muzzle muzzle muezzin blizzard Neo-Jazz jazz muzzle piazzas puzzles puzzles embezzle buzzed snazzy buzzes puzzled puzzled muzzle whizzing jazz Belshazzar Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie's Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie blizzard blizzards blizzard blizzard fuzzy Lazzeri Piazza piazza palazzi Piazza Piazza Palazzo Palazzo Palazzo Piazza Piazza Palazzo palazzo palazzo Palazzo Palazzo Piazza piazza piazza piazza Piazza Piazza Palazzo palazzo Piazza piazz

In [14]:
for item in search2('zz', nltk.corpus.brown.words()):
    print(item, end=" ")

Grizzlies' fizzled Rizzuto huzzahs dazzler jazz Pezza Pezza Pezza embezzling embezzlement pizza jazz Ozzie nozzle drizzly puzzle puzzle dazzling Sizzling guzzle puzzles dazzling jazz jazz Jazz jazz Jazz jazz jazz Jazz jazz jazz jazz Jazz jazz dizzy jazz Jazz puzzler jazz jazzmen jazz jazz Jazz Jazz Jazz jazz Jazz jazz jazz jazz Jazz jazz jazz jazz jazz jazz jazz jazz jazz jazz Jazz Jazz jazz jazz nozzles nozzle puzzle buzz puzzle blizzard blizzard sizzling puzzled puzzle puzzle muzzle muzzle muezzin blizzard Neo-Jazz jazz muzzle piazzas puzzles puzzles embezzle buzzed snazzy buzzes puzzled puzzled muzzle whizzing jazz Belshazzar Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie's Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie Lizzie blizzard blizzards blizzard blizzard fuzzy Lazzeri Piazza piazza palazzi Piazza Piazza Palazzo Palazzo Palazzo Piazza Piazza Palazzo palazzo palazzo Palazzo Palazzo Piazza piazza piazza piazza Piazza Piazza Palazzo palazzo Piazza piazz

In [16]:
def permutations(seq):
    """Yield every ordering of the items in the sliceable sequence ``seq``.

    A sequence of length 0 or 1 is yielded as-is. Otherwise the first item is
    inserted at every position of each permutation of the remaining items.
    """
    if len(seq) <= 1:
        yield seq
        return
    # Keep the head as a one-item slice so ``+`` concatenates with the same
    # sequence type as ``seq``.
    head, rest = seq[0:1], seq[1:]
    for shorter in permutations(rest):
        for position in range(len(shorter) + 1):
            yield shorter[:position] + head + shorter[position:]

In [17]:
list(permutations(['police', 'fish', 'buffalo']))

[['police', 'fish', 'buffalo'],
 ['fish', 'police', 'buffalo'],
 ['fish', 'buffalo', 'police'],
 ['police', 'buffalo', 'fish'],
 ['buffalo', 'police', 'fish'],
 ['buffalo', 'fish', 'police']]

In [23]:
for item in permutations(['police', 'fish', 'buffalo']):
    print(item)

['police', 'fish', 'buffalo']
['fish', 'police', 'buffalo']
['fish', 'buffalo', 'police']
['police', 'buffalo', 'fish']
['buffalo', 'police', 'fish']
['buffalo', 'fish', 'police']


## Higher-Order Functions

In [26]:
def is_content_word(word):
    """Return True unless the lowercased ``word`` is one of a fixed set of
    function words and punctuation marks."""
    excluded = ['a', 'of', 'the', 'and', 'will', ',', '.']
    lowered = word.lower()
    return not (lowered in excluded)
sent = ['Take', 'care', 'of', 'the', 'sense', ',', 'and', 'the',
        'sounds', 'will', 'take', 'care', 'of', 'themselves', '.']
# filter
list(filter(is_content_word, sent))
# filter applies the function to each item in the sequence
# and retains only the items for which the function returns a true value.

['Take', 'care', 'sense', 'sounds', 'take', 'care', 'themselves']

In [35]:
# map
lengths = list( map(len, nltk.corpus.brown.sents(categories='news')) )

In [37]:
sum(lengths) / len(lengths)

21.75081116158339

In [38]:
print(lengths)

[25, 43, 35, 37, 24, 24, 43, 2, 26, 25, 14, 14, 28, 24, 59, 23, 25, 17, 34, 2, 33, 33, 33, 33, 3, 12, 31, 3, 28, 34, 22, 6, 9, 20, 15, 16, 16, 20, 11, 13, 17, 14, 10, 30, 22, 23, 37, 34, 20, 28, 32, 31, 22, 21, 9, 20, 17, 28, 32, 18, 21, 2, 26, 43, 31, 3, 35, 33, 28, 33, 41, 30, 13, 20, 25, 27, 36, 37, 16, 18, 35, 1, 37, 31, 13, 18, 19, 12, 18, 13, 16, 33, 18, 19, 29, 12, 11, 9, 3, 23, 36, 14, 22, 37, 29, 24, 31, 20, 38, 19, 12, 39, 31, 20, 22, 3, 30, 24, 15, 53, 49, 31, 24, 30, 21, 21, 11, 20, 26, 26, 22, 7, 9, 25, 32, 26, 7, 34, 30, 14, 42, 37, 36, 36, 3, 20, 34, 24, 25, 29, 22, 16, 3, 14, 44, 33, 14, 21, 31, 36, 36, 12, 9, 9, 3, 25, 41, 11, 22, 13, 38, 13, 14, 6, 25, 21, 15, 29, 18, 19, 51, 18, 6, 28, 32, 38, 25, 5, 34, 22, 34, 21, 3, 8, 27, 16, 31, 34, 23, 14, 3, 17, 15, 21, 23, 10, 21, 9, 4, 39, 39, 36, 3, 28, 48, 38, 24, 16, 10, 20, 20, 3, 41, 22, 11, 25, 16, 56, 3, 14, 32, 45, 24, 8, 3, 34, 4, 49, 49, 23, 8, 6, 28, 29, 26, 15, 46, 3, 35, 9, 24, 14, 3, 23, 33, 1, 17, 36, 1, 15, 1

In [39]:
myli = list( map(len, nltk.corpus.brown.sents(categories='news')) )

In [40]:
len(myli)

4623

In [41]:
myma = map(len, nltk.corpus.brown.sents(categories='news')) 

In [42]:
myli2 = list(myma)

In [43]:
len(myli2)

4623

In [44]:
# list comprehension
lengths = [len(sent) for sent in nltk.corpus.brown.sents(categories='news')]

In [54]:
list(map(lambda w: len( list(filter(lambda c: c.lower() in "aeiou", w)) ), sent))

[2, 2, 1, 1, 2, 0, 1, 1, 2, 1, 2, 2, 1, 3, 0]

In [61]:
[len(list(c for c in w if c.lower() in "aeiou")) for w in sent]

[2, 2, 1, 1, 2, 0, 1, 1, 2, 1, 2, 2, 1, 3, 0]

In [50]:
[w for w in sent]

['Take',
 'care',
 'of',
 'the',
 'sense',
 ',',
 'and',
 'the',
 'sounds',
 'will',
 'take',
 'care',
 'of',
 'themselves',
 '.']

In [52]:
w = 'auita'
list(filter(lambda c: c.lower() in "aeiou", w))

['a', 'u', 'i', 'a']

In [63]:
filter(lambda c: c.lower() in "aeiou", w)

<filter at 0x106886cc0>

In [53]:
len(list(filter(lambda c: c.lower() in "aeiou", w)))

4

In [55]:
(c for c in w if c.lower() in "aeiou")

<generator object <genexpr> at 0x10683da98>

In [56]:
print((c for c in w if c.lower() in "aeiou"))

<generator object <genexpr> at 0x1067d4fc0>


In [59]:
for item in (c for c in w if c.lower() in "aeiou"):
    print(item)

a
u
i
a


In [60]:
list((c for c in w if c.lower() in "aeiou"))

['a', 'u', 'i', 'a']

## Named Arguments

In [65]:
def repeat(msg='<empty>', num=1):
    """Return ``msg`` repeated ``num`` times.

    Both parameters have defaults, so either can be omitted or passed by
    keyword in any order.
    """
    repeated = msg * num
    return repeated

In [66]:
repeat(num=3)

'<empty><empty><empty>'

In [67]:
repeat(msg='Alice')

'Alice'

In [68]:
repeat(num=5, msg='Alice')

'AliceAliceAliceAliceAlice'

In [69]:
def generic(*args, **kwargs):
    """Print the positional arguments (as a tuple), then the keyword
    arguments (as a dict), each on its own line."""
    for collected in (args, kwargs):
        print(collected)

### keyword arguments

In [70]:
generic(1, "African swallow", monty="python")

(1, 'African swallow')
{'monty': 'python'}


In [71]:
song = [['four', 'calling', 'birds'],
    ['three', 'French', 'hens'],
    ['two', 'turtle', 'doves']]

In [72]:
list(zip(song[0], song[1], song[2]))

[('four', 'three', 'two'),
 ('calling', 'French', 'turtle'),
 ('birds', 'hens', 'doves')]

In [73]:
list(zip(*song))

[('four', 'three', 'two'),
 ('calling', 'French', 'turtle'),
 ('birds', 'hens', 'doves')]

In [78]:
import nltk
from nltk import word_tokenize

def freq_words(file, min=1, num=10):
    """Return the ``num`` most common tokens in a text file.

    Parameters
    ----------
    file : str
        Path of the text file to read.
    min : int, optional
        Minimum token length; shorter tokens are not counted. (The name
        shadows the built-in ``min`` inside this function; it is kept so that
        existing keyword calls such as ``freq_words(f, min=4)`` still work.)
    num : int, optional
        Number of entries requested from ``most_common``.

    Returns
    -------
    list
        The result of ``FreqDist.most_common(num)``.
    """
    # Use a context manager so the file handle is closed as soon as the text
    # has been read, instead of being left open until garbage collection.
    with open(file) as handle:
        text = handle.read()
    tokens = word_tokenize(text)
    freqdist = nltk.FreqDist(t for t in tokens if len(t) >= min)
    return freqdist.most_common(num)

fw = freq_words('ch01.rst', 4, 10)
fw = freq_words('ch01.rst', min=4, num=10)
fw = freq_words('ch01.rst', num=10, min=4)

In [79]:
fw

[('that', 137),
 ('words', 123),
 ('text', 110),
 ('Python', 98),
 ('this', 98),
 ('with', 91),
 ('word', 91),
 ('from', 57),
 ('have', 52),
 ('list', 52)]

In [80]:
def freq_words(file, min=1, num=10, verbose=False):
    """Return the ``num`` most common tokens in a text file, optionally
    printing progress information.

    Parameters
    ----------
    file : str
        Path of the text file to read.
    min : int, optional
        Minimum token length; shorter tokens are not counted. (The name
        shadows the built-in ``min`` inside this function; it is kept for
        backward compatibility with keyword callers.)
    num : int, optional
        Number of entries requested from ``most_common``.
    verbose : bool, optional
        When true, print the file name, the number of characters read, and a
        dot each time the running token count reaches a multiple of 100.

    Returns
    -------
    list
        The result of ``FreqDist.most_common(num)``.
    """
    # Only ``nltk`` and ``word_tokenize`` are imported in this notebook, so
    # FreqDist must be reached through the ``nltk`` namespace.
    freqdist = nltk.FreqDist()
    if verbose:
        print("Opening", file)
    # Context manager guarantees the file handle is closed after reading.
    with open(file) as handle:
        text = handle.read()
    if verbose:
        # Report the size of the text that was read, not the path's length.
        print("Read in %d characters" % len(text))
    for word in word_tokenize(text):
        if len(word) >= min:
            freqdist[word] += 1
            if verbose and freqdist.N() % 100 == 0:
                # end="" keeps the progress dots on a single line.
                print(".", end="")
    if verbose:
        # Terminate the line of progress dots.
        print()
    return freqdist.most_common(num)

In [None]:
# When a file is opened using `with`, Python closes it automatically
# at the end of the block:
# with open("lexicon.txt") as f:
#     data = f.read()

In [None]:
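# CAUTION
# Do not use a mutable object (e.g. a list or dict) as the default value of a parameter:
# the default is created once, when the function is defined, and is shared by every call.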