## From Lists to Strings

In [2]:
# A sentence stored as a list of tokens; the final '.' is an element of its own.
silly = ['We', 'called', 'him', 'Tortoise', 'because', 'he', 'taught', 'us', '.']
# join() is a method of the separator string: a single space goes between
# every pair of adjacent tokens (including before the '.').
' '.join(silly)

'We called him Tortoise because he taught us .'

In [3]:
# Any string can act as the separator; here a semicolon is placed between tokens.
';'.join(silly)

'We;called;him;Tortoise;because;he;taught;us;.'

In [4]:
# Joining on the empty string concatenates the tokens with nothing between them.
''.join(silly)

'WecalledhimTortoisebecausehetaughtus.'

## Strings and Formats

In [6]:
word = 'cat'
# A triple-quoted literal may span several lines; the line break becomes
# part of the string.
sentence = """hello
world"""
# print() writes the characters of the string, without surrounding quotes.
print(word)

cat


In [7]:
# Printing the multi-line string renders its embedded newline as a real line break.
print(sentence)

hello
world


In [8]:
# A bare name as the last expression of a cell displays the value's repr,
# so the quotes are shown.
word

'cat'

In [9]:
# In the repr, the line break inside the string appears as the escape \n.
sentence

'hello\nworld'

In [11]:
import nltk
# Count how often each word occurs in the list.
fdist = nltk.FreqDist(['dog', 'cat', 'dog', 'cat', 'dog', 'snake', 'dog', 'cat'])
# sorted() visits the distinct words in alphabetical order.
for word in sorted(fdist):
    # end='; ' replaces print's default newline, keeping all entries on one line.
    print(word, '->', fdist[word], end='; ')

cat -> 3; dog -> 4; snake -> 1; 

In [12]:
# Same listing as above, but each entry is built with str.format().
for word in sorted(fdist):
    # Each {} is filled by the next positional argument of format().
    print('{}->{};'.format(word, fdist[word]), end=' ')

cat->3; dog->4; snake->1; 

In [13]:
# One replacement field, filled by the one argument.
'I want a {} right now'.format('coffee')

'I want a coffee right now'

In [14]:
# Extra positional arguments are ignored: only the first two fill the two {} fields.
'{} wants a {}'.format('Lee', 'sandwich', 'for lunch')

'Lee wants a sandwich'

In [15]:
# Numbered fields select arguments by index: {1} is 'B' and {0} is 'A'.
'from {1} to {0}'.format('A', 'B')

'from B to A'

In [17]:
# A format string kept in a variable can be reused for every item.
template = 'Lee wants a {} right now'
menu = ['sandwich', 'spam fritter', 'pancake']
for snack in menu:
    # Fill the single {} field with the current snack and print one line per item.
    print(template.format(snack))

Lee wants a sandwich right now
Lee wants a spam fritter right now
Lee wants a pancake right now


## Lining Things Up

In [19]:
# Width 6 with no alignment flag: an integer is right-justified by default.
'{:6}'.format(41)

'    41'

In [20]:
# '<' forces left-justification of the number within the 6-character field.
'{:<6}'.format(41)

'41    '

In [21]:
# Unlike numbers, strings are left-justified by default.
'{:6}'.format('dog')

'dog   '

In [22]:
# '>' right-justifies the string within the 6-character field.
'{:>6}'.format('dog')

'   dog'

In [23]:
import math
# .4f renders the float in fixed-point notation with four digits after the point.
'{:.4f}'.format(math.pi)

'3.1416'

In [24]:
count, total = 3205, 9375
# .4% multiplies the ratio by 100, keeps four decimals and appends a '%' sign.
"accuracy for {} words: {:.4%}".format(total, count / total)

'accuracy for 9375 words: 34.1867%'

In [25]:
def tabulate(cfdist, words, categories):
    """Print a fixed-width table with one row per category and one column per word.

    The first line holds the column headings: the label 'Category' followed by
    each entry of ``words``. Every following line starts with a category name
    and then shows ``cfdist[category][word]`` for each word, in order.

    Row labels use a 16-character field and every other cell a 6-character
    field; each cell, including the last one on a line, is followed by a
    single space.
    """
    label_cell = '{:16}'.format    # row-heading column
    heading_cell = '{:>6}'.format  # column headings, right-justified
    value_cell = '{:6}'.format     # table cells (default alignment of the value)

    # print() puts one space between cells; end=' \n' supplies the trailing
    # space after the last cell and then terminates the line.
    print(label_cell('Category'), *map(heading_cell, words), end=' \n')
    for category in categories:
        row_cells = (value_cell(cfdist[category][word]) for word in words)
        print(label_cell(category), *row_cells, end=' \n')

In [26]:
from nltk.corpus import brown
# Feed (genre, word) pairs for every word of every Brown category into a
# conditional frequency distribution, so counts can be looked up as cfd[genre][word].
cfd = nltk.ConditionalFreqDist(
          (genre, word)
          for genre in brown.categories()
          for word in brown.words(categories=genre))

In [27]:
# Rows of the table: the genres to report on.
genres = ['news', 'religion', 'hobbies', 'science_fiction', 'romance', 'humor']
# Columns of the table: the modal verbs whose counts are shown for each genre.
modals = ['can', 'could', 'may', 'might', 'must', 'will']
tabulate(cfd, modals, genres)

Category            can  could    may  might   must   will 
news                 93     86     66     38     50    389 
religion             82     59     78     12     54     71 
hobbies             268     58    131     22     83    264 
science_fiction      16     49      4     12      8     16 
romance              74    193     11     51     45     43 
humor                16     30      8      8      9     13 


In [29]:
# The field width can itself be a replacement field; here it is taken from
# the `width` keyword argument instead of being hard-coded in the format string.
'{:{width}}'.format("Monty Python", width=15)
# The width could, for example, be derived from the data being printed:
# width = max(len(w) for w in words)

'Monty Python   '

## Writing Results to a File

In [36]:
# Distinct tokens of the King James Genesis text.
words = set(nltk.corpus.genesis.words('english-kjv.txt'))
# The with-block closes the file when the loop finishes (or fails), which
# guarantees that everything written is flushed to disk.
with open('output.txt', 'w') as output_file:
    for word in sorted(words):
        # print() with file=... writes one word per line to the file.
        print(word, file=output_file)

In [31]:
# Number of items in `words`.
len(words)

2789

In [32]:
# str() converts the integer count into its string form.
str(len(words))

'2789'

In [38]:
# NOTE: mode 'w' truncates an existing output.txt, so after this cell the file
# contains only the count. The with-block closes the file so the line is
# actually flushed to disk.
with open('output.txt', 'w') as output_file:
    print(str(len(words)), file=output_file)

## Text Wrapping

In [40]:
saying = ['After', 'all', 'is', 'said', 'and', 'done', ',',
          'more', 'is', 'said', 'than', 'done', '.']
for word in saying:
    # Print each token followed by its length in parentheses; end=' ' keeps
    # everything on one (unwrapped) line.
    print(word, '(' + str(len(word)) + '),', end=' ')

After (5), all (3), is (2), said (4), and (3), done (4), , (1), more (4), is (2), said (4), than (4), done (4), . (1), 

In [41]:
from textwrap import fill
# Named piece_format rather than `format` so the builtin format() is not
# shadowed for the rest of the session.
piece_format = '%s (%d),'
# One "token (length)," piece per token.
pieces = [piece_format % (word, len(word)) for word in saying]
output = ' '.join(pieces)
# fill() re-flows the text into lines of limited width; it may break at any
# space, including the one between a token and its length.
wrapped = fill(output)
print(wrapped)

After (5), all (3), is (2), said (4), and (3), done (4), , (1), more
(4), is (2), said (4), than (4), done (4), . (1),


In [43]:
# Named piece_format rather than `format` so the builtin format() is not
# shadowed for the rest of the session.
# The '_' stands in for the space inside each piece, so fill() sees
# "token_(length)," as one unbreakable word.
piece_format = '%s_(%d),'
pieces = [piece_format % (word, len(word)) for word in saying]
output = ' '.join(pieces)
wrapped = fill(output)
# Restore the spaces only after wrapping. Any '_' that was part of a token
# would be replaced as well.
print(wrapped.replace('_', ' '))


After (5), all (3), is (2), said (4), and (3), done (4), , (1),
more (4), is (2), said (4), than (4), done (4), . (1),
