## 1.1. Unpacking a Sequence into Separate Variables

In [1]:
p = (4, 5)
x, y = p
print x,y

4 5


In [2]:
data = [ 'ACME', 50, 91.1, (2012, 12, 21) ]
name, shares, price, date = data
print name

ACME


Unpacking actually works with any object that happens to be iterable, not just tuples or
lists. This includes strings, files, iterators, and generators.

## 1.2.  Converting Between Characters and Numeric Codes(python3)

You need to turn a character into its numeric ASCII (ISO) or Unicode code,and vice
versa.

In [1]:
print ord('a')
print chr(97)

97
a


In [5]:
print ord(u"\u2020")
print repr(unichr(8824))
print u'\u2278'

8224
u'\u2278'
≸


In [6]:
print map(ord,"ciao")

[99, 105, 97, 111]


In [9]:
print "".join(map(chr,range(97,110)))

abcdefghijklm


## 1.3. Testing Whether an Object Is String-like

to test if an object,typically an argument to a function or method is a string

In [13]:
def isString(obj):
    ##basestring is common base class for the str and unicode types
    return isinstance(obj,basestring)
a = '12'
print isString(a)
print isString(isString)

True
False


In [15]:
def isStringLike(obj):
    try:
        obj+""
    except:
        return False
    else:
        return True
print isStringLike(a)
print isStringLike(isString)

True
False


## 1.4 Aligning Strings

align strings: left, right, or center

In [17]:
print '|', "hej".ljust(20), '|', "hej".rjust(20),'|', 'hej'.center(20)

| hej                  |                  hej |         hej         


In [18]:
print 'hej'.center(20,"+")

++++++++hej+++++++++


## 1.5 Trimming Space from the Ends of a String

In [19]:
x = '              hej           '
print '|',x.lstrip(),'|',x.rstrip(),'|',x.strip(),'|'

| hej            |               hej | hej |


In [21]:
x = 'xyxxyy hejyx yyx'
print '|'+x.strip('xy')+'|'

| hejyx |


## 1.6 Combining Strings

In [None]:
largeString = ''.join(pieces)

In [None]:
largeString = '%s%s something %s yet more' % (small1, small2, small3)

In [22]:
import operator
pieces = ["1","2","adfa"]
largeString = reduce(operator.add,pieces,"")
print largeString

12adfa


## 1.7 Reversing a String by Words or Characters

In [23]:
astring="asdfrtrtyret"
revchars = astring[::-1]
print revchars

terytrtrfdsa


In [25]:
revwords = astring.split( ) 
revwords.reverse( ) 
revwords = ' '.join(revwords)
print revwords

asdfrtrtyret


In [26]:
revwords = ' '.join(astring.split( )[::-1])

## 1.8 Checking Whether a String Contains a Set of Characters

In [28]:
def containsAny(seq, aset):
    """ Check whether sequence seq contains ANY of the items in aset. """
    for c in seq:
        if c in aset: 
            return True
        return False

You can gain a little speed by moving to a higher-level,more sophisticated approach, based on the itertools

In [29]:
import itertools
def containsAny(seq, aset):
    for item in itertools.ifilter(aset.__contains__, seq):
        return True
    return False

## 1.9 Simplifying Usage of Strings’ translate Method

In [30]:
import string
def translator(frm='', to='', delete='', keep=None):
    if len(to) == 1:
        to = to * len(frm)
    trans = string.maketrans(frm, to)
    if keep is not None:
        allchars = string.maketrans('', '')
        delete = allchars.translate(allchars, keep.translate(allchars, delete))
    def translate(s):
        return s.translate(trans, delete)
    return translate
digits_only = translator(keep=string.digits)
digits_only('Chris Perkins : 224-7992')

'2247992'

In [31]:
no_digits = translator(delete=string.digits)
no_digits('Chris Perkins : 224-7992')

'Chris Perkins : -'

In [41]:
a_to_c = translator(delete='cabd')
a_to_c("abcd  %% adc")

'  %% '

## 1.10 Filtering a String for a Set of Characters

In [6]:
import string 
allchars = string.maketrans('','')
print [ord(i) for i in allchars]
def makefilter(keep):
    delchars = allchars.translate(allchars,keep)
    def thefilter(s):
        return s.translate(allchars,delchars)
    return thefilter
just_vowels = makefilter('aeouy')
print just_vowels('four score and seven years ago')

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,

In [9]:
def canonicform(s):
    return makefilter(s)(allchars)
print canonicform("hello world!")

 !dehlorw


## 1.11 Checking Whether a String Is Text or Binary

In [None]:
from __future__ import division
import string
text_characters="".join(map(chr,range(32,127)))+"\n\r\b"
_null_trans = string.maketrans("","")
def istext(s, text_charactertext_characters, threshold=0.3):
    if "\0" in s:
        return False
    if not s:
        return True
    t = s.translate(_null_trans,text_characters)
    return len(t)/len(s) <= threshold

## 1.12 Controlling Case

In [13]:
little = "abc"
big = little.upper( )
little = big.lower( )
print big
def capitalize(s):
    return s[:1].upper()+s[1:].lower()
print capitalize("hello world!")
print "hello world".title()

ABC
Hello world!
Hello World


In [29]:
def iscapitalized(s):
    #print s 
    #print s.capitalize()
    return s == s.capitalize()
print iscapitalized("")

True


In [26]:
import string
notrans = string.maketrans("","")
def containsAny(str,strset):
    return len(strset) != len(strset.translate(notrans,str))
def iscapitalized(s):
    return s== s.capitalize() and containsAny(s,string.letters)
print iscapitalized("")

False


## 1.13 Accessing Substrings

In [None]:
afield = theline[3:8]

## 1.14 Changing the Indentation of a Multiline String

You have a string made up of multiple lines,and you need to build another string
from it,adding or removing leading spaces on each line so that the indentation of
each line is some absolute number of spaces.

In [34]:
def reindent(s, numSpace):
    leading_space = numSpace * " "
    lines = [leading_space + line.strip() for line in s.splitlines()]
    return '\n'.join(lines)
a=""" dsfasdf
     asdfadsf
     oiup
     dfa"""
print a
print reindent(a, 4)

 dsfasdf
     asdfadsf
     oiup
     dfa
    dsfasdf
    asdfadsf
    oiup
    dfa


## 1.15 Expanding and Compressing Tabs

In [72]:
def unexpand(astring, tablen = 8):
    import re
    pieces = re.split(r"(  +)", astring.expandtabs(tablen))
    lensofar = 0
    for i, piece in enumerate(pieces):
        thislen = len(piece)
        lensofar += thislen
        if piece.isspace():
            numblanks = lensofar % tablen
            numtabs = (thislen - numblanks + tablen-1)/tablen
            pieces[i] = '\t'*numtabs + ' '*numblanks
    return ''.join(pieces)
a = "adsfa    asdfas  dfasdf    adsf"
print a
print unexpand(a,tablen=5)

adsfa    asdfas  dfasdf    adsf
adsfa    asdfas  dfasdf	  adsf


## 1.16 Interpolating Variables in a String

In [81]:
def expand(format, d, marker='"', safe = False):
    if safe:
        def lookup(w): return d.get(w, w.join(marker*2))
    else:
        def lookup(w): return d[w]
    print safe
    parts = format.split(marker)
    parts[1::2]=map(lookup,parts[1::2])
    return ''.join(parts)
print expand('just "a" test',{'a':'one'})

False
just one test


## 1.17 Interpolating Variables in a String in Python 2.4

you need a simple way to get a copy of a string where specially
marked identifiers are replaced with the results of looking up the identifiers in a
dictionary.

In [84]:
import string
# make a templete
new_style = string.Template('this is $thing')
print new_style
print new_style.substitute({'thing':5})
print new_style.substitute({'thing':'test'})
print new_style.substitute(thing=5)
print new_style.substitute(thing='test')

<string.Template object at 0x0000000003DE6F28>
this is 5
this is test
this is 5
this is test


## 1.18 Replacing Multiple Patterns in a Single Pass

perform several string substitutions on a string.

In [117]:
import re
def multiple_replace(text,adict):
    rx = re.compile('|'.join(map(re.escape,adict)))
    def one_xlat(match):
        print type(match.group)
        print match.group(0)
        
        return adict[match.group(0)]
    return rx.sub(one_xlat,text)


In [129]:
import re 
def make_xlat(*args, **kwds):
    adict = dict(*args, **kwds)
    rx = re.compile('|'.join(map(re.escape,adict)))
    def one_xlat(match):
        return adict[match.group(0)]
    def xlat(text):
        return rx.sub(one_xlat,text)
    return xlat

In [130]:
text = "Larry Wall is the creator of Perl"
adict = {
"Larry Wall" : "Guido van Rossum",
"creator" : "Benevolent Dictator for Life",
"Perl" : "Python",
}
print adict.keys()
print multiple_replace(text, adict)
translate = make_xlat(adict)
print translate(text)

['Larry Wall', 'Perl', 'creator']
<type 'builtin_function_or_method'>
Object `match.group` not found.
Larry Wall
<type 'builtin_function_or_method'>
Object `match.group` not found.
creator
<type 'builtin_function_or_method'>
Object `match.group` not found.
Perl
Guido van Rossum is the Benevolent Dictator for Life of Python
Guido van Rossum is the Benevolent Dictator for Life of Python


## 1.19 Checking a String for Any of Multiple Endings

In [134]:
import itertools
def anyTrue(predicate, sequence):
    return True in itertools.imap(predicate,sequence)
def endsWith(s, *endings):
    return anyTrue(s.endswith, endings)

In [136]:
import os 
for filename in os.listdir('.'):
    if endsWith(filename,'.jpg','.jpeg','gif','.ipynb'):
        print filename

Python cookbook.ipynb


## 1.20 Handling International Text with Unicode

In [137]:
german_ae = unicode('\xc3\xa4', 'utf8')
print german_ae

ä


In [139]:
sentence = "This is a " + german_ae
sentence2 = "Easy!"
para = ". ".join([sentence, sentence2])
print para

This is a ä. Easy!


## 1.21 Converting Between Unicode and Plain Strings

In [150]:
unicodestring = u"Hello world"
print unicodestring.decode()
utf8string = unicodestring.encode("utf-8")
asciistring = unicodestring.encode("ascii")
isostring = unicodestring.encode("ISO-8859-1")
utf16string = unicodestring.encode("utf-16")

plainstring = unicode(utf8string,"utf-8")
plainstring1 = unicode(asciistring,"ascii")
plainstring2 = unicode(isostring,"ISO-8859-1")
plainstring4 = unicode(utf16string,"utf-16")

print plainstring
print plainstring1
print plainstring2
print plainstring4

print plainstring1 == plainstring2 == plainstring3 == plainstring4

Hello world
Hello world
Hello world
Hello world
Hello world
True


## 1.22 Printing Unicode Characters to Standard Output

In [1]:
import codecs, sys
#sys.stdout = codecs.lookup('iso8859-1')[-1](sys.stdout)

In [3]:
old = sys.stdout
print old
char = u"\N{LATIN SMALL LETTER A WITH DIAERESIS}"
print char

<ipykernel.iostream.OutStream object at 0x0000000003B64DA0>
ä


In [4]:
sys.stdout = codecs.lookup('utf-8')[-1](sys.stdout)
print char

ä


## 1.23 Encoding Unicode Data for XML and HTML

In [5]:
def encode_for_xml(unicode_data, encoding='ascii'):
    return unicode_data.encode(encoding, 'xmlcharrefreplace')

In [6]:
import codecs
from htmlentitydefs import codepoint2name
def html_replace(exc):
    if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
        s = [u'&%s;' % codepoint2name[ord(c)] for c in exc.object[exc.start:exc.end]]
        return ''.join(s),exc.end
    else:
        raise TypeError("can't handle %s" % exc.__name__)
codecs.register_error('html_replace',html_replace)

In [10]:
def encode_for_html(unicode_data, encoding='ascii'):
    return unicode_data.encode(encoding, 'html_replace')

In [11]:
data = u'''\
<html>
<head>
<title>Encoding Test</title>
</head>
<body>
<p>accented characters:
<ul>
<li>\xe0 (a + grave)
<li>\xe7 (c + cedilla)
<li>\xe9 (e + acute)
</ul>
<p>symbols:
<ul>
<li>\xa3 (British pound)
<li>\u20ac (Euro)
<li>\u221e (infinity)
</ul>
</body></html>
'''
print encode_for_xml(data)
print encode_for_html(data)

<html>
<head>
<title>Encoding Test</title>
</head>
<body>
<p>accented characters:
<ul>
<li>&#224; (a + grave)
<li>&#231; (c + cedilla)
<li>&#233; (e + acute)
</ul>
<p>symbols:
<ul>
<li>&#163; (British pound)
<li>&#8364; (Euro)
<li>&#8734; (infinity)
</ul>
</body></html>

<html>
<head>
<title>Encoding Test</title>
</head>
<body>
<p>accented characters:
<ul>
<li>&agrave; (a + grave)
<li>&ccedil; (c + cedilla)
<li>&eacute; (e + acute)
</ul>
<p>symbols:
<ul>
<li>&pound; (British pound)
<li>&euro; (Euro)
<li>&infin; (infinity)
</ul>
</body></html>



## 1.24 Making Some Strings Case-Insensitive

In [14]:
class iStr(str):
    """case insensitive class"""
    def __init__(self,*args):
        self.lowered = str.lower(self)
    def __repr__(self):
        return '%s(%s)' % (type(self).__name__, str.__repr__(self))
    def __hash__(self):
        return hash(self.lowered)
    def lower(self):
        return self.lowered
    def _make_case_insensitive(name):
        ''' wrap one method of str into an iStr one, case-insensitive '''
        str_meth = getattr(str, name)
        def x(self, other, *args):
            """try lowercasing 'other', which is typically a string, but
                be prepared to use it as-is if lowering gives problems,
                since strings CAN be correctly compared with non-strings.
            """
            try: 
                other = other.lower()
            except (TypeError, AttributeError, ValueError):
                pass
            return str_meth(self._lowered, other, *args)
        setattr(iStr, name, x)
        for name in 'eq lt le gt gt ne cmp contains'.split( ):
            _make_case_insensitive('__%s__' % name)
        for name in 'count endswith find index rfind rindex startswith'.split( ):
            _make_case_insensitive(name)
        # note that we don't modify methods 'replace', 'split', 'strip', ...
        # of course, you can add modifications to them, too, if you prefer that.
        del _make_case_insensitive

## 1.25 Converting HTML Documents to Text on a Unix Terminal

In [None]:
#!/usr/bin/env python
import sys, os, htmllib, formatter
# use Unix tput to get the escape sequences for bold, underline, reset
set_bold = os.popen('tput bold').read( )
set_underline = os.popen('tput smul').read( )
perform_reset = os.popen('tput sgr0').read( )
class TtyFormatter(formatter.AbstractFormatter):
    ''' a formatter that keeps track of bold and italic font states, and
    emits terminal control sequences accordingly.
    '''
    def __init__(self, writer):
        # first, as usual, initialize the superclass
        formatter.AbstractFormatter.__init__(self, writer)
        # start with neither bold nor italic, and no saved font state
        self.fontState = False, False
        self.fontStack = [ ]
    def push_font(self, font):
        # the `font' tuple has four items, we only track the two flags
        # about whether italic and bold are active or not
        size, is_italic, is_bold, is_tt = font
        self.fontStack.append((is_italic, is_bold))
        self._updateFontState( )
    def pop_font(self, *args):
        # go back to previous font state
        try:
            self.fontStack.pop( )
        except IndexError:
            pass
        self._updateFontState( )
    def updateFontState(self):
        # emit appropriate terminal control sequences if the state of
        # bold and/or italic(==underline) has just changed
        try:
            newState = self.fontStack[-1]
        except IndexError:
            newState = False, False
        if self.fontState != newState:
            # relevant state change: reset terminal
            print perform_reset,
            # set underine and/or bold if needed
            if newState[0]:
                print set_underline,
            if newState[1]:
                print set_bold,
            # remember the two flags as our current font-state
            self.fontState = newState
# make writer, formatter and parser objects, connecting them as needed
myWriter = formatter.DumbWriter( )
if sys.stdout.isatty( ):
    myFormatter = TtyFormatter(myWriter)
else:
    myFormatter = formatter.AbstractFormatter(myWriter)
myParser = htmllib.HTMLParser(myFormatter)
# feed all of standard input to the parser, then terminate operations
myParser.feed(sys.stdin.read( ))
myParser.close( )