## Getting Started

In [None]:
import re

In [None]:
# Pattern: an "r", exactly one vowel, then "se".
p = re.compile(r'r[aeiou]se')
sentence = 'A rose is a rose is a rose.'
# search() reports only the first place in the sentence where the pattern fits.
result = p.search(sentence)
print(result)

## Module-level Function

In [None]:
# Same search as with a compiled pattern, but the pattern and the text are
# handed to the module-level function in a single call.
re.search(pattern=r'r[aeiou]se', string='A rose is a rose is a rose.')

## Raw String Notation

In [None]:
# Three spellings, printed one after another:
#   1. plain literal   - \n is turned into a real newline
#   2. doubled slashes - each \\ collapses to one backslash, so \n stays as text
#   3. raw literal     - backslashes are kept exactly as typed
for text in ('a\nb\nc', 'a\\nb\\nc', r'a\nb\nc'):
    print(text)

In [None]:
subject = '''abc'''

# Ordinary literal: Python turns each \b into a backspace character before the
# regex engine ever sees it, so the pattern no longer means "word boundary".
cooked = re.search('\babc\b', subject)

# Ordinary literal with the backslashes doubled: the engine receives \b intact.
cookedandprepped = re.search('\\babc\\b', subject)

# Raw literal: same pattern as above without the manual doubling.
raw = re.search(r'\babc\b', subject)

for outcome in (cooked, cookedandprepped, raw):
    print(outcome)

## Splitting on a Pattern

In [None]:
# \W matches a single non-word character; in this address those are "@" and ".".
p = re.compile(r'\W')
address = 'andré@example.com'
# Cut the address at every non-word character and show the pieces.
p.split(address)

## Flags

#### re.IGNORECASE

In [None]:
# With IGNORECASE the capitalised pattern still finds the lower-case text.
p = re.compile('Foo', flags=re.IGNORECASE)
p.findall('foobar')

#### re.MULTILINE

In [None]:
emails = '''andre@example.com
andré@example.com'''
anchored_email = r'^\w+@\w+\.\w+$'

# No flag: ^ and $ anchor to the two-line string as a whole, so nothing matches.
matches1 = re.findall(anchored_email, emails)
# MULTILINE: ^ and $ anchor to every line, so each address matches on its own.
matches2 = re.findall(anchored_email, emails, flags=re.MULTILINE)

print(matches1)
print(matches2)

#### re.DOTALL

In [None]:
emails = '''andre@example.com
andré@example.com'''
m_any_a = r'm.a'

# No flag: "." refuses the newline between the two lines, so there is no match.
matches1 = re.findall(m_any_a, emails)
# DOTALL: "." accepts the newline too, so "m", newline, "a" is found.
matches2 = re.findall(m_any_a, emails, flags=re.DOTALL)

print(matches1)
print(matches2)

#### re.ASCII

In [None]:
# ASCII restricts \w to ASCII word characters, so the accented "é" no longer
# counts as part of a word.
p = re.compile(r'\w+@\w+\.\w+', flags=re.ASCII)
match1, match2 = [
    p.findall(candidate)
    for candidate in ('andre@example.com', 'andré@example.com')
]
print(match1)
print(match2)

#### re.VERBOSE

In [None]:
# Under VERBOSE the whitespace and the "#..." notes inside the pattern are
# ignored, which lets each piece of the regex carry its own explanation.
pattern = r'''^     #start here
            \w+    #text before the @ symbol
            @      #@ symbol
            \w+    #text between the @ and .
            \.     #literal dot
            \w+    #text after .
            $      #end here'''
emails = '''andre@example.com
andré@example.com'''
# MULTILINE is combined in so that ^ and $ apply to each line of `emails`.
re.compile(pattern, re.VERBOSE | re.MULTILINE).findall(emails)

#### re.DEBUG

In [None]:
# Compiling with DEBUG prints a dump of the parsed pattern; nothing is matched
# here, the printed dump is the point of this cell.
url_regex = r'\b(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w\.-]*)*\/?\b'
p = re.compile(url_regex, flags=re.DEBUG)

## Groups

In [None]:
# Groups are numbered by their opening parenthesis:
#   1 = handle, 2 = full domain, 3 = the part of the domain after the dot.
p = re.compile(r'(\w+)@(\w+\.(\w+))')
match = p.match('andre@example.com')

# Group 0 is the whole match; asking for several groups at once returns a tuple.
email, handle, domain, domain_type = match.group(0, 1, 2, 3)
for part in (email, handle, domain, domain_type):
    print(part)

# groups() returns the numbered groups only, without the whole-match group 0.
print(match.groups())

In [None]:
# (?P<name>...) gives each group a name, so it can be fetched by that name
# instead of by position.
p = re.compile(r'(?P<handle>\w+)@(?P<domain>\w+\.(?P<domain_type>\w+))')
match = p.match('andre@example.com')

email = match[0]
handle = match['handle']
domain = match['domain']
domain_type = match['domain_type']

for part in (email, handle, domain, domain_type):
    print(part)

## sub() with Function

In [None]:
import random
def clean_cuss(match):
    """Return a random run of symbols as long as the text matched by *match*.

    The symbols are drawn from '!@#$%^&*' with random.sample, i.e. without
    reusing a position in the pool.
    """
    length = len(match.group(0))
    symbols = '!@#$%^&*'
    # sample() cannot draw more items than the pool holds, so keep doubling
    # the pool until it is at least as long as the matched text.
    while len(symbols) < length:
        symbols += symbols
    picked = random.sample(symbols, length)
    return ''.join(picked)

# A whole word that contains any of the listed roots, in any letter case.
p = re.compile(
    r'\b[a-z]*(stupid|stinky|darn|shucks|crud|slob)[a-z]*\b',
    flags=re.IGNORECASE | re.MULTILINE,
)
s = '''Shucks! What a cruddy day I\'ve had. I spent the whole darn day \
with my slobbiest friend darning his stinky socks.'''
# Each match is handed to clean_cuss, and its return value replaces the word.
result = p.sub(repl=clean_cuss, string=s)
result