## Regular Expression

### Phone number checker 

In [30]:
import re
def is_phone(s):
    """Return True if ``s`` is a phone number written exactly as ``ddd-ddd-dddd``.

    The string must split on '-' into exactly three groups holding 3, 3 and 4
    ASCII digits respectively. Surrounding whitespace, extra groups, or any
    non-digit character make the check fail.

    Args:
        s: The candidate string.

    Returns:
        bool: True when ``s`` has the expected shape, False otherwise.
    """
    group_lengths = (3, 3, 4)
    groups = s.split('-')
    if len(groups) != len(group_lengths):
        return False
    # str.isdigit() on its own also accepts non-ASCII digits (superscripts,
    # Arabic-Indic numerals, ...), so additionally require ASCII characters.
    return all(
        len(group) == size and group.isascii() and group.isdigit()
        for group, size in zip(groups, group_lengths)
    )

is_phone('111-111-1111')

True

In [53]:
def complex_phone(s):
    """Parse a phone number into its (area, second, last) digit groups.

    Leading and trailing whitespace is ignored. The area code may be written
    as three digits optionally followed by '-', or as three digits in
    parentheses optionally followed by whitespace. The remaining groups are
    three and four digits with an optional '-' between them. Only the ASCII
    digits 0-9 are accepted.

    Args:
        s: The string to parse.

    Returns:
        tuple: ``(area, second, last)`` as strings of 3, 3 and 4 digits.

    Raises:
        ValueError: If ``s`` does not match any of the accepted forms.
    """
    # Raw string: the backslash escapes below are regex syntax, not Python
    # string escapes, and would otherwise trigger invalid-escape warnings.
    pattern = r'''
        ^\s*                    # Leading spaces
        (?P<area>               # Area code
           [0-9]{3}-?           # make - optional
           | \([0-9]{3}\)\s*    # OR "(xxx) "
        )
        (?P<second>[0-9]{3})    # second
        -?                      # make - optional
        (?P<last>[0-9]{4})      # last
        \s*$                    # Trailing spaces
    '''
    matcher = re.compile(pattern, re.VERBOSE)
    matches = matcher.match(s)
    if matches is None:
        raise ValueError("'Please double check your entry: {}'".format(s))

    # The 'area' group may still carry '-', parentheses or spaces; keep only
    # its three digits. Group lengths are guaranteed by the pattern itself,
    # so no further length validation is needed.
    area = re.search(r'[0-9]{3}', matches.group('area')).group()
    return (area, matches.group('second'), matches.group('last'))
    
    
complex_phone('240-644-2012')

('240', '644', '2012')

### Finding matches

1. match() - Determine if the RE matches at the beginning of the string.
2. search() - Scan through a string, looking for any location where this RE matches.
3. findall() - Find all substrings where the RE matches, and return them as a list.
4. finditer() - Find all substrings where the RE matches, and return them as an iterator.


In [9]:
import re
# Compile the literal pattern once, then use search() to locate its first
# occurrence anywhere in the target string.
text = 'star'
find_matching = re.compile (text)

# Named `lyric` rather than `input` so the built-in input() stays usable
# in later cells of the same kernel session.
lyric = 'twinkle, twinkle little star'
matches = find_matching.search (lyric)
print (matches)

print (matches.group ())   # the matched substring
print (matches.start ())   # index of the first matched character
print (matches.end ())     # index just past the last matched character
print (matches.span ())    # (start, end) as a tuple


<re.Match object; span=(24, 28), match='star'>
star
24
28
(24, 28)


### Name detection

In [61]:
# Verbose pattern for "First [Middle] Last" made of ASCII letters only.
# It is a raw string so that \s reaches the regex engine untouched instead of
# being treated as an (invalid) Python string escape.
name = re.compile (r'''^
                           (?P<first>[a-zA-Z]+)       # first name
                           \s
                           (?P<middle>[a-zA-Z]+\s)?   # optional middle name, captured with its trailing space
                           \s*
                           (?P<last>[a-zA-Z]+)        # last name
                           $
                        ''',
                        re.VERBOSE)
print (name.match ('Romeo Montague').group ('first'))
print (name.match ('Juliet A Capulet').groups ())
print (name.match ('Romeo Montague').group ())


Romeo
('Juliet', 'A ', 'Capulet')
Romeo Montague


### Email detection

In [81]:
def email_detection(s):
    """Parses a string as an email address, returning an (id, domain) pair.

    The id must start with an ASCII letter and may continue with word
    characters, '.', '-' or '+'. The domain may contain word characters, '.'
    and '-', and must end with an ASCII letter. The whole string must match;
    nothing, not even a trailing newline, may follow the domain.

    Args:
        s: The string to parse.

    Returns:
        tuple: ``(user, domain)``, the text before and after the '@'.

    Raises:
        ValueError: If ``s`` does not have the expected shape.
    """
    # Raw string so the regex escapes are not read as Python string escapes.
    # \Z (rather than $) anchors at the true end of the string; $ would also
    # match just before a trailing newline.
    pattern = r'''
       ^
       (?P<user>[a-zA-Z][\w.\-+]*)
       @
       (?P<domain>[\w.\-]*[a-zA-Z])
       \Z
    '''
    matcher = re.compile(pattern, re.VERBOSE)
    matches = matcher.match(s)
    if matches is None:
        raise ValueError("invalid email address")
    return (matches.group('user'), matches.group('domain'))
    
email_detection('sample@gmail.com')



('sample', 'gmail.com')