In [1]:
# 8.12.1 re Module and Function fullmatch
# fullmatch checks whether the entire string in its
# second argument matches the pattern in its first argument.
# It returns a match object (truthy) when the whole string matches
# and None otherwise, so it can drive a conditional expression.

import re

pattern = '02215'

'Match' if re.fullmatch(pattern, '02215') else 'No Match'

'Match'

In [2]:
'Match' if re.fullmatch(pattern, '51220') else 'No Match'

'No Match'

In [3]:
# The \ metacharacter begins each of the predefined character classes,
# each matching a specific set of characters (\d matches any digit).
# The {5} quantifier repeats the subexpression to its left exactly five times.
# Validate a five-digit ZIP Code
'Valid' if re.fullmatch(r'\d{5}', '02215') else 'Invalid'

'Valid'

In [4]:
'Valid' if re.fullmatch(r'\d{5}', '2215') else 'Invalid'

'Invalid'

In [5]:
# Writing \d five times in a row is equivalent to \d{5}
'Valid' if re.fullmatch(r'\d\d\d\d\d', '2215') else 'Invalid'

'Invalid'

In [7]:
# Custom Character Classes
# Square brackets, [], define a custom character class
# that matches a single character.
# [aeiou] matches a lowercase vowel
# [A-Z] matches an uppercase letter
# [a-z] matches a lowercase letter
# [a-zA-Z] matches any lowercase or uppercase letter

In [8]:
# The * quantifier matches zero or more occurrences of the subexpression to its left.
# Begin with an uppercase letter followed by any number of lowercase letters
'Valid' if re.fullmatch('[A-Z][a-z]*', 'Wally') else 'Invalid'

'Valid'

In [9]:
'Valid' if re.fullmatch('[A-Z][a-z]*', 'Wo') else 'Invalid'

'Valid'

In [10]:
'Valid' if re.fullmatch('[A-Z][a-z]*', 'wally') else 'Invalid'

'Invalid'

In [11]:
# A caret (^) at the start of a custom character class negates the class:
# it then matches any single character that is NOT listed.
# [^a-z] matches any one character that is not a lowercase letter
'Valid' if re.fullmatch('[^a-z]', 'W') else 'Invalid'

'Valid'

In [12]:
'Valid' if re.fullmatch('[^a-z]', 'a') else 'Invalid'

'Invalid'

In [13]:
# Metacharacters such as *, + and $ are treated literally inside a
# custom character class, so [*+$] matches a single *, +, or $ character
'Match' if re.fullmatch('[*+$]', '*') else 'No Match'

'Match'

In [14]:
'Match' if re.fullmatch('[*+$]', '*+') else 'No Match'

'No Match'

In [15]:
# The + quantifier matches one or more occurrences of the subexpression to its left.
# Any word that begins with a capital letter
# followed by at least one lowercase letter.
'Valid' if re.fullmatch('[A-Z][a-z]+', 'Wally') else 'Invalid'

'Valid'

In [16]:
'Valid' if re.fullmatch('[A-Z][a-z]+', 'E') else 'Invalid'

'Invalid'

In [17]:
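# The ? quantifier matches zero or one occurrence of the subexpression to its left.
# In 'labell?ed' the second l is optional, so the pattern accepts one or two l characters.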

In [18]:
'Match' if re.fullmatch('labell?ed', 'labelled') else 'No Match'

'Match'

In [20]:
'Match' if re.fullmatch('labell?ed', 'labeled') else 'No Match'

'Match'

In [21]:
'Match' if re.fullmatch('labell?ed', 'labellled') else 'No Match'

'No Match'

In [22]:
# {n,} matches at least n occurrences of a subexpression

In [23]:
# at least three digits
'Valid' if re.fullmatch(r'\d{3,}', '123') else 'Invalid'

'Valid'

In [24]:
'Valid' if re.fullmatch(r'\d{3,}', '12') else 'Invalid'

'Invalid'

In [25]:
# {n,m} matches between n and m (inclusive) occurrences of a subexpression

In [26]:
# 3 to 6 digits
'Valid' if re.fullmatch(r'\d{3,6}', '123') else 'Invalid'

'Valid'

In [27]:
'Valid' if re.fullmatch(r'\d{3,6}', '123456') else 'Invalid'

'Valid'

In [28]:
'Valid' if re.fullmatch(r'\d{3,6}', '1234567') else 'Invalid'

'Invalid'

In [29]:
'Valid' if re.fullmatch(r'\d{3,6}', '12') else 'Invalid'

'Invalid'

In [30]:
# 8.12.2 Replacing Substrings and Splitting Strings
# Function sub for replacing patterns in a string
# Receives three required arguments
# The pattern to match
# The replacement text
# The string to be searched

In [31]:
import re

# Function sub
re.sub(r'\t',', ', '1\t2\t3\t4')

'1, 2, 3, 4'

In [32]:
# The count keyword argument specifies the maximum number of replacements
re.sub(r'\t',', ', '1\t2\t3\t4', count= 2)

'1, 2, 3\t4'

In [33]:
# Function split
# Tokenize a string by splitting it at any comma that
# is followed by 0 or more whitespace characters
re.split(r',\s*', '1, 2, 3, 4,  5, 6, 7, 8')

['1', '2', '3', '4', '5', '6', '7', '8']

In [35]:
# The maxsplit keyword argument specifies the maximum number of splits
re.split(r',\s*', '1, 2, 3, 4,  5,6,7,8', maxsplit = 3)

['1', '2', '3', '4,  5,6,7,8']