In [1]:
import re

# 1. U.S. ZIP codes
zip_pattern = re.compile(r"\b\d{5}(?:[-\s]\d{4})?\b")
zip_tests = ["12345", "12345-6789", "12345 6789", "123456", "abc12345"]
print("ZIP:", [m.group() for t in zip_tests for m in zip_pattern.finditer(t)])

# 2. Words not starting with a capital letter
word_pattern = re.compile(r"\b(?![A-Z])[A-Za-z]+(?:['’-][A-Za-z]+)*\b")
word_tests = ["dog", "Cat", "don’t", "state-of-the-art", "Hello"]
print("Non-capitalized words:", [m.group() for t in word_tests for m in word_pattern.finditer(t)])

# 3. Numbers (signs, commas, decimals, scientific notation)
num_pattern = re.compile(r"[+-]?(?:\d{1,3}(?:,\d{3})*|\d+)(?:\.\d+)?(?:[eE][+-]?\d+)?")
num_tests = ["123", "+123.45", "-1,234", "1.23e-4", "12,345,678.90e+10"]
print("Numbers:", [m.group() for t in num_tests for m in num_pattern.finditer(t)])

# 4. Email spelling variants
email_pattern = re.compile(r"(?i)\be[-\s–]?mail\b")
email_tests = ["email", "E-mail", "e mail", "E–mail", "Mail"]
print("Email variants:", [m.group() for t in email_tests for m in email_pattern.finditer(t)])

# 5. Interjection go/goo/gooo... with optional punctuation
go_pattern = re.compile(r"\bgo+[\!\.\,\?]?\b")
go_tests = ["go", "goo", "gooo!", "go?", "gone", "gooo,"]
print("Go variants:", [m.group() for t in go_tests for m in go_pattern.finditer(t)])
# 6. Lines ending with question mark + optional closing symbols
q_pattern = re.compile(r"\?[)\"'\]\s]*$")
q_tests = [
    "Is this working?",
    "What time is it?\") ",
    "Really?'",
    "No way!",
]
print("Questions:", [t for t in q_tests if q_pattern.search(t)])

ZIP: ['12345', '12345-6789', '12345 6789']
Non-capitalized words: ['dog', 'don’t', 'state-of-the-art']
Numbers: ['123', '+123.45', '-1,234', '1.23e-4', '12,345,678.90e+10']
Email variants: ['email', 'E-mail', 'e mail', 'E–mail']
Go variants: ['go', 'goo', 'gooo', 'go', 'gooo']
Questions: ['Is this working?', 'What time is it?") ', "Really?'"]
