Source: https://towardsdatascience.com/7-useful-tricks-for-python-regex-you-should-know-ec20381e22f2?utm_source=pocket_mylist

In [None]:
import re

# 1. Always Use “r-string”

In [3]:
# Regular (non-raw) string literal: the backslash only escapes the quote and is not part of the value.
s = 'I\'m Chris'
print(s)

I'm Chris


In [4]:
# Raw string literal: the backslash is kept as a literal character, so it appears in the printed text.
s = r'I\'m Chris'
print(s)

I\'m Chris


In [7]:
# In a regular string, '\n' is the escape sequence for a newline character.
print('Without raw string...')
print('a\nb')

# In a raw string, the backslash and the 'n' stay as two literal characters.
print('\nWith raw string...')
print(r'a\nb')

Without raw string...
a
b

With raw string...
a\nb


In [18]:
# Search for a word that is repeated after one or more whitespace characters.

# Regular string: '\1' is turned into the control character \x01 before the regex engine sees it,
# so it is not a backreference and nothing matches.
# NOTE: '\w' and '\s' are invalid escape sequences in a regular string; newer Python versions
# emit a warning for them.
print('Without raw string...')
print(re.search('(\w+)\s+\1','abc abc'))

# Raw string: \1 reaches the regex engine intact and works as a backreference to group 1.
print('\nWith raw string...')
print(re.search(r'(\w+)\s+\1','abc abc'))

# No backreference: only the first word and the whitespace after it are matched.
print('\nWith raw string...')
print(re.search(r'(\w+)\s+','abc abc abc'))

# With the backreference: the match covers the first word and its first repetition.
print('\nWith raw string...')
print(re.search(r'(\w+)\s+\1','abc abc abc'))

Without raw string...
None

With raw string...
<re.Match object; span=(0, 7), match='abc abc'>

With raw string...
<re.Match object; span=(0, 4), match='abc '>

With raw string...
<re.Match object; span=(0, 7), match='abc abc'>


# 2. Use re.IGNORECASE Flag When Necessary

In [21]:
# Listing both ranges explicitly matches every letter regardless of case.
print("With both Upper & Lower cases...")
print(re.search(r"[a-zA-Z]+", "AbCdEfG"))

# A lower-case-only class matches just the first run of lower-case letters.
print("\nWithout Upper case...")
print(re.search(r"[a-z]+", "AbCdEfG"))

# The same lower-case class, made case-insensitive through the flag (re.I is its short alias).
print("\nWith Ignore case...")
print(re.search(r"[a-z]+", "AbCdEfG", flags=re.IGNORECASE))

With both Upper & Lower cases...
<re.Match object; span=(0, 7), match='AbCdEfG'>

Without Upper case...
<re.Match object; span=(1, 2), match='b'>

With Ignore case...
<re.Match object; span=(0, 7), match='AbCdEfG'>


# 3. Use re.VERBOSE Flag to Improve the Readability

In [25]:
# re.VERBOSE lets a pattern be spread over several lines with inline comments, which makes it
# much easier for future developers to understand.
print(re.search(r'''
    (\w+)   # Group 1: Match one or more letters, numbers or underscore
    \s+     # Match one or more whitespaces
    \1      # Match the Group 1 whatever it is
''', 'abc   abc', re.VERBOSE))

# Same pattern and input, passing re.X instead of re.VERBOSE.
print(re.search(r'''
    (\w+)   # Group 1: Match one or more letters, numbers or underscore
    \s+     # Match one or more whitespaces
    \1      # Match the Group 1 whatever it is
''', 'abc   abc', re.X))

<re.Match object; span=(0, 9), match='abc   abc'>
<re.Match object; span=(0, 9), match='abc   abc'>


# 4. Customise Substitution Behaviour of re.sub()

In [29]:
# re.sub(pattern, repl, string) finds every match of `pattern` in `string` and replaces it
# with the replacement `repl`.

# Each digit is a separate match, so every digit of the number becomes its own '*'.
print(re.sub(r"\d", "*", "User's mobile number is 1234567890"))

# A small change alters the result completely: '+' makes the whole run of digits a single
# match, so the entire number collapses into one '*'.
print(re.sub(r"\d+", "*", "User's mobile number is 1234567890"))

User's mobile number is **********
User's mobile number is *


In [35]:
# Hide the user's phone number but keep its last digits visible, so the user still has a clue
# which number it is.
def hide_reverse_3(s, visible=3):
    """Mask the text of a regex match, leaving only its trailing characters readable.

    Intended as the ``repl`` callable of ``re.sub()``, which calls it with a single
    match object and inserts the returned string in place of the match.

    Args:
        s: Match object (despite the name, not a string); ``s[0]`` is the whole matched text.
        visible: Number of trailing characters to leave unmasked. Defaults to 3.

    Returns:
        The matched text with every character except the last ``visible`` replaced by ``'*'``.
        Text that is not longer than ``visible`` is returned unchanged.

    Raises:
        ValueError: If ``visible`` is negative.
    """
    if visible < 0:
        raise ValueError('visible must be non-negative')
    matched = s[0]
    # Clamp at zero so short matches get no padding and ``visible=0`` masks everything
    # (a plain ``matched[-0:]`` slice would return the whole text instead).
    hidden = max(len(matched) - visible, 0)
    return '*' * hidden + matched[hidden:]

# When `repl` is a callable, re.sub() calls it with each match object and inserts the string
# it returns in place of the match.
print(re.sub(r'\d+',hide_reverse_3,'User\'s mobile number is 1234567890'))


# The same masking written inline as a lambda instead of a named function.
print(re.sub(r'\d+', lambda s: '*' * (len(s[0])-3) + s[0][-3:], 'User\'s mobile number is 1234567890'))



User's mobile number is *******890
User's mobile number is *******890


# 5. Use re.compile() to Enable Reusability

In [36]:
# Compile the pattern once; the compiled object can then be reused as many times as needed.
# Written as a raw string in line with tip 1, although this pattern contains no backslashes.
pattern = re.compile(r"abc")

# search() scans the string and reports the first occurrence.
print(pattern.search("abc abc"))

# match() only looks for the pattern at the start of the string.
print(pattern.match("abcdef"))

# findall() returns every non-overlapping occurrence as a list of strings.
print(pattern.findall("abc def abc"))

<re.Match object; span=(0, 3), match='abc'>
<re.Match object; span=(0, 3), match='abc'>
['abc', 'abc']


# 6. Use Regex to Generate a Dictionary

In [38]:
# A named group is written (?P<name>...): `name` becomes the dictionary key and ... is the
# sub-pattern whose matched text becomes the value in groupdict().
# The final full stop is escaped so it matches a literal '.' instead of any character.
intro_pattern = re.compile(
    r"My name is (?P<first_name>\w+) (?P<last_name>\w+) and I love (?P<preference>\w+)\."
)

# match() returns None for text that does not fit the pattern, so only chain .groupdict()
# on input that is known to match.
intro_pattern.match("My name is Vishal Kumar and I love Python.").groupdict()

{'first_name': 'Vishal', 'last_name': 'Kumar', 'preference': 'Python'}

# 7. Use Regex Groups to Catch Repeat Patterns

In [45]:
# Compile a pattern that captures a character which occurs again later in the string, match it
# against the text and read group 1: the first character (scanning left to right) that is
# repeated. The leading quantifier is lazy; a greedy one would capture the last such character.
pair = re.compile(r'''
    .*?   # Skip as few characters as possible, so the earliest repeated character is found
    (.)   # Match 1 character, whatever it is (except new-line), this will be the "group 1"
    .*    # Match any number of any characters
    \1    # Match the group 1
''', re.VERBOSE)

# match() returns None when no character is repeated, so only chain .group(1) on input that
# is known to contain a repeat.
pair.match('abcdefgc').group(1)

'c'