source:: https://towardsdatascience.com/the-ultimate-guide-to-using-the-python-regex-module-69aad9e9ba56

In [5]:
import re

# 1. Finding the number of punctuation marks in a particular piece of text

In [4]:
# Sample passage whose punctuation marks are counted below.
string = '''
It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, 
it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness, 
it was the spring of hope, it was the winter of despair, we had everything before us, we had nothing before us, 
we were all going direct to Heaven, we were all going direct the other way – in short, the period was so far like the present 
period, that some of its noisiest authorities insisted on its being received, for good or for evil, in the superlative degree 
of comparison only.
'''

# Character class listing the marks to tally: semicolon, full stop, comma
# and the en dash used in the passage.
pattern = r"[;.,–]"
# Count the matches lazily instead of building a list first.
print(sum(1 for _ in re.finditer(pattern, string)))

19


# 2. Creating Patterns...

In [6]:
# A pattern made of plain characters matches that exact text.
string = "It was the best of times, it was the worst of times."
pattern = r'times'
# Count the occurrences of the literal word in the sentence.
print(sum(1 for _ in re.finditer(pattern, string)))

2


# 2.1 the [] operator

In [7]:
# [] defines a character class; this one accepts a single ASCII letter of
# either case, so counting the matches counts the letters in the sentence.
string = "It was the best of times, it was the worst of times."
pattern = r'[a-zA-Z]'
print(len([hit.group() for hit in re.finditer(pattern, string)]))

39


# 2.2. The dot Operator

In [9]:
# The dot operator (.) matches exactly one character of any kind except a newline.

## Find the six-character substrings that begin with "d" or "D" and end with "e".
string = '''
It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, 
it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness, 
it was the spring of hope, it was the winter of despair, we had everything before us, we had nothing before us, 
we were all going direct to Heaven, we were all going direct the other way – in short, the period was so far like the present 
period, that some of its noisiest authorities insisted on its being received, for good or for evil, in the superlative degree 
of comparison only.
'''

# One leading d/D, four arbitrary characters, then a closing "e".
pattern = r"[dD]....e"
print([hit.group() for hit in re.finditer(pattern, string)])

['Darkne', 'degree']


# 2.3. Some Meta Sequences

In [12]:
# \w, Matches any letter, digit or underscore. Equivalent to [a-zA-Z0-9_] when the re.ASCII flag is set (str patterns otherwise also match Unicode letters and digits).

# \W, Matches anything other than a letter, digit or underscore.

# \d, Matches any decimal digit. Equivalent to [0-9] when the re.ASCII flag is set.

# \D, Matches anything other than a decimal digit.


# 2.4. The Plus and Star operator

In [15]:
# To find every substring that starts with "d", ends with "e" and has at
# least one word character in between, use: d\w+e
string = '''
It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, 
it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness, 
it was the spring of hope, it was the winter of despair, we had everything before us, we had nothing before us, 
we were all going direct to Heaven, we were all going direct the other way – in short, the period was so far like the present 
period, that some of its noisiest authorities insisted on its being received, for good or for evil, in the superlative degree 
of comparison only.
'''

# "+" demands one or more repetitions of the preceding \w.
pattern = r"d\w+e"
print([hit.group(0) for hit in re.finditer(pattern, string)])


# We could also have used a more generic approach using {}

# \w{n} - Repeat \w exactly n number of times.

# \w{n,} - Repeat \w at least n times or more.

# \w{n1,n2} - Repeat \w at least n1 times but no more than n2 times (no space after the comma, otherwise the braces are matched literally).

['dire', 'dire', 'degree']


# 2.5. ^ Caret Operator and $ Dollar operator.



In [17]:
# '^' anchors a match to the start of the string; '$' anchors it to the end.
string = r'best time best time'

pattern = r"^best"
pattern1 = r"time$"

# Only the leading "best" and the trailing "time" satisfy their anchors, so
# each search yields a single hit although both words occur twice.
for anchored in (pattern, pattern1):
    print(re.findall(anchored, string))

['best']
['time']


# 2.6. Word Boundary

In [19]:
# Passage searched with and without a word-boundary anchor.
string = '''
It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, 
it was the epoch of belief, it was the epoch of incredulity, it was the season of Light, it was the season of Darkness, 
it was the spring of hope, it was the winter of despair, we had everything before us, we had nothing before us, 
we were all going direct to Heaven, we were all going direct the other way – in short, the period was so far like the present 
period, that some of its noisiest authorities insisted on its being received, for good or for evil, in the superlative degree 
of comparison only.
'''

# Without a boundary the "d" may sit anywhere, including mid-word or at a
# word's end, so fragments such as "dom" and lone "d"s are reported.
pattern = r"d\w*"
print([hit.group() for hit in re.finditer(pattern, string)])

# \b pins the "d" to the beginning of a word, leaving only whole words.
pattern = r"\bd\w*"
print([hit.group() for hit in re.finditer(pattern, string)])

['dom', 'dulity', 'despair', 'd', 'd', 'direct', 'direct', 'd', 'd', 'd', 'd', 'd', 'degree']
['despair', 'direct', 'direct', 'degree']


# 3. Regex Functions

# 3.1 findall

In [20]:
# USAGE: gather every non-overlapping match of the pattern as a list of strings.
string = "It was the best of times, it was the worst of times."
pattern = r'[iI]t'

matches = [hit.group() for hit in re.finditer(pattern, string)]
# Emit one match per line.
for match in matches:
    print(match)

It
it


# 3.2 Search

In [22]:
# USAGE: search reports only the first place where the pattern matches.
string = "It was the best of times, it was the worst of times."
pattern = r'[iI]t'

location = re.search(pattern, string)
print(location)

# Indexing the Match object with 0 yields the matched text itself.
print(location[0])

<re.Match object; span=(0, 2), match='It'>
It


# 3.3 Substitute

In [23]:
# Swap every occurrence of "times" for "life" and show the rewritten sentence.
string = re.compile(r'times').sub(
    r'life',
    "It was the best of times, it was the worst of times.",
)
print(string)

It was the best of life, it was the worst of life.


# 4. Some Case Studies:
    

# 4.1 PAN Numbers:

In [25]:
# The basic validation criteria for a PAN: the whole string must be exactly
# ten characters, every letter uppercase, in the following order:
## <char><char><char><char><char><digit><digit><digit><digit><char>

# The digit class must use an ASCII hyphen ([0-9]). Written with an en dash
# the class only contains the three literal characters '0', '–' and '9',
# which rejects valid PANs such as 'ABCDE1234L'.
pan_pattern = r'[A-Z]{5}[0-9]{4}[A-Z]'

### Is 'ABcDE1234L' a valid PAN?
# fullmatch makes the pattern cover the entire candidate, so a PAN-shaped run
# embedded in a longer string is not accepted as valid.
match = re.fullmatch(pan_pattern, 'ABcDE1234L')
print(match is not None)

False


# 4.2 Find Domain Names

In [26]:
string = '''
<div class="reflist" style="list-style-type: decimal;">
<ol class="references">
<li id="cite_note-1"><span class="mw-cite-backlink"><b>^ ["Train (noun)"](http://www.askoxford.com/concise_oed/train?view=uk). <i>(definition – Compact OED)</i>. Oxford University Press<span class="reference-accessdate">. Retrieved 2008-03-18</span>.</span><span title="ctx_ver=Z39.88-2004&rfr_id=info%3Asid%2Fen.wikipedia.org%3ATrain&rft.atitle=Train+%28noun%29&rft.genre=article&rft_id=http%3A%2F%2Fwww.askoxford.com%2Fconcise_oed%2Ftrain%3Fview%3Duk&rft.jtitle=%28definition+%E2%80%93+Compact+OED%29&rft.pub=Oxford+University+Press&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal" class="Z3988"><span style="display:none;"> </span></span></span></li>
<li id="cite_note-2"><span class="mw-cite-backlink"><b>^</b></span> <span class="reference-text"><span class="citation book">Atchison, Topeka and Santa Fe Railway (1948). <i>Rules: Operating Department</i>. p. 7.</span><span title="ctx_ver=Z39.88-2004&rfr_id=info%3Asid%2Fen.wikipedia.org%3ATrain&rft.au=Atchison%2C+Topeka+and+Santa+Fe+Railway&rft.aulast=Atchison%2C+Topeka+and+Santa+Fe+Railway&rft.btitle=Rules%3A+Operating+Department&rft.date=1948&rft.genre=book&rft.pages=7&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook" class="Z3988"><span style="display:none;"> </span></span></span></li>
<li id="cite_note-3"><span class="mw-cite-backlink"><b>^ [Hydrogen trains](http://www.hydrogencarsnow.com/blog2/index.php/hydrogen-vehicles/i-hear-the-hydrogen-train-a-comin-its-rolling-round-the-bend/)</span></li>
<li id="cite_note-4"><span class="mw-cite-backlink"><b>^ [Vehicle Projects Inc. Fuel cell locomotive](http://www.bnsf.com/media/news/articles/2008/01/2008-01-09a.html)</span></li>
<li id="cite_note-5"><span class="mw-cite-backlink"><b>^</b></span> <span class="reference-text"><span class="citation book">Central Japan Railway (2006). <i>Central Japan Railway Data Book 2006</i>. p. 16.</span><span title="ctx_ver=Z39.88-2004&rfr_id=info%3Asid%2Fen.wikipedia.org%3ATrain&rft.au=Central+Japan+Railway&rft.aulast=Central+Japan+Railway&rft.btitle=Central+Japan+Railway+Data+Book+2006&rft.date=2006&rft.genre=book&rft.pages=16&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook" class="Z3988"><span style="display:none;"> </span></span></span></li>
<li id="cite_note-6"><span class="mw-cite-backlink"><b>^ ["Overview Of the existing Mumbai Suburban Railway"](http://web.archive.org/web/20080620033027/http://www.mrvc.indianrail.gov.in/overview.htm). _Official webpage of Mumbai Railway Vikas Corporation_. Archived from [the original](http://www.mrvc.indianrail.gov.in/overview.htm) on 2008-06-20<span class="reference-accessdate">. Retrieved 2008-12-11</span>.</span><span title="ctx_ver=Z39.88-2004&rfr_id=info%3Asid%2Fen.wikipedia.org%3ATrain&rft.atitle=Overview+Of+the+existing+Mumbai+Suburban+Railway&rft.genre=article&rft_id=http%3A%2F%2Fwww.mrvc.indianrail.gov.in%2Foverview.htm&rft.jtitle=Official+webpage+of+Mumbai+Railway+Vikas+Corporation&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal" class="Z3988"><span style="display:none;"> </span></span></span></li>
</ol>
</div>
'''

# Extract (scheme suffix, www prefix, domain) from every absolute http/https
# URL in the text.
# The dots of the optional "www." / "ww2." prefix are escaped so that they
# match a literal dot only; unescaped they accept any character (e.g. "wwwx").
# "/" has no special meaning in a Python regex and needs no backslash.
match = re.findall(r'http(s:|:)//(www\.|ww2\.|)([0-9a-z.A-Z-]*\.\w{2,3})', string)

# The pattern has three groups, so findall yields one 3-tuple per URL.
for elem in match:
    print(elem)

(':', 'www.', 'askoxford.com')
(':', 'www.', 'hydrogencarsnow.com')
(':', 'www.', 'bnsf.com')
(':', '', 'web.archive.org')
(':', 'www.', 'mrvc.indianrail.gov.in')
(':', 'www.', 'mrvc.indianrail.gov.in')


# 4.3 Find Email Addresses:

In [31]:
string = '''
<div class="reflist" style="list-style-type: decimal;">
<ol class="references">
<li id="cite_note-1"><span class="mw-cite-backlink"><b>^ ["Train (noun)"](http://www.askoxford.com/concise_oed/train?view=uk). <i>(definition – Compact OED)</i>. Oxford University Press<span class="reference-accessdate">. Retrieved 2008-03-18</span>.</span><span title="ctx_ver=Z39.88-2004&rfr_id=info%3Asid%2Fen.wikipedia.org%3ATrain&rft.atitle=Train+%28noun%29&rft.genre=article&rft_id=http%3A%2F%2Fwww.askoxford.com%2Fconcise_oed%2Ftrain%3Fview%3Duk&rft.jtitle=%28definition+%E2%80%93+Compact+OED%29&rft.pub=Oxford+University+Press&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal" class="Z3988"><span style="display:none;"> </span></span></span></li>
<li id="cite_note-2"><span class="mw-cite-backlink"><b>^</b></span> <span class="reference-text"><span class="citation book">Atchison, Topeka and Santa Fe Railway (1948). <i>Rules: Operating Department</i>. p. 7.</span><span title="ctx_ver=Z39.88-2004&rfr_id=info%3Asid%2Fen.wikipedia.org%3ATrain&rft.au=Atchison%2C+Topeka+and+Santa+Fe+Railway&rft.aulast=Atchison%2C+Topeka+and+Santa+Fe+Railway&rft.btitle=Rules%3A+Operating+Department&rft.date=1948&rft.genre=book&rft.pages=7&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook" class="Z3988"><span style="display:none;"> </span></span></span></li>
<li id="cite_note-3"><span class="mw-cite-backlink"><b>^ [Hydrogen trains](http://www.hydrogencarsnow.com/blog2/index.php/hydrogen-vehicles/i-hear-the-hydrogen-train-a-comin-its-rolling-round-the-bend/)</span></li>
<li id="cite_note-4"><span class="mw-cite-backlink"><b>^ [Vehicle Projects Inc. Fuel cell locomotive](http://www.bnsf.com/media/news/articles/2008/01/2008-01-09a.html)</span></li>
<li id="cite_note-5"><span class="mw-cite-backlink"><b>^</b></span> <span class="reference-text"><span class="citation book">Central Japan Railway (2006). <i>Central Japan Railway Data Book 2006</i>. p. 16.</span><span title="ctx_ver=Z39.88-2004&rfr_id=info%3Asid%2Fen.wikipedia.org%3ATrain&rft.au=Central+Japan+Railway&rft.aulast=Central+Japan+Railway&rft.btitle=Central+Japan+Railway+Data+Book+2006&rft.date=2006&rft.genre=book&rft.pages=16&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook" class="Z3988"><span style="display:none;"> </span></span></span></li>
<li id="cite_note-6"><span class="mw-cite-backlink"><b>^ ["Overview Of the existing Mumbai Suburban Railway"](http://web.archive.org/web/20080620033027/http://www.mrvc.indianrail.gov.in/overview.htm). _Official webpage of Mumbai Railway Vikas Corporation_. Archived from [the original](http://www.mrvc.indianrail.gov.in/overview.htm) on 2008-06-20<span class="reference-accessdate">. Retrieved 2008-12-11</span>.</span><span title="ctx_ver=Z39.88-2004&rfr_id=info%3Asid%2Fen.wikipedia.org%3ATrain&rft.atitle=Overview+Of+the+existing+Mumbai+Suburban+Railway&rft.genre=article&rft_id=http%3A%2F%2Fwww.mrvc.indianrail.gov.in%2Foverview.htm&rft.jtitle=Official+webpage+of+Mumbai+Railway+Vikas+Corporation&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal" class="Z3988"><span style="display:none;"> </span></span></span></li>
</ol>
</div>
'''
# Collect every substring shaped like <local-part>@<domain>. The pattern has a
# single capture group wrapped around the whole address, so findall returns a
# list of plain strings (one per address) rather than tuples.
match=re.findall(r'([\w0-9-._]+@[\w0-9-.]+[\w0-9]{2,3})',string)