## Real-World Use Cases of Regular Expressions

### 1. Extracting URLs

In [1]:
import re

# Sample sentence containing one https:// link and one http:// link.
text = "Visit my website at https://example.com or check out the latest news at http://news.example.com"

# "http" or "https", then "://", then a run of word characters, "-", "." or
# percent-encoded bytes. "/" is not in that set, so a match stops before any
# path component.
url_pattern = r"https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+"
urls = [match.group(0) for match in re.finditer(url_pattern, text)]

print(urls)

['https://example.com', 'http://news.example.com']


### 2. Validating Email Addresses

In [2]:
import re


def is_valid_email(email):
    """Return True if *email* has the simple shape ``local@domain.tld``.

    The check is purely syntactic: a local part made of ASCII letters,
    digits and ``._%+-``, an ``@``, a domain made of ASCII letters, digits,
    ``.`` and ``-``, and a final dot followed by at least two ASCII letters.

    Args:
        email: The candidate address as a string.

    Returns:
        bool: True when the whole string matches the pattern, else False.
    """
    # The top-level-domain class is [A-Za-z]; writing it as [A-Z|a-z] would
    # also admit a literal "|" because "|" is not alternation inside [...].
    email_pattern = r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}"
    # fullmatch must consume the entire string, whereas "$" also matches
    # just before a trailing newline and would let "a@b.com\n" through.
    return re.fullmatch(email_pattern, email) is not None


# Demo: a well-formed address followed by a string with no "@" at all.
# Each result is printed on its own line.
print(
    is_valid_email("john.doe@example.com"),
    is_valid_email("invalid_email"),
    sep="\n",
)

True
False


### 3. Extracting Hashtags

In [3]:
import re

# Sample post containing three hashtags.
text = "Just enjoying a #beautiful day with #friends in #nature"

# A hashtag here is "#" followed by one or more word characters
# (letters, digits or underscore).
hashtag_pattern = r"#\w+"
hashtags = re.compile(hashtag_pattern).findall(text)

print(hashtags)

['#beautiful', '#friends', '#nature']


### 4. Parsing HTML Attributes

In [4]:
import re

# Sample anchor tag with a double-quoted href attribute.
html = '<a href="https://example.com">Visit our website</a>'

# Capture everything between the double quotes that follow href=.
href_pattern = r'href="([^"]*)"'
href_match = re.search(href_pattern, html)
# re.search returns None when nothing matches, so guard before calling
# .group(); href is None when the markup has no double-quoted href.
href = href_match.group(1) if href_match is not None else None

print(href)

https://example.com


### 5. Tokenizing Words

In [5]:
import re

# Sample text mixing ordinary words, punctuation and a numeric date.
text = "Hello! How are you today 06/12/2023? I hope everything is going well. Have a great day."

# \b anchors each match at a word boundary. The negative lookahead rejects
# any token whose first character is a digit, which is what keeps the pieces
# of the date out of the result.
word_pattern = r"\b(?![\d.!?])\w+\b"
words = [match.group() for match in re.finditer(word_pattern, text)]

print(words)

['Hello', 'How', 'are', 'you', 'today', 'I', 'hope', 'everything', 'is', 'going', 'well', 'Have', 'a', 'great', 'day']


### 6. Removing Extra Whitespace

In [6]:
import re

# Sample sentence with runs of several spaces between words.
text = "This    sentence   has   extra   whitespaces."

# Collapse every run of one or more whitespace characters into a single
# space. Leading or trailing whitespace would be collapsed, not removed.
clean_text = re.compile(r"\s+").sub(" ", text)

print(clean_text)

This sentence has extra whitespaces.


### 7. Splitting Text into Sentences

In [7]:
import re

# Sample paragraph with sentences ending in "!", "?" and ".".
text = "Hello! How are you today? I hope everything is going well. Have a great day."

# Lazily consume characters up to and including the next ".", "!" or "?".
# The space after a terminator stays at the start of the following sentence,
# and any text after the last terminator is not returned.
sentence_pattern = r"(.*?[.!?])"
sentences = [match.group(1) for match in re.finditer(sentence_pattern, text)]

print(sentences)

['Hello!', ' How are you today?', ' I hope everything is going well.', ' Have a great day.']


### 8. Extracting Dates

In [8]:
import re

# Sample sentence containing one dash-separated date.
text = "The event will take place on 06-12-2023. Don't miss it!"

# Two digits, dash, two digits, dash, four digits, bounded by word
# boundaries. Only the shape is checked, not whether the numbers form a
# real calendar date.
date_pattern = r"\b\d{2}-\d{2}-\d{4}\b"
dates = re.compile(date_pattern).findall(text)

print(dates)

['06-12-2023']


### 9. Extracting IP Addresses

In [9]:
import re

# Sample log line containing three dotted-quad IPv4 addresses.
log = "Client IP: 192.168.0.1 - Request received from 10.0.0.1 - Server IP: 172.16.0.1"

# One octet in the range 0-255: 250-255, 200-249, or 0-199 (optionally
# zero-padded). A plain \d{1,3} would also accept values such as 999.
octet_pattern = r"(?:25[0-5]|2[0-4]\d|[01]?\d\d?)"
# Four octets separated by dots. The word boundaries stop a match from
# starting or ending in the middle of a longer run of digits.
ip_pattern = r"\b(?:" + octet_pattern + r"\.){3}" + octet_pattern + r"\b"
ips = re.findall(ip_pattern, log)

print(ips)

['192.168.0.1', '10.0.0.1', '172.16.0.1']


### 10. Finding Duplicate Words

In [10]:
import re

# Sample sentence in which "is" and "duplicate" each appear twice.
text = "This is is a test sentence to find duplicate duplicate words."

# Match a whole word only when the same whole word occurs again later on
# the same line (the lookahead's "." does not cross newlines).
duplicate_pattern = r"\b(\w+)\b(?=.*\b\1\b)"
# A word that occurs three or more times satisfies the lookahead at more
# than one position. dict.fromkeys drops those repeats while keeping
# first-seen order, so each duplicated word is reported once.
duplicates = list(dict.fromkeys(re.findall(duplicate_pattern, text)))

print(duplicates)

['is', 'duplicate']


### 11. Removing HTML Tags

In [11]:
import re

# Sample markup: a heading followed by a paragraph.
html = "<h1>Welcome to the Regex World</h1><p>Enjoy the power of regex!</p>"

# Replace each "<...>" span (shortest possible match) with one space, so
# every opening and closing tag leaves a space behind in the result.
clean_text = re.compile(r"<.*?>").sub(" ", html)

print(clean_text)

 Welcome to the Regex World  Enjoy the power of regex! 


### 12. Extracting Quoted Text

In [12]:
import re

# Sample sentence containing one double-quoted passage.
text = 'She said, "Life is short, enjoy every moment"'

# Capture the characters between a pair of double quotes. The quotes
# themselves sit outside the group and are not part of the result.
quote_pattern = r'"([^"]*)"'
quotes = [match.group(1) for match in re.finditer(quote_pattern, text)]

print(quotes)

['Life is short, enjoy every moment']


### 13. Extracting Time from Text

In [13]:
import re

# Sample sentence containing one 24-hour HH:MM time.
text = "The meeting will start at 14:30. Please be on time."

# Hours 00-23 and minutes 00-59, bounded by word boundaries. A plain
# \d{2}:\d{2} would also accept impossible values such as "99:99".
time_pattern = r"\b(?:[01]\d|2[0-3]):[0-5]\d\b"
times = re.findall(time_pattern, text)

print(times)

['14:30']


### 14. Removing Non-Alphanumeric Characters

In [14]:
import re

# Sample sentence with two clusters of punctuation and symbols.
text = "This sentence includes !@#$% special characters *&^."

# Delete every character that is not an ASCII letter, a digit or
# whitespace. The spaces around a removed cluster are kept, so gaps
# remain in the result.
clean_text = re.compile(r"[^a-zA-Z0-9\s]").sub("", text)

print(clean_text)

This sentence includes  special characters 


### 15. Extracting Social Security Numbers

In [15]:
import re

# Sample sentence containing one SSN-shaped number (3-2-4 digits).
text = "The SSN of John Doe is 123-45-6789."

# Three digits, dash, two digits, dash, four digits. The \b anchors keep
# the pattern from matching inside a longer digit run: without them,
# "1234-56-78901" would yield "234-56-7890".
ssn_pattern = r"\b\d{3}-\d{2}-\d{4}\b"
ssns = re.findall(ssn_pattern, text)

print(ssns)

['123-45-6789']
