In [3]:
import numpy as np
import pandas as pd
import re

# Regular Expression Example

In [4]:
###
sample = 'To be or not to be, that is the question'

In [5]:
pattern = re.compile('be')
result = pattern.search(sample)
print(result)

<re.Match object; span=(3, 5), match='be'>


In [6]:
##Search
pattern = re.compile('be')
result = pattern.search(sample)
print(result.match)

AttributeError: 're.Match' object has no attribute 'match'

In [7]:
##Search
pattern = re.compile('bee')
result = pattern.search(sample)
print(result)

None


In [8]:
pattern = re.compile('be')
if pattern.search(sample):
    print("'be' is part of string")
    print("'be' starts at character", pattern.search(sample).span()[0])
else:
    print("'be' is not part of string")

'be' is part of string
'be' starts at character 3


In [9]:
### Find all
sample = 'To be or not to be, that is the question'
pattern = re.compile('be')

result = pattern.findall(sample)
result

['be', 'be']

In [10]:
print("There are", len(result), 'matches')

There are 2 matches


In [11]:
### Find all
sample = 'To be or not to be, that is the question'
pattern = re.compile('e')

result = pattern.findall(sample)
print(result)
print("There are", len(result), 'matches')

['e', 'e', 'e', 'e']
There are 4 matches


In [12]:
sample = 'To be or not to be, that is the question'
pattern = re.compile('be')
result = pattern.sub('beee', sample)
print(result)

To beee or not to beee, that is the question


# Let's try something Useful

In [34]:
### Simulate webpage
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException

# create a new Safari session
## for safari, please make sure you enable 
driver = webdriver.Safari()
driver.implicitly_wait(15)

In [31]:
###select a product name
product = 'ps5'
###need product address
target_url = "https://www.walmart.com/ip/PlayStation-5-Console/363472942"
##load webpage
driver.get(target_url)
html = driver.page_source

instock = re.compile('add to cart')
soldby = re.compile('seller-name-link')
outofstock = re.compile('This item is <b>out of stock</b>.')
if instock.search(html):
    if soldby.search(html):
        print(product + " is in stock, by third party")
    else:
        print(product + " is in stock, by Walmart")
elif outofstock.search(html):
    print(product + " is not in stock")
else:
    print('code change, not sure')

ps5 is in stock, by third party


In [1]:
html

NameError: name 'html' is not defined

In [32]:
###let's build a function
def instock(product_name, webaddress):
    ###select a product name
    product = product_name
    ###need product address
    target_url = webaddress
    ##load webpage
    driver.get(target_url)
    html = driver.page_source

    instock = re.compile('add to cart')
    outofstock = re.compile('This item is.{2,10}out of stock</b>.')
    if instock.search(html):
        if soldby.search(html):
            print(product + " is in stock, by third party")
        else:
            print(product + " is in stock, by Walmart")
    elif outofstock.search(html):
        print(product + " is not in stock")
    else:
        print('code change, not sure')


In [35]:
instock('Xbox X', 'https://www.walmart.com/ip/Xbox-Series-X/443574645')
instock('Nintendo Switch', 'https://www.walmart.com/ip/Nintendo-Switch-Console-with-Neon-Blue-Red-Joy-Con/709776123')

Xbox X is in stock, by Walmart
Nintendo Switch is in stock, by Walmart


# Regular Expression with Pandas

In [88]:
data = pd.read_excel("Sales Feedback.xlsx")
data

Unnamed: 0,Date,Total Sales,Contact
0,2020-05-01,20,This product doesn't work! Please reach back to me at john_smith@gmail.com
1,2020-05-01,67,Can you tell me more about the features? My phone number is 234-554-2323
2,2020-05-03,21,"Please call me at (302)434-3443, I want to learn more about this product"
3,2020-05-04,55,"Oliver Coate, 7182324432, oliverc@outlook.com"
4,2020-05-05,30,Phone: 843-4323322
5,2020-05-06,89,Please email me details at bob.kane@hardware.com
6,2020-05-07,93,"I have a coupon code of ACD03213, please let me know if that works, slamboy@gmail.com"
7,2020-05-08,160,please call me asap at 912-323-3333
8,2020-05-09,166,call me back at 316-332-4233 Mary


In [89]:
##Change display limit to 100 character
pd.options.display.max_colwidth = 100
data

Unnamed: 0,Date,Total Sales,Contact
0,2020-05-01,20,This product doesn't work! Please reach back to me at john_smith@gmail.com
1,2020-05-01,67,Can you tell me more about the features? My phone number is 234-554-2323
2,2020-05-03,21,"Please call me at (302)434-3443, I want to learn more about this product"
3,2020-05-04,55,"Oliver Coate, 7182324432, oliverc@outlook.com"
4,2020-05-05,30,Phone: 843-4323322
5,2020-05-06,89,Please email me details at bob.kane@hardware.com
6,2020-05-07,93,"I have a coupon code of ACD03213, please let me know if that works, slamboy@gmail.com"
7,2020-05-08,160,please call me asap at 912-323-3333
8,2020-05-09,166,call me back at 316-332-4233 Mary


In [90]:
###We want to know if there is phone number, if so, what is phone number
###We also want to know if there is email, if so , what is email
###Regular Express is really useful when you try to read patterns
###https://www.debuggex.com/cheatsheet/regex/python

In [91]:
### What is a phone number?
## \d digit
## . non return carriage character
pattern = re.compile('\d{3}.\d{3}.\d{4}')
def find_pattern(pattern, string):
    if pattern.search(string):
        return pattern.search(string).group()
    else:
        return ""


data['Phone'] = data['Contact'].map(lambda x: find_pattern(pattern, x))
data

Unnamed: 0,Date,Total Sales,Contact,Phone
0,2020-05-01,20,This product doesn't work! Please reach back to me at john_smith@gmail.com,
1,2020-05-01,67,Can you tell me more about the features? My phone number is 234-554-2323,234-554-2323
2,2020-05-03,21,"Please call me at (302)434-3443, I want to learn more about this product",302)434-3443
3,2020-05-04,55,"Oliver Coate, 7182324432, oliverc@outlook.com",
4,2020-05-05,30,Phone: 843-4323322,
5,2020-05-06,89,Please email me details at bob.kane@hardware.com,
6,2020-05-07,93,"I have a coupon code of ACD03213, please let me know if that works, slamboy@gmail.com",
7,2020-05-08,160,please call me asap at 912-323-3333,912-323-3333
8,2020-05-09,166,call me back at 316-332-4233 Mary,316-332-4233


In [92]:
##Use | and get all patterns
pattern = re.compile('(\d{3}.\d{3}.\d{4})|(\d{10})|(\d{3}.\d{7})')
data['Phone'] = data['Contact'].map(lambda x: phone(pattern, x))
data

Unnamed: 0,Date,Total Sales,Contact,Phone
0,2020-05-01,20,This product doesn't work! Please reach back to me at john_smith@gmail.com,
1,2020-05-01,67,Can you tell me more about the features? My phone number is 234-554-2323,234-554-2323
2,2020-05-03,21,"Please call me at (302)434-3443, I want to learn more about this product",302)434-3443
3,2020-05-04,55,"Oliver Coate, 7182324432, oliverc@outlook.com",7182324432
4,2020-05-05,30,Phone: 843-4323322,843-4323322
5,2020-05-06,89,Please email me details at bob.kane@hardware.com,
6,2020-05-07,93,"I have a coupon code of ACD03213, please let me know if that works, slamboy@gmail.com",
7,2020-05-08,160,please call me asap at 912-323-3333,912-323-3333
8,2020-05-09,166,call me back at 316-332-4233 Mary,316-332-4233


In [93]:
##remove all non digits to make phone number standard
pattern2 = re.compile("[^0-9]")
data['Phone_Clean'] = data['Phone'].map(lambda x: pattern2.sub('', x))
data

Unnamed: 0,Date,Total Sales,Contact,Phone,Phone_Clean
0,2020-05-01,20,This product doesn't work! Please reach back to me at john_smith@gmail.com,,
1,2020-05-01,67,Can you tell me more about the features? My phone number is 234-554-2323,234-554-2323,2345542323.0
2,2020-05-03,21,"Please call me at (302)434-3443, I want to learn more about this product",302)434-3443,3024343443.0
3,2020-05-04,55,"Oliver Coate, 7182324432, oliverc@outlook.com",7182324432,7182324432.0
4,2020-05-05,30,Phone: 843-4323322,843-4323322,8434323322.0
5,2020-05-06,89,Please email me details at bob.kane@hardware.com,,
6,2020-05-07,93,"I have a coupon code of ACD03213, please let me know if that works, slamboy@gmail.com",,
7,2020-05-08,160,please call me asap at 912-323-3333,912-323-3333,9123233333.0
8,2020-05-09,166,call me back at 316-332-4233 Mary,316-332-4233,3163324233.0


In [94]:
###create a function to remove all non 0-9 characters

def phone_clean(string):
    if string != '':  ##if exist
        nondigit = re.compile("[^0-9]") ##any character that's not 0-9
        clean_string = nondigit.sub("", string) ##replace with ""
        phone = clean_string[0:3] + "-" + clean_string[3:6] + "-" + clean_string[6:10]
        return phone
    return ''

## apply function to everyline
data['Phone_Clean'] = data['Phone'].map(lambda x: phone_clean(x))
data

Unnamed: 0,Date,Total Sales,Contact,Phone,Phone_Clean
0,2020-05-01,20,This product doesn't work! Please reach back to me at john_smith@gmail.com,,
1,2020-05-01,67,Can you tell me more about the features? My phone number is 234-554-2323,234-554-2323,234-554-2323
2,2020-05-03,21,"Please call me at (302)434-3443, I want to learn more about this product",302)434-3443,302-434-3443
3,2020-05-04,55,"Oliver Coate, 7182324432, oliverc@outlook.com",7182324432,718-232-4432
4,2020-05-05,30,Phone: 843-4323322,843-4323322,843-432-3322
5,2020-05-06,89,Please email me details at bob.kane@hardware.com,,
6,2020-05-07,93,"I have a coupon code of ACD03213, please let me know if that works, slamboy@gmail.com",,
7,2020-05-08,160,please call me asap at 912-323-3333,912-323-3333,912-323-3333
8,2020-05-09,166,call me back at 316-332-4233 Mary,316-332-4233,316-332-4233


In [95]:
#### Check for emails, \S for one NonWite space, + for any amount, @ symbol, use \. for dot, as dot is a special character
email_pattern = re.compile('\S+@\S+\.\S+')  
data['Email'] = data['Contact'].map(lambda x: phone(email_pattern, x))
data

Unnamed: 0,Date,Total Sales,Contact,Phone,Phone_Clean,Email
0,2020-05-01,20,This product doesn't work! Please reach back to me at john_smith@gmail.com,,,john_smith@gmail.com
1,2020-05-01,67,Can you tell me more about the features? My phone number is 234-554-2323,234-554-2323,234-554-2323,
2,2020-05-03,21,"Please call me at (302)434-3443, I want to learn more about this product",302)434-3443,302-434-3443,
3,2020-05-04,55,"Oliver Coate, 7182324432, oliverc@outlook.com",7182324432,718-232-4432,oliverc@outlook.com
4,2020-05-05,30,Phone: 843-4323322,843-4323322,843-432-3322,
5,2020-05-06,89,Please email me details at bob.kane@hardware.com,,,bob.kane@hardware.com
6,2020-05-07,93,"I have a coupon code of ACD03213, please let me know if that works, slamboy@gmail.com",,,slamboy@gmail.com
7,2020-05-08,160,please call me asap at 912-323-3333,912-323-3333,912-323-3333,
8,2020-05-09,166,call me back at 316-332-4233 Mary,316-332-4233,316-332-4233,
