In [22]:
import xml.etree.ElementTree as ET
import pandas as pd

rev_data = open('data.xml', 'r').read()
root = ET.XML(rev_data)

reviews = []
dates = []
for text in root.iter('text'):
    reviews.append(text.text)   
for date in root.iter('publishedAtDate'):
    dates.append(date.text)

dtrain = {}
for key in reviews:
    for value in dates:
        dtrain[key] = value     

dfr_train = pd.Series(dtrain).to_frame()

from snorkel.labeling import labeling_function
import re

STUFF = 1
FLUFF = 0
ABSTAIN = -1

@labeling_function()
def recommend(x):
    return STUFF if re.search("recommend|advice|advise|suggest|bring",str(x)) else ABSTAIN

@labeling_function()
def warning(x):
    return STUFF if re.search("warning|watch out|be sure to|plan for|be careful|avoid", str(x)) else ABSTAIN

@labeling_function()
def parking(x):
    return STUFF if re.search("parking",str(x)) else ABSTAIN

@labeling_function()
def tour(x):
    return STUFF if re.search("tour|helicopter|plane",str(x)) else ABSTAIN

@labeling_function()
def shuttle(x):
    return STUFF if re.search("shuttle[s]?|bus|bus stops",str(x)) else ABSTAIN

@labeling_function()
def bathroom(x):
    return STUFF if re.search("bathroom[s]?|facilities",str(x)) else ABSTAIN

@labeling_function()
def food(x):
    return STUFF if re.search("food|cafe|restaurant",str(x)) else ABSTAIN

@labeling_function()
def pricing(x):
    return STUFF if re.search("\$",str(x)) else ABSTAIN

@labeling_function()
def length(x):
    return FLUFF if len(str(x)) < 60 else ABSTAIN

from snorkel.labeling import PandasLFApplier
lfs = [recommend, warning, parking, tour, shuttle, bathroom, food, pricing, length]
applier = PandasLFApplier(lfs=lfs)
L_train = applier.apply(dfr_train)
print(L_train)
#L_test = applier.apply(dfr_test)

  from pandas import Panel
100%|██████████| 298/298 [00:00<00:00, 353.31it/s]

[[ 1 -1 -1 ...  1 -1 -1]
 [ 1 -1 -1 ... -1 -1 -1]
 [ 1 -1 -1 ... -1 -1 -1]
 ...
 [-1 -1 -1 ... -1 -1 -1]
 [-1 -1 -1 ... -1 -1 -1]
 [-1 -1  1 ... -1 -1 -1]]





In [23]:
from snorkel.labeling import LFAnalysis
LFAnalysis(L=L_train, lfs=lfs,).lf_summary()

Unnamed: 0,j,Polarity,Coverage,Overlaps,Conflicts
recommend,0,[1],0.157718,0.067114,0.0
warning,1,[1],0.033557,0.026846,0.0
parking,2,[1],0.043624,0.026846,0.0
tour,3,[1],0.077181,0.043624,0.0
shuttle,4,[1],0.104027,0.060403,0.0
bathroom,5,[1],0.02349,0.016779,0.0
food,6,[1],0.043624,0.02349,0.0
pricing,7,[1],0.036913,0.026846,0.0
length,8,[],0.0,0.0,0.0


In [24]:
LFAnalysis(L=L_train).label_coverage()

0.3389261744966443

In [26]:
dfr_train.iloc[L_train[:,1] == STUFF].sample(5, random_state=1)

Unnamed: 0,0
"Spectacular, Beautiful, Wonderful and Inspirational. One of God's gifts to us all.\nThe cost per car was $35. Nice hike around the rim. We went during the year when it was still COLD, so be sure to check the weather as it got COLD FAST once the sun went down. Be sure to bring your camera as you will want to take plenty of photos. Be Safe while taking your photos!",2020-10-04T02:20:08.660Z
"Took me over 3 hours just to get to the gate. And surprise, surprise, no parking. I do not have a handicap sticker, but need a cane when doing a lot of walking. Finally found a place to park and was able to make it to see the canyon. People were rude, adults bumping into me with no apology. Kids totally unconscious of their surroundings and people. People with their dogs on a long leash, is quite the trailer. I was actually glad to get out of there! If you go be sure to stay in the right lane as much as you can. It goes much faster.",2020-10-04T02:20:08.660Z
"For your visit:\n\nComing in the summer? Beware, there will be massive crowds and despite the ample parking area, it will all be packed, so be prepared to drive around for a while and then walk to the rim. The South Rim offers great views, but if you're there during the summer, I would suggest going down the Bright Angel Trail and seeing it all first-hand. The views are amazing and unlike anything you can see if you stay on the rim. It's also good to visit in the winter if you dislike crowds.\n\nAlso, if you're there for the summer, be sure to check out the North Rim. It's closed during the winter and a bit of a drive, but it's worth it. The Kaibab Trail is amazing and has much fewer people.",2020-10-04T02:20:08.660Z
"Nothing negative to say about our trip to the Grand Canyon! We suggest that anyone going brings lots of water, and takes their vitamins to avoid heat exhaustion! Another great tip is to use the Gypsy Guide app, as it is a great tour guide through any national park and will give you all the ins and outs of the area, with great historical facts, and all the spots to stop and enjoy the view!!",2020-10-04T02:20:08.660Z
"Just today on 4/8/2021 they opened the East entrance so there were not too many people there. The park is great, but plan for full parking lots and closed facilities.",2020-10-04T02:20:08.660Z


In [19]:
from snorkel.labeling.model import LabelModel

label_model = LabelModel(cardinality=2, verbose=True)
label_model.fit(L_train, n_epochs=500, log_freq=100, seed=123)
dfr_train["label"] = label_model.predict(L_train)
dfr_train = dfr_train[dfr_train.label != ABSTAIN]