In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Load the article table from the CSV in the working directory.
G2_df = pd.read_csv(filepath_or_buffer='G2_articles.csv')
# The 'Article Link' column; each value is later handed to article_text().
links = G2_df.loc[:, 'Article Link']

In [None]:
def article_text(link, timeout=30):
    """Download a web page and return the text of all its ``<p>`` elements.

    Parameters
    ----------
    link : str
        URL of the page to fetch.
    timeout : float or tuple, optional
        Seconds to wait for the server, passed straight to ``requests.get``.
        Without a timeout a stalled server would block the calling loop
        indefinitely. Defaults to 30.

    Returns
    -------
    str
        The text of every ``<p>`` tag, in the order ``find_all`` yields them,
        concatenated with no separator. Empty string if there are none.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    article = requests.get(link, timeout=timeout)
    # The HTTP status is not checked: whatever body comes back is parsed, so
    # an error page simply yields whatever <p> text it happens to contain.
    article_soup = BeautifulSoup(article.content, "html.parser")
    # join() builds the result in one pass instead of repeated string +=.
    return ''.join(paragraph.text for paragraph in article_soup.find_all('p'))

In [None]:
import transformers
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# Load the pretrained "t5-base" checkpoint: the tokenizer first, then the
# sequence-to-sequence model that the summarisation cell calls generate() on.
tokenizer = AutoTokenizer.from_pretrained("t5-base")
model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")

  from .autonotebook import tqdm as notebook_tqdm
Downloading: 100%|██████████| 1.17k/1.17k [00:00<00:00, 1.15MB/s]
Downloading: 100%|██████████| 850M/850M [00:16<00:00, 55.6MB/s]
Downloading: 100%|██████████| 773k/773k [00:00<00:00, 53.6MB/s]
Downloading: 100%|██████████| 1.32M/1.32M [00:00<00:00, 56.9MB/s]


In [None]:
# Summarise every article. One entry is appended per link, so `summaries`
# stays in the same order (and has the same length) as `links`.
summaries = []
for link in links:
    text = article_text(link)
    # "summarize: " is prepended as the task prefix; anything beyond 500
    # tokens is cut off by truncation=True.
    inputs = tokenizer("summarize: " + text, return_tensors="pt", max_length=500, truncation=True)
    outputs = model.generate(
        inputs["input_ids"],
        # Pass the mask the tokenizer produced rather than letting generate()
        # infer one.
        attention_mask=inputs["attention_mask"],
        max_length=100,
        min_length=10,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    # skip_special_tokens drops markers such as <pad> and </s> at decode time,
    # so the stored summary is plain text rather than text plus token markup.
    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    summaries.append(summary)

In [None]:
import re

# Remove every angle-bracketed span (non-greedy, so each <...> is matched on
# its own), trim surrounding whitespace, and keep only non-blank results.
# Blank summaries are dropped, so this list can be shorter than `summaries`.
tag_pattern = re.compile('<.*?>')
untagged = (tag_pattern.sub("", raw_summary).strip() for raw_summary in summaries)
clean_summaries = [text for text in untagged if text]

In [None]:
# Normalise each cleaned summary: lowercase it, delete the listed punctuation
# characters and any parenthesised span, collapse whitespace runs to a single
# space, and trim the ends.
punctuation_or_parens = re.compile(r'[,\.!?&\']|\(.*?\)')
whitespace_run = re.compile(r'\s+')

final_summaries = [
    whitespace_run.sub(' ', punctuation_or_parens.sub('', text.lower())).strip()
    for text in clean_summaries
]
final_summaries

['sweetgreen is using new technology to re-imagine the fast-casual model the restaurant is using new technology to re-imagine the fast-casual model it faces unprecedented challenges from the coronavirus crisis',
 'fast-food chain sweetgreen has a lower carbon footprint than a typical fast-food chain the salad chain plans to cut its carbon footprint in half in six years the rest of its emissions will be offset making the company carbon neutral',
 'healthy food chain sweetgreen is now offering employees five months of paid parental leave the benefit extends to "mothers fathers adoptive parents foster parents and others with new additions to their families" the company believes "it is our responsibility to lead the way given the us is one of the few countries that does not mandate any paid leave for new parents"',
 'sweetgreen is a powerful example of a brand marrying purpose with profit to scale growth and impact the fast casual food chain has demonstrated impressive growth showcasing ov

In [None]:
import csv
# Row 42 is left out of the export. Presumably this is the article whose
# summary came back blank and was dropped during cleaning -- TODO confirm.
df = G2_df[G2_df.index != 42]

# zip() stops at the shortest input without complaint, which would silently
# truncate or misalign rows and summaries. Fail loudly instead.
if len(final_summaries) != len(df):
    raise ValueError(
        f"Cannot pair {len(final_summaries)} summaries with {len(df)} article rows; "
        "the row filter above no longer matches the summaries that were dropped."
    )

sum_zip = zip(
    df['Restaurant'].tolist(),
    df['Article Title'].tolist(),
    df['Article Link'].tolist(),
    final_summaries,
)

header = ['Restaurant', 'Article Title', 'Article Link', 'Summary']
# newline='' is what the csv module requires so that embedded newlines in
# fields and the row terminator are written correctly on every platform.
# An explicit UTF-8 encoding keeps non-ASCII titles from depending on the
# platform's default codec.
with open('G2_summaries.csv', 'w', newline='', encoding='utf-8') as f:
    write = csv.writer(f)
    write.writerow(header)
    write.writerows(sum_zip)

In [None]:
# Read the exported file back in and display it to check what was written.
saved_df = pd.read_csv(filepath_or_buffer='G2_summaries.csv')
saved_df

Unnamed: 0,Restaurant,Article Title,Article Link,Summary
0,Sweetgreen,sweetgreen: Nicolas Jammet\n& Jonathan Neman,https://www.npr.org/2020/04/10/831678631/sweet...,sweetgreen is using new technology to re-imagi...
1,Sweetgreen,How sweetgreen plans to cut its carbon footpri...,https://www.fastcompany.com/90606733/how-sweet...,fast-food chain sweetgreen has a lower carbon ...
2,Sweetgreen,sweetgreen now offers 5 months of parental leave,https://www.cbsnews.com/news/sweetgreen-announ...,healthy food chain sweetgreen is now offering ...
3,Sweetgreen,Purpose at work: how sweetgreen is building a ...,https://www.forbes.com/sites/simonmainwaring/2...,sweetgreen is a powerful example of a brand ma...
4,Roti,"If There’s One Industry That Can Beat COVID, I...",https://www.qsrmagazine.com/outside-insights/i...,a year ago the world as we knew it came to a s...
5,Roti,Rōti Reopens Reimagined Northbrook Flagship,https://www.qsrmagazine.com/news/roti-reopens-...,rti northbrook a fast-casual mediterranean res...
6,Roti,Rōti Modern Mediterranean Names Justin Seamon...,https://www.qsrmagazine.com/news/ro-ti-modern-...,justin seamonds has been named as the new chie...
7,Roti,Roti to Open First Texas Location Friday,https://www.qsrmagazine.com/news/roti-open-fir...,roti modern mediterranean will open its first ...
8,Roti,Rōti Modern Mediterranean to Deploy Eatsa Tech...,https://www.qsrmagazine.com/news/r-ti-modern-m...,the first rti restaurant to showcase the full ...
9,Roti,Rōti Modern Mediterranean Receives $23M Invest...,https://www.qsrmagazine.com/news/r-ti-modern-m...,rti modern mediterranean has announced a $23 m...


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=9e88faaf-ade4-4526-a9d5-3a022b266140' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>