## Newsletter Generator

### Imports

In [88]:
# newspaper and basic imports
import newspaper 
from newspaper import Article
import numpy as np
import pandas as pd
from tqdm import tqdm

In [89]:
# nltk imports
import nltk
from nltk.tokenize import word_tokenize
from nltk.text import Text
from nltk.corpus import stopwords
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to /Users/Zac/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /Users/Zac/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [90]:
# image imports
from PIL import Image
import requests
from io import BytesIO

In [91]:
# stopping the working from slice of dataframe from coming up
pd.options.mode.chained_assignment = None

In [92]:
# summariser imports
from transformers import BartTokenizer, BartForConditionalGeneration
import torch

In [93]:
# export imports
import os
import shutil
from datetime import datetime

In [94]:
# weather import
from pyowm.owm import OWM

In [95]:
# stock inports
import pandas_datareader as pdr
from datetime import datetime, timedelta

### Getting Articles

In [96]:
# list of websites to scrape

websites = ['https://medium.com/tag/technology', 'https://towardsdatascience.com', 'https://python.plainenglish.io/', 'https://www.kdnuggets.com', 'https://www.dataversity.net', 'https://www.ibm.com/blogs/journey-to-ai/', 'https://insidebigdata.com', 'https://www.datarobot.com/blog/', 'https://dataaspirant.com']

In [97]:
# function to pull articles from websites:

def websites_pull(websites, no_articles):

    # lists for data to go into

    website = []
    title = []
    body = []
    authors = []
    top_image = []
    keywords = []
        
    # scraping websites

    for url in websites:

        paper = newspaper.build(url, memoize_articles=False)
        
        paper_articles = []

        print(url)

        for article in paper.articles[1:30]:
                if '#comments' not in article.url:
                    paper_articles.append(article.url)

        if len(paper_articles) > no_articles:

            for i in tqdm(range(no_articles)):

                try:

                    article = Article(paper_articles[i], language="en")

                    article.download()
                    article.parse()

                    website.append(article.url)

                    title.append(article.title)

                    body.append(article.text)

                    authors.append(article.authors)

                    top_image.append(article.top_image)

                    article.nlp()

                    keywords.append(article.keywords)

                except:
                    continue
        
        else:
            for i in tqdm(range(len(paper_articles))):

                    try:

                        article = Article(paper_articles[i], language="en")

                        article.download()
                        article.parse()

                        website.append(article.url)

                        title.append(article.title)

                        body.append(article.text)

                        authors.append(article.authors)

                        top_image.append(article.top_image)

                        article.nlp()

                        keywords.append(article.keywords) 
                    
                    except:
                        continue

    # putting all the data in a dataframe

    df = pd.DataFrame({'title':title, 'authors':authors, 'body':body, 'website':website,
    'image':top_image, 'keywords':keywords})

    return df

In [98]:
# put in a list of websites and the number of articles desired
# returns a dataframe of articles

df = websites_pull(websites, 5)

https://medium.com/tag/technology


100%|██████████| 5/5 [00:03<00:00,  1.35it/s]


https://towardsdatascience.com


100%|██████████| 5/5 [00:10<00:00,  2.20s/it]


https://python.plainenglish.io/


100%|██████████| 5/5 [00:13<00:00,  2.63s/it]


https://www.kdnuggets.com


100%|██████████| 5/5 [00:03<00:00,  1.62it/s]


https://www.dataversity.net


100%|██████████| 5/5 [00:09<00:00,  1.85s/it]


https://www.ibm.com/blogs/journey-to-ai/


100%|██████████| 5/5 [00:02<00:00,  2.20it/s]


https://insidebigdata.com


100%|██████████| 5/5 [00:05<00:00,  1.04s/it]


https://www.datarobot.com/blog/


100%|██████████| 5/5 [00:04<00:00,  1.20it/s]


https://dataaspirant.com


100%|██████████| 3/3 [00:04<00:00,  1.58s/it]


### Cleaning the data

In [99]:
# getting rid of duplicate articles
df['title'] = df['title'].drop_duplicates()
df = df.dropna()
df.reset_index(drop=True, inplace=True)

In [100]:
# cleaning the main text - removing \n
df['body'] = df['body'].apply(lambda x:x.replace('\n', ' '))

In [101]:
# removing double spaces
df['body'] = df['body'].apply(lambda x:' '.join(x.split()))

In [102]:
# cleaning urls
df['website']=df['website'].apply(lambda x:x.split('?')[0])

In [103]:
# dropping short articles - sometimes not all the article is scraped
df.drop(df[df['body'].str.len() < 1000].index, inplace=True)
df.reset_index(drop=True, inplace=True)

In [104]:
# drop articles which start with a number (summariser doesn't deal well with e.g. 7 steps to...) 
# if number is not 20 - just to keep 2021 and 2022 articles in the mix
df.drop(df[(df['title'].str[0].str.isdigit()) & (df['title'].str[0:2] != '20')].index, inplace=True)
df.reset_index(drop=True, inplace=True)

### Getting Collocations

In [105]:
## bringing back most frequent word pairs found in each article

col_final = []

for i in range(len(df['body'])):

    textfile = df['body'][i]

    # getting tokens
    tokens = word_tokenize(textfile)

    # making everying lower case
    lower_tokens = [token.lower() for token in tokens]

    # getting rid of numbers
    clean1 = [word for word in lower_tokens if word.isalpha()]
    
    # getting stop words
    stop_words = stopwords.words('english')
    word_m_stop = [word for word in clean1 if not word in stop_words]
    final_text = Text(word_m_stop)

    # adding collocations to list
    col_list = final_text.collocation_list()

    col_list = [list(i) for i in col_list]

    col_med = []

    for i in range(len(col_list)):
        sentence = col_list[i]
        col_med.append(' '.join(sentence))
        
    col_final.append(col_med)
    
# creating dataframe column

df['collocations'] = col_final

### Filtering for Relevancy

In [106]:
# data science key terms
filter_words_upper = ['Artificial Intelligence',
 'Big Data', ''
 'Clustering',
 'Python',
 'Outlier',
 'Data Science',
 'Data Warehouse',
 'Machine Learning',
 'Artificial Intelligence',
 'Data Analysis',
 'Data Engineering',
 'Data Visualization',
 'Data Wrangling',
 'Box Plot',
 'Correlation',
 'dashboard',
 'EDA',
 'Histogram',
 'Hypothesis',
 'Iteration',
 'AWS',
 'azure',
 'Numpy',
 'pandas',
 'matplotlib',
 'seaborn',
 'Bayes Theorem',
 'Decision Tree',
 'Quantile',
 'Predictive Modelling',
 'Standard Deviation',
 'Random Forest',
 'boolean',
 'Fuzzy Logic',
 'Regression',
 'Classification',
 'Overfit',
 'underfit',
 'Statistical Significance',
 'Variance',
 'Deep Learning',
 'Feature Selection',
 'Supervised Machine Learning',
 'Unsupervised Machine Learning',
 'Binary Variable',
 'Binomial Distribution',
 'Computer Vision',
 'Confusion Matrix',
 'covariance',
 'Degrees of Freedom',
 'Evaluation Metrics',
 'F-Score',
 'Hadoop',
 'Hyperparameter',
 'IQR',
 'Keras',
 'kNN',
 'NoSQL',
 'Normal Distribution',
 'Normalize',
 'One Hot Encoding',
 'dummies',
 'recall',
 'P-Value',
 'roc',
 'auc',
 'Root Mean Squared Error',
 'rmse',
 'Skewness',
 'SMOTE',
 'stadardize',
 'Standard error',
 'TensorFlow',
 'Univariate Analysis',
 'Z-test',
 'Residual',
 'Neural Network',
 'Autoregression',
 'Backpropogation',
 'Bagging',
 'Bias-Variance Trade-off',
 'Boosting',
 'Bootstrapping',
 'Classification Threshold',
 'Convex Function',
 'Cosine Similarity',
 'Cost Function',
 'Cross Entropy',
 'Cross Validation',
 'DBScan',
 'Decision Boundary',
 'Dplyr',
 'Early Stopping',
 'Feature Hashing',
 'Gated Recurrent Unit',
 'Hidden Markov Model',
 'Hierarchical Clustering',
 'Holdout Sample',
 'Holt-Winters Forecasting',
 'Imputation',
 'K-Means',
 'Kurtosis',
 'Lasso',
 'Maximum Likelihood Estimation',
 'Multivariate Analysis',
 'Naive Bayes',
 'Polynomial Regression',
 'Ridge Regression',
 'Rotational Invariance',
 ' Semi-Supervised Learning',
 'Stochastic Gradient Descent',
 'SVM', 
 'BART', 
 'BERT',
 'NLP',
 'Pandas']

In [107]:
# making filtered words lower
filter_words = [word.lower() for word in filter_words_upper]

In [108]:
# creating masks - getting boolean arrays for where keywords, collocations or title 
# contain filter words
mask = np.array([bool(set(map(str, x)) & set(filter_words)) for x in df['collocations']])
mask2 = np.array([bool(set(map(str, x)) & set(filter_words)) for x in df['keywords']])
mask3 = np.array([bool(set(map(str, x)) & set(filter_words)) for x in df['title']])

In [109]:
# filtering by mask and resetting index
df_filtered = df[mask | mask2 | mask3]
df_filtered.reset_index(drop=True, inplace=True)

### Tagging articles - beginner, intermediate, advanced

In [110]:
## getting lower case tag words

# difficulty key words
beginner_upper = ['Artificial Intelligence', 'Big Data', 'Python', 'Outlier', 'Data Science', 'Data Warehouse', 'Machine Learning', 'Artificial Intelligence', 'Data Analysis', 'Data Engineering', 'Data Visualization', 'Data Wrangling', 'Box Plot', 'Correlation', 'dashboard', 'EDA', 'Histogram', 'Hypothesis', 'Iteration', 'AWS', 'azure', 'Numpy', 'pandas', 'matplotlib', 'seaborn']

beginner = [word.lower() for word in beginner_upper]

medium_upper = ['Clustering','Bayes Theorem', 'Decision Tree', 'Quantile', 'Predictive Modelling', 'Standard Deviation', 'Random Forest', 'boolean', 'Fuzzy Logic', 'Regression', 'Classification', 'Overfit', 'underfit', 'Statistical Significance', 'Variance', 'Deep Learning', 'Feature Selection', 'Supervised Machine Learning', 'Unsupervised Machine Learning', 'Binary Variable', 'Binomial Distribution', 'Computer Vision', 'Confusion Matrix', 'covariance', 'Degrees of Freedom', 'Evaluation Metrics', 'F-Score', 'Hadoop', 'Hyperparameter', 'IQR', 'Keras', 'kNN', 'NoSQL', 'Normal Distribution', 'Normalize', 'One Hot Encoding', 'dummies', 'recall', 'P-Value', 'roc', 'auc', 'Root Mean Squared Error', 'rmse', 'Skewness', 'SMOTE', 'standardize', 'Standard error', 'TensorFlow', 'Univariate Analysis', 'Z-test']

medium = [word.lower() for word in medium_upper]

advanced_upper = ['Residual', 'Neural Network', 'Autoregression', 'Backpropogation', 'Bagging', 'Bias-Variance Trade-off', 'Boosting', 'Bootstrapping', 'Classification Threshold', 'Convex Function', 'Cosine Similarity', 'Cost Function', 'Cross Entropy', 'Cross Validation', 'DBScan', 'Decision Boundary', 'Dplyr', 'Early Stopping', 'Feature Hashing', 'Gated Recurrent Unit', 'Hidden Markov Model', 'Hierarchical Clustering', 'Holdout Sample', 'Holt-Winters Forecasting', 'Imputation', 'K-Means', 'Kurtosis', 'Lasso', 'Maximum Likelihood Estimation', 'Multivariate Analysis', 'Naive Bayes', 'Polynomial Regression', 'Ridge Regression', 'Rotational Invariance', ' Semi-Supervised Learning', 'Stochastic Gradient Descent', 'SVM']

advanced = [word.lower() for word in advanced_upper]

In [111]:
# calculating the percentage of keywords which are beginner, intermediate and advanced

beg = []
med = []
adv = []


for i in range(len(df_filtered['body'])):

    beg_count = 0

    for word in beginner:
        if word in df_filtered['body'][i]:
            beg_count +=1

    med_count = 0

    for word in medium:
        if word in df_filtered['body'][i]:
            med_count +=1

    adv_count = 0

    for word in advanced:
        if word in df_filtered['body'][i]:
            adv_count +=1

    total_count = beg_count + med_count + adv_count

    if total_count != 0:

        beg_percentage = beg_count/total_count
        med_percentage = med_count/total_count
        adv_percentage = adv_count/total_count

        beg.append(beg_percentage)
        med.append(med_percentage)
        adv.append(adv_percentage)

    else:
        beg.append(0)
        med.append(0)
        adv.append(0)

# adding columns to dataframe

df_filtered['percentage_beginner'] = beg
df_filtered['percentage_medium'] = med
df_filtered['percentage_advanced'] = adv


In [112]:
# getting back difficulty tag column
# adjusted advanced threshold

tags = []

for i in range(len(df_filtered)):
    if df_filtered['percentage_advanced'][i] >= 0.3:
        tags.append('Advanced')
    elif df_filtered['percentage_medium'][i] >= 0.5:
        tags.append('Intermediate')
    else:
        tags.append('Beginner')

df_filtered['tag'] = tags

In [113]:
df_filtered.head()

Unnamed: 0,title,authors,body,website,image,keywords,collocations,percentage_beginner,percentage_medium,percentage_advanced,tag
0,I Took 12 Data Science Courses For 3 Months — ...,[Benjamin Nweke],1. Keep your mind open to limitless possibilit...,https://towardsdatascience.com/i-took-12-data-...,https://miro.medium.com/max/1200/1*8pPrgUYTar9...,"[earlier, took, heres, 12, scientists, wish, p...","[data science, data scientists, already prior,...",0.833333,0.166667,0.0,Beginner
1,Python: Short Introduction to os.path Module,[Tony Li Xu],Python: Short Introduction to os.path Module o...,https://python.plainenglish.io/python-short-in...,https://miro.medium.com/max/543/0*Vih-j6psgmaX...,"[tail, returns, file, ospath, module, short, p...","[path print, print path, true path, commonly u...",1.0,0.0,0.0,Beginner
2,How to Predict Customer Lifetime Value (CLV) i...,[Muhammed Resit Cicekdag],How to Predict Customer Lifetime Value (CLV) i...,https://python.plainenglish.io/how-to-predict-...,https://miro.medium.com/max/1200/0*MrS-82Gy9Tf...,"[values, lets, bgnbd, number, up_limit, python...","[dataframe variable, uniquely assigned, variab...",0.666667,0.333333,0.0,Beginner
3,Best Practices for MLOps Documentation,[],Whether it's an ML side project or adding a ne...,https://www.kdnuggets.com/2021/12/best-practic...,https://www.kdnuggets.com/wp-content/uploads/b...,"[best, practices, technical, machine, automati...","[machine learning, technical documentation, ml...",0.571429,0.428571,0.0,Beginner
4,Write Clean Python Code Using Pipes,[],"By Khuyen Tran, Data Science Intern Motivation...",https://www.kdnuggets.com/2021/12/write-clean-...,https://www.kdnuggets.com/wp-content/uploads/w...,"[values, write, list, clean, elements, python,...","[image author, data science, unfold iterables,...",1.0,0.0,0.0,Beginner


### Getting Read_time

In [114]:
# getting read time
# according to the internet a person reads around 238 words per minute
import math

df_filtered['read_time'] = df_filtered['body'].apply(lambda x:math.ceil(len(x.split())/238))

### Choosing Articles to Suggest

In [115]:
# make sure there are 5 articles from different websites
# loop until this is true

if len(df_filtered) > 5:
    df_sample = df_filtered.sample(n=5)
    df_sample = df_sample['website'].apply(lambda x:x.split('/')[2]).drop_duplicates()
    
    while len(df_sample) < 5:
        df_sample = df_filtered.sample(n=5)
        df_sample['website'] = df_sample['website'].apply(lambda x:x.split('/')[2]).drop_duplicates()
        df_sample.dropna(inplace=True)

df_filtered = df_filtered.iloc[df_sample.index]

In [116]:
df_filtered.reset_index(drop=True, inplace=True)

### Getting Summaries - BART Summariser

In [117]:
# getting model

model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')

In [118]:
# function to generate summaries
# for more information on BART see README

def get_summaries(df_filtered, length):

    summaries = []

    for i in range(len(df_filtered['body'])):
        document = df_filtered['body'][i]

        # tokenize text

        inputs = tokenizer([document], max_length=1024, return_tensors='pt', truncation=True)

        # generate summary

        summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=length, early_stopping=True, length_penalty=2)
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)

        summaries.append(summary)
    
    # add to dataframe

    df_filtered['summary'] = summaries

    return df_filtered

In [119]:
# add dataframe and max length of summary

df_filtered = get_summaries(df_filtered, 120)

#### cleaning summaries

In [120]:
# removing last sentence from summary if not complete

sent_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

summaries_complete = []

for i in range(len(df_filtered['summary'])):
    string = df_filtered['summary'][i]

    sentences = sent_tokenizer.tokenize(string)

    add = []

    for i in range(len(sentences)):
        if sentences[i][-1] == '.':
            add.append(sentences[i])


    summaries_complete.append(" ".join(s for s in add))

df_filtered['summary'] = summaries_complete

In [121]:
# removing sentences which are the same as the title

sent_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

summaries_no_title = []

for i in range(len(df_filtered['summary'])):
    string = df_filtered['summary'][i]

    sentences = sent_tokenizer.tokenize(string)

    add = []

    for s in range(len(sentences)):
        if sentences[s][:-1] != df_filtered['title'][i]:
            add.append(sentences[s])

    summaries_no_title.append(" ".join(s for s in add))

df_filtered['summary'] = summaries_no_title

In [122]:
# removing any white space from start and end of summaries
df_filtered['summary'] = df_filtered['summary'].apply(lambda x:x.strip())

### Newspaper Function - exports article summaries and information, gets weather and stocks

In [123]:
def newsletter():

    # make a folder to put contents with todays date

    path = '/Users/Zac/Desktop/Newsletter - ' + datetime.strftime(datetime.now(), '%d-%m-%Y') + '/'

    if os.path.exists(path):
        shutil.rmtree(path)

    os.mkdir(path)

    with open(os.path.join('/Users/Zac/Desktop/Newsletter - ' + datetime.strftime(datetime.now(), '%d-%m-%Y') + '/','Summaries.txt'), "w") as f:
        f.write(datetime.strftime(datetime.now(), '%A %d %B') + '\n\n')

    # make folder for images

    path = '/Users/Zac/Desktop/Newsletter - ' + datetime.strftime(datetime.now(), '%d-%m-%Y') + '/Images/'

    if os.path.exists(path):
        shutil.rmtree(path)
        
    os.mkdir(path)

    for i in range(len(df_filtered)):

        # add text information including summary

        with open('/Users/Zac/Desktop/Newsletter - ' + datetime.strftime(datetime.now(), '%d-%m-%Y') + '/Summaries.txt', 'a') as f:
            f.write('Tag: ' + df_filtered['tag'][i].upper() + ' - Read Time: ' + str(df_filtered['read_time'][i]) + '\n\n' + df_filtered['title'][i] + '\n\n' + df_filtered['summary'][i] + '\n\n' + 'Read the full story here: ' + df_filtered['website'][i] + '\n\n' + '---------------' + '\n\n')

        # add images

        try:
            response = requests.get(df_filtered['image'][i])
            img = Image.open(BytesIO(response.content))
            img.save('/Users/Zac/Desktop/Newsletter - ' + datetime.strftime(datetime.now(), '%d-%m-%Y') + '/Images/' + df_filtered['title'][i] +'.png')

        except:
            #print('No image found for:', df_filtered['title'][i])

            try:
                response = requests.get(df_filtered['image'][i])
                img = Image.open(BytesIO(response.content))
                img.save('/Users/Zac/Desktop/Newsletter - ' + datetime.strftime(datetime.now(), '%d-%m-%Y') + '/Images/' + df_filtered['title'][i] +'.jpg')
            except:
               print('No image found for:', df_filtered['title'][i])
        continue


    # getting weather information

    owm = OWM('b33c2d9566ba8b3c3260afc40c91d012')
    mgr = owm.weather_manager()
    observation = mgr.one_call(lat = 41.38879, lon = 2.15899)
    weather = observation.forecast_daily[0]

    # saving weather

    path = '/Users/Zac/Desktop/Newsletter - ' + datetime.strftime(datetime.now(), '%d-%m-%Y') + '/Weather/'
    
    # make a folder to put contents

    if os.path.exists(path):
        shutil.rmtree(path)

    os.mkdir(path)

    with open(os.path.join('/Users/Zac/Desktop/Newsletter - ' + datetime.strftime(datetime.now(), '%d-%m-%Y') + '/Weather/' + 'weather.txt'), "w") as f:
        
        f.write('Barcelona'.upper()+ '\n\n')

        weather_dict = {'clear sky':'Clear Skies', 'few clouds':'Partly Cloudy', 'scattered clouds':'Partly Cloudy', 'broken clouds':'Cloudy', 'shower rain':'Light Showers', 'rain':'Rain', 'thunderstorm':'Stormy', 'snow':'Snow', 'mist':'Misty'}

        if weather.detailed_status in weather_dict.keys():

            f.write(weather_dict[weather.detailed_status].upper() + '\n\n')
        
        else:
            f.write(weather.detailed_status.upper() + '\n\n')
    
        # high and low temp
        f.write('Low/High: ' + str(round(weather.temperature('celsius')['min'])) + '°/' + str(round(weather.temperature('celsius')['max']))+ '°\n\n')

        # sunrise
        timestamp = datetime.fromtimestamp(weather.srise_time)
        f.write('Sunrise: ' + timestamp.strftime('%H:%M') + '\n')

        # sunset
        timestamp = datetime.fromtimestamp(weather.sset_time)
        f.write('Sunset: '+ timestamp.strftime('%H:%M'))

    # getting back weather icon

    try:

        url = weather.weather_icon_url()

        response = requests.get(url)
        img = Image.open(BytesIO(response.content))

        img.save('/Users/Zac/Desktop/Newsletter - ' + datetime.strftime(datetime.now(), '%d-%m-%Y') + '/Weather/' + 'weathericon' + '.png')

    except:
        print('Couldn\'t fetch weather icon.')

    # stocks

    stock_weekdays = ['Tue', 'Wed', 'Thu', 'Fri']

    stock_dict = {'^GSPC':'S&P 500', '^IBEX':'IBEX 35', '^IXIC':'NASDAQ', '^DJI':'DOW'}

    path = '/Users/Zac/Desktop/Newsletter - ' + datetime.strftime(datetime.now(), '%d-%m-%Y') + '/Stocks/'
    
    # make a folder to put contents

    if os.path.exists(path):
        shutil.rmtree(path)

    os.mkdir(path)

    with open(os.path.join('/Users/Zac/Desktop/Newsletter - ' + datetime.strftime(datetime.now(), '%d-%m-%Y') + '/Stocks/' + 'stocks.txt'), "w") as f:
        f.write('Stocks: \n\n')

    # get daily stock change for weekdays

    if datetime.strftime(datetime.now() - timedelta(1), '%a') in stock_weekdays:

        for stock_ticker in stock_dict.keys():

            df = pdr.get_data_yahoo(stock_ticker)
            df = df.reset_index()
            yesterday_close = df[df['Date'] == datetime.strftime(datetime.now() - timedelta(1), '%Y-%m-%d')]['Close'].item()

            try:
                day_before_yesterday_close = df[df['Date'] == datetime.strftime(datetime.now() - timedelta(2), '%Y-%m-%d')]['Close'].item()
            except:
                print('No stock data available today.')

            change = yesterday_close - day_before_yesterday_close
            percentage_change = round((change/day_before_yesterday_close)*100, 2)

            with open(os.path.join('/Users/Zac/Desktop/Newsletter - ' + datetime.strftime(datetime.now(), '%d-%m-%Y') + '/Stocks/' + 'stocks.txt'), "a") as f:

                if percentage_change > 0:
                
                    f.write(stock_dict[stock_ticker] + ' +' + str(percentage_change) +  '%\n\n')
                
                else:
                    f.write(stock_dict[stock_ticker]+ ' ' +str(percentage_change) + '%\n\n')

    # if not a weekday, get yearly change

    else:
        for stock_ticker in stock_dict.keys():

            df = pdr.get_data_yahoo(stock_ticker)
            df = df.reset_index()

            start_year = df[df['Date'] == datetime.strftime(datetime(datetime.today().year, 2, 1), '%Y-%m-%d')]['Close'].item()
            yesterday_close = df[df['Date'] == datetime.strftime(datetime.now() - timedelta(1), '%Y-%m-%d')]['Close'].item()

            change = yesterday_close - start_year
            percentage_change = round((change/start_year)*100, 2)

            if percentage_change > 0:
                    
                f.write('Yearly Change: ' + stock_dict[stock_ticker] + ' +' + str(percentage_change) +  '%')
                    
            else:
                f.write('Yearly Change: ' + stock_dict[stock_ticker]+ ' ' +str(percentage_change) + '%')

In [124]:
newsletter()

No image found for: Customer Analytics and AI: Better Together
No image found for: “Above the Trend Line” – Your Industry Rumor Central for 12/10/2021
No image found for: Write Clean Python Code Using Pipes
