In [123]:
#Import statements and retrieve function definition
import datetime
import os
import urllib.request

import bs4 as bs
import numpy as np
import pandas as pd

#Function to retrieve current events list from single wiki page
#Inputs: start_date: datetime.date object for target day
#Output: DF of date, event category, and event description
#Output Dimension: (# of events listed) rows x 3 columns

def get_current_events(start_date):
    """Scrape one day's entries from Wikipedia's Portal:Current_events page.

    The page URL is built from ``start_date`` (year, full month name, day of
    month) and printed before the request is made.

    Parameters
    ----------
    start_date : datetime.date
        Target day. Any object exposing ``year``, ``day`` and ``strftime``
        works.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``cat``, ``descr``, indexed 0..n-1. There is one row
        per ``li`` element that is a direct child of a non-``div`` child of
        the ``current-events-content description`` block. ``cat`` is the text
        of the most recent child whose tag name starts with ``div``, or
        ``"NA"`` when an item appears before any such child.

        If the page cannot be fetched or parsed, or the content block is not
        found, a single placeholder row with ``cat == descr == "NA"`` is
        returned instead, so the caller always receives a frame.

    Notes
    -----
    Only ``Exception`` subclasses are turned into the placeholder row;
    ``KeyboardInterrupt`` and ``SystemExit`` propagate so that a long
    scraping loop can still be stopped.
    """
    columns = ['date', 'cat', 'descr']
    records = None  # stays None when the page could not be fetched/parsed

    try:
        year = start_date.year
        month = start_date.strftime("%B")
        day = start_date.day
        url = f"https://en.wikipedia.org/wiki/Portal:Current_events/{year}_{month}_{day}"
        print(url)

        # The context manager closes the HTTP response even if read() fails.
        with urllib.request.urlopen(url) as response:
            article = response.read()
        soup = bs.BeautifulSoup(article, 'lxml')

        section = soup.find("div", {"class": "current-events-content description"})
        if section is not None:
            records = []
            # Default category so items listed before the first heading are
            # kept instead of being dropped by an unbound-variable error.
            cat = "NA"
            for header in section.children:
                tag_name = getattr(header, "name", None)
                if tag_name is None:
                    # Child without a tag name (e.g. text between tags).
                    continue
                if tag_name.startswith('div'):
                    cat = header.text
                    continue
                for li in header.children:
                    if getattr(li, "name", None) == "li":
                        records.append({'date': start_date,
                                        'cat': cat,
                                        'descr': li.text})
    except Exception:
        # Best effort: any fetch/parse failure yields the placeholder row.
        records = None

    if records is None:
        return pd.DataFrame({'date': [start_date],
                             'cat': ["NA"],
                             'descr': ["NA"]})

    # Build the frame once rather than concatenating row by row.
    return pd.DataFrame(records, columns=columns)


In [125]:
# Scrape every day in [start_date, end_date] (inclusive) and save the combined
# table to temp/total_events_full.csv.
start_date = datetime.date(2000, 1, 1)
end_date = datetime.date(2022, 11, 30)
delta = datetime.timedelta(days=1)

# Collect one frame per day and concatenate once at the end; concatenating
# inside the loop re-copies all earlier rows on every iteration.
daily_frames = []
current_date = start_date  # separate cursor so start_date keeps its configured value
while current_date <= end_date:
    daily_frames.append(get_current_events(current_date))
    current_date += delta

# pd.concat raises on an empty list, so fall back to an empty frame when the
# date range is empty. The index is left as returned by get_current_events
# (not reset), so the CSV keeps its existing layout.
df_m = pd.concat(daily_frames) if daily_frames else pd.DataFrame()

# Make sure the output directory exists so the write cannot fail after the
# whole scrape has already run.
os.makedirs("temp", exist_ok=True)
df_m.to_csv("temp/total_events_full.csv")

https://en.wikipedia.org/wiki/Portal:Current_events/2000_January_1
https://en.wikipedia.org/wiki/Portal:Current_events/2000_January_2
https://en.wikipedia.org/wiki/Portal:Current_events/2000_January_3
https://en.wikipedia.org/wiki/Portal:Current_events/2000_January_4
https://en.wikipedia.org/wiki/Portal:Current_events/2000_January_5
https://en.wikipedia.org/wiki/Portal:Current_events/2000_January_6
https://en.wikipedia.org/wiki/Portal:Current_events/2000_January_7
https://en.wikipedia.org/wiki/Portal:Current_events/2000_January_8
https://en.wikipedia.org/wiki/Portal:Current_events/2000_January_9
https://en.wikipedia.org/wiki/Portal:Current_events/2000_January_10
https://en.wikipedia.org/wiki/Portal:Current_events/2000_January_11
https://en.wikipedia.org/wiki/Portal:Current_events/2000_January_12
https://en.wikipedia.org/wiki/Portal:Current_events/2000_January_13
https://en.wikipedia.org/wiki/Portal:Current_events/2000_January_14
https://en.wikipedia.org/wiki/Portal:Current_events/2000_