In [93]:
from datetime import date, timedelta
from bs4 import BeautifulSoup
import urllib.request
import json
import csv

def daterange(start_date, end_date):
    for n in range(int((end_date - start_date).days)):
        yield start_date + timedelta(n)

In [121]:
BASE_URL = "https://www.xwordinfo.com/Crossword?date="
def get_soup(single_date):
    full_url = BASE_URL + single_date.strftime("%m/%d/%Y")
    contents = urllib.request.urlopen(full_url).read()
    soup = BeautifulSoup(contents, 'html.parser')
    return soup

def scrape(soup, across, single_date):
    res = []
    pan_id = "ACluesPan" if across else "DCluesPan"
    div_list = soup.find(id=pan_id).find(class_="numclue").find_all('div')
    assert(len(div_list) % 2 == 0)
    for i in range(0, len(div_list), 2):
        clue_idx = div_list[i].string
        clue_content = div_list[i+1]
        clue = clue_content.contents[0].get_text().removesuffix(" : ")
        answer = clue_content.a.string
        entry = {
            'year': single_date.year,
            'month': single_date.month,
            'day': single_date.day,
            'weekday': single_date.weekday(),
            'clue': clue,
            'answer': answer,
            'index': clue_idx + '-ACROSS' if across else '-DOWN',
        }
        res.append(entry)
    print(f"Inserted {len(res)} clues for {single_date.strftime('%m/%d/%Y')} {'ACROSS' if across else 'DOWN'}")
    return res

In [70]:
BY_DATE_STORE = {}
# Use a {'11211993': [{clueDetails}, {clueDetails}, ... ]} format so that if any one day's scraping fails,
# we can just overwrite the partial result by inserting into the same key. If we were to store in a  
# big flat list of clues, it wouldn't be easy to know where to start overwriting from.
start_date = date(1993, 11, 21)
end_date = date(2005, 1, 1)
for single_date in daterange(start_date, end_date):
    soup = get_soup(single_date)
    across_res = scrape(soup, True, single_date)
    if single_date not in [date(2000, 6, 7), date(2014, 10, 9)]: # These are uniclue puzzles which only have acrosses
        down_res = scrape(soup, False, single_date)
    BY_DATE_STORE[single_date.strftime('%m%d%Y')] = across_res + down_res

Inserted 77 clues for 11/21/1993 ACROSS
Inserted 73 clues for 11/21/1993 DOWN
Inserted 35 clues for 11/22/1993 ACROSS
Inserted 43 clues for 11/22/1993 DOWN
Inserted 41 clues for 11/23/1993 ACROSS
Inserted 37 clues for 11/23/1993 DOWN
Inserted 35 clues for 11/24/1993 ACROSS
Inserted 43 clues for 11/24/1993 DOWN
Inserted 39 clues for 11/25/1993 ACROSS
Inserted 37 clues for 11/25/1993 DOWN
Inserted 35 clues for 11/26/1993 ACROSS
Inserted 43 clues for 11/26/1993 DOWN
Inserted 34 clues for 11/27/1993 ACROSS
Inserted 36 clues for 11/27/1993 DOWN
Inserted 67 clues for 11/28/1993 ACROSS
Inserted 71 clues for 11/28/1993 DOWN
Inserted 41 clues for 11/29/1993 ACROSS
Inserted 39 clues for 11/29/1993 DOWN
Inserted 35 clues for 11/30/1993 ACROSS
Inserted 39 clues for 11/30/1993 DOWN
Inserted 37 clues for 12/01/1993 ACROSS
Inserted 39 clues for 12/01/1993 DOWN
Inserted 37 clues for 12/02/1993 ACROSS
Inserted 39 clues for 12/02/1993 DOWN
Inserted 37 clues for 12/03/1993 ACROSS
Inserted 39 clues for 12

In [132]:
BY_DATE_STORE = {}
start_date = date(2007, 8, 23)
end_date = date(2010, 1, 1)
for single_date in daterange(start_date, end_date):
    soup = get_soup(single_date)
    across_res = scrape(soup, True, single_date)
    if single_date not in [
            date(2000, 6, 7), date(2007, 8, 23), date(2007, 11, 1), 
            date(2014, 10, 9), date(2016, 9, 28), date(2019, 2, 6), 
            date(2021, 1, 6), date(2022, 1, 19)]: 
        # ^These are uniclue puzzles which only have acrosses. See https://www.xwordinfo.com/Uniclue
        down_res = scrape(soup, False, single_date)
    BY_DATE_STORE[single_date.strftime('%m%d%Y')] = across_res + down_res

Inserted 66 clues for 08/23/2007 ACROSS
Inserted 33 clues for 08/24/2007 ACROSS
Inserted 35 clues for 08/24/2007 DOWN
Inserted 32 clues for 08/25/2007 ACROSS
Inserted 38 clues for 08/25/2007 DOWN
Inserted 62 clues for 08/26/2007 ACROSS
Inserted 74 clues for 08/26/2007 DOWN
Inserted 39 clues for 08/27/2007 ACROSS
Inserted 39 clues for 08/27/2007 DOWN
Inserted 35 clues for 08/28/2007 ACROSS
Inserted 39 clues for 08/28/2007 DOWN
Inserted 39 clues for 08/29/2007 ACROSS
Inserted 39 clues for 08/29/2007 DOWN
Inserted 34 clues for 08/30/2007 ACROSS
Inserted 36 clues for 08/30/2007 DOWN
Inserted 30 clues for 08/31/2007 ACROSS
Inserted 38 clues for 08/31/2007 DOWN
Inserted 35 clues for 09/01/2007 ACROSS
Inserted 35 clues for 09/01/2007 DOWN
Inserted 60 clues for 09/02/2007 ACROSS
Inserted 80 clues for 09/02/2007 DOWN
Inserted 33 clues for 09/03/2007 ACROSS
Inserted 41 clues for 09/03/2007 DOWN
Inserted 37 clues for 09/04/2007 ACROSS
Inserted 41 clues for 09/04/2007 DOWN
Inserted 37 clues for 09

Inserted 35 clues for 12/08/2007 ACROSS
Inserted 35 clues for 12/08/2007 DOWN
Inserted 68 clues for 12/09/2007 ACROSS
Inserted 72 clues for 12/09/2007 DOWN
Inserted 39 clues for 12/10/2007 ACROSS
Inserted 39 clues for 12/10/2007 DOWN
Inserted 35 clues for 12/11/2007 ACROSS
Inserted 43 clues for 12/11/2007 DOWN
Inserted 39 clues for 12/12/2007 ACROSS
Inserted 39 clues for 12/12/2007 DOWN
Inserted 35 clues for 12/13/2007 ACROSS
Inserted 39 clues for 12/13/2007 DOWN
Inserted 31 clues for 12/14/2007 ACROSS
Inserted 39 clues for 12/14/2007 DOWN
Inserted 36 clues for 12/15/2007 ACROSS
Inserted 36 clues for 12/15/2007 DOWN
Inserted 65 clues for 12/16/2007 ACROSS
Inserted 75 clues for 12/16/2007 DOWN
Inserted 33 clues for 12/17/2007 ACROSS
Inserted 37 clues for 12/17/2007 DOWN
Inserted 35 clues for 12/18/2007 ACROSS
Inserted 41 clues for 12/18/2007 DOWN
Inserted 35 clues for 12/19/2007 ACROSS
Inserted 41 clues for 12/19/2007 DOWN
Inserted 35 clues for 12/20/2007 ACROSS
Inserted 41 clues for 12

Inserted 66 clues for 03/23/2008 ACROSS
Inserted 74 clues for 03/23/2008 DOWN
Inserted 41 clues for 03/24/2008 ACROSS
Inserted 37 clues for 03/24/2008 DOWN
Inserted 41 clues for 03/25/2008 ACROSS
Inserted 35 clues for 03/25/2008 DOWN
Inserted 35 clues for 03/26/2008 ACROSS
Inserted 41 clues for 03/26/2008 DOWN
Inserted 33 clues for 03/27/2008 ACROSS
Inserted 37 clues for 03/27/2008 DOWN
Inserted 32 clues for 03/28/2008 ACROSS
Inserted 36 clues for 03/28/2008 DOWN
Inserted 35 clues for 03/29/2008 ACROSS
Inserted 35 clues for 03/29/2008 DOWN
Inserted 69 clues for 03/30/2008 ACROSS
Inserted 71 clues for 03/30/2008 DOWN
Inserted 37 clues for 03/31/2008 ACROSS
Inserted 39 clues for 03/31/2008 DOWN
Inserted 39 clues for 04/01/2008 ACROSS
Inserted 35 clues for 04/01/2008 DOWN
Inserted 35 clues for 04/02/2008 ACROSS
Inserted 43 clues for 04/02/2008 DOWN
Inserted 41 clues for 04/03/2008 ACROSS
Inserted 37 clues for 04/03/2008 DOWN
Inserted 33 clues for 04/04/2008 ACROSS
Inserted 39 clues for 04

Inserted 33 clues for 07/07/2008 ACROSS
Inserted 39 clues for 07/07/2008 DOWN
Inserted 37 clues for 07/08/2008 ACROSS
Inserted 41 clues for 07/08/2008 DOWN
Inserted 41 clues for 07/09/2008 ACROSS
Inserted 35 clues for 07/09/2008 DOWN
Inserted 39 clues for 07/10/2008 ACROSS
Inserted 39 clues for 07/10/2008 DOWN
Inserted 36 clues for 07/11/2008 ACROSS
Inserted 36 clues for 07/11/2008 DOWN
Inserted 32 clues for 07/12/2008 ACROSS
Inserted 34 clues for 07/12/2008 DOWN
Inserted 63 clues for 07/13/2008 ACROSS
Inserted 69 clues for 07/13/2008 DOWN
Inserted 39 clues for 07/14/2008 ACROSS
Inserted 39 clues for 07/14/2008 DOWN
Inserted 37 clues for 07/15/2008 ACROSS
Inserted 39 clues for 07/15/2008 DOWN
Inserted 37 clues for 07/16/2008 ACROSS
Inserted 35 clues for 07/16/2008 DOWN
Inserted 36 clues for 07/17/2008 ACROSS
Inserted 42 clues for 07/17/2008 DOWN
Inserted 33 clues for 07/18/2008 ACROSS
Inserted 37 clues for 07/18/2008 DOWN
Inserted 35 clues for 07/19/2008 ACROSS
Inserted 35 clues for 07

Inserted 31 clues for 10/21/2008 ACROSS
Inserted 45 clues for 10/21/2008 DOWN
Inserted 35 clues for 10/22/2008 ACROSS
Inserted 41 clues for 10/22/2008 DOWN
Inserted 35 clues for 10/23/2008 ACROSS
Inserted 39 clues for 10/23/2008 DOWN
Inserted 32 clues for 10/24/2008 ACROSS
Inserted 38 clues for 10/24/2008 DOWN
Inserted 34 clues for 10/25/2008 ACROSS
Inserted 34 clues for 10/25/2008 DOWN
Inserted 65 clues for 10/26/2008 ACROSS
Inserted 75 clues for 10/26/2008 DOWN
Inserted 35 clues for 10/27/2008 ACROSS
Inserted 41 clues for 10/27/2008 DOWN
Inserted 37 clues for 10/28/2008 ACROSS
Inserted 39 clues for 10/28/2008 DOWN
Inserted 35 clues for 10/29/2008 ACROSS
Inserted 41 clues for 10/29/2008 DOWN
Inserted 35 clues for 10/30/2008 ACROSS
Inserted 39 clues for 10/30/2008 DOWN
Inserted 35 clues for 10/31/2008 ACROSS
Inserted 35 clues for 10/31/2008 DOWN
Inserted 37 clues for 11/01/2008 ACROSS
Inserted 39 clues for 11/01/2008 DOWN
Inserted 69 clues for 11/02/2008 ACROSS
Inserted 67 clues for 11

Inserted 33 clues for 02/04/2009 ACROSS
Inserted 43 clues for 02/04/2009 DOWN
Inserted 37 clues for 02/05/2009 ACROSS
Inserted 39 clues for 02/05/2009 DOWN
Inserted 34 clues for 02/06/2009 ACROSS
Inserted 38 clues for 02/06/2009 DOWN
Inserted 35 clues for 02/07/2009 ACROSS
Inserted 37 clues for 02/07/2009 DOWN
Inserted 63 clues for 02/08/2009 ACROSS
Inserted 77 clues for 02/08/2009 DOWN
Inserted 39 clues for 02/09/2009 ACROSS
Inserted 39 clues for 02/09/2009 DOWN
Inserted 41 clues for 02/10/2009 ACROSS
Inserted 37 clues for 02/10/2009 DOWN
Inserted 39 clues for 02/11/2009 ACROSS
Inserted 39 clues for 02/11/2009 DOWN
Inserted 35 clues for 02/12/2009 ACROSS
Inserted 41 clues for 02/12/2009 DOWN
Inserted 33 clues for 02/13/2009 ACROSS
Inserted 37 clues for 02/13/2009 DOWN
Inserted 35 clues for 02/14/2009 ACROSS
Inserted 33 clues for 02/14/2009 DOWN
Inserted 67 clues for 02/15/2009 ACROSS
Inserted 73 clues for 02/15/2009 DOWN
Inserted 35 clues for 02/16/2009 ACROSS
Inserted 41 clues for 02

Inserted 37 clues for 05/21/2009 ACROSS
Inserted 41 clues for 05/21/2009 DOWN
Inserted 33 clues for 05/22/2009 ACROSS
Inserted 35 clues for 05/22/2009 DOWN
Inserted 32 clues for 05/23/2009 ACROSS
Inserted 36 clues for 05/23/2009 DOWN
Inserted 68 clues for 05/24/2009 ACROSS
Inserted 72 clues for 05/24/2009 DOWN
Inserted 35 clues for 05/25/2009 ACROSS
Inserted 36 clues for 05/25/2009 DOWN
Inserted 37 clues for 05/26/2009 ACROSS
Inserted 41 clues for 05/26/2009 DOWN
Inserted 38 clues for 05/27/2009 ACROSS
Inserted 40 clues for 05/27/2009 DOWN
Inserted 39 clues for 05/28/2009 ACROSS
Inserted 37 clues for 05/28/2009 DOWN
Inserted 32 clues for 05/29/2009 ACROSS
Inserted 34 clues for 05/29/2009 DOWN
Inserted 36 clues for 05/30/2009 ACROSS
Inserted 36 clues for 05/30/2009 DOWN
Inserted 72 clues for 05/31/2009 ACROSS
Inserted 66 clues for 05/31/2009 DOWN
Inserted 36 clues for 06/01/2009 ACROSS
Inserted 42 clues for 06/01/2009 DOWN
Inserted 37 clues for 06/02/2009 ACROSS
Inserted 39 clues for 06

Inserted 32 clues for 09/04/2009 ACROSS
Inserted 32 clues for 09/04/2009 DOWN
Inserted 34 clues for 09/05/2009 ACROSS
Inserted 34 clues for 09/05/2009 DOWN
Inserted 75 clues for 09/06/2009 ACROSS
Inserted 65 clues for 09/06/2009 DOWN
Inserted 37 clues for 09/07/2009 ACROSS
Inserted 41 clues for 09/07/2009 DOWN
Inserted 37 clues for 09/08/2009 ACROSS
Inserted 41 clues for 09/08/2009 DOWN
Inserted 35 clues for 09/09/2009 ACROSS
Inserted 41 clues for 09/09/2009 DOWN
Inserted 39 clues for 09/10/2009 ACROSS
Inserted 39 clues for 09/10/2009 DOWN
Inserted 31 clues for 09/11/2009 ACROSS
Inserted 39 clues for 09/11/2009 DOWN
Inserted 33 clues for 09/12/2009 ACROSS
Inserted 37 clues for 09/12/2009 DOWN
Inserted 66 clues for 09/13/2009 ACROSS
Inserted 68 clues for 09/13/2009 DOWN
Inserted 35 clues for 09/14/2009 ACROSS
Inserted 41 clues for 09/14/2009 DOWN
Inserted 37 clues for 09/15/2009 ACROSS
Inserted 39 clues for 09/15/2009 DOWN
Inserted 35 clues for 09/16/2009 ACROSS
Inserted 41 clues for 09

Inserted 33 clues for 12/19/2009 ACROSS
Inserted 33 clues for 12/19/2009 DOWN
Inserted 65 clues for 12/20/2009 ACROSS
Inserted 75 clues for 12/20/2009 DOWN
Inserted 33 clues for 12/21/2009 ACROSS
Inserted 43 clues for 12/21/2009 DOWN
Inserted 37 clues for 12/22/2009 ACROSS
Inserted 41 clues for 12/22/2009 DOWN
Inserted 29 clues for 12/23/2009 ACROSS
Inserted 47 clues for 12/23/2009 DOWN
Inserted 39 clues for 12/24/2009 ACROSS
Inserted 39 clues for 12/24/2009 DOWN
Inserted 36 clues for 12/25/2009 ACROSS
Inserted 40 clues for 12/25/2009 DOWN
Inserted 37 clues for 12/26/2009 ACROSS
Inserted 35 clues for 12/26/2009 DOWN
Inserted 66 clues for 12/27/2009 ACROSS
Inserted 73 clues for 12/27/2009 DOWN
Inserted 38 clues for 12/28/2009 ACROSS
Inserted 40 clues for 12/28/2009 DOWN
Inserted 36 clues for 12/29/2009 ACROSS
Inserted 42 clues for 12/29/2009 DOWN
Inserted 37 clues for 12/30/2009 ACROSS
Inserted 39 clues for 12/30/2009 DOWN
Inserted 36 clues for 12/31/2009 ACROSS
Inserted 42 clues for 12

### NOTE: On 2022-08-17, last written clue was 07/31/2022. Next time, start_date should be date(2022,8,1)

In [133]:
# Write to csv periodically
with open(f"bydatestore_xwordinfo.csv", 'a') as f:
    field_names = BY_DATE_STORE[start_date.strftime("%m%d%Y")][0].keys()
    dict_writer = csv.DictWriter(f, field_names)
    for v in BY_DATE_STORE.values():
        dict_writer.writerows(v)

In [123]:
# For debugging
div_list = soup.find(id="DCluesPan").find(class_="numclue").find_all('div')
for i in range(0, len(div_list), 2):
    print(clue_content)
    clue_idx = div_list[i].string
    clue_content = div_list[i+1]
    clue = clue_content.contents[0].string.removesuffix(" : ")
    answer = clue_content.a.string

AttributeError: 'NoneType' object has no attribute 'find'

In [120]:
clue_content.contents[0].get_text()

'P PU UB B'