Based on https://github.com/chinese-poetry/chinese-poetry

Character genome
* Book ID    2 digits,          00-99 (00 - Tang Poem, 01 - Song Poem, 02 - Song Ci) 
* Author ID  4 digits,    00000-99999 (max ~1.4K)
* Title ID   6 digits,  000000-999999 (max ~55K)
* Char ID    4 digits,      0000-9999 (max 1.6K)

In [1]:
from IPython.display import display
import pandas as pd
pd.set_option('display.max_columns', None)
import json
from bs4 import BeautifulSoup
from splinter import Browser
import csv
import time
import random
import pandas as pd
from datetime import datetime
import re
from dateutil import tz
from ast import literal_eval
from selenium import webdriver
import pickle
from os import listdir
from os.path import isfile, join
import os

In [3]:
import urllib.request, json 
myURL = "https://raw.githubusercontent.com/chinese-poetry/chinese-poetry/master/json/poet.tang.7000.json"

with urllib.request.urlopen(myURL) as url:
    data = json.loads(url.read().decode())
#     print(data)

In [4]:
data[10]['strains']

['平平仄平仄，仄仄仄平平。', '平仄平平仄，平平仄仄平。']

In [5]:
len(data)

1000

In [6]:
len(data[0]['paragraphs'])

6

In [7]:
data[0]['paragraphs']

['扶蘇秦帝子，舉代稱其賢。',
 '百萬猶在握，可爭天下權。',
 '束身就一劒，壯志皆棄捐。',
 '塞下有遺跡，千齡人共傳。',
 '疎蕪盡荒草，寂歷空寒煙。',
 '到此盡垂淚，非我獨潸然。']

In [8]:
import re

def cleanLine(line):    
    line = re.sub("[\s+\.\!\/_,$%^*(+\"\']+|[+——！，。？、~@#￥%……&*（）：；《）《》“”()»〔〕-]+", "",line)
    return(line)

In [67]:
import numpy as np

author_list = []
title_list = []
cha_list = []
# strain_list = []
# author_id = []
# title_id = []
char_id = []

for poem in data:
#     author_list.append(poem['author'])
#     title_list.append(poem['title'])    
    i = 0
    for para in poem['paragraphs']:
        para = cleanLine(para)
        for char in para:
            author_list.append(poem['author'])
            title_list.append(poem['title'])
            cha_list.append(char)            
            char_id.append(i)
            i = i + 1
    

char_df = pd.DataFrame(np.column_stack([author_list, title_list, cha_list, char_id]), 
                               columns=['author', 'title', 'char', 'char_id'])
char_df['char_id'] = char_df['char_id'].astype(int)
char_df.head()

Unnamed: 0,author,title,char,char_id
0,陶翰,經殺子谷,扶,0
1,陶翰,經殺子谷,蘇,1
2,陶翰,經殺子谷,秦,2
3,陶翰,經殺子谷,帝,3
4,陶翰,經殺子谷,子,4


Investigate unicode encoding

In [70]:
u'\u76ca'

'益'

In [71]:
'陶翰'.encode('utf-8')

b'\xe9\x99\xb6\xe7\xbf\xb0'

In [72]:
int.from_bytes('經殺子谷'.encode('utf-8'), byteorder='big')

71711760211115320418155147447

In [73]:
def encode(chinese_str):
    code = int.from_bytes(chinese_str.encode('utf-8'), byteorder='big')
    return(hex(code))

In [74]:
char_df['author_code'] = char_df['author'].apply(encode)
char_df['title_code'] = char_df['title'].apply(encode)
char_df.head()

Unnamed: 0,author,title,char,char_id,author_code,title_code
0,陶翰,經殺子谷,扶,0,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7
1,陶翰,經殺子谷,蘇,1,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7
2,陶翰,經殺子谷,秦,2,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7
3,陶翰,經殺子谷,帝,3,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7
4,陶翰,經殺子谷,子,4,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7


In [75]:
def rhash(n):
    return "%08x" % (n * 387420489 % 4000000000000000000000000000)

def un_rhash(h):
    return int(h, 16) * 3513180409 % 4000000000000000000000000000

hex(un_rhash(rhash(0xe999b6e7bfb0)))


'0xb5048780dfe913038bfb0'

## Need to fit the charactor gene into 64 hex number

### Two possible approaches. 1) encode the chinese characters and use these as ID 2) create an ordered ID starting from 0

In [76]:
char_df = char_df.assign(author_id2=(char_df['author']).astype('category').cat.codes)
char_df = char_df.assign(title_id2=(char_df['title']).astype('category').cat.codes)
char_df

Unnamed: 0,author,title,char,char_id,author_code,title_code,author_id2,title_id2
0,陶翰,經殺子谷,扶,0,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
1,陶翰,經殺子谷,蘇,1,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
2,陶翰,經殺子谷,秦,2,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
3,陶翰,經殺子谷,帝,3,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
4,陶翰,經殺子谷,子,4,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
5,陶翰,經殺子谷,舉,5,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
6,陶翰,經殺子谷,代,6,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
7,陶翰,經殺子谷,稱,7,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
8,陶翰,經殺子谷,其,8,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
9,陶翰,經殺子谷,賢,9,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561


In [77]:
author_df = char_df[['author', 'title']].drop_duplicates()['author'].value_counts().reset_index()
author_df.columns = ['author', 'poem_count']
author_df['author_id'] = author_df.index
author_df

Unnamed: 0,author,poem_count,author_id
0,劉長卿,510,0
1,孟浩然,268,1
2,李白,68,2
3,蕭穎士,41,3
4,李華,29,4
5,孟雲卿,18,5
6,崔曙,15,6
7,王翰,14,7
8,顏真卿,10,8
9,賀蘭進明,7,9


In [78]:
author_df = char_df[['author', 'title']].drop_duplicates().groupby(['author']).size().reset_index()
author_df.columns = ['author', 'author_title_count']
author_df['author_id'] = author_df.index
author_df = author_df[['author_id', 'author', 'author_title_count']]
author_df

Unnamed: 0,author_id,author,author_title_count
0,0,劉長卿,510
1,1,孟浩然,268
2,2,孟雲卿,18
3,3,崔曙,15
4,4,庾光先,1
5,5,張巡,2
6,6,張抃,1
7,7,李希仲,3
8,8,李白,68
9,9,李華,29


In [79]:
title_df = char_df.groupby(['author', 'title']).size().reset_index()
title_df.columns = ['author', 'title', 'title_char_count']
title_df['title_id'] = title_df.groupby('author').cumcount()
title_df = title_df.merge(author_df, on='author', how='left')
title_df = title_df[['author_id', 'author', 'author_title_count', 'title_id', 'title', 'title_char_count']]
title_df

Unnamed: 0,author_id,author,author_title_count,title_id,title,title_char_count
0,0,劉長卿,510,0,七里灘重送,28
1,0,劉長卿,510,1,三月李明府後亭泛舟,56
2,0,劉長卿,510,2,上巳日越中與鮑侍郎泛舟耶溪,56
3,0,劉長卿,510,3,上湖田館南樓憶朱晏,80
4,0,劉長卿,510,4,上陽宮望幸,56
5,0,劉長卿,510,5,九日岳陽待黃遂張渙,80
6,0,劉長卿,510,6,九日登李明府北樓,40
7,0,劉長卿,510,7,九日題蔡國公主樓,60
8,0,劉長卿,510,8,京口懷洛陽舊居兼寄廣陵二三知己,100
9,0,劉長卿,510,9,代邊將有懷,40


In [80]:
char_df2 = char_df.merge(title_df, on=['author', 'title'], how='left')

char_df2['book_id'] = 0
char_df2['book'] = 'tang poem'

# 16 digits in total
# Book ID 2 digits, 00-99 (00 - Tang Poem, 01 - Song Poem, 02 - Song Ci)
# Author ID 4 digits, 00000-99999 (max ~1.4K)
# Title ID 6 digits, 000000-999999 (max ~55K)
# Char ID 4 digits, 0000-9999 (max 1.6K)

char_df2['char_genome'] = char_df2['book_id'] * 10**14 + char_df2['author_id'] * 10**10 + char_df2['title_id'] * 10**4 + char_df2['char_id']

char_df2 = char_df2[['book_id', 'book', 'author_id', 'author', 'author_title_count',                
       'title_id',  'title',  'title_char_count', 'char_id', 'char', 'char_genome']]
char_df2

Unnamed: 0,book_id,book,author_id,author,author_title_count,title_id,title,title_char_count,char_id,char,char_genome
0,0,tang poem,16,陶翰,7,4,經殺子谷,60,0,扶,160000040000
1,0,tang poem,16,陶翰,7,4,經殺子谷,60,1,蘇,160000040001
2,0,tang poem,16,陶翰,7,4,經殺子谷,60,2,秦,160000040002
3,0,tang poem,16,陶翰,7,4,經殺子谷,60,3,帝,160000040003
4,0,tang poem,16,陶翰,7,4,經殺子谷,60,4,子,160000040004
5,0,tang poem,16,陶翰,7,4,經殺子谷,60,5,舉,160000040005
6,0,tang poem,16,陶翰,7,4,經殺子谷,60,6,代,160000040006
7,0,tang poem,16,陶翰,7,4,經殺子谷,60,7,稱,160000040007
8,0,tang poem,16,陶翰,7,4,經殺子谷,60,8,其,160000040008
9,0,tang poem,16,陶翰,7,4,經殺子谷,60,9,賢,160000040009


In [82]:
char_df2.to_csv(r'sample_data.csv', index=False)

In [83]:
author_df.to_csv(r'author_data.csv', index=False)
title_df.to_csv(r'title_data.csv', index=False)

## Start Here

In [2]:
author_df = pd.read_csv(r'author_data.csv')
title_df = pd.read_csv(r'title_data.csv')
char_df2 = pd.read_csv(r'sample_data.csv')
display(char_df2.head())
display(char_df2.tail())
display(author_df.head())
display(title_df.head(30))

Unnamed: 0,book_id,book,author_id,author,author_title_count,title_id,title,title_char_count,char_id,char,char_genome
0,0,tang poem,16,陶翰,7,4,經殺子谷,60,0,扶,160000040000
1,0,tang poem,16,陶翰,7,4,經殺子谷,60,1,蘇,160000040001
2,0,tang poem,16,陶翰,7,4,經殺子谷,60,2,秦,160000040002
3,0,tang poem,16,陶翰,7,4,經殺子谷,60,3,帝,160000040003
4,0,tang poem,16,陶翰,7,4,經殺子谷,60,4,子,160000040004


Unnamed: 0,book_id,book,author_id,author,author_title_count,title_id,title,title_char_count,char_id,char,char_genome
55687,0,tang poem,8,李白,68,66,行行遊且獵篇,87,82,下,80000660082
55688,0,tang poem,8,李白,68,66,行行遊且獵篇,87,83,帷,80000660083
55689,0,tang poem,8,李白,68,66,行行遊且獵篇,87,84,復,80000660084
55690,0,tang poem,8,李白,68,66,行行遊且獵篇,87,85,何,80000660085
55691,0,tang poem,8,李白,68,66,行行遊且獵篇,87,86,益,80000660086


Unnamed: 0,author_id,author,author_title_count
0,0,劉長卿,510
1,1,孟浩然,268
2,2,孟雲卿,18
3,3,崔曙,15
4,4,庾光先,1


Unnamed: 0,author_id,author,author_title_count,title_id,title,title_char_count
0,0,劉長卿,510,0,七里灘重送,28
1,0,劉長卿,510,1,三月李明府後亭泛舟,56
2,0,劉長卿,510,2,上巳日越中與鮑侍郎泛舟耶溪,56
3,0,劉長卿,510,3,上湖田館南樓憶朱晏,80
4,0,劉長卿,510,4,上陽宮望幸,56
5,0,劉長卿,510,5,九日岳陽待黃遂張渙,80
6,0,劉長卿,510,6,九日登李明府北樓,40
7,0,劉長卿,510,7,九日題蔡國公主樓,60
8,0,劉長卿,510,8,京口懷洛陽舊居兼寄廣陵二三知己,100
9,0,劉長卿,510,9,代邊將有懷,40


## Convert char_genome to actual book, author, title and character


In [22]:
# 16 digits in total
# Book ID 2 digits, 00-99 (00 - Tang Poem, 01 - Song Poem, 02 - Song Ci)
# Author ID 4 digits, 00000-99999 (max ~1.4K)
# Title ID 6 digits, 000000-999999 (max ~55K)
# Char ID 4 digits, 0000-9999 (max 1.6K)

def genome_to_ids(genome):
    # genome = 10000550049
    char_id = divmod(genome, 10**4)[1]
    title_id = divmod((divmod(genome, 10**10)[1] - char_id), 10**4)[0]
    author_id = divmod(divmod(genome, 10**14)[1] - char_id - title_id * 10**4, 10**10)[0]
    book_id = 0
    return((book_id, author_id, title_id, char_id))    

def genome_to_char(genome):
    # genome = 10000550049    
    book_id, author_id, title_id, char_id = genome_to_ids(genome)    
    df = char_df2[(char_df2['book_id']==book_id) & (char_df2['author_id'] == author_id) &
           (char_df2['title_id']==title_id) & (char_df2['char_id']==char_id)]
    return(df)

genome_to_char(10000550049)

Unnamed: 0,book_id,book,author_id,author,author_title_count,title_id,title,title_char_count,char_id,char,char_genome
48549,0,tang poem,1,孟浩然,268,55,奉先張明府休沐還鄉海亭宴集,60,49,齋,10000550049


## Breeding characters

In [28]:
char_df2.sample(n=3, random_state=1)

Unnamed: 0,book_id,book,author_id,author,author_title_count,title_id,title,title_char_count,char_id,char,char_genome
13294,0,tang poem,0,劉長卿,510,30,北遊酬孟雲卿見寄,70,19,才,300019
48549,0,tang poem,1,孟浩然,268,55,奉先張明府休沐還鄉海亭宴集,60,49,齋,10000550049
21381,0,tang poem,0,劉長卿,510,370,送王司馬秩滿西歸,28,12,歸,3700012


In [44]:
import random
random.seed(123)
random_number = random.uniform(0, 1)
rand_1 = int(str(random_number-int(random_number))[2:3])
rand_2 = int(str(random_number-int(random_number))[3:4])
rand_3 = int(str(random_number-int(random_number))[4:5])
rand_4 = int(str(random_number-int(random_number))[5:6])
rand_5 = int(str(random_number-int(random_number))[6:7])
rand_6 = int(str(random_number-int(random_number))[7:8])
rand_7 = int(str(random_number-int(random_number))[8:9])

display(random_number)
display((rand_1, rand_2, rand_3, rand_4, rand_5, rand_6, rand_7))

0.052363598850944326

(0, 5, 2, 3, 6, 3, 5)


* To avoid looking up data, 100% chance within the same title, char id randomly selected within the max char count?


* 15% chance same author, different title, char id = 0
*  5% chance different author
* genome needs to have max count
* 


In [26]:
dad_genome =      300019
mum_genome = 10000550049

dad_book_id, dad_author_id, dad_title_id, dad_char_id = genome_to_ids(dad_genome)
mum_book_id, mum_author_id, mum_title_id, mum_char_id = genome_to_ids(mum_genome)


rand_12 = rand_1*10 + rand_2

if rand_12 < 80:
    if rand_3 < 5:
        kid_book_id = dad_book_id
        kid_author_id = dad_author_id
        kid_title_id = dad_title_id
    else:
        kid_book_id = mum_book_id
        kid_author_id = mum_author_id
        kid_title_id = mum_title_id
elif rand_12 < 95:
    if rand_3 < 5:
        kid_book_id = dad_book_id
        kid_author_id = dad_author_id
        kid_title_id = dad_title_id
    else:
        kid_book_id = mum_book_id
        kid_author_id = mum_author_id
        kid_title_id = mum_title_id
        
if rand_2 < 5:
    
else:
    

if rand_3 < 5:
    
else:
    


display(genome_to_ids(dad_genome))
display(genome_to_ids(mum_genome))
display(genome_to_char(dad_genome))
display(genome_to_char(mum_genome))

(0, 0, 30, 19)

(0, 1, 55, 49)

Unnamed: 0,book_id,book,author_id,author,author_title_count,title_id,title,title_char_count,char_id,char,char_genome
13294,0,tang poem,0,劉長卿,510,30,北遊酬孟雲卿見寄,70,19,才,300019


Unnamed: 0,book_id,book,author_id,author,author_title_count,title_id,title,title_char_count,char_id,char,char_genome
48549,0,tang poem,1,孟浩然,268,55,奉先張明府休沐還鄉海亭宴集,60,49,齋,10000550049


## Title formation

## Battle

## Reward

## The length of a character genome now fits within the limit (64 hex)

In [181]:
hex(800006210086000100000000680087)

'0xa18f5a07cdb57d384b15ca097'

In [146]:
char_df[char_df['title']=='經殺子谷']

Unnamed: 0,author,title,char,char_id,author_code,title_code,author_id2,title_id2
0,陶翰,經殺子谷,扶,0,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
1,陶翰,經殺子谷,蘇,1,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
2,陶翰,經殺子谷,秦,2,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
3,陶翰,經殺子谷,帝,3,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
4,陶翰,經殺子谷,子,4,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
5,陶翰,經殺子谷,舉,5,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
6,陶翰,經殺子谷,代,6,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
7,陶翰,經殺子谷,稱,7,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
8,陶翰,經殺子谷,其,8,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561
9,陶翰,經殺子谷,賢,9,0xe999b6e7bfb0,0xe7b693e6aebae5ad90e8b0b7,16,561


In [23]:
# pageurl = "https://28utscprojects.wordpress.com/2011/01/21/001/"
pageurl = "https://raw.githubusercontent.com/chinese-poetry/chinese-poetry/master/json/poet.tang.7000.json"
print(pageurl)
browser = Browser('firefox')
browser.visit(pageurl)
soup = BeautifulSoup(browser.html, 'html.parser')    
soup

https://raw.githubusercontent.com/chinese-poetry/chinese-poetry/master/json/poet.tang.7000.json


<html><head><link href="resource://content-accessible/plaintext.css" rel="alternate stylesheet" title="Wrap Long Lines" type="text/css"/></head><body><pre>[
  {
    "strains": [
      "平平平仄仄，仄仄○○平。", 
      "仄仄○仄仄，仄平平仄平。", 
      "仄平仄仄仄，仄仄平仄平。", 
      "仄仄仄○仄，平平平仄○。", 
      "○平仄平仄，仄仄○平平。", 
      "仄仄仄平仄，平仄仄○平。"
    ], 
    "author": "陶翰", 
    "paragraphs": [
      "扶蘇秦帝子，舉代稱其賢。", 
      "百萬猶在握，可爭天下權。", 
      "束身就一劒，壯志皆棄捐。", 
      "塞下有遺跡，千齡人共傳。", 
      "疎蕪盡荒草，寂歷空寒煙。", 
      "到此盡垂淚，非我獨潸然。"
    ], 
    "title": "經殺子谷"
  }, 
  {
    "strains": [
      "仄平○仄平，平○○平仄。", 
      "平○仄仄仄，平仄仄平仄。", 
      "平平平仄○，仄仄平平仄。", 
      "平仄仄仄仄，仄平平○仄。", 
      "○平平仄仄，平仄仄平仄。", 
      "仄仄平仄平，平平仄平仄。", 
      "仄平仄仄仄，仄仄平○仄。"
    ], 
    "author": "陶翰", 
    "paragraphs": [
      "夜來三渚風，晨過臨淮島。", 
      "湖中海氣白，城上楚雲早。", 
      "鱗鱗魚浦帆，漭漭蘆洲草。", 
      "川路日浩蕩，惄焉心如擣。", 
      "且言任倚伏，何暇念枯槁。", 
      "范子名屢移，蘧公志常保。", 
      "古人去已久，此理今難道。"
    ], 
    "title": "早過臨淮"
  }, 
  {
    "strains": [
      "仄仄○仄平，平平○平仄。", 
 

In [10]:
soup.findAll('div', attrs={'class':'entry-content'})

[<div class="entry-content">
 <p></p>
 <p><span id="more-356"></span><br/><b>001 張九齡 – 感遇四首其一 <br/>001 ZHANG Jiuling – Thoughts I of IV</b></p>
 <p>中文原文﹕</p>
 <p>孤鴻海上來， <br/>池潢不敢顧。<br/>側見雙翠鳥， <br/>巢在三珠樹。</p>
 <p>矯矯珍木巔， <br/>得無金丸懼？<br/>美服患人指， <br/>高明逼神惡。</p>
 <p>今我游冥冥， <br/>弋者何所慕？</p>
 <p>英文翻譯 / English Translation﹕ </p>
 <p>A solitary swan flew in from the sea,<br/>Passing on ponds without thinking twice.<br/>Looking sideways it sees a couple of kingfishers of fine feathers,<br/>With their nest on a treasured tree of pearls enshrined</p>
 <p>Atop a tree of such eminence they perched daintily,<br/>Are they not afraid of powerful metal slingshot strikes?<br/>Well dressers draw attention,<br/>High positions attract the Deity's scrutiny for vice.</p>
 <p>As for I, much like that lone swan in the infinite sky,<br/>Wouldn't bird-hunters find me difficult to pinpoint and acquire? </p>
 <p>.</p>
 <div id="atatags-370373-5a88b890dd81c">
 <script type="text/javascript">
             __ATA.cmd.pu

In [13]:
soup.findAll('h1', attrs={'class':'entry-title'})[0].text

'001 張九齡 –\xa0感遇四首其一'

In [15]:
soup.find('h1', attrs={'class':'entry-title'}).text

'001 張九齡 –\xa0感遇四首其一'

In [18]:
soup.find('div', attrs={'class':'entry-content'}).findAll('p')

[<p></p>,
 <p><span id="more-356"></span><br/><b>001 張九齡 – 感遇四首其一 <br/>001 ZHANG Jiuling – Thoughts I of IV</b></p>,
 <p>中文原文﹕</p>,
 <p>孤鴻海上來， <br/>池潢不敢顧。<br/>側見雙翠鳥， <br/>巢在三珠樹。</p>,
 <p>矯矯珍木巔， <br/>得無金丸懼？<br/>美服患人指， <br/>高明逼神惡。</p>,
 <p>今我游冥冥， <br/>弋者何所慕？</p>,
 <p>英文翻譯 / English Translation﹕ </p>,
 <p>A solitary swan flew in from the sea,<br/>Passing on ponds without thinking twice.<br/>Looking sideways it sees a couple of kingfishers of fine feathers,<br/>With their nest on a treasured tree of pearls enshrined</p>,
 <p>Atop a tree of such eminence they perched daintily,<br/>Are they not afraid of powerful metal slingshot strikes?<br/>Well dressers draw attention,<br/>High positions attract the Deity's scrutiny for vice.</p>,
 <p>As for I, much like that lone swan in the infinite sky,<br/>Wouldn't bird-hunters find me difficult to pinpoint and acquire? </p>,
 <p>.</p>,
 <p class="jp-relatedposts-post jp-relatedposts-post0" data-post-format="false" data-post-id="353"><span class="jp-re

In [4]:
from bs4 import BeautifulSoup
# soup = BeautifulSoup(page.content, 'html.parser')

AttributeError: module 'html5lib.treebuilders' has no attribute '_base'