# SLJ Controversial Books Survey Responses

# Source information

URL: https://www.slj.com/page/features-self-censorship

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

## 1. Weighing Subject Matter

In [2]:
# Section 1 settings: the CSV this section exports to, and the SLJ page
# that is scraped for the "Weighing Subject Matter" responses.
outfile = "../data/slj_controversial_books_weighing_subject_matter.csv"
url = "https://www.slj.com/story/slj-controversial-books-survey-responses-weighing-subject-matter"

### 1.1 Fetch and extract the data from an HTML table with BeautifulSoup

In [3]:
# Download the survey page. Fail loudly on an HTTP error instead of parsing
# an error page as if it contained the survey table, and do not wait forever
# on an unresponsive server.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.text

# Keep an unmodified copy of the downloaded HTML. The context manager closes
# the file even if the write fails, and the explicit UTF-8 encoding avoids
# depending on the platform's default encoding for non-ASCII characters.
with open("../unprocessed/slj/weighing_subject_matter.html", "w", encoding="utf-8") as text_file:
    text_file.write(data)

soup = BeautifulSoup(data, 'html5lib')

# Only the first <table> on the page is used.
table = soup.find('table')
if table is None:
    raise ValueError("No <table> element found at " + url)

# Only the first four header cells are used as column names.
table_headers = [th.text for th in table.find_all('th')][0:4]

# One list of cell texts per <tr>. A <tr> without any <td> cells yields an
# empty list, which pandas turns into an all-empty row; those rows are kept
# here and removed in the following step.
rows = [[cell.text for cell in tr.find_all('td')] for tr in table.find_all('tr')]

df = pd.DataFrame(rows, columns=table_headers)
df.head()

Unnamed: 0,LIBRARIAN COMMENTS,REGION,LOCALITY,SCHOOL LEVEL
0,,,,
1,,,,
2,A middle school library's collection should co...,Midwest,Small Town,M
3,A parent complained and the principal asked th...,Northeast,Suburban,M
4,Actually I find I am being more liberal with c...,South Central,Urban,EM


### 1.2 Remove empty rows and reset the index column

In [4]:
# The parsed table starts with rows that carry no data (every cell is either
# missing or an empty string). Select them by content rather than by the
# fixed labels 0 and 1: because the index is reset afterwards, dropping fixed
# labels would delete two real responses every time this cell is re-run.
is_blank = df.isna() | df.eq("")
df = df.loc[~is_blank.all(axis=1)].reset_index(drop=True)
df

Unnamed: 0,LIBRARIAN COMMENTS,REGION,LOCALITY,SCHOOL LEVEL
0,A middle school library's collection should co...,Midwest,Small Town,M
1,A parent complained and the principal asked th...,Northeast,Suburban,M
2,Actually I find I am being more liberal with c...,South Central,Urban,EM
3,"Actually less, since society and our local com...",Midwest,Suburban,M
4,Administrator pressure,Northeast,Rural,EM
...,...,...,...,...
148,"With little funds to purchase books, I need to...",Mountain,Urban,EM
149,Working for a Catholic school I have to be ver...,South Atlantic,Suburban,EM
150,"Yes, because so many books today try to ""sneak...",South Atlantic,Rural,M
151,"Yes, because there seem to be many more contro...",Pacific,Rural,MH


### 1.3 Split the combined SCHOOL LEVEL codes into one indicator column per school level

In [5]:
# SCHOOL LEVEL holds a string of level codes (e.g. "EM"). Each code gets its
# own 0/1 column that records whether the code occurs in that string.
school_levels = df['SCHOOL LEVEL']
level_columns = (
    ('elementary_school', 'E'),
    ('middle_school', 'M'),
    ('high_school', 'H'),
)
for column_name, level_code in level_columns:
    # Bind the code as a default argument so each lambda tests its own letter.
    df[column_name] = school_levels.map(lambda levels, code=level_code: int(code in levels))

# The combined field is no longer needed once the indicators exist.
df.drop(columns=["SCHOOL LEVEL"], inplace=True)
df.head()

Unnamed: 0,LIBRARIAN COMMENTS,REGION,LOCALITY,elementary_school,middle_school,high_school
0,A middle school library's collection should co...,Midwest,Small Town,0,1,0
1,A parent complained and the principal asked th...,Northeast,Suburban,0,1,0
2,Actually I find I am being more liberal with c...,South Central,Urban,1,1,0
3,"Actually less, since society and our local com...",Midwest,Suburban,0,1,0
4,Administrator pressure,Northeast,Rural,1,1,0


### 1.4 Rename columns to a consistent lowercase standard without spaces

In [6]:
# Scraped upper-case headers -> lowercase names with underscores, matching
# the style of the indicator columns added earlier.
snake_case_names = {
    'LIBRARIAN COMMENTS': 'librarian_comments',
    'REGION': 'region',
    'LOCALITY': 'locality',
}
df.rename(columns=snake_case_names, inplace=True)
df.head()

Unnamed: 0,librarian_comments,region,locality,elementary_school,middle_school,high_school
0,A middle school library's collection should co...,Midwest,Small Town,0,1,0
1,A parent complained and the principal asked th...,Northeast,Suburban,0,1,0
2,Actually I find I am being more liberal with c...,South Central,Urban,1,1,0
3,"Actually less, since society and our local com...",Midwest,Suburban,0,1,0
4,Administrator pressure,Northeast,Rural,1,1,0


### 1.5 Replace empty cells with placeholder

In [7]:
# Swap cells that are exactly the empty string for the literal marker "N/A",
# modifying df in place.
df.replace(to_replace="", value="N/A", inplace=True)

### 1.6 Export to CSV

In [8]:
# Write the frame to this section's output path, leaving out the
# DataFrame's positional index column.
df.to_csv(path_or_buf=outfile, index=False)

## 2. Comments About Age-Appropriateness

In [9]:
# Section 2 settings: the CSV this section exports to, and the SLJ page
# that is scraped for the "Comments About Age-Appropriateness" responses.
outfile = "../data/slj_controversial_books_comments_about_age_appropriateness.csv"
url = "https://www.slj.com/story/slj-controversial-book-survey-comments-about-age-appropriateness"

### 2.1 Fetch and extract the data from an HTML table with BeautifulSoup

In [10]:
# Download the survey page. Fail loudly on an HTTP error instead of parsing
# an error page as if it contained the survey table, and do not wait forever
# on an unresponsive server.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.text

# Keep an unmodified copy of the downloaded HTML. The context manager closes
# the file even if the write fails, and the explicit UTF-8 encoding avoids
# depending on the platform's default encoding for non-ASCII characters.
with open("../unprocessed/slj/comments_about_age_appropriateness.html", "w", encoding="utf-8") as text_file:
    text_file.write(data)

soup = BeautifulSoup(data, 'html5lib')

# Only the first <table> on the page is used.
table = soup.find('table')
if table is None:
    raise ValueError("No <table> element found at " + url)

# Only the first four header cells are used as column names.
table_headers = [th.text for th in table.find_all('th')][0:4]

# One list of cell texts per <tr>. A <tr> without any <td> cells yields an
# empty list, which pandas turns into an all-empty row; those rows are kept
# here and removed in the following step.
rows = [[cell.text for cell in tr.find_all('td')] for tr in table.find_all('tr')]

df = pd.DataFrame(rows, columns=table_headers)
df.head()

Unnamed: 0,LIBRARIAN COMMENTS,REGION,LOCALITY,SCHOOL LEVEL
0,,,,
1,,,,
2,"""Adult relationships"" that are not ""age approp...",Northeast,Suburban,M
3,1. Check www.commonsensemedia.com to see if th...,Midwest,Small Town,EM
4,1. I read reviews for information on content 2...,South Central,Suburban,EM


### 2.2 Remove empty rows and reset the index column

In [11]:
# The parsed table starts with rows that carry no data (every cell is either
# missing or an empty string). Select them by content rather than by the
# fixed labels 0 and 1: because the index is reset afterwards, dropping fixed
# labels would delete two real responses every time this cell is re-run.
is_blank = df.isna() | df.eq("")
df = df.loc[~is_blank.all(axis=1)].reset_index(drop=True)
df

Unnamed: 0,LIBRARIAN COMMENTS,REGION,LOCALITY,SCHOOL LEVEL
0,"""Adult relationships"" that are not ""age approp...",Northeast,Suburban,M
1,1. Check www.commonsensemedia.com to see if th...,Midwest,Small Town,EM
2,1. I read reviews for information on content 2...,South Central,Suburban,EM
3,Age and maturity of middle school students.,Midwest,Suburban,M
4,"Age of characters, themes of book, reading lev...",Midwest,Suburban,E
...,...,...,...,...
470,We look at the reviews and at the content. We ...,Pacific,Urban,MH
471,Weighing one or a variety of the following: pr...,Midwest,Suburban,EM
472,"Well, since I have only 'non-selected' two boo...",Pacific,Rural,M
473,What I feel is appropriate for the age,Midwest,Suburban,EM


### 2.3 Split the combined SCHOOL LEVEL codes into one indicator column per school level

In [12]:
# SCHOOL LEVEL holds a string of level codes (e.g. "EM"). Each code gets its
# own 0/1 column that records whether the code occurs in that string.
school_levels = df['SCHOOL LEVEL']
level_columns = (
    ('elementary_school', 'E'),
    ('middle_school', 'M'),
    ('high_school', 'H'),
)
for column_name, level_code in level_columns:
    # Bind the code as a default argument so each lambda tests its own letter.
    df[column_name] = school_levels.map(lambda levels, code=level_code: int(code in levels))

# The combined field is no longer needed once the indicators exist.
df.drop(columns=["SCHOOL LEVEL"], inplace=True)
df.head()

Unnamed: 0,LIBRARIAN COMMENTS,REGION,LOCALITY,elementary_school,middle_school,high_school
0,"""Adult relationships"" that are not ""age approp...",Northeast,Suburban,0,1,0
1,1. Check www.commonsensemedia.com to see if th...,Midwest,Small Town,1,1,0
2,1. I read reviews for information on content 2...,South Central,Suburban,1,1,0
3,Age and maturity of middle school students.,Midwest,Suburban,0,1,0
4,"Age of characters, themes of book, reading lev...",Midwest,Suburban,1,0,0


### 2.4 Rename columns to a consistent lowercase standard without spaces

In [13]:
# Scraped upper-case headers -> lowercase names with underscores, matching
# the style of the indicator columns added earlier.
snake_case_names = {
    'LIBRARIAN COMMENTS': 'librarian_comments',
    'REGION': 'region',
    'LOCALITY': 'locality',
}
df.rename(columns=snake_case_names, inplace=True)
df.head()

Unnamed: 0,librarian_comments,region,locality,elementary_school,middle_school,high_school
0,"""Adult relationships"" that are not ""age approp...",Northeast,Suburban,0,1,0
1,1. Check www.commonsensemedia.com to see if th...,Midwest,Small Town,1,1,0
2,1. I read reviews for information on content 2...,South Central,Suburban,1,1,0
3,Age and maturity of middle school students.,Midwest,Suburban,0,1,0
4,"Age of characters, themes of book, reading lev...",Midwest,Suburban,1,0,0


### 2.5 Replace empty cells with placeholder

In [14]:
# Swap cells that are exactly the empty string for the literal marker "N/A",
# modifying df in place.
df.replace(to_replace="", value="N/A", inplace=True)

### 2.6 Export to CSV

In [15]:
# Write the frame to this section's output path, leaving out the
# DataFrame's positional index column.
df.to_csv(path_or_buf=outfile, index=False)

## 3. Comments About Book Challenges

In [16]:
# Section 3 settings: the CSV this section exports to, and the SLJ page
# that is scraped for the "Comments About Book Challenges" responses.
outfile = "../data/slj_controversial_books_comments_about_book_challenges.csv"
url = "https://www.slj.com/story/slj-controversial-books-survey-comments-about-book-challenges"

### 3.1 Fetch and extract the data from an HTML table with BeautifulSoup

In [17]:
# Download the survey page. Fail loudly on an HTTP error instead of parsing
# an error page as if it contained the survey table, and do not wait forever
# on an unresponsive server.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.text

# Keep an unmodified copy of the downloaded HTML. The context manager closes
# the file even if the write fails, and the explicit UTF-8 encoding avoids
# depending on the platform's default encoding for non-ASCII characters.
with open("../unprocessed/slj/comments_about_book_challenges.html", "w", encoding="utf-8") as text_file:
    text_file.write(data)

soup = BeautifulSoup(data, 'html5lib')

# Only the first <table> on the page is used.
table = soup.find('table')
if table is None:
    raise ValueError("No <table> element found at " + url)

# Only the first four header cells are used as column names.
table_headers = [th.text for th in table.find_all('th')][0:4]

# One list of cell texts per <tr>. A <tr> without any <td> cells yields an
# empty list, which pandas turns into an all-empty row; those rows are kept
# here and removed in the following step.
rows = [[cell.text for cell in tr.find_all('td')] for tr in table.find_all('tr')]

df = pd.DataFrame(rows, columns=table_headers)
df.head()

Unnamed: 0,LIBRARIAN COMMENTS,REGION,LOCALITY,SCHOOL LEVEL
0,,,,
1,,,,
2,A book on terrorism was challenged by a studen...,Northeast,Urban,H
3,A community read book (selected by another lib...,Northeast,Small Town,EM
4,A kindergarten student took out a book on terr...,Canada,Small Town,EM


### 3.2 Remove empty rows and reset the index column

In [18]:
# The parsed table starts with rows that carry no data (every cell is either
# missing or an empty string). Select them by content rather than by the
# fixed labels 0 and 1: because the index is reset afterwards, dropping fixed
# labels would delete two real responses every time this cell is re-run.
is_blank = df.isna() | df.eq("")
df = df.loc[~is_blank.all(axis=1)].reset_index(drop=True)
df

Unnamed: 0,LIBRARIAN COMMENTS,REGION,LOCALITY,SCHOOL LEVEL
0,A book on terrorism was challenged by a studen...,Northeast,Urban,H
1,A community read book (selected by another lib...,Northeast,Small Town,EM
2,A kindergarten student took out a book on terr...,Canada,Small Town,EM
3,A parent asked that we remove all goosebumps a...,Canada,Urban,EM
4,A parent brought a challenge directly to our p...,Pacific,Small Town,M
...,...,...,...,...
187,When I was new (and not yet finished my course...,Northeast,Small Town,M
188,"When I was teaching elementary, I had a parent...",Midwest,Suburban,H
189,When parents begin to talk about limiting a bo...,South Atlantic,Suburban,E
190,"While at the middle school, my principal told ...",Midwest,Urban,H


### 3.3 Split the combined SCHOOL LEVEL codes into one indicator column per school level

In [19]:
# SCHOOL LEVEL holds a string of level codes (e.g. "EM"). Each code gets its
# own 0/1 column that records whether the code occurs in that string.
school_levels = df['SCHOOL LEVEL']
level_columns = (
    ('elementary_school', 'E'),
    ('middle_school', 'M'),
    ('high_school', 'H'),
)
for column_name, level_code in level_columns:
    # Bind the code as a default argument so each lambda tests its own letter.
    df[column_name] = school_levels.map(lambda levels, code=level_code: int(code in levels))

# The combined field is no longer needed once the indicators exist.
df.drop(columns=["SCHOOL LEVEL"], inplace=True)
df.head()

Unnamed: 0,LIBRARIAN COMMENTS,REGION,LOCALITY,elementary_school,middle_school,high_school
0,A book on terrorism was challenged by a studen...,Northeast,Urban,0,0,1
1,A community read book (selected by another lib...,Northeast,Small Town,1,1,0
2,A kindergarten student took out a book on terr...,Canada,Small Town,1,1,0
3,A parent asked that we remove all goosebumps a...,Canada,Urban,1,1,0
4,A parent brought a challenge directly to our p...,Pacific,Small Town,0,1,0


### 3.4 Rename columns to a consistent lowercase standard without spaces

In [20]:
# Scraped upper-case headers -> lowercase names with underscores, matching
# the style of the indicator columns added earlier.
snake_case_names = {
    'LIBRARIAN COMMENTS': 'librarian_comments',
    'REGION': 'region',
    'LOCALITY': 'locality',
}
df.rename(columns=snake_case_names, inplace=True)
df.head()

Unnamed: 0,librarian_comments,region,locality,elementary_school,middle_school,high_school
0,A book on terrorism was challenged by a studen...,Northeast,Urban,0,0,1
1,A community read book (selected by another lib...,Northeast,Small Town,1,1,0
2,A kindergarten student took out a book on terr...,Canada,Small Town,1,1,0
3,A parent asked that we remove all goosebumps a...,Canada,Urban,1,1,0
4,A parent brought a challenge directly to our p...,Pacific,Small Town,0,1,0


### 3.5 Replace empty cells with placeholder

In [21]:
# Swap cells that are exactly the empty string for the literal marker "N/A",
# modifying df in place.
df.replace(to_replace="", value="N/A", inplace=True)

### 3.6 Export to CSV

In [22]:
# Write the frame to this section's output path, leaving out the
# DataFrame's positional index column.
df.to_csv(path_or_buf=outfile, index=False)