In [4]:
import requests
import pandas as pd
from bs4 import BeautifulSoup as soup

In [5]:
#set the url
coccygectomy_url = 'https://www.coccyx.org/treatmen/docsusa.htm'

In [6]:
#request
req = requests.get(coccygectomy_url, allow_redirects=False)

In [7]:
#parse with bs
page = soup(req.content)

In [8]:
p = page.find_all('p')[4:-4]

In [9]:
#find all entries with the keyword 'coccygectomy'
coccygectomy = [item for item in p if 'coccygectomy' in item.text]

In [12]:
#have an initial look. the data seems very unorganized.
coccygectomy[:3]

[<p><strong>Arizona -:- coccygectomy</strong>
 Dr. Mark Garrett, surgeon. Barrow Neurosurgical Associates. Phone (602) 406-3181. Website: <a href="https://www.barrowbrainandspine.com/">www.barrowbrainandspine.com</a>. See <a href="../personal/2013/anon1305.htm">Anonymous's story</a>.</p>,
 <p><strong>California -:- coccygectomy</strong>
 Rudolph J. Schrot, MD. Adult Cranial and Spinal Neurosurgery, Sutter Medical Group, 2800 L Street, Suite 500, Sacramento, CA 95816. Tel. (916) 454-6916. Website: <a href="https://www.sutterhealth.org/dr-rudolph-j-schrot.html">www.sutterhealth.org</a>. Author of a <a href="https://www.ncbi.nlm.nih.gov/pubmed/21332277">paper on coccygectomy</a>. See <a href="../personal/2008/anon0815.htm">Noel's story</a>, <a href="../personal/2009/rodger.htm">Rodger's story</a>, <a href="../personal/2010/neil.htm">Neil's story</a>, <a href="../personal/2011/lynette.htm">Lynette's story</a>, <a href="../personal/2011/linda.htm">Linda's story</a>, <a href="../personal/201

In [11]:
coccygectomy[0].text.split('\n')

['Arizona -:- coccygectomy',
 "Dr. Mark Garrett, surgeon. Barrow Neurosurgical Associates. Phone (602) 406-3181. Website: www.barrowbrainandspine.com. See Anonymous's story."]

In [13]:
#split each entry into small pieces and save in a dictionary
doc_dict = [{'state': item.text.split(' -:- ')[0],
      'doctor': item.text.split('\n')[1].split(',')[0], 
      'treatment': item.text.split('\n')[0].split()[2:], 
      'info': item.text.split('\n')[1].split(',')[1:]} for item in coccygectomy]

In [14]:
#convert the dictionary into a Pandas DataFrame
df = pd.DataFrame(doc_dict)
df

Unnamed: 0,state,doctor,treatment,info
0,Arizona,Dr. Mark Garrett,[coccygectomy],[ surgeon. Barrow Neurosurgical Associates. Ph...
1,California,Rudolph J. Schrot,[coccygectomy],"[ MD. Adult Cranial and Spinal Neurosurgery, ..."
2,California,Gilbert Cadena,[coccygectomy],"[ MD. Neurological Surgery, UC Irvine Medical..."
3,California,Kirkham B. Wood,"[injections,, coccygectomy]",[ MD. Professor of Orthopaedic Surgery at the ...
4,California,Dr Steven D Mills,[coccygectomy],"[. University of California, Irvine, 333 Cit..."
...,...,...,...,...
77,Virginia,John A Cardea,[coccygectomy],[ MD. Virginia Commonwealth University/ Medica...
78,Washington,Dr J. Scott Price,[coccygectomy],"[ ProOrtho. 12911-120th Avenue NE, Suite H-21..."
79,"Washington, DC",Dr Warren Yu,"[-:-, coccygectomy]",[ Orthopedic Surgeon. The George Washington Un...
80,Washington DC,Grosso NP,"[-:-, coccygectomy]","[or van Dam BE, Department of Orthopaedics, ..."


In [81]:
#save to a csv file
df.to_csv('coccygectomy doctors.csv', index=False)

### The 'info' section still isn't pretty, but this result does two things:
1. All 82 doctors that explicitly mention that they can perform coccygectomy were saved, down from the original list of 414 doctors;
2. All these 82 doctors can be searched by states and their names. Their clinics and contact information can be found in the 'info' section. 