# Scraping AZ DOT Authorized Third Party Driver License Locations

In [53]:
from bs4 import BeautifulSoup
import requests

# Send a desktop-browser User-Agent with the request.
# NOTE(review): presumably the site rejects the default python-requests agent — confirm.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
}

# requests has no default timeout, so without one an unresponsive server
# would leave this cell hanging indefinitely.
response = requests.get(
    "https://travel-id-documents.az.gov/authorized-third-party-driver-license-locations",
    headers=headers,
    timeout=30,
)
# Stop here on a 4xx/5xx response instead of parsing an error page as if it
# were the locations table.
response.raise_for_status()
doc = BeautifulSoup(response.text, 'html.parser')

First, I find the block that contains just the data, which is `<tbody>`. Each row of info is a `<tr>` containing four `<td>` tags, each of which corresponds to a column I want.

In [54]:
# Grab the first <tbody> in the parsed page; it holds the data rows.
body = doc.find('tbody')
# find() returns None when nothing matches. Fail here with a clear message
# rather than with an AttributeError on None in a later cell.
if body is None:
    raise ValueError(
        "No <tbody> found in the page; the layout may have changed or the request was blocked"
    )

In [55]:
# Collect every <tr> nested under the table body.
items = body.find_all('tr')
# Show how many rows were found as a quick sanity check.
len(items)

40

In [57]:
# Display the first row to inspect how its <td> cells are laid out.
items[0]

<tr><td><a href="http://az-mvd.com/" target="_blank">1 Stop Title &amp; Registration Services</a></td>
<td>940 N. Alma School Rd., #104<br/>
			Chandler, AZ 85224</td>
<td>480.821.3288</td>
<td>Mon.-Fri. 8:00 a.m.-6:00 p.m. Sat. 9:00 a.m.-4:30 p.m.</td>
</tr>

So I want to loop through every row, finding the `<td>` elements (columns) within it. I then assign each `<td>` element to a variable based on its position within the row. Finally, I store those variables in a dictionary called `row`, one key per column, and append each `row` to a list of dictionaries called `rows`.

In [58]:
# Build one dict per table row: company, optional url, address, telephone, hours.
rows = []

for item in items:
    print('------')
    columns = item.find_all('td')

    # A row without the four expected cells cannot be mapped onto the
    # columns; report and skip it instead of failing with an IndexError.
    if len(columns) < 4:
        print("Skipping row with", len(columns), "cells")
        continue

    company, address, telephone, hours = (
        column.text.strip() for column in columns[:4]
    )

    # Look the link up once. Not every row has one, and .get() returns None
    # when the first <a> has no href, so no blanket try/except is needed.
    link = item.find('a')
    url = link.get('href') if link is not None else None

    row = {}

    print(company)
    row['company'] = company

    # Only set 'url' when the row has one; pandas fills the gap for the rest.
    if url is not None:
        print(url)
        row['url'] = url
    else:
        print("No url")

    print(address)
    row['address'] = address

    print(telephone)
    row['telephone'] = telephone

    print(hours)
    row['hours'] = hours

    rows.append(row)

------
1 Stop Title & Registration Services
http://az-mvd.com/
940 N. Alma School Rd., #104
			Chandler, AZ 85224
480.821.3288
Mon.-Fri. 8:00 a.m.-6:00 p.m. Sat. 9:00 a.m.-4:30 p.m.
------
1 Stop Title & Registration Services
http://az-mvd.com/
5036 W. Cactus Rd., Ste. 2
			Glendale, AZ 85304
602.264.2400
Mon.-Fri. 8:00 a.m.-6:00 p.m. Sat. 8:30 a.m.-4:30 p.m.
------
Academy of Driving Motor Vehicle Center
No url
4733 E. Broadway Blvd.
			Tucson, AZ 85711
520.750.7572
Mon.-Fri. 9 a.m.-5 p.m. and Sat. 9 a.m.-3 p.m.
------
Arizona Auto License
No url
1337 W. Prince Rd
			Tucson, AZ 85705
520.696.2023
Driver License Hours: 
			Mon.-Fri. 9 a.m.-5 p.m.
			Sat by Appt. Only
------
Arizona Auto License Service LLC
No url
1457 N. Eliseo C Felix Jr.
			Way, Ste. 105 and 106
			Avondale, AZ 85323
623.925.5455 or Fax 623.925.5879
Mon.-Fri. 8 a.m.-5 p.m.
------
Arizona Auto License Service LLC
No url
5130 W Baseline Rd.
			Ste. 105
			Laveen, AZ 85339
602.334.1700
			or Fax 602.272.2480
Mon.-Sat. 8

In [59]:
import pandas as pd

# Build the table in one step from the list of per-location dicts; a row
# that lacks a 'url' key gets a missing value in that column.
df = pd.DataFrame(rows)
# Preview the first five rows.
df.head()

Unnamed: 0,company,url,address,telephone,hours
0,1 Stop Title & Registration Services,http://az-mvd.com/,"940 N. Alma School Rd., #104\n\t\t\tChandler, ...",480.821.3288,Mon.-Fri. 8:00 a.m.-6:00 p.m. Sat. 9:00 a.m.-4...
1,1 Stop Title & Registration Services,http://az-mvd.com/,"5036 W. Cactus Rd., Ste. 2\n\t\t\tGlendale, AZ...",602.264.2400,Mon.-Fri. 8:00 a.m.-6:00 p.m. Sat. 8:30 a.m.-4...
2,Academy of Driving Motor Vehicle Center,,"4733 E. Broadway Blvd.\n\t\t\tTucson, AZ 85711",520.750.7572,Mon.-Fri. 9 a.m.-5 p.m. and Sat. 9 a.m.-3 p.m.
3,Arizona Auto License,,"1337 W. Prince Rd\n\t\t\tTucson, AZ 85705",520.696.2023,Driver License Hours: \n\t\t\tMon.-Fri. 9 a.m....
4,Arizona Auto License Service LLC,,"1457 N. Eliseo C Felix Jr.\n\t\t\tWay, Ste. 10...",623.925.5455 or Fax 623.925.5879,Mon.-Fri. 8 a.m.-5 p.m.


In [61]:
# Write the scraped table to a CSV file, leaving out the numeric row index.
output_path = "auth-third-party-license-locations.csv"
df.to_csv(output_path, index=False)