# Scraping data from MD Case Search

In [46]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time, datetime, os
from pathlib import Path
import string

## Create folder with today's date
today = datetime.date.today()  
todaystr = today.isoformat()   
dir_path = os.getcwd()
save_dir = dir_path + "/" + todaystr
if not os.path.exists(save_dir):
    os.mkdir(save_dir)


## Set the target save location and Firefox preferences to auto download csv files
profile = webdriver.FirefoxProfile()
profile.set_preference("browser.download.folderList", 2)
profile.set_preference("browser.download.manager.showWhenStarting", False)
profile.set_preference("browser.download.dir", save_dir)
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")

## Create the Firefox session
driver = webdriver.Firefox(firefox_profile=profile)
## Get past the disclaimer splash screen
driver.get("http://casesearch.courts.state.md.us/casesearch/inquiry-index.jsp")
check = driver.find_element_by_name("disclaimer")
check.click()
continue_button = driver.find_element_by_name("action")
continue_button.click()
return driver

assert "No results found." not in driver.page_source



SyntaxError: 'return' outside function (<ipython-input-46-a76b221ac2c6>, line 34)

#### Remember to insert dates

In [102]:
## The search page won't let you search for all cases, need to do 26 searches (A-Z)
county_list = ['Charles County', "Saint Mary's County", 'Calvert County']

for county in county_list
    for last_initial in string.ascii_lowercase:
        ## Enter search criteria
        last_name = driver.find_element_by_name("lastName")
        last_name.clear()
        last_name.send_keys(last_initial)
        driver.find_element_by_xpath("//select[@name='partyType']/option[text()='Plaintiff']").click()
        driver.find_element_by_xpath("//input[@name='site' and @value='CIVIL']").click()
        driver.find_element_by_xpath("//input[@name='courtSystem' and @value='D']").click()
        driver.find_element_by_xpath("//select[@name='countyName']/option[text()=" + county + "]").click()
        last_name = driver.find_element_by_name("filingStart")
        last_name.clear()
        last_name.send_keys("6/1/2019")  ## Turn to variable
        last_name = driver.find_element_by_name("filingEnd")
        last_name.clear()
        last_name.send_keys("9/30/2019")  ## Turn to variable
        driver.find_element_by_xpath("//input[@name='action' and @value='Search']").click()

        element = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.LINK_TEXT, "CSV"))).click() ## need to add logic for when X isn't found
        driver.back()
        time.sleep(15)
    

TimeoutException: Message: 


# Concatenate csv exports to dataframe

In [6]:
os.chdir(save_dir)
os.getcwd()

'/Users/joshuahogge/Projects/tiredLandlords/2019-09-24'

In [25]:
import csv
import glob

# Adding a header to each file
path = r'/Users/joshuahogge/Projects/tiredLandlords/2019-09-24' # use your path
all_files = glob.glob(path + "/*.csv")

for filename in all_files:
    with open(filename, newline='') as f:
        r = csv.reader(f)
        data = [line for line in r]
    with open(filename,'w',newline='') as f:
        w = csv.writer(f)
        w.writerow(['Case Number', 'Name', 'Date of Birth', 'Party Type', 'Court', 
                        'Case Type', 'Case Status', 'Filing Date', 'Case Caption'])
        w.writerows(data)

In [15]:
import pandas as pd
# Concatenate csv to single dataframe
df = pd.concat([pd.read_csv(f, index_col=0) for f in all_files])
df.shape

# Filtering down to the good stuff

In [21]:
da = df[df['Case Type'] == 'FTPR'] # Failure to Pay Rent
da

Unnamed: 0_level_0,Name,Date of Birth,Party Type,Court,Case Type,Case Status,Filing Date,Case Caption
Case Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
D042LT18007202,"Mason, Christy",,Plaintiff,Charles County District Court,FTPR,Open,08/26/2019,"CHRISTY MASON vs. STEVE MCFARLANE, et al."
D042LT19002704,"Md Property Management, Llc",,Plaintiff,Charles County District Court,FTPR,Open,08/06/2019,"MD PROPERTY MANAGEMENT, LLC vs. BREANNA PARKER..."
D042LT19002670,"Bailey, Vincent",,Plaintiff,Charles County District Court,FTPR,Open,08/05/2019,"VINCENT BAILEY vs. BROOKE GRAY, et al."
D042LT19002042,"Banks, Curtis",,Plaintiff,Charles County District Court,FTPR,Closed,08/02/2019,CURTIS BANKS vs. KENDALL BROOKS
D042LT19002626,"Beckham, Zachriell",,Plaintiff,Charles County District Court,FTPR,Closed,08/08/2019,"ZACHRIELL BECKHAM vs. STEPHANIE VAUGHN, et al."
...,...,...,...,...,...,...,...,...
D042LT19003008,"Lee, Dana",,Plaintiff,Charles County District Court,FTPR,Closed,08/22/2019,DANA LEE vs. JUNITA FRIPP
D042LT19001602,"Legg, Daniel S",,Plaintiff,Charles County District Court,FTPR,Closed,07/30/2019,"DANIEL LEGG vs. AYRA JACKSON, et al."
D042LT19002043,Catholic Charites Of The Archdicese Of Washington,,Plaintiff,Charles County District Court,FTPR,Closed,08/02/2019,CATHOLIC CHARITES OF THE ARCHDICESE OF WASHING...
D042LT19002183,Coachman's Landing,,Plaintiff,Charles County District Court,FTPR,Closed,08/29/2019,"COACHMAN'S LANDING vs. LASHAWN THOMAS, et al."


# Getting case data

In [43]:


# for index, row in da.iterrows():
#     print(index, row['Court'])

# Going to need to change all instances of 'District' to 'Circuit' to select the correct entity on query page
# or drop the 'District Court' and use contains() in the XPATH as described:
# here: https://stackoverflow.com/questions/12495723/using-xpath-wildcards-in-attributes-in-selenium-webdriver

In [53]:
# Enter case data on query page
driver.find_element_by_xpath("//select[@name='locationCode']/option[text()=" + "'Charles County Circuit Court'" + "]").click()
case_number = driver.find_element_by_name("caseId")
case_number.clear()
case_number.send_keys(da.index[0]) ## Change to variable
driver.find_element_by_xpath("//input[@name='action' and @value='Get Case']").click()

assert "Case Header Information" in driver.title

# Mine data


In [56]:
for tr in driver.find_elements_by_xpath('//table//tr'):
    tds = tr.find_elements_by_tag_name('td')
    print ([td.text for td in tds])

['CaseSearch', 'District Court of Maryland', '']
['Case Information']
['Court System:', 'District Court For Charles County - Civil']
['Location:', 'Charles']
['Case Number:', 'D-042-LT-18-007202']
['Title:', 'CHRISTY MASON vs. STEVE MCFARLANE, et al.']
['Case Type:', 'Failure to Pay Rent']
['Filing Date:', '08/26/2019']
['Case Status:', 'Open']
[]
['Involved Parties Information']
['Name:', 'MASON, CHRISTY']
['Address:', '13535 WAVERLY RD']
['City:', 'NEWBURGState:MDZip Code:20664']
['Name:', 'MCFARLANE, STEVE']
['Address:', '2165 PINEVIEW CT']
['City:', 'WALDORFState:MDZip Code:20602']
['Name:', 'WASHINGTON, DEBBIE']
['Address:', '2165 PINEVIEW CT']
['City:', 'WALDORFState:MDZip Code:20602']
['Document Information']
['File Date:', '11/19/2018']
['Filed By:', '']
['Document Name:', 'Complaint / Petition - Landlord Tenant']
['Comment:', '']
['File Date:', '12/10/2018']
['Filed By:', '']
['Document Name:', 'Landlord Tenant Disposition']
['Comment:', '']
