In [1]:
from urllib.request import urlopen
from PyPDF2 import PdfReader
import requests
import io
import pandas as pd
from bs4 import BeautifulSoup
import re
import csv
from datetime import datetime
from dateparser.search import search_dates


  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [2]:
def shooting_pdfurls():
    currentyear = 2024
    pdfurls = []
    srcurls = ['https://da.lacounty.gov/reports/ois/' + str(y) for y in range(2016, currentyear)] + \
        ['https://da.lacounty.gov/reports/ois/']
    for srcurl in srcurls:
        req=requests.get(srcurl)
        soup = BeautifulSoup(req.content,"html")
        content = soup.find_all("div", {"class": "field-items"})
        for p in content:
            for a in p("a"):
                filepath = a.get('href')
                rex = re.search("JSID-OIS.*\.pdf$", filepath)
                if rex:
                    pdfurl = 'https://da.lacounty.gov' + filepath
                    filename = rex[0]
                    pdfurls += [pdfurl]
    return list(set(pdfurls))

In [4]:
# features to extract to database:
# name(s) of victim(s)
# officers involved: which agency, deputy names, and badge numbers. 
# jsid file number
# internal file number(s) for involved agencies
# date of filing
# conclusion text
# conclusion - maybe need NLP
# action taken - need NLP

# geolocation, incident date would be ideal. but those are not always available,
# and when available need to extracted from the document text, probably can only be 
# reliably accomplished by a finetuned powerful transformer. 
# or just human labor, since it's only a few hundred documents

In [3]:
# first layer of processing: extract
# title
# departments 
# list of officers w/ badge numbers
# jsid number
# date of filing
# file numbers
# conclusion text

def shooting_process(url):
    try:
        response = requests.get(url)
    except:
        print('Malformed URL exception thrown at URL: ' + url)
        return False
    with io.BytesIO(response.content) as f:
        try:
            pdf = PdfReader(f)
            covertext = pdf.pages[0].extract_text() 
            # the first page contains fatal/nonftal, officers, jsid number district attorney, date of filing
            coverlines = covertext.splitlines()
            coverlines = [line for line in coverlines if line.strip()]
        except:
            print('404 exception thrown at URL: ' + url)
            return False
        try:
            # find the line which has "department," "patrol," or "administration" (modulo taxicab 1)
            # if multiple, throw a formatting exception
            deptlines = [s for s in coverlines \
                         if "depart" in s.lower() or "artment" in s.lower() \
                         or "patrol" in s.lower() or "admin" in s.lower() or "bureau" in s.lower()]
            if len(deptlines) != 1:
                print('Unable to recognize department line at URL: ' + url)
                return False
            deptindex = coverlines.index(deptlines[0])
            # title is concatenation of lines prior to department line. clean title. 
            title = ''.join(coverlines[0:deptindex])
            title = title.strip()
            title = re.sub(' +', ' ', title) # strip extra whitespaces
            title = re.sub(r'(?<=\W)\s', '',title) 
            title = re.sub(r' (?![a-zA-Z])', '',title) # remove all spaces between non-letters
            department = re.sub(r' ([^A-Z])', r'\1', deptlines[0]) # strip all spaces not before capital letters
            i = deptindex + 1
            # deputies are concatenation of lines following department line. clean deputies
            deputies = []
            while coverlines[i][0:7] != 'J.S.I.D':
                line = coverlines[i].replace(",", "") # strip commas
                line = line.replace("#", "") # strip number sign
                line = line.replace("and", "") # strip the word "and"
                line = re.sub(' +', ' ', line) # strip extra whitespaces
                line = line.strip() # strip beginning and trailing whitespace
                # strip all spaces between numbers
                line = re.sub(r'(\d)\s+(\d)', r'\1\2', line)
                # split at every number
                linesplit = re.split(r'(\d+)', line)
                linesplit = [re.sub(r' ([^A-Z])', r'\1', l.strip()) for l in linesplit if l and not l.isspace()]
                for j in range(0, len(linesplit), 2):
                    try:
                        deputies += [(linesplit[j], linesplit[j+1])]
                    except:
                        deputies += [(linesplit[j])]
                i += 1
            # if deputies is empty, throw exception
            if len(deputies) == 0:
                print('Empty deputies list at URL: ' + url)
                return False
            # until J.S.I.D., it's deputy names

            jsid = coverlines[i].split('#', 1)[-1] # jsid number after number sign
            jsid = re.sub(r' ', '', jsid) # remove all whitespace from jsid
            date = coverlines[-1]
            # strip date
            # remove all spaces not before capital letters or numbers
            date = re.sub(r'([0-9]) ([0-9])', r'\1\2', date) # strip all spaces between two numbers
            date = date.replace(",", "") # strip all commas
            date = re.sub(' +', ' ', date) # strip extra whitespaces
            date = re.sub(r'(?<=[a-z]) (?=[a-z])', '', date) # remove all whitespaces between lowercases
            date = date.strip() # strip beginning and trailing whitespace
            date = search_dates(date)[0][0]
            date = datetime.strptime(date, '%B %d %Y')
        except:
            print('Other text exception thrown at URL: ' + url)
            return False
        
        page1text = pdf.pages[1].extract_text()
        page1lines = page1text.splitlines()
        files = [] 
        for l in page1lines: # try this cruder way to get files
            if "#" in l and "J.S.I.D" not in l and "deputy" not in l.lower() and "officer" not in l.lower():
                files += [l.strip()] 
        
        maintext = [p.extract_text() for p in pdf.pages[1:]]
        wholetext = ''.join(maintext)
        '''
        subj = re.search(r"S[\s]*UBJEC[\s]*T[\s]*:(.*?)DA[\s]*T[\s]*E", wholetext, re.DOTALL)
        if not subj:
            subj = re.search(r"RE[\s]*:(.*?)DATE", wholetext, re.DOTALL)
            if not subj:
                print("no subject found at url " + url)
                return False
        subjinfo = subj.group(1).strip()
        # this should catch all of the subjects. now for each line with a # but no "J.S.I.D", capture it
        subjlines = subjinfo.splitlines()
        files = []
        for l in subjlines:
            if "#" in l and "J.S.I.D" not in l:
                files += [l.strip()]            
        '''
        
        # get everything from factual/summary heading until "LAW" or "LEGAL" or "ANALYSIS" or "CONCLUSION"
        # get everything after conclusion heading
        wholelines = wholetext.splitlines()
        # factstr = ""
        constr = ""
        # factflag = False
        conflag = False
        for l in wholelines:
            stripl = l.replace(" ", "")
            # if "LAW" in stripl or "LEGAL" in stripl or "ANALYSIS" in stripl:
                # factflag = False
            # if "FACT" in stripl or "SUMMARY" in stripl: 
                # factflag = True
            if "CONCLUSION" in stripl or stripl.upper() == "CONCLUSION":
                # factflag = False
                if conflag == True:
                    print("Multiple conclusions found at url " + url)
                conflag = True
            # if factflag:
                # factstr += l
            if conflag:
                constr += l     
        constr = constr.split('CONCLUSION', 1)[-1]
        constr = constr.strip()
        constr = re.sub('\n', '', constr)
        constr = re.sub(' +', ' ', constr) # strip extra whitespaces

        return [url, title, {department: deputies}, jsid, date, files, constr]
        # there's one random lowercase conclusion. one weird "attachment" with two conclusions.

In [4]:
pdfurls = shooting_pdfurls()
shooting_csv = []
for p in pdfurls:
    newrow = shooting_process(p)
    if newrow:
        shooting_csv += [newrow]

404 exception thrown at URL: https://da.lacounty.gov/sites/default/files/JSID-OIS-06-21-22-Mountford.pdf
Unable to recognize department line at URL: https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-07-26-22-Lugo-Pina.pdf
404 exception thrown at URL: https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-04-13-23-AnthonyW.pdf
404 exception thrown at URL: https://da.lacounty.gov/sites/default/files/JSID-OIS-06-22-22-White.pdf
Unable to recognize department line at URL: https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-07-15-21-Hunter.pdf
404 exception thrown at URL: https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-11-21-23-Stevenson.pdf
Unable to recognize department line at URL: https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-12-15-22-Diaz_0.pdf
Unable to recognize department line at URL: https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-08-29-22-Meza.pdf
Unable to recognize department line at URL: https://da.lacounty.gov/sites/default/files/pdf/

In [5]:
anomaly_rows = []

In [11]:
# known broken links:
# 2022: white, mountford, 
# 2023: stevenson, rolando martinez, anthonyW
# all can be skipped (they're just redundant with working links)
# for the other anomalies, add each row to shootings.csv manually  as below
url = "https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-11-21-23-Martinez.pdf"
title = "Non-Fatal Officer Involved Shooting of Jose Martinez"
deputies_by_dept = { "Los Angeles County Sheriff’s Department": \
                   ["Deputy Nicholas Carey", "552242", "Deputy Eric Slattery", "624622", \
                    "Deputy Nicholas Alerich", "610403", "Deputy Andrew Roxas", "611035"], \
                    "Bell Police Department": ["Officer Angela Arutunian", "250", "Officer Javier Avila", "240"] \
                   } # maybe throw out the names, just keep the numbers?
jsid = "20-0186"
date = "November 21 2023"
files = ["B.P.D. File #20-1353", "L.A.S.D. File #020-07905-2111-011", "L.A.S.D. File #020-07905-2111-013"]

constr = """We conclude, based on the totality of the evidence provided, that the deputies and officers
reasonably believed their use of force was necessary to defend against an imminent threat of
death or great bodily injury."""

anomaly_rows += [[url, title, deputies_by_dept, jsid, date, files, constr]]



In [12]:
url = "https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-08-29-22-Meza.pdf"
title = "Fatal Officer Involved Shooting of Jose Meza"
deputies_by_dept = { "Los Angeles County Sheriff’s Department": \
                   ["Deputy Grant Roth", "502208"], \
                    "San Gabriel Police Department": ["Detective Enrique De Anda", "116"] \
                   } # maybe throw out the names, just keep the numbers?
jsid = "19-0258"
date = "August 29 2022"
files = ["BS.G.P.D. File #2019-01323", "L.A.S.D. File #019-00054-3199-013"]

constr = """We conclude that the deadly force utilized by Deputy Grant Roth and Detective Enrique De
Anda was legally justified in self-defense and the defense of others."""

anomaly_rows += [[url, title, deputies_by_dept, jsid, date, files, constr]]



In [13]:
url = "https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-12-07-23-Samaniego.pdf"
title = "Non-Fatal Officer Involved Shooting of Gabriel Adrian Samaniego"
deputies_by_dept = { "Los Angeles County Sheriff’s Department": 
                   ["Deputy Juan Lopez", "458967", "Deputy Cesar Hernandez", "648628"], 
                    "California Highway Patrol": ["Officer Hector Placencia", "22142", 
                                                    "Officer Yimi Osorio", "22225", 
                                                    "Officer Jennifer Sanchez", "22721", 
                                                    "Officer Daniel Castaneda", "21551"] 
                   } # maybe throw out the names, just keep the numbers?
jsid = "22-0168"
date = "December 7 2023"
files = ["C.H.P. File # MCU-099-509-22", "L.A.S.D. File #022-80007-6837-013"]

constr = """For the foregoing reasons, we find that the evidence demonstrates that CHP Officers Sanchez,
Osorio, Castaneda, Gonzalez, and LASD Deputies Lopez and Hernandez acted in lawful self-
defense when they used deadly force against Gabriel Adrian Samaniego."""

anomaly_rows += [[url, title, deputies_by_dept, jsid, date, files, constr]]


In [14]:
url = "https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-12-20-23-Hart-Cabrera-Martinez-Lindsey.pdf"
title = "Non-Fatal Officer Involved Shooting of Alison Hart and Adolfo Cabrera-Martinez and \
        No-Hit Officer Involved Shooting at Dylan Lindsey"
deputies_by_dept = { "Los Angeles County Sheriff’s Department": 
                   ["Deputy Art Hernandez", "609650", "Deputy Jonathan Charrette", "617257",
                    "Deputy Rene Vasquez", "274557", "Deputy David Vega", "538845"], 
                    "Downey Police Department": ["Corporal Drew Lofquist", "12476", 
                                                 "Officer Alejandro Zuniga", "13498"] 
                   } # maybe throw out the names, just keep the numbers?
jsid = "19-0201"
date = "December 20 2023"
files = ["D.P.D. File #19-29796", "L.A.S.D. File #019-00046-3199-013"]

constr = """It is our conclusion, Corporal Lofquist, Officer Zuniga, and Deputies Hernandez, Charrette,
Vasquez, and Vega acted in lawful self-defense and defense of others when they fired their
weapons at Lindsey. We further conclude that the rounds fired by the officers did not strike
Lindsey, who died of a self-inflicted gunshot wound to the head, though rounds fired by
unknown officers did strike Hart and Cabrera-Martinez, who were nonfatally injured."""

anomaly_rows += [[url, title, deputies_by_dept, jsid, date, files, constr]]


In [15]:
url = "https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-07-26-22-Lugo-Pina.pdf"
title = "Fatal Officer Involved Shooting of Tony Lugo and Non-Fatal Shooting at Ranfere Pina"
deputies_by_dept = { "Los Angeles County Sheriff’s Department": 
                   ["Deputy Joshua Mejia", "462487", "Deputy Christopher McDonald", "641043"], 
                    "California Highway Patrol": ["Officer Erik Davis", "21851"] 
                   } # maybe throw out the names, just keep the numbers?
jsid = "20-0220"
date = "July 26 2022"
files = ["C.H.P. File #F202000717", "L.A.S.D. File #020-07976-2608-013"]

constr = """We find that Deputies Joshua Mejia and Christopher McDonald and Officer Erik Davis acted
lawfully in self-defense and the defense of others when they used deadly force against Tony Lugo
and Ranfere Pina."""

anomaly_rows += [[url, title, deputies_by_dept, jsid, date, files, constr]]


In [16]:
url = "https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-11-20-2020-Chavez.pdf"
title = "Fatal Officer Involved Shooting of Christian Rigoberto Chavez"
deputies_by_dept = { "South Gate Police Department": ["Detective Daniel Bernabe", "10098"
                                                        "Detective George Chaves", "10123"
                                                        "Officer Adam Cook", "10141"
                                                        "Officer Anthony Reyes", "10205"
                                                        "Officer Gilberto Varela", "10215"], 
                    "Huntington Park Police Department": ["Sergeant Steve Thoreson", "556"] 
                   } # maybe throw out the names, just keep the numbers?
jsid = "17-0546"
date = "November 20 2020"
files = ["S.G.P.D. File #17-13932", "H.P.P.D. File #171127-0089", "L.A.S.D. File #017-00125-3199-013"]

constr = """We find that Bernabe, Chaves, Cook, Reyes, Thoreson and Varela acted in lawful self-defense
and in the lawful defense of others when they fired their duty weapons. We are closing our file
and will take no further action in this matter."""

anomaly_rows += [[url, title, deputies_by_dept, jsid, date, files, constr]]


In [17]:
url = "https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-11-30-22-Cervantes_0.pdf"
title = "Non-Fatal Officer Involved Shooting of Isais Cervantes"
deputies_by_dept = { "Los Angeles County Sheriff's Department": ["Deputy David Vega", "538845"] 
                   } # maybe throw out the names, just keep the numbers?
jsid = "21-0128"
date = "November 30 2022"
files = ["L.A.S.D. File #021-04312-0262-058"]

constr = """We have determined there is insufficient evidence to prove Deputy Vega did not act in lawful self-defense
        and the defense of his partner when he fired his duty weapon."""

anomaly_rows += [[url, title, deputies_by_dept, jsid, date, files, constr]]


In [18]:
url = "https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-06-05-24-Lopez.pdf"
title = "Fatal Officer Involved Shooting of Miguel Lopez"
deputies_by_dept = { "Los Angeles County Sheriff's Department": ["Deputy Marc Elizondo", "501581", 
                                                                 "Deputy Edward Martinez", "506119"] 
                   } # maybe throw out the names, just keep the numbers?
jsid = "23-0023"
date = "June 5 2024"
files = ["L.A.S.D. File #023-01205-0250-0138"]

constr = """For the foregoing reasons, we find that the use of deadly force by Deputies Martinez and
Elizondo was lawful and reasonable defense of themselves and others."""

anomaly_rows += [[url, title, deputies_by_dept, jsid, date, files, constr]]


In [19]:
url = "https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-12-15-22-Diaz_0.pdf"
title = "Fatal Officer Involved Shooting of Christopher Diaz"
deputies_by_dept = { "Montebello Police Department": ["Officer Craig Adams", "1356",
                                                    "Officer Andrew Fivecoat", "1379"] 
                   } # maybe throw out the names, just keep the numbers?
jsid = "17-0409"
date = "December 15 2022"
files = ["Montebello P.D. File # 2017-00005397", 
         "ATF File # ATF-FRO-2017-3038", "LASD File #017-00097-3199-013"]

constr = """Fivecoat, Adams and Ciccone acted lawfully in self-defense, the defense of each other and in
apprehending a person they believed to be a fleeing felon."""

anomaly_rows += [[url, title, deputies_by_dept, jsid, date, files, constr]]


In [20]:
url = "https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-10-19-23-Loia.pdf"
title = "Fatal Officer Involved Shooting of Anthony Loia"
deputies_by_dept = { "Long Beach Police Department": ["Darrek Cano", "11104", "Bryan Pasion", 
                                                      "10845", "Keyen Foley", 
                                                      "10908", "Juan Urrieta",
                                                    "10802", "Julian Perez", "11201"] 
                   } # maybe throw out the names, just keep the numbers?
jsid = "21-0054"
date = "October 19 2023"
files = ["L.B.P.D. File #21-6195"]

constr = """Based on the totality of the evidence, we find We find that it was reasonable based on the totality
of the circumstances for LBPD Officers Darrek Cano, Bryan Pasion, Keyen Foley, Juan Urrieta,
and Julian Perez to believe deadly force was necessary to defend against an imminent threat of
death or serious bodily injury."""

anomaly_rows += [[url, title, deputies_by_dept, jsid, date, files, constr]]


In [6]:
url = "https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-05-20-24-Gonzalez.pdf"
title = "Non-Fatal Officer Involved Shooting of Ramon Gonzalez"
deputies_by_dept = { "Los Angeles Police Department": ["Officer Chris Mazmanyan", "41771"] 
                   } # maybe throw out the names, just keep the numbers?
jsid = "23-0039"
date = "May 20 2024"
files = ["L.A.P.D. File #F005-23"]

constr = """For the foregoing reasons, we find that Officer Mazmanyan’s use of deadly force was reasonable
under the circumstances, and he acted lawfully in self-defense."""

anomaly_rows += [[url, title, deputies_by_dept, jsid, date, files, constr]]


In [22]:
url = "https://da.lacounty.gov/sites/default/files/pdf/JSID-OIS-07-15-21-Hunter.pdf"
title = "Fatal Officer Involved Shooting of Rodney Hunter"
deputies_by_dept = { "Cal Poly Pomona Police Department": ["Officer Marc Simpson", "S014"] ,
                    "Pomona Police Department": ["Officer Frank Sacca", "41873"]
                   } # maybe throw out the names, just keep the numbers?
jsid = "18-0279"
date = "July 15 2021"
files = ["L.A.S.D. File #018-00049-3199-011",
    "Cal Poly Pomona Police Department File #18-454",
         "Pomona Police Department File #18-73935"]

constr = """We conclude that Sacca and Simpson acted lawfully in self defense when they used deadly force."""

anomaly_rows += [[url, title, deputies_by_dept, jsid, date, files, constr]]


In [7]:
# foia request:
# sustained (and maybe also other non-unfounded?), with badge number, location (exact, failing that division/RD), outcome, etc.
# non-sustained categories same, but anonymized
# OIS, categorical use of force, critical incidents w/ case numbers, associated complaint numbers

final_table = pd.DataFrame(shooting_csv + anomaly_rows, columns=['url','title','deputies','jsid','date','files','conclusion'])

In [19]:
i = 0
for l in list(final_table['deputies']):
    if "Los Angeles Police Department" in str(l):
        i += 1
print(i)

99


In [13]:
list(final_table['deputies'])

[{'Los Angeles County Sheriff’s Department ': [('Deputy Jose Ramirez',
    '503608'),
   ('Deputy Tyler Wilson', '602406')]},
 {'Azusa Police Department ': [('Officer Dennis Tremblay', '1106')]},
 {'Glendora Police Department ': [('Officer Mykel Tso', '0120')]},
 {'South Gate Police Department ': [('Officer Isaac Beteta', '10209')]},
 {'Los Angeles Police Department ': [('Officer Hector Almeda', '39529')]},
 {'San Fernando Police Department ': [('Sergeant Paul Ventimiglia', '10202'),
   ('Officers Jonathan Zibli', '10473'),
   ('Brittany Najera', '10389'),
   ('Elon Kaiserman', '10491'),
   ('J Robles', '10334'),
   ('Christopher Lopez', '10508')]},
 {'Los Angeles County Sheriff’s Department ': [('Sergeant Kamal Ahmad',
    '435642'),
   ('Deputy Daniel Leon', '440002'),
   ('Deputy Darell Edwards', '465642'),
   ('Deputy Gonzalo Galvez', '465623')]},
 {'Los Angeles County Sheriff’s Department ': [('Deputy Miguel Vega',
    '542244')]},
 {'Los Angeles Police Department ': [('Officer Ed

In [24]:
final_table[final_table['deputies'].str.contains('Sheriff')]
# conclusion buckets from final_table[6]

Unnamed: 0,url,title,deputies,jsid,date,files,conclusion
0,https://da.lacounty.gov/sites/default/files/pd...,Non-Fatal Officer Involved Shooting of Quentin...,{'Los Angeles Police Department ': [('Officer ...,16-0625,2020-08-24 00:00:00,[L.A.P.D File #F081 -16],We find that Officer Borunda acted in lawful s...
1,https://da.lacounty.gov/sites/default/files/pd...,Fatal officer Involved Shooting of Robert Colvin,{'Los Angeles County Sheriff’s Department ': [...,20-0187,2023-08-24 00:00:00,[L.A.S.D. File # 020-09658 -1125 -013],"Based on the totality of the circumstances, we..."
2,https://da.lacounty.gov/sites/default/files/pd...,Fatal Officer Involved Shooting of Jarrid Hurst,{'Los Angeles Sheriff’s Department ': [('Deput...,20-0194,2021-10-18 00:00:00,[L.A.S.D. File #020-05733 -1418 -013],We find that Deputies Jover and Abarca acted i...
3,https://da.lacounty.gov/sites/default/files/pd...,Non-Fatal Officer Involved Shooting of Nathan ...,{'Los Angeles Police Department ': [('Officer ...,21-0116,2023-02-16 00:00:00,[L.A.P.D. File # F017-21],We conclude that there is insufficient evidenc...
4,https://da.lacounty.gov/sites/default/files/pd...,Fatal Officer Involved Shooting of Ricardo Myers,{'Los Angeles County Sheriff’s Department ': [...,19-0453,2020-10-09 00:00:00,[L.A.S.D. File #019 -22894 -1137 -013],We find that Deput y Lee acted lawfully in the...
...,...,...,...,...,...,...,...
286,https://da.lacounty.gov/sites/default/files/pd...,Fatal Officer Involved Shooting of Miguel Lopez,"{""Los Angeles County Sheriff's Department"": ['...",23-0023,June 5 2024,[L.A.S.D. File #023-01205-0250-0138],"For the foregoing reasons, we find that the us..."
287,https://da.lacounty.gov/sites/default/files/pd...,Fatal Officer Involved Shooting of Christopher...,{'Montebello Police Department': ['Officer Cra...,17-0409,December 15 2022,"[Montebello P.D. File # 2017-00005397, ATF Fil...","Fivecoat, Adams and Ciccone acted lawfully in ..."
288,https://da.lacounty.gov/sites/default/files/pd...,Fatal Officer Involved Shooting of Anthony Loia,{'Long Beach Police Department': ['Darrek Cano...,21-0054,October 19 2023,[L.B.P.D. File #21-6195],"Based on the totality of the evidence, we find..."
289,https://da.lacounty.gov/sites/default/files/pd...,Non-Fatal Officer Involved Shooting of Ramon G...,{'Los Angeles Police Department': ['Officer Ch...,23-0039,May 20 2024,[L.A.P.D. File #F005-23],"For the foregoing reasons, we find that Office..."


In [25]:
# final_table.to_csv('shootings_august_2024.csv', index=False)  

In [26]:
from sqlalchemy import create_engine 
# Create an engine instance
engine = create_engine('postgresql://yiv:postgres@localhost/postgres')



In [28]:
from psycopg2.extras import Json, DictCursor


In [32]:
final_table['deputies'] = final_table['deputies'].apply(lambda x: Json(x))

In [37]:
final_table = final_table.drop(columns=['files'])

In [39]:
final_table.iloc[0:1].to_sql('shooting', con=engine, if_exists='append', index=False)

1

In [40]:
final_table.iloc[0:1]

Unnamed: 0,url,title,deputies,jsid,date,conclusion
0,https://da.lacounty.gov/sites/default/files/pd...,Non-Fatal Officer Involved Shooting of Quentin...,"'{""Los Angeles Police Department "": [[""Officer...",16-0625,2020-08-24 00:00:00,We find that Officer Borunda acted in lawful s...


In [None]:
from IPython.display import clear_output

In [89]:
def con_process(x):
    stripx = x.replace(" ", "").lower()
    if "insufficientevidence" in stripx:
        return 3
    if "actedinlawful" in stripx or "actedlawful" in stripx or "wasreasonable" in stripx \
        or "waslawful" in stripx or "reasonablybelieved" in stripx or "notunlawful" in stripx:
        return 1
    else:
        print(x)
        a = input()
        clear_output()
        return a

In [90]:
# 1: justified, 2: unjustified, 3: evidence lacking
con_buckets = final_table['conclusion'].apply(con_process)

In [100]:
final_table.insert(5, "Justified?", list(con_buckets), True)

Unnamed: 0,url,title,deputies,jsid,date,Justified?,conclusion
0,https://da.lacounty.gov/sites/default/files/pd...,Non-Fatal Officer Involved Shooting of Quentin...,"'{""Los Angeles Police Department "": [[""Officer...",16-0625,2020-08-24 00:00:00,1,We find that Officer Borunda acted in lawful s...
1,https://da.lacounty.gov/sites/default/files/pd...,Fatal officer Involved Shooting of Robert Colvin,"'{""Los Angeles County Sheriff\u2019s Departmen...",20-0187,2023-08-24 00:00:00,1,"Based on the totality of the circumstances, we..."
2,https://da.lacounty.gov/sites/default/files/pd...,Fatal Officer Involved Shooting of Jarrid Hurst,"'{""Los Angeles Sheriff\u2019s Department "": [[...",20-0194,2021-10-18 00:00:00,1,We find that Deputies Jover and Abarca acted i...
3,https://da.lacounty.gov/sites/default/files/pd...,Non-Fatal Officer Involved Shooting of Nathan ...,"'{""Los Angeles Police Department "": [[""Officer...",21-0116,2023-02-16 00:00:00,3,We conclude that there is insufficient evidenc...
4,https://da.lacounty.gov/sites/default/files/pd...,Fatal Officer Involved Shooting of Ricardo Myers,"'{""Los Angeles County Sheriff\u2019s Departmen...",19-0453,2020-10-09 00:00:00,1,We find that Deput y Lee acted lawfully in the...
...,...,...,...,...,...,...,...
286,https://da.lacounty.gov/sites/default/files/pd...,Fatal Officer Involved Shooting of Miguel Lopez,"'""{\""Los Angeles County Sheriff''s Department\...",23-0023,June 5 2024,1,"For the foregoing reasons, we find that the us..."
287,https://da.lacounty.gov/sites/default/files/pd...,Fatal Officer Involved Shooting of Christopher...,"'""{''Montebello Police Department'': [''Office...",17-0409,December 15 2022,1,"Fivecoat, Adams and Ciccone acted lawfully in ..."
288,https://da.lacounty.gov/sites/default/files/pd...,Fatal Officer Involved Shooting of Anthony Loia,"'""{''Long Beach Police Department'': [''Darrek...",21-0054,October 19 2023,1,"Based on the totality of the evidence, we find..."
289,https://da.lacounty.gov/sites/default/files/pd...,Non-Fatal Officer Involved Shooting of Ramon G...,"'""{''Los Angeles Police Department'': [''Offic...",23-0039,May 20 2024,1,"For the foregoing reasons, we find that Office..."
