In [1]:
# libraries

import json
import lzma
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import re
from tqdm import tqdm
from IPython.core.display import display, HTML
import re

from IPython.core.interactiveshell import InteractiveShell
# Display the value of every bare expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

# Raise pandas' display limits so wide / long frames are shown without elision.
pd.set_option("display.max_columns", 999)
pd.set_option("display.max_rows", 999)

In [2]:
# Using the Arkansas-20200302-text export, not the -xml one.
# Print only the first case as an example of the record layout.
with lzma.open('Arkansas_1/data/data.jsonl.xz', 'r') as jsonl_file:
    for line in jsonl_file:
        print(json.loads(line))
        # Stop right after the first record; without this the loop would keep
        # decompressing and iterating the whole archive for nothing.
        break

{'id': 11640036, 'url': 'https://api.capapi.org/v1/cases/11640036/', 'name': 'FISHER v. REIDER', 'name_abbreviation': 'Fisher v. Reider', 'decision_date': '1829-11', 'docket_number': 'Case No. 4,822a', 'first_page': '137', 'last_page': '138', 'citations': [{'type': 'official', 'cite': '9 F. Cas. 137'}], 'volume': {'url': 'https://api.capapi.org/v1/volumes/32044054565676/', 'barcode': '32044054565676', 'volume_number': '9'}, 'reporter': {'url': 'https://api.capapi.org/v1/reporters/942/', 'full_name': 'Federal Cases', 'id': 942}, 'court': {'url': 'https://api.capapi.org/v1/courts/ark-super-ct-1/', 'name_abbreviation': 'Ark. Super. Ct.', 'slug': 'ark-super-ct-1', 'name': 'Superior Court of the Territory of Arkansas', 'id': 9132}, 'jurisdiction': {'name': 'Ark.', 'name_long': 'Arkansas', 'whitelisted': True, 'url': 'https://api.capapi.org/v1/jurisdictions/ark/', 'id': 34, 'slug': 'ark'}, 'frontend_url': 'https://cite.capapi.org/f-cas/9/137/', 'preview': [], 'casebody': {'data': {'judges': 

In [3]:
%%time

cases = []
with lzma.open('Arkansas_1/data/data.jsonl.xz', 'r') as jsonl_file:
    for case in jsonl_file:
        cases.append(json.loads(str(case, 'utf-8')))
        
df = pd.DataFrame(cases).sort_values('decision_date').reset_index(drop=True)

CPU times: user 12.4 s, sys: 480 ms, total: 12.8 s
Wall time: 12.9 s


In [4]:
# Sanity check: (rows, columns) of the loaded frame, then its first row
# (the frame was sorted by decision_date when it was built).
print(df.shape)
df.head(1)

(59735, 16)


Unnamed: 0,id,url,name,name_abbreviation,decision_date,docket_number,first_page,last_page,citations,volume,reporter,court,jurisdiction,frontend_url,preview,casebody
0,6611556,https://api.capapi.org/v1/cases/6611556/,UNITED STATES v. DICKINSON,United States v. Dickinson,1820-01,,849,850,"[{'type': 'official', 'cite': '25 F. Cas. 849'...",{'url': 'https://api.capapi.org/v1/volumes/NOT...,{'url': 'https://api.capapi.org/v1/reporters/9...,{'url': 'https://api.capapi.org/v1/courts/ark-...,"{'name': 'Ark.', 'name_long': 'Arkansas', 'whi...",https://cite.capapi.org/f-cas/25/849/6611556/,[],"{'data': {'judges': ['Before SCOTT, J.'], 'att..."


In [5]:
# Taking a look at the nested "casebody" payload of the first case.
# The column is selected by name instead of by position (15) so the lookup
# does not silently point at another field if the column order ever changes.
df['casebody'].iloc[0]['data']

{'judges': ['Before SCOTT, J.'],
 'attorneys': ['Joshua Norvell, for the United States.',
  'Jasin Chamberlain, Henry Cassady, Alexander S. Walker, and Perly Wallis, for prisoner.'],
 'opinions': [{'type': 'majority',
   'text': "THE COURT\noverruled the motion, and said that some of the reasons urged in arrest of judgment were not sustained by the record; that others were not proper grounds in arrest of judgment, and that some had not been presented at the proper time nor in a proper manner, if good at all.\nThe prisoner being asked if he had any objection why sentence should not be pronounced against him on the verdict of the jury, said that he objected to any sentence, because he was advised that the indictment did not properly charge the commission of a felony.\nTHE COURT disregarded- his objection, and sentenced him to be castrated according to the law in that behalf provided, by a skillful physician, under the direction of the sheriff of Arkansas county, on the 15th February, 182

In [6]:
# Helper that flattens multi-line text onto a single line.
def text_cleaner(text):
    """Return `text` with its line breaks replaced by single spaces.

    The string is split with `str.splitlines()` and the pieces are joined
    with one space each. Nothing else is altered: no markup is stripped and
    no whitespace is collapsed, so an empty line yields two adjacent spaces.
    """
    return " ".join(text.splitlines())

In [7]:
%%time
storage = []

for i in range(df.shape[0]):
    
    judges = df.iloc[i,15]['data']['judges']
    attorneys = df.iloc[i,15]['data']['attorneys']
    headnotes = df.iloc[i,15]['data']['head_matter']
    if df.iloc[i,15]['data']['opinions'] != []:
        opinions = df.iloc[i,15]['data']['opinions'][0]['text']

    headnotes_clean = text_cleaner(headnotes)
    opinions_clean = text_cleaner(opinions)
    
    storage.append({'judges': judges,
                    'attorneys': attorneys,
                    'headnotes': headnotes_clean,
                    'opinions': opinions_clean})

CPU times: user 3.25 s, sys: 296 ms, total: 3.55 s
Wall time: 3.55 s


In [8]:
# Turn the per-case records collected in `storage` into a frame
# (columns: judges, attorneys, headnotes, opinions), then check its shape
# and preview the first rows.
df_parsed = pd.DataFrame(storage)
print(df_parsed.shape)
df_parsed.head(15)

(59735, 4)


Unnamed: 0,judges,attorneys,headnotes,opinions
0,"[Before SCOTT, J.]","[Joshua Norvell, for the United States., Jasin...","Case ¡No. 14,967a. UNITED STATES v. DICKINSON....","THE COURT overruled the motion, and said that ..."
1,[],"[Joshua Norvell, prosecuting attorney, for the...",The United States vs. Thomas Dickinson. 1. It ...,This was an indictment for rape committed on t...
2,"[Before JOHNSON and SCOTT, JJ.]",[],"Case No. 13,944a. THOMPSON et al. v. CAMPBELL....",OPINION OF THE COURT. It is clear that the cou...
3,[],[],Hewes Scull vs. Joseph Kuykendall. A suit shou...,Opinion oe the Court. — The court below dismis...
4,[],"[Before JOHNSON and SCOTT, JJ.]","Case No. 12,670b. SCULL v. KUYKENDALL. [Hempst...",OPINION OF THE COURT. The court below dismisse...
5,[],[],William Russell vs. Amos Wheeler et al. 1. In ...,"Johnson, J., delivered the opinion of the Cour..."
6,[],[],Thompson and Mathews vs. Campbell. 1. It is er...,Opinion op the Court.— It is clear that the co...
7,"[Before JOHNSON and SCOTT, JJ.]",[],"Case No. 12,164a. RUSSELL v. WHEELER et al. [H...","JOHNSON, J. William Russell sued out from two ..."
8,[],[],In the matter of Radford Ellis. A grand juror ...,"On motion of the prosecuting attorney, the cou..."
9,[],[],William Neely vs. Robinson et al. An attorney ...,Opinion oe the Court. — In this case it would ...


In [9]:
# Headnotes of the first case; the column is selected by name rather than by
# position (2) so the lookup does not depend on the column order.
df_parsed['headnotes'].iloc[0]

'Case ¡No. 14,967a. UNITED STATES v. DICKINSON. [Hempst 1.] Superior Court Territory of Arkansas. Jan., 1820. Rape—Indictment—Jury. 1. It is not a fatal defect in an indictment for rape that it also alleges that the woman was gotten with child. 2. Before a jury is made up, incompetent jurors who have been summoned, may be discharged, and others summoned in their places. Indictment [against Thomas Dickinson] for rape. Before SCOTT, J. This was an indictment for rape committed on the person of Sally ‘ Hall, to which the defendant pleaded not guilty, and there was a trial by jury composed of Richmond Peeler, Charles Roberts, Manuel Roderigue, John Jordolas, Jacques Gocio, Stephen Vasseau, Nathal Vasseau, Michael Petterson, John Pertua, Manuel Pertua, Pierre Mitchell, and Attica Nodall, who, after hearing evidence and arguments of counsel, retired- to consult of their verdict, and, after deliberation, returned into court the following, namely, “We, the jury, find the defendant guilty of ra

In [10]:
# Opinion text of the first case; the column is selected by name rather than
# by position (3) so the lookup does not depend on the column order.
df_parsed['opinions'].iloc[0]

"THE COURT overruled the motion, and said that some of the reasons urged in arrest of judgment were not sustained by the record; that others were not proper grounds in arrest of judgment, and that some had not been presented at the proper time nor in a proper manner, if good at all. The prisoner being asked if he had any objection why sentence should not be pronounced against him on the verdict of the jury, said that he objected to any sentence, because he was advised that the indictment did not properly charge the commission of a felony. THE COURT disregarded- his objection, and sentenced him to be castrated according to the law in that behalf provided, by a skillful physician, under the direction of the sheriff of Arkansas county, on the 15th February, 1820, between 10 o'clock a. m., and 3 o’clock p. m., of that day. • A motion was made by the prisoner for a writ of error coram nobis, but the motion was overruled. 4 This sentence was not executed, the prisoner having been pardoned by

In [11]:
%%time
df_headnotes = df_parsed.merge(df, left_index=True, right_index=True)
df_headnotes.head(10)

CPU times: user 19.8 ms, sys: 1.17 ms, total: 20.9 ms
Wall time: 19.9 ms


Unnamed: 0,judges,attorneys,headnotes,opinions,id,url,name,name_abbreviation,decision_date,docket_number,first_page,last_page,citations,volume,reporter,court,jurisdiction,frontend_url,preview,casebody
0,"[Before SCOTT, J.]","[Joshua Norvell, for the United States., Jasin...","Case ¡No. 14,967a. UNITED STATES v. DICKINSON....","THE COURT overruled the motion, and said that ...",6611556,https://api.capapi.org/v1/cases/6611556/,UNITED STATES v. DICKINSON,United States v. Dickinson,1820-01,,849,850,"[{'type': 'official', 'cite': '25 F. Cas. 849'...",{'url': 'https://api.capapi.org/v1/volumes/NOT...,{'url': 'https://api.capapi.org/v1/reporters/9...,{'url': 'https://api.capapi.org/v1/courts/ark-...,"{'name': 'Ark.', 'name_long': 'Arkansas', 'whi...",https://cite.capapi.org/f-cas/25/849/6611556/,[],"{'data': {'judges': ['Before SCOTT, J.'], 'att..."
1,[],"[Joshua Norvell, prosecuting attorney, for the...",The United States vs. Thomas Dickinson. 1. It ...,This was an indictment for rape committed on t...,236605,https://api.capapi.org/v1/cases/236605/,The United States vs. Thomas Dickinson,United States v. Dickinson,1820-01,,1,3,"[{'type': 'official', 'cite': '1 Ark. Terr. Re...",{'url': 'https://api.capapi.org/v1/volumes/320...,{'url': 'https://api.capapi.org/v1/reporters/6...,{'url': 'https://api.capapi.org/v1/courts/ark-...,"{'name': 'Ark.', 'name_long': 'Arkansas', 'whi...",https://cite.capapi.org/ark-terr-rep/1/1/,[],"{'data': {'judges': [], 'attorneys': ['Joshua ..."
2,"[Before JOHNSON and SCOTT, JJ.]",[],"Case No. 13,944a. THOMPSON et al. v. CAMPBELL....",OPINION OF THE COURT. It is clear that the cou...,6657705,https://api.capapi.org/v1/cases/6657705/,THOMPSON et al. v. CAMPBELL,Thompson v. Campbell,1821-06,,1027,1027,"[{'type': 'official', 'cite': '23 F. Cas. 1027'}]",{'url': 'https://api.capapi.org/v1/volumes/NOT...,{'url': 'https://api.capapi.org/v1/reporters/9...,{'url': 'https://api.capapi.org/v1/courts/ark-...,"{'name': 'Ark.', 'name_long': 'Arkansas', 'whi...",https://cite.capapi.org/f-cas/23/1027/6657705/,[],{'data': {'judges': ['Before JOHNSON and SCOTT...
3,[],[],Hewes Scull vs. Joseph Kuykendall. A suit shou...,Opinion oe the Court. — The court below dismis...,236682,https://api.capapi.org/v1/cases/236682/,Hewes Scull vs. Joseph Kuykendall,Scull v. Kuykendall,1821-06,,9,9,"[{'type': 'official', 'cite': '1 Ark. Terr. Re...",{'url': 'https://api.capapi.org/v1/volumes/320...,{'url': 'https://api.capapi.org/v1/reporters/6...,{'url': 'https://api.capapi.org/v1/courts/ark-...,"{'name': 'Ark.', 'name_long': 'Arkansas', 'whi...",https://cite.capapi.org/ark-terr-rep/1/9/236682/,[],"{'data': {'judges': [], 'attorneys': [], 'opin..."
4,[],"[Before JOHNSON and SCOTT, JJ.]","Case No. 12,670b. SCULL v. KUYKENDALL. [Hempst...",OPINION OF THE COURT. The court below dismisse...,6571402,https://api.capapi.org/v1/cases/6571402/,SCULL v. KUYKENDALL,Scull v. Kuykendall,1821-06,,894,894,"[{'type': 'official', 'cite': '21 F. Cas. 894'...",{'url': 'https://api.capapi.org/v1/volumes/NOT...,{'url': 'https://api.capapi.org/v1/reporters/9...,{'url': 'https://api.capapi.org/v1/courts/ark-...,"{'name': 'Ark.', 'name_long': 'Arkansas', 'whi...",https://cite.capapi.org/f-cas/21/894/6571402/,[],"{'data': {'judges': [], 'attorneys': ['Before ..."
5,[],[],William Russell vs. Amos Wheeler et al. 1. In ...,"Johnson, J., delivered the opinion of the Cour...",236518,https://api.capapi.org/v1/cases/236518/,William Russell vs. Amos Wheeler et al.,Russell v. Wheeler,1821-06,,3,8,"[{'type': 'official', 'cite': '1 Ark. Terr. Re...",{'url': 'https://api.capapi.org/v1/volumes/320...,{'url': 'https://api.capapi.org/v1/reporters/6...,{'url': 'https://api.capapi.org/v1/courts/ark-...,"{'name': 'Ark.', 'name_long': 'Arkansas', 'whi...",https://cite.capapi.org/ark-terr-rep/1/3/,[],"{'data': {'judges': [], 'attorneys': [], 'opin..."
6,[],[],Thompson and Mathews vs. Campbell. 1. It is er...,Opinion op the Court.— It is clear that the co...,236580,https://api.capapi.org/v1/cases/236580/,Thompson and Mathews vs. Campbell,Thompson v. Campbell,1821-06,,8,9,"[{'type': 'official', 'cite': '1 Ark. Terr. Re...",{'url': 'https://api.capapi.org/v1/volumes/320...,{'url': 'https://api.capapi.org/v1/reporters/6...,{'url': 'https://api.capapi.org/v1/courts/ark-...,"{'name': 'Ark.', 'name_long': 'Arkansas', 'whi...",https://cite.capapi.org/ark-terr-rep/1/8/,[],"{'data': {'judges': [], 'attorneys': [], 'opin..."
7,"[Before JOHNSON and SCOTT, JJ.]",[],"Case No. 12,164a. RUSSELL v. WHEELER et al. [H...","JOHNSON, J. William Russell sued out from two ...",6561940,https://api.capapi.org/v1/cases/6561940/,RUSSELL v. WHEELER et al.,Russell v. Wheeler,1821-06,,66,68,"[{'type': 'official', 'cite': '21 F. Cas. 66'}...",{'url': 'https://api.capapi.org/v1/volumes/NOT...,{'url': 'https://api.capapi.org/v1/reporters/9...,{'url': 'https://api.capapi.org/v1/courts/ark-...,"{'name': 'Ark.', 'name_long': 'Arkansas', 'whi...",https://cite.capapi.org/f-cas/21/66/,[],{'data': {'judges': ['Before JOHNSON and SCOTT...
8,[],[],In the matter of Radford Ellis. A grand juror ...,"On motion of the prosecuting attorney, the cou...",236532,https://api.capapi.org/v1/cases/236532/,In the matter of Radford Ellis,In re Ellis,1821-10,,10,10,"[{'type': 'official', 'cite': '1 Ark. Terr. Re...",{'url': 'https://api.capapi.org/v1/volumes/320...,{'url': 'https://api.capapi.org/v1/reporters/6...,{'url': 'https://api.capapi.org/v1/courts/ark-...,"{'name': 'Ark.', 'name_long': 'Arkansas', 'whi...",https://cite.capapi.org/ark-terr-rep/1/10/236532/,[],"{'data': {'judges': [], 'attorneys': [], 'opin..."
9,[],[],William Neely vs. Robinson et al. An attorney ...,Opinion oe the Court. — In this case it would ...,236530,https://api.capapi.org/v1/cases/236530/,William Neely vs. Robinson et al.,Neely v. Robinson,1821-10,,9,10,"[{'type': 'official', 'cite': '1 Ark. Terr. Re...",{'url': 'https://api.capapi.org/v1/volumes/320...,{'url': 'https://api.capapi.org/v1/reporters/6...,{'url': 'https://api.capapi.org/v1/courts/ark-...,"{'name': 'Ark.', 'name_long': 'Arkansas', 'whi...",https://cite.capapi.org/ark-terr-rep/1/9/236530/,[],"{'data': {'judges': [], 'attorneys': [], 'opin..."
