# Notebook for analyzing the Delhi High Court dataset and the results of the models

In [2]:
import os
import json
import plotly.express as px
from collections import Counter, defaultdict
import statistics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
from itertools import chain


Getting advocate cases

In [104]:
adv_cases_path = "/home/workboots/Datasets/DHC/variations/new/var_1/adv_info/adv_cases.json"
case_chapters_path = "/home/workboots/Datasets/DHC/variations/new/var_1/targets/case_chapters.json"
roles_path = "/home/workboots/Datasets/DHC/common_new/preprocess/roles/"
selected_cases_path = "/home/workboots/Datasets/DHC/variations/new/var_1/adv_info/selected_cases.txt"

In [105]:
with open(adv_cases_path, 'r') as f:
    adv_cases = json.load(f)

In [106]:
with open(case_chapters_path, 'r') as f:
    case_chapters = json.load(f)

In [107]:
with open(selected_cases_path, 'r') as f:
    selected_cases = f.readlines()
selected_cases = list(filter(None, map(lambda x: x.strip(), selected_cases)))

In [108]:
selected_cases

['154003639',
 '3814551',
 '148740508',
 '91527765',
 '100355572',
 '160230226',
 '195812892',
 '5731710',
 '169386063',
 '116511838',
 '188498538',
 '161399126',
 '69181514',
 '114764023',
 '43884107',
 '131986714',
 '166956200',
 '109750416',
 '66939334',
 '138869338',
 '72898067',
 '43242715',
 '22030457',
 '92872',
 '64470704',
 '106508763',
 '32376650',
 '162296162',
 '12251513',
 '178925055',
 '88640270',
 '46124010',
 '169527354',
 '15027392',
 '103560012',
 '19403563',
 '86149508',
 '184558299',
 '172768632',
 '79699677',
 '190020118',
 '31934338',
 '122813093',
 '65596853',
 '55131537',
 '97984630',
 '164289063',
 '92794817',
 '196656053',
 '29309932',
 '47742484',
 '131583952',
 '12735895',
 '120154188',
 '198364086',
 '121647116',
 '164151163',
 '163139259',
 '63402494',
 '49230364',
 '106456368',
 '117589824',
 '27645841',
 '91155038',
 '187556272',
 '35329999',
 '129823823',
 '146470158',
 '112621805',
 '66613163',
 '53778351',
 '100847065',
 '40327685',
 '567922',
 '99136

### Basic Advocate Stats

Total number of advocates

In [121]:
len(adv_cases)

94

Maximum number of cases for an advocate

In [122]:
max([len(v) for v in adv_cases.values()])

297

Minimum number of cases for an advocate

In [123]:
min([len(v) for v in adv_cases.values()])

25

Number of advocates with only one case

In [129]:
sum(int(len(v) < 30) for v in adv_cases.values())

18

In [97]:
10979/21531


0.509915935163253

Average number of cases for an advocate

In [124]:
statistics.mean([len(v) for v in adv_cases.values()])

70.57446808510639

Mode number of cases for an advocate

In [126]:
statistics.mode([len(v) for v in adv_cases.values()])

44

Number of advocates with more than or equal to 10 cases

In [13]:
sum([int(len(v) > 9) for v in adv_cases.values()])

2255

### Basic Charge Stats

Total number of chapters

In [139]:
case_chapters = {k: v for k, v in case_chapters.items() if k in selected_cases}

In [140]:
len(case_chapters)

4257

In [141]:
chapter_counts = Counter(chain.from_iterable(v for v in case_chapters.values()))

In [146]:
chapter_counts = {k:v for k,v in sorted(chapter_counts.items(), key=lambda x: x[1], reverse=True)}

In [147]:
chapter_counts

{'Indian Penal Code, 1860_OF OFFENCESAFFECTINGTHE HUMAN BODY': 2851,
 'Indian Penal Code, 1860_GENERAL EXPLANATIONS': 2158,
 'Indian Penal Code, 1860_OF OFFENCES AGAINST PROPERTY': 1710,
 'Indian Penal Code, 1860_OF ABETMENT': 922,
 'Indian Penal Code, 1860_OF OFFENCES RELATINGTO MARRIAGE': 627,
 'Indian Penal Code, 1860_OR CRIMINAL INTIMIDATION': 550,
 'Arms Act, 1959_OFFENCES AND PENALTIES': 449,
 'Indian Penal Code, 1860_OF OFFENCES RELATING TO DOCUMENTS AND TO PROPERTY MARKS': 445,
 'Indian Penal Code, 1860_INTRODUCTION': 363,
 'Indian Penal Code, 1860_OF FLSEEVIDENCE AND OFFENCES AGAINST PUBLIC JUSTICE': 361,
 'Indian Penal Code, 1860_OF OFFENCES AGAINST THE PUBLIC TRANQUILLITY': 189,
 'Arms Act, 1959_MISCELLANEOUS': 152,
 'Indian Penal Code, 1860_OF CONTEMPTS OF THE LAWFUL AUTHORITY OF PUBLIC SERVANTS': 109,
 'Indian Penal Code, 1860_OF OFFENCES AFFECTING THE PUBLIC HEALTH': 85,
 'Indian Penal Code, 1860_OF ATTEMPTS OF COMMIT OFFENCES': 79,
 'Transfer of Property Act, 1882_NO CHA

Maximum number of case citations for a chapter

In [143]:
max([v for v in chapter_counts.values()])

2851

Minimum number of case citations for a chapter

In [133]:
min([v for v in chapter_counts.values()])

52

Average number of case citations for a chapter

In [134]:
statistics.mean([v for v in chapter_counts.values()])

725.35

Mode number of case citations for a chapter

In [135]:
statistics.mode([v for v in chapter_counts.values()])

3563

In [136]:
sum(int(v == 1) for v in chapter_counts.values())

0

Number of chapters with at least 10 citations

In [24]:
sum([int(v > 9) for v in chapter_counts.values()])

265

## Rhetorical Roles

Getting all rhetorical roles

In [109]:
case_roles = {}
for fl in selected_cases:
    with open(os.path.join(roles_path, f"{fl}.txt"), 'r') as f:
        roles = f.readlines()
    roles = list(filter(None, map(lambda x: x.strip(), roles)))
    case_roles[fl] = roles

In [110]:
len(case_roles)

4257

In [111]:
role_counts = Counter(chain.from_iterable(v for v in case_roles.values()))

In [112]:
role_counts

Counter({'Facts': 339680,
         'Ruling by Lower Court': 7279,
         'Statute': 16412,
         'Precedent': 216686,
         'Ratio of the decision': 188538,
         'Argument': 88252,
         'Ruling by Present Court': 16681})

Total number of sentences

In [113]:
total = sum(role_counts.values())

In [114]:
total

873528

Role Percentages

In [115]:
percent = {k: v * 1/total for k, v in role_counts.items()}

In [116]:
percent

{'Facts': 0.3888598877196839,
 'Ruling by Lower Court': 0.008332875420135359,
 'Statute': 0.018788178512881098,
 'Precedent': 0.24805844804058944,
 'Ratio of the decision': 0.21583509629914555,
 'Argument': 0.10102938886904599,
 'Ruling by Present Court': 0.019096125138518744}

Average document length

In [117]:
statistics.mean([len(v) for v in case_roles.values()])

205.19802677942212

Maximum number of sentences in a document

In [118]:
max([len(v) for v in case_roles.values()])

12518

Minimum number of sentences in a document

In [119]:
min([len(v) for v in case_roles.values()])

12

Mode number of sentences in a document

In [120]:
statistics.mode([len(v) for v in case_roles.values()])

80

## Getting charge modality of advocates

In [148]:
adv_charges = defaultdict(list)

In [149]:
for adv, cases in adv_cases.items():
    adv_charges[adv].extend(chain.from_iterable([case_chapters.get(case, []) for case in cases]))

In [151]:
adv_chapters = {k: set(v) for k, v in adv_charges.items()}

In [152]:
adv_chapters

{'PawanSharma': {'Arms Act, 1959_MISCELLANEOUS',
  'Arms Act, 1959_OFFENCES AND PENALTIES',
  'Companies Act, 2013_NATIONAL COMPANY LAW TRIBUNAL AND APPELLATE TRIBUNAL',
  'Indian Penal Code, 1860_GENERAL EXPLANATIONS',
  'Indian Penal Code, 1860_INTRODUCTION',
  'Indian Penal Code, 1860_OF ABETMENT',
  'Indian Penal Code, 1860_OF ATTEMPTS OF COMMIT OFFENCES',
  'Indian Penal Code, 1860_OF CONTEMPTS OF THE LAWFUL AUTHORITY OF PUBLIC SERVANTS',
  'Indian Penal Code, 1860_OF DEFAMATION',
  'Indian Penal Code, 1860_OF FLSEEVIDENCE AND OFFENCES AGAINST PUBLIC JUSTICE',
  'Indian Penal Code, 1860_OF OFFENCES AFFECTING THE PUBLIC HEALTH',
  'Indian Penal Code, 1860_OF OFFENCES AGAINST PROPERTY',
  'Indian Penal Code, 1860_OF OFFENCES AGAINST THE PUBLIC TRANQUILLITY',
  'Indian Penal Code, 1860_OF OFFENCES RELATING TO DOCUMENTS AND TO PROPERTY MARKS',
  'Indian Penal Code, 1860_OF OFFENCES RELATING TO THE ARMY',
  'Indian Penal Code, 1860_OF OFFENCES RELATINGTO MARRIAGE',
  'Indian Penal Code

In [156]:
chapter_advs = defaultdict(list)

In [157]:
for adv, chapters in adv_chapters.items():
    for chapter in chapters:
        chapter_advs[chapter].append(adv)

In [161]:
chapter_advs_counts = {k: len(v) for k, v in chapter_advs.items()}

In [162]:
chapter_advs_counts = {k: v for k, v in sorted(chapter_advs_counts.items(),
                                               key=lambda x: x[1],
                                               reverse=True)}

In [163]:
chapter_advs_counts

{'Indian Penal Code, 1860_OF OFFENCES AGAINST PROPERTY': 94,
 'Indian Penal Code, 1860_GENERAL EXPLANATIONS': 94,
 'Indian Penal Code, 1860_OF OFFENCESAFFECTINGTHE HUMAN BODY': 94,
 'Indian Penal Code, 1860_OF ABETMENT': 94,
 'Indian Penal Code, 1860_OF OFFENCES RELATING TO DOCUMENTS AND TO PROPERTY MARKS': 90,
 'Indian Penal Code, 1860_OR CRIMINAL INTIMIDATION': 90,
 'Indian Penal Code, 1860_INTRODUCTION': 89,
 'Arms Act, 1959_OFFENCES AND PENALTIES': 89,
 'Indian Penal Code, 1860_OF FLSEEVIDENCE AND OFFENCES AGAINST PUBLIC JUSTICE': 88,
 'Indian Penal Code, 1860_OF OFFENCES RELATINGTO MARRIAGE': 87,
 'Indian Penal Code, 1860_OF OFFENCES AGAINST THE PUBLIC TRANQUILLITY': 83,
 'Indian Penal Code, 1860_OF CONTEMPTS OF THE LAWFUL AUTHORITY OF PUBLIC SERVANTS': 69,
 'Arms Act, 1959_MISCELLANEOUS': 68,
 'Indian Penal Code, 1860_OF ATTEMPTS OF COMMIT OFFENCES': 59,
 'Indian Penal Code, 1860_OF DEFAMATION': 58,
 'Indian Penal Code, 1860_OF OFFENCES AFFECTING THE PUBLIC HEALTH': 51,
 'Indian 

In [164]:
adv_case_counts = {k: len(v) for k, v in adv_cases.items()}

In [165]:
adv_modality = {k: len(statistics.multimode(v)) for k, v in adv_charges.items()}

In [193]:
sum(int(v == 1) for v in adv_modality.values())

89

Getting maximum modality

In [166]:
max([v for v in adv_modality.values()])

2

Getting minimum modality

In [167]:
min([v for v in adv_modality.values()])

1

Getting average modality

In [168]:
statistics.mean([v for v in adv_modality.values()])

1.053191489361702

Getting modal modality

In [169]:
statistics.mode([v for v in adv_modality.values()])

1

In [170]:
adv_count_modality = {k: (adv_modality[k], adv_case_counts[k]) for k in adv_modality}

In [192]:
adv_count_modality

{'PawanSharma': (1, 143),
 'SMuralidhar': (1, 77),
 'ArvindNigam': (1, 53),
 'KamalKumarGhai': (1, 30),
 'MLYadav': (1, 167),
 'RaviNayak': (1, 50),
 'RakhiDubey': (1, 123),
 'MNDudeja': (1, 67),
 'RajniGupta': (1, 39),
 'NHariharan': (1, 41),
 'KusumDhalla': (1, 287),
 'RajdipaBehura': (1, 81),
 'RameshGupta': (1, 108),
 'RaviGupta': (1, 28),
 'KewalSinghAhuja': (1, 191),
 'RajeshKumar': (1, 44),
 'AmitChadha': (1, 59),
 'MukeshKumar': (1, 32),
 'AmitGupta': (1, 130),
 'ManjeetArya': (1, 200),
 'RichaKapoor': (1, 297),
 'AjayVerma': (1, 73),
 'ManinderSingh': (1, 39),
 'MohitMathur': (1, 155),
 'PannaLalSharma': (1, 49),
 'SumeetVerma': (1, 87),
 'KSinghal': (1, 56),
 'MukeshGupta': (1, 44),
 'ChetanSharma': (1, 26),
 'RahulMehra': (2, 53),
 'RituGauba': (1, 41),
 'SidharthLuthra': (1, 98),
 'VikasPahwa': (1, 73),
 'SoniaMathur': (1, 27),
 'SahilaLamba': (1, 144),
 'ChetanLokur': (1, 62),
 'AnuragJain': (1, 81),
 'MeenakshiChauhan': (1, 267),
 'NanditaRao': (2, 104),
 'RNMittal': (1, 

In [171]:
modality_cases_counts = Counter(adv_count_modality.values())

In [172]:
modality_cases_counts

Counter({(1, 143): 1,
         (1, 77): 1,
         (1, 53): 1,
         (1, 30): 2,
         (1, 167): 1,
         (1, 50): 1,
         (1, 123): 1,
         (1, 67): 1,
         (1, 39): 2,
         (1, 41): 3,
         (1, 287): 1,
         (1, 81): 3,
         (1, 108): 1,
         (1, 28): 4,
         (1, 191): 1,
         (1, 44): 4,
         (1, 59): 1,
         (1, 32): 1,
         (1, 130): 1,
         (1, 200): 1,
         (1, 297): 1,
         (1, 73): 3,
         (1, 155): 1,
         (1, 49): 1,
         (1, 87): 1,
         (1, 56): 2,
         (1, 26): 3,
         (2, 53): 1,
         (1, 98): 1,
         (1, 27): 5,
         (1, 144): 1,
         (1, 62): 1,
         (1, 267): 1,
         (2, 104): 1,
         (1, 36): 2,
         (1, 33): 2,
         (1, 85): 1,
         (1, 201): 1,
         (1, 31): 2,
         (1, 118): 1,
         (1, 57): 2,
         (1, 58): 1,
         (1, 37): 2,
         (1, 55): 2,
         (1, 38): 5,
         (1, 116): 1,
         (1, 96): 

In [173]:
unique_modalities = np.unique([v[0] for v in adv_count_modality.values()])

In [174]:
len(unique_modalities)

2

In [175]:
unique_case_counts = np.unique([v[1] for v in adv_count_modality.values()])

In [176]:
len(unique_case_counts)

55

In [177]:
max(unique_case_counts)

297

In [178]:
counts = np.zeros((max(unique_modalities)+1, max(unique_case_counts)+1))

In [179]:
counts.shape

(3, 298)

In [180]:
for k, v in modality_cases_counts.items():
    counts[k[0],k[1]] = v

In [181]:
idx_col = np.argwhere(np.all(counts[..., :] == 0, axis=0))

In [182]:
counts = np.delete(counts, idx_col, axis=1)

In [183]:
idx_row = np.argwhere(np.all(counts[:, ...] == 0, axis=1))

In [184]:
counts = np.delete(counts, idx_row, axis=0)

In [185]:
counts.shape

(2, 55)

In [186]:
chi2 = stats.chi2_contingency(counts, correction=False)[0]
sample_size = np.sum(counts)
min_dim = min(counts.shape) - 1
min_dim = min_dim - min_dim**2/(sample_size - 1)
cramer_v = np.sqrt((chi2 / sample_size) / min_dim)

cramer_v

0.6829077318351967

In [187]:
df = pd.DataFrame([[k[0], k[1]] for k in adv_count_modality.values()], columns = ["Modality", "Number of Cases"])

In [188]:
len(df)

94

In [189]:
spearman = df.corr(method="spearman")["Modality"].iloc[1]

In [190]:
spearman

-0.0454414055987521