In [2]:
import numpy as np
import pandas as pd

In [3]:
# Eviction filings with the matched-entity column, as read from the parquet file below.
MATCHED_EVICTIONS_PATH = "./OUTPUT/evictions_matched.parquet"
df = pd.read_parquet(MATCHED_EVICTIONS_PATH)

In [4]:
# Show the column labels of the loaded frame; left as a bare last expression
# so the notebook renders the Index itself.
df.columns

Index(['plaintiff_name', 'defendant_name', 'defendant_zip', 'c2dp_case_id',
       'county', 'fips', 'case_number', 'filed_date', 'case_type', 'debt_type',
       'defendant_attorney', 'defendant_dba_ta', 'defendant_address',
       'defendant_count', 'all_defendant_names', 'all_defendant_addresses',
       'defendant_order_validation', 'hearing_count', 'latest_hearing_date',
       'latest_hearing_result', 'judgment', 'homestead_exemption_waived',
       'is_judgment_satisfied', 'date_satisfaction_filed',
       'further_case_information', 'costs', 'attorney_fees',
       'principal_amount', 'interest_award', 'possession',
       'writ_of_eviction_issued_date', 'other_amount', 'other_awarded',
       'clean_party_name', 'plaintiff_attorney', 'plaintiff_dba_ta',
       'plaintiff_address', 'plaintiff_count', 'all_plaintiff_names',
       'all_plaintiff_addresses', 'plaintiff_order_validation', 'filed_year',
       'filed_quarter', 'plaintiff_zip', 'serial_filing',
       'latest_filing

In [7]:
plaintiff_col = "plaintiff_name"
matched_col = "EntityID"

# Fixed seed so the spot-check below shows the same rows on every
# Restart & Run All; change it to inspect a different draw.
SAMPLE_SEED = 42
SAMPLE_SIZE = 20

# Keep only rows where both the filed plaintiff name and the matched entity
# are present, then draw up to SAMPLE_SIZE of them. The size is capped at the
# number of available rows because DataFrame.sample raises ValueError when
# asked for more rows than exist (without replacement).
matched_pairs = df[[plaintiff_col, matched_col]].dropna()
sample = matched_pairs.sample(
    n=min(SAMPLE_SIZE, len(matched_pairs)),
    random_state=SAMPLE_SEED,
)


In [8]:
# Print each sampled pair as a two-line record followed by a 60-dash rule,
# so the filed name and its matched entity can be compared by eye.
separator = "-" * 60
for filed_name, matched_name in zip(sample[plaintiff_col], sample[matched_col]):
    print(f"Filed Plaintiff: {filed_name}\nMatched LLC:     {matched_name}\n{separator}")

Filed Plaintiff: KPM LLC
Matched LLC:     KPM LLC   
------------------------------------------------------------
Filed Plaintiff: RBG BEACON 303 ASSOCIATES LLC
Matched LLC:     RBG BEACON 303 ASSOCIATES, LLC
------------------------------------------------------------
Filed Plaintiff: POLIZOS PROPERTIES LLC
Matched LLC:     POLIZOS PROPERTIES LLC
------------------------------------------------------------
Filed Plaintiff: CRS BROOKMONT HOUSING CORP
Matched LLC:     J&P HOUSING LLC
------------------------------------------------------------
Filed Plaintiff: WAVERTON CS DEL OWNER LLC
Matched LLC:     Waverton CS Del Owner, LLC
------------------------------------------------------------
Filed Plaintiff: COMMONS AT PLANT ZERO LLC, THE
Matched LLC:     The Commons at Plant Zero, LLC
------------------------------------------------------------
Filed Plaintiff: WHISPERING OAKS APTS LP
Matched LLC:     Salem Colony, LLC - 841 Whispering Woods Court, Protected Series
-----------------------

In [12]:
# For every matched entity, count how many distinct filed plaintiff names
# were mapped onto it.
distinct_plaintiffs_per_entity = df.groupby(matched_col)[plaintiff_col].nunique()

# Largest counts first: an entity that collects many different plaintiff
# names is a candidate for a bad match.
grouped_matches = distinct_plaintiffs_per_entity.sort_values(ascending=False)

# Show the 20 matched entities with the most distinct input names.
top_matches = grouped_matches.head(20)
print(top_matches)

EntityID
Nora Electronics Inc LLC                      337
XXX LIMITED                                   150
M and S paving, LLC                           145
MG LLC                                        128
M&G LLC                                       128
MG, L.L.C.                                    128
Property Property, LLC                        119
Clever Kids Educational Services Inc LLC      110
L&C LLC                                        85
M, LLC                                         73
M+ LLC                                         73
SPRINGBUCK BUILDING AND REMODELING, L.L.C.     71
Heart + Paw Acquisition Co., LLC               69
L&P MAINTENANCE, LLC                           57
L., llc                                        49
L LLC                                          49
111 Sutter Holdings Managing Co., LLC          45
OW, LLC                                        43
ST LLC                                         40
ST, LLC                                  

In [13]:
# Drill into one matched entity. grouped_matches is sorted by distinct
# plaintiff-name count (descending), so index[0] is the entity with the most
# distinct inputs; replace it with any specific entity name to inspect another.
suspect_match = grouped_matches.index[0]
mapped_names = df[df[matched_col] == suspect_match]

print(f"All plaintiff names matched to: {suspect_match}")
# Use plaintiff_col (not a hard-coded column name) so this cell always reports
# the same column that grouped_matches was computed from.
print(mapped_names[plaintiff_col].value_counts().head(30))

All plaintiff names matched to: Nora Electronics Inc LLC 
plaintiff_name
KETTLER MGMT INC    1569
UDR INC              930
MAAC INC             915
GRADY MGMT INC       801
G E MATTHEWS INC     619
MAISONETTE INC       222
JESSCORP INC         199
MJRW INC             181
TGM MANASSAS INC     180
GRADY MGT INC        131
NEXUS APTS INC       119
IVY ACRES INC        113
RPMT INC             106
CMG LEASING INC       85
REB/BILL INC          72
CAMG INC              64
RESIDENCE INC         63
H2J INC               53
BORGER MNGT INC       51
SRC VIRGINIA INC      48
DPC INC               42
HALLMARK INC          41
PEOPLE INC            41
INDIAN ROCK INC       36
LONG MEADOWS INC      34
BSV INC 401K          33
BARG INC              33
VELA INC              31
STANCIU MGMT INC      30
SUITES INC            30
Name: count, dtype: int64
