# Init

## Import

In [6]:
import duckdb
import pandas as pd
from openai import OpenAI
import json
import re

## Load data

In [16]:
df_all = pd.read_csv("../data/Building_Violations_20250421.csv")

In [8]:
df_all.head()

Unnamed: 0,ID,VIOLATION LAST MODIFIED DATE,VIOLATION DATE,VIOLATION CODE,VIOLATION STATUS,VIOLATION STATUS DATE,VIOLATION DESCRIPTION,VIOLATION LOCATION,VIOLATION INSPECTOR COMMENTS,VIOLATION ORDINANCE,...,ADDRESS,STREET NUMBER,STREET DIRECTION,STREET NAME,STREET TYPE,PROPERTY GROUP,SSA,LATITUDE,LONGITUDE,LOCATION
0,7320798,04/19/2025,04/19/2025,BR1001,OPEN,,OWNER OR LICENSED CONTRACTOR,,,The code violations listed below must be corre...,...,8100 S JUSTINE ST,8100,S,JUSTINE,ST,21148,69.0,41.74664,-87.662276,"(41.74663982161097, -87.6622758721806)"
1,7320799,04/19/2025,04/19/2025,BR3054,OPEN,,REPAIR,,REPAIR LEAK BY HARTFORD LOOP,Repair,...,8100 S JUSTINE ST,8100,S,JUSTINE,ST,21148,69.0,41.74664,-87.662276,"(41.74663982161097, -87.6622758721806)"
2,7320804,04/19/2025,04/18/2025,BR2010,OPEN,,NO ENTRY,,NO ENTRY,"Call 744-3516 to arrange entry for boiler, wat...",...,3935 W DIVERSEY AVE,3935,W,DIVERSEY,AVE,7743,,41.931694,-87.725977,"(41.93169376081675, -87.72597658142915)"
3,7320803,04/19/2025,04/18/2025,BR1001,OPEN,,OWNER OR LICENSED CONTRACTOR,,,The code violations listed below must be corre...,...,3935 W DIVERSEY AVE,3935,W,DIVERSEY,AVE,7743,,41.931694,-87.725977,"(41.93169376081675, -87.72597658142915)"
4,7320524,04/18/2025,04/18/2025,CN065014,OPEN,,REPAIR LINTELS,EXTERIOR:N :,"NORTH, WEST AND EAST ELEVATIONS - RUSTED AND ...",Failed to maintain lintel in good repair and f...,...,4903 W DICKENS AVE,4903,W,DICKENS,AVE,24508,,41.918612,-87.748767,"(41.918611994039395, -87.74876662733054)"


# Utils

In [13]:
HP_NORTH_BOUND = 41.809647
HP_SOUTH_BOUND = 41.780482
HP_WEST_BOUND = -87.615877
HP_EAST_BOUND = -87.579056

query = f"""
SELECT *
    /* ADDRESS
    ,"ID" as "VIOLATION ID"
    ,strptime("VIOLATION DATE", '%m/%d/%Y') as "VIOLATION DATE"
    ,"DEPARTMENT BUREAU"
    ,"INSPECTION NUMBER" as "INSPECTION ID"
    ,"INSPECTION CATEGORY"
    ,"INSPECTION STATUS"
    ,"VIOLATION ORDINANCE"
    ,"VIOLATION DESCRIPTION"
    ,"VIOLATION LOCATION"
    ,"VIOLATION INSPECTOR COMMENTS"
    ,"VIOLATION STATUS" */
FROM all
WHERE strptime("VIOLATION DATE", '%m/%d/%Y') >= DATE '2020-01-01'
-- AND LATITUDE BETWEEN {HP_SOUTH_BOUND} AND {HP_NORTH_BOUND}
-- AND LONGITUDE BETWEEN {HP_WEST_BOUND} AND {HP_EAST_BOUND}

"""
df = duckdb.sql(query).df()
df.head()

ParserException: Parser Error: syntax error at or near "all"

In [47]:
def generate_report_from_df(df) -> str:
    out = ""
    for _, row in df.iterrows():
        out += f"""On {row["VIOLATION DATE"].strftime("%Y-%m-%d")}, it received a complaint that it violates city ordinance "{row["VIOLATION DESCRIPTION"]}". The inspector commented: "{row["VIOLATION INSPECTOR COMMENTS"]}". """
    return out

In [None]:
def generate_report_from_df_2(df) -> str:
    out = ""
    for _, row in df.iterrows():
        out += f"""On {row["VIOLATION DATE"].strftime("%Y-%m-%d")}, it received a complaint that it violates city ordinance "{row["VIOLATION DESCRIPTION"]}". The inspector commented: "{row["VIOLATION INSPECTOR COMMENTS"]}". """
    return out

In [35]:
def generate_df_from_address_keyword(kw: str, complaints_only: bool = True) -> pd.DataFrame:
    df = df[df["ADDRESS"].str.startswith(kw.upper())]
    if complaints_only:
        return df[df["INSPECTION CATEGORY"].eq("COMPLAINT")]
    return df

In [36]:
def generate_report_from_address_keyword(kw: str, complaints_only: bool = True) -> str:
    df = generate_df_from_address_keyword(kw, complaints_only)
    return generate_report_from_df(df)

# Dev

In [5]:
df_all.dtypes

ID                                int64
VIOLATION LAST MODIFIED DATE     object
VIOLATION DATE                   object
VIOLATION CODE                   object
VIOLATION STATUS                 object
VIOLATION STATUS DATE            object
VIOLATION DESCRIPTION            object
VIOLATION LOCATION               object
VIOLATION INSPECTOR COMMENTS     object
VIOLATION ORDINANCE              object
INSPECTOR ID                     object
INSPECTION NUMBER                 int64
INSPECTION STATUS                object
INSPECTION WAIVED                object
INSPECTION CATEGORY              object
DEPARTMENT BUREAU                object
ADDRESS                          object
STREET NUMBER                     int64
STREET DIRECTION                 object
STREET NAME                      object
STREET TYPE                      object
PROPERTY GROUP                    int64
SSA                             float64
LATITUDE                        float64
LONGITUDE                       float64


## EDA

test if unique inspection ids are spread across multiple days

In [35]:
query = f"""
SELECT 
    "INSPECTION NUMBER", 
    COUNT(DISTINCT strptime("VIOLATION DATE", '%m/%d/%Y')) as n_days,
    COUNT(DISTINCT strftime(strptime("VIOLATION DATE", '%m/%d/%Y'), '%Y-%m')) as n_months
FROM df_all
GROUP BY "INSPECTION NUMBER"
HAVING n_months > 1
"""
duckdb.sql(query).df()

Unnamed: 0,INSPECTION NUMBER,n_days,n_months
0,10278175,2,2
1,10727949,2,2
2,2290066,2,2
3,2272500,3,2
4,10695717,2,2
...,...,...,...
37725,10374141,2,2
37726,14389163,2,2
37727,13073394,2,2
37728,12395971,2,2


In [27]:
df_all["INSPECTION NUMBER"].nunique()

451381

In [36]:
df = df_all[df_all["INSPECTION NUMBER"] == 10278175]
df.head()

Unnamed: 0,ID,VIOLATION LAST MODIFIED DATE,VIOLATION DATE,VIOLATION CODE,VIOLATION STATUS,VIOLATION STATUS DATE,VIOLATION DESCRIPTION,VIOLATION LOCATION,VIOLATION INSPECTOR COMMENTS,VIOLATION ORDINANCE,...,ADDRESS,STREET NUMBER,STREET DIRECTION,STREET NAME,STREET TYPE,PROPERTY GROUP,SSA,LATITUDE,LONGITUDE,LOCATION
1039749,4346610,12/17/2012,12/17/2012,CN190019,OPEN,,ARRANGE PREMISE INSPECTION,OTHER : :,INTERIOR OF BUILDING NO RESPONSE. UNABLE TO I...,Arrange for inspection of premises. (13-12-100),...,907 W ARGYLE ST,907,W,ARGYLE,ST,11409,34.0,41.973232,-87.652874,"(41.973232363026156, -87.65287395668437)"
1289559,3650234,12/17/2012,10/28/2010,CN140016,NO ENTRY,,DWELLING CLEAN AND SANITARY,INTERIOR:001 :.,"1ST FLOOR CORRIDORS, CARPET IS STAINED THRU OUT .","Keep premises clean, sanitary, and safe. (13-1...",...,907 W ARGYLE ST,907,W,ARGYLE,ST,11409,34.0,41.973232,-87.652874,"(41.973232363026156, -87.65287395668437)"
1289560,3650235,12/17/2012,10/28/2010,CN197087,NO ENTRY,,CARB MONOX DETECT IN RESID,INTERIOR:001 :.,"LAUNDRY ROOM, MISSING CARBON MONOXIDE DETECTOR.",Install carbon monoxide detector within 40 fee...,...,907 W ARGYLE ST,907,W,ARGYLE,ST,11409,34.0,41.973232,-87.652874,"(41.973232363026156, -87.65287395668437)"


## HP data

In [7]:
len(df)

6249

In [8]:
df.value_counts("VIOLATION DESCRIPTION")

VIOLATION DESCRIPTION
MAINTAIN OR REPAIR ELECT ELEVA    991
ARRANGE PREMISE INSPECTION        336
REPAIR EXTERIOR WALL              272
MAINTAIN OR REPAIR HYDRO ELEVA    235
REPAIR PORCH SYSTEM               167
                                 ... 
REMVE DEAD ANIMAL F/VCNT COMM       1
REPAIR / REBUILD  SHED              1
REPLACE DEFECTIVE WATER PIPING      1
REPAIR BATTERY LIGHT                1
OUTDOOR ARMORED CABLE               1
Name: count, Length: 321, dtype: int64

In [9]:
df.value_counts("DEPARTMENT BUREAU")

DEPARTMENT BUREAU
CONSERVATION                  4053
ELEVATOR                      1256
SPECIAL TASK FORCE             279
SPECIAL INSPECTION PROGRAM     145
PLUMBING                       138
BOILER                         115
VENTILATION                     90
NEW CONSTRUCTION                74
REFRIGERATION                   46
ELECTRICAL                      42
SIGNS                            6
DEMOLITION                       5
Name: count, dtype: int64

In [10]:
df.value_counts("INSPECTION CATEGORY")

INSPECTION CATEGORY
COMPLAINT    3498
PERIODIC     1974
PERMIT        777
Name: count, dtype: int64

In [11]:
df_5252 = df[df["ADDRESS"].str.startswith("5252")]
df_5252

Unnamed: 0,ADDRESS,VIOLATION DATE,DEPARTMENT BUREAU,INSPECTION CATEGORY,INSPECTION STATUS,VIOLATION ORDINANCE,VIOLATION DESCRIPTION,VIOLATION LOCATION,VIOLATION INSPECTOR COMMENTS,VIOLATION STATUS


In [12]:
permit = df[df["INSPECTION CATEGORY"].eq("PERMIT")]
permit

Unnamed: 0,ADDRESS,VIOLATION DATE,DEPARTMENT BUREAU,INSPECTION CATEGORY,INSPECTION STATUS,VIOLATION ORDINANCE,VIOLATION DESCRIPTION,VIOLATION LOCATION,VIOLATION INSPECTOR COMMENTS,VIOLATION STATUS
90,4822 S COTTAGE GROVE AVE,2025-04-01,ELEVATOR,PERMIT,FAILED,Failed to maintain electric elevator equipment...,MAINTAIN OR REPAIR ELECT ELEVA,,PROVIDE SUFFICIENT FIRE SERVICE KEYS INSIDE AL...,OPEN
91,4822 S COTTAGE GROVE AVE,2025-04-01,ELEVATOR,PERMIT,FAILED,Failed to maintain electric elevator equipment...,MAINTAIN OR REPAIR ELECT ELEVA,,"PROVIDE CAR ID AND BRAILLE TAGS, ALL CARS, ALL...",OPEN
92,4822 S COTTAGE GROVE AVE,2025-04-01,ELEVATOR,PERMIT,FAILED,Failed to maintain electric elevator equipment...,MAINTAIN OR REPAIR ELECT ELEVA,,"TEST/ VERIFY PHONE COMMUNICATION LOSS, ALL CAR...",OPEN
93,4822 S COTTAGE GROVE AVE,2025-04-01,ELEVATOR,PERMIT,FAILED,Failed to maintain electric elevator equipment...,MAINTAIN OR REPAIR ELECT ELEVA,,PROVIDE MOUNTED FIRE EXTINGUISHER AT TOP LANDI...,OPEN
94,4822 S COTTAGE GROVE AVE,2025-04-01,ELEVATOR,PERMIT,FAILED,Failed to maintain electric elevator equipment...,MAINTAIN OR REPAIR ELECT ELEVA,,TEST/ VERIFY FIRE SERVICE PHASE II OPERATION W...,OPEN
...,...,...,...,...,...,...,...,...,...,...
6217,6230 S DORCHESTER AVE,2020-01-16,ELEVATOR,PERMIT,PASSED,Failed to maintain hydraulic elevator equipmen...,MAINTAIN OR REPAIR HYDRO ELEVA,,REPLACE MAIN LINE DISCONNECT W/ HEAVY DUTY DIS...,COMPLIED
6218,6230 S DORCHESTER AVE,2020-01-16,ELEVATOR,PERMIT,PASSED,Failed to maintain hydraulic elevator equipmen...,MAINTAIN OR REPAIR HYDRO ELEVA,,TEST FAID'S AND BATTERY BACK-UP LOWERING,COMPLIED
6219,6230 S DORCHESTER AVE,2020-01-16,ELEVATOR,PERMIT,PASSED,Failed to maintain hydraulic elevator equipmen...,MAINTAIN OR REPAIR HYDRO ELEVA,,REMOVE/ COVER EXPOSED PIPING IN HOISTWAY( ASBE...,COMPLIED
6220,6230 S DORCHESTER AVE,2020-01-16,ELEVATOR,PERMIT,PASSED,Failed to maintain hydraulic elevator equipmen...,MAINTAIN OR REPAIR HYDRO ELEVA,,"CANT ALL LEDGES OVER 4"" IN HOISTWAY",COMPLIED


In [13]:
permit.value_counts("VIOLATION DESCRIPTION")

VIOLATION DESCRIPTION
MAINTAIN OR REPAIR ELECT ELEVA    454
MAINTAIN OR REPAIR HYDRO ELEVA     86
ARRANGE COMPLETED PERMIT INSP.     57
OPEN                               52
ARRANGE FOR RE-INSPECTION          32
SEE INSPECTOR COMMENTS             15
NONCOMPLIANT PERSONNEL HOIST       14
APPROVE PLANS ON SITE              10
REPAIR PORCH SYSTEM                 6
CONTRARY TO PLANS/OUT OF SCOPE      5
COMPLETE WORK                       4
SUBMIT REVISED PLANS                3
PRV HIGH SIDE                       3
PLANS & PERMITS REQ - CONTRCTR      3
ACCESS TO ROOF                      3
FOLLOW PLANS/RESUBMIT REVISED       3
PROVIDE                             2
INSTALL VENT/AS SHOWN ON PLANS      2
SUPPLY HOT WTR MIN TEMP 120DEG      2
INSPECT CONSTRUCTION WORK           2
ARRANGE PERMIT INSP.                2
BLANK-TEXT MUST CITE CODE SECT      1
APRV REQD TRANSPORTATION. PLEA      1
SUBMIT PLANS/GET VENT PERMIT        1
ARRANGE FOR INSPECTION              1
REPLACE DEFECTIVE TRAP      

In [14]:
no_permit = df[~df["INSPECTION CATEGORY"].eq("PERMIT")]
no_permit

Unnamed: 0,ADDRESS,VIOLATION DATE,DEPARTMENT BUREAU,INSPECTION CATEGORY,INSPECTION STATUS,VIOLATION ORDINANCE,VIOLATION DESCRIPTION,VIOLATION LOCATION,VIOLATION INSPECTOR COMMENTS,VIOLATION STATUS
0,6100 S DR MARTIN L KING JR DR,2025-04-18,CONSERVATION,COMPLAINT,FAILED,Arrange for inspection of premises. (13-12-100),ARRANGE PREMISE INSPECTION,OTHER : :OTHER,NO ENTRY TO BASEMENT.,OPEN
1,1448 E 53RD ST,2025-04-18,SPECIAL INSPECTION PROGRAM,PERIODIC,FAILED,Failed to maintain exterior stairways in safe ...,REPAIR EXTERIOR STAIR,OTHER,FRONT ELEVATION - SOUTHWEST ENTRY STAIR /HANDR...,OPEN
2,1448 E 53RD ST,2025-04-18,SPECIAL INSPECTION PROGRAM,PERIODIC,FAILED,Failed to maintain roof in sound condition and...,REPAIR ROOF,OTHER,SOUTH AND EAST ELEVATIONS /ROOF SHINGLES - LOO...,OPEN
3,1448 E 53RD ST,2025-04-18,SPECIAL INSPECTION PROGRAM,PERIODIC,FAILED,pending notice reinspection,ARRANGE FOR REINSPECTION REGAR,OTHER,INTERIOR OF BUILDING /BASEMENT AND FELLOWSHIP ...,OPEN
4,1448 E 53RD ST,2025-04-18,SPECIAL INSPECTION PROGRAM,PERIODIC,FAILED,Failed to maintain windows in sound condition ...,MAINTAIN WINDOW,OTHER,SOUTH AND WEST ELEVATION - ROSE WINDOW WITH WO...,OPEN
...,...,...,...,...,...,...,...,...,...,...
6244,835 E DREXEL SQUARE DR,2020-01-06,CONSERVATION,COMPLAINT,FAILED,"Provide and maintain every facility, piece of ...",SAFE WORKING CONDITIONS,OTHER : :OTHER,THE BOILER FOR THIS 3 UNIT BUILDING IS INOPERA...,OPEN
6245,835 E DREXEL SQUARE DR,2020-01-06,CONSERVATION,COMPLAINT,FAILED,Supply adequate hot water with minimum tempera...,SUPPLY HOT WTR MIN TEMP 120DEG,OTHER : :OTHER,THE WATER HEATER IS INOPERABLE AS WELL AS THER...,OPEN
6246,835 E DREXEL SQUARE DR,2020-01-06,CONSERVATION,COMPLAINT,FAILED,"Provide kitchen sink, bathroom sink, and/or ba...",PROVIDE COLD WATER TO FICTURES,OTHER : :OTHER,THERE IS NO WATER SERVICE TO THIS BUILDING.,OPEN
6247,5401 S CORNELL AVE,2020-01-03,ELEVATOR,PERIODIC,FAILED,Failed to maintain electric elevator equipment...,MAINTAIN OR REPAIR ELECT ELEVA,,UPDATE ALL APPROPIATE PAPERWORK TO REFLECT CUR...,OPEN


## complaints only

In [183]:
complaints = df[df["INSPECTION CATEGORY"].eq("COMPLAINT")]
complaints

Unnamed: 0,ADDRESS,VIOLATION ID,VIOLATION DATE,DEPARTMENT BUREAU,INSPECTION ID,INSPECTION CATEGORY,INSPECTION STATUS,VIOLATION ORDINANCE,VIOLATION DESCRIPTION,VIOLATION LOCATION,VIOLATION INSPECTOR COMMENTS,VIOLATION STATUS
0,6100 S DR MARTIN L KING JR DR,7320435,2025-04-18,CONSERVATION,14649807,COMPLAINT,FAILED,Arrange for inspection of premises. (13-12-100),ARRANGE PREMISE INSPECTION,OTHER : :OTHER,NO ENTRY TO BASEMENT.,OPEN
5,6100 S DR MARTIN L KING JR DR,7320437,2025-04-18,CONSERVATION,14649807,COMPLAINT,FAILED,Performed or allowed work to be performed with...,PLANS & PERMITS REQ - CONTRCTR,OTHER : :OTHER,ANY REPAIRS OR REPLACEMENT OF FLOOR STRUCTURE ...,OPEN
6,6100 S DR MARTIN L KING JR DR,7320436,2025-04-18,CONSERVATION,14649807,COMPLAINT,FAILED,Failed to maintain floor free from holes and w...,REPAIR FLOOR,OTHER : :OTHER,FLOOR IN RETAIL SPACE HAS DEFLECTION IN VARIOU...,OPEN
42,4838 S CHAMPLAIN AVE,7319476,2025-04-15,CONSERVATION,14477739,COMPLAINT,FAILED,Failed to maintain roof downspouts in good rep...,REPAIR DOWNSPOUT,EAST ELEVATION / FRONT OF COACH HOUSE /,DOWNSPOUT - MISSING / DETACHED AT THE TIME OF ...,OPEN
44,6025 S DR MARTIN L KING JR DR,7318352,2025-04-14,CONSERVATION,14648642,COMPLAINT,FAILED,Failed to repair or replace defective or missi...,REPAIR PORCH SYSTEM,EXTERIOR:E :,REAR PORCH - FOUNDATION STATUS UNKNOWN; STRING...,OPEN
...,...,...,...,...,...,...,...,...,...,...,...,...
6242,835 E DREXEL SQUARE DR,6409226,2020-01-06,CONSERVATION,13064864,COMPLAINT,FAILED,Install carbon monoxide detector within 40 fee...,CARB MONOX DETECT IN RESID,OTHER : :OTHER,THERE ARE NO CARBON MONOXIDE DETECTORS IN THIS...,OPEN
6243,835 E DREXEL SQUARE DR,6409227,2020-01-06,CONSERVATION,13064864,COMPLAINT,FAILED,Heat dwelling unit adequately from September 1...,HEAT UNIT ADEQUATELY,OTHER : :OTHER,THE 1ST FLOOR IS THE ONLY (OWNER) OCCUPIED UNI...,OPEN
6244,835 E DREXEL SQUARE DR,6409228,2020-01-06,CONSERVATION,13064864,COMPLAINT,FAILED,"Provide and maintain every facility, piece of ...",SAFE WORKING CONDITIONS,OTHER : :OTHER,THE BOILER FOR THIS 3 UNIT BUILDING IS INOPERA...,OPEN
6245,835 E DREXEL SQUARE DR,6409229,2020-01-06,CONSERVATION,13064864,COMPLAINT,FAILED,Supply adequate hot water with minimum tempera...,SUPPLY HOT WTR MIN TEMP 120DEG,OTHER : :OTHER,THE WATER HEATER IS INOPERABLE AS WELL AS THER...,OPEN


### unique HP adddress with complaints

In [19]:
complaints["ADDRESS"].nunique()

480

## 5514 S Blackstone

In [15]:
df_5514 = df[df["ADDRESS"].str.startswith("5514 S BLACKSTONE AVE")]
# .loc[lambda df: df["INSPECTION CATEGORY"].eq("COMPLAINT")]
df_5514_complaints = df_5514[df_5514["INSPECTION CATEGORY"].eq("COMPLAINT")]
df_5514_complaints.head()

Unnamed: 0,ADDRESS,VIOLATION DATE,DEPARTMENT BUREAU,INSPECTION CATEGORY,INSPECTION STATUS,VIOLATION ORDINANCE,VIOLATION DESCRIPTION,VIOLATION LOCATION,VIOLATION INSPECTOR COMMENTS,VIOLATION STATUS
1520,5514 S BLACKSTONE AVE,2024-01-16,CONSERVATION,COMPLAINT,PASSED,Heat dwelling unit adequately from September 1...,HEAT UNIT ADEQUATELY,OTHER : :OTHER,"102 - LOIVINGO ROOM TEMP - 43F, BEDROOM - 49F;...",OPEN
1521,5514 S BLACKSTONE AVE,2024-01-16,CONSERVATION,COMPLAINT,PASSED,Failed to maintain windows in relation to the ...,WINDOW REPAIR FOR AIR SEEPAGE,OTHER : :OTHER,"102, 103, AND 104 - WINDOWS DRAFTY WITH AIR SE...",COMPLIED
1522,5514 S BLACKSTONE AVE,2024-01-16,CONSERVATION,COMPLAINT,FAILED,Failed to maintain interior walls and ceilings...,REPAIR INTERIOR WALLS/CEILING,OTHER : :OTHER,1ST FLOOR / MAIL BOX AREA - HOLE IN CEILING,OPEN
1523,5514 S BLACKSTONE AVE,2024-01-16,CONSERVATION,COMPLAINT,PASSED,Repair or replace defective or out of service ...,REPAIR/REPLACE SMOKE DETECTORS,OTHER : :OTHER,104 - SMOKE DETECTOR - OUT OF SERVICE.,COMPLIED
1524,5514 S BLACKSTONE AVE,2024-01-16,CONSERVATION,COMPLAINT,PASSED,Supply adequate hot water with minimum tempera...,SUPPLY HOT WTR MIN TEMP 120DEG,OTHER : :OTHER,"102, 103, 104 AND 205 - HOT WATER TEMPERATURE ...",COMPLIED


In [23]:
report_5514 = generate_report_from_df(df_5514_complaints)
report_5514

'This apartment received a total of 13 complaints. On 2024-01-16, it received a complaint that it violates city ordiance "HEAT UNIT ADEQUATELY". The inspector commented: "102 - LOIVINGO ROOM TEMP - 43F, BEDROOM - 49F; 103 - LIVING ROOM TEMP - 57F, BEDROOM - 61F; 104 - LIVING ROOM / BEDROOM TEMP - 49F. HEATING SYSTEM IS OFF AT TIME OF INSPECTION.". On 2024-01-16, it received a complaint that it violates city ordiance "WINDOW REPAIR FOR AIR SEEPAGE". The inspector commented: "102, 103, AND 104 - WINDOWS DRAFTY WITH AIR SEEPAGE.". On 2024-01-16, it received a complaint that it violates city ordiance "REPAIR INTERIOR WALLS/CEILING". The inspector commented: "1ST FLOOR / MAIL BOX AREA - HOLE IN CEILING". On 2024-01-16, it received a complaint that it violates city ordiance "REPAIR/REPLACE SMOKE DETECTORS". The inspector commented: "104 - SMOKE DETECTOR - OUT OF SERVICE.". On 2024-01-16, it received a complaint that it violates city ordiance "SUPPLY HOT WTR MIN TEMP 120DEG". The inspector co

## 5220 S HARPER AVE

In [32]:
generate_report_from_address_keyword("5132 S CORNELL")

'This apartment received a total of 9 complaints. On 2024-04-25, it received a complaint that it violates city ordiance "OBSTRUCTIONS IN EXIT WAY". The inspector commented: "SOUTH - GARBAGE RECEPTACLES STORED IN THRU-CORRIDOR OBSTRUCTING EXITWAY; WEST/#5128-30 REAR PORCH - GARBAGE BAGS, CONCRETE BLOCK STORED ON 1ST FLOOR DECK; BBQ GRILL ON 2ND FLOOR DECK; CHAIR ON 3RD FLOOR DECK; OBSTRUCTIONS". On 2024-04-25, it received a complaint that it violates city ordiance "REPAIR EXTERIOR WALL". The inspector commented: "EXTERIOR WALLS - WASHED OUT MORTAR AT 3RD FLOOR LEVEL OF REAR PORCH NEAR BEAM POCKETS.". On 2024-04-25, it received a complaint that it violates city ordiance "REPAIR PORCH SYSTEM". The inspector commented: "REAR PORCHES (BOTH) - FOUNDATION STATUS UNKNOWN; 6X8 BEAMS ROTTING AT 3RD FLOOR; SCAB WOOD AT BEAM/COLUMN CONNECTIONS AT 3RD FLOOR; OPEN BEAM POCKETS AT 3RD FLOOR; PAINT PEELING AT 3RD FLOOR CEILINGS; #5132 REAR PORCH - LOOSE TREADS FROM 2ND TO 3RD FLOOR; LOOSE, CURLING DEC

In [42]:
address = "5801 S DORCHESTER AVE"
df_5801 = generate_df_from_address_keyword(address)
generate_report_from_address_keyword(address)

'This apartment received a total of 12 complaints. On 2025-01-22, it received a complaint that it violates city ordiance "WINDOW REPAIR FOR AIR SEEPAGE". The inspector commented: "5801 APT A2 / WINDOW FRAMES - GAPS, DRAFTY WINDOWS, AIR SEEPAGE. 14X-3-303.13.". On 2025-01-22, it received a complaint that it violates city ordiance "UNAPPROVED HEATING DEVICE". The inspector commented: "5801 APT A2 / USING COOKING STOVE AS HEATING DEVICE. 14X-8-802.2.". On 2025-01-22, it received a complaint that it violates city ordiance "REPAIR/REPLACE SMOKE DETECTORS". The inspector commented: "5801 APT A2 / SMOKE DETECTOR - MISSING BATTERIES. 14X-5-504.8.". On 2025-01-22, it received a complaint that it violates city ordiance "HEAT UNIT ADEQUATELY". The inspector commented: "5801 APT A / LIVING ROOM TEMP - 60 DEGREES AT 1112 AM. KITCHEN TEMP - 60 DEGREES AT 1112 AM. HEATING SYSTEM IS ON AT TIME OF INSPECTION, HEAT IS INSUFFICIENT. PROVIDE PERMANENT HEATING EQUIPMENT CAPABLE OF MAINTAINING A ROOM TEMPER

# LLM Summary

In [140]:
address = "5514 S Blackstone"

In [131]:
prompt = """
Your goal is to summarize long inspection records for an apartment building into a succinct report with a one-line summary as well as bullet points of issues spotted on each occasion. You will be provided a concatenated string of the inspection records, and you will output a single json object that looks like the following example (you should NOT wrap the returned JSON object within any markdown markers such as ```json):

{
  "summary": "This building has received recent complaints regarding <> and <> concerns, including inadequate heat in at least one unit and multiple violations related to <>, which may pose potential <> risks",
  "note": "Some issues on <> are omitted for brevity."
  "summarized_issues": [
    {
      "date": "Jan 2025", // following the Mon YYYY format
      "issues": [
        {
          "emoji": "🧊", // use a relevant emoji and if possible a different one for each issue
          "description": "Insufficient heating (60°F in living room and kitchen) in Unit a, b, c" //
        },
        {
          "emoji": "🚿",
          "description": "Low hot water pressure and substandard temperature (as low as 45°F) in Units a, b, c"
        }
      ]
    },'
    {
      "date": "Mar 2024",
      "issues": []
        {
          "emoji": "🎨",
          "description": "Graffiti and overflowing trash in rear of building"
        },
        {
          "emoji": "🚪",
          "description": "Multiple apartment and hallway doors jammed, missing, or propped open with wedges—posing potential fire safety risks"
        }
      ]
    }
  ]
}

Use a more neural tone that resembles an inspector or assessor but is less esoteric and more appropriate -- one that is appropriate for presentation on an app that helps tenants find apartments. Each summarized issue description should be a short bullet-point summary that, when available, also specifies the unit number or building area at the end.

In the summary, please omit any inspection records on the following issues:
- unauthorized use of space as an event venue
- inspector denied entry or need for re-inspecton in the records
- any other issues that do not impact quality of life and likely not of interest to future tenants"""

print(prompt)


Your goal is to summarize long inspection records for an apartment building into a succinct report with a one-line summary as well as bullet points of issues spotted on each occasion. You will be provided a concatenated string of the inspection records, and you will output a single json object that looks like the following example (you should NOT wrap the returned JSON object within any markdown markers such as ```json):

{
  "summary": "This building has received recent complaints regarding <> and <> concerns, including inadequate heat in at least one unit and multiple violations related to <>, which may pose potential <> risks",
  "note": "Some issues on <> are omitted for brevity."
  "summarized_issues": [
    {
      "date": "Jan 2025", // following the Mon YYYY format
      "issues": [
        {
          "emoji": "🧊", // use a relevant emoji and if possible a different one for each issue
          "description": "Insufficient heating (60°F in living room and kitchen) in Unit a, 

In [40]:
from dotenv import load_dotenv
import os

load_dotenv()

openrouter_api_key = os.getenv("OPENROUTER_API_KEY")

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=openrouter_api_key,
)

In [145]:
def clean_json_string(json_string):
    """
    Remove markdown markers i.e. ```json from the json string
    """
    pattern = r"^```json\s*(.*?)\s*```$"
    cleaned_string = re.sub(pattern, r"\1", json_string, flags=re.DOTALL)
    return cleaned_string.strip()

In [178]:
def llm_summarize_from_address_keyword(kw: str) -> dict:
    completion = client.chat.completions.create(
        model="deepseek/deepseek-prover-v2:free",
        response_format={"type": "json_schema"},
        messages=[
            {"role": "user", "content": prompt},
            {"role": "user", "content": generate_report_from_address_keyword(kw)},
        ],
    )
    # raw_return still may contain markdown markers i.e. ```json
    try:
        raw_return = completion.choices[0].message.content
        parsed_return = json.loads(clean_json_string(raw_return))
        return parsed_return
    except Exception as e:
        print(f"Error parsing JSON: {e}")
        return {}

In [158]:
json_eg = llm_summarize_from_address_keyword(address)
print(json_eg)

{'summary': "This building has received complaints regarding inadequate heating, plumbing issues, and structural concerns, posing risks to tenants' comfort and safety. Recent violations highlight heating and water system deficiencies, structural deterioration, and the need for comprehensive repairs and permits.", 'note': 'Some issues which do not directly impact quality of life are omitted for brevity.', 'summarized_issues': [{'date': 'Jan 2025', 'issues': [{'emoji': '❄️', 'description': 'Broken furnace and inadequate heating (66°F) in the building'}, {'emoji': '🛠️', 'description': 'Heating pipes disconnected in the building'}]}, {'date': 'Nov 2024', 'issues': [{'emoji': '🚰', 'description': 'Defective waste piping in Units 6042 (1st and 2nd floors)'}, {'emoji': '🛁', 'description': 'Leaking water from ceiling in bathroom of Unit 6042 (1st floor)'}, {'emoji': '🛀', 'description': 'Insufficient hot water (below 120°F) throughout the building'}]}, {'date': 'Aug 2024', 'issues': [{'emoji': '

In [151]:
json_parsed = clean_json_string(json_eg)
type(json.loads(json_parsed))

dict

In [97]:
# create all unique HP addresses:
all_unique_hp_addresses_with_complaints_since_2020 = complaints["ADDRESS"].unique()
all_unique_hp_addresses_with_complaints_since_2020

array(['6100 S DR MARTIN L KING JR DR', '4838 S CHAMPLAIN AVE',
       '6025 S DR MARTIN L KING JR DR', '1418 E HYDE PARK BLVD',
       '716 E 47TH ST', '1167 E 52ND ST', '6137 S EVANS AVE',
       '6211 S DORCHESTER AVE', '6015 S HARPER AVE',
       '5140 S HYDE PARK BLVD', '860 E 63RD ST', '5118 S DORCHESTER AVE',
       '645 E 62ND ST', '4809 S FORRESTVILLE AVE', '5454 S EVERETT AVE',
       '4916 S DREXEL BLVD', '6201 S CHAMPLAIN AVE', '6241 S DREXEL AVE',
       '6148 S VERNON AVE', '6236 S EVANS AVE', '5445 S INGLESIDE AVE',
       '6045 S DR MARTIN L KING JR DR', '6130 S EBERHART AVE',
       '6200 S ST LAWRENCE AVE', '5124 S GREENWOOD AVE',
       '5540 S HYDE PARK BLVD', '6127 S EVANS AVE', '5464 S ELLIS AVE',
       '6126 S WOODLAWN AVE', '6149 S RHODES AVE', '6132 S EBERHART AVE',
       '6042 S VERNON AVE', '707 E 62ND ST', '4830 S DREXEL BLVD',
       '4940 S EAST END AVE', '5100 S ELLIS AVE',
       '6134 S ST LAWRENCE AVE', '1224 E 52ND ST',
       '5801 S DORCHESTER AVE

In [106]:
# Testing on a few examples
for address in all_unique_hp_addresses_with_complaints_since_2020[:1]:
    summary_json = llm_summarize_from_address_keyword(address)
    print(f"ADDRESS: {address}\nSUMMARY: {summary_json}")
    print("\n----------------------------\n")

ADDRESS: 6100 S DR MARTIN L KING JR DR
SUMMARY: ```json
{
  "summary": "This building has received multiple complaints regarding heating, plumbing, electrical, and structural issues, including inadequate heat, plumbing violations, electrical hazards, and uneven flooring posing trip hazards in various units and common areas",
  "note": "Some issues on denied entry, re-inspection, or deadends are omitted for brevity.",
  "summarized_issues": [
    {
      "date": "Apr 2025",
      "issues": [
        {
          "emoji": "🧊",
          "description": "No entry to basement (possible lack of fire separation)"
        },
        {
          "emoji": "📋",
          "description": "Plans and permits required for repairs or replacement of the floor structure (retail space)"
        },
        {
          "emoji": "🔧",
          "description": "Floors in retail space have damage and require repairs (deflection, broken tiles, damaged subfloors)"
        }
      ]
    },
    {
      "date": "May 

In [179]:
# Load existing data if the file exists
try:
    with open("inspection_summaries.json", "r") as file:
        results = json.load(file)
except FileNotFoundError:
    results = {}

# Load existing addresses with issues if the file exists
try:
    with open("addresses_with_issues.json", "r") as file:
        addresses_with_issues = json.load(file)
except FileNotFoundError:
    addresses_with_issues = []

for idx, address in enumerate(all_unique_hp_addresses_with_complaints_since_2020[64:]):
    print(f"Processing address no. {idx}: {address}")
    try:
        summary_json = llm_summarize_from_address_keyword(address)
        results[address] = summary_json
        # Write the updated results back to the file
        with open("inspection_summaries.json", "w") as file:
            json.dump(results, file, indent=4)
    except Exception as e:
        print(f"Error processing address {address}: {e}")
        addresses_with_issues.append(address)
        continue

# Write the addresses with issues back to the file
with open("addresses_with_issues.json", "w") as file:
    json.dump(addresses_with_issues, file, indent=4)

Processing address no. 0: 5325 S COTTAGE GROVE AVE
Error parsing JSON: 'NoneType' object is not subscriptable
Processing address no. 1: 6040 S HARPER AVE
Processing address no. 2: 4917 S DREXEL BLVD
Processing address no. 3: 727 E 60TH ST
Processing address no. 4: 6158 S DR MARTIN L KING JR DR
Processing address no. 5: 6126 S GREENWOOD AVE
Processing address no. 6: 6211 S KIMBARK AVE
Processing address no. 7: 4900 S DREXEL BLVD
Processing address no. 8: 6253 S COTTAGE GROVE AVE
Processing address no. 9: 6103 S WOODLAWN AVE
Processing address no. 10: 5496 S HYDE PARK BLVD
Processing address no. 11: 5336 S HYDE PARK BLVD
Processing address no. 12: 5130 S DREXEL AVE
Processing address no. 13: 4751 S VINCENNES AVE
Processing address no. 14: 724 E 50TH PL
Processing address no. 15: 6107 S ST LAWRENCE AVE
Processing address no. 16: 5019 S DREXEL BLVD
Error parsing JSON: Expecting value: line 1 column 1 (char 0)
Processing address no. 17: 6206 S RHODES AVE
Processing address no. 18: 6140 S DR

In [168]:
returned = llm_summarize_from_address_keyword("5002 S CHAMPLAIN AVE")

In [180]:
addresses_with_issues

[]