# 🔐 PII Detection Playground

Detect personally identifiable information (PII) using NER models and LLMs.

## 📄 Load Sample Document

In [None]:
# Read the sample document into memory; the detection cells below run on `doc_text`.
# The encoding is given explicitly so the file decodes the same way on every
# platform instead of depending on the locale's default encoding.
with open("data/sample_doc.txt", encoding="utf-8") as f:
    doc_text = f.read()

print(doc_text)


## 🤖 Detect PII with spaCy NER

In [None]:
import spacy

# Entity labels predicted by the model's NER component that count as PII here.
pii_labels = {"PERSON", "ORG", "GPE", "DATE"}

nlp = spacy.load("en_core_web_sm")
doc = nlp(doc_text)

# Named entities whose label is in the PII set, printed as "text (LABEL)".
for ent in doc.ents:
    if ent.label_ in pii_labels:
        print(f"{ent.text} ({ent.label_})")

# The NER label scheme of en_core_web_sm has no EMAIL label, so filtering
# entities on "EMAIL" never matches. Email addresses are reported from the
# token-level `like_email` flag instead, in the same output format.
for token in doc:
    if token.like_email:
        print(f"{token.text} (EMAIL)")


## 🧠 Detect PII with LLM (Zero-Shot Prompting)

In [None]:
from openai import OpenAI

# NOTE(review): the client instance is bound to the name `openai`, which is
# also the package name. Kept as-is in case other cells rely on it.
openai = OpenAI()

# NOTE: the full document text, including any PII it contains, is embedded in
# the prompt and sent to the API the client is configured for. Only run this
# cell on data that is allowed to leave the machine.
prompt = f"""Identify and list all PII from the text below:

Text:
{doc_text}

List PII items only:
"""

# Zero-shot request: a single user message, no examples or system prompt.
response = openai.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": prompt}]
)

# `message.content` is optional in the SDK's response type; fall back to an
# empty string so a reply without text does not raise AttributeError on strip().
answer = response.choices[0].message.content
print((answer or "").strip())
