## JSON Parsing and Processing

In [None]:
import json 
import os 
# Make sure the directory for the sample JSON files exists.
# exist_ok=True keeps this cell safe to re-run when the directory is already there.
os.makedirs("data/json_files/",exist_ok=True)


## JSON Processing Strategies

In [None]:

from langchain_community.document_loaders import JSONLoader
import json

# Method 1: JSONLoader with jq_schema
print("1️⃣ JSONLoader - Extract specific fields")

# Extract employee information.
# The jq expression `.employees[]` iterates over the elements of the
# top-level `employees` array, yielding each element as its own result.
# NOTE(review): this cell assumes company_data.json already exists at this
# path; nothing visible in the preceding cells creates it — confirm.
employee_loader = JSONLoader(
    file_path='data/json_files/company_data.json',
    jq_schema='.employees[]',  # jq query to extract each employee
    text_content=False  # Get full JSON objects (presumably relaxes the string-only content check — confirm in the JSONLoader docs)
)

# Presumably returns one document per jq result — verify against JSONLoader.
employee_docs = employee_loader.load()
print(f"Loaded {len(employee_docs)} employee documents")
# NOTE(review): indexing [0] fails if nothing was loaded — confirm the file
# always contains at least one employee. Only the first 200 characters of the
# first document's content are shown here.
print(f"First employee: {employee_docs[0].page_content[:200]}...")
# Dumps every loaded document for inspection.
print(employee_docs)




In [None]:
# !pip install jq

In [None]:
# Method 2: Custom JSON processing for complex structures
from typing import List
from langchain_core.documents import Document
# Banner line so the output of the second approach is easy to spot.
print("\n2️⃣ Custom JSON Processing")

def process_json_intelligently(filepath: str) -> List[Document]:
    """Build one Document per employee from a company JSON file.

    The top-level JSON value must be an object. Each entry of its
    ``employees`` list is turned into a human-readable profile (name, role,
    skills and projects) and the identifying fields are copied into the
    document metadata so they stay available for filtering.

    Args:
        filepath: Path of the JSON file to read. It is also stored as the
            ``source`` metadata value of every document.

    Returns:
        One Document per employee, in file order. The list is empty when the
        file has no ``employees`` key.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If an employee lacks ``id``, ``name``, ``role`` or
            ``skills``, or a project lacks ``name`` or ``status``.
    """
    # JSON text is UTF-8 by specification; be explicit instead of depending
    # on the platform's locale encoding (which differs on e.g. Windows).
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    documents = []

    # Strategy 1: Create documents for each employee with full context
    for emp in data.get('employees', []):
        # Assemble the profile line by line so that the source code's own
        # indentation never leaks into the document text.
        lines = [
            "Employee Profile:",
            f"Name: {emp['name']}",
            f"Role: {emp['role']}",
            f"Skills: {', '.join(emp['skills'])}",
            "",
            "Projects:",
        ]
        # Projects are optional; an employee without any gets just the header.
        lines.extend(
            f"- {proj['name']} (Status: {proj['status']})"
            for proj in emp.get('projects', [])
        )

        doc = Document(
            page_content="\n".join(lines),
            metadata={
                'source': filepath,
                'data_type': 'employee_profile',
                'employee_id': emp['id'],
                'employee_name': emp['name'],
                'role': emp['role'],
            },
        )
        documents.append(doc)

    return documents

In [None]:
# Run the custom processor on the sample company file. The result is not
# assigned; as the last expression of a notebook cell it is presumably
# displayed as the cell output.
process_json_intelligently("data/json_files/company_data.json")