In [96]:
import csv
import json

def clean(text):
    """Normalize one raw CSV cell into a tidy string.

    Replaces the mojibake sequence 'â€™' (a right single quotation mark whose
    UTF-8 bytes were decoded as Windows-1252) with a plain apostrophe, removes
    stray 'Â' characters, and trims surrounding whitespace.

    Args:
        text: Raw cell value. May be None or empty (e.g. a missing CSV field).

    Returns:
        The cleaned string, or "" when ``text`` is None or empty.
    """
    if not text:
        return ""
    # Repair first, strip last: deleting a leading/trailing 'Â' can expose
    # whitespace at the edges that an earlier strip() would have missed.
    return text.replace('â€™', "'").replace('Â', '').strip()

In [97]:
# Name stored under the "filename" key of the generated JSON document
filename = "Constitution_Schedules.pdf"

# Locations of the CSV that is read and of the JSON that is produced
json_path = "part_Constitution_Schedules.json"
csv_path = "Constitution_Schedules.csv"

In [98]:
# Accumulator for every page record, keyed by its integer page number
pages_dict = dict()

In [99]:
# Build the page -> articles -> clauses -> sub-clauses hierarchy from the CSV.
# Rebind to a fresh dict so that re-running this cell does not append duplicate
# clauses to pages collected by a previous run.
pages_dict = {}

# 'utf-8-sig' reads plain UTF-8 unchanged but also drops a leading BOM, which
# would otherwise become part of the first header name and break row[...] lookups.
with open(csv_path, newline='', encoding='utf-8-sig') as csvfile:
    reader = csv.DictReader(csvfile)

    for row in reader:
        try:
            page_num = int(clean(row['Page']))
        except ValueError as exc:
            # Same exception type as before, now pointing at the offending line.
            raise ValueError(
                f"{csv_path}, line {reader.line_num}: "
                f"invalid Page value {row['Page']!r}"
            ) from exc
        part = clean(row['Part'])
        part_title = clean(row['Chapter'])
        art_num = clean(row['Article_Number'])
        art_title = clean(row['Article_Title'])
        clause_num = clean(row['Clause_Number'])
        clause_text = clean(row['Clause_Text'])
        sub_clause_letter = clean(row['Sub_Clause_Letter'])
        sub_clause_text = clean(row['Sub_Clause_Text'])
        amendment = clean(row['Amendments'])
        annotation = clean(row['Annotations'])

        # The first row seen for a page fixes that page's part and title.
        if page_num not in pages_dict:
            pages_dict[page_num] = {
                "page_number": page_num,
                "content": {
                    "part": part,
                    "title": part_title,
                    "articles": [],
                    "annotations": []
                }
            }

        page = pages_dict[page_num]
        articles = page["content"]["articles"]
        annotations = page["content"]["annotations"]

        # Record each distinct annotation once per page, in first-seen order.
        if annotation and annotation not in annotations:
            annotations.append(annotation)

        # Articles are looked up per page: the same article number appearing on
        # another page gets its own entry under that page.
        article = next((a for a in articles if a["number"] == art_num), None)
        if article is None:
            article = {
                "number": art_num,
                "title": art_title,
            }
            articles.append(article)

        # Rows without a clause number contribute nothing below the article level.
        if clause_num == "":
            continue

        # Clause numbers are stored parenthesised, e.g. "1" -> "(1)".
        clause_label = clause_num if clause_num.startswith("(") else f"({clause_num})"
        # 'clauses' is only added to articles that actually have clause rows.
        clauses = article.setdefault("clauses", [])

        if sub_clause_letter == "":
            # No sub-clause letter: this row is a main clause.
            clause_obj = {
                "number": clause_label,
                "text": clause_text
            }
            if amendment:
                clause_obj["amendments"] = [amendment]
            clauses.append(clause_obj)
        else:
            # Sub-clause row: it belongs to the most recent clause of the article.
            if not clauses:
                # No parent on this page yet (e.g. the clause started on an
                # earlier page). Create it from this row's own clause columns
                # instead of silently dropping the sub-clause.
                clauses.append({
                    "number": clause_label,
                    "text": clause_text
                })
            # NOTE(review): an 'Amendments' value on a sub-clause row is not
            # recorded anywhere (unchanged from the original) - confirm intended.
            clauses[-1].setdefault("sub_clauses", []).append({
                "letter": sub_clause_letter,
                "text": sub_clause_text
            })

In [100]:
# Order the page records by ascending page number (dict keys are unique, so
# sorting on the key alone fully determines the order)
ordered_entries = sorted(pages_dict.items(), key=lambda entry: entry[0])
pages_list = [page_record for _, page_record in ordered_entries]

# Top-level document: the source file name followed by the ordered pages
final_json = dict(filename=filename, pages=pages_list)

In [101]:
# Persist the assembled document as indented, non-ASCII-preserving JSON
with open(json_path, "w", encoding="utf-8") as jsonfile:
    jsonfile.write(json.dumps(final_json, indent=2, ensure_ascii=False))

print("✅ JSON successfully written to {}".format(json_path))


✅ JSON successfully written to part_Constitution_Schedules.json
