# 法律語法形式化
- version: 20250313
- spec version: 3.1
- history:
    - 20250312: 讀寫 json, 基本支援 LLM 建構 json
    - 20250313: 用 code 產生小架構的法規，增加管理功能
## 初始化：物件與函式

In [None]:
import json
import os
import re
from enum import Enum

# Directory of the LLM question/answer transcripts (.txt) read and written below
dir_txt = "txt"
# Directory of the per-law metadata JSON files
dir_json = "json"
def get_law_names_from_directory(directory_path):
    """
    Collect the names of all laws that have a regulation file in a directory.

    A law is recognised by a file named ``<law name>_law_regulation.json``;
    the law name is everything in front of that suffix. Only the directory
    itself is scanned, not its sub-directories.

    :param directory_path: Path of the directory to scan.
    :return: Sorted list of unique law names (empty if nothing matches).
    :raises OSError: If the directory cannot be listed.
    """
    suffix = "_law_regulation.json"
    # Slice the suffix off instead of splitting on it, so a law name that
    # itself contains "_law_regulation" is not truncated.
    law_names = {
        filename[: -len(suffix)]
        for filename in os.listdir(directory_path)
        if filename.endswith(suffix)
    }
    # Sorted so callers get a reproducible order across runs.
    return sorted(law_names)
def handle_regex(regex,file_path,type="col2"):
    r"""
    Read a UTF-8 text file and return the capture groups of every regex match.

    The pattern is applied with ``re.DOTALL``. Groups are emitted match by
    match, in group order, as a flat list of strings.

    :param regex: Pattern containing one or more capture groups.
    :param file_path: Path of the text file to search.
    :param type: Output mode.
        ``"col2"``: question/answer mode, e.g. for a pattern such as
        ``\*\*Q：\*\*(.*)\n\*\*A：\*\*(.*)\n``. ``*`` characters are removed
        from each group, the first group is prefixed ``Q:`` and every later
        group ``A:``, and each produced line is also printed.
        ``"col1"``: each group is returned unchanged (nothing extra printed).
        Any other value produces an empty list.
    :return: List of the produced lines.
    """
    print(f"parse file_path:{file_path}")
    with open(file_path, 'r', encoding='utf-8') as handle:
        content = handle.read()

    found_iter = re.finditer(regex, content, re.DOTALL)
    collected = []
    if type not in ("col2", "col1"):
        return collected

    for found in found_iter:
        for position, captured in enumerate(found.groups(), start=1):
            if type == "col2":
                label = "Q" if position == 1 else "A"
                entry = f"{label}:{captured.replace('*', '')}"
                print(entry)
            else:
                entry = f"{captured}"
            collected.append(entry)
    return collected

# Helper function: Load JSON data from file
def load_json_data(filepath):
    """
    Parse a UTF-8 JSON file and return its content.

    A missing file or malformed JSON is reported with a printed message and
    signalled by returning ``None``; any other error propagates.

    :param filepath: Path of the JSON file.
    :return: The decoded JSON value, or ``None`` on the two handled failures.
    """
    parsed = None
    try:
        with open(filepath, 'r', encoding='utf-8') as source:
            parsed = json.load(source)
    except FileNotFoundError:
        print(f"File not found: {filepath}")
    except json.JSONDecodeError:
        print(f"JSON format error: {filepath}")
    return parsed
    
class ConceptCategory(Enum):
    """Enumeration of legal concept categories.

    Each member's value is a Chinese label; the label is kept in Chinese so
    it can be used directly for display.
    """
    # "Core concept - definition"
    CORE_CONCEPT_DEFINITION = "核心概念 - 定義"
    # ... other categories are to be added here

class LawMetadata:
    """
    Law metadata object that bundles the different kinds of information
    describing one law.

    Attributes:
        law_name (str): Name of the law (see ``__init__`` for how it is chosen).
        short_name (str): Alias of the law; starts out equal to ``law_name``.
        law_regulation (dict): Regulation-level metadata.
        legal_concepts (list of dict): Legal concept metadata.
        hierarchy_relations (list of dict): Hierarchy relation metadata.
        law_relations (list of dict): Law relation metadata.
        law_articles (list of dict): Article metadata.
    """

    # File-name suffixes (and export keys) of the five metadata parts.
    _PARTS = (
        "law_regulation",
        "legal_concepts",
        "hierarchy_relations",
        "law_relations",
        "law_articles",
    )

    def __init__(self, law_name = None, law_regulation=None, legal_concepts=None, hierarchy_relations=None, law_relations=None, law_articles=None):
        """
        Initializes a LawMetadata object.

        Args:
            law_name (str, optional): Fallback name, used only when
                ``law_regulation`` carries no "法規名稱". Defaults to None.
            law_regulation (dict, optional): Law regulation metadata, expected as a single dict. Defaults to None.
            legal_concepts (list of dict, optional): Legal concepts metadata. Defaults to None.
            hierarchy_relations (list of dict, optional): Hierarchy relations metadata. Defaults to None.
            law_relations (list of dict, optional): Law relations metadata. Defaults to None.
            law_articles (list of dict, optional): Law articles metadata. Defaults to None.
        """
        self.law_regulation = law_regulation or {}
        self.legal_concepts = legal_concepts or []
        self.hierarchy_relations = hierarchy_relations or []
        self.law_relations = law_relations or []
        self.law_articles = law_articles or []
        # Name precedence: the regulation's own "法規名稱", then the explicit
        # argument, then the historical default "default_law".
        self.law_name = self.law_regulation.get("法規名稱") or law_name or "default_law"
        self.short_name = self.law_name

    @classmethod
    def from_json_files(cls, law_regulation, legal_concepts, hierarchy_relations, law_relations, law_articles):
        """
        Reads law metadata from five separate JSON files.

        Args:
            law_regulation (str): Filepath for law regulation metadata JSON file.
            legal_concepts (str): Filepath for legal concepts metadata JSON file.
            hierarchy_relations (str): Filepath for hierarchy relations metadata JSON file.
            law_relations (str): Filepath for law relations metadata JSON file.
            law_articles (str): Filepath for law articles metadata JSON file.

        Returns:
            LawMetadata: The loaded object, or None if any file could not be
            read (``load_json_data`` returned None for it).
        """
        loaded = [
            load_json_data(path)
            for path in (law_regulation, legal_concepts, hierarchy_relations, law_relations, law_articles)
        ]
        if any(part is None for part in loaded):
            return None

        regulation, concepts, hierarchy, relations, articles = loaded
        return cls(
            law_regulation=regulation,
            legal_concepts=concepts,
            hierarchy_relations=hierarchy,
            law_relations=relations,
            law_articles=articles,
        )

    @staticmethod
    def _serializable_concept(concept):
        """Return a copy of ``concept`` whose "概念類別" is JSON-serializable."""
        category = concept.get("概念類別")
        if isinstance(category, ConceptCategory):
            return {**concept, "概念類別": category.value}
        # No enum to convert: copy as-is, without inventing a "概念類別" key.
        return dict(concept)

    def to_json_files(self, output_prefix="gpa"):
        """
        Exports the object to five JSON files named ``<output_prefix>_<part>.json``.

        Args:
            output_prefix (str, optional): Prefix for output filenames; may
                include a directory, which is created if missing. An empty
                or None prefix falls back to ``self.law_name``. Defaults to "gpa".
        """
        if not output_prefix:
            output_prefix = self.law_name

        data_to_export = {
            "law_regulation": self.law_regulation,
            "legal_concepts": [self._serializable_concept(concept) for concept in self.legal_concepts],
            "hierarchy_relations": self.hierarchy_relations,
            "law_relations": self.law_relations,
            "law_articles": self.law_articles,
        }

        parent_dir = os.path.dirname(output_prefix)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        for part in self._PARTS:
            with open(f"{output_prefix}_{part}.json", 'w', encoding='utf-8') as f:
                json.dump(data_to_export[part], f, indent=2, ensure_ascii=False)

    @staticmethod
    def _has_prefix(record, key, prefix):
        """Tell whether ``record[key]`` already starts with ``prefix`` (missing/None counts as no)."""
        return str(record.get(key) or "").startswith(prefix)

    def renew_id(self):
        """
        Assigns IDs derived from ``self.law_name`` to every record, in place.

        The regulation ID is always rewritten to ``LT_<law>``. Other records
        keep an ID that already has the expected prefix and are otherwise
        given: ``LA_<law>_<條號>`` (articles), ``LC_<law>_<詞彙名稱>``
        (concepts), ``LR_<law>_<1-based position>`` (relations) and
        ``LH_<law>_<關聯法規>`` (hierarchy relations, field "關係代號").

        Raises:
            KeyError: If a record needing a new ID lacks the field the ID is
                built from ("條號", "詞彙名稱" or "關聯法規").
        """
        name = self.law_name
        self.law_regulation["代號"] = f"LT_{name}"

        for article in self.law_articles:
            if not self._has_prefix(article, "代號", "LA_"):
                article["代號"] = f"LA_{name}_{article['條號']}"

        for concept in self.legal_concepts:
            if not self._has_prefix(concept, "代號", "LC_"):
                concept["代號"] = f"LC_{name}_{concept['詞彙名稱']}"

        # The sequence number is the record's position, so it advances even
        # for relations that keep their existing ID.
        for seq, relation in enumerate(self.law_relations, start=1):
            if not self._has_prefix(relation, "代號", "LR_"):
                relation["代號"] = f"LR_{name}_{seq}"

        for hierarchy in self.hierarchy_relations:
            if not self._has_prefix(hierarchy, "關係代號", "LH_"):
                hierarchy["關係代號"] = f"LH_{name}_{hierarchy['關聯法規']}"

    def __repr__(self):
        return f"LawMetadata(law_name='{self.law_regulation.get('法規名稱', 'N/A')}', concept_count={len(self.legal_concepts)}, ...)"

class LawMetadataMgr:
    """
    In-memory registry of LawMetadata objects.

    Attributes:
        lms (dict): Maps a law's full name (``lm.law_name``) to its object.
        short_names (dict): Maps a short name (alias) to the full law name.
        dir_json (str): Directory from which ``load_lm_bynames`` reads.
    """

    def __init__(self, dir_json="json"):
        """
        Args:
            dir_json (str, optional): Directory holding the per-law JSON
                files. Defaults to "json".
        """
        self.lms = {}
        self.short_names = {}
        self.dir_json = dir_json

    def add_lm(self, lm , short_name=None):
        """
        Registers ``lm`` under its ``law_name``.

        Args:
            lm: Object exposing ``law_name`` and ``short_name``.
            short_name (str, optional): Alias for the law. When given it is
                stored on ``lm.short_name``; otherwise the full name doubles
                as the alias.
        """
        if short_name:
            lm.short_name = short_name
        else:
            short_name = lm.law_name

        # Alias -> full name, so a short name can be resolved back to the law.
        self.short_names[short_name] = lm.law_name
        self.lms[lm.law_name] = lm

    def _resolve_name(self, name):
        """Return the registered full name for a full or short name, else None."""
        if name in self.lms:
            return name
        full_name = self.short_names.get(name)
        return full_name if full_name in self.lms else None

    def remove_lm(self, law_name):
        """Removes a law (given by full or short name) and its aliases; unknown names are ignored."""
        full_name = self._resolve_name(law_name)
        if full_name is None:
            return
        del self.lms[full_name]
        for alias in [a for a, target in self.short_names.items() if target == full_name]:
            del self.short_names[alias]

    def find_lm(self, law_name):
        """Returns the law registered under a full or short name, or None."""
        full_name = self._resolve_name(law_name)
        if full_name is None:
            return None
        return self.lms[full_name]

    def load_lm_bynames(self, short_names):
        """
        Loads each named law from ``<dir_json>/<short_name>_<part>.json`` and
        registers it; laws whose files fail to load are skipped.

        Args:
            short_names (iterable of str): File-name prefixes of the laws.
        """
        parts = ("law_regulation", "legal_concepts", "hierarchy_relations", "law_relations", "law_articles")
        for short_name in short_names:
            paths = [f"{self.dir_json}/{short_name}_{part}.json" for part in parts]
            lm = LawMetadata.from_json_files(*paths)
            if lm:
                self.add_lm(lm,short_name)

    def export_all_to_json(self, output_prefix="all_laws"):
        """Exports every registered law with the prefix ``<output_prefix>_<law name>``."""
        for law_name, lm in self.lms.items():
            lm.to_json_files(output_prefix=f"{output_prefix}_{law_name}")

    def __repr__(self):
        return f"LawMetadataManager(law_count={len(self.lms)})"




## 單法規使用

In [None]:

law_name = "政府採購法施行細則"  # File-name prefix of the law to load

# One input file per metadata part: <dir_json>/<law_name>_<part>.json
filepaths = {
    "law_regulation": f"{dir_json}/{law_name}_law_regulation.json",
    "legal_concepts": f"{dir_json}/{law_name}_legal_concepts.json",
    "hierarchy_relations": f"{dir_json}/{law_name}_hierarchy_relations.json",
    "law_relations": f"{dir_json}/{law_name}_law_relations.json",
    "law_articles": f"{dir_json}/{law_name}_law_articles.json"
}

# 1. Read from the separate JSON files and create the LawMetadata object
#    (None when any of the files could not be loaded).
lm = LawMetadata.from_json_files(**filepaths)

if lm is not None:
    print(lm)

    # 2. Access metadata content (example)
    print("\nLaw Regulation Name:", lm.law_regulation.get("法規名稱"))
    if lm.legal_concepts:  # a law may have no concepts yet
        print("\nFirst Legal Concept Name:", lm.legal_concepts[0].get("詞彙名稱"))

    if 1:  # switch: regenerate the record IDs
        lm.renew_id()

    # 3. Modify metadata content (example)
    if 0:  # switch: off by default
        lm.law_regulation["版本"] = "20250312-Test Version"
        new_concept = {
            "代號": "concept-gpa-new-concept",
            "詞彙名稱": "New Concept",
            "定義": "This is a new concept definition.",
            "相關概念": [],
            "相關法條": [],
            "概念類別": "新增概念",
            "同義詞": [],
            "台灣觀點": "Taiwan Viewpoint.",
            "範例": "Example here.",
            "語意向量": "[]"
        }
        lm.legal_concepts.append(new_concept)

    if 1:  # switch: export
        # 4. Export to <dir_json>/<law_name>_M_<part>.json
        lm.to_json_files(output_prefix=f"{dir_json}/{law_name}_M")
else:
    # Exporting is only possible when loading succeeded.
    print(f"Could not load metadata for {law_name}; nothing exported.")






## 法規管理使用

In [65]:
if 1:  # Law management: load every law found in the manager's JSON directory
    lmmgr = LawMetadataMgr()
    # Scan the same directory the manager loads from.
    law_names = get_law_names_from_directory(lmmgr.dir_json)
    # To load a fixed subset instead, pass explicit short names, e.g.
    # lmmgr.load_lm_bynames(["憲法合併","憲法增修合併","刑法","民法","行政程序法","預算法","政府採購法","政府採購法施行細則"])
    lmmgr.load_lm_bynames(law_names)
    lms = lmmgr.lms
if 1:  # List the loaded laws
    for law_name in lms:
        print(f"{lms[law_name].law_name}" )
if 0:  # List the legal concepts of every law
    for law_name in lms:
        print(f"----- {law_name} -----")
        for lc in lms[law_name].legal_concepts:
            print(f"{lc['詞彙名稱']}")
if 0:  # Find which laws list a given law (e.g. 公司法) among their related laws
    target = "公司法"
    for law_name in lms:
        # A regulation without a "相關法規" field simply has no related laws.
        related = lms[law_name].law_regulation.get('相關法規') or []
        if target in related:
            print(f"{law_name} 跟 {target} 有關係")


民法
中華民國憲法(合併增修條文)
政府採購法施行細則
憲法增修條文(合併憲法)
中華民國刑法
政府採購法
行政程序法
預算法


## LLM 建構 json
- 由於 LLM 的產出有時會有小問題，使用時暫時需要手動開啟或關閉下方各步驟的開關，逐步執行並檢查結果

In [None]:
import base64
import os
from google import genai
from google.genai import types

def generate(client,files,law_name,user_prompt,file_path,model="gemini-2.0-flash-thinking-exp-01-21"):
    """
    Send one prompt about the uploaded files to the model and save the
    question/answer transcript.

    The request consists of one URI part per entry of ``files`` followed by
    ``user_prompt``. The streamed answer is concatenated, printed, and written
    to ``file_path`` as ``Q::<prompt>`` / ``A::<answer>``.

    Args:
        client: Client object providing ``models.generate_content_stream``.
        files: Uploaded file objects exposing ``uri`` and ``mime_type``.
        law_name: Not used by this function; kept so existing calls still work.
        user_prompt (str): Question sent to the model.
        file_path (str): Destination of the transcript file.
        model (str, optional): Model name passed to the API.

    Returns:
        str: The concatenated answer text. A failure to write the transcript
        is printed, not raised, so the answer is still returned.
    """
    file_parts = [
        types.Part.from_uri(file_uri=uploaded.uri, mime_type=uploaded.mime_type)
        for uploaded in files
    ]
    contents = [
        types.Content(
            role="user",
            parts=[*file_parts, types.Part.from_text(text=user_prompt)],
        )
    ]
    generate_content_config = types.GenerateContentConfig(
        temperature=0.7,
        top_p=0.95,
        top_k=64,
        max_output_tokens=65536,
        response_mime_type="text/plain",
        system_instruction=[
            types.Part.from_text(text="""請以台灣人的立場，用繁體中文回答"""),
        ],
    )

    print(f"Q::{user_prompt}")

    answer_chunks = []
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        # NOTE(review): a streamed chunk may carry no text (text is None);
        # skip those instead of failing on concatenation.
        if chunk.text:
            answer_chunks.append(chunk.text)
    response_text = "".join(answer_chunks)
    print(f"A::{response_text}")

    # Write the transcript; the Q::/A:: markers are what the parsing steps
    # of this notebook search for.
    try:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(f"Q::{user_prompt}\n")
            file.write(f"A::{response_text}")
    except OSError as e:
        print(f"An error occurred: {e}")
    else:
        print(f"Content written to {file_path}")
    return response_text

api_key=os.environ.get("GEMINI_API_KEY")
law_name="刑法"

# Manual switches for the steps below. LLM output sometimes needs a manual
# fix, so steps are run one at a time by flipping these flags.
RUN_SETUP_CLIENT = False           # create the LLM client and upload the files
RUN_BUILD_METADATA = True          # non-article metadata (see next flag)
GENERATE_METADATA_TXT = False      # True: ask the LLM and save .txt; False: parse saved .txt into .json
RUN_GET_MAX_ARTICLE = False        # ask the LLM for the highest article number
RUN_GENERATE_ARTICLES = False      # ask the LLM for article metadata, in batches
RUN_COMBINE_ARTICLES = False       # merge the saved article .txt files into one .json

# A fenced code block, optionally tagged "json". Non-greedy so that several
# blocks in one answer are captured separately instead of as one span.
JSON_FENCE_REGEX = r"```(?:json)?\n(.*?)```"


def _strip_json_line_comments(text):
    """Remove ``//`` line comments that are outside JSON string literals."""
    # String literals are matched first and put back unchanged, so a "//"
    # inside a value (e.g. a URL) is not mistaken for a comment.
    return re.sub(
        r'"(?:\\.|[^"\\])*"|//[^\n]*',
        lambda m: m.group(0) if m.group(0).startswith('"') else '',
        text,
    )


def _load_json_blocks(file_path):
    """Return the parsed value of every fenced JSON block in a transcript file."""
    blocks = handle_regex(JSON_FENCE_REGEX, file_path, "col1")
    if not blocks:
        raise ValueError(f"No fenced JSON block found in {file_path}")
    parsed_blocks = []
    for block in blocks:
        try:
            parsed_blocks.append(json.loads(_strip_json_line_comments(block)))
        except json.JSONDecodeError as err:
            raise ValueError(f"Invalid JSON in {file_path}: {err}") from err
    return parsed_blocks


def _flatten_json_blocks(parsed_blocks):
    """Concatenate parsed blocks into one list (lists are extended, other values appended)."""
    merged = []
    for value in parsed_blocks:
        if isinstance(value, list):
            merged.extend(value)
        else:
            merged.append(value)
    return merged


if RUN_SETUP_CLIENT:
    client = genai.Client(
        api_key = api_key,
    )

    # Both files must be available in the local working directory.
    files = [
        client.files.upload(file=f"{law_name}.md",config={'mime_type':"text/markdown"}),
        client.files.upload(file="法律語法形式化.md",config={'mime_type':"text/markdown"}),
    ]

if RUN_BUILD_METADATA:
    # [file-name part, prompt] pairs for the four non-article metadata kinds.
    prompt_list=[
        ['law_regulation',f"""根據{law_name}的整體資訊，按照法律語法形式化的設計，依照裡面範例格式，產生法規 Meta Data,盡可能詳列資訊，不要省略"""],
        ['legal_concepts',f"""根據{law_name}的整體資訊，按照法律語法形式化的設計，依照裡面範例格式，產生法律概念 Meta Data (Legal Concept Meta Data)，注意並非 法規 Meta Data，請列出全部概念，不要省略"""],
        ['hierarchy_relations',f"""根據{law_name}的整體資訊，按照法律語法形式化的設計，依照裡面範例格式，產生法規階層關係 Meta Data"""],
        ['law_relations',f"""根據{law_name}的整體資訊，按照法律語法形式化的設計，依照裡面範例格式，產生法規關聯性 Meta Data"""]
    ]
    for prompt_pair in prompt_list:
        file_path = f"{dir_txt}/{law_name}_{prompt_pair[0]}.txt"
        if GENERATE_METADATA_TXT:
            generate(client,files,law_name,prompt_pair[1],file_path)
        else:
            parsed_blocks = _load_json_blocks(file_path)
            # The usual answer has exactly one block: keep its value as-is.
            # Several blocks are concatenated into a single list.
            if len(parsed_blocks) == 1:
                json_object = parsed_blocks[0]
            else:
                json_object = _flatten_json_blocks(parsed_blocks)
            print(json_object)
            file_path = f"{dir_json}/{law_name}_{prompt_pair[0]}.json"
            with open(file_path, 'w', encoding='utf-8') as f:
                json.dump(json_object, f, indent=2, ensure_ascii=False)

if RUN_GET_MAX_ARTICLE:  # the result can also be filled in by hand below
    prompt_list=[
        ['tmp',f"""根據{law_name}的整體資訊，請問最大的條號是多少？請不要說明，用以下格式回覆：<最大條號>填入結果</最大條號>"""],
    ]
    for prompt_pair in prompt_list:
        file_path = f"{dir_txt}/{law_name}_{prompt_pair[0]}.txt"
        generate(client,files,law_name,prompt_pair[1],file_path)

    # Read back the transcript written just above (same dir_txt path).
    lines = handle_regex(r"A::<最大條號>第\s*(\d+)\s*條</最大條號>",f"{dir_txt}/{law_name}_tmp.txt","col1")
    if not lines:
        raise ValueError(f"No <最大條號> answer found in {dir_txt}/{law_name}_tmp.txt")
    print(lines[0])

    max_cnt = int(lines[0])

if RUN_GENERATE_ARTICLES:
    start = 1
    step = 10
    max_cnt = 100  # set by hand; replaces any value obtained in the step above
    prompt_list_a = []

    for i in range(start,max_cnt+1,step):
        end_cnt = min(i+step-1, max_cnt)
        prompt_list_a.append([f"article_{i}",f"根據{law_name}的整體資訊，按照法律語法形式化的設計，依照裡面範例格式，產生第{i}條到第{end_cnt}條的法條 Meta Data"])
        # NOTE: alternative prompts that name the law explicitly, kept for the merged constitution texts:
        #prompt_list_a.append([f"article_{i}",f"根據{law_name}的整體資訊，按照法律語法形式化的設計，依照裡面範例格式，產生中華民國憲法 (合併增修條文)第{i}條到第{end_cnt}條的法條 Meta Data"])
        #prompt_list_a.append([f"article_{i}",f"根據{law_name}的整體資訊，按照法律語法形式化的設計，依照裡面範例格式，產生憲法增修條文(合併憲法)第{i}條到第{end_cnt}條的法條 Meta Data"])

    for prompt_pair in prompt_list_a:
        file_path = f"{dir_txt}/{law_name}_{prompt_pair[0]}.txt"
        generate(client,files,law_name,prompt_pair[1],file_path)

if RUN_COMBINE_ARTICLES:
    max_cnt = 100  # must be adjusted by hand
    step = 10
    law_articles = []
    for i in range(1,max_cnt+1,step):
        # Same "article_<i>" name the generation step writes.
        file_path = f"{dir_txt}/{law_name}_article_{i}.txt"
        parsed_blocks = _load_json_blocks(file_path)
        print(parsed_blocks)
        law_articles.extend(_flatten_json_blocks(parsed_blocks))

    file_path = f"{dir_json}/{law_name}_law_articles.json"
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(law_articles, f, indent=2, ensure_ascii=False)