### 生成されたall_data.jsonに画像のパスを追記するためのプログラム．
#### このプログラムはgutenbergIDを冠したディレクトリと同階層に存在する想定で作成している．

# In [7]:
import json
import re

def sanitize_directory_name(name):
    """Return *name* reduced to characters usable in a directory name.

    Spaces are turned into underscores first; afterwards every character
    that is not an ASCII letter, an ASCII digit or an underscore is dropped.
    """
    with_underscores = name.replace(' ', '_')
    # Anything still outside [a-zA-Z0-9_] is removed, not substituted.
    cleaned = re.sub(r'[^a-zA-Z0-9_]', '', with_underscores)
    return cleaned

def add_image_path(objs, gutenberg_id):
    """Attach an ``image`` path to every node reachable from *objs*.

    The entries are modified in place and nothing is returned.

    * An entry with a ``"subSection"`` key is treated as a folder: only its
      sub-sections are visited (recursively), even if it also has ``"nodes"``.
    * An entry with a ``"nodes"`` key is treated as a file: each node gets an
      ``image`` value built from *gutenberg_id*, the sanitized
      ``sectionName`` of the entry and the sanitized ``label`` of the node.
    * Any other entry only triggers a message on stdout.
    """
    for entry in objs:
        if "subSection" in entry:
            # Folder-like entry: descend into its children.
            add_image_path(entry["subSection"], gutenberg_id)
            continue

        if "nodes" not in entry:
            print("Unknown object type found in the data.")
            continue

        # File-like entry: one image per node, grouped by section directory.
        section_name = sanitize_directory_name(entry["sectionName"])
        for node in entry["nodes"]:
            node_label = sanitize_directory_name(node['label'])
            node['image'] = f'/static/visualizer/summarized_data/{gutenberg_id}/sample0/images/{section_name}/{node_label}.png'

def process_files(gutenbergID):
    """Add image paths to the ``all_data.json`` file of each Gutenberg ID.

    For every ID, ``./<id>/sample0/all_data.json`` (relative to the current
    working directory) is loaded, updated in place by ``add_image_path`` and
    written back to the same path as UTF-8 JSON with a 4-space indent.

    Args:
        gutenbergID: Iterable of Gutenberg IDs; each one names a directory.

    A missing file is reported on stdout and skipped, so the remaining IDs
    are still processed.
    """
    for book_id in gutenbergID:
        file_path = f'./{str(book_id)}/sample0/all_data.json'

        # Keep the try body down to the read: that is the step expected to
        # raise FileNotFoundError, and the handle is closed before rewriting.
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
        except FileNotFoundError:
            print(f"File not found: {file_path}")
            continue

        add_image_path(data, book_id)

        # Serialize fully before opening in 'w' mode: opening truncates the
        # file, so a failure during encoding must not happen after that point.
        serialized = json.dumps(data, ensure_ascii=False, indent=4)
        with open(file_path, 'w', encoding='utf-8') as outfile:
            outfile.write(serialized)

if __name__ == "__main__":
    # Gutenberg IDs whose ./<id>/sample0/all_data.json gets image paths added.
    gutenbergID = [
        11, 12, 16, 35, 120, 146,
        164, 829, 1257, 1661, 18155,
    ]
    process_files(gutenbergID)
