测试并读取从 Koodo Reader 导出的 CSV 文件，将每个单词的信息整理为字典，汇总成一个字典列表，输出给后续处理模块。
CSV 的位置为：../../data/source/xxx/xxx.csv（相对于本 notebook 所在目录）


In [19]:
import csv, json
import pandas
from pathlib import Path

In [None]:
def get_csv_path(dir_path: str = "../../data/source") -> list[Path]:
    """Recursively collect every CSV file below ``dir_path``.

    Args:
        dir_path: Top-level directory to search.

    Returns:
        A list of ``Path`` objects, one per ``*.csv`` file found at any
        depth below ``dir_path``. The list is sorted so the order is the
        same on every run, and it is empty when no CSV file exists.

    Raises:
        FileNotFoundError: If ``dir_path`` does not exist.

    Examples:
        Illustrative only; the concrete ``Path`` subclass in the repr
        depends on the platform::

            get_csv_path()
            # [WindowsPath('../../data/source/Musk/KoodoReader-Note-2025-09-17.csv')]
    """
    src_dir = Path(dir_path)
    if not src_dir.exists():
        raise FileNotFoundError(f"Directory not found: {dir_path}")
    # rglob yields entries in filesystem order and also matches directories
    # whose name happens to end in ".csv"; keep regular files only and sort
    # so downstream processing sees a stable sequence.
    return sorted(p for p in src_dir.rglob("*.csv") if p.is_file())


### dict中需要关注的key
text: 单词

notes：所在句子

date：添加时间

chapter: 章节

bookName：书名

bookAuthor：作者

In [23]:
def get_csv_info(csv_paths:list) -> list[dict]:
    """Load every CSV in ``csv_paths`` and flatten the rows into one list.

    Each file is parsed with ``pandas.read_csv`` and every row becomes a
    dict keyed by the CSV column names. Rows keep their order within a
    file, and files are processed in the order given.

    Args:
        csv_paths: Locations of the CSV files to read.

    Returns:
        list[dict]: One dict per CSV row across all files; an empty list
        when ``csv_paths`` is empty.
    """
    return [
        row
        for path in csv_paths
        for row in pandas.read_csv(path).to_dict(orient='records')
    ]
            
# Run both steps on the default source directory; as the last expression of
# the cell, the resulting list of row dicts is shown as the cell output.
get_csv_info(get_csv_path())

[{'key': 1758082040687,
  'bookKey': 1738143464138,
  'date': '2025-09-17',
  'chapter': 'Epigraph',
  'chapterIndex': 3,
  'text': 'dude',
  'cfi': '{"text":"To anyone I’ve offended, I just want to say, I reinvented electric cars and I’m sending people to Mars in a rocket ship. Did you think I was also going to be a chill, normal dude?","chapterTitle":"Epigraph","chapterDocIndex":"3","chapterHref":"OEBPS/part0003.xhtml","count":"0","percentage":"0.01507537688442211","page":""}',
  'range': '{"characterRange":{"start":174,"end":178},"backward":false}',
  'notes': 'Did you think I was also going to be a chill, normal dude?',
  'percentage': 0.0,
  'color': '#FBF1D1',
  'tag': nan,
  'highlightType': 'background',
  'bookName': 'Elon Musk',
  'bookAuthor': 'Walter Isaacson'},
 {'key': 1758082151531,
  'bookKey': 1738143464138,
  'date': '2025-09-17',
  'chapter': 'Cover',
  'chapterIndex': 5,
  'text': 'paramilitary',
  'cfi': '{"text":"The playground","chapterTitle":"","chapterDocIndex"