In [17]:
import os
import json
import re
from bs4 import BeautifulSoup, NavigableString
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any
from markdownify import markdownify as html_to_md
from collections import defaultdict
from tqdm import tqdm

In [18]:
with open("/Users/liao/myProjects/VSCode_workspace/rockbook/resources/crags_urls.json", "r") as f:
    info_list = json.load(f)

print(len(info_list))
print(info_list[0].keys())

31
dict_keys(['title', 'href', 'data_nid', 'data_subtype', 'children'])


In [19]:
@dataclass
class CragNode:
    title: str
    href: str = ""
    data_nid: str = ""
    data_subtype: str = ""
    children: List["CragNode"] = field(default_factory=list)
    # {title(str): {"content": str in markdown format, "ref_id": str}, ..., "tags": list[str]}
    info_dict: Optional[Dict[str, Any]] = field(default_factory=dict)
    routes: List["RouteNode"] = field(default_factory=list)

    @staticmethod
    def from_dict(d: Dict[str, Any]) -> "CragNode":
        return CragNode(
            title=d["title"],
            href=d["href"],
            data_nid=d["data_nid"],
            data_subtype=d["data_subtype"],
            children=[CragNode.from_dict(c) for c in d.get("children", [])],
        )

    def __repr__(self) -> str:
        return f"CragNode(title={self.title}, href={self.href}, data_nid={self.data_nid}, data_subtype={self.data_subtype}, children_count={len(self.children)}, info_dict={json.dumps(self.info_dict, indent=2, ensure_ascii=False)}), routes={self.routes})"

    def __str__(self) -> str:
        return self.__repr__()

    def __eq__(self, other: object) -> bool:
        if isinstance(other, str):
            return self.data_nid == other
        return self.data_nid == other.data_nid

    def __hash__(self) -> int:
        return hash(self.data_nid)


@dataclass
class RouteNode:
    name: str
    data_nid: str
    index: int
    style: Optional[str] = None
    height: Optional[str] = None
    bolts: Optional[str] = None
    # markdown format, tags are in format #[tag]
    desc_md: Optional[str] = None
    # {"what": <"首攀", "自由首攀", "开线/定线", "NA">, "who": list[str], "when": {year: int, month: int, day: int}, "note": (optional: str)}
    # "NA"表示其他类型记录， 比如最年轻攀登记录: https://www.thecrag.com/zh_hans/climbing/china/route/24535542
    history_dict: Optional[dict] = field(default_factory=dict)
    # {"system": Dict[str, list], "display": str}
    # grading "system" examples:
    # {'7510858': {'5.13b', '5.11b', '5.9', '5.11a', '5.13c', '--', '5.7', '5.15a', '5.10a', '5.11d', '5.12d', '5.10c', '5.3', '5.4', '5.13d', '5.0', '5.6', '5.14b', '5.2', '5.12c', '5.11c', '5.13a', '5.1', '5.14c', '5.5', '5.10d', '5.10b', '5.14d', '5.12b', '5.12a', '5.8', '5.14a'}, '7510864': {'5.10+', '5.14-', '5.12+', '5.13', '5.14', '5.11+', '5.9', '5.11', '5.12', '5.13+', '5.12-', '5.14+', '5.10-', '5.13-', '5.11-', '5.5', '5.10'}, '7510954': {'C2', 'C1', 'C2+', 'C1+', 'C3'}, '7510948': {'A4+', 'A2', 'A3+', 'A0', 'A2+', 'A1+', 'A1', 'A3'}, '7510960': {'VB', 'VB+', 'V7', 'V3', 'V0', 'V9', 'V1', 'V11', 'V12', 'VB-', 'V6', 'V0-', 'V0+', 'V5', 'V4', 'V8', 'V2', 'V10'}, '7511020': {'R', 'PG13', 'G', 'X', 'PG'}, '285768865': {'A', 'D', 'B', 'C'}, '7510882': {'6c', '5a', '8b+', '5b', '4b', '9b', '5c', '6b', '7a+', '7a', '7c+', '9a+', '8a+', '8a', '8c', '7b+', '6b+', '5c+', '6a+', '8b', '8c+', '7c', '9a', '6a', '6c+', '7b'}, '7510996': {'WI3-', 'WI6', 'WI5', 'WI5+', 'WI4', 'WI3', 'WI4+', 'WI6+', 'WI3+', 'WI2', 'WI2+'}, '7511014': {'III', 'V', 'VI', 'IV'}, '7511002': {'M7', 'M10', 'M8', 'M12', 'M6', 'M9', 'M11', 'M3', 'M4', 'M5'}, '7510870': {'Class 5', 'Class 4'}, '208414621': {'6C+', '7C', '7C+'}, '7510852': {'23', '19'}, '7510894': {'E6', 'E1', 'E2'}, '7510984': {'A', 'B', 'D+'}, '343015924': {'D7'}, '622228540': {'II'}, '7511008': {'AD', 'D', 'F', 'PD'}, '7510888': {'5', '5+'}, '7510990': {'AI4', 'AI2'}, '621218338': {'IV'}}
    grade_dict: Optional[dict] = field(default_factory=dict)
    # {pitchIdx(int): {"grade": str, "height": str}}
    pitch_dict: Optional[dict] = field(default_factory=dict)

    @staticmethod
    def create(name: str, data_nid: str, index: int) -> "RouteNode":
        return RouteNode(
            name=name,
            data_nid=data_nid,
            index=index,
        )

    def __repr__(self) -> str:
        return f"RouteNode(name={self.name}, data_nid={self.data_nid}, index={self.index}, " f"style={self.style}, height={self.height}, bolts={self.bolts}, desc_md={self.desc_md}, " f"history_md={json.dumps(self.history_dict, indent=2, ensure_ascii=False)}, " f"grade_dict={json.dumps(self.grade_dict, indent=2, ensure_ascii=False)}, " f"pitch_dict={json.dumps(self.pitch_dict, indent=2, ensure_ascii=False)})"

    def __str__(self) -> str:
        return self.__repr__()

    def __eq__(self, other: object) -> bool:
        if isinstance(other, str):
            return self.data_nid == other
        return self.data_nid == other.data_nid

    def __hash__(self) -> int:
        return hash(self.data_nid)

In [20]:
@dataclass(frozen=True)
class ParsedDate:
    year: Optional[int]
    month: Optional[int]
    day: Optional[int]


def _clamp_ymd(y: Optional[int], m: Optional[int], d: Optional[int]) -> ParsedDate:
    # 基础范围校验，避免明显脏数据
    if y is not None and not (1 <= y <= 9999):
        y = None
    if m is not None and not (1 <= m <= 12):
        m = None
    if d is not None and not (1 <= d <= 31):
        d = None
    return ParsedDate(y, m, d)


def parse_ymd(s: str) -> ParsedDate:
    if s is None:
        return ParsedDate(None, None, None)

    s = str(s).strip()
    if not s:
        return ParsedDate(None, None, None)

    # 统一空白
    s = re.sub(r"\s+", " ", s)

    # 1) 2012-08-03 / 2012/8/3 / 2012.8.3
    m = re.search(r"\b(\d{4})[\/\-.](\d{1,2})[\/\-.](\d{1,2})\b", s)
    if m:
        y, mo, d = map(int, m.groups())
        return _clamp_ymd(y, mo, d)

    # 2) 2012年8月3日 / 2012年8月 / 2012年
    m = re.search(r"(\d{4})\s*年\s*(?:(\d{1,2})\s*月\s*(?:(\d{1,2})\s*(?:日|号))?)?", s)
    if m:
        y = int(m.group(1))
        mo = int(m.group(2)) if m.group(2) else None
        d = int(m.group(3)) if m.group(3) else None
        return _clamp_ymd(y, mo, d)

    # 3) 8月 2012 / 8月2012 / Aug 2012 / August 2012
    month_map = {
        "jan": 1,
        "january": 1,
        "feb": 2,
        "february": 2,
        "mar": 3,
        "march": 3,
        "apr": 4,
        "april": 4,
        "may": 5,
        "jun": 6,
        "june": 6,
        "jul": 7,
        "july": 7,
        "aug": 8,
        "august": 8,
        "sep": 9,
        "sept": 9,
        "september": 9,
        "oct": 10,
        "october": 10,
        "nov": 11,
        "november": 11,
        "dec": 12,
        "december": 12,
    }

    # 3a) 中文：8月 2012
    m = re.search(r"\b(\d{1,2})\s*月\s*(\d{4})\b", s)
    if m:
        mo, y = int(m.group(1)), int(m.group(2))
        return _clamp_ymd(y, mo, None)

    # 3b) 英文月份在前：Aug 2012
    m = re.search(r"\b([A-Za-z]+)\s+(\d{4})\b", s)
    if m:
        mon_str = m.group(1).lower()
        mon_key = mon_str[:3] if mon_str[:3] in month_map else mon_str
        mo = month_map.get(mon_key)
        if mo is not None:
            y = int(m.group(2))
            return _clamp_ymd(y, mo, None)

    # 4) 仅年份：2012
    m = re.search(r"\b(\d{4})\b", s)
    if m:
        y = int(m.group(1))
        return _clamp_ymd(y, None, None)

    return ParsedDate(None, None, None)

In [21]:
root = {"title": "China", "href": "/zh_hans/climbing/china", "data_nid": "11746747", "data_subtype": "region", "children": info_list}
root_node = CragNode.from_dict(root)

crag_node_map = {}
child_to_parent_map = {}

BASE_URL = "https://www.thecrag.com"


def traverse(node: CragNode):
    node.href = BASE_URL + node.href
    crag_node_map[node.data_nid] = node
    crag_node_map[node.href] = node
    for child in node.children:
        traverse(child)
        child_to_parent_map[child.data_nid] = node.data_nid


traverse(root_node)
print(f"Total crag nodes: {len(crag_node_map)}")

Total crag nodes: 4052


In [22]:
predefined_tags = {
    "线路特征": {
        "岩石类型": {
            "沉积岩": ["沉积岩", "砾岩", "白云岩", "杂砂岩", "砂砾岩", "石灰岩", "砂岩", "长石砂岩", "燧石", "沉积角砾岩"],
            "变质岩": ["变质岩", "片麻岩", "大理石", "石英岩", "片岩", "蛇纹岩", "角闪石", "绿片岩和蓝片岩", "角页岩", "板岩", "混合岩"],
            "火成岩": ["火成岩", "花岗岩", "斜长岩", "橄榄岩", "辉长岩", "闪长岩", "辉绿岩", "正长岩", "花岗闪长岩", "二长岩"],
            "火成火山岩": ["火成火山岩", "玄武岩", "粒玄岩", "熔灰岩", "流纹岩", "凝灰岩", "粗面岩", "碱玄岩", "响岩", "安山岩", "集块岩", "火成角砾岩"],
            "热液岩": ["热液岩", "石英"],
        },
        "陡度": ["内倾", "垂直的", "外倾/仰角", "屋檐/天花板"],
        "朝向": ["北向", "东北向", "东向", "东南向", "南向", "西南向", "西向", "西北向"],
        "下降": ["步行下撤", "倒攀下撤", "绳降下撤"],
        "条件": ["松动的", "浮砂", "泥泞的", "多植被", "潮湿的", "光滑的", "破碎的", "被破坏"],
        "风格": ["岩面", "内倾", "裂缝", "烟囱", "宽缝", "夹角", "体操型", "动态型", "跨度大", "狭窄的", "挂绳器", "山脊"],
        "开发": ["已充分开发", "有开发潜力", "线路开发中", "新区域", "未攀登地区"],
        "植被": ["草地", "灌木丛", "针叶树", "落叶乔木"],
    },
    "舒适性": {
        "天气": {
            "晴天": ["全天日晒", "上午日照", "午间日照", "下午日晒", "长期背阴"],
            "雨天": ["雨天潮湿", "潮湿时脆弱", "雨天不湿", "干燥速度慢", "快速干燥"],
            "风": ["背风", "风大"],
        },
        "设施": ["饮用水", "厕所", "山屋", "露营", "烧烤", "用火", "禁止露营", "禁止用火"],
        "家庭友好度": ["适宜儿童", "适宜宠物", "宠物禁入", "不适宜儿童"],
        "人群拥挤度": ["人迹罕至", "多初学者", "向导", "拥挤的"],
    },
    "接近性": {
        "合法性": ["攀岩专用", "允许的", "私有的", "暂许进入", "非法", "关闭的", "需付费", "Not permitted"],
        "步行时间": ["5分钟以内", "5-10分钟", "10-20分钟", "20-30分钟", "30-45分钟", "45-60分钟", "1-2小时", "2-3小时", "3小时以上"],
        "步行角度": ["绳降接近", "攀爬接近", "陡坡徒步", "温和的", "平缓"],
        "水路接近": ["任何潮汐", "仅退潮时", "须乘船"],
    },
}

In [23]:
html_list = []
nid_list = []

with open("/Users/liao/myProjects/VSCode_workspace/rockbook/resources/thecrag0/thecrag.jsonl", "r") as f:
    data = [json.loads(line) for line in f]
    html_list = [item["result"] for item in tqdm(data)]
    nid_list = [item["input"] for item in tqdm(data)]


# 读取目录中所有文件
paths = ["/Users/liao/myProjects/VSCode_workspace/rockbook/resources/thecrag1", "/Users/liao/myProjects/VSCode_workspace/rockbook/resources/thecrag2", "/Users/liao/myProjects/VSCode_workspace/rockbook/resources/thecrag3"]

for path in paths:
    for filename in tqdm(os.listdir(path)):
        if not filename.endswith(".html"):
            continue
        with open(os.path.join(path, filename), "r") as f:
            html_list.append(f.read())
            nid_list.append(filename)  # remove .html extension

print(f"Total html files: {len(html_list)}")

100%|██████████| 347/347 [00:00<00:00, 1084518.25it/s]
100%|██████████| 347/347 [00:00<00:00, 2211889.80it/s]
100%|██████████| 56/56 [00:00<00:00, 794.12it/s]
100%|██████████| 934/934 [00:00<00:00, 1052.19it/s]
100%|██████████| 683/683 [00:00<00:00, 1143.04it/s]

Total html files: 2020





In [24]:
nid_list[1168]

'2318808696.html'

In [25]:
# /Users/liao/Desktop/thecrag/精灵谷 Jinglinggu, 攀登 _ theCrag.html
# /Users/liao/Desktop/thecrag/A区, 运动攀登 | theCrag.html
# /Users/liao/myProjects/VSCode_workspace/rockbook/resources/thecrag3/4895530659.html, multi-pitch
# /Users/liao/myProjects/VSCode_workspace/rockbook/resources/thecrag2/12485971.html, sport
# /Users/liao/myProjects/VSCode_workspace/rockbook/resources/thecrag2/12486187.html, sport, laotieshan
# with open("/Users/liao/myProjects/VSCode_workspace/rockbook/resources/thecrag2/12486187.html", "r") as f:
#     html = f.read()


def bind_html_to_crag_node(html: str, crag_node_map: Dict[str, CragNode], statistics: Dict[str, Any]) -> CragNode:
    # for statistics only
    grad_system_examples = statistics["grad_system_examples"]
    grad_system_to_nid_map = statistics["grad_system_to_nid_map"]
    grad_atom_keys = statistics["grad_atom_keys"]
    history_what_set = statistics["history_what_set"]

    # parse html
    soup = BeautifulSoup(html, "lxml")
    data_nid = soup.find("body").get("data-nid")
    target_crag_node = crag_node_map.get(data_nid)
    tag_set = set()

    is_gym = True if soup.body.get("data-is-gym", False) == "1" else False
    if is_gym:
        target_crag_node.data_subtype = "gym"
        print(f"Gym Found: {target_crag_node.title} ({data_nid})")
        return target_crag_node

    # 补全链接
    main_contain = soup.select_one("#wrapper div.regions__content div.regions__read")
    for ele_a in main_contain.select("a[href]"):
        if ele_a["href"].startswith("/"):
            ele_a["href"] = BASE_URL + ele_a["href"]

    # 信息节点
    info_nodes = main_contain.select("div.regions__read div.node-info")
    for info_node in info_nodes:
        # 标题
        ele_h2 = info_node.select_one("h2")
        title = "".join(t.strip() for t in ele_h2.find_all(string=True, recursive=False) if t.strip())
        if "规划您的旅程" in title:
            continue
        # print(f"Info Title: {title}")

        # 内容
        ele_content = info_node.select_one("div.content")
        if ele_content:
            ele_content_markdown = ele_content.select_one("div.markdown")
            md_content = html_to_md(str(ele_content_markdown), heading_style="ATX")
            # print(f"Info Content (Markdown):\n{md_content}\n")

            element_content_tags = ele_content.select("a.tags")
            for a_tag in element_content_tags:
                tag_text = a_tag.text.strip()
                if tag_text:
                    tag_set.add(tag_text)

            element_content_icontags = ele_content.select("a.icontag")
            for a_tag in element_content_icontags:
                tag_text = a_tag.text.strip()
                if tag_text:
                    tag_set.add(tag_text)

            element_content_sapect_chart_tags = ele_content.select("div.aspect-chart-tags span.tags")
            for span_tag in element_content_sapect_chart_tags:
                if "display: none" not in span_tag.get("style", ""):
                    tag_text = span_tag.text.strip()
                    if tag_text:
                        tag_set.add(tag_text)

            if ele_content_markdown:
                target_crag_node.info_dict[title] = {"content": md_content}

        # 引用自
        ele_small_a = ele_h2.select_one("small.from a")
        if ele_small_a:
            content_from_href = ele_small_a.get("href", "")
            ref_node = crag_node_map[content_from_href]
            target_crag_node.info_dict[title]["ref_id"] = ref_node.data_nid
            # print(f"Content From: {ref_node.title} ({content_from_href})")

    # remove "从左至右" and "从右至左"
    if "从左至右" in tag_set:
        tag_set.remove("从左至右")
    if "从右至左" in tag_set:
        tag_set.remove("从右至左")
    target_crag_node.info_dict["tags"] = list(tag_set)
    # print(f"Tags Found: {tag_set}")

    # 子节点表格，目前只处理线路
    ele_form = main_contain.select_one("div.regions__read form")
    if ele_form:
        ele_areas = ele_form.select("div.area:not(.header)[data-nid]")
        ele_routes = ele_form.select("div.route:not(.header)[data-nid]")
        # print(f"Found {len(ele_areas)} areas and {len(ele_routes)} routes in the table.")
        if ele_routes:
            for ele_route in ele_routes:
                route_nid = ele_route["data-nid"]

                # parse dict string to dict
                route_tick_attr_str = ele_route.get("data-route-tick", None)
                assert route_tick_attr_str is not None, f"Route data-route-tick is None for route NID: {route_nid}"
                route_info_json = json.loads(route_tick_attr_str)

                # basic route info
                route_name = route_info_json["name"]
                route_idx = ele_route.select_one("div.sticky-holder span.num.toponum").text.strip()

                # Create Route and bind to crag node
                route_node = RouteNode.create(
                    name=route_name,
                    data_nid=route_nid,
                    index=int(route_idx),
                )
                target_crag_node.routes.append(route_node)

                # extra route info
                route_style = route_info_json.get("styleStub", "")
                route_node.style = route_style

                route_height = "".join(str(x) for x in route_info_json.get("displayHeight", []))
                route_node.height = route_height

                route_bolts = route_info_json.get("bolts", "")
                route_node.bolts = route_bolts

                # route description
                ele_desc = ele_route.select_one("div.desc")
                # 如果span标签是 ele_desc 的直接子标签，则将其包裹一层p标签
                for span in ele_desc.find_all("span", recursive=False):
                    span.wrap(soup.new_tag("p"))
                # 将span及span内的文字转化为 #[text]，span标签将会消失
                for span in ele_desc.select("p span.label"):
                    span.replace_with(f"#[{span.text.strip()}]")
                # 处理图片
                for img_block in ele_desc.select("div.link-photo"):
                    # 1. 主链接
                    photo_a = img_block.select_one("a.photo")
                    photo_href = photo_a.get("href") if photo_a else None
                    # 2. 标题文本（包含路线名 + 难度）
                    title_p = img_block.select_one("p.title")
                    text = title_p.get_text(" ", strip=True) if title_p else "图片"
                    # 3. 构造新标签
                    p = soup.new_tag("p")
                    a = soup.new_tag("a", href=photo_href)
                    a.string = text
                    p.append(a)
                    # 4. 替换整个 block
                    img_block.replace_with(p)
                # 只处理被目标签包裹的内容
                valid_desc_tags = ["p", "ol", "ul", "dl"]
                element_desc_pol = ele_desc.find_all(valid_desc_tags, recursive=False)
                route_desc_md = html_to_md("\n".join(str(x) for x in element_desc_pol), heading_style="ATX")
                route_node.desc_md = route_desc_md
                # print(f"Route Description: {route_desc_md}")

                # route pitches, 先从 route_info_json 里取, 然后从 desc 中验证，pitch的补充信息，现在归入desc中
                route_pitches = {}
                if "pitches" in route_info_json and int(route_info_json["pitches"]) > 1:
                    if "pitch" in route_info_json:
                        for pidx, pinfo in enumerate(route_info_json["pitch"]):
                            route_pitches[pidx + 1] = {
                                "grade": pinfo.get("grade", ""),
                                "height": pinfo.get("height", ""),
                            }
                    else:
                        # 多绳段但是没有绳段信息
                        for pidx in range(int(route_info_json["pitches"])):
                            route_pitches[pidx + 1] = {
                                "grade": "",
                                "height": "",
                            }
                ele_desc_pitches = ele_desc.select("div.pitches div")
                if ele_desc_pitches:
                    assert len(route_info_json["pitch"]) == len(ele_desc_pitches)

                # route history
                ele_history = ele_desc.select_one("div.route-history")
                if ele_history:
                    for _ele_p in ele_history.children:
                        if _ele_p == "\n":
                            continue
                        assert _ele_p.name == "p"
                        what = _ele_p.select_one("span.fa__what").text.strip(":")
                        who = _ele_p.select_one("span.fa__who").text.strip()
                        when1 = _ele_p.select_one("span.fa_when").text.strip()
                        when2 = _ele_p.select_one("span.fa_when").get("title", "").strip()
                        when = parse_ymd(when1 if len(when1) > len(when2) else when2)
                        _ele_note = _ele_p.find("span", class_="fa__note")
                        note = _ele_note.get("title") if _ele_note else None
                        if what not in history_what_set:
                            history_what_set[what].add(data_nid)
                            print(f"New History What Found: {what}, in route NID: {route_nid}")
                        if not who:
                            continue
                        route_node.history_dict = {
                            "what": what,
                            "who": [x.strip() for x in who.replace("&", ",").split(",")],
                            "when": {
                                "year": when.year,
                                "month": when.month,
                                "day": when.day,
                            },
                        }
                        if note:
                            route_node.history_dict["note"] = note

                # check unexpected tags in route description
                for _ele in ele_desc.children:
                    if _ele.name in valid_desc_tags:
                        pass
                    elif isinstance(_ele, NavigableString):
                        assert _ele == "\n", f"Unexpected NavigableString in route description for route NID: {route_nid}: {_ele} in node {target_crag_node.title}"
                    elif _ele.name == "div":
                        accepted_div_classes = ["route-history", "pitches"]
                        cls_list = _ele.get("class", [])
                        assert any(accept_cls in cls_list for accept_cls in accepted_div_classes), f"Unexpected div class in route description for route NID: {route_nid}: {cls_list} in node {target_crag_node.title}"
                    else:
                        raise ValueError(f"Unexpected tag in route description for route NID: {route_nid}: {_ele.name} in node {target_crag_node.title}")

                # grade systems
                route_grade = route_info_json.get("systems", {})
                route_grade = {str(k): v for k, v in route_grade.items()}
                for gs_key, gs_vals in route_grade.items():
                    if gs_key not in grad_system_examples:
                        print(f"New Grade System Found: [{gs_key}: {gs_vals}], in route NID: {route_nid}")
                    for gs_val in gs_vals:
                        if gs_key in grad_system_examples and gs_val not in grad_system_examples[gs_key]:
                            print(f"New Grade Example Found for System [{gs_key}: {gs_val}], in route NID: {route_nid}")
                        grad_system_examples[gs_key].add(gs_val)
                    grad_system_to_nid_map[gs_key].add(data_nid)
                route_node.grade_dict["system"] = route_grade

                # another grade, 可以确认 gradeAtom 只有 "grade" 一个字段
                route_gradeAtom = route_info_json.get("gradeAtom", {})
                for k in route_gradeAtom.keys():
                    if k not in grad_atom_keys:
                        grad_atom_keys[k].add(route_gradeAtom[k])
                        print(f"New Grade Atom Key Found: [{k}: {route_gradeAtom[k]}], in route NID: {route_nid}")

                # display grade
                route_display_grade = ele_route.select_one("div.sticky-holder span.r-grade").text.strip()
                route_node.grade_dict["display"] = route_display_grade

    return target_crag_node

In [26]:
# statistics = {
#     "grad_system_examples": grad_system_examples,
#     "grad_system_to_nid_map": grad_system_to_nid_map,
#     "grad_atom_keys": grad_atom_keys,
#     "history_what_set": history_what_set,
# }
# curr_crag_node.routes = []
# curr_crag_node = bind_html_to_crag_node(html, crag_node_map, statistics)
# curr_crag_node

In [27]:
grad_system_examples = defaultdict(set)
grad_system_to_nid_map = defaultdict(set)
grad_atom_keys = defaultdict(set)
history_what_set = defaultdict(set)

nodes_without_html = crag_node_map.copy()

region_unknown_routes = {
    "房山 Fangshan District": "https://www.thecrag.com//zh_hans/climbing/china/beijing/sidu",
    "辽阳": "https://www.thecrag.com/zh_hans/climbing/china/area/3439706169",
    "青岛 Qingdao": "https://www.thecrag.com/zh_hans/climbing/china/shandong/qingdao",
    "双廊 Shuang Lang": "https://www.thecrag.com/zh_hans/climbing/china-mainland/yunnan/shuanglang",
}
region_ignore_routes = {
    "新疆 Xinjiang": "https://www.thecrag.com/zh_hans/climbing/china/area/1782016506",  # 重复数据，直接移除
    "金榜公园": "https://www.thecrag.com/zh_hans/climbing/china/fujian/area/9648358830",  # 看起来像是错误数据
}

for html in tqdm(html_list):
    statistics = {
        "grad_system_examples": grad_system_examples,
        "grad_system_to_nid_map": grad_system_to_nid_map,
        "grad_atom_keys": grad_atom_keys,
        "history_what_set": history_what_set,
    }
    curr_crag_node = bind_html_to_crag_node(html, crag_node_map, statistics)
    nodes_without_html.pop(curr_crag_node.data_nid, None)

    if len(curr_crag_node.routes) > 0 and curr_crag_node.data_subtype == "region":
        if curr_crag_node.title in region_ignore_routes.keys():
            print(f"Region with routes found: {curr_crag_node.title} ({curr_crag_node.data_nid}), known issue, skipping.")
        elif curr_crag_node.title in region_unknown_routes.keys():
            new_crag_node = CragNode(
                title="未知岩场",
                href=curr_crag_node.href,
                data_nid=curr_crag_node.data_nid + "x",
                data_subtype="crag",
                children=[],
                info_dict={},
                routes=curr_crag_node.routes,
            )
            curr_crag_node.children.append(new_crag_node)
            curr_crag_node.routes = []
            crag_node_map[new_crag_node.data_nid] = new_crag_node
            child_to_parent_map[new_crag_node.data_nid] = curr_crag_node.data_nid
            print(f"Region with routes found: {curr_crag_node.title} ({curr_crag_node.data_nid}), moved {len(new_crag_node.routes)} routes to new crag node {new_crag_node.data_nid}.")
        else:
            raise ValueError(f"Crag Node {curr_crag_node.title} ({curr_crag_node.data_nid}) is region, expected crag.")

  1%|▏         | 27/2020 [00:02<02:18, 14.44it/s]

New History What Found: 首攀, in route NID: 12790623429
New Grade System Found: [7510858: ['5.10c']], in route NID: 12790623429
New Grade Atom Key Found: [grade: 5.10c], in route NID: 12790623429
Region with routes found: 新疆 Xinjiang (1782016506), known issue, skipping.


  2%|▏         | 41/2020 [00:03<02:27, 13.40it/s]

New History What Found: 定线/开线, in route NID: 5435848935
New History What Found: 自由首攀, in route NID: 5435848935
New Grade System Found: [7510864: ['5.10+']], in route NID: 5435848935
New Grade Example Found for System [7510864: 5.10-], in route NID: 5101421643
New Grade Example Found for System [7510858: 5.8], in route NID: 4856659206
New Grade System Found: [7510954: ['C1']], in route NID: 4856659206
New Grade Example Found for System [7510858: 5.11b], in route NID: 2769772212


  2%|▏         | 45/2020 [00:03<04:01,  8.18it/s]

New Grade Example Found for System [7510858: 5.9], in route NID: 812866716
New Grade System Found: [7510948: ['A1']], in route NID: 812866716
New Grade Example Found for System [7510858: 5.12d], in route NID: 812866617
New Grade Example Found for System [7510858: 5.12b], in route NID: 812866518
New Grade Example Found for System [7510858: 5.10b], in route NID: 812858844
New Grade Example Found for System [7510858: 5.11c], in route NID: 812858646
New Grade Example Found for System [7510858: 5.12c], in route NID: 812858547
New Grade Example Found for System [7510858: 5.11a], in route NID: 812858445
New Grade Example Found for System [7510858: 5.10d], in route NID: 812858148
New Grade Example Found for System [7510858: 5.11d], in route NID: 812858049
New Grade Example Found for System [7510858: 5.13b], in route NID: 812843091
New Grade Example Found for System [7510858: 5.13c], in route NID: 812843091
New Grade Example Found for System [7510858: 5.12a], in route NID: 3496753218
New Grade 

  3%|▎         | 61/2020 [00:05<03:33,  9.20it/s]

New Grade Example Found for System [7510960: V4], in route NID: 4952279238
New Grade Example Found for System [7510960: V1], in route NID: 4952279835
New Grade Example Found for System [7510960: V5], in route NID: 4966574751
New Grade Example Found for System [7510960: V3], in route NID: 5001464130
New Grade Example Found for System [7510960: V2], in route NID: 4952276727
Region with routes found: 金榜公园 (9648358830), known issue, skipping.


  3%|▎         | 69/2020 [00:05<02:11, 14.83it/s]

New Grade Example Found for System [7510948: A0], in route NID: 6892249536
New Grade System Found: [7511020: ['PG13']], in route NID: 6892249536


  4%|▍         | 76/2020 [00:06<02:24, 13.45it/s]

Region with routes found: 房山 Fangshan District (3963235614), moved 1 routes to new crag node 3963235614x.
New Grade System Found: [285768865: ['A']], in route NID: 3961166250
New Grade Example Found for System [285768865: B], in route NID: 3961166337
New Grade Example Found for System [285768865: C], in route NID: 3961166424


  4%|▍         | 80/2020 [00:06<02:25, 13.35it/s]

New Grade Example Found for System [7510960: V7], in route NID: 9637227486
New Grade Example Found for System [7510960: V8], in route NID: 9755056167
New Grade Example Found for System [7510960: V0], in route NID: 9645781743


  5%|▍         | 95/2020 [00:07<02:19, 13.82it/s]

New Grade Example Found for System [7510864: 5.13], in route NID: 8083277166


  5%|▌         | 107/2020 [00:08<02:24, 13.26it/s]

New Grade Example Found for System [7510858: 5.13a], in route NID: 5552830764
New Grade Example Found for System [7510864: 5.10], in route NID: 163506345


  8%|▊         | 157/2020 [00:11<01:49, 16.95it/s]

New History What Found: 线路维护, in route NID: 11023652124
New Grade System Found: [7510882: ['9b']], in route NID: 11023652124


  8%|▊         | 167/2020 [00:12<02:09, 14.36it/s]

New Grade Example Found for System [7510882: 8a], in route NID: 156161493
New Grade Example Found for System [7510882: 8a+], in route NID: 156161541
New Grade Example Found for System [7510882: 7a+], in route NID: 156161649
New Grade Example Found for System [7510882: 8c], in route NID: 156159966
New Grade Example Found for System [7510882: 8b], in route NID: 156161697
New Grade Example Found for System [7510882: 8c+], in route NID: 156161745
New Grade Example Found for System [7510882: 9a], in route NID: 156161847
New Grade Example Found for System [7510882: 6a+], in route NID: 156161895
New Grade Example Found for System [7510882: 5c], in route NID: 156161943
New Grade Example Found for System [7510882: 7c+], in route NID: 156162039
New Grade Example Found for System [7510882: 7a], in route NID: 156162135
New Grade Example Found for System [7510882: 7b+], in route NID: 156162183
New Grade Example Found for System [7510882: 6b+], in route NID: 156162375


  9%|▊         | 176/2020 [00:13<01:54, 16.17it/s]

New Grade Example Found for System [7510954: C2], in route NID: 3737371917


 10%|▉         | 193/2020 [00:14<01:48, 16.89it/s]

New Grade Example Found for System [7510864: 5.12], in route NID: 8762501031


 11%|█         | 216/2020 [00:15<01:46, 16.88it/s]

New Grade System Found: [7510996: ['WI2']], in route NID: 3963944310
Region with routes found: 辽阳 (3439706169), moved 1 routes to new crag node 3439706169x.


 14%|█▍        | 278/2020 [00:20<02:35, 11.21it/s]

Region with routes found: 青岛 Qingdao (2637536664), moved 1 routes to new crag node 2637536664x.


 14%|█▍        | 285/2020 [00:20<02:08, 13.53it/s]

New Grade Example Found for System [7510960: V9], in route NID: 5549237643
New Grade Example Found for System [7510960: V10], in route NID: 3681484053


 18%|█▊        | 362/2020 [00:25<02:04, 13.27it/s]

Region with routes found: 双廊 Shuang Lang (12486619), moved 2 routes to new crag node 12486619x.


 20%|██        | 404/2020 [00:28<01:59, 13.50it/s]

New Grade Example Found for System [7510948: A2], in route NID: 4112496363
New Grade Example Found for System [7511020: R], in route NID: 4112496363


 20%|██        | 412/2020 [00:29<02:43,  9.83it/s]

New Grade Example Found for System [7510864: 5.11+], in route NID: 3285811494


 21%|██        | 416/2020 [00:30<02:51,  9.38it/s]

New Grade Example Found for System [7510858: 5.13d], in route NID: 80307705
New Grade Example Found for System [7510858: 5.14a], in route NID: 1947983889
New Grade Example Found for System [7510864: 5.12+], in route NID: 5140230786
New Grade Example Found for System [7510864: 5.12-], in route NID: 5140230873


 21%|██▏       | 434/2020 [00:31<02:28, 10.69it/s]

New Grade Example Found for System [7511020: X], in route NID: 5247544188
New Grade Example Found for System [7510954: C3], in route NID: 5266276344
New Grade Example Found for System [7511020: PG], in route NID: 5185503195
New Grade Example Found for System [7510996: WI3], in route NID: 3966989829


 22%|██▏       | 442/2020 [00:32<02:19, 11.31it/s]

New Grade Example Found for System [7510864: 5.11-], in route NID: 8254927674


 22%|██▏       | 454/2020 [00:33<02:14, 11.64it/s]

New Grade Example Found for System [7510996: WI4], in route NID: 3958227714
New Grade Example Found for System [7510996: WI3+], in route NID: 3958227801
New Grade Example Found for System [7510996: WI2+], in route NID: 3958228236


 23%|██▎       | 456/2020 [00:33<02:46,  9.40it/s]

New Grade Example Found for System [7510864: 5.13+], in route NID: 5995579572


 23%|██▎       | 462/2020 [00:34<02:18, 11.21it/s]

New Grade Example Found for System [7510882: 6c], in route NID: 155646627
New Grade Example Found for System [7510882: 7b], in route NID: 155646681
New Grade Example Found for System [7510882: 7c], in route NID: 155647005
New Grade Example Found for System [7510882: 6b], in route NID: 155647113
New Grade Example Found for System [7510882: 6c+], in route NID: 155647167


 23%|██▎       | 468/2020 [00:35<02:53,  8.97it/s]

New Grade Example Found for System [7510882: 6a], in route NID: 157738962


 23%|██▎       | 470/2020 [00:35<03:10,  8.14it/s]

New Grade Example Found for System [7510858: 5.14d], in route NID: 80223201
New Grade Example Found for System [7510858: 5.14b], in route NID: 80222481
New History What Found: NA, in route NID: 24535542
New Grade Example Found for System [7510858: 5.14c], in route NID: 1593522720


 24%|██▍       | 482/2020 [00:36<01:48, 14.22it/s]

New Grade Example Found for System [7510882: 8b+], in route NID: 156157182


 24%|██▍       | 484/2020 [00:36<02:33,  9.98it/s]

New Grade Example Found for System [7510948: A4+], in route NID: 155797896
New Grade Example Found for System [7510864: 5.14], in route NID: 5132588844


 24%|██▍       | 488/2020 [00:37<02:28, 10.34it/s]

New Grade Example Found for System [7510882: 5b], in route NID: 156271632


 25%|██▍       | 497/2020 [00:37<02:09, 11.72it/s]

Gym Found: Climb On Gym (3767235219)


 25%|██▌       | 505/2020 [00:38<01:59, 12.65it/s]

Gym Found: 香蕉攀岩上地店 (3286639806)


 26%|██▋       | 532/2020 [00:41<02:28, 10.02it/s]

Gym Found: 奥攀攀岩（玉泉营店） (3963556593)


 27%|██▋       | 539/2020 [00:41<01:59, 12.39it/s]

New Grade Example Found for System [7510960: V0+], in route NID: 9676318323


 27%|██▋       | 553/2020 [00:43<02:05, 11.72it/s]

Gym Found: 人人攀岩(丽泽店) (7085439966)


 28%|██▊       | 566/2020 [00:44<02:06, 11.53it/s]

Gym Found: Blue Whale Climbing Gym 蓝鲸攀岩 (4491199443)
Gym Found: 常州RED攀岩馆 (4654358511)


 29%|██▉       | 590/2020 [00:46<02:44,  8.71it/s]

New Grade System Found: [7511014: ['III']], in route NID: 2326299912
New Grade Example Found for System [7511014: IV], in route NID: 2326300752


 29%|██▉       | 595/2020 [00:46<01:56, 12.20it/s]

Gym Found: O’le 798 Climbing Gym (2569226856)
New Grade Example Found for System [7510864: 5.13-], in route NID: 6060766653


 30%|███       | 610/2020 [00:47<01:21, 17.21it/s]

Gym Found: 浩石运动攀岩空间（798店） (3286639866)


 31%|███       | 628/2020 [00:49<01:53, 12.26it/s]

New Grade Example Found for System [7510882: 5a], in route NID: 156167967


 31%|███▏      | 636/2020 [00:50<01:58, 11.67it/s]

Gym Found: ET攀岩俱乐部 (7083977949)


 32%|███▏      | 649/2020 [00:52<02:36,  8.76it/s]

New Grade Example Found for System [7510858: 5.2], in route NID: 3914441700


 32%|███▏      | 655/2020 [00:52<02:05, 10.86it/s]

New Grade System Found: [7511002: ['M3']], in route NID: 5623835844


 33%|███▎      | 664/2020 [00:53<01:59, 11.34it/s]

New Grade Example Found for System [7510948: A1+], in route NID: 7816044456


 34%|███▍      | 682/2020 [00:54<01:51, 12.02it/s]

New Grade Example Found for System [7510858: 5.4], in route NID: 11509863603
New Grade System Found: [7510870: ['Class 4']], in route NID: 11509951464


 35%|███▍      | 698/2020 [00:56<01:14, 17.72it/s]

Gym Found: 攀岩最爱常营店 (3963561405)


 35%|███▌      | 714/2020 [00:57<01:46, 12.21it/s]

New Grade Example Found for System [7511002: M5], in route NID: 10783283994
New Grade Example Found for System [7511002: M8], in route NID: 10783284093
New Grade Example Found for System [7511002: M7], in route NID: 10783284204
New Grade Example Found for System [7511002: M6], in route NID: 10783284414
New Grade Example Found for System [7511002: M10], in route NID: 10783284624
New Grade Example Found for System [7511002: M11], in route NID: 10783284723


 40%|████      | 811/2020 [01:05<01:22, 14.70it/s]

New Grade Example Found for System [7510864: 5.14+], in route NID: 2177432382


 41%|████      | 821/2020 [01:06<01:23, 14.33it/s]

New Grade Example Found for System [7510996: WI4+], in route NID: 9194996550
New Grade Example Found for System [7510996: WI5], in route NID: 9194996718
New Grade System Found: [208414621: ['6C+']], in route NID: 6096875961


 43%|████▎     | 864/2020 [01:10<01:29, 12.99it/s]

New Grade Example Found for System [7510858: 5.1], in route NID: 2326292721
New Grade Example Found for System [7510858: 5.3], in route NID: 2326293879
New Grade Example Found for System [7510858: 5.0], in route NID: 2326294353


 43%|████▎     | 873/2020 [01:10<01:24, 13.58it/s]

New Grade Example Found for System [7510858: --], in route NID: 7839796002


 44%|████▍     | 891/2020 [01:12<01:27, 12.86it/s]

New Grade Example Found for System [7510960: V11], in route NID: 11277721836
New Grade Example Found for System [7510960: V12], in route NID: 11277721836
Gym Found: 道博攀岩馆 (5803751907)


 45%|████▌     | 909/2020 [01:13<01:35, 11.65it/s]

Gym Found: 香蕉攀岩768店 (3286639746)


 45%|████▌     | 913/2020 [01:14<01:49, 10.09it/s]

Gym Found: 岩舞空间攀岩厦门馆 (10941029322)


 47%|████▋     | 957/2020 [01:18<01:10, 15.12it/s]

Gym Found: 人人攀岩 (2735156892)


 48%|████▊     | 964/2020 [01:18<01:14, 14.17it/s]

New Grade Example Found for System [7510882: 9a+], in route NID: 2268004638
Gym Found: 岩时攀岩馆 Rock Hour (2565197880)


 50%|████▉     | 1003/2020 [01:21<01:04, 15.80it/s]

New Grade System Found: [7510852: ['19']], in route NID: 13971097


 50%|█████     | 1013/2020 [01:22<01:18, 12.88it/s]

New Grade System Found: [7510894: ['E2']], in route NID: 2326311930


 51%|█████     | 1030/2020 [01:24<01:22, 12.06it/s]

New Grade Example Found for System [7510996: WI6], in route NID: 3140858208


 51%|█████▏    | 1040/2020 [01:25<01:30, 10.88it/s]

New Grade Example Found for System [7510996: WI3-], in route NID: 5756570073


 52%|█████▏    | 1052/2020 [01:26<01:08, 14.22it/s]

New Grade Example Found for System [7510960: V0-], in route NID: 4013036442


 58%|█████▊    | 1170/2020 [01:36<01:20, 10.55it/s]

New Grade Example Found for System [7510996: WI5+], in route NID: 4314502047
New Grade Example Found for System [7510996: WI6+], in route NID: 4442178450
New Grade Example Found for System [7510864: 5.5], in route NID: 2326290225
New Grade Example Found for System [7510894: E1], in route NID: 2326290225
New Grade System Found: [7510984: ['A']], in route NID: 2326290225


 60%|██████    | 1217/2020 [01:41<01:31,  8.75it/s]

New Grade Example Found for System [7511002: M9], in route NID: 3680299812
New Grade Example Found for System [7511002: M12], in route NID: 3680299890


 61%|██████    | 1235/2020 [01:43<01:17, 10.19it/s]

Gym Found: 问山攀岩馆 (3767236725)


 62%|██████▏   | 1252/2020 [01:45<01:05, 11.69it/s]

New Grade Example Found for System [7510882: 4b], in route NID: 156276456
New Grade Example Found for System [7510882: 5c+], in route NID: 156276894


 63%|██████▎   | 1282/2020 [01:48<01:02, 11.78it/s]

New Grade Example Found for System [7511020: G], in route NID: 6629290173


 65%|██████▌   | 1318/2020 [01:51<01:08, 10.18it/s]

New Grade Example Found for System [7510852: 23], in route NID: 15732673


 69%|██████▉   | 1390/2020 [01:57<00:51, 12.16it/s]

Gym Found: Shanghai Indoor Stadium (2317347636)


 70%|██████▉   | 1408/2020 [01:59<01:04,  9.45it/s]

New Grade System Found: [343015924: ['D7']], in route NID: 11154524622
Gym Found: 滨江攀岩 (2320925778)


 70%|███████   | 1415/2020 [01:59<00:43, 13.81it/s]

New Grade Example Found for System [7511014: V], in route NID: 8100977547
New Grade Example Found for System [7511014: VI], in route NID: 8100982266


 70%|███████   | 1419/2020 [02:00<00:58, 10.26it/s]

New Grade System Found: [622228540: ['II']], in route NID: 3131996874
New Grade System Found: [7511008: ['D']], in route NID: 3131996874


 71%|███████   | 1425/2020 [02:00<00:41, 14.24it/s]

Gym Found: 蜘蛛侠攀岩馆 (2320917921)


 72%|███████▏  | 1460/2020 [02:03<00:33, 16.51it/s]

New Grade Example Found for System [7510960: VB], in route NID: 3175229283
New Grade Example Found for System [7510948: A3+], in route NID: 8087385963


 72%|███████▏  | 1464/2020 [02:03<00:51, 10.78it/s]

Gym Found: Park Climbing Gym 爬客攀岩馆 (2320917813)


 75%|███████▍  | 1514/2020 [02:08<00:55,  9.18it/s]

New Grade System Found: [7510888: ['5+']], in route NID: 986040756


 75%|███████▌  | 1523/2020 [02:08<00:37, 13.36it/s]

New Grade Example Found for System [7511008: PD], in route NID: 11507510694
New Grade Example Found for System [7511008: AD], in route NID: 11507524311


 76%|███████▌  | 1529/2020 [02:09<00:48, 10.03it/s]

New Grade Example Found for System [7510864: 5.14-], in route NID: 2803249152


 78%|███████▊  | 1571/2020 [02:12<00:34, 12.95it/s]

New Grade Example Found for System [7510858: 5.15a], in route NID: 1823074896
New Grade Example Found for System [7510960: VB-], in route NID: 3175283754


 79%|███████▉  | 1597/2020 [02:14<00:32, 12.82it/s]

Gym Found: 西柚攀岩SeeYou Climbing Gym (11649108411)


 79%|███████▉  | 1602/2020 [02:15<00:28, 14.77it/s]

Gym Found: 攀岩主义(金高路店) (2320925724)


 80%|███████▉  | 1614/2020 [02:16<00:25, 16.15it/s]

New Grade System Found: [7510990: ['AI4']], in route NID: 3914398860


 81%|████████  | 1640/2020 [02:18<00:29, 12.74it/s]

Gym Found: 攀岩梦工厂 (3307108638)


 82%|████████▏ | 1651/2020 [02:18<00:26, 13.80it/s]

New Grade Example Found for System [7510984: D+], in route NID: 4429645242


 82%|████████▏ | 1660/2020 [02:19<00:28, 12.60it/s]

Gym Found: Roaring Climbing Gym 啸岩馆 (2320918083)


 82%|████████▏ | 1664/2020 [02:19<00:24, 14.57it/s]

Gym Found: 攀王室内攀岩 (2320918191)


 84%|████████▎ | 1687/2020 [02:21<00:18, 17.82it/s]

Gym Found: 大仁攀岩 (9255624135)


 85%|████████▌ | 1725/2020 [02:24<00:22, 12.96it/s]

New Grade Example Found for System [7510984: B], in route NID: 10825719168


 86%|████████▌ | 1731/2020 [02:25<00:21, 13.39it/s]

Gym Found: 魔岩攀登 (2320917705)


 86%|████████▌ | 1740/2020 [02:26<00:21, 12.90it/s]

Gym Found: 岩十三攀岩 (9543079563)


 87%|████████▋ | 1765/2020 [02:28<00:19, 13.03it/s]

New Grade Example Found for System [7511008: F], in route NID: 3131898069
New Grade Example Found for System [7510948: A3], in route NID: 8090918310
New Grade Example Found for System [7510948: A2+], in route NID: 8091043290


 88%|████████▊ | 1778/2020 [02:29<00:17, 13.92it/s]

Gym Found: 常春藤攀岩馆 (10040572581)


 89%|████████▊ | 1792/2020 [02:30<00:13, 16.69it/s]

New Grade System Found: [621218338: ['IV']], in route NID: 3137607339
New Grade Example Found for System [7510894: E6], in route NID: 3137607339
New Grade Example Found for System [7510954: C2+], in route NID: 3137633550


 91%|█████████ | 1830/2020 [02:33<00:19,  9.65it/s]

New Grade Example Found for System [7510888: 5], in route NID: 3680709681


 91%|█████████▏| 1844/2020 [02:34<00:13, 13.28it/s]

New Grade Example Found for System [285768865: D], in route NID: 3977592612


 92%|█████████▏| 1851/2020 [02:35<00:11, 14.55it/s]

New Grade Example Found for System [7510864: 5.9], in route NID: 8593244622
Gym Found: 变色龙攀岩 (3365086374)


 92%|█████████▏| 1862/2020 [02:36<00:18,  8.51it/s]

New Grade Example Found for System [208414621: 7C], in route NID: 3401122443
New Grade Example Found for System [208414621: 7C+], in route NID: 3401122443


 93%|█████████▎| 1878/2020 [02:37<00:10, 13.57it/s]

Gym Found: 9A攀岩(七宝万科广场店) (2320925832)


 94%|█████████▍| 1904/2020 [02:40<00:11, 10.00it/s]

Gym Found: 尽峰攀岩 (2320917975)
New Grade Example Found for System [7511002: M4], in route NID: 5716739328


 95%|█████████▍| 1910/2020 [02:40<00:07, 14.43it/s]

Gym Found: 岩舞空间攀岩(梅赛德斯奔驰文化中心) (2320917867)
New Grade Example Found for System [7510960: VB+], in route NID: 3175125732


 95%|█████████▌| 1926/2020 [02:42<00:07, 12.29it/s]

Gym Found: 家家乐国际攀岩中心 (2320925886)


 96%|█████████▋| 1947/2020 [02:43<00:04, 15.95it/s]

Gym Found: Red Gym Climbing 红攀攀岩体验中心 (2320918029)


 97%|█████████▋| 1969/2020 [02:45<00:04, 11.19it/s]

New Grade Example Found for System [7510870: Class 5], in route NID: 1129861155
Gym Found: Ribbon Climbing Gym 叶岩攀岩馆 (2320917759)


 98%|█████████▊| 1989/2020 [02:47<00:02, 12.97it/s]

Gym Found: High Five Climbing Gym (2320918137)
Gym Found: Redpoint Climbing Gym红点攀岩 (7627803783)


100%|█████████▉| 2013/2020 [02:49<00:00, 15.51it/s]

Gym Found: PONGO Climbing Gym (2317345350)
New Grade Example Found for System [7510990: AI2], in route NID: 4429666830


100%|██████████| 2020/2020 [02:49<00:00, 11.90it/s]


In [28]:
print(grad_system_examples)
print(grad_system_to_nid_map)
print(grad_atom_keys)
print(history_what_set)

defaultdict(<class 'set'>, {'7510858': {'5.13b', '5.11b', '5.9', '5.11a', '5.13c', '--', '5.7', '5.15a', '5.10a', '5.11d', '5.12d', '5.10c', '5.3', '5.4', '5.13d', '5.0', '5.6', '5.14b', '5.2', '5.12c', '5.11c', '5.13a', '5.1', '5.14c', '5.5', '5.10d', '5.10b', '5.14d', '5.12b', '5.12a', '5.8', '5.14a'}, '7510864': {'5.10+', '5.14-', '5.12+', '5.13', '5.14', '5.11+', '5.9', '5.11', '5.12', '5.13+', '5.12-', '5.14+', '5.10-', '5.13-', '5.11-', '5.5', '5.10'}, '7510954': {'C2', 'C1', 'C2+', 'C1+', 'C3'}, '7510948': {'A4+', 'A2', 'A3+', 'A0', 'A2+', 'A1+', 'A1', 'A3'}, '7510960': {'VB', 'VB+', 'V7', 'V3', 'V0', 'V9', 'V1', 'V11', 'V12', 'VB-', 'V6', 'V0-', 'V0+', 'V5', 'V4', 'V8', 'V2', 'V10'}, '7511020': {'R', 'PG13', 'G', 'X', 'PG'}, '285768865': {'A', 'D', 'B', 'C'}, '7510882': {'6c', '5a', '8b+', '5b', '4b', '9b', '5c', '6b', '7a+', '7a', '7c+', '9a+', '8a+', '8a', '8c', '7b+', '6b+', '5c+', '6a+', '8b', '8c+', '7c', '9a', '6a', '6c+', '7b'}, '7510996': {'WI3-', 'WI6', 'WI5', 'WI5+', 

In [29]:
with open("/Users/liao/myProjects/VSCode_workspace/rockbook/resources/thecrag_nodes.json", "w") as f:
    f.write(json.dumps(root_node, default=lambda o: o.__dict__, indent=2, ensure_ascii=False))