In [16]:
import json
from enum import Enum, auto

import requests

# Download the IANA language subtag registry and split it into lines for the
# parser below. The timeout keeps the script from hanging forever on a stalled
# connection, and raise_for_status() stops an HTTP error page from being
# parsed as if it were registry data.
response = requests.get(
    "https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry",
    timeout=30,
)
response.raise_for_status()
# The registry format is defined as UTF-8 (RFC 5646), so decode it explicitly
# rather than relying on requests' charset guess from the response headers.
response.encoding = "utf-8"
iana_subtags = response.text
lines = iana_subtags.splitlines()


class ParseState(Enum):
    """Section of the registry file the line-by-line parser is currently in."""

    # Lines before the first "%%" separator (file-level fields).
    HEADER = 1
    # Lines after the first "%%" separator (one record per "%%"-delimited group).
    RECORDS = 2


# Parse the registry text held in `lines` into:
#   metadata - fields that appear before the first "%%" separator
#   records  - one dict per "%%"-delimited group of "Key: value" lines
# A key repeated inside one record is collected into a list of values, and a
# line starting with two spaces continues the value of the preceding field.
metadata = {}
records = []

current_record = {}
current_key = ""

state = ParseState.HEADER
for line in lines:
    # Blank lines carry no field; skipping them avoids a ValueError when the
    # "Key: value" split below finds no colon.
    if not line.strip():
        continue

    if line == "%%":
        if state == ParseState.HEADER:
            # The first separator ends the header section.
            state = ParseState.RECORDS
        else:
            # Every later separator closes the record being built.
            records.append(current_record)
            current_record = {}
        continue

    if state == ParseState.HEADER:
        key, value = line.split(":", 1)
        metadata[key] = value.strip()
        continue

    if line.startswith("  "):
        continuation = " " + line.strip()
        existing = current_record[current_key]
        if isinstance(existing, list):
            # The folded line belongs to the most recent value of a repeated
            # field. `list += str` would extend the list with the string's
            # individual characters, so append to the last element instead.
            existing[-1] += continuation
        else:
            current_record[current_key] = existing + continuation
        continue

    key, value = line.split(":", 1)
    value = value.strip()
    current_key = key
    if key not in current_record:
        current_record[key] = value
    elif isinstance(current_record[key], list):
        current_record[key].append(value)
    else:
        # Second occurrence of the key: promote the single string to a list.
        current_record[key] = [current_record[key], value]

# The input is not required to end with "%%", so flush the record still in
# progress (if it has any fields).
if current_record:
    records.append(current_record)

print(json.dumps({"Metadata": metadata, "Records": records}, indent=2))

{
  "Metadata": {
    "File-Date": "2023-08-02"
  },
  "Records": [
    {
      "Type": "language",
      "Subtag": "aa",
      "Description": "Afar",
      "Added": "2005-10-16"
    },
    {
      "Type": "language",
      "Subtag": "ab",
      "Description": "Abkhazian",
      "Added": "2005-10-16",
      "Suppress-Script": "Cyrl"
    },
    {
      "Type": "language",
      "Subtag": "ae",
      "Description": "Avestan",
      "Added": "2005-10-16"
    },
    {
      "Type": "language",
      "Subtag": "af",
      "Description": "Afrikaans",
      "Added": "2005-10-16",
      "Suppress-Script": "Latn"
    },
    {
      "Type": "language",
      "Subtag": "ak",
      "Description": "Akan",
      "Added": "2005-10-16",
      "Scope": "macrolanguage"
    },
    {
      "Type": "language",
      "Subtag": "am",
      "Description": "Amharic",
      "Added": "2005-10-16",
      "Suppress-Script": "Ethi"
    },
    {
      "Type": "language",
      "Subtag": "an",
      "Description": "A