In [1]:
include("_.jl")

./1.example.md ==> ../input/1.example.md
./hierarchy/ ==> ../input/hierarchy/


make_id (generic function with 1 method)

In [2]:
# Root of the directory tree that the cells below traverse with `walk_up`.
# `SE` is defined outside this view (presumably by the `include` above).
tr = SE["hierarchy"]

"../input/hierarchy/"

In [3]:
# Path of the example markdown file, read from the settings.
em = SE["example_md"]

# Header lines to look for in .md files: the non-empty lines of the example
# file, in file order.
he_ = filter(!isempty, readlines(em))

3-element Vector{String}:
 "# ."
 "# <"
 "# >"

In [4]:
# Rename every directory and file under `tr` to its cleaned name, then report
# names matching "<digits>.<title>" (optionally ending in ".md") whose title
# starts with a digit.
# NOTE(review): `walk_up`, `move` and `Kwat.path.clean` are defined outside this
# view; presumably `walk_up` yields (root, directories, files) like `walkdir`.
println("Enforcing name format")

for (ro, di_, fi_) in walk_up(tr)

    for na in [di_; fi_]

        cl = Kwat.path.clean(na)

        if na != cl

            move(joinpath(ro, na), joinpath(ro, cl))

        end

        # Check and report the cleaned name: after the rename above it is the
        # name that actually exists, whereas `na` may no longer exist.
        if occursin(r"^[0-9]+\.[0-9]+[_0-9a-z]+(\.md$|$)", cl)

            println("Bad name? ", cl)

        end

    end

end

Enforcing name format
Bad name? 2.22q11_syndrome.md
Bad name? 2.5a_reductase_deficiency.md
Bad name? 2.46xx
Bad name? 3.46xy


In [5]:
# Renumber the entries of every directory under `tr`.
# Names accepted by `is_good_structure` are ordered by `split_good_structure`
# and get the indices 1..n in place of their first component; all other names
# follow and get the next indices prepended.
# NOTE(review): `is_good_structure`, `split_good_structure`, `walk_up` and
# `move` are defined outside this view.
println("Enforcing name numbering")

for (ro, di_, fi_) in walk_up(tr)

    go_ = String[]

    ba_ = String[]

    for na in vcat(di_, fi_)

        push!(is_good_structure(na) ? go_ : ba_, na)

    end

    sort!(go_; by = split_good_structure)

    n_go = length(go_)

    for (id, na) in enumerate(vcat(go_, ba_))

        pa1 = joinpath(ro, na)

        pa2 = if id <= n_go

            # Well-structured name: swap the leading component for the new index.
            joinpath(ro, join(vcat(id, split_good_structure(na)[2:end]), "."))

        else

            # Any other name: keep it whole and prepend the index.
            joinpath(ro, join([id, na], "."))

        end

        # Only touch the filesystem when the name actually changes.
        pa1 == pa2 || move(pa1, pa2)

    end

end

Enforcing name numbering


In [6]:
# In every directory under `tr`, rename a file to "1._.md" when its title
# equals the directory's own title, or when its title is "_" but its index is
# not "1".
# NOTE(review): `walk_up` and `move` are defined outside this view.
println("Enforcing 1._.md")

for (ro, di_, fi_) in walk_up(tr)

    # The target path is the same for every file in this directory.
    pa2 = joinpath(ro, "1._.md")

    for fi in fi_

        # First two dot-separated components: index and title.
        id, ti = split(fi, ".")

        pa1 = joinpath(ro, fi)

        if ti == split(splitdir(ro)[2], ".")[2] || (ti == "_" && id != "1")

            move(pa1, pa2)

        end

    end

end

Enforcing 1._.md


In [7]:
# Validate the content of every file under `tr` against the headers in `he_`:
#   - an empty file is overwritten with a copy of the example file `em`;
#   - a file missing any required header is reported (non-fatal);
#   - a file whose headers appear out of order raises an error.
# NOTE(review): `walk_up` is defined outside this view.
println("Checking .md content")

for (ro, di_, fi_) in walk_up(tr)

    for fi in fi_

        pa = joinpath(ro, fi)

        li_ = readlines(pa)

        if isempty(li_)

            println("Is empty (populating): ", pa)

            cp(em, pa; force = true)

        elseif !all(in(li_), he_)

            # Reported rather than raised so the remaining files are still
            # checked; previously such files were skipped without any notice.
            println("Is missing at least 1 required header: ", pa)

        elseif !all(>(0), diff([findfirst(==(he), li_) for he in he_]))

            # Every header is present here, so `findfirst` never returns
            # `nothing`; the first occurrences must be strictly increasing.
            error("Has incorrectly ordered headers: ", pa)

        end

    end

end

Checking .md content


In [8]:
# Build `ti_id`, mapping each file title to the ID returned by `make_id`.
# A file titled "_" must have index "1" and takes its title from the enclosing
# directory's name. A repeated title raises an error.
# NOTE(review): `walk_up` and `make_id` are defined outside this view.
println("IDing .md")

ti_id = Dict{String,Vector{Int64}}()

# Last path component of `tr` (dirname first drops a trailing separator).
hi = splitdir(dirname(tr))[2]

for (ro, di_, fi_) in walk_up(tr)

    for fi in fi_

        # First two dot-separated components: index and title.
        id, ti = split(fi, ".")

        if ti == "_"

            id == "1" || error("ID is not 1: ", fi)

            # Use the directory's title in place of the "_" placeholder.
            ti = split(splitdir(ro)[2], ".")[2]

        end

        pa = joinpath(ro, fi)

        haskey(ti_id, ti) && error("Duplicated: ", ti_id[ti], " and ", pa)

        ti_id[ti] = make_id(pa, hi)

    end

end

IDing .md


In [9]:
# All titles recorded in `ti_id`, sorted; `collect` makes a fresh vector, so
# sorting it in place leaves the dictionary untouched.
na_ = sort!(collect(keys(ti_id)))

1779-element Vector{String}:
 "22q11_syndrome"
 "46xx"
 "46xy"
 "5a_reductase_deficiency"
 "a_antitrypsin_deficiency"
 "abdominal_distension"
 "abdominal_obesity"
 "abdominal_pain"
 "abruptio_placenta"
 "absent_axillary_hair"
 "absent_pubic_hair"
 "acalculous_cholecystitis"
 "acetaminophen"
 ⋮
 "xanthelasma"
 "xanthogranulomatous_pyelonephritis"
 "xanthoma"
 "xxy"
 "yellow_fever_virus"
 "yersinia_pestis"
 "yolk_sac_tumor"
 "young_adult"
 "zenker_diverticulum"
 "zika_virus"
 "ziprasidone"
 "zollinger_ellison_syndrome"

In [10]:
# For each word in the `words_to_check` setting, print a separator line and
# then every name in `na_` that contains the word not directly preceded or
# followed by a lowercase letter.
# NOTE(review): the word is interpolated into the pattern unescaped, so regex
# metacharacters in a word are interpreted as regex syntax; confirm that is
# intended.
println("Checking name")

for wo in SE["words_to_check"]

    println("-" ^ 80)

    # Build the pattern once per word instead of once per name.
    re = Regex("(?<![a-z])$wo(?![a-z])")

    for na in na_

        if occursin(re, na)

            println(na)

        end

    end

    println()

end

Checking name
--------------------------------------------------------------------------------
decreased_appetite
decreased_breath_sound
decreased_heart_sound
decreased_sexual_drive
decreased_weight

--------------------------------------------------------------------------------
increased_weight

--------------------------------------------------------------------------------
low_bone_density
low_diet_ca
low_diet_carbohydrate
low_diet_fe
low_diet_i
low_diet_k
low_diet_vitamin_b1
low_diet_vitamin_b12
low_diet_vitamin_b3
low_diet_vitamin_b6
low_diet_vitamin_b9
low_diet_vitamin_d
low_hemoglobin_a1c
low_pulse_pressure
low_serum_acth
low_serum_adh
low_serum_aldosterone
low_serum_bicarbonate
low_serum_ca
low_serum_cholecystokinin
low_serum_cholesterol
low_serum_clotting_factor
low_serum_cortisol
low_serum_crh
low_serum_ferritin
low_serum_free_t4
low_serum_fsh
low_serum_gastric_inhibitory_peptide
low_serum_gastrin
low_serum_gh
low_serum_glucagon
low_serum_hdl
low_serum_hemoglobin
low_serum_i

In [11]:
# Dictionary built from the `renaming_pairs` setting: each entry is destructured
# into two values, the first becoming the key and the second the value
# (presumably "before" => "after" names; confirm against the settings file).
be_af = Dict(be => af for (be, af) in SE["renaming_pairs"])

Dict{String, String} with 1 entry:
  "" => ""