Reformat the track metadata for https://www.youtube.com/watch?v=i0b29lAuMlg&feature=youtu.be into an HTML table

In [10]:
from classical_music import *

import os

# Track listing to read.
file_txt = "100-classical-pieces.txt"

# HTML report written next to it. Only the extension is swapped:
# str.replace(".txt", ".html") would also rewrite a ".txt" appearing earlier
# in the name, and would return the input path unchanged (so the report would
# overwrite the listing) if the input did not end in ".txt".
file_html = os.path.splitext(file_txt)[0] + ".html"

In [11]:
# Load the composer lookup table. Later cells index it by the last name parsed
# from each track line and read the "fullname" and "url" entries of the match.
namemap = read_namemap()

In [12]:
# Parse the listing into the video/channel metadata and the remaining text lines.
meta_map, lines = parse_file_txt(file_txt)

# Preview the metadata and the first two lines.
meta_map, lines[:2]

({'vid': 'K3He_GSvFzY',
  'vid_name': '100 Classical Pieces',
  'channel_id': 'UCyOfqgtsQaM3S-VZnsYnHjQ',
  'channel_name': 'HALIDONMUSIC'},
 ['### THE BAROQUE PERIOD',
  '1-3 Vivaldi - The Four Seasons, ‘Spring’ / I. Allegro 00:00 '])

In [13]:
# this is data file specific
def parse_line(l):
    """Parse one track line of the listing into its data elements.

    The expected layout is ``<id> <composer> - <title> <timestamp>``.

    sample:
        11-13 Bach - Harpsichord Concerto No. 1 (Arr. for Piano) I. Allegro 28:19

    Only the first " - " separates the composer part from the title part, so
    a title that itself contains " - " is kept whole. Surrounding whitespace
    on the line is ignored.

    Args:
        l: a single text line from the data file.

    Returns:
        [id, ts, name, title], where ``id`` is the first token of the line,
        ``name`` is the rest of the text before the first " - ", ``ts`` is the
        last token of the line and ``title`` is everything between the
        separator and the timestamp (runs of spaces collapsed to one).

    Raises:
        IndexError: if the line is empty or contains only whitespace.
    """
    # Strip first so a leading space does not turn into an empty id.
    tmp = l.strip().split(" - ", 1)

    # First token is the id; whatever follows it is the composer name.
    _id, _, last_name = tmp[0].partition(" ")

    # When there is no " - " at all, tmp[-1] is the whole line.
    tmp2 = [i.strip() for i in tmp[-1].split(" ") if i.strip()]
    title = " ".join(tmp2[:-1])
    ts = tmp2[-1]
    return [_id.strip(), ts.strip(), last_name.strip(), title.strip()]

In [14]:
# Sanity-check parse_line on one sample track line and display the result.
line = "11-13 Bach - Harpsichord Concerto No. 1 (Arr. for Piano) I. Allegro 28:19 "
d = parse_line(line)
d

['11-13',
 '28:19',
 'Bach',
 'Harpsichord Concerto No. 1 (Arr. for Piano) I. Allegro']

In [15]:
# Build the table records: a one-element list for each section header and a
# four-element list [id, timestamp link, composer link, title] for each track.
records = []
for raw_line in lines:
    text = raw_line.strip()
    if not text:
        continue

    # Section headers are kept verbatim as single-element records.
    if "###" in text:
        records.append([text])
        continue

    track_id, ts, last_name, title = parse_line(text)
    if last_name in namemap:
        composer = namemap[last_name]
        records.append([
            track_id.replace(".", ""),
            make_youtube_link(meta_map["vid"], ts),
            make_href(composer["fullname"], composer["url"]),
            title,
        ])
    else:
        # Tracks by composers missing from namemap are reported and left out.
        print(f"[ERROR] {last_name} not found in namemap")

[ERROR] Floridia not found in namemap
[ERROR] Bach (attr.) not found in namemap
[ERROR] Einaudi not found in namemap
[ERROR] Rota not found in namemap
[ERROR] Lanzetta not found in namemap
[ERROR] Rota not found in namemap


In [16]:
# Preview the first three records (one section header followed by track rows).
records[:3]

[['### THE BAROQUE PERIOD'],
 ['1-3',
  '<a href=https://www.youtube.com/watch?v=K3He_GSvFzY&t=0s target=new>00:00</a>',
  '<a href=https://www.wikiwand.com/en/Antonio_Vivaldi target=new>Antonio Vivaldi</a>',
  'The Four Seasons, ‘Spring’ / I. Allegro'],
 ['1-3',
  '<a href=https://www.youtube.com/watch?v=K3He_GSvFzY&t=195s target=new>03:15</a>',
  '<a href=https://www.wikiwand.com/en/Antonio_Vivaldi target=new>Antonio Vivaldi</a>',
  'The Four Seasons, ‘Spring’ / II. Largo e pianissimo sempre']]

In [17]:
# write out HTML file
# The encoding is explicit because titles contain non-ASCII characters
# (e.g. ‘Spring’) and the platform default encoding may not be able to
# encode them.
with open(file_html, "w", encoding="utf-8") as f:

    # Page heading: video title and channel name, both linked to YouTube.
    f.write(f"""
        <h1>
        <a href=https://www.youtube.com/watch?v={meta_map["vid"]}>{meta_map["vid_name"]}</a> </h1> 
        <h2>by <a href=https://www.youtube.com/channel/{meta_map["channel_id"]}>{meta_map["channel_name"]}</a>
        </h2>
        <br>
    """)

    # Number of section tables opened so far; non-zero means one is still open.
    h3_count = 0
    for r in records:
        if len(r) == 1:
            # Section header: close the previous table (if any), then start
            # a new heading and table with its column titles.
            if h3_count:
                f.write('</table>')

            f.write(f"""
                <h3>{r[0].replace("###", "")}</h3>
                <table style="width:100%" border=1px>
                  <tr>
                    <th>Id</th>
                    <th>Track TS</th>
                    <th>Musician</th>
                    <th>Title</th>
                  </tr>
            """)

            h3_count += 1

        elif len(r) == 4:
            # Track row: r[1] and r[2] already hold ready-made <a> links.
            f.write(f"""<tr>
                <td>{r[0]}</td>
                <td>{r[1]}</td>
                <td>{r[2]}</td>
                <td>{r[3]}</td>
              </tr>
            """)

        else:
            print(f"[ERROR] unknown record {r}")

    # Close the last table only if one was actually opened; otherwise a stray
    # </table> would be emitted when records holds no section header.
    if h3_count:
        f.write('</table>')