In [1]:
import json
import pandas as pd

# Load tunes metadata.
# The encoding is passed explicitly: without it, open() decodes with the
# platform's locale encoding, which is not UTF-8 everywhere, and the tune
# names in this file include non-ASCII characters.
with open("../data/TheSession-data/json/tunes.json", "r", encoding="utf-8") as f:
    tunes = json.load(f)

# Convert the loaded records to a DataFrame
tunes_df = pd.DataFrame(tunes)

# Sanity check: preview the first rows, then count records per tune type.
print(tunes_df.head())
print(tunes_df['type'].value_counts())

  tune_id setting_id                  name        type meter         mode  \
0   18105      35234        $150 Boot, The       polka   2/4       Gmajor   
1   11931      11931  'G Iomain Nan Gamhna    slip jig   9/8       Gmajor   
2   11931      48772  'G Iomain Nan Gamhna    slip jig   9/8  Amixolydian   
3   15326      28560         'S Ann An Ìle  strathspey   4/4       Gmajor   
4   15326      28582         'S Ann An Ìle  strathspey   4/4       Gmajor   

                                                 abc                 date  \
0  |:d>g fe|dB AG|E/F/G E/F/G|BA GF|\r\nd>g fe|dB...  2019-07-06 04:39:09   
1  dBB B2 A BAG|dBB Bcd efg|dBB B2 A BAG|eAA dBG ...  2012-05-17 07:49:26   
2  |:dBB BBA BAG|dBB Bcd efg|dBB BBA BAG|e2A dBG ...  2023-11-25 22:54:00   
3  |:G>A B>G c>A B>G|E<E A>G F<D D2|G>A B>G c>A B...  2016-03-31 15:34:45   
4  uD2|:{F}v[G,2G2]uB>ud c>A B>G|{D}E2 uA>uG F<D ...  2016-04-03 09:15:08   

       username  
0  NfldWhistler  
1  iliketurtles  
2   birlibirdie  
3 

In [2]:
import os
import json
import pandas as pd
from tqdm import tqdm

# Path setup. Paths are relative to the notebook's working directory (the same
# "../data/..." layout the first cell reads tunes.json from) rather than one
# user's absolute home directory, so the cell runs on any checkout.
BASE_DIR = os.path.join("..", "data", "TheSession-data", "json")
TUNES_FILE = os.path.join(BASE_DIR, "tunes.json")
SETTINGS_DIR = os.path.join(BASE_DIR, "tune-settings")

# Load tunes metadata. Encoding is explicit so decoding does not depend on the
# platform's locale default.
with open(TUNES_FILE, "r", encoding="utf-8") as f:
    tunes = json.load(f)

tune_records = []

print(f"Processing {len(tunes)} tunes...")

for tune in tqdm(tunes):
    # The records expose "tune_id" (see the DataFrame preview printed by the
    # first cell); there is no "id" field, so looking up "id" yielded None for
    # every record and no settings file was ever matched. "id" is kept as a
    # fallback for records that do carry it.
    tune_id = tune.get("tune_id", tune.get("id"))
    setting_path = os.path.join(SETTINGS_DIR, f"{tune_id}.json")

    # Default ABC info comes from the record itself, which already carries
    # these fields. NOTE(review): the record's "mode" (e.g. "Gmajor") is used
    # as the "key" column -- confirm this mapping is the intended one.
    abc = tune.get("abc")
    key = tune.get("mode")
    meter = tune.get("meter")

    # A per-tune settings file, when present, overrides the record's values
    # with those of its first entry.
    if tune_id is not None and os.path.exists(setting_path):
        try:
            with open(setting_path, "r", encoding="utf-8") as sf:
                settings = json.load(sf)
            if settings and isinstance(settings, list) and isinstance(settings[0], dict):
                first = settings[0]
                abc = first.get("abc", abc)
                key = first.get("key", key)
                meter = first.get("meter", meter)
        except (OSError, ValueError) as e:
            # ValueError covers both malformed JSON and undecodable bytes;
            # OSError covers the file vanishing or being unreadable. A bad
            # settings file is reported and skipped, not fatal.
            print(f"[!] Failed to read {setting_path}: {e}")

    tune_records.append({
        "id": tune_id,
        "name": tune.get("name"),
        "type": tune.get("type"),
        "date": tune.get("date"),
        "key": key,
        "meter": meter,
        "abc": abc
    })

# Create DataFrame (one row per record in tunes.json)
df = pd.DataFrame(tune_records)

# Save merged CSV.
# NOTE(review): unlike the input paths this one has no "../" prefix, so it
# resolves under the notebook's own working directory -- confirm that is the
# intended location before relying on it downstream.
output_path = "data/processed/merged_tunes.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path, index=False)

print(f"[✓] Saved merged tune data to {output_path}")


Processing 51128 tunes...


100%|████████████████████████████████████████████████████████████████████████████| 51128/51128 [00:00<00:00, 376844.50it/s]


[✓] Saved merged tune data to data/processed/merged_tunes.csv


In [4]:
import json
import pandas as pd
import os

# Input and output locations, both relative to the notebook's working
# directory. The input previously pointed at one user's absolute home
# directory, which breaks on any other machine; it now follows the same
# "../data/..." layout as the output path.
json_path = "../data/TheSession-data/json/tunes.json"
output_path = "../data/processed/parsed_tunes.csv"

# Load the JSON data. Encoding is explicit so decoding does not depend on the
# platform's locale default.
with open(json_path, "r", encoding="utf-8") as f:
    tunes = json.load(f)

# Convert to DataFrame, keeping only the relevant columns (in this order)
relevant_columns = ["tune_id", "name", "type", "meter", "mode", "abc", "date", "username"]
df = pd.DataFrame(tunes)[relevant_columns]

# Drop rows whose ABC notation is missing or whitespace-only
has_abc = df["abc"].notnull() & (df["abc"].str.strip() != "")
df = df[has_abc]

# Save for analysis, creating the output directory on first run
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path, index=False)

print(f"[✓] Saved {len(df)} parsed tunes to {output_path}")


[✓] Saved 51128 parsed tunes to data/processed/parsed_tunes.csv
