In [1]:
from concurrent.futures import ThreadPoolExecutor
import json
import sqlite3

def connect_db(db_name):
    """Open and return a SQLite connection for the database ``db_name``.

    ``db_name`` is passed unchanged to ``sqlite3.connect``. The caller owns
    the returned connection and is responsible for closing it.
    """
    return sqlite3.connect(db_name)

def insert_into_playlists(conn, playlist_data_batch):
    """Bulk-insert playlist rows into the ``playlists`` table and commit.

    Args:
        conn: Open SQLite connection.
        playlist_data_batch: Iterable of 10-item sequences, one per playlist,
            ordered to match the column list in the statement below.

    The statement uses ``INSERT OR IGNORE``, so a row that conflicts with an
    existing one is skipped instead of raising.
    """
    statement = '''
        INSERT OR IGNORE INTO playlists (
            playlist_id,
            p_name, 
            p_num_tracks, 
            p_num_albums, 
            p_num_followers, 
            p_num_artists, 
            p_duration_ms, 
            p_num_edits, 
            p_modified_at, 
            p_collaborative
        )
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    '''
    cursor = conn.cursor()
    cursor.executemany(statement, playlist_data_batch)
    conn.commit()

# Similar functions for other tables.
# NOTE: the triple-quoted string below is a bare string expression, so nothing
# inside it is executed. It holds earlier versions of insert_into_tracks and
# insert_into_albums that printed the whole batch before inserting; the
# definitions that are actually in effect appear further down in this file.
""" def insert_into_tracks(conn, track_data_batch):
    cur = conn.cursor()
    print(f"Track Data Batch: {track_data_batch}")
    cur.executemany('''INSERT OR IGNORE INTO tracks (t_uri, t_name, t_duration_ms, artist_name, artist_uri, album_uri) VALUES (?, ?, ?, ?, ?, ?)''', track_data_batch)
    conn.commit()

def insert_into_albums(conn, album_data_batch):
    cur = conn.cursor()
    print(f"Album Data Batch: {album_data_batch}")
    cur.executemany('''INSERT OR IGNORE INTO albums (album_uri, album_name) VALUES (?, ?)''', album_data_batch)
    conn.commit() """

def insert_into_artists(conn, artist_data_batch):
    """Bulk-insert ``(artist_uri, artist_name)`` pairs into ``artists`` and commit.

    Rows that conflict with an existing row are skipped (``INSERT OR IGNORE``).
    """
    statement = '''INSERT OR IGNORE INTO artists (artist_uri, artist_name) VALUES (?, ?)'''
    cursor = conn.cursor()
    cursor.executemany(statement, artist_data_batch)
    conn.commit()

def insert_into_playlist_tracks(conn, playlist_track_data_batch):
    """Bulk-insert ``(playlist_id, t_uri, position)`` rows and commit.

    Targets the ``playlist_tracks`` table; rows that conflict with an existing
    row are skipped (``INSERT OR IGNORE``).
    """
    statement = '''INSERT OR IGNORE INTO playlist_tracks (playlist_id, t_uri, position) VALUES (?, ?, ?)'''
    cursor = conn.cursor()
    cursor.executemany(statement, playlist_track_data_batch)
    conn.commit()

def insert_into_albums(conn, album_data_batch):
    """Bulk-insert ``(album_uri, album_name)`` pairs into ``albums``.

    Best-effort: on success the transaction is committed and a confirmation
    is printed; on any ``sqlite3.Error`` the error is printed, the transaction
    is rolled back, and the function returns normally instead of raising.
    """
    statement = '''INSERT OR IGNORE INTO albums (album_uri, album_name) VALUES (?, ?)'''
    cursor = conn.cursor()
    try:
        cursor.executemany(statement, album_data_batch)
        conn.commit()
    except sqlite3.Error as exc:
        print(f"SQLite error while inserting into albums: {exc}")
        conn.rollback()
    else:
        print("Successfully inserted into albums.")

def insert_into_tracks(conn, track_data_batch):
    """Bulk-insert track rows into the ``tracks`` table.

    Each row supplies, in order: t_uri, t_name, t_duration_ms, artist_name,
    artist_uri, album_uri.

    Best-effort: on success the transaction is committed and a confirmation
    is printed; on any ``sqlite3.Error`` the error is printed, the transaction
    is rolled back, and the function returns normally instead of raising.
    """
    statement = '''INSERT OR IGNORE INTO tracks (t_uri, t_name, t_duration_ms, artist_name, artist_uri, album_uri) VALUES (?, ?, ?, ?, ?, ?)'''
    cursor = conn.cursor()
    try:
        cursor.executemany(statement, track_data_batch)
        conn.commit()
    except sqlite3.Error as exc:
        print(f"SQLite error while inserting into tracks: {exc}")
        conn.rollback()
    else:
        print("Successfully inserted into tracks.")


def process_file(json_file):
    """Load one playlist JSON file and insert its contents into SQLite.

    The file must contain a top-level ``'playlists'`` list whose entries each
    have a ``'tracks'`` list. For every playlist one row is queued for the
    playlists table, and for every track one row each is queued for the
    tracks, albums, artists and playlist_tracks tables. Other missing keys
    become ``None`` because values are read with ``dict.get``.

    The function opens its own connection to ``playlist_analysis.db`` and
    always closes it before returning, including when an insert raises.

    Args:
        json_file: Path of the JSON file to load.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If ``'playlists'`` or a playlist's ``'tracks'`` is missing.
    """
    playlist_data_batch = []
    track_data_batch = []
    album_data_batch = []
    artist_data_batch = []
    playlist_track_data_batch = []

    # JSON text is UTF-8; stating the encoding keeps decoding independent of
    # the platform's locale default.
    with open(json_file, 'r', encoding='utf-8') as f:
        print(f"Processing {json_file}...")
        data = json.load(f)

    for playlist in data['playlists']:
        pid = playlist.get('pid')
        playlist_data_batch.append((
            pid,
            playlist.get('name'),
            playlist.get('num_tracks'),
            playlist.get('num_albums'),
            playlist.get('num_followers'),
            playlist.get('num_artists'),
            playlist.get('duration_ms'),
            playlist.get('num_edits'),
            playlist.get('modified_at'),
            playlist.get('collaborative'),
        ))

        for track in playlist['tracks']:
            track_uri = track.get('track_uri')
            artist_uri = track.get('artist_uri')
            artist_name = track.get('artist_name')
            album_uri = track.get('album_uri')

            track_data_batch.append((
                track_uri,
                track.get('track_name'),
                track.get('duration_ms'),
                artist_name,
                artist_uri,
                album_uri,
            ))
            album_data_batch.append((album_uri, track.get('album_name')))
            artist_data_batch.append((artist_uri, artist_name))
            playlist_track_data_batch.append((pid, track_uri, track.get('pos')))

    conn = connect_db('playlist_analysis.db')
    try:
        print(f"Length of album_data_batch: {len(album_data_batch)}")
        print(f"Length of track_data_batch: {len(track_data_batch)}")

        insert_into_playlists(conn, playlist_data_batch)
        insert_into_albums(conn, album_data_batch)
        insert_into_artists(conn, artist_data_batch)
        insert_into_tracks(conn, track_data_batch)
        insert_into_playlist_tracks(conn, playlist_track_data_batch)
        conn.commit()
    finally:
        # Previously the connection was never closed, leaking one handle per
        # processed file; release it even if an insert raised.
        conn.close()

def check_data(conn):
    """Print how many rows the ``albums`` and ``tracks`` tables hold.

    Args:
        conn: Open SQLite connection to query.
    """
    album_rows = conn.execute("SELECT COUNT(*) FROM albums").fetchone()[0]
    print(f"Number of rows in albums: {album_rows}")

    track_rows = conn.execute("SELECT COUNT(*) FROM tracks").fetchone()[0]
    print(f"Number of rows in tracks: {track_rows}")

def main():
    """Process the 25 playlist slice files concurrently on a thread pool.

    Builds the file names ``mpd.slice.0-999.json`` through
    ``mpd.slice.24000-24999.json`` under a fixed directory and submits each
    one to ``process_file``. A failure in one file is reported on stdout and
    does not stop the remaining files from being processed.
    """
    # Create a list of JSON files
    path = '/Users/fried/Documents/DataScience/4season/DSP/Spotify/Datensatz/playlists/mpd.slice.'
    json_file_list = [f"{path}{i * 1000}-{(i + 1) * 1000 - 1}.json" for i in range(25)]

    with ThreadPoolExecutor() as executor:
        submitted = [
            (json_file, executor.submit(process_file, json_file))
            for json_file in json_file_list
        ]
        # The results of executor.map() were never consumed before, so an
        # exception raised in a worker (missing file, locked database, ...)
        # vanished silently. Ask every future for its result to surface them.
        for json_file, future in submitted:
            try:
                future.result()
            except Exception as exc:  # top-level boundary: report and continue
                print(f"Failed to process {json_file}: {exc}")
    
if __name__ == '__main__':
    main()
    # Check the data after processing all files
    conn = connect_db('playlist_analysis.db')
    try:
        check_data(conn)
    finally:
        # Close the connection even if a count query fails (e.g. a table is
        # missing), so the handle is not leaked on the error path.
        conn.close()

Processing /Users/fried/Documents/DataScience/4season/DSP/Spotify/Datensatz/playlists/mpd.slice.0-999.json...
Processing /Users/fried/Documents/DataScience/4season/DSP/Spotify/Datensatz/playlists/mpd.slice.1000-1999.json...
Processing /Users/fried/Documents/DataScience/4season/DSP/Spotify/Datensatz/playlists/mpd.slice.2000-2999.json...
Processing /Users/fried/Documents/DataScience/4season/DSP/Spotify/Datensatz/playlists/mpd.slice.3000-3999.json...
Processing /Users/fried/Documents/DataScience/4season/DSP/Spotify/Datensatz/playlists/mpd.slice.4000-4999.json...
Processing /Users/fried/Documents/DataScience/4season/DSP/Spotify/Datensatz/playlists/mpd.slice.5000-5999.json...
Processing /Users/fried/Documents/DataScience/4season/DSP/Spotify/Datensatz/playlists/mpd.slice.6000-6999.json...
Processing /Users/fried/Documents/DataScience/4season/DSP/Spotify/Datensatz/playlists/mpd.slice.7000-7999.json...
Processing /Users/fried/Documents/DataScience/4season/DSP/Spotify/Datensatz/playlists/mpd.sl