# Preprocessing
This notebook reads an artist's raw JSON data from the data folder and writes it out as a clean, readable CSV file. It is run once per artist so that each artist's data is added to the output.

In [49]:
# import packages
import json
import csv
from datetime import datetime

In [50]:
# Per-artist run configuration: edit these three values before running the cells below.
# Data was collected for: Miley Cyrus, Selena Gomez, Jonas Brothers, Demi Lovato,
# Vanessa Hudgens, Bridgit Mendler, Dove Cameron, Ross Lynch.

artist_name = "Vanessa Hudgens"  # change for every artist
json_file_path = "../DATA/Raw JSON Data/vanessa_hudgens.json"  # raw JSON input for this artist
csv_file_path = "../DATA/monthly_listeners_final.csv"  # CSV file the rows are written to

In [51]:
# Load the raw JSON file and extract the popularity and monthly-listeners lists.
# The encoding is given explicitly: JSON files are UTF-8, whereas open() would
# otherwise fall back to the platform's locale encoding and could mis-decode them.
with open(json_file_path, encoding='utf-8') as json_file:
    data = json.load(json_file)

# NOTE(review): assumes seriesData[0] is the popularity series and seriesData[1]
# is the monthly-listeners series -- confirm against the raw export.
series_data = data["popupInfo"]["seriesData"]
popularity = series_data[0]['data']
monthly_listeners = series_data[1]['data']

In [52]:
# Write one CSV row per data point: artist name, date, and number of monthly listeners.
# reference: https://www.geeksforgeeks.org/convert-json-to-csv-in-python/
## Append mode lets different artists' data be added to the same file. To overwrite, use mode='w'.
with open(csv_file_path, mode='a', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    # A file opened for appending is positioned at its end, so offset 0 means the
    # file is new or empty. Write the header only then; otherwise every artist's
    # run would insert another header row in the middle of the data.
    if csv_file.tell() == 0:
        csv_writer.writerow(['Artist', 'Date', 'Monthly Listeners'])
    # Keep only entries dated on or after 2021-01-01 (interpreted in local time).
    # NOTE(review): no upper bound is applied -- add one if the data must stop at 2024.
    cutoff_timestamp = datetime(2021, 1, 1).timestamp()
    for entry in monthly_listeners:
        # Skip anything that is not a two-element [timestamp, value] entry.
        if len(entry) != 2:
            continue
        timestamp = entry[0] / 1000  # scale milliseconds down to the seconds fromtimestamp() expects
        if timestamp < cutoff_timestamp:
            continue
        # Turn the timestamp into a user-readable MM-DD-YYYY date (local time).
        readable_date = datetime.fromtimestamp(timestamp).strftime('%m-%d-%Y')
        # Use a separate name for the value: assigning to `monthly_listeners` here
        # would overwrite the list being iterated, so re-running the cell would fail.
        listener_count = entry[1]
        csv_writer.writerow([artist_name, readable_date, listener_count])