# In [4]:
import pandas as pd
import json

def open_file_and_extract(file_name):
    """Load a score-distribution JSON file into a DataFrame of trait scores.

    The file must contain an object with two keys:

    * ``columns`` -- the trait types to extract.
    * ``distributions`` -- one entry per token, each holding a ``tokenId``
      and a ``totalScoreDistribution`` list whose items carry a
      ``trait_type`` and a ``score``.

    A trait type can occur several times on one token, so every trait gets
    as many identically named columns as its highest occurrence count on any
    single token. Tokens with fewer occurrences are padded with 0. Traits
    that never occur produce no column, and trait types not listed in
    ``columns`` are ignored.

    Args:
        file_name: Path of the JSON file to read (decoded as UTF-8).

    Returns:
        A DataFrame with one row per distribution entry, in file order,
        indexed by ``tokenId`` under the index name ``token_id``. Rows are
        placed by position, so entries sharing a ``tokenId`` each keep their
        own data.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If one of the keys described above is missing.
    """
    with open(file_name, encoding="utf-8") as f:
        json_data = json.load(f)

    distributions = json_data['distributions']
    # dict.fromkeys drops repeated trait names while keeping first-seen order.
    traits = list(dict.fromkeys(json_data['columns']))

    # Gather each token's scores per trait once, so the width calculation and
    # the row construction below work from the same scan of the data.
    token_scores = []
    for distribution in distributions:
        entries = distribution["totalScoreDistribution"]
        token_scores.append({
            trait: [entry['score'] for entry in entries if entry["trait_type"] == trait]
            for trait in traits
        })

    # A trait needs as many columns as its highest occurrence count on any token.
    widths = {
        trait: max((len(scores[trait]) for scores in token_scores), default=0)
        for trait in traits
    }

    # Repeat each trait name once per occurrence slot.
    data_frame_columns = [
        trait for trait, width in widths.items() for _ in range(width)
    ]

    # One row per distribution: the scores found, then 0s for the unused slots.
    rows = []
    for scores in token_scores:
        row = []
        for trait, width in widths.items():
            found = scores[trait]
            row.extend(found)
            row.extend([0] * (width - len(found)))
        rows.append(row)

    indexes = [distribution['tokenId'] for distribution in distributions]

    # Build the frame in one step and by position: assigning rows one at a
    # time by label is slow and overwrites rows that share a tokenId.
    # dtype=object keeps scores exactly as parsed from the JSON instead of
    # coercing a column when the int padding mixes with float scores.
    # With no trait columns there are no cells to fill, so pass no data.
    data_frame = pd.DataFrame(
        rows if data_frame_columns else None,
        index=indexes,
        columns=data_frame_columns,
        dtype=object,
    )
    data_frame.index.name = 'token_id'

    return data_frame
    
# Extract the DOODLES score distributions into a table, then export it as CSV.
data_frame = open_file_and_extract(
    './score_distributions/DOODLES_totalScoreDistribution.json'
)

data_frame.to_csv(path_or_buf='DOODLES.csv')