# Read dataset
from the RMRK 2 consolidated dump

In [None]:
import json

# see https://docs.rmrk.app/syncing/#consolidation -> "RMRK 2 consolidated dump" for files
# Decode explicitly as UTF-8 (the standard JSON interchange encoding) instead of
# relying on the platform's locale default, which differs between systems.
with open('rmrk20211118.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

In [None]:
# Summarise the top-level entries of the dump: integer values are printed
# as-is, every other value is reported by its number of items.
for key, value in data.items():
    # isinstance() is the idiomatic type check; it also treats bool (a subclass
    # of int) as a scalar instead of failing on len().
    if isinstance(value, int):
        print(key, '=', value)
    else:
        print(key, ': ', len(value), 'items')

In [None]:
# Print every entry of data['collections'], one per line.
# (If it is a dict, iteration yields its keys - presumably the collection ids.)
for collection_key in data['collections']:
    print(collection_key)

In [None]:
# Gather every NFT whose key contains the Kanaria bird collection id
# (a plain substring match on the key), then report how many were found.
collection_id = 'e0b9bdcc456a36497a-KANBIRD'
birds = [
    nft
    for nft_key, nft in data['nfts'].items()
    if collection_id in nft_key
]
birds_number = len(birds)
print(birds_number)
print("The description states: 8478 limited edition single-mint NFTs")

## Examples

In [None]:
# Show the numeric id, the part list and the theme of one sample bird.
bird_index = 2878
sample_bird = birds[bird_index]
# The last 8 characters of the id string are parsed as the bird number.
print(int(sample_bird["id"][-8:]))
# Only the first resource of the bird is inspected.
sample_resource = sample_bird["resources"][0]
print(json.dumps(sample_resource["parts"], indent=4, sort_keys=True))
print(json.dumps(sample_resource["themeId"], indent=4, sort_keys=True))

In [None]:
# Dump the complete record of one bird as pretty-printed JSON.
bird_index = 11
pretty_record = json.dumps(birds[bird_index], indent=4, sort_keys=True)
print(pretty_record)

In [None]:
# Indices (into `birds`) of the birds that have more than one entry in 'changes'.
birds_for_sale = []
for bird_index, bird_record in enumerate(birds):
    if len(bird_record['changes']) > 1:
        birds_for_sale.append(bird_index)

In [None]:
# For each selected bird, print its index followed by its 'changes' list as JSON.
for bird_index in birds_for_sale:
    print(bird_index)
    changes_json = json.dumps(birds[bird_index]['changes'], indent=4, sort_keys=True)
    print(changes_json)

In [None]:
# Dump the complete record of another sample bird as pretty-printed JSON.
bird_index = 1001
pretty_record = json.dumps(birds[bird_index], indent=4, sort_keys=True)
print(pretty_record)

In [None]:
# Show only the 'metadata' field of the bird selected by the current bird_index.
metadata_json = json.dumps(birds[bird_index]["metadata"], indent=4, sort_keys=True)
print(metadata_json)

* "1f970_beak", +/2 (face)
* "var2_body", +
* "1f970_eyes", +/2 (face)
* "var2_footLeft", +/2 (feet)
* "var2_footRight", +/2 (feet)
* "var2_handLeft", - (items?)
* "var2_handRight", - (items?)
* "2600_head", +
* "2600_tail", +
* "var2_wingLeft", +
* "var2_wingRight", +

and

* "themeId"

## Construct table

In [None]:
# Module-level accumulators filled as a side effect of get_row().
tails = []
tops = []
bodies = []


def get_row(bird):
    """Flatten one bird NFT record into a row of trait values.

    Reads ``bird["id"]`` (its last 8 characters are parsed as the integer
    bird id) and the first entry of ``bird["resources"]`` (``"themeId"`` and
    ``"parts"``). Part names are lower-cased and classified by the marker
    they contain (``_head``, ``_eyes``, ``_body``, ...); the text in front of
    the marker is the trait value. A ``_top_rare`` part is stored in the
    ``tail`` slot of the row.

    Side effects: appends to the module-level lists ``bodies``, ``tails`` and
    ``tops``, and prints the bird id once when the bird carries both a
    ``_tail`` and a ``_top_rare`` part.

    Returns:
        ``[bird_id, theme, head, eyes, body, tail, wingLeft, wingRight,
        feet, beak]``.

    Raises:
        AssertionError: carrying the bird id, when the left and right feet
            differ or a wing differs from the hand on the same side.
        UnboundLocalError: when an expected part is absent from the record.
    """
    bird_id = int(bird["id"][-8:])
    theme = bird["resources"][0]["themeId"]
    parts_list = bird["resources"][0]["parts"]
    parts_list = [x.lower() for x in parts_list]

    # These flags describe the whole bird, so they must live outside the
    # loop; resetting them per part made the tail-and-top report unreachable.
    has_tail = False
    has_top = False

    for key_string in parts_list:
        if '_head' in key_string:
            head = key_string[:-5]
        if '_eyes' in key_string:
            eyes = key_string[:-5]
        if '_body' in key_string:
            body = key_string[:-5]
            bodies.append(body)
        if '_tail' in key_string:
            tail = key_string[:-5]
            has_tail = True
            tails.append(tail)
        if '_top_rare' in key_string:
            # A rare top shares the 'tail' column of the row.
            tail = key_string[:-9]
            has_top = True
            tops.append(tail)
        if '_wingleft' in key_string:
            wing_left = key_string[:-9]
        if '_handleft' in key_string:
            hand_left = key_string[:-9]
        if '_wingright' in key_string:
            wing_right = key_string[:-10]
        if '_handright' in key_string:
            hand_right = key_string[:-10]
        if '_footleft' in key_string:
            foot_left = key_string[:-9]
        if '_footright' in key_string:
            foot_right = key_string[:-10]
        if '_beak' in key_string:
            beak = key_string[:-5]

    if has_tail and has_top:
        print(bird_id)

    # Raised explicitly (same exception type and payload as the former
    # assert statements) so the checks also run under ``python -O``.
    if foot_left != foot_right:
        raise AssertionError(bird_id)
    if wing_left != hand_left:
        raise AssertionError(bird_id)
    if wing_right != hand_right:
        raise AssertionError(bird_id)

    feet = foot_left
    return [bird_id, theme, head, eyes, body, tail, wing_left, wing_right, feet, beak]

In [None]:
import csv

# Build one trait row per bird and persist the table, header first, as CSV.
column_names = ['bird_id', 'theme', 'head', 'eyes', 'body', 'tail', 'wingLeft', 'wingRight', 'feet', 'beak']
birds_dataset = [get_row(bird) for bird in birds]

with open('birds_dataset.csv', 'w', newline='') as birds_file:
    bird_writer = csv.writer(birds_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    bird_writer.writerow(column_names)
    bird_writer.writerows(birds_dataset)

# Reload the table from disk; the first row becomes column_names and every
# later row is kept as a list of strings (the csv reader does not convert types).
birds_dataset = []
line_count = 0
# The csv module asks for files to be opened with newline='' so that it can
# handle line endings (including ones embedded in quoted fields) itself.
with open('birds_dataset.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    for line_count, row in enumerate(csv_reader, start=1):
        if line_count == 1:
            print(f'Column names are {", ".join(row)}')
            column_names = row
        else:
            birds_dataset.append(row)
    print(f'Processed {line_count} lines.')

## Interesting stats

In [None]:
# Compare the trait names collected in the tails, bodies and tops lists.
unique_tops = set(tops)
print(len(tails), len(bodies), len(tops))
print("There are tails and tops with the following same names:", list(set(tails) & unique_tops))
# Fixed: this line announced bodies-vs-tops but intersected tails with tops again.
print("There are bodies and tops with the following same names:", list(set(bodies) & unique_tops))
print(unique_tops)

# NOTE(review): the file is called 'tails.txt' but it stores the set of *top*
# names - confirm which of the two is intended.
with open('tails.txt','w') as tops_file:
    tops_file.write(str(unique_tops))

# Round-trip check: parse the written set literal back into a Python set.
import ast
with open('tails.txt','r') as tops_file:
    tops_set = ast.literal_eval(tops_file.read())
print(tops_set)

# Dataset with aliases instead of strings

In [None]:
from itertools import count
from collections import defaultdict

# Replace every trait string by a small integer alias and save the result.
rows_number = len(birds_dataset)
columns_number = len(birds_dataset[0])

# Row-major flattening of the trait columns; column 0 (the bird id) is skipped.
birds_traits_list = [
    row[column_index]
    for row in birds_dataset
    for column_index in range(1, columns_number)
]

# The defaultdict hands out 0, 1, 2, ... the first time each distinct string
# is looked up. A single mapping serves all columns, so equal strings in
# different columns receive the same alias.
mapping = defaultdict(count().__next__)
result = [mapping[trait] for trait in birds_traits_list]

# Re-assemble the rows: original bird id followed by that row's slice of aliases.
traits_per_row = columns_number - 1
birds_dataset_alias = [
    [
        birds_dataset[row_index][0],
        *result[row_index * traits_per_row:(row_index + 1) * traits_per_row],
    ]
    for row_index in range(rows_number)
]

with open('birds_dataset_alias.csv', 'w', newline='') as birds_file:
    bird_writer = csv.writer(birds_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    bird_writer.writerow(column_names)
    bird_writer.writerows(birds_dataset_alias)

In [None]:
# Reload the alias table from disk; the first row becomes column_names and
# every later row is kept as a list of strings.
birds_dataset_alias = []
line_count = 0
# The csv module asks for files to be opened with newline='' so that it can
# handle line endings (including ones embedded in quoted fields) itself.
with open('birds_dataset_alias.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    for line_count, row in enumerate(csv_reader, start=1):
        if line_count == 1:
            print(f'Column names are {", ".join(row)}')
            column_names = row
        else:
            birds_dataset_alias.append(row)
    print(f'Processed {line_count} lines.')

## Pie plots for traits

In [None]:
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt

# Draw one pie chart per trait column (column 0, the bird id, is skipped),
# showing how often each value occurs.
birds_dataset = np.array(birds_dataset)
for column_index in range(1, columns_number):
    entries = Counter(birds_dataset[:, column_index])
    print(column_names[column_index])
    # Dict views are not sequences, and NumPy cannot coerce them into 1-D
    # arrays; hand matplotlib real lists for the wedge sizes and labels.
    plt.pie(list(entries.values()), labels=list(entries.keys()))
    plt.show()