In [3]:
import csv
import json
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Scaling and Balancing
from sklearn import preprocessing
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from imblearn.over_sampling import SMOTE

# Libraries required for classification
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.cluster import k_means

# Libraries required for model evaluation
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV

from sklearn.metrics import (accuracy_score, 
                             auc, 
                             precision_score,
                             recall_score,
                             f1_score, 
                             roc_curve,
                             roc_auc_score,
                             confusion_matrix)

In [5]:
# Load the game-definition JSON files.
# main_dir is relative to the notebook's working directory.
main_dir = '../fsm-data/'
json_files = [
    'DidYouCatchTheBall_US.json',
    'Fireballs20-US.json',
    'Hackers_IN.json',
    'Indianboys_IN.json',
    'Indiangirls_IN.json',
    'MathParkour_US.json',
    'Raistar_IN.json',
    'TangramsRace.json',
    'TheDice_US.json',
]


def load_json(path):
    """Decode the JSON document stored at ``path`` and return the result.

    The file is opened inside a ``with`` block, so its handle is closed as
    soon as decoding finishes (or fails) instead of staying open for the
    lifetime of the kernel.
    """
    # JSON interchange files are UTF-8; naming the encoding avoids depending
    # on the platform's locale default. NOTE(review): confirm the data files
    # are UTF-8 encoded.
    with open(path, encoding='utf-8') as handle:
        return json.load(handle)


# One decoded document per file, in the same order as json_files.
data_arr = [load_json(main_dir + name) for name in json_files]

# Keep the per-file names bound for any other cell that refers to them.
data1, data2, data3, data4, data5, data6, data7, data8, data9 = data_arr

In [6]:
# Inspect the first loaded game definition. This prints the complete nested
# dict, so the cell output is long.
print(data_arr[0])

{'gameId': 'Did you catch the ball', 'teamCount': 3, 'playersPerTeam': 3, 'username': {'usernameId': ''}, 'visibility': True, 'stateIdCount': 6, 'transitionIdCount': 5, 'connectionIdCount': 9, 'dataLog': False, 'states': [{'stateType': 'START_STATE', 'stateId': 'cd5cc25795171f429f49e183ceed2bb1_start', 'game': 'Did you catch the ball', 'positionX': 659.0, 'positionY': 100.0, 'inputConnections': [], 'outputConnections': ['cd5cc25795171f429f49e183ceed2bb1_connection_1'], 'globalVariables': []}, {'stateType': 'OUTPUT_STATE', 'stateId': 'cd5cc25795171f429f49e183ceed2bb1_state_1', 'game': 'Did you catch the ball', 'positionX': 628.0, 'positionY': 301.0, 'inputConnections': ['cd5cc25795171f429f49e183ceed2bb1_connection_1'], 'outputConnections': ['cd5cc25795171f429f49e183ceed2bb1_connection_2'], 'description': 'State', 'displayText': {'Game Wide': "Welcome to , Did you catch the ball? \nIf you get the question wrong, you'll get a ball thrown at you!\nClick any button to continue to questions"

In [7]:
def is_float(string):
    """Report whether ``string`` can be converted by ``float()``.

    Parameters
    ----------
    string : object
        Value to probe, typically a ``str``.

    Returns
    -------
    bool
        True when ``float(string)`` succeeds, False otherwise.

    Notes
    -----
    ``float()`` also accepts spellings such as ``"nan"``, ``"inf"`` and
    ``"1e3"``, so those count as floats here.
    """
    try:
        float(string)
    except (TypeError, ValueError):
        # ValueError: text that is not a number. TypeError: an object float()
        # cannot take at all (None, list, dict); report False for it instead
        # of letting the exception escape from a yes/no predicate.
        return False
    return True

# Reduce every game definition to its top-level numeric fields: a key survives
# when its value is an int/float/bool, or when the value's string form is
# numeric text or parses with float().
number_data = []
for data in data_arr:
    num_dict = {}
    for key, value in data.items():
        if isinstance(value, (int, float, bool)):
            num_dict[key] = value
            continue
        text = str(value)
        if text.isnumeric() or is_float(text):
            num_dict[key] = value
    number_data.append(num_dict)

# Show the filtered result for the first three games.
for position in (0, 1, 2):
    print(number_data[position])

{'teamCount': 3, 'playersPerTeam': 3, 'visibility': True, 'stateIdCount': 6, 'transitionIdCount': 5, 'connectionIdCount': 9, 'dataLog': False}
{'teamCount': 2, 'playersPerTeam': 2, 'visibility': True, 'stateIdCount': 8, 'transitionIdCount': 7, 'connectionIdCount': 12, 'dataLog': False}
{'teamCount': 2, 'playersPerTeam': 3, 'visibility': True, 'stateIdCount': 11, 'transitionIdCount': 10, 'connectionIdCount': 11, 'dataLog': False}


In [8]:
# Write the filtered dicts to a single CSV file, one row per game.
# With DictWriter's defaults, a column listed here that a row does not contain
# is written as an empty cell, and a row key missing from this list raises
# ValueError.
col_names = ['teamCount', 'playersPerTeam', 'visibility', 'stateIdCount', 'states', 'connections', 'transitions', 'username', 'gameId','transitionIdCount', 'connectionIdCount', 'dataLog']
# newline='' hands line-ending control to the csv module, as its documentation
# requires; without it, platforms that translate '\n' on write produce an
# extra blank line after every row.
with open('data.csv', 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=col_names)
    writer.writeheader()
    writer.writerows(number_data)