In [None]:
%pip install autogluon



In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.model_selection import train_test_split
import plotly.express as px
import plotly.graph_objs as go
from google.colab import files
from autogluon.tabular import TabularDataset, TabularPredictor

In [None]:
# Directory holding the self-reported motion sickness score CSVs. getdf lists
# this directory and picks the files whose name contains the game title.
# NOTE(review): all paths here are relative — presumably to a Colab working
# directory with Google Drive mounted under ./drive; confirm before running.
scoresp = "drive/MyDrive/CS291I/SelfReportData"

# This is where we'll look for the intermediate per-game data we generate.
# NOTE(review): not referenced by any of the cells visible in this section.
pergamep = "drive/MyDrive/CS291I/gamedata"

# Each key is the string we'll use to find user-reported sickness data, and each
# value is the path to the game engine data for that game.
# The depth of these paths matters: getdf reads component [3] of the value
# (the game directory name) and getcam/getpose read component [4] of each file
# path (the per-recording directory), so keep the
# "drive/MyDrive/CS291I/<game>" layout when adding entries.
gamedirs = {
    'Beat Saber': 'drive/MyDrive/CS291I/Beat_saber',
    'Cartoon Network Journeys VR': 'drive/MyDrive/CS291I/Carton_Network',
    'Epic Roller Coasters': 'drive/MyDrive/CS291I/Epic RollerCoaster',
    'Mini Motor Racing X': 'drive/MyDrive/CS291I/Mini_Racing',
    'Traffic Cop': 'drive/MyDrive/CS291I/Traffic_Cop',
    'Voxel Shot VR': 'drive/MyDrive/CS291I/Voxel_Shot_VR',
    'VR Rome': 'drive/MyDrive/CS291I/VR_ROME',
    'Monster Awakens': 'drive/MyDrive/CS291I/Monster_awaken'
}

In [None]:
# Collect the path of every non-hidden file found anywhere below one game's
# data directory.
def getfiles(gamesp):
  """Return the paths of all files under `gamesp`, skipping dotfiles.

  The tree is traversed with os.walk, so files in nested sub-directories are
  included. Each returned path is the walked directory joined with the file
  name. Only file *names* starting with '.' are filtered out; directories are
  always descended into. A non-existent `gamesp` yields an empty list.
  """
  return [
      os.path.join(folder, name)
      for folder, _subdirs, names in os.walk(gamesp)
      for name in names
      if not name.startswith('.')
  ]

# Map every game name to the list of all game engine data files found below
# that game's directory, then show the resulting mapping.
allfiles = {game: getfiles(gamedir) for game, gamedir in gamedirs.items()}
print(allfiles)

{'Beat Saber': ['drive/MyDrive/CS291I/Beat_saber/P5 VRLOG-5051632/control.csv', 'drive/MyDrive/CS291I/Beat_saber/P5 VRLOG-5051632/pose.csv', 'drive/MyDrive/CS291I/Beat_saber/P5 VRLOG-5051632/light.csv', 'drive/MyDrive/CS291I/Beat_saber/P5 VRLOG-5051632/camera.csv', 'drive/MyDrive/CS291I/Beat_saber/P6 VRLOG-5051825/light.csv', 'drive/MyDrive/CS291I/Beat_saber/P6 VRLOG-5051825/control.csv', 'drive/MyDrive/CS291I/Beat_saber/P6 VRLOG-5051825/camera.csv', 'drive/MyDrive/CS291I/Beat_saber/P6 VRLOG-5051825/pose.csv', 'drive/MyDrive/CS291I/Beat_saber/P3 VRLOG-5051000/light.csv', 'drive/MyDrive/CS291I/Beat_saber/P3 VRLOG-5051000/control.csv', 'drive/MyDrive/CS291I/Beat_saber/P3 VRLOG-5051000/pose.csv', 'drive/MyDrive/CS291I/Beat_saber/P3 VRLOG-5051000/camera.csv', 'drive/MyDrive/CS291I/Beat_saber/P4 VRLOG-5051047/camera.csv', 'drive/MyDrive/CS291I/Beat_saber/P4 VRLOG-5051047/light.csv', 'drive/MyDrive/CS291I/Beat_saber/P4 VRLOG-5051047/pose.csv', 'drive/MyDrive/CS291I/Beat_saber/P4 VRLOG-505104

In [None]:
# Translate a self-report timestamp into the frame counter of the closest
# control.csv sample belonging to the same recording.
def getframe(csvfile, time, gamename, filelist):
  """Look up the frame number logged nearest to `time`.

  csvfile  -- path of the self-report CSV; the second space-separated token of
              this string is used as the recording/participant identifier.
  time     -- timestamp to search for; compared against the 'timestamp'
              column of the control file.
  gamename -- substring that must occur in the control file's path.
  filelist -- candidate file paths to search, in order.

  The first path that contains `gamename`, the identifier token and
  'control.csv' is loaded, and the 'framecounter' value of the row whose
  timestamp is closest to `time` is returned. If no path matches, the
  function falls off the end and returns None.
  """
  token = csvfile.split(' ')[1]
  for path in filelist:
    wanted = gamename in path and token in path and 'control.csv' in path
    if not wanted:
      continue
    ctrl = pd.read_csv(path)
    # Position of the sample with the smallest absolute time difference.
    nearest = np.argmin(np.abs(ctrl['timestamp'] - time))
    return ctrl['framecounter'][nearest]

In [None]:
def getscore(csvfile, gamename, filelist):
  """Read one self-report CSV and pair every score with a frame number.

  The file is read without a header row. In each row, column 0 is taken as
  the raw timestamp and column 1 as the reported score. The raw timestamp is
  divided by 1000 before being passed to getframe together with `csvfile`,
  `gamename` and `filelist`.

  Returns a (scores, frames) tuple of two equally long lists; a frame entry is
  whatever getframe returned for that row (None when it found no control
  file).
  """
  report = pd.read_csv(csvfile, header=None)
  scores, frames = [], []
  for _, entry in report.iterrows():
    scores.append(entry.iloc[1])
    frames.append(getframe(csvfile, entry.iloc[0] / 1000, gamename, filelist))
  return scores, frames

In [None]:
# Get score data for a particular game.
def getdf(gamename, scoresp, gamedir):
  """Collect the self-reported scores of every participant for one game.

  gamename -- game title; selects the self-report files whose name contains
              it and is the key into the module-level `allfiles` dict.
  scoresp  -- directory holding the self-report CSV files.
  gamedir  -- game engine data directory; its fourth '/'-separated component
              is handed to getscore as the name to match control files on.

  Returns a DataFrame with the columns 'score', 'framecounter' and
  'participant' (one row per self-report entry, original per-file row index
  kept), or an empty DataFrame when no self-report file matches.
  """
  perfile = []
  for filename in os.listdir(scoresp):
    # Only visible CSV files that mention this game are self-reports for it.
    if filename.startswith(".") or not filename.endswith(".csv"):
      continue
    if gamename not in filename:
      continue

    scores, frames = getscore(scoresp + "/" + filename, gamedir.split('/')[3], allfiles[gamename])
    scoredf = pd.DataFrame({"score": scores, "framecounter": frames})

    # Workaround for the Mini Motor Racing X file names: there the participant
    # label is the second token of the file name instead of "P<first token>".
    if gamename == "Mini Motor Racing X":
      partstr = filename.split(' ')[1]
    else:
      partstr = "P" + str(filename.split(' ')[0])
    scoredf["participant"] = partstr
    perfile.append(scoredf)

  # Concatenate once at the end instead of growing a DataFrame inside the loop.
  if not perfile:
    return pd.DataFrame()
  return pd.concat(perfile)

# Build one long DataFrame with the self-report scores of all games, tagging
# every row with the game it belongs to.
pergame = []
for gamename, gamedir in gamedirs.items():
  scoresdf = getdf(gamename, scoresp, gamedir)
  scoresdf["game"] = gamename
  pergame.append(scoresdf)
allscore = pd.concat([pd.DataFrame(), *pergame])
allscore.head()

Unnamed: 0,score,framecounter,participant,game
0,1,979,P1,Beat Saber
1,4,1444,P1,Beat Saber
2,1,1566,P1,Beat Saber
3,2,1721,P1,Beat Saber
4,4,2010,P1,Beat Saber


In [None]:
# Let's see how many unique participants there are for the self-report data.
# Note that the Mini Motor Racing X participant IDs are going to look different
# because the only constant in life is suffering: getdf labels that game's rows
# with the second token of the file name instead of "P" + the first token.
print(allscore["participant"].unique())

['P1' 'P2' 'P3' 'P4' 'P5' 'P6' 'P12' 'P13' 'P14' 'P15' 'P16' 'P17' 'P18'
 'P19' 'P20' 'P21' 'VRLOG-6061345' 'VRLOG-6061400' 'VRLOG-6061651'
 'VRLOG-6061738' 'VRLOG-6062028' 'P7' 'P8' 'P9' 'P10' 'P11']


In [None]:
# Get in-game camera data for a single game.
def getcam(filelist):
  """Load and stack every camera.csv found in `filelist`.

  filelist -- file paths for one game; entries whose path does not contain
              'camera.csv' are ignored.

  Each loaded table gets a 'participant' column holding the first
  space-separated token of the fifth '/'-separated path component (the
  recording directory in the "drive/MyDrive/CS291I/<game>/<recording>/..."
  layout used by this notebook).

  Returns the tables concatenated in `filelist` order with their original row
  indices, or an empty DataFrame when there is no camera file.
  """
  loaded = []
  for file in filelist:
    if 'camera.csv' not in file: continue
    df = pd.read_csv(file)
    df['participant'] = file.split('/')[4].split(' ')[0]
    loaded.append(df)
  # A single concat at the end avoids re-copying the growing frame per file.
  if not loaded:
    return pd.DataFrame()
  return pd.concat(loaded)

# Stack the camera logs of every game into one DataFrame, labelling each row
# with the game it came from.
percam = []
for gamename in gamedirs:
  camdf = getcam(allfiles[gamename])
  camdf['game'] = gamename
  percam.append(camdf)
cameradf = pd.concat([pd.DataFrame(), *percam])
cameradf.head()

Unnamed: 0,framecounter,timestamp,name,projection,view,participant,game
0,3,1683275576675,MenuMainCamera,0.9027 0.0000 -0.2425 0.0000 0.0000 0.8784 -0....,0.9951 0.0931 -0.0326 -0.0933 -0.0966 0.9868 -...,P5,Beat Saber
1,6,1683275576755,MenuMainCamera,0.9027 0.0000 -0.2425 0.0000 0.0000 0.8784 -0....,0.9955 0.0934 -0.0127 -0.0969 -0.0943 0.9871 -...,P5,Beat Saber
2,9,1683275576820,MenuMainCamera,0.9027 0.0000 -0.2425 0.0000 0.0000 0.8784 -0....,0.9957 0.0930 -0.0013 -0.0982 -0.0924 0.9872 -...,P5,Beat Saber
3,12,1683275576874,MenuMainCamera,0.9027 0.0000 -0.2425 0.0000 0.0000 0.8784 -0....,0.9957 0.0921 0.0054 -0.0983 -0.0906 0.9876 -0...,P5,Beat Saber
4,15,1683275576970,MenuMainCamera,0.9027 0.0000 -0.2425 0.0000 0.0000 0.8784 -0....,0.9958 0.0897 0.0154 -0.0974 -0.0870 0.9880 -0...,P5,Beat Saber


In [None]:
# Print all unique camera names across all games. The FOV statistics cell
# further down groups its output by these same names.
print(cameradf['name'].unique())

['MenuMainCamera' 'MainCamera' 'Camera (eye)' 'Camera' 'LoadCamera'
 'ReflectionCamera_R' 'ReflectionCamera_L' 'Splatter Camera' 'poseUpdater'
 'Pause:SubCamera' 'Pause:Camera (eye)' 'Camera_Indicators'
 'Camera_HurtHUD']


In [None]:
# I also want to see the unique participants. These labels are the first token
# of each recording directory name (see getcam), so compare them against the
# self-report labels printed earlier before relying on the merge keys.
print(cameradf['participant'].unique())

['P5' 'P6' 'P3' 'P4' 'P1' 'P2' 'P16' 'P13' 'P14' 'P15' 'P12' 'P17'
 'VRLOG-6051750' 'VRLOG-6051805' 'VRLOG-6051213' 'VRLOG-6051819'
 'VRLOG-6061400' 'VRLOG-6061651' 'VRLOG-6061345' 'VRLOG-6061738'
 'VRLOG-6062028' 'P11' 'P8' 'P7' 'P10' 'P9']


In [None]:
# Mwahaha let's throw some pose.csv into the mix!
def getpose(gamename):
  """Load and stack every pose.csv recorded for one game.

  gamename -- key into the module-level `allfiles` dict; entries whose path
              does not contain 'pose.csv' are ignored.

  Each loaded table gets a 'participant' column holding the first
  space-separated token of the fifth '/'-separated path component, the same
  rule getcam applies to camera files.

  Returns the tables concatenated in file-list order with their original row
  indices, or an empty DataFrame when the game has no pose file.
  """
  loaded = []
  for file in allfiles[gamename]:
    if 'pose.csv' not in file: continue
    df = pd.read_csv(file)
    df['participant'] = file.split('/')[4].split(' ')[0]
    loaded.append(df)
  # A single concat at the end avoids re-copying the growing frame per file.
  if not loaded:
    return pd.DataFrame()
  return pd.concat(loaded)

# Stack the pose logs of every game into one DataFrame, labelling each row
# with the game it came from.
perpose = []
for gamename in gamedirs:
  df = getpose(gamename)
  df['game'] = gamename
  perpose.append(df)
posedf = pd.concat([pd.DataFrame(), *perpose])
posedf.head()

Unnamed: 0,framecounter,timestamp,device_id,deviceToAbsoluteTracking,velocity,angularVelocity,participant,game
0,1,1683275576635,0,0.9950 -0.0969 -0.0220 -0.0062 0.0932 0.9866 -...,0.0020 -0.0067 0.0039,-0.0705 0.1313 0.0114,P5,Beat Saber
1,1,1683275576635,1,0.9393 -0.3430 -0.0109 -0.0589 0.3259 0.9014 -...,0.0584 0.0234 0.0447,0.2473 -0.1045 -0.0184,P5,Beat Saber
2,1,1683275576635,2,0.8541 0.5163 -0.0628 0.0662 -0.5193 0.8395 -0...,-0.0961 0.1984 0.0489,0.9920 0.3670 -0.0545,P5,Beat Saber
3,2,1683275576643,0,0.9951 -0.0966 -0.0200 -0.0062 0.0931 0.9868 -...,0.0021 -0.0062 0.0048,-0.0694 0.1334 0.0078,P5,Beat Saber
4,2,1683275576643,1,0.9401 -0.3410 -0.0057 -0.0589 0.3236 0.8973 -...,0.0491 0.0415 0.0461,0.3787 -0.0290 -0.0192,P5,Beat Saber


In [None]:
# It's time for the moment of truth. We'll merge cameradf and allscore and see
# what we can do with it.
# Both merges below use pandas' default join on (participant, framecounter,
# game), so only camera rows that have a self-report score AND a pose sample
# for exactly the same frame survive.
# NOTE(review): this cell overwrites cameradf, so running it a second time
# merges the already-merged frame again — re-run the camera loading cell first.

print(cameradf)
print(allscore)
print(allscore['game'].unique())
cameradf = cameradf.merge(right=allscore, on=['participant', 'framecounter', 'game'])

# Hee hoo let's throw posedf in there too.
# posedf has one row per device_id for each frame, so every camera row is
# repeated once per matching pose row; the two 'timestamp' columns come out as
# timestamp_x (camera) and timestamp_y (pose).
cameradf = cameradf.merge(right=posedf, on=['participant', 'framecounter', 'game'])

cameradf.head()

       framecounter      timestamp               name  \
0                 3  1683275576675     MenuMainCamera   
1                 6  1683275576755     MenuMainCamera   
2                 9  1683275576820     MenuMainCamera   
3                12  1683275576874     MenuMainCamera   
4                15  1683275576970     MenuMainCamera   
...             ...            ...                ...   
24729         25077  1683277341943     Camera_HurtHUD   
24730         25077  1683277341943       Camera (eye)   
24731         25080  1683277342026  Camera_Indicators   
24732         25080  1683277342026     Camera_HurtHUD   
24733         25080  1683277342026       Camera (eye)   

                                              projection  \
0      0.9027 0.0000 -0.2425 0.0000 0.0000 0.8784 -0....   
1      0.9027 0.0000 -0.2425 0.0000 0.0000 0.8784 -0....   
2      0.9027 0.0000 -0.2425 0.0000 0.0000 0.8784 -0....   
3      0.9027 0.0000 -0.2425 0.0000 0.0000 0.8784 -0....   
4      0.9027 0

Unnamed: 0,framecounter,timestamp_x,name,projection,view,participant,game,score,timestamp_y,device_id,deviceToAbsoluteTracking,velocity,angularVelocity
0,1464,1683275619681,MainCamera,0.9027 0.0000 -0.2425 0.0000 0.0000 0.8784 -0....,0.9981 0.0511 0.0346 -0.0610 -0.0487 0.9967 -0...,P5,Beat Saber,1,1683275619687,0,0.9984 -0.0452 0.0332 0.0157 0.0474 0.9965 -0....,0.0138 -0.0113 -0.0511,0.1674 -0.1152 -0.1002
1,1464,1683275619681,MainCamera,0.9027 0.0000 -0.2425 0.0000 0.0000 0.8784 -0....,0.9981 0.0511 0.0346 -0.0610 -0.0487 0.9967 -0...,P5,Beat Saber,1,1683275619687,1,0.9915 -0.0139 0.1296 -0.0853 0.0578 0.9378 -0...,-0.0263 -0.2247 -0.1801,-0.3960 0.3043 -0.3445
2,1464,1683275619681,MainCamera,0.9027 0.0000 -0.2425 0.0000 0.0000 0.8784 -0....,0.9981 0.0511 0.0346 -0.0610 -0.0487 0.9967 -0...,P5,Beat Saber,1,1683275619687,2,0.9518 -0.1029 -0.2891 0.1577 -0.0100 0.9312 -...,0.2197 -0.0746 -0.0218,-0.5254 -0.8153 -0.2530
3,1671,1683275625075,MainCamera,0.9027 0.0000 -0.2425 0.0000 0.0000 0.8784 -0....,0.9845 0.1134 0.1335 0.0607 -0.1161 0.9932 0.0...,P5,Beat Saber,1,1683275625084,0,0.9838 -0.1152 0.1371 -0.2411 0.1132 0.9933 0....,-0.0218 -0.0152 0.0370,-0.1713 0.1301 0.0912
4,1671,1683275625075,MainCamera,0.9027 0.0000 -0.2425 0.0000 0.0000 0.8784 -0....,0.9845 0.1134 0.1335 0.0607 -0.1161 0.9932 0.0...,P5,Beat Saber,1,1683275625084,1,0.8857 0.4007 0.2346 -0.4390 -0.3064 0.8840 -0...,0.1059 0.6842 -0.5074,1.8486 -0.6242 -0.6973


In [None]:
# Summary statistics of the numeric columns of the merged frame, transposed so
# that each column gets its own row.
cameradf.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
timestamp_x,1263.0,1683754000000.0,737826200.0,1683191000000.0,1683277000000.0,1683626000000.0,1683793000000.0,1686055000000.0
score,1263.0,1.800475,1.134295,1.0,1.0,1.0,2.0,5.0
timestamp_y,1263.0,1683754000000.0,737826200.0,1683191000000.0,1683277000000.0,1683626000000.0,1683793000000.0,1686055000000.0
device_id,1263.0,1.0,0.81682,0.0,0.0,1.0,2.0,2.0


In [None]:
# Helper that turns the matrix strings found in the CSV logs into real matrices.
def str2mat(strmat):
  """Parse a whitespace-separated string of 16 numbers into a 4x4 float array.

  The numbers fill the matrix row by row. A string that does not contain
  exactly 16 numbers makes the reshape raise ValueError.
  """
  values = list(map(float, strmat.split()))
  return np.reshape(np.array(values), (4, 4))

In [None]:
# Helper functions for dealing with projection and view matrices.

# Derive the horizontal and vertical field of view (in degrees, not radians)
# and the aspect ratio from a projection matrix.
def getfov(projmat):
  """Return (xfovdeg, yfovdeg, aspectr) for a projection matrix.

  projmat -- indexable as projmat[row][col]; only the [0][0] and [1][1]
             scale terms are read.

  The vertical FOV is 2 * atan(1 / projmat[1][1]); the aspect ratio is
  projmat[1][1] / projmat[0][0]; the horizontal FOV is obtained by scaling the
  tangent of half the vertical FOV by that aspect ratio.
  """
  xscale = projmat[0][0]
  yscale = projmat[1][1]
  yfov = 2.0 * np.arctan(1.0 / yscale)
  aspectr = yscale / xscale
  xfov = 2.0 * np.arctan(np.tan(yfov / 2.0) * aspectr)
  return np.degrees(xfov), np.degrees(yfov), aspectr

# Determine if a given projection matrix represents a perspective projection or
# an orthographic projection.
def _rowprojmat(row):
  """Return row["projection"] as a 4x4 float array.

  The raw CSV value is a whitespace-separated string of 16 numbers; indexing
  that string directly (as these predicates used to) looks at characters
  rather than matrix entries and raises IndexError. An already parsed matrix
  is passed through unchanged.
  """
  proj = row["projection"]
  if isinstance(proj, str):
    proj = np.array([float(num) for num in proj.split()]).reshape(4, 4)
  return proj

def ispersp(row):
  """True when the row's projection matrix has [3][3] == 0 and [2][3] != 0."""
  projmat = _rowprojmat(row)
  return bool(projmat[3][3] == 0.0 and projmat[2][3] != 0.0)

def isortho(row):
  """True when the row's projection matrix has [3][3] == 1 and [2][3] == 0.

  NOTE(review): a typical OpenGL-style orthographic matrix has a non-zero
  [2][3] depth offset, so this test may be stricter than intended — confirm
  against the engine's matrix convention before relying on it.
  """
  projmat = _rowprojmat(row)
  return bool(projmat[3][3] == 1.0 and projmat[2][3] == 0.0)

# Extract a bunch of information from a given projection matrix.
def projdcmp(row):
  """Decompose the projection matrix string of one row into scalar features.

  row -- mapping/Series whose "projection" entry is the 16-number matrix
         string understood by str2mat.

  Returns a Series indexed "near", "far", "xfovdeg", "yfovdeg", "aspectr".
  near and far are projmat[2][3] divided by (projmat[2][2] - 1) and
  (projmat[2][2] + 1) respectively.
  NOTE(review): those are the clip-plane formulas for an OpenGL-style
  perspective matrix — presumably what the engine logs; confirm.
  """
  projmat = str2mat(row["projection"])
  near = projmat[2][3] / (projmat[2][2] - 1.0)

  # projmat[2][2] == -1 makes the denominator zero (a far plane at infinity).
  # The raw division then emits a RuntimeWarning and, with a negative
  # numerator, yields -inf. Silence the expected warning and report the far
  # distance as +inf instead.
  with np.errstate(divide="ignore"):
    far = projmat[2][3] / (projmat[2][2] + 1.0)
  if np.isinf(far):
    far = np.inf

  xfovdeg, yfovdeg, aspectr = getfov(projmat)
  return pd.Series([near, far, xfovdeg, yfovdeg, aspectr], index=["near",
                                                                  "far",
                                                                  "xfovdeg",
                                                                  "yfovdeg",
                                                                  "aspectr"])

# Extract a bunch of information from a given view matrix.
def viewdcmp(row):
  """Decompose the view matrix string of one row into array-valued features.

  row -- mapping/Series whose "view" entry is the 16-number matrix string
         understood by str2mat.

  Returns a Series indexed "position", "rotation", "forward", "up":
    position -- the first three entries of the last column,
    rotation -- the upper-left 3x3 block,
    forward  -- the negated third column of that block,
    up       -- the second column of that block.
  NOTE(review): reading the axes from the columns assumes the logged matrix
  stores the camera axes column-wise — confirm against the engine.
  """
  viewmat = str2mat(row["view"])
  position = viewmat[:3, 3]
  rotation = viewmat[:3, :3]
  forward = -viewmat[:3, 2]
  # Column 1, not column 2: reading column 2 here made `up` the exact negation
  # of `forward`, so the up1..up3 features carried no information of their own.
  up = viewmat[:3, 1]
  return pd.Series([position, rotation, forward, up], index=["position",
                                                             "rotation",
                                                             "forward",
                                                             "up"])

# Split the space-separated "velocity" string of a row into three numeric
# columns. (Parses the string itself; it does not go through str2mat.)
def velocity(row):
  """Return the row's "velocity" components as a Series indexed v1, v2, v3.

  Raises ValueError (from pandas) when the string does not hold exactly three
  numbers.
  """
  parts = list(map(float, row["velocity"].split()))
  return pd.Series(np.array(parts), index=["v1", "v2", "v3"])

def angular(row):
  """Return the row's "angularVelocity" components as a Series a1, a2, a3.

  The value is a space-separated string of three numbers; any other count
  makes pandas raise ValueError.
  """
  parts = list(map(float, row["angularVelocity"].split()))
  return pd.Series(np.array(parts), index=["a1", "a2", "a3"])

In [None]:
# Convert a 3x3 rotation matrix into a unit quaternion.
def rot2quat(rot):
  """Convert a 3x3 rotation matrix into a quaternion array [w, x, y, z].

  rot -- 2-D array indexable as rot[i, j].

  For a positive trace the usual closed form based on w = sqrt(1 + trace) / 2
  is used. That form divides by w, which is zero (or the square root of a
  negative number) for rotations of 180 degrees or close to it, so for a
  non-positive trace the component belonging to the largest diagonal entry is
  computed first and the others are derived from it. The result is returned
  with w >= 0 so that both paths pick the same one of the two equivalent
  quaternions.
  """
  r00, r11, r22 = rot[0, 0], rot[1, 1], rot[2, 2]
  trace = r00 + r11 + r22

  if trace > 0:
    w = np.sqrt(1 + rot[0, 0] + rot[1, 1] + rot[2, 2]) / 2
    x = (rot[2, 1] - rot[1, 2]) / (4 * w)
    y = (rot[0, 2] - rot[2, 0]) / (4 * w)
    z = (rot[1, 0] - rot[0, 1]) / (4 * w)
  elif r00 > r11 and r00 > r22:
    # x is the dominant component.
    s = 2 * np.sqrt(1 + r00 - r11 - r22)
    w = (rot[2, 1] - rot[1, 2]) / s
    x = s / 4
    y = (rot[0, 1] + rot[1, 0]) / s
    z = (rot[0, 2] + rot[2, 0]) / s
  elif r11 > r22:
    # y is the dominant component.
    s = 2 * np.sqrt(1 + r11 - r00 - r22)
    w = (rot[0, 2] - rot[2, 0]) / s
    x = (rot[0, 1] + rot[1, 0]) / s
    y = s / 4
    z = (rot[1, 2] + rot[2, 1]) / s
  else:
    # z is the dominant component.
    s = 2 * np.sqrt(1 + r22 - r00 - r11)
    w = (rot[1, 0] - rot[0, 1]) / s
    x = (rot[0, 2] + rot[2, 0]) / s
    y = (rot[1, 2] + rot[2, 1]) / s
    z = s / 4

  quat = np.array([w, x, y, z])
  return -quat if w < 0 else quat

# Row-wise adapter for rot2quat, meant to be used with DataFrame.apply.
def r2qwrap(row):
  """Return the quaternion of row["rotation"] as a labelled Series.

  The four values produced by rot2quat are stored, in order, under
  "rotquatw", "rotquatx", "rotquaty" and "rotquatz".
  """
  labels = ["rotquatw", "rotquatx", "rotquaty", "rotquatz"]
  return pd.Series(rot2quat(row["rotation"]).flatten(), index=labels)

In [None]:
# Let's use our helper functions to add some useful features to cameradf.
# Every helper is applied row by row (axis=1) and returns a Series whose
# entries become the new columns named on the left-hand side.
cameradf[["near", "far", "xfovdeg", "yfovdeg", "aspectr"]] = cameradf.apply(projdcmp, axis=1)
cameradf[["position", "rotation", "forward", "up"]] = cameradf.apply(viewdcmp, axis=1)
cameradf[["v1", "v2", "v3"]] = cameradf.apply(velocity, axis=1)
cameradf[["a1", "a2", "a3"]] = cameradf.apply(angular, axis=1)

# Add the quaternion representation of the rotation matrix.
# This has to run after the viewdcmp line above, because r2qwrap reads the
# "rotation" column that line creates.
cameradf[["rotquatw", "rotquatx", "rotquaty", "rotquatz"]] = cameradf.apply(r2qwrap, axis=1)

cameradf.tail()

  far = projmat[2][3] / (projmat[2][2] + 1.0)


Unnamed: 0,framecounter,timestamp_x,name,projection,view,participant,game,score,timestamp_y,device_id,...,v1,v2,v3,a1,a2,a3,rotquatw,rotquatx,rotquaty,rotquatz
1258,24312,1683277318915,Camera_HurtHUD,0.7510 0.0000 0.0000 0.0000 0.0000 0.7265 0.00...,-0.4381 0.0543 -0.8973 171.6034 -0.0809 0.9917...,P5,Monster Awakens,3,1683277318927,1,...,0.2169,0.083,0.0406,1.033,-0.4655,-0.8477,0.704219,-0.076539,-0.00071,-0.047996
1259,24312,1683277318915,Camera_HurtHUD,0.7510 0.0000 0.0000 0.0000 0.0000 0.7265 0.00...,-0.4381 0.0543 -0.8973 171.6034 -0.0809 0.9917...,P5,Monster Awakens,3,1683277318927,2,...,-0.0086,0.2084,-0.0703,0.6391,-0.2054,1.0836,0.704219,-0.076539,-0.00071,-0.047996
1260,24312,1683277318915,Camera (eye),0.9027 0.0000 0.2425 0.0000 0.0000 0.8784 -0.2...,-0.4381 0.0543 -0.8973 171.5110 -0.0809 0.9917...,P5,Monster Awakens,3,1683277318927,0,...,0.0927,0.0041,-0.0308,0.0549,-0.9866,-0.3272,0.704219,-0.076539,-0.00071,-0.047996
1261,24312,1683277318915,Camera (eye),0.9027 0.0000 0.2425 0.0000 0.0000 0.8784 -0.2...,-0.4381 0.0543 -0.8973 171.5110 -0.0809 0.9917...,P5,Monster Awakens,3,1683277318927,1,...,0.2169,0.083,0.0406,1.033,-0.4655,-0.8477,0.704219,-0.076539,-0.00071,-0.047996
1262,24312,1683277318915,Camera (eye),0.9027 0.0000 0.2425 0.0000 0.0000 0.8784 -0.2...,-0.4381 0.0543 -0.8973 171.5110 -0.0809 0.9917...,P5,Monster Awakens,3,1683277318927,2,...,-0.0086,0.2084,-0.0703,0.6391,-0.2054,1.0836,0.704219,-0.076539,-0.00071,-0.047996


In [None]:
# Quick summary statistics, used on the sickness score (and later the FOV
# columns) before doing any fancy analysis.
def stats(df, column):
  """Print the min, max, mean and standard deviation of df[column].

  The column name is printed first, followed by one tab-indented line per
  statistic. Nothing is returned.
  """
  values = df[column]
  print(column)
  summary = (
      ("\tmin\t", values.min()),
      ("\tmax\t", values.max()),
      ("\tmean\t", values.mean()),
      ("\tstd dev\t", values.std()),
  )
  for label, value in summary:
    print(label, value)

# Summarize the self-reported sickness score over every row of the merged frame.
stats(cameradf, "score")

score
	min	 1
	max	 5
	mean	 1.8004750593824228
	std dev	 1.1342945736646122


In [None]:
# FOV and score statistics, broken down by camera name: for each distinct
# camera, summarize only the rows that were logged by it.
camtypes = cameradf["name"].unique()
for camtype in camtypes:
  print(camtype)
  camrows = cameradf[cameradf["name"] == camtype]
  for column in ("xfovdeg", "yfovdeg", "score"):
    stats(camrows, column)

MainCamera
xfovdeg
	min	 95.85486663314946
	max	 95.85486663314946
	mean	 95.85486663314944
	std dev	 1.4283175053588122e-14
yfovdeg
	min	 97.4078563174975
	max	 97.4078563174975
	mean	 97.40785631749749
	std dev	 1.4283175053588122e-14
score
	min	 1
	max	 5
	mean	 1.8484848484848484
	std dev	 1.1897833481707565
MenuMainCamera
xfovdeg
	min	 95.85486663314946
	max	 95.85486663314946
	mean	 95.85486663314948
	std dev	 2.874286888342292e-14
yfovdeg
	min	 97.4078563174975
	max	 97.4078563174975
	mean	 97.40785631749748
	std dev	 2.874286888342292e-14
score
	min	 1
	max	 5
	mean	 2.0
	std dev	 1.381698559415515
Camera (eye)
xfovdeg
	min	 95.85486663314946
	max	 95.85486663314946
	mean	 95.85486663314944
	std dev	 1.4222200708797986e-14
yfovdeg
	min	 97.4078563174975
	max	 97.4078563174975
	mean	 97.40785631749749
	std dev	 1.4222200708797986e-14
score
	min	 1
	max	 5
	mean	 1.7129186602870814
	std dev	 1.0470751586882254
Camera
xfovdeg
	min	 95.85486663314946
	max	 101.50235502224486
	mean	

In [None]:
# Before modelling, tidy cameradf: turn the array-valued features into plain
# scalar columns and remove everything we do not feed to the model.
# TODO I should turn this into a function so that I can do it to multiple DFs.

# Expand each 3-vector column into three scalar columns and remove the
# original. The order (position, forward, up) fixes the final column layout.
for source, prefix in (("position", "pos"), ("forward", "forward"), ("up", "up")):
  parts = [f"{prefix}{i}" for i in (1, 2, 3)]
  cameradf[parts] = pd.DataFrame(cameradf[source].to_list())
  cameradf = cameradf.drop(columns=source)

# Flatten the 3x3 rotation block row by row into rot0..rot8.
rotvals = cameradf["rotation"].apply(lambda x: pd.Series(x.flatten()))
rotvals.columns = [f"rot{i}" for i in range(9)]
cameradf = pd.concat([cameradf.drop(columns="rotation"), rotvals], axis=1)

cameradf = cameradf.drop(columns=[
    # Raw strings that were already parsed into v1..v3 / a1..a3.
    "velocity",
    "angularVelocity",
    # TODO verify that we don't actually need these
    "timestamp_x",
    "timestamp_y",
    "device_id",
    # TODO Maybe I should explore this further?
    "deviceToAbsoluteTracking",
    # Camera name and the raw matrix strings the features were derived from.
    "name",
    "projection",
    "view",
])

cameradf.head()

Unnamed: 0,framecounter,participant,game,score,near,far,xfovdeg,yfovdeg,aspectr,v1,...,up3,rot0,rot1,rot2,rot3,rot4,rot5,rot6,rot7,rot8
0,1464,P5,Beat Saber,1,0.1,-inf,95.854867,97.407856,0.973081,0.0138,...,-0.9972,0.9981,0.0511,0.0346,-0.0487,0.9967,-0.0656,0.0378,-0.0638,-0.9972
1,1464,P5,Beat Saber,1,0.1,-inf,95.854867,97.407856,0.973081,-0.0263,...,-0.9972,0.9981,0.0511,0.0346,-0.0487,0.9967,-0.0656,0.0378,-0.0638,-0.9972
2,1464,P5,Beat Saber,1,0.1,-inf,95.854867,97.407856,0.973081,0.2197,...,-0.9972,0.9981,0.0511,0.0346,-0.0487,0.9967,-0.0656,0.0378,-0.0638,-0.9972
3,1671,P5,Beat Saber,1,0.1,-inf,95.854867,97.407856,0.973081,-0.0218,...,-0.991,0.9845,0.1134,0.1335,-0.1161,0.9932,0.0123,0.1312,0.0276,-0.991
4,1671,P5,Beat Saber,1,0.1,-inf,95.854867,97.407856,0.973081,0.1059,...,-0.991,0.9845,0.1134,0.1335,-0.1161,0.9932,0.0123,0.1312,0.0276,-0.991


In [None]:
# Let's do some time series shenanigans. We can start with just plotting stuff.
# One point per merged row: frame counter on the x axis, reported score on the
# y axis, coloured by game. This has to run before the cell that drops the
# "framecounter" and "game" columns.
fig = px.scatter(cameradf,
                 x="framecounter",
                 y="score",
                 color="game",
                 template="plotly_white")
fig.show()

In [None]:
# Remove the identifier columns so that only the score and the numeric
# features remain.
cameradf = cameradf.drop(columns=["framecounter", "participant", "game"])
cameradf.head()

Unnamed: 0,score,near,far,xfovdeg,yfovdeg,aspectr,v1,v2,v3,a1,...,up3,rot0,rot1,rot2,rot3,rot4,rot5,rot6,rot7,rot8
0,1,0.1,-inf,95.854867,97.407856,0.973081,0.0138,-0.0113,-0.0511,0.1674,...,-0.9972,0.9981,0.0511,0.0346,-0.0487,0.9967,-0.0656,0.0378,-0.0638,-0.9972
1,1,0.1,-inf,95.854867,97.407856,0.973081,-0.0263,-0.2247,-0.1801,-0.396,...,-0.9972,0.9981,0.0511,0.0346,-0.0487,0.9967,-0.0656,0.0378,-0.0638,-0.9972
2,1,0.1,-inf,95.854867,97.407856,0.973081,0.2197,-0.0746,-0.0218,-0.5254,...,-0.9972,0.9981,0.0511,0.0346,-0.0487,0.9967,-0.0656,0.0378,-0.0638,-0.9972
3,1,0.1,-inf,95.854867,97.407856,0.973081,-0.0218,-0.0152,0.037,-0.1713,...,-0.991,0.9845,0.1134,0.1335,-0.1161,0.9932,0.0123,0.1312,0.0276,-0.991
4,1,0.1,-inf,95.854867,97.407856,0.973081,0.1059,0.6842,-0.5074,1.8486,...,-0.991,0.9845,0.1134,0.1335,-0.1161,0.9932,0.0123,0.1312,0.0276,-0.991


In [None]:
# We have to normalize our data for best results. We'll do min-max normalization
# here.
def mmnormal(df, range=(0, 1)):
  """Min-max scale a DataFrame (column-wise) or a Series into `range`.

  df    -- numeric DataFrame or Series.
  range -- (low, high) bounds of the output interval. The name shadows the
           builtin but is kept because callers may pass it by keyword.

  Each value x becomes (x - min) / (max - min) * (high - low) + low, using the
  min and max of its own column. A column whose values are all identical has
  max - min == 0; instead of dividing by zero (which turned the whole column
  into NaN) such a column is mapped to `low`.
  """
  low, high = range[0], range[1]
  lowest = df.min()
  span = df.max() - lowest
  # Replace a zero span by 1 so constant columns come out as `low`.
  if isinstance(span, pd.Series):
    span = span.mask(span == 0, 1)
  elif span == 0:
    span = 1
  return (df - lowest) / span * (high - low) + low

# TODO Don't normalize the score; make it categorical instead.
# Scale every feature column but leave the score label untouched; the label is
# re-attached afterwards, which makes it the last column.
scores = cameradf['score']
cameradf = mmnormal(cameradf.drop(columns=['score'])).assign(score=scores)
cameradf.head()

Unnamed: 0,near,far,xfovdeg,yfovdeg,aspectr,v1,v2,v3,a1,a2,...,rot0,rot1,rot2,rot3,rot4,rot5,rot6,rot7,rot8,score
0,0.045232,,0.296473,0.779285,0.008141,0.383769,0.470518,0.486806,0.494656,0.486147,...,0.99905,0.377499,0.517331,0.487387,0.998282,0.386174,0.515656,0.400532,0.001411,1
1,0.045232,,0.296473,0.779285,0.008141,0.369402,0.415086,0.438179,0.443369,0.522927,...,0.99905,0.377499,0.517331,0.487387,0.998282,0.386174,0.515656,0.400532,0.001411,1
2,0.045232,,0.296473,0.779285,0.008141,0.457542,0.454076,0.497851,0.43159,0.424765,...,0.99905,0.377499,0.517331,0.487387,0.998282,0.386174,0.515656,0.400532,0.001411,1
3,0.045232,,0.296473,0.779285,0.008141,0.371014,0.469505,0.520017,0.463824,0.507654,...,0.992247,0.483057,0.566798,0.439086,0.996459,0.43443,0.562902,0.455816,0.004537,1
4,0.045232,,0.296473,0.779285,0.008141,0.416768,0.651177,0.314799,0.647699,0.44152,...,0.992247,0.483057,0.566798,0.439086,0.996459,0.43443,0.562902,0.455816,0.004537,1


In [None]:
# We will also divide what we got into training and testing DFs.
# This is a seeded (random_state=42), shuffled 80/20 split over individual
# rows; both parts are written to CSV so AutoGluon can load them by path.
# NOTE(review): the earlier merge with posedf repeats every camera sample once
# per pose device, so rows sharing the same frame's camera features can land
# on both sides of this row-level split — consider splitting by
# participant/frame before trusting the test metrics.
# NOTE(review): the features were min-max scaled before this split, so the
# scaling statistics already include the test rows.
traindf, testdf = train_test_split(cameradf, test_size=0.2, random_state=42, shuffle=True)
traindf.to_csv("train.csv", index=False)
testdf.to_csv("test.csv", index=False)

In [None]:
# Heatmap of the pairwise correlations between the columns of a DataFrame.
def plotcorr(df):
  """Show df.corr() as an interactive plotly image.

  Both axes are labelled "Features" and carry the column names; the colour
  scale is labelled "Correlation". Nothing is returned.
  """
  matrix = df.corr()
  axislabels = dict(x="Features", y="Features", color="Correlation")
  px.imshow(matrix, labels=axislabels, x=matrix.columns, y=matrix.columns).show()

# Give me the correlation matrix for cameradf.
plotcorr(cameradf)

In [None]:
# Bar chart of how strongly every column correlates with the score.
def plotbar(df):
  """Show the "score" column of df.corr() as a plotly bar chart.

  One bar per column of `df` (including the score itself), labelled with the
  column name. Nothing is returned.
  """
  scorcorr = df.corr()["score"]
  bars = go.Bar(x=scorcorr.index, y=scorcorr.values)
  go.Figure(data=[bars]).show()

plotbar(cameradf)

In [None]:
def scatter(df, var1, var2, colorcol):
  """Show a plotly scatter plot of df[var2] against df[var1].

  Points are coloured by the `colorcol` column. Nothing is returned.
  """
  px.scatter(df, x=var1, y=var2, color=colorcol).show()

# How does the score vary with one of the rotation matrix entries?
scatter(cameradf, "rot6", "score", "score")

In [None]:
# Now let's use AutoGluon.
# Train a multiclass classifier for the 'score' column on train.csv with the
# library defaults (no presets or time limit are passed), then predict the
# held-out rows in test.csv.
# NOTE(review): `predictions` is not used by any of the cells visible below.
predictor = TabularPredictor(label='score', problem_type='multiclass').fit(train_data='train.csv')
predictions = predictor.predict('test.csv')

No path specified. Models will be saved in: "AutogluonModels/ag-20240529_042656"
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets.
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='best_quality'   : Maximize accuracy. Default time_limit=3600.
	presets='high_quality'   : Strong accuracy with fast inference speed. Default time_limit=3600.
	presets='good_quality'   : Good accuracy with very fast inference speed. Default time_limit=3600.
	presets='medium_quality' : Fast training time, ideal for initial prototyping.
Loaded data from: train.csv | Columns = 34 / 34 | Rows = 1010 -> 1010
Beginning AutoGluon training ...
AutoGluon will save models to "AutogluonModels/ag-20240529_042656"
AutoGluon Version:  1.1.0
Python Version:     3.10.12
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP PREEMPT_DYNAMIC Sun Apr 28 1

In [None]:
# Score the trained predictor on the held-out test rows.
predictor.evaluate('test.csv', silent=True)

Loaded data from: test.csv | Columns = 34 / 34 | Rows = 253 -> 253


{'accuracy': 0.9209486166007905,
 'balanced_accuracy': 0.9037567658330371,
 'mcc': 0.8696043116598308}

In [None]:
# Compare the individual models AutoGluon trained: one bar per model showing
# its score on the test set.
ldrboard = predictor.leaderboard('test.csv')
fig = px.bar(ldrboard, x='model', y='score_test')
fig.show()

Loaded data from: test.csv | Columns = 34 / 34 | Rows = 253 -> 253


In [None]:
# Same leaderboard, but plotting each model's prediction time on the test set.
# Requires `ldrboard` from the previous cell.
fig = px.bar(ldrboard, x='model', y='pred_time_test')
fig.show()

In [None]:
# Feature importance computed by the predictor on the test rows, shown as one
# bar per feature (the feature names are the index of the returned frame).
features = predictor.feature_importance('test.csv')
fig = px.bar(features, x=features.index, y='importance')
fig.show()

Loaded data from: test.csv | Columns = 34 / 34 | Rows = 253 -> 253
These features in provided data are not utilized by the predictor and will be ignored: ['far', 'rot2', 'rot5', 'rot8']
Computing feature importance via permutation shuffling for 29 features using 253 rows with 5 shuffle sets...
	54.86s	= Expected runtime (10.97s per shuffle set)
	17.9s	= Actual runtime (Completed 5 of 5 shuffle sets)
