In [6]:
from scipy.io import loadmat
import pandas as pd
import numpy as np

# Read the .mat file (loadmat returns a mapping indexed by MATLAB variable name).
data = loadmat("./data/golfDB.mat")

# Convert the data into a pandas DataFrame.
# The MATLAB struct is typically loaded as a 1x1 ndarray containing a structured array.
# We need to extract the struct and convert its fields (each usually a 1xN cell/array)
# into columns of a pandas DataFrame.
# `g` is the raw 'golfDB' variable; the conversion itself happens further down.
g = data['golfDB']

def _unwrap_matlab_value(value):
	"""Recursively convert a value loaded from a .mat file into plain Python objects.

	Non-array values are returned unchanged. Arrays are squeezed first; then:
	- a zero-dimensional result yields its single element (unwrapped again
	  in case that element is itself an array, e.g. a 1x1 cell),
	- an object array is unwrapped element by element into a list,
	- any other array becomes a (possibly nested) list via ``tolist()``.
	"""
	if not isinstance(value, np.ndarray):
		return value

	value = np.squeeze(value)
	if value.ndim == 0:
		# np.squeeze returns a 0-d *array* here, and np.isscalar() is False for
		# 0-d arrays, so the dimensionality is checked instead. item() returns
		# the sole element, which may itself need unwrapping.
		return _unwrap_matlab_value(value.item())
	if value.dtype == object:
		# Iterating the array (rather than its tolist()) keeps sub-arrays as
		# arrays, so object arrays with more than one dimension are unwrapped
		# at every level.
		return [_unwrap_matlab_value(element) for element in value]
	return value.tolist()


def matlab_struct_to_df(mat_struct):
	"""Convert a MATLAB struct (as a NumPy structured array) into a DataFrame.

	Each struct field becomes one column. A field that holds several values
	(one per record of a struct array, or a 1xN cell/array inside a single
	struct) contributes one row per value. A field that holds a single value
	is repeated to match the row count of the other columns, or produces a
	one-row frame when every field is single-valued.

	Args:
		mat_struct: A structured ndarray or structured scalar, i.e. an object
			whose ``dtype.names`` is not ``None``. A one-element ndarray is
			unwrapped to its single element first.

	Returns:
		pandas.DataFrame with one column per field name, in field order.

	Raises:
		ValueError: If ``mat_struct`` has no ``dtype`` or its dtype has no
			named fields. pandas may also raise ``ValueError`` when the
			multi-valued fields have different lengths.
	"""
	# unwrap 1-element ndarray if necessary
	if isinstance(mat_struct, np.ndarray) and mat_struct.size == 1:
		mat_struct = mat_struct.flat[0]

	if not hasattr(mat_struct, 'dtype') or mat_struct.dtype.names is None:
		raise ValueError("Provided object is not a MATLAB struct array")

	columns = {}
	single_valued = set()
	for name in mat_struct.dtype.names:
		field = np.squeeze(mat_struct[name])
		if np.ndim(field) == 0:
			# One value only (0-d array or NumPy scalar): remember it so it can
			# be expanded to the right number of rows once all fields are known.
			single_valued.add(name)
			columns[name] = _unwrap_matlab_value(np.asarray(field))
		else:
			columns[name] = _unwrap_matlab_value(field)

	if single_valued:
		# Row count is dictated by the multi-valued columns; with none present
		# the result is a single-row frame (pandas rejects all-scalar dicts).
		n_rows = max(
			(len(values) for name, values in columns.items() if name not in single_valued),
			default=1,
		)
		for name in single_valued:
			columns[name] = [columns[name]] * n_rows

	return pd.DataFrame(columns)

# Build the DataFrame from the raw 'golfDB' struct `g`.
df = matlab_struct_to_df(g)


In [None]:
# Preview the first rows of the converted frame.
df.head()


Unnamed: 0,id,youtube_id,player,sex,club,view,slow,events,bbox,split
0,0,f1BWA5F87Jc,SANDRA GAL,f,driver,down-the-line,0,"[408, 455, 473, 476, 490, 495, 498, 501, 514, ...","[0.09765625000000001, 0.006944444444444444, 0....",3
1,1,f1BWA5F87Jc,SANDRA GAL,f,driver,down-the-line,1,"[814, 854, 917, 931, 988, 1006, 1019, 1030, 10...","[0.039062500000000014, 0.0006944444444444445, ...",3
2,2,tA1iotgtMyc,CHRIS DIMARCO,m,driver,down-the-line,0,"[521, 659, 678, 683, 692, 696, 698, 701, 715, ...","[0.165625, 0.0006944444444444445, 0.48359375, ...",3
3,3,tA1iotgtMyc,CHRIS DIMARCO,m,driver,down-the-line,1,"[1106, 1190, 1244, 1264, 1300, 1313, 1324, 133...","[0.18515625, 0.0006944444444444445, 0.465625, ...",3
4,4,wDCKLePrwHA,BROOKE HENDERSON,f,driver,down-the-line,0,"[157, 170, 183, 188, 197, 201, 205, 207, 220, ...","[0.11015625, 0.0006944444444444445, 0.4984375,...",3


In [None]:
# Read the .pkl file.
# NOTE(review): this rebinds `df`, replacing the frame built from the .mat file;
# presumably the pickle is a cached copy of the same table - confirm.
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# from a trusted source.
df = pd.read_pickle("./data/golfDB.pkl")
df.head()


Unnamed: 0,id,youtube_id,player,sex,club,view,slow,events,bbox,split
0,0,f1BWA5F87Jc,SANDRA GAL,f,driver,down-the-line,0,"[408, 455, 473, 476, 490, 495, 498, 501, 514, ...","[0.09765625000000001, 0.006944444444444444, 0....",3
1,1,f1BWA5F87Jc,SANDRA GAL,f,driver,down-the-line,1,"[814, 854, 917, 931, 988, 1006, 1019, 1030, 10...","[0.039062500000000014, 0.0006944444444444445, ...",3
2,2,tA1iotgtMyc,CHRIS DIMARCO,m,driver,down-the-line,0,"[521, 659, 678, 683, 692, 696, 698, 701, 715, ...","[0.165625, 0.0006944444444444445, 0.48359375, ...",3
3,3,tA1iotgtMyc,CHRIS DIMARCO,m,driver,down-the-line,1,"[1106, 1190, 1244, 1264, 1300, 1313, 1324, 133...","[0.18515625, 0.0006944444444444445, 0.465625, ...",3
4,4,wDCKLePrwHA,BROOKE HENDERSON,f,driver,down-the-line,0,"[157, 170, 183, 188, 197, 201, 205, 207, 220, ...","[0.11015625, 0.0006944444444444445, 0.4984375,...",3
