In [3]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
from scipy import stats
import tensorflow as tf
import seaborn as sns
from pylab import rcParams
from sklearn import metrics
from sklearn.model_selection import train_test_split
from scipy.stats import zscore
%matplotlib inline
sns.set(style='whitegrid', palette='muted', font_scale=1.5)
rcParams['figure.figsize'] = 14, 8
RANDOM_SEED = 42
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

In [4]:
# Load the phone accelerometer recordings and keep only rows that carry an
# activity label in the 'gt' column.
df = pd.read_csv("./data_old/Phones_accelerometer.csv")
df = df.dropna(subset=['gt'])

# Z-score each axis within its own 'Device' group, so every device's x/y/z
# readings are standardized against that device's own mean and deviation.
# NOTE(review): only 'gt' is NaN-filtered above; a NaN in x/y/z would
# presumably propagate through zscore for that device - confirm there are none.
columns_to_standardize = ['x', 'y', 'z']
df[columns_to_standardize] = df.groupby('Device')[columns_to_standardize].transform(zscore)

# The preview goes last: a bare expression is rendered only when it is the
# final statement of a cell (it was previously evaluated and discarded).
df.head()


In [5]:
# Windowing constants.
N_FEATURES = 3       # values per time step: the x, y and z axes
N_TIME_STEPS = 200   # number of consecutive samples in one window
step = 20            # stride between the starts of consecutive windows

In [6]:
# Slide a window of N_TIME_STEPS samples over the frame (stride `step`) and
# collect, for every window, the three axis slices plus one activity label.
segments = []
labels = []

# Extract the columns once; slicing the arrays inside the loop is positional
# and avoids rebuilding them (and re-casting the labels) on every iteration.
x_values = df['x'].values
y_values = df['y'].values
z_values = df['z'].values
gt_values = df['gt'].astype(str).values

# "+ 1" keeps the last full window when it ends exactly on the final row.
for i in range(0, len(df) - N_TIME_STEPS + 1, step):
    window = slice(i, i + N_TIME_STEPS)

    # Label the window with its most frequent activity. np.unique returns the
    # values sorted, so indexing [0] alone would pick the alphabetically first
    # label rather than the mode; ties resolve to the first sorted value.
    activities, counts = np.unique(gt_values[window], return_counts=True)
    label = activities[np.argmax(counts)]

    # Each segment is stored as [xs, ys, zs], i.e. (N_FEATURES, N_TIME_STEPS).
    segments.append([x_values[window], y_values[window], z_values[window]])
    labels.append(label)

In [7]:
# `segments` holds one [xs, ys, zs] triple per window, so the stacked array is
# (n_windows, N_FEATURES, N_TIME_STEPS). A plain reshape to
# (-1, N_TIME_STEPS, N_FEATURES) would reinterpret the buffer and put three
# consecutive x samples in a "time step" row; swapping the last two axes gives
# rows of (x_t, y_t, z_t) instead.
# The leading reshape is a no-op for non-empty input and keeps the empty case
# three-dimensional so the transpose stays valid.
# NOTE(review): flattening this array now yields x0,y0,z0,x1,... per window
# rather than all x, then all y, then all z - confirm consumers of the CSVs.
reshaped_segments = (
    np.asarray(segments, dtype=np.float32)
    .reshape(-1, N_FEATURES, N_TIME_STEPS)
    .transpose(0, 2, 1)
)
labels_np = np.asarray(labels)

In [8]:
# Hold out 20% of the windows for testing. The split is seeded with
# RANDOM_SEED so that re-running the notebook reproduces the same partition.
# NOTE(review): windows overlap (step < N_TIME_STEPS), so a random split places
# windows that share samples on both sides - consider a grouped or
# chronological split if this feeds a model evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    reshaped_segments, labels_np, test_size=0.2, random_state=RANDOM_SEED
)

In [9]:
# Collapse every window into a single row of N_TIME_STEPS * N_FEATURES values
# so each split becomes a 2-D table with one window per row.
flat_width = N_TIME_STEPS * N_FEATURES
x_train_reshaped = np.reshape(X_train, (len(X_train), flat_width))
x_test_reshaped = np.reshape(X_test, (len(X_test), flat_width))

In [10]:
# Write the flattened feature tables as comma-separated numeric files.
for csv_name, features in (
    ("x_train.csv", x_train_reshaped),
    ("x_test.csv", x_test_reshaped),
):
    np.savetxt(csv_name, features, delimiter=",")

# Labels are strings, so they are written with a string format.
for csv_name, targets in (
    ("y_train.csv", y_train),
    ("y_test.csv", y_test),
):
    np.savetxt(csv_name, targets, delimiter=",", fmt="%s")

In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pylab import rcParams

# Plot setup: seaborn theme first, then the matplotlib overrides, in the same
# order as before so the overrides are set after the theme call.
sns.set(style='whitegrid', palette='muted', font_scale=1.5)
rcParams['figure.figsize'] = (14, 8)

# Render figure text through LaTeX, using a serif font family.
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

# This cell re-imports its dependencies above, but zscore was left out; import
# it here so the cell does not rely on an earlier cell having been run.
from scipy.stats import zscore

# Load the data and keep only rows that carry an activity label in 'gt'.
# (A bare `df.head()` used to sit here; mid-cell it was evaluated and
# discarded, so it has been removed.)
df = pd.read_csv("./data_old/Phones_accelerometer.csv")
df = df.dropna(subset=['gt'])

# Z-score each axis within its own 'Device' group.
# NOTE(review): only 'gt' is NaN-filtered above; a NaN in x/y/z would
# presumably propagate through zscore for that device - confirm there are none.
columns_to_standardize = ['x', 'y', 'z']
df[columns_to_standardize] = df.groupby('Device')[columns_to_standardize].transform(zscore)
# Windowing constants.
N_FEATURES = 3       # values per time step: the x, y and z axes
N_TIME_STEPS = 200   # number of consecutive samples in one window
step = 20            # stride between the starts of consecutive windows

# Buffers for the windowed data.
segments = []
labels = []
devices = []  # device of the first sample of each window

# Extract the columns once; slicing the arrays inside the loop is positional
# and avoids rebuilding them (and re-casting the labels) on every iteration.
x_values = df['x'].values
y_values = df['y'].values
z_values = df['z'].values
gt_values = df['gt'].astype(str).values
device_values = df['Device'].values

# Slide a window of N_TIME_STEPS samples over the frame with stride `step`.
# "+ 1" keeps the last full window when it ends exactly on the final row.
# NOTE(review): windows run over the whole frame, so one that straddles two
# devices is still attributed to the device of its first row - confirm this
# is acceptable for the per-device export.
for i in range(0, len(df) - N_TIME_STEPS + 1, step):
    window = slice(i, i + N_TIME_STEPS)

    # Label the window with its most frequent activity. np.unique returns the
    # values sorted, so indexing [0] alone would pick the alphabetically first
    # label rather than the mode; ties resolve to the first sorted value.
    activities, counts = np.unique(gt_values[window], return_counts=True)
    label = activities[np.argmax(counts)]
    device = device_values[i]

    # Each segment is stored as [xs, ys, zs], i.e. (N_FEATURES, N_TIME_STEPS).
    segments.append([x_values[window], y_values[window], z_values[window]])
    labels.append(label)
    devices.append(device)

# Convert the collected lists to NumPy arrays.
# `segments` holds one [xs, ys, zs] triple per window, so the stacked array is
# (n_windows, N_FEATURES, N_TIME_STEPS). A plain reshape to
# (-1, N_TIME_STEPS, N_FEATURES) would reinterpret the buffer and put three
# consecutive x samples in a "time step" row; swapping the last two axes gives
# rows of (x_t, y_t, z_t) instead.
# The leading reshape is a no-op for non-empty input and keeps the empty case
# three-dimensional so the transpose stays valid.
# NOTE(review): flattening this array now yields x0,y0,z0,x1,... per window
# rather than all x, then all y, then all z - confirm consumers of the CSVs.
reshaped_segments = (
    np.asarray(segments, dtype=np.float32)
    .reshape(-1, N_FEATURES, N_TIME_STEPS)
    .transpose(0, 2, 1)
)
labels_np = np.asarray(labels)
devices_np = np.asarray(devices)  # device name per window, aligned with labels_np


In [16]:
# NOTE(review): prints the literal 2 and nothing else; looks like a leftover
# scratch/sanity cell - confirm it can be deleted.
print(2)

2


In [17]:

# Export one feature file and one label file for every distinct device.
row_width = N_TIME_STEPS * N_FEATURES
device_set = np.unique(devices_np)
for device in device_set:
    # Positions of the windows attributed to this device.
    (device_indices,) = np.where(devices_np == device)
    device_labels = labels_np[device_indices]
    device_segments = reshaped_segments[device_indices]

    # Features: each window flattened to a single row of accelerometer values.
    # Targets: the matching activity labels, in the same order.
    device_x = device_segments.reshape(-1, row_width)
    device_y = device_labels

    # Persist both tables as CSV; labels need a string format.
    np.savetxt(f"device{device}_x.csv", device_x, delimiter=",")
    np.savetxt(f"device{device}_y.csv", device_y, delimiter=",", fmt="%s")