## original

In [10]:
# 验证 tabularmark 的不可感知性

# 导入所需的库
import pandas as pd 
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import json


# 读取数据
file_path = "/home/zhengyihao/dataset/covertype/covtype_with_key.subset.data"
ordinary_data = pd.read_csv(file_path)

# 数据预处理
X_orignal = ordinary_data.iloc[:,0:-1] 
y_original = ordinary_data.iloc[:,-1] 


# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X_orignal, y_original, test_size=0.3, random_state=42)

y_test = y_test - 1
y_train = y_train - 1

# 使用 XGBoost 创建模型
model = XGBClassifier(n_estimators = 10)

# 训练模型
model.fit(X_train, y_train)

# 预测测试集数据
y_pred = model.predict(X_test)

# 评估模型
# accuracy = accuracy_score(y_test, y_pred)
print(classification_report(y_test, y_pred))

# 评估模型
classification_dict = classification_report(y_test, y_pred, output_dict=True)
classification_df = pd.DataFrame(classification_dict).transpose()
classification_df = classification_df['f1-score']

# 生成适合JSON的dict
classification_json = classification_df.to_dict()
classification_json['algorithm'] = 'original'

# 将数据保存到JSON文件
with open('classification_report.json', 'a') as json_file:
    json.dump(classification_json, json_file)


              precision    recall  f1-score   support

           0       0.69      0.71      0.70       590
           1       0.78      0.53      0.63       635
           2       0.79      0.74      0.76       587
           3       0.90      0.97      0.94       619
           4       0.79      0.93      0.85       589
           5       0.77      0.79      0.78       582
           6       0.89      0.95      0.92       598

    accuracy                           0.80      4200
   macro avg       0.80      0.81      0.80      4200
weighted avg       0.80      0.80      0.80      4200



## tabularmark

In [11]:
# 导入所需的库
import pandas as pd 
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report


# 读取数据
file_path = "/home/zhengyihao/dataset/covertype/covtype_with_key.subset.data"
ordinary_data = pd.read_csv(file_path)

file_path = "/home/zhengyihao/dataset/covertype/tabularmark_covertype.data.csv"
watermarked_data = pd.read_csv(file_path)

# 数据预处理
X_orignal = ordinary_data.iloc[:,0:-1] 
y_original = ordinary_data.iloc[:,-1] 

X_watermark = watermarked_data.iloc[:,0:-1] 
y_watermark = watermarked_data.iloc[:,-1] 

# 划分训练集和测试集
_, X_test, _, y_test = train_test_split(X_orignal, y_original, test_size=0.3, random_state=42)
X_train, _, y_train, _ = train_test_split(X_watermark, y_watermark, test_size=0.3, random_state=42)

y_test = y_test - 1
y_train = y_train - 1

# 使用 XGBoost 创建模型
model = XGBClassifier(n_estimators = 10)

# 训练模型
model.fit(X_train, y_train)

# 预测测试集数据
y_pred = model.predict(X_test)

# 评估模型
accuracy = accuracy_score(y_test, y_pred)
print(classification_report(y_test, y_pred))

# 评估模型
classification_dict = classification_report(y_test, y_pred, output_dict=True)
classification_df = pd.DataFrame(classification_dict).transpose()
classification_df = classification_df['f1-score']

# 生成适合JSON的dict
classification_json = classification_df.to_dict()
classification_json['algorithm'] = 'tabulamark'

# 将数据保存到JSON文件
with open('classification_report.json', 'a') as json_file:
    json.dump(classification_json, json_file)


              precision    recall  f1-score   support

           0       0.69      0.69      0.69       590
           1       0.76      0.56      0.64       635
           2       0.80      0.74      0.77       587
           3       0.90      0.97      0.93       619
           4       0.78      0.92      0.84       589
           5       0.80      0.81      0.80       582
           6       0.90      0.96      0.93       598

    accuracy                           0.81      4200
   macro avg       0.80      0.81      0.80      4200
weighted avg       0.80      0.81      0.80      4200



## histogrammark

In [12]:
# 导入所需的库
import pandas as pd 
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report


# 读取数据
file_path = "/home/zhengyihao/dataset/covertype/covtype_with_key.subset.data"
ordinary_data = pd.read_csv(file_path)
ordinary_data.drop(columns=['primary_key'], inplace=True)

file_path = "/home/zhengyihao/dataset/covertype/histogrammark_covertype.data.csv"
watermarked_data = pd.read_csv(file_path)
watermarked_data.drop(columns=['y_hat'], inplace=True)
watermarked_data = watermarked_data.sort_values(by='primary_key')
watermarked_data.drop(columns=['primary_key'], inplace=True)
watermarked_data.drop(columns=['group_number'], inplace=True)

# 数据预处理
X_orignal = ordinary_data.iloc[:,0:-1] 
y_original = ordinary_data.iloc[:,-1] 

X_watermark = watermarked_data.iloc[:,0:-1] 
y_watermark = watermarked_data.iloc[:,-1] 

# 划分训练集和测试集
_, X_test, _, y_test = train_test_split(X_orignal, y_original, test_size=0.3, random_state=42)
X_train, _, y_train, _ = train_test_split(X_watermark, y_watermark, test_size=0.3, random_state=42)

y_test = y_test - 1
y_train = y_train - 1

# 使用 XGBoost 创建模型
model = XGBClassifier(n_estimators = 10)

# 训练模型
model.fit(X_train, y_train)

# 预测测试集数据
y_pred = model.predict(X_test)

# 评估模型
accuracy = accuracy_score(y_test, y_pred)
print(classification_report(y_test, y_pred))

# 评估模型
classification_dict = classification_report(y_test, y_pred, output_dict=True)
classification_df = pd.DataFrame(classification_dict).transpose()
classification_df = classification_df['f1-score']

# 生成适合JSON的dict
classification_json = classification_df.to_dict()
classification_json['algorithm'] = 'histogrammark'

# 将数据保存到JSON文件
with open('classification_report.json', 'a') as json_file:
    json.dump(classification_json, json_file)


              precision    recall  f1-score   support

           0       0.42      0.90      0.57       590
           1       0.00      0.00      0.00       635
           2       0.82      0.30      0.44       587
           3       0.85      0.98      0.91       619
           4       0.85      0.57      0.68       589
           5       0.04      0.01      0.01       582
           6       0.41      0.94      0.57       598

    accuracy                           0.53      4200
   macro avg       0.49      0.53      0.46      4200
weighted avg       0.48      0.53      0.46      4200



## semanticmark

In [13]:
# 导入所需的库
import pandas as pd 
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report


# 读取数据
file_path = "/home/zhengyihao/dataset/covertype/covtype_with_key.subset.data"
ordinary_data = pd.read_csv(file_path)
ordinary_data.drop(columns=['primary_key'], inplace=True)

file_path = "/home/zhengyihao/dataset/covertype/semanticmark_covertype.data.csv"
watermarked_data = pd.read_csv(file_path)
watermarked_data.drop(columns=['primary_key'], inplace=True)

# 数据预处理
X_orignal = ordinary_data.iloc[:,0:-1] 
y_original = ordinary_data.iloc[:,-1] 

X_watermark = watermarked_data.iloc[:,0:-1] 
y_watermark = watermarked_data.iloc[:,-1] 

# 划分训练集和测试集
_, X_test, _, y_test = train_test_split(X_orignal, y_original, test_size=0.3, random_state=42)
X_train, _, y_train, _ = train_test_split(X_watermark, y_watermark, test_size=0.3, random_state=42)

y_test = y_test - 1
y_train = y_train - 1

# 使用 XGBoost 创建模型
model = XGBClassifier(n_estimators = 10)

# 训练模型
model.fit(X_train, y_train)

# 预测测试集数据
y_pred = model.predict(X_test)

# 评估模型
accuracy = accuracy_score(y_test, y_pred)
print(classification_report(y_test, y_pred))

# 评估模型
classification_dict = classification_report(y_test, y_pred, output_dict=True)
classification_df = pd.DataFrame(classification_dict).transpose()
classification_df = classification_df['f1-score']

# 生成适合JSON的dict
classification_json = classification_df.to_dict()
classification_json['algorithm'] = 'semanticmark'

# 将数据保存到JSON文件
with open('classification_report.json', 'a') as json_file:
    json.dump(classification_json, json_file)


              precision    recall  f1-score   support

           0       0.69      0.65      0.67       590
           1       0.73      0.49      0.59       635
           2       0.74      0.56      0.64       587
           3       0.78      0.98      0.87       619
           4       0.70      0.93      0.80       589
           5       0.74      0.73      0.73       582
           6       0.89      0.96      0.92       598

    accuracy                           0.76      4200
   macro avg       0.75      0.76      0.75      4200
weighted avg       0.75      0.76      0.75      4200

