In [None]:
# Train on the alteration_* CSVs and report per-class F1 on the original data's hold-out split
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
import xgboost as xgb
import numpy as np
import math

# --- Configuration -----------------------------------------------------------
dataset = 'covertype'
seed = 10000

gamma = 1/2 # ratio between the length of green domain and red domain


# Load the original data; its 30% hold-out split is used as the test set below.
origin = pd.read_csv("../dataset/covtype_with_key.subset.data")

proportions = [0.2, 0.4, 0.6, 0.8, 1.0]

# The file name depends only on `dataset` and `seed`, so it is loaded once
# instead of once per proportion. It is not used in this cell.
# NOTE: allow_pickle=True unpickles arbitrary Python objects; only load files
# from a trusted source.
loaded_results = np.load(f"{dataset}-{seed}.npy", allow_pickle=True).item()

# The hold-out rows of `origin` do not depend on the proportion (fixed
# random_state, same frame), so the split is computed once. Labels are kept
# raw here and encoded inside the loop with the encoder fitted on the
# training labels.
_, X_test, _, y_test_raw = train_test_split(
    origin.drop(columns=['Cover_Type']),
    origin['Cover_Type'],
    test_size=0.3,
    random_state=42,
)

for proportion in proportions:
    watermarked_data = pd.read_csv(f"alteration_{dataset}-{seed}-{proportion}.csv")

    # Sorted distinct labels of this file (not used further in this cell).
    cover_types = np.sort(watermarked_data['Cover_Type'].unique())

    X = watermarked_data.drop(columns=['Cover_Type'])

    # One encoder per file: fitted on the training labels and reused for the
    # test labels, so that encoded class ids mean the same thing on both sides.
    le = LabelEncoder()
    y = le.fit_transform(watermarked_data['Cover_Type'])

    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.3, random_state=42)

    # Raises ValueError if the original data contains a label that never
    # occurs in the watermarked file, instead of silently mis-aligning ids.
    y_test = le.transform(y_test_raw)

    model = xgb.XGBClassifier(n_estimators=30, max_depth=10, n_jobs=4)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Pin the label order so that f1_scores[k] always belongs to le.classes_[k],
    # even if some class is absent from both y_test and y_pred.
    class_ids = np.arange(len(le.classes_))
    f1_scores = f1_score(y_test, y_pred, labels=class_ids, average=None)

    print(f"{proportion}:")
    for class_label, score in zip(le.classes_, f1_scores):
        print(f"Category {class_label}: F1-score = {score:.4f}")




In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
import xgboost as xgb

# Set the path to the datasets and define the dataset names
base_path = "different_version_datasets"
dataset_folders_1 = ["BMark"]
dataset_folders_2 = ["0.2attack", "0.4attack", "0.6attack", "0.8attack", "1.0attack"]
seed = 10000
num_versions = 128  # each folder is assumed to hold files numbered 1..num_versions

# Load original dataset for testing
origin = pd.read_csv("../dataset/covtype_with_key.subset.data")

# The last column of `origin` is taken as the label column.
label_column = origin.columns[-1]

# The hold-out rows of `origin` are identical for every file (fixed
# random_state, same frame), so the split is computed once. Labels are kept raw
# and encoded per file with that file's fitted LabelEncoder.
_, X_test, _, y_test_labels = train_test_split(
    origin.drop(columns=[label_column]),
    origin[label_column],
    test_size=0.3,
    random_state=42,
)

# Both folder groups go through the same evaluation; they differ only in how a
# file is turned into a DataFrame. The flag records which loader to use.
folder_plan = (
    [(name, True) for name in dataset_folders_1]
    + [(name, False) for name in dataset_folders_2]
)

# Loop through each dataset version
for folder, stored_as_pickled_dict in folder_plan:
    print(f"Processing {folder}:")
    average_f1_scores = []

    # Loop through each file in the folder
    for i in range(1, num_versions + 1):
        file_path = f"{base_path}/{folder}/covertype-{i}.npy"
        if stored_as_pickled_dict:
            # NOTE: allow_pickle=True unpickles arbitrary Python objects; only
            # load files from a trusted source.
            data = np.load(file_path, allow_pickle=True).item()
            df = data["watermarked_data"]
        else:
            # NOTE(review): these paths end in .npy but are parsed with
            # pd.read_csv, unchanged from the original code; confirm the
            # on-disk format.
            df = pd.read_csv(file_path)

        # Preparing data
        X = df.drop(columns=['Cover_Type'])
        y = df['Cover_Type']

        le = LabelEncoder()
        y_encoded = le.fit_transform(y)

        X_train, _, y_train, _ = train_test_split(X, y_encoded, test_size=0.3, random_state=42)

        # Encode the hold-out labels with the encoder fitted on this file's
        # labels; raises ValueError if a hold-out label is unknown to it.
        y_test = le.transform(y_test_labels)

        # Training XGBoost model
        model = xgb.XGBClassifier(n_estimators=30, max_depth=10, n_jobs=4)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Calculating per-class F1 scores. The label order is pinned so that
        # position k always belongs to le.classes_[k], which keeps the per-file
        # vectors aligned for the average below.
        f1_scores = f1_score(
            y_test, y_pred, labels=np.arange(len(le.classes_)), average=None
        )

        average_f1_scores.append(f1_scores)

    # Calculate average F1 scores across all files of this folder
    average_f1_scores = np.mean(average_f1_scores, axis=0)
    # Class names come from the encoder of the last file processed.
    for class_label, score in zip(le.classes_, average_f1_scores):
        print(f"Category {class_label}: Average F1-score = {score:.4f}")


Processing BMark:
Category 1: Average F1-score = 0.7611
Category 2: Average F1-score = 0.7448
Category 3: Average F1-score = 0.8590
Category 4: Average F1-score = 0.9556
Category 5: Average F1-score = 0.9159
Category 6: Average F1-score = 0.8827
Category 7: Average F1-score = 0.9535
Processing 0.2attack:
Category 1: Average F1-score = 0.7254
Category 2: Average F1-score = 0.7073
Category 3: Average F1-score = 0.8149
Category 4: Average F1-score = 0.9351
Category 5: Average F1-score = 0.8830
Category 6: Average F1-score = 0.8384
Category 7: Average F1-score = 0.9290
Processing 0.4attack:
Category 1: Average F1-score = 0.6696
Category 2: Average F1-score = 0.6477
Category 3: Average F1-score = 0.7449
Category 4: Average F1-score = 0.8942
Category 5: Average F1-score = 0.8267
Category 6: Average F1-score = 0.7622
Category 7: Average F1-score = 0.8818
Processing 0.6attack:
Category 1: Average F1-score = 0.5685
Category 2: Average F1-score = 0.5415
Category 3: Average F1-score = 0.6135
Cate