In [82]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import joblib
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from imblearn.over_sampling import SMOTE

In [83]:
# Location of the raw dataset (CSV).
data_path = "./Data.csv"

# Columns kept for modelling: the feature columns plus the "Cheat" label as
# the last entry. An earlier experiment used a different feature set
# (networkExp, Experience, coins and derived ratios such as fkdr, wlr, bblr,
# fk_lev, bb_lev, kill_lev); it is no longer selected here.
selected_columns = [
    'karma',
    'bedwars_level',
    'bedwars_loot_box',
    'all_timeBEDWARS__defensive',
    'BEDWARS__offensive',
    'BEDWARS__support',
    'Bedwars_openedChests',
    'beds_broken_bedwars',
    'beds_lost_bedwars',
    'deaths_bedwars',
    'diamond_resources_collected_bedwars',
    'emerald_resources_collected_bedwars',
    'fall_deaths_bedwars',
    'final_deaths_bedwars',
    'final_kills_bedwars',
    'games_played_bedwars',
    'games_played_bedwars_1',
    'kills_bedwars',
    'losses_bedwars',
    'void_deaths_bedwars',
    'void_final_deaths_bedwars',
    'void_final_kills_bedwars',
    'void_kills_bedwars',
    'wins_bedwars',
    "Cheat",
]

# Read the CSV and keep only the selected columns, in the order listed above.
df = pd.read_csv(data_path)[selected_columns]

In [84]:
# Rescale every record so that its feature values (all columns except the
# 'Cheat' label) add up to 10,000.
features = df.drop('Cheat', axis=1)
row_sums = features.sum(axis=1)

# Rows whose features sum to zero cannot be rescaled (division by zero).
is_zero_sum = row_sums == 0
zero_division_indices = row_sums.index[is_zero_sum]

# Per-row multiplier: 10,000 / row sum, or 0 for the zero-sum rows.
# The zero sums are masked to NaN before dividing, so the Series is float
# from the start; assigning float values into an integer Series is
# deprecated in recent pandas versions.
divisors = (10000 / row_sums.where(~is_zero_sum)).fillna(0.0)

# Multiply each row by its own multiplier (aligned on the row index).
df_scaled = features.mul(divisors, axis=0)

# Re-attach the untouched label column.
df_scaled['Cheat'] = df['Cheat']
print(df_scaled)

# Optionally persist the rescaled data as a new dataset.
# df_scaled.to_csv('ScaledData.csv', index=False)

            karma  bedwars_level  bedwars_loot_box  \
0     5985.843250      10.258515          1.025851   
1       75.770332      23.572992         26.940562   
2     9702.113182       0.882068          0.630048   
3     5869.074492      12.899065          0.000000   
4        0.000000      22.994379          8.942259   
...           ...            ...               ...   
4673  2476.176620       0.727431          0.000000   
4674    30.487805       1.219512          3.658537   
4675   344.332855       1.434720          2.869440   
4676   117.429542       0.499700          3.497901   
4677   116.324157       3.877472          0.000000   

      all_timeBEDWARS__defensive  BEDWARS__offensive  BEDWARS__support  \
0                            0.0           55.395979          2.051703   
1                            0.0           15.154066         25.256777   
2                            0.0            2.142164          1.008077   
3                            0.0           48.371493   

In [85]:
# Split the rescaled frame into the feature matrix and the 'Cheat' label.
# StandardScaler standardization is currently disabled; the features are used
# exactly as produced by the row-wise rescaling.
X = df_scaled.drop('Cheat', axis=1)
y = df_scaled['Cheat']

# Hold out 10% of the records as the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)

# Records treated as under-represented: rescaled 'bedwars_level' <= 20.
# NOTE(review): the threshold is applied to the row-rescaled value, not to the
# raw level from the CSV -- confirm this is the intended criterion.
underrepresented_indices = df_scaled[df_scaled['bedwars_level'] <= 20].index

# Only rows that landed in the training split can be oversampled. Rows that
# fell into the test split are reported and skipped. The boolean mask keeps
# the original row order.
in_train = underrepresented_indices.isin(X_train.index)
missing_indices = underrepresented_indices[~in_train].tolist()
print("Missing indices:", missing_indices)

underrepresented_indices = underrepresented_indices[in_train].tolist()
X_underrepresented = X_train.loc[underrepresented_indices]
y_underrepresented = y_train.loc[underrepresented_indices]

# Oversample the under-represented subset with SMOTE.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_underrepresented, y_underrepresented)

# Append the resampled subset to the full training split.
# ignore_index=True gives the combined frames unique, matching 0..n-1 row
# labels regardless of the index carried by the resampled data.
# NOTE(review): if X_resampled still contains the original under-represented
# rows, those rows end up in the training set twice after this concat --
# confirm that extra weighting is intended.
X_train_resampled = pd.concat([X_train, X_resampled], ignore_index=True)
y_train_resampled = pd.concat([y_train, y_resampled], ignore_index=True)

# Train a random forest on the augmented training set; class 1 is weighted
# five times as heavily as class 0.
model = RandomForestClassifier(n_estimators=2000, random_state=42, class_weight={0: 1, 1: 5})
model.fit(X_train_resampled, y_train_resampled)

# Predict on the untouched test split.
y_pred = model.predict(X_test)

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Precision
precision = precision_score(y_test, y_pred)
print(f'Precision: {precision}')

# Recall
recall = recall_score(y_test, y_pred)
print(f'Recall: {recall}')

# F1 score
f1 = f1_score(y_test, y_pred)
print(f'F1 Score: {f1}')

# Confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(conf_matrix)


Missing indices: [39, 42, 45, 49, 98, 104, 113, 117, 124, 130, 134, 138, 154, 159, 162, 170, 173, 192, 214, 218, 220, 223, 224, 246, 269, 271, 294, 302, 308, 348, 379, 383, 398, 402, 405, 415, 453, 472, 481, 483, 489, 517, 526, 530, 531, 538, 541, 543, 567, 584, 599, 634, 636, 727, 776, 812, 825, 847, 878, 883, 918, 933, 991, 1011, 1091, 1098, 1118, 1185, 1196, 1220, 1223, 1261, 1267, 1275, 1298, 1351, 1357, 1362, 1366, 1376, 1377, 1385, 1389, 1412, 1423, 1450, 1452, 1487, 1490, 1509, 1511, 1513, 1515, 1522, 1530, 1531, 1556, 1563, 1564, 1567, 1571, 1632, 1657, 1698, 1705, 1710, 1742, 1761, 1767, 1828, 1831, 1906, 1922, 1955, 2041, 2060, 2065, 2066, 2078, 2111, 2134, 2139, 2146, 2214, 2216, 2230, 2239, 2270, 2284, 2299, 2331, 2340, 2345, 2346, 2351, 2369, 2382, 2383, 2385, 2404, 2436, 2445, 2448, 2453, 2460, 2464, 2471, 2481, 2483, 2490, 2498, 2500, 2519, 2534, 2542, 2554, 2558, 2565, 2566, 2573, 2574, 2575, 2584, 2612, 2623, 2626, 2627, 2645, 2657, 2664, 2673, 2678, 2702, 2705, 2707, 

In [86]:
# Serialize the fitted classifier to disk. Only the model is written; the
# scaler dump ('./models/scaler.joblib') remains disabled.
model_path = '../Cheater.pkl'
joblib.dump(model, model_path)

['../Cheater.pkl']

# snipe垢の条件

fkdr, wlr, bblrは低い

star n.levは30以下

sniper% が高い（ターゲットが見つかるまで何度も rq するため）

fk/lev、bb/lev、kill/lev が高い（目安：fk/lev は 7.5 以上、bb/lev は 6.5 以上、kill/lev は 20 以上）

# cheatersデータ - snipe垢 = not sniper cheaters
snipe垢の条件外のcheaterを予測する。

# cubelifyのsniper%
cubelifyのsniper%を取得して、学習データとする