In [1]:
!pip install demoparser2
!pip install pandas numpy scipy scikit-learn


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


<h1>Parsing</h1>

In [2]:
from demoparser2 import DemoParser

import pandas as pd

# Number of ticks of view-angle history to show before each kill.
LOOKBACK_TICKS = 300

parser = DemoParser("Data/Cheater/Cheather.dem")

player_death_events = parser.parse_event("player_death")
df = parser.parse_ticks(["pitch", "yaw"])

# Preview: for every death event, print the attacker's pitch/yaw rows for the
# LOOKBACK_TICKS ticks leading up to (and including) the kill tick.
for (idx, event) in player_death_events.iterrows():
    start_tick = event["tick"] - LOOKBACK_TICKS
    end_tick = event["tick"]
    attacker = event["attacker_steamid"]
    # pd.notna rejects both None and NaN; a missing attacker (presumably a
    # death with no attacking player) would otherwise make int() raise.
    if pd.notna(attacker):
        # The event carries the id in a form that needs int() to be comparable
        # with the integer steamid column of the tick frame.
        subdf = df[(df["tick"].between(start_tick, end_tick)) & (df["steamid"] == int(attacker))]
        print(subdf)

          pitch         yaw  tick            steamid     name
20089  1.121979  173.135956  2011  76561198389634571  RedNose
20099  1.143951  173.246155  2012  76561198389634571  RedNose
20109  1.231842  173.554108  2013  76561198389634571  RedNose
20119  1.320068  173.707916  2014  76561198389634571  RedNose
20129  1.429932  173.927979  2015  76561198389634571  RedNose
...         ...         ...   ...                ...      ...
23049 -0.726135  147.022125  2307  76561198389634571  RedNose
23059 -0.704163  147.022125  2308  76561198389634571  RedNose
23069 -0.704163  147.022125  2309  76561198389634571  RedNose
23079 -0.637894  147.022125  2310  76561198389634571  RedNose
23089 -0.571976  147.066071  2311  76561198389634571  RedNose

[301 rows x 5 columns]
          pitch        yaw  tick            steamid    name
21407 -0.014420 -15.151184  2143  76561199469944110  3jatt_
21417 -0.028503 -14.264374  2144  76561199469944110  3jatt_
21427 -0.028503 -13.406403  2145  76561199469944110 

<h1>Processing</h1>

In [3]:
# Show the column labels of the tick DataFrame `df` built in the Parsing cell.
print(df.columns)

Index(['pitch', 'yaw', 'tick', 'steamid', 'name'], dtype='object')


In [13]:
import pandas as pd
from demoparser2 import DemoParser
import os

# Number of ticks of view-angle history to export before each kill.
LOOKBACK_TICKS = 300

# Parse the demo file
parser = DemoParser("Data/Cheater/Cheather.dem")

# Get player_death events and parse ticks for pitch and yaw
player_death_events = parser.parse_event("player_death")
df = parser.parse_ticks(["pitch", "yaw"])

# Create a base directory for output
base_dir = "output_data"
os.makedirs(base_dir, exist_ok=True)

# Iterate through player_death events and write one CSV per kill:
#   output_data/user_<attacker>/kill_event_<start>_to_<end>.csv
for (idx, event) in player_death_events.iterrows():
    start_tick = event["tick"] - LOOKBACK_TICKS
    end_tick = event["tick"]
    attacker = event["attacker_steamid"]

    # pd.notna rejects both None and NaN; `is not None` alone would let a NaN
    # attacker through and int() would then raise.
    if pd.isna(attacker):
        continue

    # int() makes the event's attacker id comparable with the integer
    # steamid column of the tick frame.
    subdf = df[(df["tick"].between(start_tick, end_tick)) & (df["steamid"] == int(attacker))]
    if subdf.empty:
        continue

    # Create a directory for the user if it doesn't exist
    user_dir = os.path.join(base_dir, f"user_{attacker}")
    os.makedirs(user_dir, exist_ok=True)

    # Create a CSV file for this kill event
    filename = f"kill_event_{start_tick}_to_{end_tick}.csv"
    file_path = os.path.join(user_dir, filename)

    # Save the subdf to a CSV file
    subdf.to_csv(file_path, index=False)

    print(f"Data saved to {file_path}")

print("All kill events have been processed and saved.")


Data saved to output_data/user_76561198389634571/kill_event_2011_to_2311.csv
Data saved to output_data/user_76561199469944110/kill_event_2143_to_2443.csv
Data saved to output_data/user_76561199469944110/kill_event_2314_to_2614.csv
Data saved to output_data/user_76561199780519055/kill_event_2500_to_2800.csv
Data saved to output_data/user_76561199039565051/kill_event_4671_to_4971.csv
Data saved to output_data/user_76561199470374288/kill_event_4726_to_5026.csv
Data saved to output_data/user_76561199479902304/kill_event_5008_to_5308.csv
Data saved to output_data/user_76561199479902304/kill_event_7398_to_7698.csv
Data saved to output_data/user_76561198389634571/kill_event_7552_to_7852.csv
Data saved to output_data/user_76561198822643426/kill_event_7876_to_8176.csv
Data saved to output_data/user_76561198389634571/kill_event_8143_to_8443.csv
Data saved to output_data/user_76561199479902304/kill_event_8170_to_8470.csv
Data saved to output_data/user_76561198822643426/kill_event_8301_to_8601.csv

<h1>Segmentation</h1>

In [1]:
import os
import pandas as pd
import numpy as np

def segment_data(df, kill_event_id, window_size=32):
    """Cut `df` into consecutive, non-overlapping windows of `window_size` rows.

    Windows are taken from the first row onwards; trailing rows that do not
    fill a complete window are left out. Each window is an independent copy of
    its rows with three extra columns:

    - ``kill_event_id``: the value passed in,
    - ``segment_number``: 1-based position of the window within `df`,
    - ``segment_id``: ``"<kill_event_id>_<segment_number>"``.

    Returns a list of DataFrames (empty when `df` has fewer than
    `window_size` rows). `df` itself is not modified.
    """
    # Exclusive upper bound on window start offsets: only full windows qualify.
    stop = len(df) - window_size + 1
    window_starts = range(0, stop, window_size)
    return [
        # assign() returns a new frame, so the caller's rows are never touched.
        df.iloc[start:start + window_size].assign(
            kill_event_id=kill_event_id,
            segment_number=number,
            segment_id=f"{kill_event_id}_{number}",
        )
        for number, start in enumerate(window_starts, start=1)
    ]

def process_player_data(player_folder):
    """Segment every kill-event CSV in one player's folder and stack the results.

    Each ``.csv`` file directly inside `player_folder` is read, its ``name``
    column (if any) is dropped, and the remaining rows are passed to
    ``segment_data`` together with a ``kill_event_id`` that counts the files
    from 0.

    Files are visited in a deterministic order, sorted by the integers embedded
    in the file name and then by the name itself. ``os.listdir`` returns entries
    in arbitrary order, so without sorting the same file could receive a
    different ``kill_event_id`` from one run or machine to the next.

    Parameters
    ----------
    player_folder : str
        Directory containing the per-kill CSV files of a single player.

    Returns
    -------
    pandas.DataFrame
        All segments concatenated with a fresh index, or an empty DataFrame
        when no file produced a segment.
    """
    import re  # local import: only needed for the file-name sort key

    def _numeric_order(filename):
        # e.g. "kill_event_2011_to_2311.csv" -> ([2011, 2311], name), so that
        # tick 10000 sorts after tick 2011 (plain string sorting would not).
        return ([int(token) for token in re.findall(r'-?\d+', filename)], filename)

    csv_files = sorted(
        (entry for entry in os.listdir(player_folder) if entry.endswith('.csv')),
        key=_numeric_order,
    )

    all_segments = []
    # One kill_event_id per CSV file, in sorted order.
    for kill_event_id, filename in enumerate(csv_files):
        df = pd.read_csv(os.path.join(player_folder, filename))
        if 'name' in df.columns:
            df = df.drop('name', axis=1)
        all_segments.extend(segment_data(df, kill_event_id))
    return pd.concat(all_segments, ignore_index=True) if all_segments else pd.DataFrame()

base_dir = "output_data"
# Write into "segmented_data": that is the directory the Feature Engineering
# cell lists for "*_segmented.csv" files. The previous value "processed_data"
# meant this cell's output was never picked up by the next stage.
processed_dir = "segmented_data"
os.makedirs(processed_dir, exist_ok=True)

# Segment each player's folder and save one "<folder>_segmented.csv" per player.
# sorted() makes the processing (and log) order reproducible; os.listdir order
# is arbitrary.
for player_folder in sorted(os.listdir(base_dir)):
    player_path = os.path.join(base_dir, player_folder)
    if not os.path.isdir(player_path):
        continue  # ignore stray files next to the player folders

    print(f"Processing data for {player_folder}")
    player_data = process_player_data(player_path)
    if not player_data.empty:
        output_file = os.path.join(processed_dir, f"{player_folder}_segmented.csv")
        player_data.to_csv(output_file, index=False)
        print(f"Segmented data saved to {output_file}")
    else:
        # No CSV directly inside the folder yielded a full segment.
        print(f"No valid data found for {player_folder}")

print("All player data has been processed and segmented.")


Processing data for Legit
No valid data found for Legit
Processing data for old
No valid data found for old
Processing data for Cheats
No valid data found for Cheats
All player data has been processed and segmented.


<h1>Feature Engineering</h1>

In [18]:
import os
import pandas as pd
import numpy as np

def engineer_features(df):
    """Add per-tick motion derivatives and per-segment summary statistics.

    Added columns, in this order:

    - ``pitch_velocity``, ``yaw_velocity``: change in angle per tick,
    - ``pitch_acceleration``, ``yaw_acceleration``: change in velocity per tick,
    - ``pitch_jerk``, ``yaw_jerk``: change in acceleration per tick,
    - ``{pitch,yaw}_{mean,std,min,max}``: summary statistics of the raw angle.

    Differences are taken within each ``kill_event_id`` when that column is
    present, so the last row of one kill window is never differenced against
    the first row of the next. Statistics are computed within each
    ``segment_id`` when that column is present. When a grouping column is
    absent the whole frame is treated as a single group.

    Yaw differences are wrapped into [-180, 180) so that crossing the +/-180
    seam is not reported as a ~360 degree jump.
    NOTE(review): assumes yaw is expressed in degrees within [-180, 180] --
    confirm against the parser's output.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``tick``, ``pitch`` and ``yaw`` columns. It is not
        modified; the function works on a copy.

    Returns
    -------
    pandas.DataFrame
        Copy of `df` with the feature columns added and every row containing a
        NaN removed (in particular the first three rows of each differencing
        group, for which jerk is undefined).
    """
    out = df.copy()  # never mutate the caller's frame

    event_key = 'kill_event_id' if 'kill_event_id' in out.columns else None
    segment_key = 'segment_id' if 'segment_id' in out.columns else None

    def _diff(column):
        # Row-to-row difference that restarts at every kill event.
        series = out[column]
        if event_key is None:
            return series.diff()
        return series.groupby(out[event_key]).diff()

    def _wrap_degrees(delta):
        # Map an angular difference onto the shortest signed rotation.
        return (delta + 180.0) % 360.0 - 180.0

    # Elapsed ticks between consecutive rows; non-positive gaps become NaN so
    # they cannot produce infinite or sign-flipped rates.
    dt = _diff('tick')
    dt = dt.where(dt > 0)

    # Calculate velocities
    out['pitch_velocity'] = _diff('pitch') / dt
    out['yaw_velocity'] = _wrap_degrees(_diff('yaw')) / dt

    # Calculate accelerations
    out['pitch_acceleration'] = _diff('pitch_velocity') / dt
    out['yaw_acceleration'] = _diff('yaw_velocity') / dt

    # Calculate jerk
    out['pitch_jerk'] = _diff('pitch_acceleration') / dt
    out['yaw_jerk'] = _diff('yaw_acceleration') / dt

    # Calculate statistical features (per segment when segments are labelled)
    for col in ['pitch', 'yaw']:
        for stat in ('mean', 'std', 'min', 'max'):
            if segment_key is None:
                out[f'{col}_{stat}'] = getattr(out[col], stat)()
            else:
                out[f'{col}_{stat}'] = out.groupby(segment_key)[col].transform(stat)

    return out.dropna()  # Remove NaN values from diff operations

processed_dir = "segmented_data"
engineered_dir = "engineered_data"
os.makedirs(engineered_dir, exist_ok=True)

# Run engineer_features over every "*_segmented.csv" in processed_dir and
# store the result under the matching "*_engineered.csv" name.
for file in os.listdir(processed_dir):
    if not file.endswith("_segmented.csv"):
        continue  # anything else in the directory is ignored

    df = pd.read_csv(os.path.join(processed_dir, file))
    engineered_df = engineer_features(df)

    engineered_name = file.replace('_segmented.csv', '_engineered.csv')
    output_file = os.path.join(engineered_dir, engineered_name)
    engineered_df.to_csv(output_file, index=False)
    print(f"Engineered features saved to {output_file}")

print("Feature engineering completed for all segmented files.")


Engineered features saved to engineered_data/user_76561198410519875_engineered.csv
Engineered features saved to engineered_data/user_76561199469944110_engineered.csv
Engineered features saved to engineered_data/user_76561198822643426_engineered.csv
Engineered features saved to engineered_data/user_76561199470374288_engineered.csv
Engineered features saved to engineered_data/user_76561198312099217_engineered.csv
Engineered features saved to engineered_data/user_76561199479902304_engineered.csv
Engineered features saved to engineered_data/user_76561199039565051_engineered.csv
Engineered features saved to engineered_data/user_76561198389634571_engineered.csv
Engineered features saved to engineered_data/user_76561199780519055_engineered.csv
Feature engineering completed for all segmented files.


<h1>Normalization Z-Score</h1>

In [19]:
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Input and output locations for this stage
engineered_dir = "engineered_data"
normalized_dir = "normalized_data"
os.makedirs(normalized_dir, exist_ok=True)

# One scaler object is reused; fit_transform re-fits it on every file, so each
# file is standardised with its own column means and standard deviations.
scaler = StandardScaler()

# Identifier columns that are carried through unscaled
identifier_columns = {'kill_event_id', 'segment_number', 'segment_id', 'tick', 'steamid'}

for file in os.listdir(engineered_dir):
    if not file.endswith("_engineered.csv"):
        continue  # only engineered feature files are normalised

    df = pd.read_csv(os.path.join(engineered_dir, file))

    # Everything that is not an identifier gets z-scored, in column order
    columns_to_normalize = [col for col in df.columns if col not in identifier_columns]
    df[columns_to_normalize] = scaler.fit_transform(df[columns_to_normalize])

    # Write the result next to the other normalised files
    normalized_name = file.replace("_engineered.csv", "_normalized.csv")
    output_file = os.path.join(normalized_dir, normalized_name)
    df.to_csv(output_file, index=False)
    print(f"Normalized data saved to {output_file}")

print("Z-score normalization completed for all engineered files.")


Normalized data saved to normalized_data/user_76561199039565051_normalized.csv
Normalized data saved to normalized_data/user_76561198312099217_normalized.csv
Normalized data saved to normalized_data/user_76561198410519875_normalized.csv
Normalized data saved to normalized_data/user_76561198822643426_normalized.csv
Normalized data saved to normalized_data/user_76561198389634571_normalized.csv
Normalized data saved to normalized_data/user_76561199469944110_normalized.csv
Normalized data saved to normalized_data/user_76561199470374288_normalized.csv
Normalized data saved to normalized_data/user_76561199780519055_normalized.csv
Normalized data saved to normalized_data/user_76561199479902304_normalized.csv
Z-score normalization completed for all engineered files.
