In [46]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MaxAbsScaler

In [34]:
def load_data(file_path):
    """Parse a sectioned text data file into a DataFrame with a 'Class' column.

    Layout handled by the parser:
      * line 1: whitespace-separated column names;
      * a line consisting of a single (optionally negative) integer sets the
        class label for every data row that follows it;
      * any other non-blank line: whitespace-separated numeric values of one row.

    Parameters
    ----------
    file_path : str or path-like
        Path of the text file to read.

    Returns
    -------
    pandas.DataFrame
        One row per data line. Columns are the header names plus a trailing
        'Class' column. Rows found before the first label line get ``None``
        as their class.

    Raises
    ------
    IndexError
        If the file contains no lines at all (there is no header to read).
    ValueError
        If a data line holds a token that ``float`` cannot parse.
    """
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Header names come from the first line; split() with no argument already
    # ignores the trailing newline and any repeated/trailing spaces.
    columns = lines[0].split()
    columns.append('Class')  # label column filled from the section markers

    data = []
    current_class = None

    for line in lines[1:]:  # everything after the header
        stripped_line = line.strip()
        if not stripped_line:
            # Blank or whitespace-only line: without this guard it would fall
            # into the data branch and add a row holding only the class label.
            continue
        if stripped_line.lstrip('-').isdigit():
            # A lone integer (possibly negative, e.g. -1) starts a new class section
            current_class = int(stripped_line)
        else:
            row = [float(token) for token in stripped_line.split()]
            row.append(current_class)
            data.append(row)

    return pd.DataFrame(data, columns=columns)


In [35]:
# Parse the raw file; section markers (including -1) end up in the 'Class' column
RAW_DATA_PATH = 'rocket_league_skillshots.data'
df = load_data(file_path=RAW_DATA_PATH)

In [48]:
# Preview the first rows of the frame.
# NOTE(review): the saved output of this cell is stale. Its execution count (48)
# is later than the scaling cell, and the output already shows scaled values and
# the merged 'move' column rather than the freshly loaded frame. Re-run the
# notebook top to bottom before trusting it.
df.head()

Unnamed: 0,BallAcceleration,Time,DistanceWall,DistanceCeil,DistanceBall,PlayerSpeed,BallSpeed,goal,Class,move
0,0.005129,0.0,0.012304,0.498266,0.031512,0.453337,0.300277,0,6,101
1,0.010021,0.010311,0.01229,0.498266,0.024175,0.540209,0.309974,0,6,100101
2,0.0,0.012889,0.01229,0.498266,0.024959,0.540209,0.312201,0,6,100000
3,0.031069,0.0232,0.012311,0.498266,0.015971,0.501078,0.342262,0,6,100
4,0.018512,0.0464,0.012387,0.498271,0.016252,0.543875,0.360175,0,6,101


In [37]:
# Check the parsed dtypes: in the saved output every feature column is float64
# and only 'Class' is int64.
df.dtypes

BallAcceleration    float64
Time                float64
DistanceWall        float64
DistanceCeil        float64
DistanceBall        float64
PlayerSpeed         float64
BallSpeed           float64
up                  float64
accelerate          float64
slow                float64
goal                float64
left                float64
boost               float64
camera              float64
down                float64
right               float64
slide               float64
jump                float64
Class                 int64
dtype: object

In [38]:
# Positions 7-17 are the eleven 0/1 flag columns ('up' through 'jump', 'goal' included)
indices_to_convert = list(range(7, 18))

# Cast each of those columns to str (values become text such as '0.0' / '1.0')
for column_name in df.columns[indices_to_convert]:
    df[column_name] = df[column_name].astype(str)
        

In [39]:
# Preview after the string cast; the flag columns still display as 0.0 / 1.0
df.head()

Unnamed: 0,BallAcceleration,Time,DistanceWall,DistanceCeil,DistanceBall,PlayerSpeed,BallSpeed,up,accelerate,slow,goal,left,boost,camera,down,right,slide,jump,Class
0,1636.798772,0.0,3498.01,2012.98,299.66827,104267.426232,99035.849338,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,6
1,3198.029397,0.138893,3494.08,2012.98,229.89678,124248.031988,102233.878734,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,6
2,0.0,0.173617,3494.08,2012.98,237.350599,124248.031988,102968.35899,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,6
3,9914.766242,0.31251,3500.08,2012.98,151.880921,115248.016009,112883.125231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,6
4,5907.747166,0.625019,3521.65,2013.0,154.556104,125091.256173,118790.872398,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,6


In [40]:
# NOTE(review): same call and same saved output as the previous cell. This
# cell is redundant and is a candidate for deletion.
df.head()

Unnamed: 0,BallAcceleration,Time,DistanceWall,DistanceCeil,DistanceBall,PlayerSpeed,BallSpeed,up,accelerate,slow,goal,left,boost,camera,down,right,slide,jump,Class
0,1636.798772,0.0,3498.01,2012.98,299.66827,104267.426232,99035.849338,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,6
1,3198.029397,0.138893,3494.08,2012.98,229.89678,124248.031988,102233.878734,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,6
2,0.0,0.173617,3494.08,2012.98,237.350599,124248.031988,102968.35899,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,6
3,9914.766242,0.31251,3500.08,2012.98,151.880921,115248.016009,112883.125231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,6
4,5907.747166,0.625019,3521.65,2013.0,154.556104,125091.256173,118790.872398,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,6


In [41]:
# Positions of the eleven flag columns ('up' through 'jump', including 'goal'),
# not every column of the frame.
indices_to_convert = [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]


def convert_float(x):
    """Return the text form of ``x``, shortening '0.0' to '0' and '1.0' to '1'.

    Parameters
    ----------
    x : object
        A cell value; typically the string produced by casting a float flag
        to str, but raw floats are accepted as well.

    Returns
    -------
    str
        '0' or '1' for the two flag values, otherwise ``str(x)`` unchanged
        (decimal part kept). The function always returns a string, so an
        unexpected value is never silently replaced by ``None``.
    """
    text = str(x)
    if text == '0.0':
        return '0'
    if text == '1.0':
        return '1'
    # Any other number keeps its textual form, decimal part included
    return text

# Apply the function element-wise to the selected columns (by position).
# Each column is mapped with Series.map rather than DataFrame.applymap, which
# is deprecated since pandas 2.1; this form works on old and new versions.
df.iloc[:, indices_to_convert] = df.iloc[:, indices_to_convert].apply(
    lambda column: column.map(convert_float)
)

In [42]:
df.head()

Unnamed: 0,BallAcceleration,Time,DistanceWall,DistanceCeil,DistanceBall,PlayerSpeed,BallSpeed,up,accelerate,slow,goal,left,boost,camera,down,right,slide,jump,Class
0,1636.798772,0.0,3498.01,2012.98,299.66827,104267.426232,99035.849338,0,0,0,0,0,0,0,0,1,0,1,6
1,3198.029397,0.138893,3494.08,2012.98,229.89678,124248.031988,102233.878734,0,0,0,0,0,1,0,0,1,0,1,6
2,0.0,0.173617,3494.08,2012.98,237.350599,124248.031988,102968.35899,0,0,0,0,0,1,0,0,0,0,0,6
3,9914.766242,0.31251,3500.08,2012.98,151.880921,115248.016009,112883.125231,0,0,0,0,0,0,0,0,1,0,0,6
4,5907.747166,0.625019,3521.65,2013.0,154.556104,125091.256173,118790.872398,0,0,0,0,0,0,0,0,1,0,1,6


## Merging inputs into one column

In [43]:
# The ten controller-input columns, in the order their characters appear in 'move'.
# 'goal' is not in this list, so it stays a separate column.
input_columns = ['up', 'accelerate', 'slow', 'left', 'boost', 'camera', 'down', 'right', 'slide', 'jump']

# Concatenate the per-input strings left to right into one code per row.
# This relies on the columns holding strings (they are cast to str earlier);
# with numeric columns `+` would add the values instead.
df['move'] = df[input_columns[0]]
for column in input_columns[1:]:
    df['move'] = df['move'] + df[column]

# The individual input columns are now redundant; the same list drives the drop
# so the two steps cannot drift apart.
df = df.drop(columns=input_columns)
df.head(5)

Unnamed: 0,BallAcceleration,Time,DistanceWall,DistanceCeil,DistanceBall,PlayerSpeed,BallSpeed,goal,Class,move
0,1636.798772,0.0,3498.01,2012.98,299.66827,104267.426232,99035.849338,0,6,101
1,3198.029397,0.138893,3494.08,2012.98,229.89678,124248.031988,102233.878734,0,6,100101
2,0.0,0.173617,3494.08,2012.98,237.350599,124248.031988,102968.35899,0,6,100000
3,9914.766242,0.31251,3500.08,2012.98,151.880921,115248.016009,112883.125231,0,6,100
4,5907.747166,0.625019,3521.65,2013.0,154.556104,125091.256173,118790.872398,0,6,101


In [44]:
# Save the merged, not-yet-scaled frame. With to_csv's default index=True the
# row index is written as an unnamed first column.
# NOTE(review): 'move' holds digit strings that can start with '0'; a reader
# that does not pass dtype=str will presumably parse them as integers and drop
# the leading zeros. Confirm in the consuming code.
df.to_csv('rocket_league_skillshots_raw.csv')

## Normalize the numerical data

In [47]:
# Select the float-typed columns, then fit the scaler on them and write the
# transformed values back into the same columns.
float_columns = df.select_dtypes(include=['float']).columns
scaler = MaxAbsScaler()
scaler.fit(df[float_columns])
df[float_columns] = scaler.transform(df[float_columns])

df.head(5)

Unnamed: 0,BallAcceleration,Time,DistanceWall,DistanceCeil,DistanceBall,PlayerSpeed,BallSpeed,goal,Class,move
0,0.005129,0.0,0.012304,0.498266,0.031512,0.453337,0.300277,0,6,101
1,0.010021,0.010311,0.01229,0.498266,0.024175,0.540209,0.309974,0,6,100101
2,0.0,0.012889,0.01229,0.498266,0.024959,0.540209,0.312201,0,6,100000
3,0.031069,0.0232,0.012311,0.498266,0.015971,0.501078,0.342262,0,6,100
4,0.018512,0.0464,0.012387,0.498271,0.016252,0.543875,0.360175,0,6,101


In [51]:
# Check dtypes after merging and scaling: in the saved output 'goal' and 'move'
# are object (string) columns, 'Class' is int64 and the rest are float64.
df.dtypes

BallAcceleration    float64
Time                float64
DistanceWall        float64
DistanceCeil        float64
DistanceBall        float64
PlayerSpeed         float64
BallSpeed           float64
goal                 object
Class                 int64
move                 object
dtype: object

In [None]:
# Save the scaled frame, before 'move' is label-encoded. The default
# index=True also writes the row index as an unnamed first column.
# NOTE(review): this cell has no execution count in the saved notebook, so the
# file may not have been written in the recorded session. 'move' is still a
# digit string here and can lose leading zeros when read back without dtype=str.
df.to_csv('preprocessed_rocket_league_skillshots.csv')

## Label Encoding the moves

In [54]:
# Replace each distinct 'move' string with an integer id.
# LabelEncoder is imported with the other dependencies in the first cell.
# `le` keeps the fitted classes, so ids can be mapped back with
# le.inverse_transform(...).
le = LabelEncoder()
df['move'] = le.fit_transform(df['move'])

In [55]:
# Preview after label encoding: 'move' now holds small integer ids
df.head()

Unnamed: 0,BallAcceleration,Time,DistanceWall,DistanceCeil,DistanceBall,PlayerSpeed,BallSpeed,goal,Class,move
0,0.005129,0.0,0.012304,0.498266,0.031512,0.453337,0.300277,0,6,3
1,0.010021,0.010311,0.01229,0.498266,0.024175,0.540209,0.309974,0,6,22
2,0.0,0.012889,0.01229,0.498266,0.024959,0.540209,0.312201,0,6,18
3,0.031069,0.0232,0.012311,0.498266,0.015971,0.501078,0.342262,0,6,2
4,0.018512,0.0464,0.012387,0.498271,0.016252,0.543875,0.360175,0,6,3


In [56]:
# Save the scaled frame with 'move' label-encoded. The default index=True also
# writes the row index as an unnamed first column.
df.to_csv('preprocessed_encoded_rocket_league_skillshots.csv')