### Data Prep

In [1]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import os
from sklearn.model_selection import train_test_split

In [2]:
# File paths
# Root data folder; it is later passed to create_signal_dataframe, which treats
# each sub-folder as one label.
TESTDIR = './../Data/Hyundai/'
# Sub-folder and file name of a single capture. Neither constant is referenced
# in the cells visible in this notebook — presumably kept for ad-hoc inspection.
TESTPATH = 'Lock/'
TESTSIG = 'keyfob_signal2023Y1022163258.raw'

Separate I from Q
- We only care about the in-phase ("real") portion of the signal.

Trim our samples to get rid of the noise at the start and end of each capture.

In [3]:
# Disabled exploratory cell: the per-file plotting loop below is wrapped in a
# triple-quoted string so that it is NOT executed. Because that string is the
# cell's only expression, running the cell simply echoes it back as the output.
# The indented `#` lines after the string are the remainder of the loop body,
# commented out line by line.
# NOTE(review): the disabled code removes
# int(sample_rate * (trim_start + trim_end)) samples from EACH end, i.e. the
# combined duration rather than 5 ms per side, and `.astype(np.complex64)` does
# not de-interleave I/Q (the imaginary part is always 0) — confirm both before
# re-enabling this cell.
"""# Specify the path to your .raw file
data_dir = './../Data/Hyundai/Unlock/'

# Get a list of all .raw files in the directory
raw_files = [file for file in os.listdir(data_dir) if file.endswith(".raw")]

# Loop through each .raw file in the directory
for raw_file in raw_files:
    file_path = os.path.join(data_dir, raw_file)

    # Read the raw file as bytes
    with open(file_path, 'rb') as file:
        raw_data = file.read()

    # Convert the raw data to a NumPy array of complex numbers
    # Assuming the raw data consists of interleaved I and Q samples (16-bit signed integers)
    raw_array = np.frombuffer(raw_data, dtype=np.int16).astype(np.complex64)

    # Extract the in-phase (I) and quadrature (Q) components
    I = raw_array.real
    Q = raw_array.imag

    # Create a time axis for the signal (assuming a fixed sample rate)
    sample_rate = 2e6  # Replace with your actual sample rate
    time = np.arange(0, len(I)) / sample_rate

    # Define the trim duration in seconds
    trim_start = 0.005  # 5 ms
    trim_end = 0.005    # 5 ms

    # Calculate the number of samples to trim
    trim_samples = int(sample_rate * (trim_start + trim_end))

    # Trim the signal
    I_trimmed = I[trim_samples:-trim_samples]
    Q_trimmed = Q[trim_samples:-trim_samples]
    time_trimmed = time[trim_samples:-trim_samples]

    # Plot the I and Q components
    plt.figure(figsize=(12, 6))
    plt.suptitle(f'File: {raw_file}')
    plt.subplot(2, 1, 1)
    plt.plot(time_trimmed, I_trimmed)
    plt.title('In-phase Component (Trimmed)')
    plt.xlabel('Time (s)')
    plt.grid()"""

    #plt.subplot(2, 1, 2)
    #plt.plot(time_trimmed, Q_trimmed)
    #plt.title('Quadrature Component (Trimmed)')
    #plt.xlabel('Time (s)')
    #plt.grid()"""

    #plt.tight_layout()
    #plt.show()

'# Specify the path to your .raw file\ndata_dir = \'./../Data/Hyundai/Unlock/\'\n\n# Get a list of all .raw files in the directory\nraw_files = [file for file in os.listdir(data_dir) if file.endswith(".raw")]\n\n# Loop through each .raw file in the directory\nfor raw_file in raw_files:\n    file_path = os.path.join(data_dir, raw_file)\n\n    # Read the raw file as bytes\n    with open(file_path, \'rb\') as file:\n        raw_data = file.read()\n\n    # Convert the raw data to a NumPy array of complex numbers\n    # Assuming the raw data consists of interleaved I and Q samples (16-bit signed integers)\n    raw_array = np.frombuffer(raw_data, dtype=np.int16).astype(np.complex64)\n\n    # Extract the in-phase (I) and quadrature (Q) components\n    I = raw_array.real\n    Q = raw_array.imag\n\n    # Create a time axis for the signal (assuming a fixed sample rate)\n    sample_rate = 2e6  # Replace with your actual sample rate\n    time = np.arange(0, len(I)) / sample_rate\n\n    # Define th

Normalize our Data

In [4]:
# Shared Min-Max scaler that maps values into the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))

# No fitting happens in this cell: create_signal_dataframe calls
# scaler.fit_transform separately on every signal it loads.

Build the labelled dataset, then split our data

In [5]:
def create_signal_dataframe(
    data_dir,
    sample_rate=2e6,
    trim_start=0.005,
    trim_end=0.005,
    target_length=22766528,
    signal_scaler=None,
):
    """Load every ``.raw`` capture under ``data_dir`` into a labelled DataFrame.

    Each immediate sub-directory of ``data_dir`` is one category; its name is
    the label of every ``.raw`` file it contains. Every file is read as a flat
    stream of 16-bit signed integers, converted to float32, trimmed at both
    ends, zero-padded at the end up to ``target_length`` and finally scaled.
    Directories and files are visited in sorted order so the row order is
    reproducible across machines.

    NOTE(review): the int16 stream is used value-for-value; nothing here
    de-interleaves I and Q. If the captures hold interleaved I/Q pairs, the
    ``Signal`` column contains both components — confirm the capture format.

    Parameters
    ----------
    data_dir : str
        Root directory holding one sub-directory per category.
    sample_rate : float, default 2e6
        Samples per second, used to convert the trim durations to a count.
    trim_start, trim_end : float, default 0.005
        Trim durations in seconds. NOTE(review): as in the original code, the
        COMBINED duration ``trim_start + trim_end`` is removed from EACH end.
        This is kept because the default ``target_length`` matches the array
        sizes the notebook prints for data trimmed this way — confirm before
        switching to per-side trimming.
    target_length : int, default 22766528
        Signals shorter than this are zero-padded at the end to this length;
        longer signals are left at their full length (not truncated).
    signal_scaler : object with ``fit_transform``, optional
        Scaler fitted independently on every signal. When omitted, the
        notebook-level ``scaler`` is used, exactly as before.

    Returns
    -------
    pandas.DataFrame
        One row per capture: a ``Signal`` column holding the scaled
        ``(n, 1)`` array, followed by one indicator column per category.
    """
    # Fall back to the notebook-level scaler so existing calls are unchanged.
    active_scaler = scaler if signal_scaler is None else signal_scaler

    # Number of samples removed from each end (see the trim note above).
    trim_samples = int(sample_rate * (trim_start + trim_end))

    signals = []
    labels = []

    for category in sorted(os.listdir(data_dir)):
        category_dir = os.path.join(data_dir, category)
        if not os.path.isdir(category_dir):
            continue

        for file_name in sorted(os.listdir(category_dir)):
            if not file_name.endswith(".raw"):
                continue

            file_path = os.path.join(category_dir, file_name)
            with open(file_path, 'rb') as handle:
                raw_data = handle.read()

            # Same values and dtype as the former complex64 -> .real detour,
            # without allocating the unused imaginary half.
            samples = np.frombuffer(raw_data, dtype=np.int16).astype(np.float32)

            # An explicit stop index keeps a zero-length trim from producing
            # an empty array (``x[0:-0]`` is empty).
            stop = max(samples.size - trim_samples, 0)
            trimmed = samples[trim_samples:stop]

            # Zero-pad short signals at the end so they share one length.
            if trimmed.size < target_length:
                padded = np.pad(trimmed, (0, target_length - trimmed.size), 'constant')
            else:
                padded = trimmed

            # The scaler expects a 2-D column; it is re-fitted for each signal.
            signals.append(active_scaler.fit_transform(padded.reshape(-1, 1)))
            labels.append(category)

    # One indicator column per distinct category name.
    one_hot_labels = pd.get_dummies(labels)

    df = pd.DataFrame({'Signal': signals})
    return pd.concat([df, one_hot_labels], axis=1)

In [6]:
# Build the labelled DataFrame from every capture found under the data root.
# Point `data_directory` at a different folder to load another data set.
data_directory = TESTDIR
hyundai_df = create_signal_dataframe(data_dir=data_directory)

In [7]:
# Preview the first five rows of the freshly built frame.
hyundai_df.head(n=5)
# Optional on-disk snapshot (disabled):
#hyundai_df.to_csv('hyundai_data_early.csv', index=False)


In [8]:
# Shuffle every row with a fixed seed, then renumber the index from 0.
hyundai_df_shuffled = (
    hyundai_df
    .sample(frac=1, random_state=43)
    .reset_index(drop=True)
)
hyundai_df_shuffled.head(n=5)

Unnamed: 0,Signal,Lock,Unlock
0,"[[0.5001626], [0.5001626], [0.5001626], [0.416...",False,True
1,"[[0.4663887], [0.5330901], [0.5330901], [0.533...",False,True
2,"[[0.5328125], [0.5328125], [0.5328125], [0.532...",True,False
3,"[[0.6662338], [0.6662338], [0.5], [0.6662338],...",True,False
4,"[[0.50009775], [0.4500489], [0.50009775], [0.5...",True,False


In [9]:
# Features: the per-capture signal arrays. Targets: every remaining (label) column.
X = hyundai_df_shuffled['Signal'].to_numpy()
y = hyundai_df_shuffled.drop(columns=['Signal'])

In [10]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=43)

In [11]:

# Longest training signal by row count (the first one wins on ties).
test = max(X_train, key=lambda signal: len(signal))

In [12]:
# Element count of the training signal at position 4.
X_train[4].size

22766528

In [13]:
# Element count of `test`, the longest training signal selected earlier.
test.size

22766528

In [14]:
# Save the trained model
#torch.save(model.state_dict(), 'rf_signal_classifier.pth')