In [1]:
import pickle
import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy

In [2]:
# Global matplotlib text settings applied to every figure drawn afterwards.
plt.rcParams.update({
    "font.family": "Linux Libertine O",
    "font.size": 20,
})

## Full Dataset

In [3]:
# Protocol numbers of interest; the loading cell keeps only rows whose
# "protocol" value appears in this array.
protocols = np.array([
    59, 60, 82, 88, 89, 90, 95, 110, 123, 140, 156,
    168, 180, 186, 201, 203, 208, 212, 225, 226, 241, 248,
])

In [None]:
# Load every capture file, parse its "time" column into datetimes, use it as
# the index, and keep only the rows whose protocol is listed in `protocols`.
files = ['<sensor_id>']

# Collect the per-file frames and concatenate once at the end: concatenating
# inside the loop re-copies all the rows accumulated so far on every iteration.
frames = []
for f in files:
    df = pd.read_csv(f"../data/tpms-{f}.csv")
    df["time"] = pd.to_datetime(df["time"])
    df = df.set_index("time")
    df = df[df["protocol"].isin(protocols)]
    frames.append(df)

# pd.concat returns a new frame, so `data` is never an alias of a filtered
# slice (later cells assign into it with .loc). pd.concat rejects an empty
# list, hence the explicit fallback to an empty DataFrame.
data = pd.concat(frames) if frames else pd.DataFrame()

In [9]:
# Seed for the shuffle below. The grouping cells walk the ids in this order,
# so a fixed seed is needed to get the same result after a kernel restart.
SEED = 42

cars = pd.read_csv('../data/cars.csv')
# Flatten the eight id columns into one 1-D array of strings.
cars_np = cars[['id1','id2','id3','id4','id5','id6','id7','id8']].to_numpy(dtype=str).flatten()
# Entries equal to the string 'nan' (missing cells after the str conversion)
# are dropped.
cars_np = cars_np[cars_np != 'nan']
# Shuffle so the detection does not benefit from the order ids have in the file.
cars_np = np.random.default_rng(SEED).permutation(cars_np)

In [None]:
# Maps an id to the id that should replace it in `data` (empty for now).
duplicates = {}

for duplicate, replacement in duplicates.items():
    # Select the rows before rewriting the id, then relabel those same rows.
    mask = data['id'] == duplicate
    data.loc[mask, 'id'] = replacement
    data.loc[mask, 'car'] = 'Renault'

In [11]:
# # Threshold for minimum number of occurrences
# threshold = 1  # Adjust this number as needed

# # Group by 'id' and count occurrences
# id_counts = data.groupby("id").size().sort_values(ascending=False)

# # Filter IDs by the threshold
# filtered_ids = id_counts[id_counts >= threshold].index

# Keep only the rows whose id is listed in `cars_np`, and discard the
# "Unnamed: 0" column.
known_id_rows = data["id"].isin(cars_np)
filtered = data.loc[known_id_rows].drop(columns=["Unnamed: 0"])
filtered.shape

(75578, 16)

## Single car analysis

In [12]:
# Time-bin widths to evaluate, as [pandas frequency string, display label].
grouping_analysis = [
    ['5S', '5 sec.'],
    ['10S', '10 sec.'],
    ['30S', '30 sec.'],
    ['1T', '1 min.'],
    ['2T', '2 min.'],
    ['5T', '5 min.'],
]

In [13]:
index = 0
grouping_time = grouping_analysis[index][0]

# Count how many rows each id has in every time bin
grouped = filtered.groupby([pd.Grouper(freq=grouping_time), 'id']).size().reset_index(name='count')

# One row per time bin, one column per id (0 where an id has no rows in a bin),
# with the columns reordered to follow cars_np
pivot_table = grouped.pivot_table(index='time', columns='id', values='count', fill_value=0)
pivot_table = pivot_table.loc[:,cars_np]

# Similarity of two ids: |zero-lag cross-correlation| divided by the larger of
# their two autocorrelation peaks.
#  - The zero-lag sample of the cross-correlation of two equal-length series
#    is their dot product, so all pairs come from a single matrix product.
#  - For count series the autocorrelation peak is the zero-lag value, i.e. the
#    sum of squares, which is the diagonal of that same product.
# This replaces one full cross-correlation per pair and needs no catch-all
# exception handler that would turn an unexpected error into a correlation of 0.
pivot_sum = pivot_table
counts = pivot_sum.to_numpy(dtype=float)
zero_lag = counts.T @ counts
energy = np.diag(zero_lag)
corr_mat = np.abs(zero_lag) / np.maximum(energy[:, None], energy[None, :])

# Work on a copy of the table in the grouping cell, and express the
# correlation in dB with a floor at -30 dB (the 1e-6 offset avoids log10(0))
pivot_aux = pivot_table.copy()
corr_mat_db = 10*np.log10(corr_mat+1e-6)
corr_mat_db[corr_mat_db < -30] = -30

In [14]:
corr_threshold = -5.0
# Partial groups (two or three ids above the threshold), padded with '' to
# four entries. The first row is an all-'' placeholder. These rows are only
# recorded; they are never merged into a car.
cars_ids = np.array(['','','',''], dtype=str)
# Complete groups of four ids, keyed by detection order
cars_dic = {}

car_num = 0
# Sweep the threshold from -5 dB downwards in 2.5 dB steps, stopping before
# -30 dB or once fewer than two ids are left
while (corr_threshold > -30.0) and (corr_mat_db.shape[0] > 1):
    i = 0
    while i < corr_mat_db.shape[0]:
        row = corr_mat_db[i,:]
        best_ids = row[row > corr_threshold]
        if len(best_ids) > 1:
            # Positions of the (at most four) largest correlations in this row
            actual_ids = np.argsort(row)[::-1][0:min([4,len(best_ids)])]
            id_values = pivot_aux.columns[actual_ids]

            if len(id_values) == 4:
                # Complete group: store it and remove its ids from both the
                # table and the correlation matrix so they cannot be reused
                pivot_aux = pivot_aux.drop(id_values,axis=1)
                corr_mat_db = np.delete(corr_mat_db, actual_ids, axis=0)
                corr_mat_db = np.delete(corr_mat_db, actual_ids, axis=1)
                cars_dic[car_num] = id_values
                car_num+=1
            else:
                # Incomplete group: pad to four entries and keep it for inspection
                filler = [''] * (4-len(id_values))
                id_values = np.append(id_values,filler)
                cars_ids = np.vstack((cars_ids,id_values))
        # NOTE(review): after a deletion the remaining rows shift up, so this
        # increment steps over the row that moved into position i; that row is
        # only looked at again on the next threshold pass. Left unchanged
        # because altering it changes the grouping - confirm it is intended.
        i += 1

    corr_threshold -= 2.5


In [16]:
# Show the complete groups: detection number -> the four ids of that group.
cars_dic

{0: Index(['D51A0AD4', 'D3B91061', 'D3B9536D', 'D3B9434C'], dtype='object', name='id'),
 1: Index(['BDC7D1', 'BE0562', 'BDF9DD', 'BDCD95'], dtype='object', name='id'),
 2: Index(['B6324C', 'B698E8', 'B67366', 'B696A2'], dtype='object', name='id'),
 3: Index(['F311F1AA', 'F311F336', 'F311EF53', 'F3120345'], dtype='object', name='id'),
 4: Index(['FB1F76FD', 'FB1F6F0C', 'FB1F7574', 'FB1F6F35'], dtype='object', name='id'),
 5: Index(['226797C', '22664C8', '22674FB', '2267C9F'], dtype='object', name='id'),
 6: Index(['370C9C', '370C77', '370C81', '370CB8'], dtype='object', name='id'),
 7: Index(['73E9B0', 'B62C58', 'B624EB', '4170AD'], dtype='object', name='id'),
 8: Index(['45CAE007', '45CB5A75', '45C78786', '45CB8050'], dtype='object', name='id')}

## Naive

In [17]:
# Time-bin widths to evaluate, as [pandas frequency string, display label].
grouping_analysis = [
    ['5S', '5 sec.'],
    ['10S', '10 sec.'],
    ['30S', '30 sec.'],
    ['1T', '1 min.'],
    ['2T', '2 min.'],
    ['5T', '5 min.'],
]

In [65]:
index = 2
grouping_time = grouping_analysis[index][0]

# Count how many rows each id has in every time bin
grouped = filtered.groupby([pd.Grouper(freq=grouping_time), 'id']).size().reset_index(name='count')

# One row per time bin, one column per id (0 where an id has no rows in a bin),
# with the columns reordered to follow cars_np
pivot_table = grouped.pivot_table(index='time', columns='id', values='count', fill_value=0)
pivot_table = pivot_table.loc[:,cars_np]

# Similarity of two ids: |zero-lag cross-correlation| divided by the larger of
# their two autocorrelation peaks.
#  - The zero-lag sample of the cross-correlation of two equal-length series
#    is their dot product, so all pairs come from a single matrix product.
#  - For count series the autocorrelation peak is the zero-lag value, i.e. the
#    sum of squares, which is the diagonal of that same product.
# This replaces one full cross-correlation per pair and needs no catch-all
# exception handler that would turn an unexpected error into a correlation of 0.
pivot_sum = pivot_table
counts = pivot_sum.to_numpy(dtype=float)
zero_lag = counts.T @ counts
energy = np.diag(zero_lag)
corr_mat = np.abs(zero_lag) / np.maximum(energy[:, None], energy[None, :])

# Express the correlation in dB with a floor at -30 dB (the 1e-6 offset
# avoids log10(0))
corr_mat_db = 10*np.log10(corr_mat+1e-6)
corr_mat_db[corr_mat_db < -30] = -30

In [66]:
pivot_aux = pivot_table.copy()

# This cell shrinks corr_mat_db while it runs, but pivot_aux is rebuilt from the
# full table each time. Running it twice without recomputing the correlation
# would pair matrix rows with the wrong ids, so fail loudly instead.
assert corr_mat_db.shape == (pivot_aux.shape[1], pivot_aux.shape[1]), \
    "corr_mat_db does not match pivot_table; re-run the correlation cell first"

corr_threshold = -15.0
# Partial groups (two or three ids above the threshold), padded with '' to
# four entries. The first row is an all-'' placeholder. These rows are only
# recorded; they are never merged into a car.
cars_ids = np.array(['','','',''], dtype=str)
# Complete groups of four ids, keyed by detection order
cars_dic = {}

car_num = 0
i = 0
# Single pass over the rows with one fixed threshold
while i < corr_mat_db.shape[0]:
    row = corr_mat_db[i,:]
    best_ids = row[row > corr_threshold]
    if len(best_ids) > 1:
        # Positions of the (at most four) largest correlations in this row
        actual_ids = np.argsort(row)[::-1][0:min([4,len(best_ids)])]
        id_values = pivot_aux.columns[actual_ids]

        if len(id_values) == 4:
            # Complete group: store it and remove its ids from both the table
            # and the correlation matrix so they cannot be reused
            pivot_aux = pivot_aux.drop(id_values,axis=1)
            corr_mat_db = np.delete(corr_mat_db, actual_ids, axis=0)
            corr_mat_db = np.delete(corr_mat_db, actual_ids, axis=1)
            cars_dic[car_num] = id_values.to_numpy()
            car_num+=1
        else:
            # Incomplete group: pad to four entries and keep it for inspection
            filler = [''] * (4-len(id_values))
            id_values = np.append(id_values,filler)
            cars_ids = np.vstack((cars_ids,id_values))
    # NOTE(review): after a deletion the remaining rows shift up, so this
    # increment steps over the row that moved into position i, and with a
    # single pass that row is never examined. Left unchanged because altering
    # it changes the grouping - confirm it is intended.
    i += 1

In [67]:
# Show the complete groups: detection number -> the four ids of that group.
cars_dic

{0: array(['370C77', '370CB8', '370C81', '370C9C'], dtype=object),
 1: array(['BE0562', 'BDC7D1', 'BDCD95', 'BDF9DD'], dtype=object),
 2: array(['B6324C', 'B698E8', 'B67366', 'B696A2'], dtype=object),
 3: array(['226797C', '2267C9F', '22664C8', '22674FB'], dtype=object),
 4: array(['D3B9536D', 'D3B9434C', 'D51A0AD4', 'D3B91061'], dtype=object),
 5: array(['C4500CBF', 'C4502099', 'C4500CDD', 'C4500CD3'], dtype=object),
 6: array(['FB1F76FD', 'FB1F6F0C', 'FB1F7574', 'FB1F6F35'], dtype=object),
 7: array(['F311EF53', 'F311F1AA', 'F311F336', 'F3120345'], dtype=object),
 8: array(['417B33', '4170AD', '3E536A', '3E538A'], dtype=object)}

In [52]:
# Show the incomplete groups: one row per group, padded with '' to four
# entries; the first row is the all-'' placeholder.
cars_ids

array([['', '', '', ''],
       ['0D6F63', '3E536A', '', ''],
       ['3E536A', '0D6F63', '', ''],
       ['0D51F6', 'B6324C', '', ''],
       ['B6324C', '0D51F6', '', '']], dtype=object)

## Big Analysis

In [99]:
# Time-bin widths to evaluate, as [pandas frequency string, display label].
grouping_analysis = [
    ['5S', '5 sec.'],
    ['10S', '10 sec.'],
    ['30S', '30 sec.'],
    ['1T', '1 min.'],
    ['2T', '2 min.'],
    ['5T', '5 min.'],
]

In [100]:
# Detected groups for every bin width: frequency string -> {number: four ids}
results = {}

for (grouping_time, _) in grouping_analysis:

    # Count how many rows each id has in every time bin
    grouped = filtered.groupby([pd.Grouper(freq=grouping_time), 'id']).size().reset_index(name='count')

    # One row per time bin, one column per id (0 where an id has no rows in a bin)
    pivot_table = grouped.pivot_table(index='time', columns='id', values='count', fill_value=0)

    # Reorder the columns to follow cars_np (which has been shuffled)
    pivot_table = pivot_table.loc[:,cars_np]

    # Similarity of two ids: |zero-lag cross-correlation| divided by the larger
    # of their two autocorrelation peaks.
    #  - The zero-lag sample of the cross-correlation of two equal-length
    #    series is their dot product, so all pairs come from one matrix product.
    #  - For count series the autocorrelation peak is the zero-lag value, i.e.
    #    the sum of squares, which is the diagonal of that same product.
    # This replaces one full cross-correlation per pair and needs no catch-all
    # exception handler that would turn an unexpected error into a correlation of 0.
    pivot_sum = pivot_table.copy()
    counts = pivot_sum.to_numpy(dtype=float)
    zero_lag = counts.T @ counts
    energy = np.diag(zero_lag)
    corr_mat = np.abs(zero_lag) / np.maximum(energy[:, None], energy[None, :])

    # Correlation in dB with a floor at -30 dB (the 1e-6 offset avoids log10(0))
    pivot_aux = pivot_table.copy()
    corr_mat_db = 10*np.log10(corr_mat+1e-6)
    corr_mat_db[corr_mat_db < -30] = -30

    corr_threshold = -5.0
    # Partial groups (two or three ids above the threshold), padded with '' to
    # four entries. The first row is an all-'' placeholder. These rows are only
    # recorded; they are never merged into a car.
    cars_ids = np.array(['','','',''], dtype=str)
    # Complete groups of four ids, keyed by detection order
    cars_dic = {}

    car_num = 0
    # Sweep the threshold from -5 dB downwards in 2.5 dB steps, stopping before
    # -30 dB or once fewer than two ids are left
    while (corr_threshold > -30.0) and (corr_mat_db.shape[0] > 1):
        i = 0
        while i < corr_mat_db.shape[0]:
            row = corr_mat_db[i,:]
            best_ids = row[row > corr_threshold]
            if len(best_ids) > 1:
                # Positions of the (at most four) largest correlations in this row
                actual_ids = np.argsort(row)[::-1][0:min([4,len(best_ids)])]
                id_values = pivot_aux.columns[actual_ids]

                if len(id_values) == 4:
                    # Complete group: store it and remove its ids from both the
                    # table and the correlation matrix so they cannot be reused
                    pivot_aux = pivot_aux.drop(id_values,axis=1)
                    corr_mat_db = np.delete(corr_mat_db, actual_ids, axis=0)
                    corr_mat_db = np.delete(corr_mat_db, actual_ids, axis=1)
                    cars_dic[car_num] = id_values
                    car_num+=1
                else:
                    # Incomplete group: pad to four entries and keep it for inspection
                    filler = [''] * (4-len(id_values))
                    id_values = np.append(id_values,filler)
                    cars_ids = np.vstack((cars_ids,id_values))
            # NOTE(review): after a deletion the remaining rows shift up, so
            # this increment steps over the row that moved into position i;
            # that row is only looked at again on the next threshold pass. Left
            # unchanged because altering it changes the grouping - confirm it
            # is intended.
            i += 1

        corr_threshold -= 2.5
        # Start every threshold pass with an empty list of partial groups
        cars_ids = np.array(['','','',''], dtype=str)

    results[grouping_time] = cars_dic

In [101]:
# Save the per-bin-width detection results. The file name carries the number
# of capture files that were loaded.
output_path = Path(f'./results/detected_cars_n{len(files)}.pickle')
# Create the output directory if needed, so the sweep's results are not lost
# to a FileNotFoundError at the very last step.
output_path.parent.mkdir(parents=True, exist_ok=True)
with output_path.open('wb') as handle:
    pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)
    