## WLAN data plotting

In [None]:
# This notebook supports input parameters for automatic report generation. The parameters must be variables in this
# cell, which has a special 'parameters' tag.

# Directory holding the measurement data; the setup cell below re-bases it onto its parent directory
DATA_ROOT = r'G:\Shared drives\Covid-19 Spectrum Monitoring\Data'

# Root directory for exported CSV files; a false value (e.g. '' or None) skips the export cell
EXPORT_DATA_ROOT = r'G:\Shared drives\Covid-19 Spectrum Monitoring Data Export'

# Figure format handed to set_matplotlib_formats in the setup cell
figure_format = 'svg'

In [None]:
# NOTE(review): this comment used to read "permits 'import read' from the parent directory", but no sys.path
# change is visible in this cell -- presumably `environment` takes care of it; confirm or drop the import.
import sys
# Star import: the names used below without an explicit import (Path, np, pd, sns, subplots, set_caption, ...)
# are presumably provided by the project-local `environment` module -- verify there.
from environment import *
# Apply the figure format chosen in the parameters cell
set_matplotlib_formats(figure_format)

# Dataset paths below are resolved relative to the parent of the configured data directory
DATA_ROOT = Path(DATA_ROOT)/'..'

def swept_power_single_frequency(power_path):
    """ read a swept power file and flatten it into a single power time series averaged in adjacent pairs

    Arguments:
        power_path: path of the file passed to read_dat.swept_power

    Returns:
        (power, spectrum, metadata), where power is a Series named 'Power' indexed by time elapsed
        since its first sample (rounded to 3 decimals), and spectrum and metadata are the unmodified
        return values of read_dat.swept_power

    """
    spectrum, metadata = read_dat.swept_power(power_path, holdoff=0)
    # presumably only the time level of the index remains after this -- TODO confirm against read_dat
    power = spectrum.droplevel(['Sweep','Frequency'])

    # convert time units to time elapsed
    power.index = (power.index - power.index[0]).total_seconds()

    # if every row carries the same time stamp, synthesize row times from the dwell time in the metadata
    if power.index[-1] == 0:
        power.index += metadata['dwell_time']*np.arange(power.index.size)

    # flatten to one long series; the new index is the sum of the two index levels produced by unstack
    # (presumably the column labels are time offsets within each row -- TODO confirm)
    power = power.unstack()
    power.index = power.index.get_level_values(0)+power.index.get_level_values(1)
    power = power.sort_index()

    # average each pair of adjacent samples, keeping the (re-zeroed, rounded) time of the first of each pair
    # NOTE(review): an odd number of samples makes the two slices differ in length -- confirm it cannot happen
    power = pd.Series(
        0.5*(power.values[::2]+power.values[1::2]),
        index=np.round(power.index[::2] - power.index[0],3),
        name='Power'
    ) # average into 1 ms bins (assumes the input samples are 0.5 ms apart -- TODO confirm)
    
    return power, spectrum, metadata

def read_sanjole_parsed_lte(path):
    """ read a parsed Sanjole LTE CSV file and place its rows on a regular 1 ms time grid

    The first 7 columns are read and indexed by the 'Time (s)' column, expressed as the time elapsed
    since the first row and rounded to 3 decimals.

    Arguments:
        path: path of the CSV file

    Returns:
        DataFrame indexed by elapsed time in 1 ms steps from 0 through the last observed time; grid
        rows without an observation are filled with NaN. If the frame cannot be reindexed, a warning
        is printed and the frame is returned on its original index instead.

    """
    df = pd.read_csv(path, usecols=range(7)).set_index('Time (s)')
    df.index = np.round(df.index-df.index[0],3)

    # Build the grid from integer millisecond counts and round it exactly like the index above.
    # A float-stepped np.arange yields values such as 0.009000000000000001 that do not compare equal
    # to the rounded index labels, which would silently replace real observations with NaN.
    n_steps = int(np.rint(df.index.max()*1e3)) + 1
    reindex = np.round(np.arange(n_steps)*1e-3, 3)

    try:
        df = df.reindex(reindex, fill_value=np.nan)
    except ValueError:
        # pandas raises ValueError when the index holds duplicate labels
        print('warning: could not reindex')

    return df

def read_sanjole_csv_export(path):
    """ read a Sanjole CSV export, indexed by seconds elapsed since its first row

    Arguments:
        path: path of the CSV file, which must contain a 'Start time' column

    Returns:
        DataFrame whose index is the 'Start time' column interpreted as time deltas, converted to
        seconds, and shifted so that the first row is at 0

    """
    table = pd.read_csv(path)
    table = table.set_index('Start time')

    # interpret the start times as time deltas and express them in seconds
    table.index = pd.TimedeltaIndex(table.index).total_seconds()

    # make the time axis relative to the first row
    first = table.index[0]
    table.index = table.index - first
    return table

sanjole_csv_path = None

# One entry per measured WLAN channel. Each entry gives
#   label:        text used in figure titles, captions, and the export directory name
#   sanjole_root: directory searched recursively for '*messages.csv' files
#   fc:           frequency in MHz shown in titles, captions, and the export file name
# No SDR swept-power file (power_path) is configured for these datasets.
hospital_2021_09_2412MHz = {
    'label': 'hospital 2021-09-24',
    'sanjole_root': DATA_ROOT/r'Hospital Data\2021-09-24\2412 MHz ISM',
    'fc': 2412.,
}

hospital_2021_09_2437MHz = {
    'label': 'hospital 2021-09-24',
    'sanjole_root': DATA_ROOT/r'Hospital Data\2021-09-24\2437 MHz ISM',
    'fc': 2437.,
}

hospital_2021_09_2462MHz = {
    'label': 'hospital 2021-09-24',
    'sanjole_root': DATA_ROOT/r'Hospital Data\2021-09-24\2462 MHz ISM',
    'fc': 2462.,
}


# the datasets processed by the plotting and export cells, in order
datasets = (
    hospital_2021_09_2412MHz,
    hospital_2021_09_2437MHz,
    hospital_2021_09_2462MHz,
)

# # make the metadata into variables here
# globals().update(metadata) 

# # to plot by frequency, re-sort to group data at each frequency 
# spectrum = spectrum.sort_index(level=['Sweep', 'Frequency', 'Time'])

# spectrum
# Hospital Data/2020-12-07/739MHz 10 MHz 2020-12-07 ends 12h02m full intelligudge
# Hospital Data/2020-12-07/866.3MHz 5 MHz 2020-12-07 ends 13h14m
# Hospital Data/2021-09-24/866.3 MHz 5 MHz Sprint
# Hospital Data/2021-09-24/739 MHz 10 MHz AT&T

In [None]:
def rfind_one(path, glob_pattern, check_count=True):
    """ recursively search the directory 'path' for entries that match a glob pattern

    Arguments:
        path: root directory of the search, as a str or Path
        glob_pattern: pattern applied with Path.glob in 'path' and in every directory below it
        check_count: if True, require exactly one match and return it; if False, return every match

    Returns:
        the single matching Path if check_count is True, otherwise a list of all matching Path
        objects (possibly empty)

    Raises:
        IOError: if check_count is True and the number of matches is not exactly one

    """
    # accept plain strings as well as Path objects
    path = Path(path)
    matches = list(path.glob(glob_pattern))

    # descend into each subdirectory, collecting matches without checking the count
    for child in path.iterdir():
        if child.is_dir():
            matches.extend(rfind_one(child, glob_pattern, check_count=False))

    if not check_count:
        return matches

    if len(matches) != 1:
        raise IOError(f'pattern "{glob_pattern}" matched {len(matches)} files instead of 1')
    return matches[0]

def plot_uplink_histograms(df, dataset_files):
    """ draw a 2x2 grid of histograms summarizing uplink observations

    The panels show the 'Users', 'TX Bits', 'RIV RBs', and 'Average MCS' columns of df.

    Arguments:
        df: DataFrame containing the four columns listed above
        dataset_files: mapping with the keys 'label', 'fc_ul', and 'fc_dl' used in the title and caption

    Returns:
        the figure that holds the four panels

    """
    full_width, full_height = figsize_fullwidth[0], figsize_fullwidth[1]
    fig, axs = subplots(2,2, figsize=(full_width, 1.66*full_height))
    users_ax, bits_ax = axs[0,0], axs[0,1]
    rbs_ax, mcs_ax = axs[1,0], axs[1,1]

    # top left: number of users per observation (missing values count as 0), log-scaled counts
    df['Users'].fillna(0).hist(bins=np.arange(10), log=True, ax=users_ax)
    users_ax.set_ylabel('Number of observations')
    users_ax.set_xlabel('Number of uplink users')

    # top right: transmitted bits in bins of 5000 (missing values count as 0), log-scaled counts
    tx_bits = df['TX Bits']
    bit_bins = np.arange(0, tx_bits.max(), 5000)
    tx_bits.fillna(0).hist(bins=bit_bins, log=True, ax=bits_ax)
    bits_ax.set_xlabel('Data rate (kbps)')

    # bottom left: resource blocks
    df['RIV RBs'].hist(bins=np.arange(51), log=False, ax=rbs_ax)
    rbs_ax.set_ylabel('Number of observations')
    rbs_ax.set_xlabel('Number of RIV RBs')

    # bottom right: average MCS
    df['Average MCS'].hist(bins=np.arange(35), log=False, ax=mcs_ax)
    mcs_ax.set_xlabel('Average MCS')

    # the title is set but kept invisible; the caption carries the description
    fig.suptitle(f'phy data - {dataset_files["label"]} uplink {dataset_files["fc_ul"]} MHz', visible=False)
    set_caption(fig, f'Histograms of LTE uplink resource grants from {dataset_files["label"]} observed at {dataset_files["fc_dl"]:0.1f} MHz')

    return fig

def plot_downlink_histograms(df, dataset_files):
    """ draw histograms summarizing downlink observations on a 2x2 grid (the top-right panel is hidden)

    The panels show the 'Users', 'RBs', and 'Average MCS' columns of df.

    Arguments:
        df: DataFrame containing the three columns listed above
        dataset_files: mapping with the keys 'label' and 'fc_dl' used in the title and caption

    Returns:
        the figure that holds the panels

    """
    full_width, full_height = figsize_fullwidth[0], figsize_fullwidth[1]
    fig, axs = subplots(2,2, figsize=(full_width, 1.66*full_height))
    users_ax, unused_ax = axs[0,0], axs[0,1]
    rbs_ax, mcs_ax = axs[1,0], axs[1,1]

    # top left: number of users per observation (missing values count as 0), log-scaled counts
    df['Users'].fillna(0).hist(bins=np.arange(10), log=True, ax=users_ax)
    users_ax.set_ylabel('Number of observations')
    users_ax.set_xlabel('Number of downlink users')

    # top right: nothing to show for the downlink
    unused_ax.set_visible(False)

    # bottom left: resource blocks
    df['RBs'].hist(bins=np.arange(51), log=False, ax=rbs_ax)
    rbs_ax.set_ylabel('Number of observations')
    rbs_ax.set_xlabel('Number of RBs')

    # bottom right: average MCS
    df['Average MCS'].hist(bins=np.arange(35), log=False, ax=mcs_ax)
    mcs_ax.set_xlabel('Average MCS')

    # the title is set but kept invisible; the caption carries the description
    fig.suptitle(f'phy data - {dataset_files["label"]} downlink {dataset_files["fc_dl"]} MHz', visible=False)
    set_caption(fig, f'Histograms of LTE downlink resource grants from {dataset_files["label"]} observed at {dataset_files["fc_dl"]:0.1f} MHz')

    return fig

def read_messages(dataset_root):
    """ load every '*messages.csv' file below 'dataset_root' into a single table of messages

    The files are read in sorted path order and concatenated. MAC addresses are replaced by short
    anonymous user IDs, the time stamps are made continuous across time resets and relative to the
    first message, and a duration and data rate are computed for each message.

    Arguments:
        dataset_root: directory (str or Path) that is searched recursively for '*messages.csv'

    Returns:
        DataFrame indexed by 'Start Time' (as a TimedeltaIndex) with the columns 'Name', 'MCS',
        'Power', '# Bytes', 'EVM', 'Dest User ID', 'Source User ID', 'Duration', and
        'Data rate (kbps)'

    Raises:
        ValueError: raised by pandas.concat if no matching file is found
        KeyError: if a required column is missing (columns that are entirely empty are dropped first)

    """
    # Path.rglob walks dataset_root and all of its subdirectories
    files = sorted(Path(dataset_root).rglob('*messages.csv'))
    messages = pd.concat([pd.read_csv(fn) for fn in files])
    messages = messages.reset_index(drop=True).dropna(axis=1, how='all')

    # Replace each MAC address by a zero-padded sequence number in sorted address order. The broadcast
    # address is excluded by value, so the numbering is also complete when no broadcast was captured.
    broadcast = 'ff:ff:ff:ff:ff:ff'
    addr_columns = ['MAC Dest Addr', 'MAC Source Addr']
    mac_addrs = messages[addr_columns].values.flatten().astype('str')
    mac_addrs = np.unique(mac_addrs[(mac_addrs != 'nan') & (mac_addrs != broadcast)]).tolist()
    remapping = {addr: f'{number:02d}' for number, addr in enumerate(mac_addrs)}
    remapping[broadcast] = 'broadcast'

    anonymized = messages[addr_columns].replace(remapping)
    messages['Dest User ID'] = anonymized['MAC Dest Addr']
    messages['Source User ID'] = anonymized['MAC Source Addr']

    # A start time that is earlier than that of the previous row marks a time reset. Shift everything
    # from that row onward by the size of the backward step plus a gap of 10 time units.
    start_diffs = messages['Start Time'].diff()
    time_resets = start_diffs < 0
    # start from zeros (not start_diffs*0, which is NaN in the first row and zeroed its end time)
    offsets = pd.Series(0.0, index=messages.index)
    offsets.loc[time_resets] = np.abs(start_diffs.loc[time_resets]) + 10
    shift = offsets.cumsum() - messages['Start Time'].iloc[0]

    # missing time stamps become 0; the division presumably converts ms to s -- TODO confirm the input units
    messages['Start Time'] = (messages['Start Time'] + shift).fillna(0)/1e3
    messages['End Time'] = (messages['End Time'] + shift).fillna(0)/1e3
    messages['Duration'] = messages['End Time'] - messages['Start Time']

    messages = messages.set_index('Start Time')
    messages.index = pd.to_timedelta(messages.index, unit='s')

    # keep only the anonymized columns; copy so that the column added below is not written into a slice
    messages = messages[['Name', 'MCS', 'Power', '# Bytes', 'EVM', 'Dest User ID', 'Source User ID', 'Duration']].copy()

    # bytes per unit of duration, times 8 bits, divided by 1000; zero-duration messages give inf -> NaN
    data_rate = messages['# Bytes']/messages['Duration'] / 1000*8
    messages['Data rate (kbps)'] = data_rate.replace({np.inf: np.nan})

    return messages

# Draw the summary figures for each dataset: received power and data rate by transmission source,
# a packet type histogram, and histograms of data rate and payload size.
for dataset in datasets:
    sanjole_root = Path(dataset['sanjole_root'])
    if not sanjole_root.exists():
        raise IOError(f"root directory {sanjole_root.absolute()} does not exist")

    messages = read_messages(sanjole_root)

    # use log-scaled counts in the histograms once there are more than 1000 messages
    do_log = messages.shape[0] > 1000

    # both strip plots are ordered by source ID
    by_source = messages.sort_values('Source User ID')

    # received power by transmission source
    fig, ax = subplots(figsize=figsize_fullwidth)
    sns.stripplot(x='Source User ID', y='Power', data=by_source,
                size=4, color=".3", linewidth=0, ax=ax)
    title(f"Sanjole WLAN at {dataset['label']} {dataset['fc']:0.1f} MHz - power levels by transmission source", visible=False)
    ylabel('Received power (dBm)')
    set_caption(fig, f"Received power levels by transmission source in WLAN measured from {dataset['label']} data at {dataset['fc']:0.1f} MHz")

    # data rate by transmission source; the (hidden) title names the data rate so that it does not
    # duplicate the title of the power figure above
    fig, ax = subplots(figsize=figsize_fullwidth)
    sns.stripplot(x='Source User ID', y='Data rate (kbps)', data=by_source,
                size=4, color=".3", linewidth=0, ax=ax, alpha=0.5)
    title(f"Sanjole WLAN at {dataset['label']} {dataset['fc']:0.1f} MHz - data rate by transmission source", visible=False)
    set_caption(fig, f"Data rate by transmission source in WLAN measured from {dataset['label']} data at {dataset['fc']:0.1f} MHz")

    # packet type histogram
    # NOTE(review): the bins are the unique packet names without the last one -- confirm that this
    # produces the intended categories
    fig, ax = subplots(figsize=figsize_fullwidth)
    messages['Name'].hist(bins=messages['Name'].unique()[:-1],log=do_log, ax=ax, align='right')
    xticks(rotation=15, ha='right')
    ylabel('Number of observations')
    title(f"Sanjole WLAN at {dataset['label']} {dataset['fc']:0.1f} MHz - packet type histogram", visible=False)
    set_caption(fig, f"Histogram of packet types observed in WLAN measured from {dataset['label']} data at {dataset['fc']:0.1f} MHz")

    # data rate (left) and payload size (right) histograms; missing payload sizes count as 0
    fig, axs = subplots(1,2, figsize=figsize_fullwidth)
    messages['Data rate (kbps)'].hist(bins=101, log=do_log, ax=axs[0])
    axs[0].set_ylabel('Number of observations')
    axs[0].set_xlabel('Data rate (kbps)')

    messages['# Bytes'].fillna(0).hist(bins=101,log=do_log, ax=axs[1])
    axs[1].set_xlabel('Data payload size (bytes)')

    fig.suptitle(f'Sanjole WLAN at {dataset["label"]} {dataset["fc"]:0.1f} MHz - payload histograms', visible=False)
    set_caption(fig, f'Histograms of WLAN data rate and payload sizes from {dataset["label"]} observed at {dataset["fc"]:0.1f} MHz')

In [None]:
import shutil
import gzip

# Write the table returned by read_messages for each dataset to
#   <EXPORT_DATA_ROOT>/network profiling/<label>/<fc> MHz wlan profiling.csv.gz
# The export is skipped when EXPORT_DATA_ROOT is empty or None.
if EXPORT_DATA_ROOT:
    export_root = Path(EXPORT_DATA_ROOT)/'network profiling'

    for dataset in datasets:
        print(dataset)

        sanjole_root = Path(dataset['sanjole_root'])
        if not sanjole_root.exists():
            raise IOError(f"root directory {sanjole_root.absolute()} does not exist")

        messages = read_messages(sanjole_root)

        print(messages.size)

        dest = export_root/dataset['label']/f"{dataset['fc']:0.1f} MHz wlan profiling.csv.gz"
        print(dest)
        dest.parent.mkdir(exist_ok=True, parents=True)

        # pandas infers gzip compression from the '.gz' suffix; a chunksize equal to the number of
        # rows writes the whole table in a single chunk
        messages.to_csv(dest, chunksize=messages.shape[0])