## LTE data plotting

In [None]:
# This notebook supports input parameters for automatic report generation. The parameters must be variables in this
# cell, which has a special 'parameters' tag.

# Configured data directory. A later cell re-bases DATA_ROOT to the parent of this directory, and all dataset
# paths are built relative to that parent.
DATA_ROOT = r'G:\Shared drives\Covid-19 Spectrum Monitoring\Data'

# Root directory that receives gzip-compressed copies of the profiling CSV files. A falsy value (e.g. '' or None)
# skips the export cell entirely.
EXPORT_DATA_ROOT = r'G:\Shared drives\Covid-19 Spectrum Monitoring Data Export'

# Figure format passed to set_matplotlib_formats in the next cell.
figure_format = 'svg'

In [None]:
# NOTE(review): the comment here originally read "permits 'import read' from the parent directory", but nothing
# in this cell modifies sys.path and `sys` is not used in the visible cells -- confirm before removing the import.
import sys
# Star import; presumably the source of np, pd, Path, read_dat, subplots, figsize_fullwidth, set_caption, display,
# HTML and set_matplotlib_formats used below -- verify against the environment module.
from environment import *
set_matplotlib_formats(figure_format)

# Re-base DATA_ROOT to the parent of the configured directory; the dataset paths below are relative to it.
# Re-running this cell without re-running the parameters cell appends another '..'.
DATA_ROOT = Path(DATA_ROOT)/'..'

def swept_power_single_frequency(power_path):
    """Load a swept-power capture and reduce it to a single averaged trace.

    Args:
        power_path: location of the swept power file; forwarded unchanged to
            read_dat.swept_power (called with holdoff=0).

    Returns:
        tuple (power, spectrum, metadata). `power` is a pd.Series named
        'Power' holding the mean of each adjacent pair of time-sorted samples,
        indexed by elapsed time rounded to 3 decimals. `spectrum` and
        `metadata` are returned as read_dat.swept_power produced them.
    """
    spectrum, metadata = read_dat.swept_power(power_path, holdoff=0)

    # keep only what remains of the index after removing these two levels
    trace = spectrum.droplevel(['Sweep', 'Frequency'])

    # convert time units to seconds elapsed since the first entry
    origin = trace.index[0]
    trace.index = (trace.index - origin).total_seconds()

    # the last entry carries the same timestamp as the first: space the
    # entries by the dwell time from the metadata instead
    if trace.index[-1] == 0:
        offsets = metadata['dwell_time'] * np.arange(trace.index.size)
        trace.index = trace.index + offsets

    # unstack, then collapse the two resulting index levels into one time
    # axis by adding them, and put the samples in time order
    flat = trace.unstack()
    level_0 = flat.index.get_level_values(0)
    level_1 = flat.index.get_level_values(1)
    flat.index = level_0 + level_1
    flat = flat.sort_index()

    # average into 1 ms bins: mean of samples (0, 1), (2, 3), ...
    samples = flat.values
    times = flat.index
    power = pd.Series(
        0.5 * (samples[::2] + samples[1::2]),
        index=np.round(times[::2] - times[0], 3),
        name='Power',
    )

    return power, spectrum, metadata

def read_sanjole_parsed_lte(path):
    """Read a processed Sanjole LTE CSV onto a gap-free 1 ms time grid.

    Only the first 7 columns are read. The 'Time (s)' column becomes the
    index, re-referenced to the first row and rounded to 3 decimals. The rows
    are then reindexed onto every millisecond from 0 through the last
    timestamp, leaving NaN in the milliseconds that have no row.

    Args:
        path: CSV file path or buffer accepted by pd.read_csv.

    Returns:
        pd.DataFrame indexed by elapsed time. If the reindex is refused
        (pandas raises ValueError, e.g. for duplicate timestamps) a warning
        is printed and the frame is returned without gap filling.

    Raises:
        IndexError: if the file contains no data rows.
    """
    df = pd.read_csv(path, usecols=range(7)).set_index('Time (s)')
    df.index = np.round(df.index-df.index[0],3)

    # Build the grid from integer millisecond counts and round it exactly like
    # the data index. Stepping in floats (np.arange(0, stop, 1e-3)) produces
    # values that are not always bit-identical to the rounded timestamps, so
    # existing rows were silently replaced by NaN, and the float stop value
    # could append a spurious trailing row.
    n_steps = int(np.rint(df.index.max()*1000))
    reindex = np.round(np.arange(n_steps+1)/1000, 3)

    try:
        df = df.reindex(reindex, fill_value=np.nan)
    except ValueError:
        # deliberately best-effort: keep the un-gridded data rather than fail
        print('warning: could not reindex')

    return df

def read_sanjole_csv_export(path):
    """Read a Sanjole CSV export and index it by seconds since the first row.

    Args:
        path: CSV file path or buffer accepted by pd.read_csv; the file must
            have a 'Start time' column that pd.TimedeltaIndex can interpret.

    Returns:
        pd.DataFrame whose index is the 'Start time' column converted to
        seconds and shifted so that the first row is at 0.
    """
    table = pd.read_csv(path)
    table = table.set_index('Start time')

    # interpret the start times as time deltas, then express them in seconds
    seconds = pd.TimedeltaIndex(table.index).total_seconds()

    # shift so the first row defines t = 0
    table.index = seconds - seconds[0]
    return table

# Only referenced from commented-out code in the visible cells.
sanjole_csv_path = None

# Each dataset below is a dict with:
#   label        - site name used in captions, headers and export directory names
#   sanjole_root - directory searched recursively for '*UL_Processed.csv' / '*DL_Processed.csv'
#   power_path   - swept power capture (not read by any active code in the visible cells)
#   fc_dl, fc_ul - downlink / uplink center frequencies, printed with a 'MHz' suffix
#
# NOTE(review): several paths are raw strings with backslash separators joined onto a Path; pathlib treats
# backslashes as separators only on Windows -- confirm this notebook is Windows-only.

# site_07 Sprint captures
site_07_sprint = dict(
    label = 'site 07',
    sanjole_root = DATA_ROOT/r'Supplemental Data\Network Profiling\Residential\vIYeinjb\Sprint866_3-5MHz-10072020',
    power_path = DATA_ROOT/r'Data/vIYeinjb/vIYeinjb 2020-10-07_05h49m59s.swept_power.dat',
    fc_dl = 866.2999877929688, # downlink
    fc_ul = 821.2999877929688, # uplink
)

# site_07 T-Mobile captures
site_07_tmobile = dict(
    label = 'site 07',
    sanjole_root = DATA_ROOT/r'Supplemental Data\Network Profiling\Residential\vIYeinjb\Tmobile731_5-5MHz-10072020',
    power_path = DATA_ROOT/r'Data/vIYeinjb/vIYeinjb 2020-10-07_05h49m59s.swept_power.dat',
    fc_dl = 731.5, # downlink
    fc_ul = 701.5, # uplink
)

# site_07 Verizon captures
site_07_verizon = dict(
    label = 'site 07',
    sanjole_root = DATA_ROOT/r'Supplemental Data\Network Profiling\Residential\vIYeinjb\Verizon751-10MHz-10072020',
    power_path = DATA_ROOT/r'Data/vIYeinjb/vIYeinjb 2020-10-07_05h49m59s.swept_power.dat',
    fc_dl = 751., # downlink
    fc_ul = 782., # uplink
)

# site_07 AT&T captures
site_07_att = dict(
    label = 'site 07',
    sanjole_root = DATA_ROOT/r'Supplemental Data\Network Profiling\Residential\vIYeinjb\Att739-10MHz-10072020',
    power_path = DATA_ROOT/r'Supplemental Data\SDR Band Power\At NIST\vIYeinjb\vIYeinjb 2020-10-07_05h49m59s.swept_power.dat',
    fc_dl = 739., # downlink
    fc_ul = 709. # uplink
)

# NOTE(review): every site 11 dataset below reuses the vIYeinjb power_path of site 07 although its sanjole_root
# is under dUsVcuPP -- looks like a copy-paste leftover; confirm the intended capture before using power_path.

# site_11 Sprint captures
site_11_sprint = dict(
    label = 'site 11',
    sanjole_root = DATA_ROOT/r'Supplemental Data\Network Profiling\Residential\dUsVcuPP\866_3MHz_5MHz_2020-08-28',
    power_path = DATA_ROOT/r'Data/vIYeinjb/vIYeinjb 2020-10-07_05h49m59s.swept_power.dat',
    fc_dl = 866.2999877929688, # downlink
    fc_ul = 821.2999877929688, # uplink
)

# site_11 T-Mobile captures
site_11_tmobile = dict(
    label = 'site 11',
    sanjole_root = DATA_ROOT/r'Supplemental Data\Network Profiling\Residential\dUsVcuPP\731.5MHz T-Mobile 5MHz 2020-08-26',
    power_path = DATA_ROOT/r'Data/vIYeinjb/vIYeinjb 2020-10-07_05h49m59s.swept_power.dat',
    fc_dl = 731.5, # downlink
    fc_ul = 701.5, # uplink
)

# site_11 Verizon captures
site_11_verizon = dict(
    label = 'site 11',
    sanjole_root = DATA_ROOT/r'Supplemental Data\Network Profiling\Residential\dUsVcuPP\751MHz Verizon 10MHz 2020-08-26',
    power_path = DATA_ROOT/r'Data/vIYeinjb/vIYeinjb 2020-10-07_05h49m59s.swept_power.dat',
    fc_dl = 751., # downlink
    fc_ul = 782., # uplink
)

# site_11 AT&T captures
site_11_att = dict(
    label = 'site 11',
    sanjole_root = DATA_ROOT/r'Supplemental Data\Network Profiling\Residential\dUsVcuPP\739MHz ATT 10MHz 2020-08-25',
    power_path = DATA_ROOT/r'Supplemental Data\SDR Band Power\At NIST\vIYeinjb\vIYeinjb 2020-10-07_05h49m59s.swept_power.dat',
    fc_dl = 739., # downlink
    fc_ul = 709. # uplink
)


# # 24-1300 captures
# sanjole_ul_path = DATA_ROOT/r'Hospital Data\NIST\LTE\Sanjole\Sprint 866_3_5MHZ_20210401\processed\Sprint 866_3_5MHZ_20210401_UL_Processed.csv'
# sanjole_dl_path = DATA_ROOT/r'Hospital Data\NIST\LTE\Sanjole\Sprint 866_3_5MHZ_20210401\processed\Sprint 866_3_5MHZ_20210401_DL_Processed.csv'
# power_path = DATA_ROOT/r'Hospital Data\NIST\LTE\SDR\b0LGfTXu 2021-04-01_14h19m09s.swept_power.dat'
# fc = 866.2999877929688 # downlink
# # fc = 821.2999877929688 # uplink

# hospital captures
#
# Each dict holds the caption/export label, the directory searched recursively for the processed Sanjole CSV
# files, and the downlink / uplink center frequencies (printed with a 'MHz' suffix). No power_path is set for
# these datasets; the commented-out entries are kept for reference.
#
# The 739 MHz datasets previously carried the uplink frequency of the 866.3 MHz carrier (821.3). That made the
# two datasets sharing a label export their uplink profile to the same '<label>/821.3 MHz ...' file, the second
# overwriting the first. They now use 709 MHz, the uplink paired with the 739 MHz downlink in the site 07 and
# site 11 AT&T datasets.
hospital_2020_12_739MHz = dict(
    label = 'hospital 2020-12-07',
    sanjole_root = DATA_ROOT/'Hospital Data/2020-12-07/739MHz 10 MHz 2020-12-07 ends 12h02m full intelligudge',
    # power_path = DATA_ROOT/r'Hospital Data\2021-09-24\739 MHz 10 MHz AT&T\SDR\dUsVcuPP 2021-09-24_10h27m50s.swept_power.dat'
    fc_dl = 739., # MHz downlink
    fc_ul = 709. # MHz uplink
)

hospital_2020_12_866_3MHz = dict(
    label = 'hospital 2020-12-07',
    sanjole_root = DATA_ROOT/'Hospital Data/2020-12-07/866.3MHz 5 MHz 2020-12-07 ends 13h14m',
    # power_path = DATA_ROOT/r'Hospital Data\2021-09-24\739 MHz 10 MHz AT&T\SDR\dUsVcuPP 2021-09-24_10h27m50s.swept_power.dat'
    fc_dl = 866.2999877929688, # downlink
    fc_ul = 821.2999877929688 # uplink
)

hospital_2021_09_739MHz = dict(
    label = 'hospital 2021-09-24',
    sanjole_root = DATA_ROOT/'Hospital Data/2021-09-24/739 MHz 10 MHz AT&T',
    # power_path = DATA_ROOT/r'Hospital Data\2021-09-24\739 MHz 10 MHz AT&T\SDR\dUsVcuPP 2021-09-24_10h27m50s.swept_power.dat'
    fc_dl = 739., # MHz downlink
    fc_ul = 709. # MHz uplink
)

hospital_2021_09_866_3MHz = dict(
    label = 'hospital 2021-09-24',
    sanjole_root = DATA_ROOT/'Hospital Data/2021-09-24/866.3 MHz 5 MHz Sprint',
    # power_path = DATA_ROOT/r'Hospital Data\2021-09-24\739 MHz 10 MHz AT&T\SDR\dUsVcuPP 2021-09-24_10h27m50s.swept_power.dat'
    fc_dl = 866.2999877929688, # downlink
    fc_ul = 821.2999877929688 # uplink
)

# Datasets handled, in this order, by the plotting loop and the export loop in the following cells.
# Comment out entries here to restrict a run to a subset.
datasets = (
    hospital_2020_12_739MHz,
    hospital_2020_12_866_3MHz,
    hospital_2021_09_739MHz,
    hospital_2021_09_866_3MHz,
    site_07_att,
    site_07_sprint,
    site_07_tmobile,
    site_07_verizon,
    site_11_att,
    site_11_sprint,
    site_11_tmobile,
    site_11_verizon
)

# # make the metadata into variables here
# globals().update(metadata) 

# # to plot by frequency, re-sort to group data at each frequency 
# spectrum = spectrum.sort_index(level=['Sweep', 'Frequency', 'Time'])

# spectrum
# Hospital Data/2020-12-07/739MHz 10 MHz 2020-12-07 ends 12h02m full intelligudge
# Hospital Data/2020-12-07/866.3MHz 5 MHz 2020-12-07 ends 13h14m
# Hospital Data/2021-09-24/866.3 MHz 5 MHz Sprint
# Hospital Data/2021-09-24/739 MHz 10 MHz AT&T

In [None]:
def rfind_one(path, glob_pattern, check_count=True):
    """Recursively search the directory tree rooted at 'path' for 'glob_pattern'.

    The pattern is applied with Path.glob in 'path' itself and in every
    directory below it.

    Args:
        path: directory to search, given as a str or a Path.
        glob_pattern: glob pattern applied in each visited directory.
        check_count: when True (default), require exactly one match and
            return it; when False, return the list of all matches.

    Returns:
        The single matching Path if check_count is True, otherwise a list of
        the matching Paths (possibly empty).

    Raises:
        IOError: if check_count is True and the number of matches is not
            exactly one.
    """
    # coerce once so that str paths work for both glob() and iterdir()
    path = Path(path)

    matches = list(path.glob(glob_pattern))
    for child in path.iterdir():
        if child.is_dir():
            matches.extend(rfind_one(child, glob_pattern, check_count=False))

    if not check_count:
        return matches

    if len(matches) != 1:
        raise IOError(f'pattern "{glob_pattern}" matched {len(matches)} files instead of 1')
    return matches[0]

def plot_uplink_histograms(df, dataset_files):
    """Draw a 2x2 grid of histograms describing LTE uplink resource grants.

    Panels: number of uplink users (log counts), 'TX Bits' in 5000-wide bins
    (log counts), 'RIV RBs', and 'Average MCS'.

    Args:
        df: DataFrame providing the 'Users', 'TX Bits', 'RIV RBs' and
            'Average MCS' columns.
        dataset_files: mapping with 'label', 'fc_ul' and 'fc_dl' entries, used
            for the hidden figure title and for the caption.

    Returns:
        The figure holding the four panels.
    """
    fig, axs = subplots(2,2, figsize=(figsize_fullwidth[0], 1.66*figsize_fullwidth[1]))
    df.Users.fillna(0).hist(bins=np.arange(10), log=True, ax=axs[0,0])
    axs[0,0].set_ylabel('Number of observations')
    axs[0,0].set_xlabel('Number of uplink users')

    # Bin edges must reach past the largest value: edges from
    # np.arange(0, max, 5000) stop below the maximum, which drops the top of
    # the distribution and yields fewer than two edges when max <= 5000.
    tx_bits = df['TX Bits'].fillna(0)
    bin_width = 5000
    top = tx_bits.max()
    if not np.isfinite(top):
        # empty input: fall back to a single bin starting at 0
        top = 0
    n_bins = max(1, int(np.ceil(top/bin_width)))
    tx_bits.hist(bins=bin_width*np.arange(n_bins+1), log=True, ax=axs[0,1])
    axs[0,1].set_xlabel('Data rate (kbps)')

    df['RIV RBs'].hist(bins=np.arange(51), log=False, ax=axs[1,0])
    axs[1,0].set_ylabel('Number of observations')
    axs[1,0].set_xlabel('Number of RIV RBs')

    df['Average MCS'].hist(bins=np.arange(35), log=False, ax=axs[1,1])
    axs[1,1].set_xlabel('Average MCS')

    # the title is kept invisible; it is set only as figure metadata
    fig.suptitle(f'phy data - {dataset_files["label"]} uplink {dataset_files["fc_ul"]} MHz', visible=False)

    # NOTE(review): the caption reports the downlink frequency (fc_dl) for the uplink grants; presumably
    # because the grants are observed on the downlink carrier -- confirm this is intended.
    set_caption(fig, f'Histograms of LTE uplink resource grants from {dataset_files["label"]} observed at {dataset_files["fc_dl"]:0.1f} MHz')

    return fig

def plot_downlink_histograms(df, dataset_files):
    """Draw histograms describing LTE downlink resource grants on a 2x2 grid.

    Three panels are used: number of downlink users (log counts), 'RBs' and
    'Average MCS'. The top-right panel is hidden.

    Args:
        df: DataFrame providing the 'Users', 'RBs' and 'Average MCS' columns.
        dataset_files: mapping with 'label' and 'fc_dl' entries, used for the
            hidden figure title and for the caption.

    Returns:
        The figure holding the panels.
    """
    label = dataset_files["label"]
    size = (figsize_fullwidth[0], 1.66*figsize_fullwidth[1])
    fig, axs = subplots(2,2, figsize=size)

    # top-left: users per observation
    users_ax = axs[0,0]
    df.Users.fillna(0).hist(bins=np.arange(10), log=True, ax=users_ax)
    users_ax.set_ylabel('Number of observations')
    users_ax.set_xlabel('Number of downlink users')

    # top-right: nothing to show for the downlink
    axs[0,1].set_visible(False)

    # bottom-left: resource blocks
    rbs_ax = axs[1,0]
    df['RBs'].hist(bins=np.arange(51), log=False, ax=rbs_ax)
    rbs_ax.set_ylabel('Number of observations')
    rbs_ax.set_xlabel('Number of RBs')

    # bottom-right: modulation and coding scheme
    mcs_ax = axs[1,1]
    df['Average MCS'].hist(bins=np.arange(35), log=False, ax=mcs_ax)
    mcs_ax.set_xlabel('Average MCS')

    # the title stays invisible; the caption is what identifies the figure
    fig.suptitle(f'phy data - {label} downlink {dataset_files["fc_dl"]} MHz', visible=False)
    set_caption(fig, f'Histograms of LTE downlink resource grants from {label} observed at {dataset_files["fc_dl"]:0.1f} MHz')

    return fig

# For every dataset: print and display the sample correlation matrix of the uplink and downlink profiling
# data, draw the corresponding histograms, then display a header naming the dataset.
for dataset_files in datasets:
    # a missing root directory is a configuration error, unlike a missing uplink or downlink file
    if not Path(dataset_files['sanjole_root']).exists():
        raise IOError(f"root directory {dataset_files['sanjole_root'].absolute()} does not exist")

    try:
        network_ul = read_sanjole_parsed_lte(rfind_one(dataset_files['sanjole_root'], '*UL_Processed.csv'))

        # compute the correlation matrix once for both the LaTeX and the rich output
        ul_corr = network_ul.corr()
        print(ul_corr.to_latex())
        display(HTML('<b>Uplink sample correlation matrix</b>'))
        display(ul_corr)
        plot_uplink_histograms(network_ul, dataset_files)

    except (IOError,IndexError):
        # rfind_one raises IOError unless exactly one file matches
        print('no uplink')

    try:
        network_dl = read_sanjole_parsed_lte(rfind_one(dataset_files['sanjole_root'], '*DL_Processed.csv'))

        dl_corr = network_dl.corr()
        print(dl_corr.to_latex())
        display(HTML('<b>Downlink sample correlation matrix</b>'))
        display(dl_corr)
        plot_downlink_histograms(network_dl, dataset_files)

    except (IOError,IndexError):
        print('no downlink data')

    # the uplink frequency was previously printed from fc_dl, repeating the downlink value
    display(HTML(f'<b>{dataset_files["label"]} {dataset_files["fc_dl"]} MHz downlink / {dataset_files["fc_ul"]} MHz uplink</b>'))

In [None]:
import shutil
import gzip

def _gzip_copy(src, dest):
    """Write a gzip-compressed copy of the file 'src' to 'dest', creating parent directories as needed."""
    dest.parent.mkdir(exist_ok=True, parents=True)

    # stream in chunks instead of loading the whole CSV into memory
    with open(src, 'rb') as fsrc, gzip.open(dest, 'wb') as fdst:
        shutil.copyfileobj(fsrc, fdst)


# Export a gzip-compressed copy of each dataset's processed uplink and downlink CSV file to
# EXPORT_DATA_ROOT/'network profiling'/<label>/. Skipped entirely when EXPORT_DATA_ROOT is falsy.
#
# NOTE: the destination name depends only on the label and the frequency, so two datasets that share both
# write to the same file and the later one overwrites the earlier one.
if EXPORT_DATA_ROOT:
    for dataset in datasets:
        try:
            ul_path = rfind_one(dataset['sanjole_root'], '*UL_Processed.csv')
            # the parsed frame is not exported; parsing acts as a check that the file is readable
            network_ul = read_sanjole_parsed_lte(ul_path)

        except (IOError,IndexError):
            print('no uplink')

        else:
            dest = Path(EXPORT_DATA_ROOT)/'network profiling'/dataset['label']/f"{dataset['fc_ul']:0.1f} MHz cellular uplink profiling.csv.gz"
            _gzip_copy(ul_path, dest)

        try:
            dl_path = rfind_one(dataset['sanjole_root'], '*DL_Processed.csv')
            network_dl = read_sanjole_parsed_lte(dl_path)

        except (IOError,IndexError):
            print('no downlink data')

        else:
            dest = Path(EXPORT_DATA_ROOT)/'network profiling'/dataset['label']/f"{dataset['fc_dl']:0.1f} MHz cellular downlink profiling.csv.gz"
            _gzip_copy(dl_path, dest)
