# RSV and Influenza Dashboard

Below we have 2 graphs from data obtained from https://ukhsa-dashboard.data.gov.uk.

Graph 1 shows the percentage of the total number of PCR tests for RSV and Influenza taken weekly which had a positive result in 2024; the two metrics are plotted separately on the graph.

Graph 2 shows the percentage of the total number of PCR tests for RSV taken weekly which had a positive result, separated by age group (0-4 years, 5-14 years, 15-44 years, 45-64 years, 65-79 years and 80+ years).

### Guidance:
You can select an individual metric, or display all metrics on the graph, using the dropdown menu. You can also choose between a linear and a log scale, and toggle a grid for easier reading of the plot.

Click the refresh data button to obtain the latest data from the UKHSA API.

In [2]:
#imports required libraries
from IPython.display import clear_output
import ipywidgets as wdg
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import time
import json

In [2]:
#IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline
plt.rcParams['figure.dpi'] = 100 #set the figure resolution (dots per inch) so figures render larger

In [3]:
#load data for Graph 1 (RSV and Influenza)
#JSON text is UTF-8, so the encoding is stated explicitly instead of relying on
#the platform's locale default (which open() uses when no encoding is given)
with open("RSVpos.json", "rt", encoding="utf-8") as infile:
    rsv_data = json.load(infile) #RSV data
with open("flupos.json", "rt", encoding="utf-8") as infile:
    flu_data = json.load(infile) #influenza data

#loads data for Graph 2 (RSV by Age Groups) and each file corresponds to a specific age group
with open("RSV0_4.json", "rt", encoding="utf-8") as infile:
    RSV0_4 = json.load(infile) #RSV data for age group 0-4
with open("RSV5_14.json", "rt", encoding="utf-8") as infile:
    RSV5_14 = json.load(infile) #RSV data for age group 5-14
with open("RSV15_44.json", "rt", encoding="utf-8") as infile:
    RSV15_44 = json.load(infile) #RSV data for age group 15-44
with open("RSV45_64.json", "rt", encoding="utf-8") as infile:
    RSV45_64 = json.load(infile) #RSV data for age group 45-64
with open("RSV65_79.json", "rt", encoding="utf-8") as infile:
    RSV65_79 = json.load(infile) #RSV data for age group 65-79
with open("RSV80.json", "rt", encoding="utf-8") as infile:
    RSV80 = json.load(infile) #RSV data for age group 80+

#Graph 1 (RSV and Influenza): nested dictionary of the form {date: {metric name: value}}
jsondata_1 = {}
for records in (rsv_data, flu_data): #both datasets feed the same dictionary
    for record in records:
        day, name, reading = record['date'], record['metric'], record['metric_value']
        #setdefault creates the inner dictionary the first time a date is seen
        jsondata_1.setdefault(day, {})[name] = reading
dates_1 = sorted(jsondata_1) #date strings in ascending order

#Graph 2 (RSV by Age Groups): nested dictionary of the form {date: {"Age <group>": value}}
jsondata_2 = {}
age_groups = ['0-4', '5-14', '15-44', '45-64', '65-79', '80+'] #age group labels
datasets = [RSV0_4, RSV5_14, RSV15_44, RSV45_64, RSV65_79, RSV80] #loaded jsons, in the same order as age_groups
for age_group, records in zip(age_groups, datasets):
    column_name = f"Age {age_group}" #label stored for this age group
    for record in records:
        day, reading = record['date'], record['metric_value']
        jsondata_2.setdefault(day, {})[column_name] = reading
dates_2 = sorted(jsondata_2) #date strings in ascending order

#Graph 1 lookup (RSV and Influenza): display label -> metric name found in the loaded data
metrics_1 = dict(
    RSV='RSV_testing_positivityByWeek', #RSV metric
    Influenza='influenza_testing_positivityByWeek', #influenza metric
)

#Graph 2 lookup: every "Age <group>" label maps to itself,
#because the age group data is already stored under that label
metrics_2 = {label: label for label in (f"Age {age}" for age in age_groups)}

In [4]:
def wrangle_data(rawdata, metrics, use_metrics_mapping=True):
    """
    processes raw JSON or API data into a df with specific metrics.
    params: -rawdata: dictionary of the form {"YYYY-MM-DD" date string: {metric key: value}}.
     None or an empty dictionary produces an empty dataframe instead of an error.
    -metrics: dictionary mapping column names to metric keys.
    -use_metrics_mapping: when True the values of metrics are used as the lookup keys
     in each entry; when False the column names themselves are used as the lookup keys.
    returns:
    - df: Pandas dataframe indexed by every Monday between the earliest and the latest
     date in rawdata, with one column per key of metrics. Cells without a value are
     left empty and dates that are not on the index are skipped.
    """
    def parse_date(datestring):
        """Convert a date string into a pandas datetime object."""
        return pd.to_datetime(datestring, format="%Y-%m-%d")

    columns = list(metrics.keys())  #use metric names as column headers

    #no data at all (None or {}): there is no date range to build, so return an
    #empty frame with the expected columns rather than failing on dates[0]
    if not rawdata:
        return pd.DataFrame(index=pd.DatetimeIndex([]), columns=columns)

    #work out each column's lookup key once instead of once per date
    lookup_keys = {
        column: (metrics[column] if use_metrics_mapping else column)
        for column in columns
    }

    dates = sorted(rawdata.keys())  #sort the dates in data

    #generate a weekly date range for the dataset
    startdate = parse_date(dates[0])  #parse the start date
    enddate = parse_date(dates[-1])  #parse the end date
    index = pd.date_range(start=startdate, end=enddate, freq='W-MON') #weekly date range from mondays
    df = pd.DataFrame(index=index, columns=columns) #initialises dataframe
    df[:] = None  #set all values to none initially

    for date, entry in rawdata.items():
        pd_date = parse_date(date)  #convert date to pandas datetime
        if pd_date not in df.index:  #dates that are not on the weekly index are skipped
            continue
        for column, metric_name in lookup_keys.items():
            value = entry.get(metric_name)  #None when the entry has no such metric
            if value is not None:
                df.loc[pd_date, column] = value #populate df
    return df


#Graph 1 dataframe (RSV and Influenza): metric names are looked up through metrics_1
df1 = wrangle_data(rawdata=jsondata_1, metrics=metrics_1, use_metrics_mapping=True)

#Graph 2 dataframe (RSV by Age Groups): the column names are used as the lookup keys directly
df2 = wrangle_data(rawdata=jsondata_2, metrics=metrics_2, use_metrics_mapping=False)


In [5]:
def access_api_graph1(timeout=10):
    """fetch RSV and influenza data from the UKHSA API for graph 1.
    params: -timeout: seconds to wait on each request before giving up. Without a
     timeout, requests waits indefinitely on a server that does not respond.
    returns:
    - dictionary of the form {date: {metric name: value}} built from the 'results'
     of both responses, or None if a request fails.
    """
    try:
        rawdata_1 = {} #initialise empty dictionary for data
        #endpoints for RSV and influenza metrics including parameters for filtering
        api_endpoints_1 = [
            {
                'url': "https://api.ukhsa-dashboard.data.gov.uk/themes/infectious_disease/sub_themes/respiratory/topics/RSV/geography_types/Nation/geographies/England/metrics/RSV_testing_positivityByWeek",
                'params': {'age': "all", 'year': 2024, 'page_size': 52}
            },
            {
                'url': "https://api.ukhsa-dashboard.data.gov.uk/themes/infectious_disease/sub_themes/respiratory/topics/Influenza/geography_types/Nation/geographies/England/metrics/influenza_testing_positivityByWeek",
                'params': {'age': "all", 'year': 2024, 'page_size': 52}
            }
        ]

        #loop through endpoints and fetch data
        for endpoint in api_endpoints_1:
            #a timed-out request raises requests.exceptions.Timeout, which is a
            #RequestException and is therefore handled by the except clause below
            response = requests.get(endpoint['url'], params=endpoint['params'], timeout=timeout)  #API request
            response.raise_for_status() #raise an exception for any HTTP errors
            response_json = response.json() #parse response JSON

            #get 'results' from API response
            if 'results' in response_json:
                data = response_json['results']
                for entry in data:
                    date = entry.get('date')  #get date
                    metric = entry.get('metric') #get metric name
                    value = entry.get('metric_value') #get metric value
                    #skip entries with a missing date, metric name or value
                    if date and metric and value is not None:
                        if date not in rawdata_1:
                            rawdata_1[date] = {}
                        rawdata_1[date][metric] = value #populate the dictionary

        return rawdata_1 #return processed raw data
    except requests.exceptions.RequestException as e: #catches any request related exceptions
        print(f"API request failed for Graph 1: {e}")
        return None

#function to fetch RSV data by age group for Graph 2
def access_api_graph2(timeout=10):
    """fetch RSV data by age group from the UKHSA API for graph 2.
    params: -timeout: seconds to wait on each request before giving up. Without a
     timeout, requests waits indefinitely on a server that does not respond.
    returns:
    - dictionary of the form {date: {"Age <group>": value}} with one request made
     per age group, or None if a request fails.
    """
    try:
        rawdata_2 = {} #initialise empty dictionary for data
        #display label -> age value sent to the API
        age_groups_api = {
            'Age 0-4': '00-04',
            'Age 5-14': '05-14',
            'Age 15-44': '15-44',
            'Age 45-64': '45-64',
            'Age 65-79': '65-79',
            'Age 80+': '80+'
        }

        #loop through each age group and fetch data
        for display_age, api_age in age_groups_api.items():
            #a timed-out request raises requests.exceptions.Timeout, which is a
            #RequestException and is therefore handled by the except clause below
            rsv_request = requests.get(
                'https://api.ukhsa-dashboard.data.gov.uk/themes/infectious_disease/sub_themes/respiratory/topics/RSV/geography_types/Nation/geographies/England/metrics/RSV_testing_positivityByWeek',
                params={'age': api_age, 'year': 2024, 'page_size': 52},
                timeout=timeout
            )
            rsv_request.raise_for_status() #raise an exception for HTTP errors
            response_json = rsv_request.json() #parse response JSON

            #get 'results' from API response
            if 'results' in response_json:
                data = response_json['results']
                for entry in data:
                    date = entry.get('date')  #get date
                    value = entry.get('metric_value') #get metric value
                    #skip entries with a missing date or value
                    if date and value is not None:
                        if date not in rawdata_2:
                            rawdata_2[date] = {}
                        rawdata_2[date][display_age] = value #use display_age as a column name

        return rawdata_2 #return processed raw data
    except requests.exceptions.RequestException as e: #catches any request related exceptions
        print(f"API request failed for Graph 2: {e}")
        return None


In [6]:
def plot_data(metric, scale, grid, df, metrics, graph_output, plot_title, x_label, y_label):
    """
    draws the chosen metric(s) inside an output widget using the user's options.
    params:- metric: a single column name, or 'All Metrics' to draw every column named in metrics.
    -scale: 'log' gives a logarithmic y axis, any other value a linear one.
    -grid: Boolean, whether grid lines are drawn.
    -df: DataFrame holding the data to draw.
    -metrics: Dictionary whose keys are the column names available for plotting.
    -graph_output: Output widget the plot is rendered into.
    -plot_title: Title of the plot.
    -x_label: x axis label.
    -y_label: y axis label.
    """
    with graph_output:
        #wipe whatever the widget showed before
        graph_output.clear_output(wait=True)

        #'All Metrics' expands to every column; anything else is one column
        plot_columns = list(metrics.keys()) if metric == 'All Metrics' else [metric]

        #plot only the chosen columns, with a log y axis when the user asked for it
        ax = df[plot_columns].plot(logy=(scale == 'log'), title=plot_title)
        ax.set_xlabel(x_label) #x axis label
        ax.set_ylabel(y_label) #y axis label
        ax.grid(grid) #grid lines on or off
        plt.tight_layout() #avoid overlapping plot elements
        plt.show() #render the figure


In [7]:
def refresh_graph1(*args):#*args absorbs whatever arguments the caller passes; they are not used
    """
    Redraws the first graph from the current values of its widgets.
    """
    plot_data(
        metric=metric_selector1.value,
        scale=scale1.value,
        grid=show_grid1.value,
        df=df1,
        metrics=metrics_1,
        graph_output=graph_output1,
        plot_title=plot_title1,
        x_label=x_label1,
        y_label=y_label1,
    )

def refresh_graph2(*args):#*args absorbs whatever arguments the caller passes; they are not used
    """
    Redraws the second graph from the current values of its widgets.
    """
    plot_data(
        metric=metric_selector2.value,
        scale=scale2.value,
        grid=show_grid2.value,
        df=df2,
        metrics=metrics_2,
        graph_output=graph_output2,
        plot_title=plot_title2,
        x_label=x_label2,
        y_label=y_label2,
    )

def api_button_callback1(button):
    """
    fetch new data from the API for graph 1, wrangle it, and update the graph.
    If the fetch fails or returns no data, df1 is left untouched so the graph
    keeps showing the data it already had.
    params: -button: button widget triggering this function.
    """
    global df1 #df1 is reassigned below, so it must be declared as global

    apibutton1.icon = 'spinner' #to show that it is loading
    rawdata_1 = access_api_graph1() #fetches new data

    #check the result BEFORE wrangling: access_api_graph1 returns None on failure
    #(and may return an empty dictionary), neither of which can be wrangled
    if not rawdata_1:
        print("API refresh failed for Graph 1. Using preloaded data from JSON.")
        apibutton1.icon = 'times' #shows that it has failed
        return

    df1 = wrangle_data(rawdata_1, metrics_1, use_metrics_mapping=True) #wrangles the fetched API data
    apibutton1.icon = 'check' #shows that it was successful

    # Refresh the graph with new data
    refresh_graph1()

def api_button_callback2(button):
    """
    fetch new data from the API for graph 2, wrangle it, and update the graph.
    If the fetch fails or returns no data, df2 is left untouched so the graph
    keeps showing the data it already had.
    params: -button: button widget triggering this function.
    """
    global df2 #df2 is reassigned below, so it must be declared as global

    apibutton2.icon = 'spinner' #to show that it is loading
    rawdata_2 = access_api_graph2() #fetches new data

    #check the result BEFORE wrangling: access_api_graph2 returns None on failure
    #(and may return an empty dictionary), neither of which can be wrangled
    if not rawdata_2:
        print("API refresh failed for Graph 2. Using preloaded data from JSON.")
        apibutton2.icon = 'times'  #shows that it has failed
        return

    df2 = wrangle_data(rawdata_2, metrics_2, use_metrics_mapping=False) #wrangles the fetched API data
    apibutton2.icon = 'check' #shows that it was successful

    #refresh the graph with new data
    refresh_graph2()

In [8]:
#refresh button for graph 1; a click runs api_button_callback1
apibutton1 = wdg.Button(
    description='Refresh Data for Graph 1', #text shown on the button
    tooltip='Download latest data for Graph 1 from the API',
    icon='download', #download icon
    button_style='info',
    disabled=False,
    layout=wdg.Layout(width='200px'), #wide enough for the text to fit
)
apibutton1.on_click(api_button_callback1)

#refresh button for Graph 2; a click runs api_button_callback2
apibutton2 = wdg.Button(
    description='Refresh Data for Graph 2', #text shown on the button
    tooltip='Download latest data for Graph 2 from the API',
    icon='download', #download icon
    button_style='info',
    disabled=False,
    layout=wdg.Layout(width='200px'), #wide enough for the text to fit
)
apibutton2.on_click(api_button_callback2)


#graph 1 (RSV and Influenza): graph title, x axis label, y axis label
plot_title1, x_label1, y_label1 = (
    'Weekly Positivity Rates (PCR Testing) in 2024',
    'Calendar Week (2024)',
    'Positivity Rate (%)',
)

#Graph 2 (RSV by Age Groups): graph title, x axis label, y axis label
plot_title2, x_label2, y_label2 = (
    'RSV Positivity Rates by Age Group in 2024',
    'Calendar Week (2024)',
    'Positivity Rate (%)',
)


#Graph 1 dropdown: 'All Metrics' followed by every label in metrics_1
metric_options1 = ['All Metrics', *metrics_1]
metric_selector1 = wdg.Dropdown(
    description='Metric:',
    options=metric_options1, #choices offered to the user
    value='All Metrics', #selected at start-up
    disabled=False, #widget is enabled
)

#Graph 1 scale choice (linear or log)
scale1 = wdg.RadioButtons(
    description='Scale:',
    options=['linear', 'log'],
    disabled=False, #widget is enabled
)

#Graph 1 grid toggle
show_grid1 = wdg.Checkbox(
    description='Show Grid', #widget text
    value=True, #ticked at start-up
    disabled=False, #widget is enabled
)


#Graph 2 dropdown: 'All Metrics' followed by every age group label in metrics_2
metric_options2 = ['All Metrics', *metrics_2]
metric_selector2 = wdg.Dropdown(
    description='Age Group:', #widget text
    options=metric_options2, #choices offered to the user
    value='All Metrics', #selected at start-up
    disabled=False, #widget is enabled
)

#Graph 2 scale choice (linear or log)
scale2 = wdg.RadioButtons(
    description='Scale:', #widget text
    options=['linear', 'log'],
    disabled=False, #widget is enabled
)

#Graph 2 grid toggle
show_grid2 = wdg.Checkbox(
    description='Show Grid', #widget text
    value=True, #ticked at start-up
    disabled=False, #widget is enabled
)

#graph 1 controls placed side by side
controls1 = wdg.HBox(children=[metric_selector1, scale1, show_grid1])

#graph 2 controls placed side by side
controls2 = wdg.HBox(children=[metric_selector2, scale2, show_grid2])

#redraw graph 1 whenever one of its controls changes value
for control in (metric_selector1, scale1, show_grid1):
    control.observe(refresh_graph1, names='value')

#redraw graph 2 whenever one of its controls changes value
for control in (metric_selector2, scale2, show_grid2):
    control.observe(refresh_graph2, names='value')


#output widget that plot_data renders graph 1 into
graph_output1 = wdg.Output()

#output widget that plot_data renders graph 2 into
graph_output2 = wdg.Output()


#draw both graphs once with the default widget values, before the widgets are displayed
refresh_graph1() #initial draw of graph 1
refresh_graph2() #initial draw of graph 2

print('Graph 1 Controls:')
#display controls, graph output, and refresh button for graph 1
#NOTE(review): display is not imported in the visible cells - presumably the notebook's built-in display; confirm
display(controls1, graph_output1, apibutton1)

print("\n\n") #blank lines to separate the two graphs

print('Graph 2 Controls:')
#display controls, graph output, and refresh button for graph 2
display(controls2, graph_output2, apibutton2)


Graph 1 Controls:


HBox(children=(Dropdown(description='Metric:', options=('All Metrics', 'RSV', 'Influenza'), value='All Metrics…

Output()

Button(button_style='info', description='Refresh Data for Graph 1', icon='download', layout=Layout(width='200p…




Graph 2 Controls:


HBox(children=(Dropdown(description='Age Group:', options=('All Metrics', 'Age 0-4', 'Age 5-14', 'Age 15-44', …

Output()

Button(button_style='info', description='Refresh Data for Graph 2', icon='download', layout=Layout(width='200p…

**Author and License** Code written by: Fathima Yashra Muzamil. "Based on UK Government [data](https://ukhsa-dashboard.data.gov.uk/) published by the [UK Health Security Agency](https://www.gov.uk/government/organisations/uk-health-security-agency) and on the [DIY Disease Tracking Dashboard Kit](https://github.com/fsmeraldi/diy-covid19dash) by Fabrizio Smeraldi. Released under the [GNU GPLv3.0 or later](https://www.gnu.org/licenses/)."