In [1]:
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
import bqplot
import ipywidgets

# Source CSV holding the license records analysed in this notebook.
data_url = 'https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_bcubcg_fall2022/main/data/licenses_fall2022.csv'
df_licenses = pd.read_csv(filepath_or_buffer=data_url)
# Show the first five rows as a quick sanity check of the load.
df_licenses.head(n=5)

Unnamed: 0,_id,License Type,Description,License Number,License Status,Business,Title,First Name,Middle,Last Name,...,Specialty/Qualifier,Controlled Substance Schedule,Delegated Controlled Substance Schedule,Ever Disciplined,LastModifiedDate,Case Number,Action,Discipline Start Date,Discipline End Date,Discipline Reason
0,1189509,DETECTIVE BOARD,PERMANENT EMPLOYEE REGISTRATION,129446286.0,NOT RENEWED,N,,EILEEN,,SANTACRUZ,...,,,,N,03/18/2022,,,,,
1,801037,DETECTIVE BOARD,FIREARM CONTROL CARD,229030294.0,NOT RENEWED,N,,DAGMAR,J,NORDLUND,...,,,,N,08/16/2006,,,,,
2,365129,COSMO,LICENSED COSMETOLOGIST,11053076.0,NOT RENEWED,N,,RADOJE,,ZELENOVIC,...,,,,N,05/26/2006,,,,,
3,595427,COSMO,LICENSED COSMETOLOGIST,11295645.0,ACTIVE,N,,BECKY SUE,L,BURROUGHS,...,,,,N,11/12/2021,,,,,
4,653668,COSMO,LICENSED NAIL TECHNICIAN,169006247.0,NOT RENEWED,N,,BILL G,L,LETNER,...,,,,N,05/30/2006,,,,,


In [2]:
# Show all column labels of the loaded frame.
df_licenses.columns

Index(['_id', 'License Type', 'Description', 'License Number',
       'License Status', 'Business', 'Title', 'First Name', 'Middle',
       'Last Name', 'Prefix', 'Suffix', 'Business Name', 'BusinessDBA',
       'Original Issue Date', 'Effective Date', 'Expiration Date', 'City',
       'State', 'Zip', 'County', 'Specialty/Qualifier',
       'Controlled Substance Schedule',
       'Delegated Controlled Substance Schedule', 'Ever Disciplined',
       'LastModifiedDate', 'Case Number', 'Action', 'Discipline Start Date',
       'Discipline End Date', 'Discipline Reason'],
      dtype='object')

In [3]:
# Gather every column whose label contains 'Date'; these are the date fields
# that later cells parse and convert.
columns = list(df_licenses.columns)
arr_list = [name for name in columns if 'Date' in name]
df_licenses[arr_list]

Unnamed: 0,Original Issue Date,Effective Date,Expiration Date,LastModifiedDate,Discipline Start Date,Discipline End Date
0,02/03/2020,02/03/2020,09/30/2021,03/18/2022,,
1,02/07/1995,02/07/1995,12/31/2003,08/16/2006,,
2,02/28/1945,02/28/1945,09/30/1983,05/26/2006,,
3,11/22/2011,11/12/2021,09/30/2023,11/12/2021,,
4,07/12/1995,07/12/1995,10/31/2002,05/30/2006,,
...,...,...,...,...,...,...
9995,08/04/1987,08/04/1987,05/31/1997,08/03/2006,,
9996,05/22/1989,05/22/1989,06/11/1990,08/07/2006,,
9997,05/09/1966,05/09/1966,,05/26/2006,,
9998,10/04/1983,12/17/2021,09/30/2023,12/17/2021,,


In [4]:
# Number of days between 'Effective Date' and 'Expiration Date' for each row.
# This parses the raw MM/DD/YYYY strings, so it has to run before those
# columns are overwritten with bare years by switchYear().
effective_dates = pd.to_datetime(df_licenses['Effective Date'], errors='coerce', format='%m/%d/%Y')
expiration_dates = pd.to_datetime(df_licenses['Expiration Date'], errors='coerce', format='%m/%d/%Y')

# Rows where either date is missing or unparseable yield NaN days; they are
# stored as 0. The filled result is assigned back instead of calling
# fillna(..., inplace=True) on the selected column, because that in-place call
# does not update the frame under pandas Copy-on-Write.
# NOTE(review): the 0 placeholders take part in the later mean/median
# aggregations -- confirm that is intended.
df_licenses['Diff_Date'] = (expiration_dates - effective_dates).dt.days.fillna(0)

In [5]:
# Mean Diff_Date for every (status, type) pair, plus the distinct labels of
# each key column in order of first appearance.
group_keys = ['License Status', 'License Type']
licenses_group = df_licenses.groupby(group_keys)['Diff_Date'].mean()
uniq_status, uniq_type = (df_licenses[key].unique() for key in group_keys)

In [6]:
def switchYear(column):
    """Replace the MM/DD/YYYY strings in ``df_licenses[column]`` with their year.

    The column is overwritten in place on the module-level ``df_licenses``
    frame. Values that cannot be parsed with the ``%m/%d/%Y`` format become NaN.

    The call is idempotent: a column that is already numeric (for example
    because this function has already converted it) is left untouched.
    Re-parsing year numbers against the date format would otherwise coerce
    every value to NaN.

    Parameters
    ----------
    column : str
        Label of the date column to convert.
    """
    values = df_licenses[column]
    if pd.api.types.is_numeric_dtype(values):
        # Already converted (or has no date strings to parse): nothing to do.
        return
    parsed = pd.to_datetime(values, errors='coerce', format='%m/%d/%Y')
    df_licenses[column] = parsed.dt.year

In [7]:
# Convert each collected date column to its year, then show the result.
for date_column in arr_list:
    switchYear(date_column)
df_licenses[arr_list]

Unnamed: 0,Original Issue Date,Effective Date,Expiration Date,LastModifiedDate,Discipline Start Date,Discipline End Date
0,2020.0,2020.0,2021.0,2022,,
1,1995.0,1995.0,2003.0,2006,,
2,1945.0,1945.0,1983.0,2006,,
3,2011.0,2021.0,2023.0,2021,,
4,1995.0,1995.0,2002.0,2006,,
...,...,...,...,...,...,...
9995,1987.0,1987.0,1997.0,2006,,
9996,1989.0,1989.0,1990.0,2006,,
9997,1966.0,1966.0,,2006,,
9998,1983.0,2021.0,2023.0,2021,,


In [8]:
# list1 = df_licenses['Original Issue Date'].unique()
# list1.sort()
# list1

In [9]:
# Display the mean Diff_Date per (License Status, License Type) pair.
licenses_group

License Status                        License Type     
ACTIVE                                ARCHITECT             753.750000
                                      ATHLETIC TRAINER      716.000000
                                      AUCTIONEER            816.000000
                                      CLIN PSYCHOLOGIST     692.777778
                                      COLLECTION AGENCY    1038.285714
                                                              ...     
Non Sufficient Fund Check Terminated  COSMO                5220.000000
                                      DETECTIVE BOARD       220.000000
TERMINATED CARD RETURNED              DETECTIVE BOARD       764.844548
TERMINATED VALID REASON               DETECTIVE BOARD       834.421053
TERMINATED WITHOUT CARD               DETECTIVE BOARD      1019.468750
Name: Diff_Date, Length: 105, dtype: float64

In [10]:
# Number of distinct statuses and types, one per line.
print(len(uniq_status), len(uniq_type), sep='\n')

13
28


In [11]:
# Initial bar-chart data: DETECTIVE BOARD licenses with status NOT RENEWED,
# reduced to the median Diff_Date per 'Original Issue Date' value.
df_temp = df_licenses.loc[
    df_licenses['License Type'].eq('DETECTIVE BOARD')
    & df_licenses['License Status'].eq('NOT RENEWED')
]
df_median = df_temp.groupby('Original Issue Date', as_index=False)['Diff_Date'].median()

def l_filter(types, status):
    """Return the median Diff_Date per 'Original Issue Date' for one type/status pair.

    Rows of the module-level ``df_licenses`` whose 'License Type' equals
    ``types`` and whose 'License Status' equals ``status`` are grouped by
    'Original Issue Date'; the per-group medians are returned as a NumPy array
    ordered by the group key.
    """
    matches_type = df_licenses['License Type'] == types
    matches_status = df_licenses['License Status'] == status
    selection = df_licenses[matches_type & matches_status]

    medians = selection.groupby('Original Issue Date')['Diff_Date'].median()
    return medians.to_numpy()

In [12]:
# Scales for the bar chart: ordinal along x, linear along y.
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.LinearScale()

# Axes bound to those scales.
ax_xcl = bqplot.Axis(
    scale=x_sc,
    label='Original Issue Year',
    label_offset='50',
    tick_rotate=60,
    tick_style={'text-anchor': 'start'},
)
ax_ycl = bqplot.Axis(
    scale=y_sc,
    orientation='vertical',
    label='Date Difference',
    label_offset='-45',
)

# Bars start out showing the default selection held in df_median.
bar = bqplot.Bars(
    x=df_median['Original Issue Date'],
    y=df_median['Diff_Date'],
    scales={'x': x_sc, 'y': y_sc},
)

figh = bqplot.Figure(axes=[ax_xcl, ax_ycl], marks=[bar])
figh

Figure(axes=[Axis(label='Original Issue Year', label_offset='50', scale=OrdinalScale(), tick_rotate=60, tick_s…

In [13]:
# Build the status x type matrix of mean Diff_Date values. Each row is the
# per-type series for one status, aligned to uniq_type; type labels that do
# not occur for that status are filled with 0.
heatmap_data = np.array([
    licenses_group[s].reindex(uniq_type, fill_value=0).to_numpy()
    for s in uniq_status
])

In [14]:
# Text widget that reports details of the currently selected heatmap cell;
# it starts out empty.
label_message = ipywidgets.Label(value='')

In [15]:
def on_selected(change):
    """Refresh the bar chart and the label when a single heatmap cell is selected.

    Parameters
    ----------
    change : dict
        Change notification passed by ``observe``; only
        ``change['owner'].selected`` is read. Nothing happens unless it holds
        exactly one ``(row, column)`` pair.

    Side effects
    ------------
    Sets ``bar.x`` / ``bar.y`` to the issue years and the median Diff_Date per
    year for the selected (type, status) pair, and writes a summary of the
    selection to ``label_message.value``.
    """
    selected = change['owner'].selected
    # Only react to a single selected cell; the selection may also be None.
    if selected is None or len(selected) != 1:
        return

    i, j = selected[0]
    v = heatmap_data[i, j]
    selected_type = uniq_type[j]
    selected_status = uniq_status[i]

    # Take x and y from the same grouped Series so they always have matching
    # lengths and ordering. The previous code assigned an undefined name to
    # bar.x.
    matches = (
        (df_licenses['License Type'] == selected_type)
        & (df_licenses['License Status'] == selected_status)
    )
    medians = df_licenses[matches].groupby('Original Issue Date')['Diff_Date'].median()

    # Send both attributes in one sync so the chart does not render a
    # half-updated state with mismatched x and y.
    with bar.hold_sync():
        bar.x = medians.index.to_numpy()
        bar.y = medians.to_numpy()

    label_message.value = f'    Selected Box: {(i, j)};\n   Selected Status: {str(selected_status)};\n  Selected Type: {str(selected_type)};\n  Mean of the days between Effective Date and Expiration Date={v:.2f}'

# Scales: colour spans the full range of the matrix; rows and columns are ordinal.
col_sc = bqplot.ColorScale(
    scheme="RdPu",
    min=np.nanmin(heatmap_data),
    max=np.nanmax(heatmap_data),
)
x_sc = bqplot.OrdinalScale()
y_sc = bqplot.OrdinalScale()

# Axes: colour bar on the right, license type along x, license status along y.
col_ax = bqplot.ColorAxis(scale=col_sc, side='right')
x_ax = bqplot.Axis(
    scale=x_sc,
    label='Type',
    label_offset='60',
    tick_rotate=15,
    tick_style={'text-anchor': 'start', 'font-size': 8},
)
y_ax = bqplot.Axis(
    scale=y_sc,
    orientation='vertical',
    label='Status',
    label_offset='-120',
    tick_rotate=0,
    tick_style={'text-anchor': 'end', 'font-size': 8},
)

# Mark: one clickable grid cell per (status, type) pair.
heat_map = bqplot.GridHeatMap(
    color=heatmap_data,
    row=uniq_status,
    column=uniq_type,
    scales={'color': col_sc, 'row': y_sc, 'column': x_sc},
    interactions={'click': 'select'},
    selected_style={'fill': 'magenta'},
    row_align='start',
)

# Interaction: run on_selected whenever the mark's selection changes.
heat_map.observe(on_selected, names='selected')

# Assemble the figure, leaving margin room for the tick labels and colour bar.
fig = bqplot.Figure(
    marks=[heat_map],
    axes=[col_ax, x_ax, y_ax],
    fig_margin={'top': 30, 'bottom': 80, 'left': 150, 'right': 80},
)

fig

Figure(axes=[ColorAxis(scale=ColorScale(max=8705.5, min=-7.0, scheme='RdPu'), side='right'), Axis(label='Type'…

In [16]:
# Give both figures the same minimum width.
for figure in (fig, figh):
    figure.layout.min_width = '1200px'

# Heatmap and bar chart side by side, with the selection label above them.
figures = ipywidgets.HBox(children=[fig, figh])
myDashboard = ipywidgets.VBox(children=[label_message, figures])
myDashboard

VBox(children=(Label(value=''), HBox(children=(Figure(axes=[ColorAxis(scale=ColorScale(max=8705.5, min=-7.0, s…

### Can you keep the x and y ranges static on the bar plot?

We tried to set `tick_values`