In [1]:
%%html
<style>
/* Notebook-wide styling for rendered pandas DataFrames. */
/* Header cells: larger, non-bold text with a thin grey border. */
.dataframe th {
    font-size: 18px;
    font-weight:normal;
    border: 1px solid grey;
}
/* Body cells: slightly smaller text than the headers. */
.dataframe td {
    font-size: 16px;
}
</style>

In [2]:
#IMPORTS
#-------------------------------------------------------------------------------------------------------

import re
import io
import os
import sys
import csv
import time
import numpy as np
import pandas as pd
import warnings
import pathlib
import zipfile
from zipfile import ZipFile
import math
import ipywidgets as widgets
from ipywidgets import interact, interactive, IntProgress
from IPython.display import Markdown, HTML, Javascript, display, Image, clear_output
def printmd(string):
    """Render ``string`` as Markdown in the current output area."""
    rendered = Markdown(string)
    display(rendered)
import markdown as md
from ipywidgets import HTML

import ipyvuetify as v
from ipyvuetify.extra import FileInput
import plotly.express as px

In [3]:
def intro_html():
    """Build the introductory description shown on the landing screen.

    The text (plus a small scoped stylesheet) is passed through
    ``md.markdown`` and wrapped in an ``HTML`` widget.

    Returns:
        The ``HTML`` widget holding the rendered introduction.
    """
    # The style rules are written as flat descendant selectors (``div.s h1``)
    # rather than nested inside ``div.s { ... }`` so they do not rely on the
    # browser supporting native CSS nesting.
    html = md.markdown("""
<style>
div.s {font-size: 18px;}
div.s h1 {font-size: 28px; margin-bottom: 40px;}
div.s h2 {font-size: 24px; margin-bottom: 32px;}
div.s ul, div.s ol {font-size: 18px; color: #333333; margin-bottom: 24px;}
</style>

<div class="s">
This editor merges one row of units and one row of VMV codes to column headers, producing a file with one cleaned header row without spaces or special characters.<br>

You can also plot data by date and station. 
</div>                     
    """)

    intro=HTML(html)
    return intro
    

In [4]:
def main():
    """Entry point of the app.

    Removes output files left in the working directory by a previous run,
    then displays the header card, the introduction and a BEGIN button.
    Clicking BEGIN clears the introduction and hands over to ``tabs()``.
    """
    path=os.path.abspath(os.curdir)
    # Clear output data from earlier runs.
    for f in os.listdir(path):
        candidate=os.path.join(path, f)
        # Only regular files are deleted: os.remove() raises on a directory,
        # which would otherwise abort the app before anything is displayed.
        if 'cwout' in f and os.path.isfile(candidate):
            os.remove(candidate)

    #----Header Card---------------------------------------------
    card = v.Card(height=150, outlined=False,class_="my-4 mx-1",
                    children=[v.Toolbar(flat=True, color="primary",children=[v.ToolbarTitle(children=['Canadian Watershed Information Network'], style_="color:white"),v.Spacer(), 
                                                                             v.Icon(children=['mdi-flask'])]),
                              v.CardTitle(primary_title=True, children=["Provincial Chemistry File Editor 🧑🏽‍🔬"], 
                                          style_="font-size: 28px;font-weight:normal; margin-bottom: 30px;font-family:'Helvetica Neue', Helvetica, arial, sans-serif;")
                             
                             ])
    display(card)

    #----App Description---------------------------------------------
    # The introduction lives in its own Output so it can be cleared as a unit
    # once the user presses BEGIN.
    introtext=widgets.Output()
    with introtext:
    
        intro=intro_html()
        info=v.Alert(text=True, children=["You can change your answers in this app at any time!"],title="Alert title",type="info",style_="max-width:500px", class_ = 'my-5')
        
        Begin_button=v.Btn(children=['BEGIN'],color='primary',tooltip='Click me')
        row = v.Row(class_ = 'mx-1',children=[Begin_button])
        vbox=widgets.VBox([intro,info,row])
        display(vbox)
    display(introtext)

    # On Click Function
    # Anything displayed (or raised) by the handler is captured into this Output.
    on_click_out_beg=widgets.Output()
    @on_click_out_beg.capture()
    def on_click(widget, event, data):
        on_click_out_beg.clear_output()
        introtext.clear_output()

        tabs()

    Begin_button.on_event('click',on_click)
    display(on_click_out_beg)
    
  

In [5]:
def tabs():
    """Create the three-tab layout, display it and start the upload step.

    Three empty ``TabItem`` panels are created together with their tab
    headers; the panels are filled in later by ``file_upload`` and the
    functions it triggers.
    """
    tab_titles = ('Checking out the data 🧐', 'Data Download ⬇️', 'Data Dashboard 📊')

    # One (initially empty) content panel per tab.
    t1, t2, t3 = (v.TabItem(children=[]) for _ in tab_titles)

    # Tab headers come first in the children list, followed by the panels.
    tab_headers = [
        v.Tab(children=[title], class_="mx-9", style_='font-size:16px')
        for title in tab_titles
    ]
    t = v.Tabs(vertical=False, children=[*tab_headers, t1, t2, t3], v_model=0)

    display(t)

    file_upload(t, t1, t2, t3)

In [6]:
def file_upload(t,t1,t2, t3):
    """Add the CSV upload control to the first tab and react to uploads.

    Args:
        t: the ``Tabs`` container (passed through to later steps).
        t1, t2, t3: the three ``TabItem`` panels; only ``t1`` is modified here.

    Whenever the upload widget's ``file_info`` changes, the first tab is reset
    to the upload controls and ``merge_rows_widget`` is called with the
    uploaded files.
    """

    head1=v.Html(tag='div',style_='font-size:20px; padding-top: 50px; padding-bottom: 10px',
                children=['Upload CSV File(s) here'])

    # NOTE(review): the keyword is spelled ``Label`` (capital L) - confirm that
    # FileInput actually honours it.
    myfile = FileInput(Label="Upload CSV")

    t1.children = [*t1.children, head1, myfile]

    # Snapshot of the first tab holding only the heading and the upload
    # control; restored on every new upload so later widgets do not pile up.
    tab1_state1=t1.children

    # reports value when finished
    out=widgets.Output()
    @out.capture()
    def on_file_upload(change):
        out.clear_output()
        t1.children=tab1_state1  #Clear tab state
        datafiles = myfile.get_files()

        # Nothing uploaded (e.g. the selection was cleared): stay on the upload
        # step instead of advancing to steps that index into the file list.
        if not datafiles:
            return

        #Call next function
        merge_rows_widget(datafiles,t,t1,t2, t3 )
    myfile.observe(on_file_upload, names='file_info')

    
    display(out)
        

In [7]:
def merge_rows_widget(datafiles,t,t1,t2, t3):
    """Show the widgets used to pick the VMV-code row and the units row.

    The first uploaded file is parsed once (its columns are read but not used
    further in this function). The row selectors, an example image and a Next
    button are appended to the first tab; pressing Next resets the three tabs
    to the state captured here and calls ``merge_rows``.
    """

    # Parse only the first file; the remaining files are read in merge_rows.
    first_file = next(iter(datafiles), None)
    if first_file is not None:
        stream = first_file['file_obj']
        stream.seek(0)
        raw_bytes = stream.read()

        rawdata_df = pd.read_csv(io.BytesIO(raw_bytes), low_memory=False)
        cols = rawdata_df.columns

    # --- static content -------------------------------------------------
    prompt = v.Html(
        tag='div',
        style_='font-size:20px; padding-top: 50px; padding-bottom: 20px',
        children=['Choose the row that contains the variable units and the Valid Method Variable (VMV) code. For example:'],
    )
    example_img = v.Img(
        width='600',
        src="https://cwincloud.cc.umanitoba.ca/canwin_public/datamanagement/-/raw/master/Kaggle/img/provincial_chem0.png?ref_type=heads",
        cover=True,
    )
    spacer = v.Html(tag='div', style_='padding-top: 20px; padding-bottom: 20px', children=[])

    # --- row selectors --------------------------------------------------
    vmv_select = v.Select(
        label='Select the VMV code row', items=['0', '1'], v_model='0',
        multiple=False, style_="max-width:200px",
    )
    units_select = v.Select(
        label='Select the Units row', items=['0', '1'], v_model='1',
        multiple=False, style_="max-width:200px", class_='mx-6',
    )
    next_btn = v.Btn(children=['Next'], color='primary', tooltip='Click me')
    selector_row = v.Row(class_='mx-1', children=[vmv_select, units_select])

    t1.children = [*t1.children, prompt, example_img, spacer, selector_row, next_btn]

    # Snapshots restored on every click so repeated clicks start clean.
    saved_tab1 = t1.children
    saved_tab2 = t2.children
    saved_tab3 = t3.children

    click_output = widgets.Output()

    @click_output.capture()
    def handle_next(widget, event, data):
        click_output.clear_output()
        t1.children = saved_tab1
        t2.children = saved_tab2
        t3.children = saved_tab3

        merge_rows(datafiles, vmv_select, units_select, t, t1, t2, t3)

    next_btn.on_event('click', handle_next)
    display(click_output)


In [8]:
def _clean_header(header, code, unit):
    """Return one cleaned column name, merged with its VMV code and unit.

    The raw header loses bracketed text, trailing whitespace and punctuation,
    and has whitespace runs replaced by ``_``. When ``code`` is present it is
    appended, followed by ``unit`` when that is present too; any punctuation
    they bring in (e.g. ``mg/L``) becomes ``_``.
    """
    header=re.sub(r'\([^)]*\)', '', header) #Remove brackets and contents
    header=header.rstrip() # Remove trailing white space
    header = re.sub(r"[^\w\s]", '', header)# Remove all non-word characters (everything except numbers and letters)
    header = re.sub(r"\s+", '_', header) # Replace all remaining whitespace with _

    # Merging the VMV code and units
    if not pd.isna(code):
        parts=[header, str(code)]
        # A missing unit is skipped rather than written out as the text "nan".
        if not pd.isna(unit):
            parts.append(str(unit))
        header='_'.join(parts)
        header = re.sub(r"[^\w\s]", '_', header)# Replace all non-word characters with _
        header = re.sub(r"\s+", '_', header) # Replace all remaining whitespace with _

    return header


def merge_rows(datafiles, vmvCode_row,units_row, t,t1,t2, t3):
    """Merge the units and VMV-code rows into the headers of every file.

    Args:
        datafiles: list of uploaded-file dicts; ``'name'`` and ``'file_obj'``
            are read from each.
        vmvCode_row, units_row: selector widgets whose ``v_model`` holds the
            row index (as text) of the VMV codes and of the units.
        t: the ``Tabs`` container; switched to the second tab.
        t1, t2, t3: the tab panels; progress is shown in ``t2``.

    Each file is written to ``<name>_cwout.csv`` in the working directory, and
    ``download_output`` is called with the cleaned frames and base names.
    """

    #Change tabs
    t.v_model=1

    # Nothing to process: report it instead of failing on an empty list.
    if not datafiles:
        no_files=v.Alert(text=True, children=["No files were uploaded, so there is nothing to clean."],title="Alert title",type="error", style_="max-width:700px", class_='my-5')
        t2.children=[*t2.children, no_files]
        return

    units_idx=int(units_row.v_model)
    codes_idx=int(vmvCode_row.v_model)

    # Progress notice + spinner, shown while the files are processed.
    tab2_before=t2.children
    loader=v.ProgressCircular(indeterminate=True,size="50",color="primary", class_='my-9', style_='padding-top:60px; padding-bottom: 20px')
    info=v.Alert(text=True, children=["Cleaning headers and saving files...A few seconds please 🙂"],title="Alert title",type="info", style_="max-width:700px", class_='my-5')
    space=v.Html(tag='div',style_='padding-top: 10px; padding-bottom: 20px',children=[])
    t2.children=[*tab2_before,info, space,loader]

    df_list=[] # cleaned data frames, one per file
    filename_list=[] # matching file names without extension
    try:
        for file in datafiles: # Loop through all the files

            # Read the data!
            file['file_obj'].seek(0)
            data = file['file_obj'].read()
            rawdata_df=pd.read_csv(io.BytesIO(data), low_memory=False) # Get the data from the rawdata spreadsheet

            headers=list(rawdata_df.columns) #get the headers
            units=list(rawdata_df.iloc[units_idx]) #Get the units row
            codes=list(rawdata_df.iloc[codes_idx]) #Get the VMV codes row

            #Save updated column headers to data frame
            rawdata_df.columns=[_clean_header(header, code, unit)
                                for header, code, unit in zip(headers, codes, units)]
            # Drop the first two rows, which are now part of the headers
            # (the selectors only offer rows 0 and 1).
            rawdata_df=rawdata_df.tail(-2)

            #Save as csv file
            # splitext copes with names whose extension is not 4 characters.
            base_name=os.path.splitext(file['name'])[0]
            output_filename=base_name+'_cwout.csv'
            rawdata_df.to_csv(output_filename, float_format="%.4f", index=False)
            df_list.append(rawdata_df)
            filename_list.append(base_name)
    finally:
        # Remove the progress notice even when a file fails to parse, so the
        # spinner is not left running forever.
        t2.children=tab2_before

    # ------------------------------- Download Files ----------------------------------------------- 
    path=os.path.abspath(os.curdir)
    download_output(path,df_list,filename_list, t1, t2, t3)


In [9]:
def download_output(path,df_list,filename_list, t1, t2, t3):
    """Offer the cleaned files for download and start the dashboard step.

    Args:
        path: directory that is searched for files containing ``_cwout``.
        df_list, filename_list: cleaned frames and their base names; passed on
            to ``choose_file``.
        t1, t2, t3: tab panels; the download card is appended to ``t2``.

    A single output file is offered directly; several are bundled into
    ``output_data.zip``. The file content is embedded in the button as a
    base64 ``data:`` URI.
    """
    import base64
    printmd('<br><br>')

    files=sorted(f for f in os.listdir(path)
                 if '_cwout' in f and os.path.isfile(os.path.join(path, f)))
    file_count = len(files)


    def download(filename, mime):
        """Return a row holding a button that downloads ``filename``."""
        # ``with`` closes the handle as soon as the bytes are read.
        with open(os.path.join(path, filename), "rb") as handle:
            payload = base64.b64encode(handle.read()).decode()
        href=f"data:{mime};base64,{payload}"

        downloadbutton=v.Btn(children=["Click Me"],attributes={"download": filename})
        downloadbutton.href=href
        row = v.Row(class_ = 'mx-4',children=[downloadbutton])
        return row

    if file_count==0:
        # No output on disk: show a notice where the button would be instead
        # of failing on an undefined button further down.
        down_btn=v.Alert(text=True, children=["No cleaned files were found to download."],title="Alert title",type="warning", style_="max-width:500px")

    elif file_count==1:
        down_btn=download(files[0], "text/csv")

    else:
        filename='output_data.zip'

        with ZipFile(os.path.join(path, filename), 'w') as zipObj:
            # Add every cleaned file, stored under its bare file name
            for file in files:
                filePath = os.path.join(path, file)
                zipObj.write(filePath, os.path.basename(filePath))

        # The archive is served as a zip, not as CSV text.
        down_btn=download(filename, "application/zip")



    #----Header Card---------------------------------------------
    info=v.Alert(text=True, children=["All Done!  🎉"],title="Alert title",type="success", style_="max-width:500px")
    card = v.Card(height=200,width=500,outlined=False, class_="my-9 mx-1",
            children=[info,
                      v.CardTitle(primary_title=True, children=["Download Cleaned Files"], 
                                  style_="font-size: 18px;font-weight:normal; margin-bottom: 10px;font-family:'Helvetica Neue', Helvetica, arial, sans-serif;"),
                      down_btn  
                     ])

    info2=v.Alert(text=True, children=["Go to the Data Dashboard tab to create some plots!"],title="Alert title",type="info", style_="max-width:500px")
    space=v.Html(tag='div',style_='padding-top:60px; padding-bottom: 20px',children=[])
    t2.children=[*t2.children, card, info2, space]

    choose_file(df_list, filename_list,t2, t3)

In [10]:
def choose_file(df_list,filename_list, t2, t3):
    """Build the dashboard tab and let the user pick which file to plot.

    Args:
        df_list: cleaned data frames.
        filename_list: names shown in the file selector, parallel to ``df_list``.
        t2, t3: tab panels; the dashboard card is appended to ``t3``.

    Selecting a file calls ``choose_cols`` with the matching frame and the
    (initially empty) column containers that the later steps fill in.
    """

    head3=v.Html(tag='div',style_='font-size:20px; padding-top: 50px; padding-bottom: 40px',
                children=['Let\'s plot some data! 🕺'])

    head4=v.Html(tag='div',style_='font-size:16px; padding-bottom: 20px',
                children=['Choose the data file'])

    select_file = v.Select(label='Select file',items=filename_list,v_model=None, multiple=False, style_="max-width:300px")

    #define the columns of widgets
    col_html1=v.Html(tag='div', class_='d-flex flex-column mr-5', children=[head4, select_file], style_='padding-right: 30px')
    col_html2=v.Html(tag='div', class_='d-flex flex-column mr-5', children=[], style_='padding-right: 30px')
    col_html3=v.Html(tag='div', class_='d-flex flex-column mr-2', children=[])
    col_html_Next=v.Html(tag='div', class_='d-flex flex-column', children=[])
    col_html4=v.Html(tag='div', class_='d-flex flex-column mr-5', children=[], style_='padding-right: 30px')
    col_html5=v.Html(tag='div', class_='d-flex flex-column mr-5', children=[], style_='padding-right: 30px')
    col_html_info=v.Html(tag='div', class_='d-flex flex-column mr-2', children=[])
    col_html_Plot=v.Html(tag='div', class_='d-flex flex-column', children=[])

    # Containers filled by the later steps; emptied on every new selection.
    dependent_cols=(col_html2, col_html3, col_html_Next,
                    col_html4, col_html5, col_html_info, col_html_Plot)

    #add each column of widgets to boxes (rows)
    box1=v.Html(tag='div', class_='d-flex flex-row', children=[col_html1, col_html2, col_html3, col_html_Next], style_='padding-bottom: 40px')
    box2=v.Html(tag='div', class_='d-flex flex-row', children=[col_html4, col_html5, col_html_info, col_html_Plot], style_='padding-bottom: 40px')


    card2 = v.Card(outlined=False, class_="mx-2 my-2", style_=' padding: 20px;',
        children=[box1, box2])

    t3.children=[*t3.children, head3,card2]
    tab3_state2=t3.children


    # Observe Function
    out=widgets.Output()
    @out.capture()
    def obs(change):
        out.clear_output()
        t3.children=tab3_state2

        # choose_cols appends to these containers, so start from empty ones;
        # otherwise every re-selection stacks another set of selectors.
        for col in dependent_cols:
            col.children=[]

        if select_file.v_model and select_file.v_model!='None' :
            for f, df in zip(filename_list, df_list):
                if f==select_file.v_model:
                    choose_cols(f, df, t2, t3, col_html2, col_html3, col_html4, col_html5, col_html_Next, col_html_info, col_html_Plot)
                    break
        else:
            info=v.Alert(text=True, children=["You did not choose a file!"],title="Alert title",type="error", style_="max-width:700px;", class_='my-10')
            t3.children=[*t3.children, info]

    # React to the selection itself, so the handler runs once per change.
    select_file.observe(obs, names='v_model')
    display(out)

In [11]:
def choose_cols(sel_file, sel_df, t2, t3, col_html2, col_html3, col_html4, col_html5, col_html_Next, col_html_info, col_html_Plot):
    """Let the user confirm the DateTime and Station columns of a file.

    Args:
        sel_file: name of the chosen file (passed through).
        sel_df: the data frame of the chosen file.
        t2, t3: tab panels; messages are appended to ``t3``.
        col_html2, col_html3, col_html_Next: containers that receive the
            DateTime selector, the Station selector and the Next button.
        col_html4, col_html5, col_html_info, col_html_Plot: containers of the
            following step; reset to their current content on every click.

    The selectors are pre-filled with the first column whose name contains
    ``Date``/``date``/``DATE`` and ``Station``/``station``/``STATION``
    (preferring a station column that also contains ``Name``/``name``/``NAME``).
    Pressing Next calls ``get_vars``.
    """
    cols=list(sel_df.columns)

    head5=v.Html(tag='div',style_='font-size:16px; padding-bottom: 20px',
                children=['Select DateTime column'])
    
    head6=v.Html(tag='div',style_='font-size:16px; padding-bottom: 20px',
            children=['Select Station column'])

    #Find date time
    date_candidates=[d for d in cols if 'Date' in d or 'date' in d or 'DATE' in d]
    default_date=date_candidates[0] if date_candidates else None

    #Date widget
    sel_date = v.Select(label='DateTime',items=cols,v_model=default_date, multiple=False, style_="max-width:300px")

    #Find station
    station_var=[s for s in cols if 'Station' in s or 'station' in s or 'STATION' in s]
    # Prefer a station *name* column over e.g. a station number column
    st_name=[s for s in station_var if 'NAME' in s or 'name' in s or 'Name' in s]

    if st_name:
        default_station=st_name[0]
    elif station_var:
        default_station=station_var[0]
    else:
        default_station=None

    #station name widget
    sel_station = v.Select(label='Station Column',items=cols,v_model=default_station, multiple=False, style_="max-width:300px",  class_='mx-0')

    Next_button=v.Btn(children=['Next'],color='primary',tooltip='Click me', class_='mx-1')
    empty=v.Html(tag='div',style_='padding-top: 55px;',children=[''])

    #Update the column box
    col_html2.children=[*col_html2.children,head5, sel_date]
    col_html3.children=[*col_html3.children,head6, sel_station ]
    col_html_Next.children=[*col_html_Next.children,empty,Next_button ] 


    #Get the empty states of these col boxes
    col4_state=[*col_html4.children]
    col5_state=[*col_html5.children]
    colInfo_state=[*col_html_info.children]
    colPlot_state=[*col_html_Plot.children]

    #update state
    tab3_state3=t3.children

    # On Click Function
    out=widgets.Output()
    @out.capture()
    def on_click(widget, event, data):
        out.clear_output()
        t3.children=tab3_state3

        #Create empty states for the next row of boxes.
        col_html4.children=col4_state
        col_html5.children=col5_state
        col_html_Plot.children=colPlot_state
        col_html_info.children=colInfo_state
        
        date_col=sel_date.v_model #The selected date column
        station=sel_station.v_model  #The selected station column

        # Both columns are needed by the next step, which indexes the frame
        # with them; ask for the missing one instead of failing there.
        if date_col is None or station is None:
            missing=v.Alert(text=True, children=["Please select both a DateTime column and a Station column"],title="Alert title",type="error", style_="max-width:700px;", class_='my-10')
            t3.children=[*t3.children, missing]
            return

        get_vars(sel_file, cols, sel_df, t2, t3, col_html2, col_html3, col_html4, col_html5,col_html_Next, col_html_Plot, col_html_info, date_col,station)


    Next_button.on_event('click',on_click)
    display(out)

In [12]:
def get_vars(sel_file, cols, sel_df, t2, t3, col_html2, col_html3, col_html4, col_html5,col_html_Next,col_html_Plot, col_html_info, date_col,station):
    """Let the user pick the variable and station to plot, then plot them.

    Args:
        sel_file: name of the chosen file (unused here).
        cols: column names offered in the variable selector.
        sel_df: data frame of the chosen file; it is not modified.
        t2, t3: tab panels; messages are appended to ``t3``.
        col_html4, col_html5, col_html_info, col_html_Plot: containers that
            receive the variable selector, the station selector, the hint and
            the Plot button.
        col_html2, col_html3, col_html_Next: accepted for signature
            compatibility; unused here.
        date_col: name of the date column.
        station: name of the station column.

    Pressing Plot parses the date column with ``dateutil`` and calls ``plot``
    with a copy of the frame whose dates are formatted as
    ``%Y-%m-%d %H:%M:%S``.
    """
    import dateutil.parser

    head7=v.Html(tag='div',style_='font-size:16px; padding-bottom: 20px',
            children=['Select variable to plot'])

    head8=v.Html(tag='div',style_='font-size:16px; padding-bottom: 20px',
            children=['Select Station for plot'])

    # Get the var to plot - widget
    sel_var = v.Select(label='Variable',items=cols,v_model=None, multiple=False, style_="max-width:300px",  class_='mx-0')

    # Station names for the dropdown: each station once, missing values left
    # out, original order kept.
    station_data=sel_df[station].dropna().drop_duplicates().tolist()
    station_data.insert(0,"None")

    # get the exact station name - widget
    sel_station_name=v.Select(label='Station Name',items=station_data,v_model='None', multiple=False, style_="max-width:300px",  class_='mx-0')

    info=v.Alert(text=True, children=["You can select None under Station Name to plot all stations"],title="Alert title",type="info", class_='mb-5',style_="max-width:300px")

    #Plot button
    Plot_button=v.Btn(children=['Plot'],color='primary',tooltip='Click me', class_='mx-1')

    empty=v.Html(tag='div',style_='padding-top: 55px;',
                children=[''])

    #Update box
    col_html4.children=[*col_html4.children,head7, sel_var ]
    col_html5.children=[*col_html5.children,head8, sel_station_name]
    col_html_info.children=[*col_html_info.children,info ]
    col_html_Plot.children=[*col_html_Plot.children,empty,Plot_button ] 

    # Snapshot restored on every click so old plots/messages are removed.
    tab3_state4=t3.children

    # On Click Function
    on_click_out=widgets.Output()
    @on_click_out.capture()
    def on_click(widget, event, data):
        on_click_out.clear_output()
        t3.children=tab3_state4

        #get variable to plot and station
        var=sel_var.v_model
        station_var=sel_station_name.v_model #the actual station to plot

        # Check the selection first; no point parsing dates without it.
        if var is None:
            info=v.Alert(text=True, children=["You did not choose a variable to plot"],title="Alert title",type="error", style_="max-width:700px;", class_='my-10')
            t3.children=[*t3.children, info]
            return

        try:
            converted_dates=[dateutil.parser.parse(date).strftime("%Y-%m-%d %H:%M:%S")
                             for date in sel_df[date_col]]
        except (TypeError, ValueError, OverflowError):
            # Raised for missing or unparseable entries in the date column.
            bad_dates=v.Alert(text=True, children=["Some values in the selected DateTime column could not be read as dates"],title="Alert title",type="error", style_="max-width:700px;", class_='my-10')
            t3.children=[*t3.children, bad_dates]
            return

        # Work on a copy so the cleaned frame itself stays untouched.
        plot_df=sel_df.copy()
        plot_df[date_col]=converted_dates

        plot(date_col, var, station, station_var, plot_df, t2, t3 )

    Plot_button.on_event('click',on_click)
    display(on_click_out)


In [13]:
def _has_value(value):
    """Return False for ``None``, the empty string and NaN/NA, else True."""
    if value is None:
        return False
    if isinstance(value, str):
        return value != ''
    try:
        # pd.isna recognises every NaN/NA object, not only the np.nan singleton.
        return not pd.isna(value)
    except (TypeError, ValueError):
        return True


def plot(date_col, var, station, station_var, sel_df, t2,t3 ):
    """Scatter-plot ``var`` against ``date_col`` in the dashboard tab.

    Args:
        date_col: name of the date column (x axis).
        var: name of the column to plot (y axis).
        station: name of the station column.
        station_var: station to plot, or the text ``'None'`` to plot every
            station, coloured by station.
        sel_df: data frame holding the columns above.
        t2: unused; kept for signature compatibility.
        t3: tab panel that receives the plot or an error message.

    Rows whose value is missing are left out. If a specific station has no
    values, an error alert is shown instead of a plot. If none of the plotted
    values can be read as a number, a warning is shown above the plot.
    """
    loader=v.ProgressCircular(indeterminate=True,size="50",color="primary", class_='my-9', style_='padding-top:60px; padding-bottom: 20px')
    space=v.Html(tag='div',style_='padding-top: 10px; padding-bottom: 20px',children=[])
    tempstate= t3.children
    t3.children = [*tempstate, space, loader]

    plot_all = station_var=='None'

    #Get the rows for the chosen station (or all rows when plotting all stations)
    subset = sel_df if plot_all else sel_df.loc[sel_df[station]==station_var]

    good_dates=[]
    good_data=[]
    good_colors=[]
    for date, data, colour in zip(subset[date_col], subset[var], subset[station]):
        if _has_value(data):
            good_dates.append(date)
            good_data.append(data)
            good_colors.append(colour)

    if not good_data and not plot_all:
        info=v.Alert(text=True, children=["Sorry, there is no data for the selected Station"],title="Alert title",type="error", class_='my-5',style_="max-width:500px")
        t3.children = [*tempstate, info]
        return

    #Check whether any of the values can be read as a number
    notices=[]
    if good_data:
        as_numbers=pd.to_numeric(pd.Series(good_data, dtype=object), errors='coerce')
        if not as_numbers.notna().any():
            notices.append(v.Alert(text=True, children=["This does not seem to be numeric data"],title="Alert title",type="warning", class_='my-7',style_="max-width:500px"))

    def style_and_show(fig):
        """Apply the common layout to ``fig`` and render it."""
        fig.update_layout( yaxis_title=var, height=500, xaxis_title='DateTime',
                         yaxis = dict(tickfont =dict(size=14)),xaxis = dict(tickfont =dict(size=14)),font=dict(size=14), plot_bgcolor='ghostwhite', 
                          legend= {'itemsizing': 'constant'})
        fig.update_traces(marker={'size': 7})
        fig.update_xaxes(showline=True, linewidth=0.7, linecolor='LightGrey', mirror=True, showgrid=True, gridwidth=0.7, gridcolor='LightGrey')
        fig.update_yaxes(showline=True, linewidth=0.7, linecolor='LightGrey', mirror=True, showgrid=True, gridwidth=0.7, gridcolor='LightGrey')
        fig.show()

    out = widgets.Output()
    try:
        with out:
            printmd('<br>')

            if plot_all:
                fig = px.scatter(x=good_dates, y=good_data, color=good_colors)
                fig.update_layout(legend_title='Stations')
            else:
                fig = px.scatter(x=good_dates, y=good_data)
                fig['data'][0]['showlegend']=True
                fig.update_layout(legend_title=station_var)

            style_and_show(fig)
    finally:
        # Hide the spinner whether or not plotting succeeded.
        loader.class_ = 'd-none'

    t3.children = [*t3.children, *notices, out]
 
    

In [14]:
# Launch the app, then render a run of <br> tags as Markdown below it.
main()
printmd('<br><br><br><br><br><br>')

Card(children=[Toolbar(children=[ToolbarTitle(children=['Canadian Watershed Information Network'], layout=None…

Output()

Output()

<br><br><br><br><br><br>