In [1]:
%%html
<style>
/* Styling for pandas DataFrames rendered in this notebook. */
/* Header cells: larger, non-bold text with a thin grey border. */
.dataframe th {
    font-size: 18px;
    font-weight:normal;
    border: 1px solid grey;
}
/* Body cells: slightly smaller text than the header. */
.dataframe td {
    font-size: 16px;
}
</style>

In [2]:
# Imports and other foundational functions
import io
import re
import os
import pandas as pd
from zipfile import ZipFile
import ipywidgets as widgets
from IPython.display import display, Markdown
def printmd(string):
    """Render ``string`` as Markdown in the current output area."""
    rendered = Markdown(string)
    display(rendered)

import markdown as md
from ipywidgets import HTML
from datetime import datetime as dt   # Import datetime module
import ipyvuetify as v
from ipyvuetify.extra import FileInput
import plotly.express as px
import base64

# SOME FOUNDATIONAL FUNCTIONS---------------------------------------------------

#Header Card
def card_func():
    """Build and display the application header card.

    The card stacks a primary-coloured toolbar (network name, spacer and a
    flask icon) above the app title.
    """
    toolbar = v.Toolbar(
        flat=True,
        color="primary",
        children=[
            v.ToolbarTitle(children=['Canadian Watershed Information Network'], style_="color:white"),
            v.Spacer(),
            v.Icon(children=['mdi-flask']),
        ],
    )
    title = v.CardTitle(
        primary_title=True,
        children=["Castaway CTD Processor ⛴️"],
        style_="font-size: 28px;font-weight:normal; margin-bottom: 30px;font-family:'Helvetica Neue', Helvetica, arial, sans-serif;",
    )
    header_card = v.Card(height=150, outlined=False, class_="my-4 mx-1", children=[toolbar, title])
    display(header_card)

# Intro text
def intro_html():
    """Display the CTD picture and return the introduction text as an HTML widget.

    The picture is displayed immediately as a side effect; the descriptive
    text is converted from Markdown/HTML and returned (not displayed) so the
    caller can lay it out.

    Returns:
        ipywidgets.HTML: widget holding the rendered introduction.
    """

    ctd_img=v.Img(width="100", src="https://cwincloud.cc.umanitoba.ca/canwin_public/datamanagement/-/raw/master/Apps/Castaway/img/ctd.jpg")
    display(ctd_img)
    
    # The list rule is written as its own flat selector (not nested inside
    # div.s) so that it is valid CSS in every browser.
    html = md.markdown("""
<style>
div.s {
    font-size: 18px;
    }
div.s ul, div.s ol {font-size: 18px; color: #333333; margin-bottom: 24px;}
 </style>

<div class="s">

    <b>The Castaway CTD</b> <br>

    The small, handy CastAway-CTD is a lightweight, easy-to-use instrument designed for quick and accurate conductivity, temperature and depth profiles.<br>
    Each cast is automatically referenced with both time and location using its built-in GPS receiver and data can be viewed immediately on its colour LCD screen.<br>
    Designed primarily for coastal profiling.
    <br>
    <i>Source:https://geo-matching.com/</i>
    <br><br>

    <b>What This App Does</b> <br>
    This app cleans Castaway CTD files to be ingested into the ODV software. It does the following:<br>
    <ul>
      <li>Extracts specific variables from the metadata rows of the files</li>
      <li>Removes the metadata rows</li>
      <li>Creates a new data table</li>
      <li>Adds variables extracted from the metadata to new table</li>
      <li>Adds any additional variables to the new table</li>
      <li>Organizes table so that it is ODV compatible</li>
      <li>Renames specific variable names</li>
    </ul>
    
<details>
<summary> Expand to see example of an input file </summary>

<img src="https://cwincloud.cc.umanitoba.ca/canwin_public/datamanagement/-/raw/master/Kaggle/img/datavsmetadata.jpg" width="60%" >
</details>    

</div>                      
""")
        
    intro=HTML(html)
    return intro

# Set up tabs to initial empty state 
def tabs():
    """Reset the six tab pages to fresh, empty ``v.TabItem`` objects.

    The pages are published as the module-level globals ``t1`` .. ``t6``.
    """
    global t, t1,t2, t3, t4, t5, t6
    t1, t2, t3, t4, t5, t6 = (v.TabItem(children=[]) for _ in range(6))
tabs()

#Set up our tab names
def tab_names():
    """Create the tab bar (global ``t``) with its six headers and display it.

    The existing page objects ``t1`` .. ``t6`` are attached after the headers,
    and the first tab is selected.
    """
    global t, t1,t2, t3, t4, t5, t6

    # (label, inline style) for each tab header, in display order
    header_specs = [
        ('Data Upload', 'font-size:16px;'),
        ('Get column headers and metadata', 'font-size:16px;'),
        ('Add metadata variables', 'font-size:16px'),
        ('Add new columns', 'font-size:16px'),
        ('Data Download', 'font-size:16px'),
        ('Data Dashboard', 'font-size:16px'),
    ]
    headers = [v.Tab(children=[label], class_="mx-5", style_=css) for label, css in header_specs]
    pages = [t1, t2, t3, t4, t5, t6]

    t = v.Tabs(vertical=False, color='primary', children=headers + pages, v_model=0)

    display(t)

# The function below clears the subsequent tabs when a particular tab is selected.
# Note that the tabs could already have displays/widgets on them.
# So if the user goes back to a previous tab, we need to clear the pages of the
# subsequent tabs so that processing can start over from the currently clicked tab.

def update_tabs(which_tab):
    """Empty the tab pages that depend on the step identified by ``which_tab``.

    Args:
        which_tab: one of ``'t1'`` .. ``'t6'``. Any other value leaves every
            page untouched.

    NOTE(review): for ``'t5'`` and ``'t6'`` pages 5 and 6 are both emptied,
    i.e. page 5 itself is cleared as well - confirm this is intended.
    """
    global t, t1,t2, t3, t4, t5, t6

    # Pages to empty for each step
    pages_to_reset = {
        't1': (t2, t3, t4, t5, t6),
        't2': (t3, t4, t5, t6),
        't3': (t4, t5, t6),
        't4': (t5, t6),
        't5': (t5, t6),
        't6': (t5, t6),
    }

    for page in pages_to_reset.get(which_tab, ()):
        page.children = []


def example_files():
    """Display a card offering the bundled example input data for download.

    Files in the current working directory whose name contains ``example``
    are offered. A single file is linked directly; several files are first
    bundled into ``example_input_data.zip``. When no example file exists the
    card shows a short notice instead of a download button.
    """
    main_path=os.path.abspath(os.curdir)
    zip_name='example_input_data.zip'

    _, _, files = next(os.walk(main_path))
    # Skip an archive left over from an earlier call so it is never zipped into itself
    files=[f for f in files if 'example' in f and f != zip_name]
    file_count = len(files)

    def download(filename):
        # Embed the file as a base64 data URI so the button needs no file server
        with open(filename, "rb") as fh:
            payload = base64.b64encode(fh.read()).decode()
        href=f"data:text/csv;base64,{payload}"

        downloadbutton=v.Btn(children=["Click Me"],attributes={"download": filename})
        downloadbutton.href=href
        row = v.Row(class_ = 'mx-4',children=[downloadbutton])
        return row

    if file_count==0:
        # Nothing to offer: show a notice rather than failing on a missing button
        down_btn=v.Html(tag='div', class_='mx-4', children=['No example files were found.'])

    elif file_count==1:
        down_btn=download(files[0])

    else:
        with ZipFile(zip_name, 'w') as zipObj:
            # Add every example file to the archive under its bare file name
            for file in files:
                filePath = os.path.join(main_path, file)
                zipObj.write(filePath, os.path.basename(filePath))

        down_btn=download(zip_name)

    #----Header Card---------------------------------------------
    card = v.Card(height=120,width=320, outlined=False, class_="my-4 mx-1",
            children=[v.CardTitle(primary_title=True, children=["🧑🏻‍💻 Download Example Input Data"], 
                                  style_="font-size: 18px;font-weight:normal; margin-bottom: 1px;font-family:'Helvetica Neue', Helvetica, arial, sans-serif;"),
                      down_btn  
                     ])
    display(card)
    

In [3]:
def main():
    """Show the introduction page and wire up the BEGIN button.

    Output files of a previous run (names containing ``cwout`` or
    ``example_input_data``) are deleted from the working directory first.
    Clicking BEGIN clears the introduction and starts the upload step.
    """
    # Remove artefacts of a previous run
    workdir = os.path.abspath(os.curdir)
    stale = [name for name in os.listdir(workdir) if 'cwout' in name or 'example_input_data' in name]
    for name in stale:
        os.remove(os.path.join(workdir, name))

    #----App Description Page -----------
    introtext = widgets.Output()
    with introtext:
        card_func()           # header card
        intro = intro_html()  # picture is displayed here, text is returned
        info = v.Alert(text=True, children=["You can change your answers in this app at any time!"],title="Alert title",type="info",style_="max-width:500px")
        Begin_button = v.Btn(children=['BEGIN'],color='primary',tooltip='Click me')
        button_row = v.Row(class_ = 'mx-1',children=[Begin_button])
        display(widgets.VBox([intro, info, button_row]))
    display(introtext)

    # Anything shown by the click handler is captured in this output area
    on_click_out_beg = widgets.Output()

    def on_click(widget, event, data):
        on_click_out_beg.clear_output()
        introtext.clear_output()

        # Move on to the upload step
        file_upload()

    Begin_button.on_event('click', on_click_out_beg.capture()(on_click))
    display(on_click_out_beg)

In [4]:
def file_upload():
    """Show the tab bar and the upload page (tab 1) and react to uploads.

    Only uploaded files whose name ends in ``.csv`` (case-insensitive) are
    forwarded to ``get_header_widget``; when none qualify a warning is shown.
    The "Back to Introduction" button clears this page, resets all tab pages
    and calls ``main`` again.
    """
    global t, t1,t2, t3, t4, t5, t6

    #This output is specifically for tabs and the header card. Gets cleared when the back to intro button is pressed.
    intro_out=widgets.Output()
    with intro_out:
        card_func() # Display main card
        tab_names() # Call tabs
    display(intro_out)
    
    outtext=widgets.Output()
    with outtext:
        printmd('<br><br>')
        printmd('<div style="font-size:20px;">Upload CSV File(s) here </div>')

        # Upload file widget
        myfile = FileInput(label="Upload CSV") 
        display(myfile)

        example_files() # Call example files function
        printmd('<br>')
        
        #Back button
        Back_button=v.Btn(children=['Back to Introduction'],color='primary',class_ = 'mx-1')
        row2 = v.Row(class_ = 'mx-1',children=[Back_button])
        display(row2)
    
    # Put the upload page on tab 1
    t1.children = [*t1.children,outtext]

    # Observes changes to file upload widget
    out=widgets.Output()
    @out.capture()
    def on_file_upload(change):
        out.clear_output()
        update_tabs('t1') #clears subsequent tabs

        # Keep only real CSV uploads (by extension) so that other files are
        # never handed to the CSV parser
        csv_files=[d for d in myfile.get_files() if d['name'].lower().endswith('.csv')]

        if csv_files:
            get_header_widget(csv_files)

        else:
            printmd('<br>')
            info=v.Alert(text=True, children=["Oops, please upload CSV files"],title="Alert title",type="warning", style_="max-width:500px")
            display(info)
            printmd('<br>')
            
    myfile.observe(on_file_upload, names='file_info')
    display(out)


    # On Click Function
    on_click_out=widgets.Output()
    @on_click_out.capture()
    def on_click(widget, event, d):
        intro_out.clear_output()
        on_click_out.clear_output()
        outtext.clear_output()
    
        tabs() #Blank state for all tabs
        main() #Call main function
    Back_button.on_event('click', on_click)
    display(on_click_out)
    

In [5]:
def get_header_widget(datafiles):
    """Detect the header row of the first uploaded file and let the user confirm it (tab 2).

    Detection: every leading line with fewer comma-separated fields than the
    last non-empty line is counted as a metadata row; the first line that is
    not is taken as the header. The detected row (1-based) pre-fills the text
    field, which the user may change before clicking Next.

    On Next the chosen line is checked (it must exist and have at least three
    fields). Then either ``get_met_vars`` (metadata found) or
    ``add_extra_vars_widget`` (no metadata) is called with
    ``header_row = chosen row - 2``.

    Args:
        datafiles: list of dicts with at least a ``'name'`` and a
            ``'file_obj'`` (seekable binary file object) entry. Only the
            first file is inspected here; the full list is passed on.
    """

    global t, t1,t2, t3, t4, t5, t6

    # TAB 2
    t.v_model=1 

    #Delimiter
    data_file_delimiter = ','

    # Safe defaults so the page still renders for an empty upload or empty file
    lines=[]
    header_row=None
    df=pd.DataFrame()
    metadata_df=pd.DataFrame()

    if datafiles:
        # The header row is taken from the first file only
        first_file=datafiles[0]
        first_file['file_obj'].seek(0)
        data = first_file['file_obj'].read()
        str_data=data.decode('utf-8')

        # Non-empty lines only; a trailing '\r' (CRLF files) is dropped so that
        # blank lines are not mistaken for metadata rows
        lines=[l for l in (raw.rstrip('\r') for raw in str_data.split('\n')) if l]

        if lines:
            #The max num of columns come from the last line with data
            max_col_num = len(lines[-1].split(data_file_delimiter))

            # Any leading rows with fewer columns than the max are likely metadata rows
            met_count=0
            for l in lines:
                if len(l.split(data_file_delimiter))<max_col_num:
                    met_count=met_count+1
                else:
                    break

            header_row=met_count #met_count is 0 if there are no metadata rows

            # Read with the detected header to see whether this looks like real data
            df=pd.read_csv(io.BytesIO(data), header=header_row)
            if header_row>0:
                looks_like_data=(len(df.columns)>3
                                 or 'Temperature (Celsius)'.casefold() in (str(name).casefold() for name in df.columns))
                if looks_like_data:
                    # Grab all the metadata before the actual data
                    metadata_df=pd.read_csv(io.BytesIO(data),nrows=header_row-1)

    # Get the number of rows to remove
    outtext1=widgets.Output()
    with outtext1:
        printmd('<div style="font-size:20px;"<br><br><br>Lets find the column headers and any metadata 🔎')

        info=v.Alert(text=True, children=["All the information above your actual data is referred to as metadata"],
                        title="Alert title",
                        type="info", style_="max-width:700px")
        display(info)
        
        if header_row is not None:
            printmd(f'<div style="font-size: 18px;"<br><br>The header row seems to be row {header_row+1}:<br>')
            display(df.head(0))
    
            printmd('<div style="font-size:18px;"<br><br>If that is incorrect, change it here. Otherwise, click <b>Next</b>.')
        else:
            printmd('<div style="font-size:18px;"<br>Please add your header row below (as shown in your csv viewer)')

        # Pre-fill with the detected row so that "click Next" accepts the detection
        default_row=str(header_row+1) if header_row is not None else ''
        txt=v.TextField(label="Add header row", v_model=default_row, type='number', style_="max-width:100px") 
        Next_button=v.Btn(children=['Next'],color='primary',tooltip='Click me')

        row0 = v.Row(class_ = 'mx-1',children=[txt])
        row = v.Row(class_ = 'mx-1',children=[Next_button])
        display(row0,row)

    # Update the tab page
    t2.children = [*t2.children,outtext1]
    
    # On Click Function
    on_click_out=widgets.Output()
    @on_click_out.capture()
    def on_click(widget, event, d):
        on_click_out.clear_output()

        update_tabs('t2') #clears subsequent tabs

        # Header row from user (1-based); None when it is not a whole number
        try:
            user_row=int(txt.v_model)
        except (TypeError, ValueError):
            user_row=None

        # The row must exist in the file; otherwise there is nothing to check
        user_header_row=None
        if user_row is not None and 1<=user_row<=len(lines):
            user_header_row=lines[user_row-1]

        if user_header_row is None or len(user_header_row.split(data_file_delimiter))<3:
            printmd('<br>')
            warning=v.Alert(text=True, children=["This does not look like a header row. Please check the number you selected."],
                title="Alert title",
                type="warning", style_="max-width:700px")
            display(warning)
            printmd('<br>')
            if user_header_row is not None:
                display(user_header_row)
            return

        # Downstream steps read the data with header=header_row+1
        header_row=user_row-2

        # Call next function
        if metadata_df.empty:
            # No metadata: nothing to select and no metadata column names to pass on
            add_extra_vars_widget(header_row, [], datafiles, [], [])
        else:
            get_met_vars(header_row,datafiles,metadata_df)    
                          
    Next_button.on_event('click', on_click)
    display(on_click_out)


In [6]:
def get_met_vars(header_row,datafiles,metadata_df):
    """Let the user pick metadata variables to add as columns (tab 3).

    The columns of ``metadata_df`` are renamed in place to ``col0``, ``col1``,
    ... and the entries of its first column (all but the last row) are offered
    in a multi-select after removing special characters. Four typical
    variables are pre-selected. Next forwards the selection, the cleaned names
    and the new column names to ``add_extra_vars_widget``.

    Args:
        header_row: header position forwarded unchanged to the next step.
        datafiles: uploaded files, forwarded unchanged.
        metadata_df: DataFrame of the metadata rows; its columns are renamed.
    """
    global t, t1,t2, t3, t4, t5, t6

    #TAB 3
    t.v_model=2

    # Variables that are selected by default when present
    typical_vars=('Cast time (UTC)', 'Start latitude', 'Start longitude', 'File name')

    outtext2=widgets.Output()
    with outtext2:
        printmd('<div style="font-size:20px;"<br><br><br>Ok, let\'s add some variables from the metadata to the new data table 🕺🏻')

        info=v.Alert(text=True, children=["The values are to the right of these variables in the CSV file."],
                        title="Alert title",
                        type="info", style_="max-width:700px")
        display(info)

        # Positional names (col0, col1, ...) replace the original headers
        newmcol_list=[f'col{position}' for position in range(len(list(metadata_df.columns)))]
        metadata_df.columns=newmcol_list

        # Variable names sit in the first column; the last row is left out
        name_column=newmcol_list[0]
        raw_names=metadata_df[name_column].tolist()[:-1]

        col1_rows_cleaned=[]
        allowed_rows=[]
        for raw_name in raw_names:
            as_text=str(raw_name)
            # Keep letters, digits and spaces only
            cleaned=re.sub(r"[^a-zA-Z0-9 ]+", '', as_text).strip()
            col1_rows_cleaned.append(cleaned)

            # Pre-selection is decided on the uncleaned text
            if any(wanted in as_text for wanted in typical_vars):
                allowed_rows.append(cleaned)

        tags = v.Select(label='Click the dropdown menu below to add variables',items=col1_rows_cleaned,v_model=allowed_rows, multiple=True, style_="max-width:700px")
        info2=v.Alert(text=True, children=["The File name from metadata will be used as the Station"],
                       type="info", style_="max-width:700px")
        display(info2)

        Next_button=v.Btn(children=['Next'],color='primary',tooltip='Click me')

        select_row = v.Row(class_ = 'mx-1',children=[tags])
        button_row = v.Row(class_ = 'mx-1',children=[Next_button])
        display(select_row,button_row)

    # Update the tab page
    t3.children=[*t3.children,outtext2 ]

    # Output of the click handler is captured here
    on_click_out=widgets.Output()

    def on_click(widget, event, data):
        on_click_out.clear_output()
        update_tabs('t3') #clears subsequent tabs

        # Hand the current selection to the next step
        add_extra_vars_widget( header_row, tags.v_model, datafiles, col1_rows_cleaned,newmcol_list)

    Next_button.on_event('click', on_click_out.capture()(on_click))
    display(on_click_out)

In [7]:
def add_extra_vars_widget(header_row, metadata_vars, datafiles, col1_rows_cleaned=None,newmcol_list=None):
    """Ask whether extra constant-valued columns should be added (tab 4).

    "Yes" continues with ``how_many_vars_widget``; "No" goes straight to
    ``remove_and_merge`` with empty variable lists.

    Args:
        header_row: header position forwarded to the next step.
        metadata_vars: metadata variable names chosen by the user (may be empty).
        datafiles: uploaded files, forwarded unchanged.
        col1_rows_cleaned: cleaned metadata variable names; defaults to an
            empty list for callers that have no metadata.
        newmcol_list: positional metadata column names; defaults to an empty
            list for callers that have no metadata.
    """
    global t, t1,t2, t3, t4, t5, t6

    # Callers without metadata may omit the last two arguments
    if col1_rows_cleaned is None:
        col1_rows_cleaned=[]
    if newmcol_list is None:
        newmcol_list=[]

    # TAB 4
    t.v_model=3
    
    outtext3=widgets.Output()
    with outtext3:
        printmd('<div style="font-size:20px;"<br><br><br>Are there any new variables that you would like to add as columns to the merged file? 🤔')
        info=v.Alert(text=True, children=["This variable will have the same value throughout the column in the merged file."],
                title="Alert title",
                type="info", style_="max-width:700px")
        display(info)

        rb=v.RadioGroup(v_model='',children=[v.Radio(label="Yes", value='Yes'),v.Radio(label="No", value='No') ])
        row = v.Row(class_ = 'mx-1',children=[rb])
        display(row)

    # Update the tab page
    t4.children=[*t4.children,outtext3]

    # Page content up to and including the question; restored on every answer
    # so that follow-up widgets from an earlier answer do not pile up
    tab4_base=list(t4.children)

    out=widgets.Output()
    @out.capture()
    def on_change(change):

        out.clear_output()
        update_tabs('t4') #clears subsequent tabs
        t4.children=list(tab4_base)

        if rb.v_model=='Yes':

            # Call next function
            how_many_vars_widget(header_row, metadata_vars, datafiles, col1_rows_cleaned,newmcol_list)

        elif rb.v_model=='No':

            var_list=[]
            var_values_list=[]
            remove_and_merge(var_list,var_values_list,header_row, metadata_vars, datafiles,col1_rows_cleaned,newmcol_list)

    rb.observe(on_change,names=['v_model'])
    display(out)    

In [8]:
def how_many_vars_widget( header_row, metadata_vars, datafiles, col1_rows_cleaned, newmcol_list):
    """Ask how many extra variables should be added (tab 4).

    Next calls ``enter_vars_widget`` with the entered count. An entry that is
    not a whole number produces a warning instead of an exception.

    Args:
        header_row, metadata_vars, datafiles, col1_rows_cleaned, newmcol_list:
            state of the previous steps, forwarded unchanged.
    """
    global t, t1,t2, t3, t4, t5, t6
    
    outtext4=widgets.Output()
    with outtext4:
        # Ask how many variables to add?
        printmd('<div style="font-size:20px;"<br><br>How many variables would you like to add? 🤓')
        
        # int widget
        int_txt=v.TextField(label=" ", v_model='2', type='number', style_="max-width:100px")
        Next_button=v.Btn(children=['Next'],color='primary',tooltip='Click me')

        row0 = v.Row(class_ = 'mx-1',children=[int_txt])
        row = v.Row(class_ = 'mx-1',children=[Next_button])
        display(row0,row)

    # Update the tab page
    t4.children=[*t4.children,outtext4] 

    # Page content up to and including this question; restored on every click
    # so that repeated clicks do not stack several entry forms
    tab4_base=list(t4.children)

    # On Click Function
    on_click_out=widgets.Output()
    @on_click_out.capture()
    def on_click(widget, event, data):
        on_click_out.clear_output()

        update_tabs('t4') #clears subsequent tabs
        t4.children=list(tab4_base)

        # The field may be blank or hold a decimal number
        try:
            var_num=int(int_txt.v_model)
        except (TypeError, ValueError):
            warning=v.Alert(text=True, children=["Please enter a whole number."],
                type="warning", style_="max-width:700px")
            display(warning)
            return

        enter_vars_widget( var_num, header_row, metadata_vars,datafiles,col1_rows_cleaned, newmcol_list)

    Next_button.on_event('click',on_click)
    display(on_click_out)

In [9]:
def enter_vars_widget( var_num, header_row, metadata_vars, datafiles, col1_rows_cleaned, newmcol_list):
    """Show ``var_num`` name/value field pairs for the extra variables (tab 4).

    The first pair is pre-filled with Cruise/WK22 and the second with Type/C;
    any further pairs start with no value. Next collects the current field
    contents and calls ``remove_and_merge``.

    Args:
        var_num: number of name/value pairs to show.
        header_row, metadata_vars, datafiles, col1_rows_cleaned, newmcol_list:
            state of the previous steps, forwarded unchanged.
    """
    global t, t1,t2, t3, t4, t5, t6

    # Suggested (name, value) for the first two pairs
    presets={0: ('Cruise', 'WK22'), 1: ('Type', 'C')}

    outtext5=widgets.Output()
    with outtext5:
        printmd('<div style="font-size:20px;"<br><br>Enter the variable names and their corresponding values:📝')

        txt_list=[]
        txt_values_list=[]
        for position in range(var_num):
            the_var, val = presets.get(position, (None, None))

            # Field for the variable name
            name_field=v.TextField(label="Variable", v_model=the_var, type='text', style_="max-width:200px")
            txt_list.append(name_field)

            # Field for the matching value
            value_field=v.TextField(label="Value of variable", v_model=val, type='text', style_="max-width:200px", class_ = 'mx-6')
            txt_values_list.append(value_field)

            display(v.Row(class_ ='mx-1',children=[name_field, value_field]))

        # Next button below the fields
        Next_button=v.Btn(children=['Next'],color='primary',tooltip='Click me')
        display(v.Row(class_ = 'mx-1',children=[Next_button]))

    # Update the tab page
    t4.children=[*t4.children,outtext5]

    # Output of the click handler is captured here
    on_click_out=widgets.Output()

    def on_click(widget, event, data):
        on_click_out.clear_output()

        update_tabs('t4') #clears subsequent tabs

        # Read the current entries of all fields
        var_list=[field.v_model for field in txt_list]
        var_values_list=[field.v_model for field in txt_values_list]

        # Call next function
        remove_and_merge(var_list,var_values_list,header_row, metadata_vars, datafiles, col1_rows_cleaned, newmcol_list)

    Next_button.on_event('click',on_click_out.capture()(on_click))
    display(on_click_out)

In [10]:
def remove_and_merge(var_list,var_values_list,header_row, metadata_vars, datafiles, col1_rows_cleaned, newmcol_list):
    """Clean every uploaded file, merge the results and start the download step (tab 5).

    For each file the data table is read with ``header=header_row+1``. The
    selected metadata variables and the user-defined variables are inserted
    as leading columns, the "File name" column becomes ``Station``, and the
    Specific conductance / Sound velocity / Density columns are dropped.
    The merged table is written to ``output_cwout.csv``; a copy with an extra
    ``File`` column (for plotting) is written to ``output_file.csv``.
    ``download_output`` is then called with both tables.

    When no file yields any data a warning is shown and the download step is
    skipped.

    Args:
        var_list: names of the user-defined variables; blank names are skipped.
        var_values_list: values matching ``var_list``.
        header_row: header position as produced by the header step.
        metadata_vars: cleaned names of the metadata variables to add.
        datafiles: list of dicts with ``'name'`` and ``'file_obj'`` entries.
        col1_rows_cleaned: cleaned metadata variable names, in row order.
        newmcol_list: positional names (col0, col1, ...) for the metadata columns.
    """
    global t, t1,t2, t3, t4, t5, t6

    #TAB 5
    t.v_model=4

    #Progress loader
    info=v.Alert(text=True, children=["Processing files...This might take a few seconds 🙂"],title="Alert title",type="info", style_="max-width:500px", class_='my-5')
    loader=v.ProgressCircular(indeterminate=True,size="50",color="primary", class_='my-7')
    tempstate=t5.children
    
    t5.children=[*t5.children, info, loader]

    # Stay None when nothing could be merged (no data, or an error shown in the output area)
    final_df=None
    final_plot_df=None

    download_output_text=widgets.Output()
    with download_output_text:

        df_list_cleaned=[] #add cleaned dfs to this list
        plot_df_list=[] 
        for f, file in enumerate(datafiles, start=1):
             
            # Read the data!
            file['file_obj'].seek(0)
            data = file['file_obj'].read()

            # Read the actual data
            df=pd.read_csv(io.BytesIO(data), header=header_row+1)

            # Start from "no metadata" for every file so that nothing carries
            # over from the previous file
            metadata_df=pd.DataFrame()
            if header_row>0:
                if len(df.columns)>3 or 'Temperature (Celsius)'.casefold() in (str(name).casefold() for name in df.columns):
                    
                    # Grab all the metadata before the actual data
                    metadata_df=pd.read_csv(io.BytesIO(data),nrows=header_row-1)
 
            #Display loader
            if f==1:
                display(loader)

            if df.empty:
                continue
    
            if not metadata_df.empty:
        
                metadata_df.columns=newmcol_list #(col0, col1 etc)
        
                #first column with met variables
                col1=newmcol_list[0]

                #update metadata dfs with the cleaned variables (metadata_vars will be from this list)
                for i, c1 in zip(metadata_df.index,col1_rows_cleaned):
                    metadata_df.at[i, col1] = c1
                
                # Grab the metadata vars that should be added as columns
                c=-1
                for mvar in metadata_vars:
                    c=c+1

                    #Rows and columns of metadata_df in which the string mvar is present (boolean values)
                    row_hits=metadata_df.apply(lambda row: row.astype(str).str.contains(mvar,regex=False).any(), axis=1).tolist()
                    col_hits=metadata_df.apply(lambda column: column.astype(str).str.contains(mvar,regex=False).any(), axis=0).tolist()

                    # Position of the first match (None when the name is absent)
                    r_index=next((i for i, hit in enumerate(row_hits) if hit), None)
                    c_index=next((i for i, hit in enumerate(col_hits) if hit), None)

                    if r_index is None or c_index is None:
                        if f==1:
                            printmd('<br>**WARNING**: The variable name **{}** was not found in the rows removed and was therefore not added to the final file.<br>Re-run notebook and enter the correct variable name if you made a mistake.<br><br>'.format(mvar))
                        continue

                    # The value sits in the cell to the right of the variable name
                    value=metadata_df.iloc[r_index, c_index+1]

                    col_name=mvar
                    if 'longitude' in col_name:
                        col_name=  'Longitude [degrees_east]'
                    if 'latitude' in col_name:
                        col_name= 'Latitude [degrees_north]'

                    # If there is a date-time variable:
                    if 'time' in col_name or 'date' in col_name or 'Time' in col_name or 'Date' in col_name:

                        #If there are no seconds in the format
                        if len(value)==16:
                            value=value+':00'

                        # Convert string to datetime
                        datetime_object = dt.strptime(value, '%Y-%m-%d %H:%M:%S')

                        #odv iso date format
                        iso_date=datetime_object.strftime("%Y-%m-%dT%H:%M:%S")

                        # Insert the ISO stamp followed by its individual parts
                        df.insert(c, "yyyy-mm-ddThh:mm:ss.sss",iso_date)
                        df.insert(c+1, 'Year',datetime_object.year)
                        df.insert(c+2, 'Month',datetime_object.month)
                        df.insert(c+3, 'Day',datetime_object.day)
                        df.insert(c+4, 'Hour',datetime_object.hour)
                        df.insert(c+5, 'Minute',datetime_object.minute)
                        df.insert(c+6, 'Second',datetime_object.second)

                        # Seven columns were added; the next variable goes after them
                        c=c+6
                    else:
                        # Insert into dataframe
                        df.insert(c, col_name,value)

            # Add the user-defined variables at the front, in the order entered.
            # Blank names (empty or never filled in) are skipped.
            position=0
            for var,val in zip(var_list, var_values_list):   
                if var is None or str(var).strip()=='':
                    continue
                df.insert(position,var,val)
                position=position+1
                   
            # To omit certain variables and rearrange--------------------------------------------------------------------
            for col in list(df.columns):
                if "File name" in col:
                    column_to_move = df.pop(col)
                    # insert column with insert(location, column_name, column_value)
                    df.insert(0, "Station", column_to_move)
                elif 'Specific conductance' in col or 'Sound velocity' in col or 'Density' in col:
                    df = df.drop(col, axis=1)

            #Make a copy of the df that has the filename as a column (for plotting in case the user removes file name)
            plot_df=df.assign(File=file['name'])
            
            # ----------------------------------------------------------------------------------------------
            # Add each data frame to its list exactly once
            df_list_cleaned.append(df)
            plot_df_list.append(plot_df)

        if df_list_cleaned:
            # Concatenate the list of data frames
            final_df=pd.concat(df_list_cleaned)  
            final_plot_df=pd.concat(plot_df_list)

            # Save as csv
            csvname='output_cwout.csv'
            csvname2='output_file.csv'
            final_df.to_csv(csvname, index=False)
            final_plot_df.to_csv(csvname2, index=False)
        else:
            warning=v.Alert(text=True, children=["No data rows were found in the uploaded files."],
                type="warning", style_="max-width:500px")
            display(warning)

        loader.class_ = 'd-none' # disappear

    t5.children=tempstate
    t5.children=[*t5.children, download_output_text]

    # Nothing to download or plot
    if final_df is None:
        return

    #Call download function
    path=os.path.abspath(os.curdir)
    download_output(path, final_df, final_plot_df)

In [11]:
def download_output(path, final_df, final_plot_df):
    """Add the download card to tab 5 and continue with the plotting step.

    Files in ``path`` whose name contains ``_cwout`` are offered: a single
    file is linked directly; several are bundled into ``output_data.zip``
    (written into ``path``). When no such file exists the card shows a
    warning instead of a button.

    Args:
        path: directory that holds the output file(s).
        final_df: merged table, forwarded to ``get_vars``.
        final_plot_df: merged table with the extra ``File`` column, forwarded
            to ``get_vars``.
    """
    global t, t1,t2, t3, t4, t5, t6
    
    printmd('<br><br>')
    _, _, files = next(os.walk(path))
    files=[f for f in files if '_cwout' in f]
    file_count = len(files)

    def download(file_path):
        # Embed the file as a base64 data URI so the button needs no file server
        with open(file_path, "rb") as fh:
            payload = base64.b64encode(fh.read()).decode()
        href=f"data:text/csv;base64,{payload}"

        # The browser saves the file under its bare name
        downloadbutton=v.Btn(children=["Click Me"],attributes={"download": os.path.basename(file_path)})
        downloadbutton.href=href
        row = v.Row(class_ = 'mx-4',children=[downloadbutton])
        return row
    
    if file_count==0:
        # Show a notice rather than failing on a missing button
        down_btn=v.Alert(text=True, children=["No output file was found."],type="warning", style_="max-width:500px")

    elif file_count==1:
        down_btn=download(os.path.join(path, files[0]))

    else:
        zip_path=os.path.join(path, 'output_data.zip')

        with ZipFile(zip_path, 'w') as zipObj:
            # Add every output file to the archive under its bare file name
            for file in files:
                filePath = os.path.join(path, file)
                zipObj.write(filePath, os.path.basename(filePath))
        
        down_btn=download(zip_path)

    #----Header Card---------------------------------------------
    info=v.Alert(text=True, children=["All Done!  🎉"],title="Alert title",type="success", style_="max-width:500px")
    card = v.Card(height=220,width=500, outlined=False, class_="my-4 mx-1",
            children=[info,
                      v.CardTitle(primary_title=True, children=["Download Output"], 
                                  style_="font-size: 24px;font-weight:normal; margin-bottom: 30px;font-family:'Helvetica Neue', Helvetica, arial, sans-serif;"),
                      down_btn  
                     ])

    info2=v.Alert(text=True, children=["Go to the Data Dashboard tab to create some plots!"],title="Alert title",type="info", style_="max-width:500px")
    space=v.Html(tag='div',style_='padding-top:60px; padding-bottom: 20px',children=[])
    t5.children=[*t5.children, space, card, info2]

    get_vars(final_df, final_plot_df)
    

In [12]:
def get_vars(final_df, final_plot_df):
    """Add the plotting controls to tab ``t6`` and wire up the Plot button.

    Two drop-downs are built from the columns of ``final_df``: one for the
    depth column and one for the variable to plot against depth. Each is
    pre-selected with the first column whose name contains ``depth`` /
    ``temp`` (lower, capitalised or upper case) when such a column exists,
    and left empty otherwise.

    Clicking *Plot* resets the tab to the state captured right after the
    controls were added, then either shows an error alert (a selection is
    missing) or delegates to ``plot``.

    Parameters
    ----------
    final_df : pandas.DataFrame
        Frame whose columns populate both drop-downs; forwarded to ``plot``.
    final_plot_df : pandas.DataFrame
        Forwarded unchanged to ``plot``.
    """
    global t6

    cols = list(final_df.columns)

    def first_match(keyword):
        # First column containing the keyword in one of the three spellings
        # the notebook recognises (e.g. depth / Depth / DEPTH), else None.
        variants = (keyword.lower(), keyword.capitalize(), keyword.upper())
        return next((c for c in cols if any(k in c for k in variants)), None)

    head1 = v.Html(tag='div', style_='font-size:20px; padding-top: 40px; padding-bottom: 20px',
                   children=['Let\'s plot some data! 🕺'])

    head2 = v.Html(tag='div', style_='font-size:16px; padding-top: 40px; padding-bottom: 20px',
                   children=['Select the Depth column '])
    # Fall back to an empty selection instead of raising IndexError when the
    # frame has no depth-like column.
    select_depth = v.Select(label='Depth', items=cols, v_model=first_match('depth'),
                            multiple=False, style_="max-width:300px")

    head3 = v.Html(tag='div', style_='font-size:16px; padding-top: 40px;padding-bottom: 20px',
                   children=['Select Variable to plot with depth '])
    select_var1 = v.Select(label='Variable', items=cols, v_model=first_match('temp'),
                           multiple=False, style_="max-width:300px")

    # NOTE: only a single variable is supported; plot() accepts one variable
    # (var1), so no second-variable selector is created here.

    # Plot button
    Plot_button = v.Btn(children=['Plot'], color='primary', tooltip='Click me', class_='mx-1')

    col_html1 = v.Html(tag='div', class_='d-flex flex-column mr-5',
                       children=[head2, select_depth], style_='padding-right: 30px')
    col_html2 = v.Html(tag='div', class_='d-flex flex-column mr-5',
                       children=[head3, select_var1], style_='padding-right: 30px')
    box1 = v.Html(tag='div', class_='d-flex flex-row',
                  children=[col_html1, col_html2], style_='padding-bottom: 10px')

    t6.children = [*t6.children, head1, box1, Plot_button]

    # Snapshot of the tab with just the controls; every click starts from it
    # so earlier plots/alerts are discarded.
    tab6_state1 = t6.children

    def show_error(message):
        # Append an error alert below the controls.
        info = v.Alert(text=True, children=[message], title="Alert title", type="error",
                       style_="max-width:700px;", class_='my-10')
        t6.children = [*t6.children, info]

    # On Click Function
    on_click_out = widgets.Output()

    @on_click_out.capture()
    def on_click(widget, event, data):
        on_click_out.clear_output()
        t6.children = tab6_state1

        depth = select_depth.v_model
        var1 = select_var1.v_model

        # The original check also read an undefined `var2`, which raised
        # NameError exactly when no variable was selected.
        if depth is None:
            show_error("You did not choose a depth column")
        elif var1 is None:
            show_error("You did not choose a variable to plot")
        else:
            plot(final_df, depth, var1, final_plot_df)

    Plot_button.on_event('click', on_click)
    display(on_click_out)

In [13]:
def plot(final_df, depth, var1, final_plot_df):
    """Draw ``var1`` against ``depth`` as a scatter/line plot in tab ``t6``.

    A progress spinner is appended to the tab while the figure is built and
    is replaced by the rendered figure afterwards.

    Parameters
    ----------
    final_df : pandas.DataFrame
        Used as the data source, coloured by its ``Station`` column, when
        that column is present.
    depth : str
        Name of the column plotted on the (reversed) y axis and used to sort
        the rows.
    var1 : str
        Name of the column plotted on the x axis.
    final_plot_df : pandas.DataFrame
        Fallback data source, coloured by its ``File`` column, used when
        ``final_df`` has no ``Station`` column.
    """
    global t6

    loader = v.ProgressCircular(indeterminate=True, size="50", color="primary", class_='my-9',
                                style_='padding-top:60px; padding-bottom: 20px')
    space = v.Html(tag='div', style_='padding-top: 10px; padding-bottom: 20px', children=[])
    tempstate = t6.children  # tab contents before the spinner was added
    t6.children = [*t6.children, space, loader]

    plot_out = widgets.Output()
    with plot_out:
        # Colour by station when available, otherwise by source file.
        if 'Station' in final_df.columns:
            data, colour_col, legend_title = final_df, 'Station', 'Stations'
        else:
            # The legend lists file names, so title it accordingly rather
            # than with the plotted variable's name.
            data, colour_col, legend_title = final_plot_df, 'File', 'Files'

        data = data.sort_values(by=depth, ascending=True)
        fig = px.scatter(data, x=var1, y=depth, color=colour_col)
        fig.update_layout(legend_title=legend_title)

        # autorange="reversed" puts the smallest depth value at the top.
        fig.update_layout(yaxis_title='Depth', height=500, xaxis_title=var1,
                          legend=dict(itemsizing='constant'), plot_bgcolor='ghostwhite',
                          yaxis=dict(autorange="reversed", tickfont=dict(size=14)),
                          xaxis=dict(tickfont=dict(size=14)))
        fig.update_traces(mode="markers+lines", marker={'size': 5})

        axis_style = dict(showline=True, linewidth=0.7, linecolor='LightGrey', mirror=True,
                          showgrid=True, gridwidth=0.7, gridcolor='LightGrey')
        fig.update_xaxes(**axis_style)
        fig.update_yaxes(**axis_style)

        printmd('<br><br>')
        fig.show()

    # Drop the spinner and show the captured figure instead.
    t6.children = [*tempstate, plot_out]

In [14]:
# Launch the app. main() is defined in an earlier cell (not visible in this section).
main()
# Emit six <br> tags as Markdown after the app output — presumably bottom padding; confirm intent.
printmd('<br><br><br><br><br><br>')

Output()

Output()

<br><br><br><br><br><br>