### SODA for SPARC reports 2022 Update

In [None]:
# Install the third-party packages imported by this notebook
# (oauth2client, google-api-python-client, ipywidgets, plotly, tqdm, pandas).
# Only needs to be run when the packages are not already installed.
!pip3 install oauth2client
!pip3 install google-api-python-client
!pip3 install ipywidgets
!pip3 install plotly
!pip3 install tqdm -U
!pip3 install pandas --user

### Pre-requisite Installations if Needed

In [1]:
import argparse
import pandas as pd
import json
import os
import ipywidgets as widgets
from pathlib import Path
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import calendar

import uuid

from tqdm.notebook import tqdm, trange

import plotly
import plotly.graph_objs as go
import plotly.express as px
import plotly.io as pio

from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials

import httplib2
from oauth2client import client
from oauth2client import file
from oauth2client import tools
from helper_functions import initialize_analyticsreporting, get_report, print_response, VIEW_ID, next_date_interval, progress_bar_counter

# Use Plotly's "iframe" renderer as the default for every fig.show() call below.
pio.renderers.default = "iframe"

# Shared handle passed to every get_report(...) call in this notebook.
# NOTE(review): presumably an authenticated Analytics Reporting client built by
# helper_functions.initialize_analyticsreporting -- confirm against that module.
analytics = initialize_analyticsreporting()

### --------------------------------------------------------------------------------------------------------------------------------------------

#### The cell below renders the widgets used to select what the graph shows. It only needs to be run ONCE, to display the widgets; after they are displayed you do not have to run it again. The selected date range is inclusive: the report/graph includes the end date.

### --------------------------------------------------------------------------------------------------------------------------------------------

In [2]:
# Dropdown listing every feature/event action that can be graphed.
# Apart from 'New/Returning Users', each option is compared verbatim against the
# 'ga:eventAction' dimension returned by Google Analytics, so every entry must
# be its own string literal followed by a comma: a missing comma makes Python
# silently concatenate two neighbouring options into one bogus entry.
feature = widgets.Dropdown(
    options=[
        'New/Returning Users',
        'App Launched - OS',
        'App Launched - SODA',

        # --- Manage Datasets ---
        'Manage Datasets - Create a new dataset',
        'Manage Datasets - Rename an existing dataset',
        'Manage Datasets - Make PI owner of dataset',
        'Manage Datasets - Add/Edit Permissions',
        'Manage Datasets - Add/Edit Permissions - Add User Permissions',
        'Manage Datasets - Add/Edit Permissions - Add Team Permissions',
        "Manage Datasets - Add/Edit Subtitle",
        "Manage Datasets - Add/Edit Subtitle - Get Subtitle",
        "Manage Datasets - Add/Edit Readme",
        "Manage Datasets - Add/Edit Readme - Get Readme",
        "Manage Datasets - Add/Edit Readme - Parse Readme",
        "Manage Datasets - Upload a Banner Image",
        "Manage Datasets - Upload a Banner Image - Size",
        "Manage Datasets - Upload a Banner Image - Importing Banner Image",
        "Manage Datasets - Upload a Banner Image - Get Banner Image",
        "Manage Datasets - Add/Edit Tags",
        "Manage Datasets - Add/Edit Tags - Get Tags",
        "Manage Datasets - Assign a License",
        "Manage Datasets - Assign a License - Get License",
        "Manage Datasets - Upload Local Dataset",
        "Manage Datasets - Upload Local Dataset - size",
        "Manage Datasets - Upload Local Dataset - name - size",
        "Manage Datasets - Upload Local Dataset - Number of Folders",
        "Manage Datasets - Upload Local Dataset - name - Number of Folders",
        "Manage Datasets - Upload Local Dataset - Number of Files",
        "Manage Datasets - Upload Local Dataset - name - Number of Files",
        "Manage Datasets - Change Dataset Status",
        "Manage Datasets - Change Dataset Status - Get Dataset Status",

        # --- Prepare Metadata ---
        'Prepare Metadata - Add Airtable account',
        'Prepare Metadata - Add DDD',
        'Prepare Metadata - Create Submission',
        'Prepare Metadata - Create dataset_description',
        'Prepare Metadata - samples',
        'Prepare Metadata - samples - Generate',
        'Prepare Metadata - samples - Generate - Local',
        'Prepare Metadata - samples - Generate - Pennsieve',
        'Prepare Metadata - samples - Existing',
        'Prepare Metadata - samples - Existing - Local',
        'Prepare Metadata - samples - Existing - Pennsieve',
        'Prepare Metadata - submission',
        'Prepare Metadata - submission - Generate',
        'Prepare Metadata - submission - Generate - Local',
        'Prepare Metadata - submission - Generate - Pennsieve',
        'Prepare Metadata - submission - Existing',
        'Prepare Metadata - submission - Existing - Local',
        'Prepare Metadata - submission - Existing - Pennsieve',
        'Prepare Metadata - dataset_description',
        'Prepare Metadata - dataset_description - Generate',
        'Prepare Metadata - dataset_description - Generate - Local',
        'Prepare Metadata - dataset_description - Generate - Pennsieve',
        'Prepare Metadata - dataset_description - Existing',
        'Prepare Metadata - dataset_description - Existing - Local',
        'Prepare Metadata - dataset_description - Existing - Pennsieve',
        'Prepare Metadata - subjects',
        'Prepare Metadata - subjects - Generate',
        'Prepare Metadata - subjects - Generate - Local',
        'Prepare Metadata - subjects - Generate - Pennsieve',
        'Prepare Metadata - subjects - Existing',
        'Prepare Metadata - subjects - Existing - Local',
        'Prepare Metadata - subjects - Existing - Pennsieve',
        'Prepare Metadata - readme',
        'Prepare Metadata - readme - Generate',
        'Prepare Metadata - readme - Generate - Local',
        'Prepare Metadata - readme - Generate - Pennsieve',
        'Prepare Metadata - readme - Existing',
        'Prepare Metadata - readme - Existing - Local',
        'Prepare Metadata - readme - Existing - Pennsieve',
        'Prepare Metadata - changes',
        'Prepare Metadata - changes - Generate',
        'Prepare Metadata - changes - Generate - Local',
        'Prepare Metadata - changes - Generate - Pennsieve',
        'Prepare Metadata - changes - Existing',
        'Prepare Metadata - changes - Existing - Local',
        'Prepare Metadata - changes - Existing - Pennsieve',
        'Prepare Metadata - manifest',
        'Prepare Metadata - manifest - Generate',
        'Prepare Metadata - manifest - Generate - Local',
        'Prepare Metadata - manifest - Generate - Pennsieve',
        'Prepare Metadata - manifest - Existing',
        'Prepare Metadata - manifest - Existing - Local',
        'Prepare Metadata - manifest - Existing - Pennsieve',

        # --- Download Template ---
        'Download Template - manifest.xlsx',
        'Download Template - dataset_description.xlsx',
        'Download Template - subjects.xlsx',
        'Download Template - samples.xlsx',
        'Download Template - submission.xlsx',

        # --- Prepare Datasets: Organize dataset ---
        'Prepare Datasets - Organize dataset',
        'Prepare Datasets - Organize dataset - Existing',
        'Prepare Datasets - Organize dataset - Existing - Pennsieve',
        'Prepare Datasets - Organize dataset - Existing - Local',
        'Prepare Datasets - Organize dataset - Existing - Saved',

        # --- Prepare Datasets: Step 3 ---
        'Prepare Datasets - Organize dataset - Step 3',
        'Prepare Datasets - Organize dataset - Step 3 - Import',
        'Prepare Datasets - Organize dataset - Step 3 - Import - File',
        'Prepare Datasets - Organize dataset - Step 3 - Import - File - Local',
        'Prepare Datasets - Organize dataset - Step 3 - Import - File - Saved',
        'Prepare Datasets - Organize dataset - Step 3 - Import - File - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 3 - Import - File - New',

        'Prepare Datasets - Organize dataset - Step 3 - Import - Folder',
        'Prepare Datasets - Organize dataset - Step 3 - Import - Folder - Local',
        'Prepare Datasets - Organize dataset - Step 3 - Import - Folder - Saved',
        'Prepare Datasets - Organize dataset - Step 3 - Import - Folder - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 3 - Import - Folder - New',

        'Prepare Datasets - Organize dataset - Step 3 - Add - Folder',
        'Prepare Datasets - Organize dataset - Step 3 - Add - Folder - Local',
        'Prepare Datasets - Organize dataset - Step 3 - Add - Folder - Saved',
        'Prepare Datasets - Organize dataset - Step 3 - Add - Folder - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 3 - Add - Folder - New',

        # --- Prepare Datasets: Step 4 ---
        'Prepare Datasets - Organize dataset - Step 4',
        'Prepare Datasets - Organize dataset - Step 4 - Import',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve - New',

        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve - New',

        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve - New',

        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve - New',

        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve - New',

        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local - New',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve - New',

        # --- Prepare Datasets: Step 7 ---
        'Prepare Datasets - Organize dataset - Step 7',
        'Prepare Datasets - Organize dataset - Step 7 - Generate',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Local',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Saved',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - New',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Pennsieve',

        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Local',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Saved',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - New',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Pennsieve',

        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Size',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Size',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Size',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Number of Files',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Number of Files',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Number of Files',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Manifest',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Manifest - Pennsieve',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Manifest - Local',

        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Create a duplicate',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Replace',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Merge',
        'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Skip',

        # --- Disseminate Datasets ---
        'Disseminate Datasets - Show current dataset permission',
        'Disseminate Datasets - Show current dataset status',
        'Disseminate Datasets - Pre-publishing Review - Integrate ORCID iD',
        'Disseminate Datasets - Pre-publishing Review - Get Excluded Files',
        'Disseminate Datasets - Pre-publishing Review - Get Metadata Files',
        'Disseminate Datasets - Pre-publishing Review - Update excluded files',
        'Disseminate Datasets - Pre-publishing Review - Publish',
        'Disseminate Datasets - Pre-publishing Review - Submit dataset',
        'Disseminate Datasets - Pre-publishing Review - Withdraw dataset',
        'Disseminate Datasets - Pre-publishing Review - Fetch Pre-publishing Checklist Statuses',
        "Disseminate Datasets - Pre-publishing Review - Determine User's Dataset Role",
        "Disseminate Datasets - Pre-publishing Review - Show publishing status",

        'Disseminate Datasets - Share with Curation Team',
        "Disseminate Datasets - Share with Curation Team - Remove Consortium's Team Permissions",
        "Disseminate Datasets - Share with Curation Team - Give Consortium Team Permissions",
        "Disseminate Datasets - Share with Curation Team - Change Dataset Status to Work In Progress",
        "Disseminate Datasets - Share with Curation Team - Change Dataset Status to Ready for Curation",

        'Disseminate Datasets - Share with Consortium',
        'Disseminate Datasets - Share with Consortium - Removed Team Permissions SPARC Consortium',
        'Disseminate Datasets - Share with Consortium - Add Team Permissions SPARC Consortium',
        'Disseminate Datasets - Share with Consortium - Curated & Awaiting PI Approval',
        'Disseminate Datasets - Share with Consortium - Change Dataset Status to Under Embargo',
        'Disseminate Datasets - Pre-publishing Review',

        # --- Guided Mode ---
        'Guided Mode - Generate - Dataset',
        'Guided Mode - Generate - Dataset - Size',
        'Guided Mode - Generate - Dataset - Number of Files',
    ],
    value='New/Returning Users',
    description='Option:',
    disabled=False,
)

# Date pickers that bound the reporting window read by the cells below.
start_date = widgets.DatePicker(
    description='Start Date:',
    disabled=False,
)
end_date = widgets.DatePicker(
    description='End Date:',
    disabled=False,
)

# Granularity used to split the reporting window into graph points.
update_interval = widgets.Dropdown(
    options=['Daily', 'Weekly', 'Monthly', "No Separation"],
    description='Update Interval:',
    disabled=False,
)

# Render all four controls together under this cell.
display(feature, start_date, end_date, update_interval)

Dropdown(description='Option:', options=('New/Returning Users', 'App Launched - OS', 'App Launched - SODA', 'M…

DatePicker(value=None, description='Start Date:')

DatePicker(value=None, description='End Date:')

Dropdown(description='Update Interval:', options=('Daily', 'Weekly', 'Monthly', 'No Separation'), value='Daily…

# --------------------------------------------------------------------------------------------------

#### The cell below is a basic function that uses the widgets in the cell above to create a graph. If the widgets are not showing, run the widget cell. You don't have to run it again after selecting a value. Changing the value of the dropdown will dynamically change the value of the variable in the next cell.

# --------------------------------------------------------------------------------------------------

In [3]:
# Build a per-interval time series for the feature selected in the widgets and
# plot it. For event features the series is also written to a CSV under
# result_csv/graph_data.
dt = start_date.value
ds = end_date.value

# The date pickers hold None until a date is chosen; fail early with a clear
# message instead of an opaque error further down.
if dt is None or ds is None:
    raise ValueError("Select both a start date and an end date in the widgets above before running this cell.")

interval = update_interval.value
selected_feature = feature.value
is_user_report = selected_feature == "New/Returning Users"

data, new_user_data, returning_user_data = [], [], []
column_headers = []
file_name = ""
bar_counter = 0

# Work out the first [start, end] window, the CSV column headers and the file
# name prefix for the chosen interval.
if interval == "Daily":
    bar_counter = progress_bar_counter(dt, ds, "Daily")
    start = end = dt
    column_headers = ['Day', 'Frequency']
    file_name = "daily"
elif interval == "Weekly":
    bar_counter = progress_bar_counter(dt, ds, "Weekly")
    # Snap the first window to the Monday-Sunday week containing the start date.
    start = dt - timedelta(days=dt.weekday())
    end = start + timedelta(days=6)
    column_headers = ['Week', 'Frequency']
    file_name = "weekly"
elif interval == "Monthly":
    bar_counter = progress_bar_counter(dt, ds, "Monthly")
    start = dt
    # The first window runs from the start date to the last day of its month.
    end = dt.replace(day=calendar.monthrange(dt.year, dt.month)[1])
    column_headers = ['Month', 'Frequency']
    file_name = "monthly"
elif interval == "No Separation":
    bar_counter = 1
    start = dt
    end = ds
    column_headers = ['Time Period', 'Frequency']
    file_name = "no_Separation"

# Users are counted per user type; every other feature is an event action.
if is_user_report:
    metric_expression, dimension_name = 'ga:users', 'ga:userType'
else:
    metric_expression, dimension_name = 'ga:totalEvents', 'ga:eventAction'

for _ in trange(bar_counter):
    # Keep iterating (so the progress bar completes) but stop querying once the
    # window has moved past the selected end date.
    if start > ds:
        continue

    query = {
        'reportRequests': [
        {
            'viewId': VIEW_ID,
            'dateRanges': [{'startDate': start.strftime('%Y-%m-%d'), 'endDate': end.strftime('%Y-%m-%d')}],
            'metrics': [{'expression': metric_expression}],
            'dimensions': [{'name': dimension_name}]
        }]
    }

    # Label shown on the x axis / first CSV column for this window.
    if interval == "Daily":
        cell_data_date = start.strftime("%d %b, %Y")
    elif interval == "Monthly":
        cell_data_date = start.strftime("%b %Y")
    else:
        cell_data_date = start.strftime("%d %b, %Y") + " - " + end.strftime("%d %b, %Y")

    response = get_report(analytics, query)
    # "rows" is absent when nothing matched; treat missing and empty alike so
    # every window always contributes exactly one data point.
    response_rows = response["reports"][0]["data"].get("rows", [])
    totals_by_dimension = {
        res["dimensions"][0]: res["metrics"][0]["values"][0] for res in response_rows
    }

    if is_user_report:
        new_user_data.append([cell_data_date, int(totals_by_dimension.get("New Visitor", 0))])
        returning_user_data.append([cell_data_date, int(totals_by_dimension.get("Returning Visitor", 0))])
    else:
        data.append([cell_data_date, int(totals_by_dimension.get(selected_feature, 0))])

    start, end = next_date_interval(start, end, interval)

folder_path = os.path.join("result_csv", "graph_data")
Path(folder_path).mkdir(parents=True, exist_ok=True)

df = new_df = returning_df = None
if is_user_report:
    new_df = pd.DataFrame(new_user_data, columns=column_headers)
    returning_df = pd.DataFrame(returning_user_data, columns=column_headers)

    new_x_markers = new_df.iloc[:, 0].array
    new_y_markers = new_df.iloc[:, 1].array.astype(int)
    ret_x_markers = returning_df.iloc[:, 0].array
    ret_y_markers = returning_df.iloc[:, 1].array.astype(int)

    # One line per user type on a shared figure.
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=new_x_markers, y=new_y_markers, mode='lines', name='New Users'))
    fig.add_trace(go.Scatter(x=ret_x_markers, y=ret_y_markers, mode='lines', name='Returning Users'))
    fig.show()
else:
    df = pd.DataFrame(data, columns=column_headers)
    # Assign the cast back: DataFrame.astype returns a new object, so calling it
    # without keeping the result has no effect.
    df["Frequency"] = df["Frequency"].astype(int)

    date_range_text = dt.strftime("%d %b, %Y") + " - " + ds.strftime("%d %b, %Y")
    result_path = os.path.join(folder_path, file_name + "_graph-" + date_range_text + ".csv")
    df.to_csv(result_path, encoding='utf-8', index=False)

    x_column = column_headers[0]
    chart_title = interval + " Chart for '" + selected_feature + "': " + date_range_text
    # Key the axis label on the column that is actually plotted; only the daily
    # chart renames its axis (to "Date").
    axis_labels = {
        x_column: "Date" if interval == "Daily" else x_column,
        "Frequency": "Frequency",
    }

    if interval == "No Separation":
        # A single data point is invisible as a line, so draw a large marker.
        fig = px.scatter(df, x=x_column, y="Frequency", render_mode="auto",
                         labels=axis_labels, title=chart_title)
        fig.update_traces(marker={'size': 15})
    else:
        fig = px.line(df, x=x_column, y="Frequency", render_mode="auto",
                      labels=axis_labels, title=chart_title)

    fig.show()


  0%|          | 0/9 [00:00<?, ?it/s]

# --------------------------------------------------------------------------------------------------

### Sunburst graph of actions excluding those in the ignore list

In [None]:
# Sunburst of event totals grouped by category, for the date range selected in
# the date picker widgets.
start = start_date.value
end = end_date.value

# The date pickers hold None until a date is chosen.
if start is None or end is None:
    raise ValueError("Select both a start date and an end date in the widgets above before running this cell.")

# Category name -> {event action -> total events}. An action is filed under
# every category whose name occurs anywhere in the action string.
# NOTE(review): because this is a substring match, an action can land in more
# than one category (e.g. any action containing "App") -- confirm this is intended.
category_dict = {
    "Manage Dataset": {},
    "App": {},
    "Disseminate Dataset": {},
    "Generate Dataset": {},
    "Prepare Metadata": {},
    "Other": {}
}


# to exclude items from the Sunburst graphic add their Action names here
ignore_list = ["Establishing Python Connection",
               "App Launched - OS",
               "App Launched - SODA",
               "App Restarted",
               "Update Downloaded",
               "Update Requested",
              ]

data = []

query = {
    'reportRequests': [
    {
        'viewId': VIEW_ID,
        'dateRanges': [{'startDate': start.strftime('%Y-%m-%d'), 'endDate': end.strftime('%Y-%m-%d')}],
        'metrics': [{'expression': 'ga:totalEvents'}],
        'dimensions': [{'name': 'ga:eventAction'}]
    }]
}

response = get_report(analytics, query)
# "rows" is absent from the response when no events matched the date range.
response_rows = response["reports"][0]["data"].get("rows", [])

for res in response_rows:
    response_action = res["dimensions"][0]

    if response_action in ignore_list:
        continue

    # Metric values arrive as strings; convert so totals add numerically and
    # the sunburst receives numeric sizes.
    response_value = int(res["metrics"][0]["values"][0])

    matching_categories = [key for key in category_dict if key in response_action]
    if not matching_categories:
        # Manifest generation events do not carry a category name; count them
        # as dataset generation. Everything else unmatched goes to "Other".
        if "Manifest Files Created" in response_action:
            matching_categories = ["Generate Dataset"]
        else:
            matching_categories = ["Other"]

    for key in matching_categories:
        bucket = category_dict[key]
        bucket[response_action] = bucket.get(response_action, 0) + response_value

# Flatten to one row per (category, action) pair.
for key in category_dict:
    for action_key in category_dict[key]:
        cell_data = [key, action_key, category_dict[key][action_key]]
        data.append(cell_data)

df = pd.DataFrame(data, columns = ["Action", "Subaction", "Total"])
result_path = os.path.join("test.csv")
df.to_csv(result_path, encoding='utf-8', index=False)

if df.empty:
    print("No events found for the selected date range; nothing to plot.")
else:
    fig = px.sunburst(df, path=['Action', 'Subaction'], values='Total',
                      color='Subaction', hover_data=['Total'])
    fig.show()

### Set start and end date for all further reports

In [4]:
# Read the report window from the date picker widgets.
dt = start_date.value
ds = end_date.value

# The date pickers hold None until a date is chosen; fail with a clear message
# instead of an AttributeError on strftime below.
if dt is None or ds is None:
    raise ValueError("Select both a start date and an end date in the widgets above before running this cell.")

# `start` and `end` are the strings handed to the report queries below.
# Date format is 'YYYY-MM-DD'.
start = dt.strftime('%Y-%m-%d')
end = ds.strftime('%Y-%m-%d')

# To use fixed or relative dates instead of the widgets, comment out everything
# above and assign `start` / `end` directly, for example:
# start = "50daysAgo"
# end = "today"
# end = "yesterday"
# start = "2021-01-23"
# end = "2021-04-23"

### Create the dataset ID to dataset name mapping for all datasets (used by some of the reports below, so run this cell first)

In [5]:
# Dictionary of datasetId -> dataset name pairs.
# IMP: mapping only exists from January 10th, 2022 onward
# Gets populated by the function 'createDatasetIdToNameMapping'
# A dataset being in this dictionary does not guarantee it has been touched/processed;
# only that it has been selected by a SODA user.
idNameMap = {}



# The function below can map two dataset IDs to the same name when:
# Multiple name changes occur during the same moment in time, where a moment is the Year, Month, Day, Hour, Minute.
# Why: Google Analytics will sort dataset names alphabetically within a single time frame/moment. If a dataset
#      changes names twice within that timeframe then the alphabetically greater name will be selected as opposed
#      to the chronologically greater name. This means another datasetId can also be mapped to that name given the below code
#      should they take the name within the same minute it was vacated by another datasetId/user in SODA.
# That said, this is unlikely, especially as a later rename by the user (in a separate moment) resolves it:
# only the most recent mapping reported by Analytics is kept for each datasetId and older rows for that
# datasetId are ignored.
def createDatasetIdToNameMapping(start, end):
    """Fill the module-level ``idNameMap`` with datasetId -> dataset name pairs.

    Queries events between ``start`` and ``end``, ordered newest first, and
    keeps the first (most recent) name seen for each dataset ID among the rows
    whose event category is "Dataset ID to Dataset Name Map".

    Args:
        start: Start of the date range; passed unchanged as the query's
            ``startDate``.
        end: End of the date range; passed unchanged as the query's
            ``endDate``.

    Returns:
        None. Results are written into ``idNameMap``; an entry already present
        for a dataset ID found in this range is overwritten.
    """
    query = {
    'reportRequests': [
    {
        'viewId': VIEW_ID,
        'dateRanges': [{'startDate': start, 'endDate': end}],
        'dimensions': [{'name': 'ga:eventCategory'}, {'name': 'ga:eventAction'}, {'name': 'ga:eventLabel'},
                       {'name': 'ga:date'}, {'name': 'ga:hour'}, {'name': 'ga:minute'} ],
        'orderBys': [
            {
                 "fieldName": "ga:date",
                 "sortOrder": "DESCENDING"
            },
            {
                "fieldName": "ga:hour",
                "sortOrder": "DESCENDING"
            },
            {
                "fieldName": "ga:minute",
                "sortOrder": "DESCENDING"
            }
        ]
    }]
    }

    response = get_report(analytics, query)
    # "rows" is absent from the response when nothing matched the date range.
    response_rows = response["reports"][0]["data"].get("rows", [])

    # Dataset IDs already mapped during this call. Rows are newest first, so
    # any later row for the same ID is an older name and must be skipped.
    visited_ids = set()

    for res in response_rows:
        # Dimensions come back in request order: category, action, label, ...
        category, did, dname = res["dimensions"][:3]

        # Only mapping events carry an ID (action) and a name (label).
        if category != "Dataset ID to Dataset Name Map":
            continue

        if did in visited_ids:
            continue
        visited_ids.add(did)

        idNameMap[did] = dname
    
 
# Fill idNameMap for the date range held in `start` / `end`, then show the mapping.
createDatasetIdToNameMapping(start, end)
print(idNameMap)

{'N:dataset:b0c104b4-f448-4a33-9adb-33704a1890f8': 'Quantification of nerve fiber populations in the external muscle of the human gastric corpus', 'N:dataset:df4c4f1c-14e7-46ed-86d2-c6e04a6f53f3': 'Stimulation of the pig vagus nerve to modulate target effect versus side effect', 'N:dataset:7ed98147-b010-4e6c-992b-bd165c8b3d2b': 'Decoding vagus-nerve activity with carbon nanotube sensors in freely moving rodents', 'N:dataset:c71941ce-40b5-4a41-89aa-3c6d755ab7cf': 'gmps-1', 'N:dataset:7ade515d-22f9-41f8-9d3b-b7569a47600d': '974-files', 'N:dataset:647044fe-6aae-45bd-808e-d169f9fa5230': 'additional-metadata', 'N:dataset:cad39610-dba6-4346-aded-071281fbce37': 'Human SPARC data', 'N:dataset:84403bb7-7f4c-4e40-832a-0e092d5338a8': 'Mapping of vagal sensory nerve populations and their brainstem projections in mice', 'N:dataset:f9c29dbe-a250-41b9-922d-1ade26fc86ed': 'Computational model of heart rate modulation in mice during vagus nerve stimulation', 'N:dataset:51a2b0c3-5314-4f3a-8a48-86ded5f30

### Get a list of all datasets on which the actions below have been performed

In [6]:
# All of the actions that will be used when considering if a dataset has been
# touched/processed. Add or remove tracked Actions at will.
# NOTE: every entry must be followed by a comma - Python silently joins
# adjacent string literals, which would merge two actions into one bogus name.
# Each action is listed once.
all_actions = [
    # --- Manage Datasets ---
    'Manage Datasets - Create a new dataset',
    'Manage Datasets - Rename an existing dataset',
    'Manage Datasets - Make PI owner of dataset',
    'Manage Datasets - Add/Edit Permissions',
    'Manage Datasets - Add/Edit Permissions - Add User Permissions',
    'Manage Datasets - Add/Edit Permissions - Add Team Permissions',
    "Manage Datasets - Add/Edit Subtitle",
    "Manage Datasets - Add/Edit Subtitle - Get Subtitle",
    "Manage Datasets - Add/Edit Readme",
    "Manage Datasets - Add/Edit Readme - Get Readme",
    "Manage Datasets - Add/Edit Readme - Parse Readme",
    "Manage Datasets - Upload a Banner Image",
    "Manage Datasets - Upload a Banner Image - Size",
    "Manage Datasets - Upload a Banner Image - Importing Banner Image",
    "Manage Datasets - Upload a Banner Image - Get Banner Image",
    "Manage Datasets - Add/Edit Tags",
    "Manage Datasets - Add/Edit Tags - Get Tags",
    "Manage Datasets - Assign a License",
    "Manage Datasets - Assign a License - Get License",
    "Manage Datasets - Upload Local Dataset",
    "Manage Datasets - Upload Local Dataset - size",
    "Manage Datasets - Upload Local Dataset - name - size",
    "Manage Datasets - Upload Local Dataset - Number of Folders",
    "Manage Datasets - Upload Local Dataset - name - Number of Folders",
    "Manage Datasets - Upload Local Dataset - Number of Files",
    "Manage Datasets - Upload Local Dataset - name - Number of Files",
    "Manage Datasets - Change Dataset Status",
    "Manage Datasets - Change Dataset Status - Get Dataset Status",

    # --- Prepare Metadata ---
    'Prepare Metadata - Add Airtable account',
    'Prepare Metadata - Add DDD',
    'Prepare Metadata - Create Submission',
    'Prepare Metadata - Create dataset_description',
    'Prepare Metadata - samples',
    'Prepare Metadata - samples - Generate',
    'Prepare Metadata - samples - Generate - Local',
    'Prepare Metadata - samples - Generate - Pennsieve',
    'Prepare Metadata - samples - Existing',
    'Prepare Metadata - samples - Existing - Local',
    'Prepare Metadata - samples - Existing - Pennsieve',
    'Prepare Metadata - submission',
    'Prepare Metadata - submission - Generate',
    'Prepare Metadata - submission - Generate - Local',
    'Prepare Metadata - submission - Generate - Pennsieve',
    'Prepare Metadata - submission - Existing',
    'Prepare Metadata - submission - Existing - Local',
    'Prepare Metadata - submission - Existing - Pennsieve',
    'Prepare Metadata - dataset_description',
    'Prepare Metadata - dataset_description - Generate',
    'Prepare Metadata - dataset_description - Generate - Local',
    'Prepare Metadata - dataset_description - Generate - Pennsieve',
    'Prepare Metadata - dataset_description - Existing',
    'Prepare Metadata - dataset_description - Existing - Local',
    'Prepare Metadata - dataset_description - Existing - Pennsieve',
    'Prepare Metadata - subjects',
    'Prepare Metadata - subjects - Generate',
    'Prepare Metadata - subjects - Generate - Local',
    'Prepare Metadata - subjects - Generate - Pennsieve',
    'Prepare Metadata - subjects - Existing',
    'Prepare Metadata - subjects - Existing - Local',
    'Prepare Metadata - subjects - Existing - Pennsieve',
    'Prepare Metadata - readme',
    'Prepare Metadata - readme - Generate',
    'Prepare Metadata - readme - Generate - Local',
    'Prepare Metadata - readme - Generate - Pennsieve',
    'Prepare Metadata - readme - Existing',
    'Prepare Metadata - readme - Existing - Local',
    'Prepare Metadata - readme - Existing - Pennsieve',
    'Prepare Metadata - changes',
    'Prepare Metadata - changes - Generate',
    'Prepare Metadata - changes - Generate - Local',
    'Prepare Metadata - changes - Generate - Pennsieve',
    'Prepare Metadata - changes - Existing',
    'Prepare Metadata - changes - Existing - Local',
    'Prepare Metadata - changes - Existing - Pennsieve',
    'Prepare Metadata - manifest',
    'Prepare Metadata - manifest - Generate',
    'Prepare Metadata - manifest - Generate - Local',
    'Prepare Metadata - manifest - Generate - Pennsieve',
    'Prepare Metadata - manifest - Existing',
    'Prepare Metadata - manifest - Existing - Local',
    'Prepare Metadata - manifest - Existing - Pennsieve',

    # --- Download Template ---
    'Download Template - manifest.xlsx',
    'Download Template - dataset_description.xlsx',
    'Download Template - subjects.xlsx',
    'Download Template - samples.xlsx',
    'Download Template - submission.xlsx',

    # --- Prepare Datasets - Organize dataset ---
    'Prepare Datasets - Organize dataset',
    'Prepare Datasets - Organize dataset - Existing',
    'Prepare Datasets - Organize dataset - Existing - Pennsieve',
    'Prepare Datasets - Organize dataset - Existing - Local',
    'Prepare Datasets - Organize dataset - Existing - Saved',

    # --- Prepare Datasets - Organize dataset - Step 3 ---
    'Prepare Datasets - Organize dataset - Step 3',
    'Prepare Datasets - Organize dataset - Step 3 - Import',
    'Prepare Datasets - Organize dataset - Step 3 - Import - File',
    'Prepare Datasets - Organize dataset - Step 3 - Import - File - Local',
    'Prepare Datasets - Organize dataset - Step 3 - Import - File - Saved',
    'Prepare Datasets - Organize dataset - Step 3 - Import - File - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 3 - Import - File - New',

    'Prepare Datasets - Organize dataset - Step 3 - Import - Folder',
    'Prepare Datasets - Organize dataset - Step 3 - Import - Folder - Local',
    'Prepare Datasets - Organize dataset - Step 3 - Import - Folder - Saved',
    'Prepare Datasets - Organize dataset - Step 3 - Import - Folder - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 3 - Import - Folder - New',

    'Prepare Datasets - Organize dataset - Step 3 - Add - Folder',
    'Prepare Datasets - Organize dataset - Step 3 - Add - Folder - Local',
    'Prepare Datasets - Organize dataset - Step 3 - Add - Folder - Saved',
    'Prepare Datasets - Organize dataset - Step 3 - Add - Folder - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 3 - Add - Folder - New',

    # --- Prepare Datasets - Organize dataset - Step 4 ---
    'Prepare Datasets - Organize dataset - Step 4',
    'Prepare Datasets - Organize dataset - Step 4 - Import',
    'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local - Saved',
    'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Local - New',
    'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve - Saved',
    'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - subjects - Pennsieve - New',

    'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local - Saved',
    'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Local - New',
    'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve - Saved',
    'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - samples - Pennsieve - New',

    'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local - Saved',
    'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Local - New',
    'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve - Saved',
    'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - submission - Pennsieve - New',

    'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local - Saved',
    'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Local - New',
    'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve - Saved',
    'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - dataset_description - Pennsieve - New',

    'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local - Saved',
    'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - README - Local - New',
    'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve - Saved',
    'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - README - Pennsieve - New',

    'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local - Saved',
    'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Local - New',
    'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve - Saved',
    'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve - Local',
    'Prepare Datasets - Organize dataset - Step 4 - Import - CHANGES - Pennsieve - New',

    # --- Prepare Datasets - Organize dataset - Step 7 ---
    'Prepare Datasets - Organize dataset - Step 7',
    'Prepare Datasets - Organize dataset - Step 7 - Generate',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Local',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Saved',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - New',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Pennsieve',

    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Local',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Saved',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - New',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Pennsieve',

    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Size',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Size',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Size',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Number of Files',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Number of Files',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local - Number of Files',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Manifest',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Manifest - Pennsieve',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Manifest - Local',

    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Create a duplicate',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Replace',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Merge',
    'Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Pennsieve - Skip',

    # --- Disseminate Datasets ---
    'Disseminate Datasets - Show current dataset permission',
    'Disseminate Datasets - Show current dataset status',
    'Disseminate Datasets - Pre-publishing Review - Integrate ORCID iD',
    'Disseminate Datasets - Pre-publishing Review - Get Excluded Files',
    'Disseminate Datasets - Pre-publishing Review - Get Metadata Files',
    'Disseminate Datasets - Pre-publishing Review - Update excluded files',
    'Disseminate Datasets - Pre-publishing Review - Publish',
    'Disseminate Datasets - Pre-publishing Review - Submit dataset',
    'Disseminate Datasets - Pre-publishing Review - Withdraw dataset',
    'Disseminate Datasets - Pre-publishing Review - Fetch Pre-publishing Checklist Statuses',
    "Disseminate Datasets - Pre-publishing Review - Determine User's Dataset Role",
    "Disseminate Datasets - Pre-publishing Review - Show publishing status",

    'Disseminate Datasets - Share with Curation Team',
    "Disseminate Datasets - Share with Curation Team - Remove Consortium's Team Permissions",
    "Disseminate Datasets - Share with Curation Team - Give Consortium Team Permissions",
    "Disseminate Datasets - Share with Curation Team - Change Dataset Status to Work In Progress",
    "Disseminate Datasets - Share with Curation Team - Change Dataset Status to Ready for Curation",

    'Disseminate Datasets - Share with Consortium',
    'Disseminate Datasets - Share with Consortium - Removed Team Permissions SPARC Consortium',
    'Disseminate Datasets - Share with Consortium - Add Team Permissions SPARC Consortium',
    'Disseminate Datasets - Share with Consortium - Curated & Awaiting PI Approval',
    'Disseminate Datasets - Share with Consortium - Change Dataset Status to Under Embargo',
    'Disseminate Datasets - Pre-publishing Review',

    # --- Guided Mode ---
    'Guided Mode - Generate - Dataset',
]

In [7]:
def is_valid_uuid(value):
    """Report whether ``value`` can be parsed by ``uuid.UUID``.

    Returns True when the constructor accepts the value and False when it
    raises ValueError.
    """
    try:
        uuid.UUID(value)
    except ValueError:
        return False
    else:
        return True
    

# datasetId -> dataset name for every dataset that has been processed;
# filled in by datasets_and_actions_update
processed_dataset_id_to_name_list = {}

# names of local datasets that have been processed through SODA
# (duplicates are not removed)
dataset_list_local = []

# names of processed datasets that are present in the id -> name mapping
dataset_list = []

def datasets_and_actions_update(start, end):
    """Collect the datasets touched by a successful SODA action in a date range.

    Requests every event between ``start`` and ``end`` from Google Analytics
    (newest first) and, for rows in the "Success" category, resolves the event
    label to a dataset name. Results are accumulated in module-level state:

    * ``dataset_list`` - unique dataset names that also appear as a value in
      ``idNameMap``.
    * ``dataset_list_local`` - names logged for locally generated datasets
      (Organize dataset, Step 7); duplicates are kept.
    * ``processed_dataset_id_to_name_list`` - datasetId -> name for every
      dataset ID label that could be resolved through ``idNameMap``.

    ``idNameMap`` must already be populated (see createDatasetIdToNameMapping).
    Dataset IDs without an entry in ``idNameMap`` are skipped. The function
    also makes sure the "result_csv/custom" directory exists.

    Args:
        start: Start of the date range, passed as the query's 'startDate'.
        end: End of the date range, passed as the query's 'endDate'.

    Returns:
        None.
    """
    query = {
        'reportRequests': [
            {
                'viewId': VIEW_ID,
                'dateRanges': [{'startDate': start, 'endDate': end}],
                'metrics': [{'expression': 'ga:totalEvents'}],
                'dimensions': [
                    {'name': 'ga:eventCategory'},
                    {'name': 'ga:eventAction'},
                    {'name': 'ga:eventLabel'},
                    {'name': 'ga:Date'},
                    {'name': 'ga:hour'},
                    {'name': 'ga:minute'},
                ],
                # Newest-first ordering matters: combined with marking a
                # datasetId as 'visited', it guarantees the latest name of a
                # renamed dataset is used and its previous names are left out
                # of the final list (unless another dataset took over a freed
                # name). The exception is the same-minute key collision
                # described for the dataset id to name mapping.
                'orderBys': [
                    {"fieldName": "ga:date", "sortOrder": "DESCENDING"},
                    {"fieldName": "ga:hour", "sortOrder": "DESCENDING"},
                    {"fieldName": "ga:minute", "sortOrder": "DESCENDING"},
                ],
            }
        ]
    }
    response = get_report(analytics, query)
    # "rows" is absent from the report data when nothing matched the query,
    # so fall back to an empty list instead of raising KeyError.
    response_rows = response["reports"][0]["data"].get("rows", [])

    visited_dataset_id = set()
    # build the lookup sets once instead of scanning the containers per row
    known_dataset_names = set(idNameMap.values())
    listed_names = set(dataset_list)

    for res in response_rows:
        category, action, label = res["dimensions"][:3]

        # Only consider Actions that were successful; drop this guard to also
        # take unsuccessful user Actions into account.
        if category != "Success":
            continue

        if action == "Manage Datasets - Rename an existing dataset":
            # renaming Actions have irregular formatting: the dataset id is
            # everything before the last ':' of the label
            lb = label[0:label.rfind(":")]
        else:
            # the label is a name or datasetId
            lb = label

        # A datasetId is a UUID behind a 10-character prefix
        # (e.g. 'N:dataset:<uuid>'), so validate what follows the prefix.
        if is_valid_uuid(lb[10:]):
            # rows are newest first, so only the first hit per datasetId counts
            if lb in visited_dataset_id:
                continue
            visited_dataset_id.add(lb)

            dsname = idNameMap.get(lb)
            if dsname is None:
                # no id -> name mapping recorded for this dataset (mappings
                # only exist from January 10th, 2022 onward); cannot resolve it
                continue

            # remember the processed id -> name pair
            processed_dataset_id_to_name_list[lb] = dsname

            # from here on work with the dataset name
            lb = dsname

        # IMP: When a local dataset is created in the Organize section its
        #      name is logged as the label of this Action, so grab it here.
        #      This is the only kind of 'touched dataset' that will not have a
        #      value in the idNameMap as of January 5th, 2022.
        if (
            action == "Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Local"
            and label != "Local"
        ):
            dataset_list_local.append(lb)

        # Any dataset that has been worked on will have been selected, which
        # creates a dataset id to name mapping; a label that is not a value in
        # that mapping is therefore not a dataset name. One side effect is
        # that 'saved' datasets in the Organize datasets section are not
        # considered touched until they are generated locally or on Pennsieve.
        # If local datasets in Prepare Metadata are ever tracked they should
        # be added to dataset_list_local, otherwise they are ignored here.
        if lb not in known_dataset_names:
            continue

        # do not include the dataset name if it is already in the list
        if lb not in listed_names:
            listed_names.add(lb)
            dataset_list.append(lb)

    folder_path = os.path.join("result_csv", "custom")
    Path(folder_path).mkdir(parents=True, exist_ok=True)


# Populate dataset_list / dataset_list_local for the date range in `start` / `end`.
datasets_and_actions_update(start, end)

# Names from dataset_list first, followed by the locally generated dataset names.
finalized_dataset_list = dataset_list + dataset_list_local

print(len(finalized_dataset_list), "|||||", finalized_dataset_list)

11 ||||| ['Quantification of nerve fiber populations in the external muscle of the human gastric corpus', 'Stimulation of the pig vagus nerve to modulate target effect versus side effect', 'Decoding vagus-nerve activity with carbon nanotube sensors in freely moving rodents', 'gmps-1', '974-files', 'additional-metadata', 'Human SPARC data', 'Mapping of vagal sensory nerve populations and their brainstem projections in mice', 'Computational model of heart rate modulation in mice during vagus nerve stimulation', 'spgmds-3', '500-1mb']


### View all processed dataset ID-Name pairs available

In [8]:
# Builds 'datasetId: dataset name' strings for every dataset that has been
# processed/touched in some way other than just being selected.
# IMP: local datasets are not included as they do not have an ID.
# IMP: run the Cell above first, otherwise there is nothing meaningful to show.
processed_id_name_pairs = [
    dataset_id + ': ' + dataset_name
    for dataset_id, dataset_name in processed_dataset_id_to_name_list.items()
]
print(len(processed_id_name_pairs), "|||||", processed_id_name_pairs)

11 ||||| ['N:dataset:b0c104b4-f448-4a33-9adb-33704a1890f8: Quantification of nerve fiber populations in the external muscle of the human gastric corpus', 'N:dataset:df4c4f1c-14e7-46ed-86d2-c6e04a6f53f3: Stimulation of the pig vagus nerve to modulate target effect versus side effect', 'N:dataset:7ed98147-b010-4e6c-992b-bd165c8b3d2b: Decoding vagus-nerve activity with carbon nanotube sensors in freely moving rodents', 'N:dataset:c71941ce-40b5-4a41-89aa-3c6d755ab7cf: gmps-1', 'N:dataset:7ade515d-22f9-41f8-9d3b-b7569a47600d: 974-files', 'N:dataset:647044fe-6aae-45bd-808e-d169f9fa5230: additional-metadata', 'N:dataset:cad39610-dba6-4346-aded-071281fbce37: Human SPARC data', 'N:dataset:84403bb7-7f4c-4e40-832a-0e092d5338a8: Mapping of vagal sensory nerve populations and their brainstem projections in mice', 'N:dataset:f9c29dbe-a250-41b9-922d-1ade26fc86ed: Computational model of heart rate modulation in mice during vagus nerve stimulation', 'N:dataset:51a2b0c3-5314-4f3a-8a48-86ded5f30ba4: spgm

### Get a report of all unique users within a given time frame

In [None]:
# Total SODA users within the date range, broken down by user type.
query = {
    'reportRequests': [
    {
        'viewId': VIEW_ID,
        'dateRanges': [{'startDate': start, 'endDate': end}],
        'metrics': [{'expression': 'ga:users'}],
        'dimensions': [{'name': 'ga:userType'}]
    }]
}

response = get_report(analytics, query)
# "rows" is absent from the report data when nothing matched the query; fall
# back to an empty list so an empty report is written instead of raising KeyError.
response_rows = response["reports"][0]["data"].get("rows", [])

# one [user type, user count] pair per returned row
data = [
    [res["dimensions"][0], res["metrics"][0]["values"][0]]
    for res in response_rows
]

# write the result to result_csv/users/users-<start>_<end>.csv
folder_path = os.path.join("result_csv", "users")
Path(folder_path).mkdir(parents=True, exist_ok=True)

df = pd.DataFrame(data, columns = ['Type', 'Values'])
result_path = os.path.join(folder_path, "users-" + start + "_" + end + ".csv")
df.to_csv(result_path, encoding='utf-8', index=False)

### Get a report of all new users within a given time frame

In [None]:
# Number of new SODA users within the date range.
query = {
    'reportRequests': [
    {
        'viewId': VIEW_ID,
        'dateRanges': [{'startDate': start, 'endDate': end}],
        # use the metric for tracking new users within a date range
        'metrics': [{'expression': 'ga:users'}, {'expression': 'ga:newUsers'}],
        'dimensions': [{'name': 'ga:userType',}]
    }]
}

response = get_report(analytics, query)
# "rows" is absent from the report data when nothing matched the query; fall
# back to an empty list so an empty report is written instead of raising KeyError.
response_rows = response["reports"][0]["data"].get("rows", [])

# keep only the "New Visitor" rows; values[1] is the second requested metric
# (ga:newUsers)
data = [
    ["New Users", res["metrics"][0]["values"][1]]
    for res in response_rows
    if res["dimensions"][0] == "New Visitor"
]

# place the new users count in a csv inside the new_users folder
folder_path = os.path.join("result_csv", "new_users")
Path(folder_path).mkdir(parents=True, exist_ok=True)

df = pd.DataFrame(data, columns = ['Type', 'Values'])
result_path = os.path.join(folder_path, "users-" + start + "_" + end + ".csv")
df.to_csv(result_path, encoding='utf-8', index=False)

### Get the number of files and the size of all datasets that were uploaded through SODA for a given time frame (2022 Update)

In [None]:
def dataset_statistics(start, end):
    """Write file-count and size statistics for SODA dataset events to a CSV.

    Requests every (category, action, label) event combination in the date
    range, keeps the rows that match one of the tracked actions below, and
    writes them to
    ``result_csv/custom/dataset_statistics-update<start>_<end>.csv``.

    Row layout (columns 'Status', 'Number of Files', 'Size in (bytes)'):
      * count rows are ``[category, count, action]``
      * size rows are ``[category, action, size]``
    so the number always lands in the column that matches its meaning and the
    action name fills the other one.

    Args:
        start: Report start date, sent to the API as 'startDate' and used in
            the output file name.
        end: Report end date, sent to the API as 'endDate' and used in the
            output file name.

    Returns:
        None. The report is written to disk.
    """
    # Positions in the metric value list; they follow the order of the
    # 'metrics' entries in the request below.
    event_value = 1   # ga:eventValue
    total_events = 2  # ga:totalEvents

    # Ordered rules: (event action, required event label or None for any
    # label, metric position to read, True if the number is a size).
    # Rules are evaluated in order and are NOT mutually exclusive.
    rules = (
        # aggregate number of files for all local dataset uploads
        ("Manage Datasets - Upload Local Dataset - Number of Files",
         "Number of files local dataset", event_value, False),
        # aggregate size of all local dataset uploads
        ("Manage Datasets - Upload Local Dataset - size",
         "Size", event_value, True),
        # aggregate number of files for all datasets generated in Organize
        # datasets - both locally and on Pennsieve
        ("Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Number of Files",
         "Number of Files", event_value, False),
        # aggregate size of the files uploaded in Organize datasets - both
        # locally and on Pennsieve
        ("Prepare Datasets - Organize dataset - Step 7 - Generate - Dataset - Size",
         "Size", event_value, True),
        # number of manifest files created/processed when generating a dataset
        # in the Organize dataset section
        ("Prepare Datasets - Organize dataset - Step 7 - Generate - Manifest",
         None, event_value, False),
        # number of banner image files uploaded
        ("Manage Datasets - Upload a Banner Image",
         None, total_events, False),
        # aggregate size of banner image files uploaded through SODA
        ("Manage Datasets - Upload a Banner Image - Size",
         "Size", event_value, True),
        # number of metadata files created -- the number of times a generate
        # action was emitted.
        # NOTE(review): this rule has no label filter, so a row labelled
        # "Size of Total Metadata Files Generated" is reported by this rule
        # AND by the next one -- confirm that is intended.
        ("Prepare Metadata - Generate",
         None, total_events, False),
        # aggregate size of metadata files created through SODA
        ("Prepare Metadata - Generate",
         "Size of Total Metadata Files Generated", event_value, True),
        # number of manifest files created through Prepare Metadata
        ("Prepare Metadata - manifest - Generate - Number of Files",
         "Number of Files", event_value, False),
    )

    query = {
        'reportRequests': [
        {
            'viewId': VIEW_ID,
            'dateRanges': [{'startDate': start, 'endDate': end}],
            'metrics': [{'expression': 'ga:uniqueEvents'}, {'expression': 'ga:eventValue'}, {'expression': 'ga:totalEvents'}],
            'dimensions': [{'name': 'ga:eventCategory'}, {'name': 'ga:eventAction'},
                           {'name': 'ga:eventLabel'}],

        }]
    }

    response = get_report(analytics, query)
    # The Reporting API leaves the "rows" key out of the report when the query
    # matches nothing, so default to an empty list instead of raising KeyError.
    response_rows = response["reports"][0]["data"].get("rows", [])

    data = []
    for res in response_rows:
        category = res["dimensions"][0]
        action = res["dimensions"][1]
        label = res["dimensions"][2]

        # do not track Errors; comment out to receive Errors in the report
        if category == "Error":
            continue

        metric_values = res["metrics"][0]["values"]
        for rule_action, rule_label, metric_index, is_size in rules:
            if action != rule_action:
                continue
            if rule_label is not None and label != rule_label:
                continue

            # Metric values arrive as strings; store every number as an int.
            value = int(metric_values[metric_index])
            if is_size:
                data.append([category, action, value])
            else:
                data.append([category, value, action])

    folder_path = os.path.join("result_csv", "custom")
    Path(folder_path).mkdir(parents=True, exist_ok=True)

    df = pd.DataFrame(data, columns=['Status', 'Number of Files', 'Size in (bytes)'])
    result_path = os.path.join(folder_path, f"dataset_statistics-update{start}_{end}.csv")
    df.to_csv(result_path, encoding='utf-8', index=False)

## Build the dataset statistics report for the selected time period; useful for
## getting all details about uploads to Pennsieve in that period.
## The resulting CSV is written to the result_csv/custom folder.
# num_of_files_folders_in_dataset(start_date, end_date)
dataset_statistics(start, end)