# explore models

In [None]:
model = '''
    connection: "data_warehouse"

    datagroup: sfdc_oppty_data {
      max_cache_age: "24 hours"
      sql_trigger:
        select DATE_PART('day',max(received_at)) || '-' || DATE_PART('hour',max(received_at)) as day_hour from salesforce.opportunities ;;
    }
    datagroup: sfdc_lead_and_contact_data {
      max_cache_age: "24 hours"
      sql_trigger:
        WITH leads_and_contacts as (select received_at from salesforce.leads UNION select received_at from salesforce.contacts )
        select DATE_PART('day',max(received_at)) || '-' || DATE_PART('hour',max(received_at)) as day_hour from leads_and_contacts ;;
    }

    # model-level access grants
    access_grant: can_view_revenue_data {
      user_attribute: access_grant_revenue
      allowed_values: [ "yes" ]
    }

    # include extended views
    include: "sf__*[!.][!z].view.lkml"
    include: "stripe*[!.][!z].view.lkml"
    include: "marketo*[!.][!z].view.lkml"
    include: "reghub_dockeruser.view.lkml"
    include: "ucp_*[!.][!z].view.lkml"
    include: "dtr_*[!.][!z].view.lkml"
    include: "finance_revenue_forecast.view.lkml"

    include: "sf__opportunity_dockercon_renewals.z.view.lkml"
    include: "salesforce/*.view"


    # include just SFDC-related LookML dashboards
    include: "sf__marketing_leadership.dashboard.lookml"

    # Define the Fiscal Year offset
    fiscal_month_offset:  -11 # starts in February

    explore: bizible_attribution_touchpoint {}
    
    #
    # Explore joins SFDC account and lead objects on an engagio determined custom field
    #
    explore: sf__accounts_leads_engagio {
      label: "Accounts (Engagio)"
      # Base view of explore
      from: sf__accounts
      # Hide it from the main list -- still accessible via URL
      hidden: yes
      # Label the view in the Explore UI as Accounts (instead of the long explore name)
      view_label: "Accounts"
      # Add a convenient description to Explore
      description: "Accounts and associated leads as identified by engagio"
      # Ignore deleted accounts
      sql_always_where: NOT ${sf__accounts_leads_engagio.is_deleted}
        ;;

      join: sf__leads {
        view_label: "Leads"
        sql_on: ${sf__accounts_leads_engagio.id} = ${sf__leads.engagio_matched_account} ;;
        # One account will have many leads,
        # and a lead (from engagio's perspective) belongs to a single account
        relationship: one_to_many
      }

join: sf__opportunities {
    from: sf__opportunities_extended
    view_label: "Opportunities"
    sql_on: ${sf__opportunity_ltm_acv.opportunity_id} = ${sf__opportunities.id} ;;
    relationship: many_to_one
    }
  # join only opportunities with close dates matching the LTM ACV report dates (used by the Sales/Exec Dashboards)
    join: sf__accounts {
      sql_table_name: SEGMENT.SALESFORCE.ACCOUNTS ;;
      sql_on: ${sf__opportunities.account_id} = ${sf__accounts.id} ;;
      }
  
#  join: contacts {
#    from: sf__contacts
#    sql_table_name: SEGMENT.SALESFORCE.CONTACTS ;;
#    sql_on: ${contacts.id} = ${opportunity_contacts.contact_id} ;;
#  }
}
'''

In [None]:
model = model.lstrip('\n').rstrip('\n').split('\n')

In [None]:
model

In [None]:
def joining_joins(join_list):
    """
    This function takes in a list of rows in an lkml file, and groups each joins with its own parameters.
    Returns a list of joined tables.
    :param join_list: the preprocessed list version of lkml file.
    :type: join_list list
    :return: stitch_join 
    """
    stitch_join = []
    for num, line in enumerate(join_list):
        
        if 'connection:' == line.lstrip().split(' ')[0] and '#' not in line:
            stitch_join.append(line.lstrip())
        
        elif 'explore:' == line.lstrip().split(' ')[0] and '#' not in line:
            begin = num
            for num2 in range(begin, len(join_list)):

                if any(join_list[num2+1].lstrip().split(':')[0] == x for x in ['join', 'explore']):
                    end = num2
                    stitch_join.append(list(filter(lambda x: '#' not in x, join_list[begin:end])))
                    break
        
        elif 'join:' == line.lstrip().split(' ')[0] and '#' not in line:
            begin = num

            for num2 in range(begin, len(join_list)):
                if '}' == join_list[num2].lstrip()[0]:
                    end = num2 
                    stitch_join.append(list(filter(lambda x: '#' not in x, join_list[begin:end])))
                    break
                    
    return stitch_join


In [None]:
# any(x == join_list[num2].lstrip().split(':')[0] for x in ['join', 'explore'])

In [None]:
clean_model = joining_joins(model)

In [None]:
clean_model

In [None]:
def extract_lineage(clean_model):
    """
    This function extracts explore name and all dependent views with key params.
    :param clean_model: a list of explore details
    :type: clean_model list
    :return model_dict: a dictionary of explore and dependent view detials in a list
    """
    model_dict = {}
    joins = []
    explores = []
    for table in clean_model:
        
        if 'connection: "' in table:
            connection = table.split(': ')[1].lstrip('"').rstrip('"')
        
        table_list = list(filter(lambda x: any(key_word in x for key_word in ['connection:', 'explore:', 'join:', 'from:', 'view_label:', 'sql_table_name:']), table))
        table_joined = ' '.join(table_list)
        table_list_new = list(filter(None, re.split(r'\W', table_joined)))
        print(table_list_new)
        if 'explore' in table_list_new:
            explore = table_list_new[1]
            explores.extend(table_list_new)
#             model_dict[f'{explore}'] = explore
            
        elif 'join' in table_list_new:
            joins.extend(table_list_new)
            model_dict[f'{explore}'] = joins

        model_dict['connection'] = connection
#         model_dict[f'{explore}'] = explores
#         model_dict[f'{explore}']['joins'] = joins
    
    return model_dict


In [None]:
extract_lineage(clean_model)

In [None]:
with open('sfdc.model.lkml', 'r+') as f:
    for num, line in enumerate(f):
        print(num, line)

In [None]:
line_file = [line.rstrip('\n') for line in open('sfdc.model.lkml')]
loc_explore = []
for num, line in enumerate(line_file):
    if 'explore:' == line.split(' ')[0]:
        loc_explore.append(num)
    elif 'connection:' == line.split(' ')[0]:
        conn = line.split(' ')[1]
loc_explore.append(len(line_file)+1)
print(loc_explore)


In [None]:
import json

In [None]:
for i in range(len(loc_explore)-1):
    explore_dict = {}
    explore_dict['conn'] = conn.lstrip('"').rstrip('"')
    explore_dict['explore'] = line_file[loc_explore[i]:loc_explore[i+1]]
    explore_dict['explore_name'] = explore_dict['explore'][0].split(' ')[1]
    explore_json = json.dumps(explore_dict)
    f = open(f"{explore_dict['explore_name']}.json","w")
    f.write(explore_json)
    f.close()

In [None]:
with open('bizible_histories.json', 'r') as f:
    explore = json.load(f)


In [None]:
explore

# testing main

In [1]:
import re
import json
import os
import logging
import time
from model_separator import split_up_model


In [3]:
for model in os.listdir('models'):
    model_folder = model.split('.')[0]
    split_up_model(f'models/{model}', f"explores/{model_folder}")
    logging.info(f'Completed splitting model {model_folder} into explores.')

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x80 in position 3131: invalid start byte

In [4]:
os.listdir('models/')

['.DS_Store', 'sample_model.model.lkml']

In [None]:
split_up_model(f'models/sample_', f"explores/{model_folder}")

# view files

## type A: NDT

## type B: sql_table_name

## type C: raw sql

## type D: (most complicated) view extends from another view within the same file