In [34]:
import os
import sys
import pandas as pd

src_path = os.path.abspath("../src")
sys.path.insert(0, src_path)

%load_ext autoreload
%autoreload 2

from utils import *
from dag_creation import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [35]:
filename = "../ingestion.csv"
df = pd.read_csv(filename)
df.head()

Unnamed: 0,icdsTableName,BANNER_NAME,dataSensitivity,OP-Company code,dlSchemaName,dlTableName,tableLoadType,keyPreCombine,keyPrimaryKey
0,IKPF,"MDD,MAK,MSB",se,"SA-MDD,SA-MAK,SA-MSB",sa_mdse_dl_secure,PHYSL_INVT_DOC,INC,ds_load_ts,"clnt,application,cond_type,sales_org,distr_cha..."


## MODULE 1 : DAG PREPARATION

In [36]:
def prepare_dag_configuration(df):
    
    
    tableLoadType = df['tableLoadType'].values[0]  # INC
    dlSchemaName = df['dlSchemaName'].values[0]  # example : sa_mdse_dl_secure
    dlTableName = df['dlTableName'].values[0] #example : PHYSL_INVT_DOC
    dataSensitivity = df['dataSensitivity'].values[0] # SE/NS/HS
    banner_list = df['BANNER_NAME'].tolist()[0].split(',')   # [MDD,MAK,MSB]
    
    table_names = prepare_table_name(banner_list, dlTableName)
    
    cluster_names = prepare_cluster_name(dlSchemaName, banner_list, dlTableName)
    
    sample_dag_file = "../sample_dag.py"
    

    # for each banner, there will be dag_config
    for i, b in enumerate(banner_list) :
        
        dag_config = {
            "sensitivity": dataSensitivity,
            "cluster_name": cluster_names[i],
            "banner_name": b[i],
            "table_name": table_names[i],
            "tags": ["Massmart-eComm","P2","Ephemeral","SA","SECURE","MDSE",f"{b}",f"{banner_list[i]}","SLT"],
            "tableLoadType" :   tableLoadType,
            "output_dir" : f"../output/{dlSchemaName}/{table_names[i]}",
            "dag_name" : f"INTLDLDAT-SA{banner_list[i]}-{tableLoadType}-{dlSchemaName.upper()}-{banner_list[i]}_{dlTableName}"
        }
        
        prepare_dag_file(sample_dag_file, dag_config)
        
    return dag_config
   
prepare_dag_configuration(df)


✓ Created: ../output/sa_mdse_dl_secure/mdd_physl_invt_doc/INTLDLDAT-SAMDD-INC-SA_MDSE_DL_SECURE-MDD_PHYSL_INVT_DOC.py
✓ Created: ../output/sa_mdse_dl_secure/mak_physl_invt_doc/INTLDLDAT-SAMAK-INC-SA_MDSE_DL_SECURE-MAK_PHYSL_INVT_DOC.py
✓ Created: ../output/sa_mdse_dl_secure/msb_physl_invt_doc/INTLDLDAT-SAMSB-INC-SA_MDSE_DL_SECURE-MSB_PHYSL_INVT_DOC.py


{'sensitivity': 'se',
 'cluster_name': 'sa-mdse-dl-secure-msb-physl-invt-doc',
 'banner_name': 'B',
 'table_name': 'msb_physl_invt_doc',
 'tags': ['Massmart-eComm',
  'P2',
  'Ephemeral',
  'SA',
  'SECURE',
  'MDSE',
  'MSB',
  'MSB',
  'SLT'],
 'tableLoadType': 'INC',
 'output_dir': '../output/sa_mdse_dl_secure/msb_physl_invt_doc',
 'dag_name': 'INTLDLDAT-SAMSB-INC-SA_MDSE_DL_SECURE-MSB_PHYSL_INVT_DOC'}