* Project: Ontology Management Platform
* Notebook: Reads data from the Trusted layer, applies minor transformations, joins it with metadata, and loads the final data into the Unified layer.
* Author: Ullas Vashista
* Last Update: 05/01/2024

In [0]:
from datetime import datetime, timedelta
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.utils import AnalysisException
import re
import configparser

In [0]:
# ------------------- CONFIG -------------------
# Load the notebook settings from the shared INI file.
config_path = "../config/config.ini"
config = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail fast with a clear message here instead of
# hitting a confusing NoSectionError on the first config.get() below.
if not config.read(config_path):
    raise FileNotFoundError(f"Config file not found or unreadable: {config_path}")

# Delta table locations, all taken from the [TargetStorage] section.
usage_raw_table = config.get("TargetStorage", "usage_raw_table")
usage_trusted_table = config.get("TargetStorage", "usage_trusted_table")
metadata_final_table = config.get("TargetStorage", "metadata_final_table")
usage_final_table = config.get("TargetStorage", "usage_final_table")

In [0]:
# Batch-load each configured Delta location into its own DataFrame.
# `spark` is the session object supplied by the notebook runtime.
usage_raw_df = spark.read.load(usage_raw_table, format="delta")
usage_trusted_df = spark.read.load(usage_trusted_table, format="delta")
metadata_final_df = spark.read.load(metadata_final_table, format="delta")
usage_final_df = spark.read.load(usage_final_table, format="delta")

In [0]:
# Render usage_raw_df (loaded from usage_raw_table) so its rows can be
# inspected visually as a sanity check.
# NOTE(review): display() is not imported in this notebook; presumably it is
# the notebook runtime's built-in renderer - confirm.
display(usage_raw_df)

start_date,start_time,id,ontology,activity,TimeGenerated,input_file_path
17/Sep/2025,06:26,np29263,onto_7,API Access,2025-09-17T06:26:13.602Z,uxv_omp_usage-2025-09-17 06:23:53
17/Sep/2025,06:26,np52464,onto_7,Data Query,2025-09-17T06:26:13.602Z,uxv_omp_usage-2025-09-17 06:23:53
17/Sep/2025,06:26,np31168,onto_2,Data Query,2025-09-17T06:26:35.299Z,uxv_omp_usage-2025-09-17 06:23:53
17/Sep/2025,06:26,np31723,onto_6,Data Query,2025-09-17T06:26:35.299Z,uxv_omp_usage-2025-09-17 06:23:53
17/Sep/2025,06:27,np37015,onto_11,API Access,2025-09-17T06:27:21.61Z,uxv_omp_usage-2025-09-17 06:23:53
17/Sep/2025,06:27,np51533,onto_11,Login,2025-09-17T06:27:21.61Z,uxv_omp_usage-2025-09-17 06:23:53
17/Sep/2025,06:26,np95782,onto_2,Data Query,2025-09-17T06:26:29.32Z,uxv_omp_usage-2025-09-17 06:23:53
17/Sep/2025,06:26,np96775,onto_19,API Access,2025-09-17T06:26:29.32Z,uxv_omp_usage-2025-09-17 06:23:53
17/Sep/2025,06:26,np78526,onto_11,API Access,2025-09-17T06:26:07.482Z,uxv_omp_usage-2025-09-17 06:23:53
17/Sep/2025,06:26,np62506,onto_3,Data Query,2025-09-17T06:26:07.482Z,uxv_omp_usage-2025-09-17 06:23:53


In [0]:
# Render usage_trusted_df (loaded from usage_trusted_table) so its rows can be
# inspected visually as a sanity check.
# NOTE(review): display() is not imported in this notebook; presumably it is
# the notebook runtime's built-in renderer - confirm.
display(usage_trusted_df)

start_date,start_time,id,ontology,activity,MetadataLogType,MetadataLogTimeGenerated,MetadataLogDate,TimeGenerated,MetadataLogFileName,TenantId,Type
17/Sep/2025,06:26,np67297,onto_15,Upload,logm.omp.metadata,2025-09-17T06:26:15.411Z,20250917062615411,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
17/Sep/2025,06:26,np37642,onto_13,API Access,logm.omp.metadata,2025-09-17T06:26:15.411Z,20250917062615411,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
17/Sep/2025,06:26,np85158,onto_12,API Access,logm.omp.metadata,2025-09-17T06:26:42.263Z,20250917062642263,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
17/Sep/2025,06:26,np47992,onto_20,Upload,logm.omp.metadata,2025-09-17T06:26:42.263Z,20250917062642263,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
17/Sep/2025,06:26,np61565,onto_7,Download,logm.omp.metadata,2025-09-17T06:26:50.661Z,20250917062650661,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
17/Sep/2025,06:26,np82648,onto_4,Data Query,logm.omp.metadata,2025-09-17T06:26:50.661Z,20250917062650661,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
17/Sep/2025,06:27,np39292,onto_12,Upload,logm.omp.metadata,2025-09-17T06:27:03.78Z,20250917062703780,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
17/Sep/2025,06:27,np18192,onto_11,Data Query,logm.omp.metadata,2025-09-17T06:27:03.78Z,20250917062703780,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
17/Sep/2025,06:26,np67531,onto_1,Login,logm.omp.metadata,2025-09-17T06:26:25.271Z,20250917062625271,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
17/Sep/2025,06:26,np98447,onto_20,Login,logm.omp.metadata,2025-09-17T06:26:25.271Z,20250917062625271,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL


In [0]:
# Render metadata_final_df (loaded from metadata_final_table) so its rows can
# be inspected visually as a sanity check.
# NOTE(review): display() is not imported in this notebook; presumably it is
# the notebook runtime's built-in renderer - confirm.
display(metadata_final_df)

ontology,filename,size_in_kb,MetadataLogId,MetadataLogType,MetadataLogTimeGenerated,MetadataLogDate,TimeGenerated,MetadataLogGuid,MetadataLogFileName,TenantId,Type
onto_20,file_13.owl,577,ab33e8b5f28159b84fda55d8dfd395c86d1e4ee4a920e7a63f2db59c87cb8a69bb012095d704bfd7627bc9027aba53e6eb558f68254e2b6b2b65d29a1623df19,logm.omp.metadata,2025-09-17T07:49:08.001Z,20250917074908001,2025-09-17 07:49:08.001,2509170749000000019,uxv_omp_metadata_dev-2025-09-17 07:49:07,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
onto_13,file_17.owl,949,b157c31dba2527372143d18c9a0c23af92663f53c05d557d7afab97532d40b666a834d8401dc37af9fdc02b23e55c20f0715d199083dace5ce25715df33272a7,logm.omp.metadata,2025-09-17T07:49:08.001Z,20250917074908001,2025-09-17 07:49:08.001,2509170749000000012,uxv_omp_metadata_dev-2025-09-17 07:49:07,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
onto_12,file_20.owl,913,d50524d7ca666e74cb2bd39807baa0f57ddc59fd30e5a7039abd51b97074dd1ce03cb510390f4d84ef416ad2307e169447a66692d585c1cad17f80c507627ccc,logm.omp.metadata,2025-09-17T07:49:08.001Z,20250917074908001,2025-09-17 07:49:08.001,2509170749000000011,uxv_omp_metadata_dev-2025-09-17 07:49:07,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
onto_17,file_20.owl,132,817cd998526622816066195ddcd0f08d3451d5da6299b6fa13b6b3b6c45afd9900925f12275890e421d3f61b19a6b8a62b3ef3fabe03d7118804ac40936e2936,logm.omp.metadata,2025-09-17T07:49:08.001Z,20250917074908001,2025-09-17 07:49:08.001,2509170749000000016,uxv_omp_metadata_dev-2025-09-17 07:49:07,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
onto_19,file_5.owl,828,7ddb3db4a3684129af232589ce0ce3effebc09198e532fb4eab885002f0922d5ddb46ea16556e8bb75464b05eb279cbfe88b42e952366fd6a7a863faef4d6368,logm.omp.metadata,2025-09-17T07:49:08.001Z,20250917074908001,2025-09-17 07:49:08.001,2509170749000000018,uxv_omp_metadata_dev-2025-09-17 07:49:07,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
onto_18,file_8.owl,735,341e3c2ba94ec75094f0ac588ffe679b64802227c5205a71d11d2900c650f9ea915ac17608d3eaaaf53ccf0950d6fccf598d5abaf6a71b9fc83b53eb7f52453a,logm.omp.metadata,2025-09-17T07:49:08.001Z,20250917074908001,2025-09-17 07:49:08.001,2509170749000000017,uxv_omp_metadata_dev-2025-09-17 07:49:07,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
onto_16,file_9.owl,523,2417743a0dce3e838f83d1d02cb9a66498ba86851ac7b8226ecf686a65f768d5095356438d17f61b5a1952c927a01e405761073a1e58abb06f7442132edc3920,logm.omp.metadata,2025-09-17T07:49:08.001Z,20250917074908001,2025-09-17 07:49:08.001,2509170749000000015,uxv_omp_metadata_dev-2025-09-17 07:49:07,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
onto_15,file_6.owl,506,4b25ccbd77a7e490050ca1c0405265461a51a862e795759b740f8128680de6b8ebcb60c4163602617ca3e84895cb58806612b923766afa7a8e76f82b83d3f822,logm.omp.metadata,2025-09-17T07:49:08.001Z,20250917074908001,2025-09-17 07:49:08.001,2509170749000000014,uxv_omp_metadata_dev-2025-09-17 07:49:07,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
onto_10,file_7.owl,776,e9969f453b46d0f9a72497bea1cb6a9b45d011c5f32a7426d9dc646db01fef9a056cc4a1bbaa5a36d1f99b10c24cb1f2848d0d2faab63e3d31e11b6e2ac35e00,logm.omp.metadata,2025-09-17T07:49:08.001Z,20250917074908001,2025-09-17 07:49:08.001,2509170749000000009,uxv_omp_metadata_dev-2025-09-17 07:49:07,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL
onto_4,file_11.owl,324,10f465ff83951d5809e5c23b70a9649769e84ea56e6d8c3264ada2019cedf3a6252d9f4887bce37715b084a3e6148b7371e328540fd091dc1bd1e29692fd2ba2,logm.omp.metadata,2025-09-17T07:49:08.001Z,20250917074908001,2025-09-17 07:49:08.001,2509170749000000003,uxv_omp_metadata_dev-2025-09-17 07:49:07,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL


In [0]:
# Render usage_final_df (loaded from usage_final_table) so its rows can be
# inspected visually as a sanity check.
# NOTE(review): display() is not imported in this notebook; presumably it is
# the notebook runtime's built-in renderer - confirm.
display(usage_final_df)

start_date,start_time,id,ontology,activity,MetadataLogType,MetadataLogTimeGenerated,MetadataLogDate,TimeGenerated,MetadataLogFileName,TenantId,Type,MetadataLogId,MetadataLogGuid,filename,size_in_kb
17/Sep/2025,06:26,np84807,onto_1,Download,logm.omp.metadata,2025-09-17T06:26:54.69Z,20250917062654690,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL,3d8ea184a564d4bb16c3eb184942a47a533afa04c716a7919e984dc2ff328dfd718f131a1cd00c3de9e16f658b1348d0a23aac7ba6dc92ef0d406ee7e736986e,2509170626858993459,file_7.owl,453.0
17/Sep/2025,06:26,np40360,onto_1,Download,logm.omp.metadata,2025-09-17T06:26:10.584Z,20250917062610584,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL,5c24cda5f2378ccc637e6fb5fcbd489cafae3701f7f7e822364afb5ef5c0a15783aad14bd47884b41bdee14e76f97d16094469732fbc65acde5b5d7d28f8b346,2509170626858993459,file_7.owl,453.0
17/Sep/2025,06:26,np95782,onto_2,Data Query,logm.omp.metadata,2025-09-17T06:26:29.32Z,20250917062629320,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL,f7ab8ecfc080c2e698d1ec479f0b8bbf8ae650e15b8381ad013121d3b9b6c96287e24f681c76c3071ea8eed7890affb9925b864fe618bee547a1e47df9c9f372,2509170626858993459,,
17/Sep/2025,06:26,np96775,onto_19,API Access,logm.omp.metadata,2025-09-17T06:26:29.32Z,20250917062629320,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL,3b3e0f36be4ae693e45f45b70a9dae20313bc1c6bb2c82d5dec682f21ee1705d88c25040ff42c0c852a0c39c35103fe61b63d4d27d8f22a548382101be59a8bd,2509170626858993459,,
17/Sep/2025,06:27,np66637,onto_13,Upload,logm.omp.metadata,2025-09-17T06:27:09.769Z,20250917062709769,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL,ba3019d47463285fbaeae69826966d2b05a480dc9346d200451bcd3388a7ceba119a07f822197d899477a219edda362e807f8a941bced5c6e2407dd17613afd8,2509170627858993459,,
17/Sep/2025,06:27,np13386,onto_16,Upload,logm.omp.metadata,2025-09-17T06:27:09.769Z,20250917062709769,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL,b9bde8f70a593f12985859810831a5bba8206e11e7601afea0770040d9f360266631b36d6e6e195301a057087a17d828e8a013e25aa4c3e7f3223975be7a0085,2509170627858993459,,
17/Sep/2025,06:26,np87078,onto_12,API Access,logm.omp.metadata,2025-09-17T06:26:54.69Z,20250917062654690,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL,4c8d44680957f40bd7863ecc487d4c4798a0bc6babaff872eeaa5fe581005ea1589fa47e7e370c4e61bea925c94a1b9c8e8d264b3548b46794917b4dea7df9a5,2509170626858993459,,
17/Sep/2025,06:26,np88314,onto_6,Data Query,logm.omp.metadata,2025-09-17T06:26:10.584Z,20250917062610584,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL,1cf4e85938b0584a068450da32d75dfdb3d59c68cbc98edd010d0e9399df8cda21d6c930f5d509364be8c39261e55fa05125bb9b7dcc4fc8c1c44a1aefe111a1,2509170626858993459,,
17/Sep/2025,06:27,np63849,onto_11,Download,logm.omp.metadata,2025-09-17T06:27:26.541Z,20250917062726541,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL,ff9992f67eaf788bc15f3d1ca9d0c9c1be61de051006f3b75157b593a9a78bd9c4d93a0af21676a6a69d9eb2156537b62404bee400c78762dc8c9ff3481bf70c,2509170627343597383,file_4.owl,581.0
17/Sep/2025,06:26,np67531,onto_1,Login,logm.omp.metadata,2025-09-17T06:26:25.271Z,20250917062625271,2025-09-17 06:34:22.741,uxv_omp_usage-2025-09-17 06:23:53,63982aff-fb6c-4c22-973b-70e4acfb63e6,LOGM_OMP_MEATADATA_CL,e170f586b6e096f1911a35be3b4bdd316718135cfada04114ad6e6a565d0c17c8f4e2658dfee0c492fd6777507838e3e365e6a639e302f5e38a0676e8fae797b,2509170626343597383,,
