In [0]:
# install necessary modules
%pip install databricks.sdk

In [0]:
# Import necessary modules
from datetime import datetime, timedelta
from pyspark.sql.functions import col
from databricks.sdk import WorkspaceClient

In [0]:
def get_human_readable_time(time):
    """
    Format an epoch timestamp given in milliseconds as 'YYYY-MM-DD HH:MM:SS'.

    The conversion goes through datetime.fromtimestamp without a tz argument,
    so the result is expressed in the local timezone of the machine running it.

    Args:
        time (int): Epoch timestamp in milliseconds.
    Returns:
        str: The formatted date and time, e.g. '2023-06-01 10:15:30'.
    """
    # fromtimestamp expects seconds, the input is in milliseconds
    seconds_since_epoch = time / 1000
    moment = datetime.fromtimestamp(seconds_since_epoch)
    return moment.strftime('%Y-%m-%d %H:%M:%S')
    
def get_start_and_end_hour(job_run_id):
    """
    Retrieves the start and end times of a job run in human-readable format.

    The run is fetched a single time through the notebook-level WorkspaceClient
    `w`, so both timestamps come from the same snapshot of the run and only one
    Jobs API request is made.

    Args:
        job_run_id (int): The ID of the job run (the notebook passes an int).
    Returns:
        Tuple[str, str]: The start and end times, each formatted by
            get_human_readable_time as 'YYYY-MM-DD HH:MM:SS'.
    """
    # One API call; start_time and end_time are read from the same response
    run = w.jobs.get_run(job_run_id)
    hr_start_time = get_human_readable_time(run.start_time)
    hr_end_time = get_human_readable_time(run.end_time)
    return hr_start_time, hr_end_time

In [0]:
# Look up the parent (multi-task) job run id from the notebook context tags
notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
parent_run_tag = str(notebook_context.tags().get("multitaskParentRunId"))
# Drop the first five characters and the last one of the tag's string form
# NOTE(review): presumably this strips a 'Some(' ... ')' wrapper — confirm
job_run_id_str = parent_run_tag[5:-1]
job_run_id = int(job_run_id_str)
# Text fragment that marks an error line when the driver stderr logs are scanned
search_context = 'Error encountered when'

In [0]:
# Collect the distinct cluster IDs used by the tasks of the job run
w = WorkspaceClient()
# Fetch the run once and reuse it, rather than calling the Jobs API again for
# the task count and twice more for every task
job_run = w.jobs.get_run(job_run_id)
# Fall back to an empty list in case the run carries no task list
run_tasks = job_run.tasks or []
no_of_task = len(run_tasks)
# Tasks without a cluster instance are skipped
cluster_id_li = [
    task.cluster_instance.cluster_id
    for task in run_tasks
    if task.cluster_instance is not None
]
# De-duplicate while keeping first-seen task order, so the result is the same on every run
cluster_ids = list(dict.fromkeys(cluster_id_li))
print(cluster_ids)

In [0]:
# Human-readable start and end of the job run, as returned by get_start_and_end_hour
run_window = get_start_and_end_hour(job_run_id)
start_time = run_window[0]
end_time = run_window[1]
# Naive timestamp of this notebook execution
current_time = datetime.now()

In [0]:
# Build the DBFS log folder of every cluster
# The one cluster whose logs live under the 'highmemory' sub-folder
HIGH_MEMORY_CLUSTER_ID = '0530-064903-emg91jkm'
cluster_paths = [
    f'dbfs:/cluster-logs/highmemory/{cluster_id}'
    if cluster_id == HIGH_MEMORY_CLUSTER_ID
    else f'dbfs:/cluster-logs/{cluster_id}'
    for cluster_id in cluster_ids
]

In [0]:
# Collect the driver log files of every cluster
files_in_driver = []
dr_count = 0  # number of clusters whose driver folder could not be listed
for cluster_path in cluster_paths:
    driver_dir = f'{cluster_path}/driver/'
    try:
        files_in_driver.extend(dbutils.fs.ls(driver_dir))
    except Exception as list_error:
        # Best effort: a cluster without a listable driver folder is reported
        # and skipped. Catching Exception (not a bare except) lets
        # KeyboardInterrupt / SystemExit still stop the notebook.
        dr_count += 1
        print(f'driver files missing for {dr_count} Cluster')
        print(f'could not list {driver_dir}: {list_error}')

In [0]:
# An end time that renders as the Unix epoch (timestamp 0) is replaced by the
# current time. The epoch is also rendered the same way the run timestamps are
# (local time via datetime.fromtimestamp), so the check does not rely on the
# machine's timezone being UTC.
epoch_text = datetime.fromtimestamp(0).strftime('%Y-%m-%d %H:%M:%S')
if end_time in ('1970-01-01 00:00:00', epoch_text):
    # strftime rather than slicing str(current_time): str() omits the
    # fractional part when microsecond == 0, which would make a fixed slice
    # cut into the seconds and minutes
    end_time = current_time.strftime('%Y-%m-%d %H:%M:%S')
    print('Current date selected')

In [0]:
# Convert the date-time strings to datetime objects
start_datetime = datetime.strptime(start_time, "%Y-%m-%d %H:%M:%S")
end_datetime = datetime.strptime(end_time, "%Y-%m-%d %H:%M:%S")

# Build one 'YYYY-MM-DD HH' entry for every clock hour between start and end.
# Stepping begins at the top of the start hour: stepping from the raw start
# time would skip the last hour whenever the end minute is earlier than the
# start minute (e.g. 10:50 -> 11:10 would yield only hour 10).
datetime_list = []
hour_cursor = start_datetime.replace(minute=0, second=0, microsecond=0)
while hour_cursor <= end_datetime:
    datetime_list.append(hour_cursor.strftime("%Y-%m-%d %H"))
    hour_cursor += timedelta(hours=1)

In [0]:
# Select the driver stderr files that cover the run
# Names are compared on their first 22 characters ('stderr--YYYY-MM-DD--HH');
# the name 'stderr' is shorter than that and is compared in full.
if not files_in_driver:
    # Fail with an explicit message instead of an IndexError on files_in_driver[-1]
    raise RuntimeError('No driver log files were found for the clusters of this job run')

hour_format = "%Y-%m-%d %H"
stderr_name_format = "stderr--%Y-%m-%d--%H"
one_hour = timedelta(hours=1)

# Hour after the end of the run. datetime arithmetic keeps the hour zero-padded
# and rolls over to the next day after 23h, so the string comparison below is valid.
time_stamp_ending = (datetime.strptime(end_time, "%Y-%m-%d %H:%M:%S") + one_hour).strftime(hour_format)
# Date and hour embedded in the name of the last listed driver file
final_file_created_ts = files_in_driver[-1].name[8:22].replace('--', ' ')

stderr_file_name = []
# 'stderr' itself is included when the run started in the current hour (or later),
# or when the hour after the run's end is not earlier than the last listed file's hour
if (current_time.strftime(hour_format) <= start_time[:-6]) or (time_stamp_ending >= final_file_created_ts):
    stderr_file_name.append('stderr')
# Each hour in datetime_list maps to the file name carrying the following hour
stderr_file_name.extend(
    (datetime.strptime(date_time, hour_format) + one_hour).strftime(stderr_name_format)
    for date_time in datetime_list
)
wanted_names = set(stderr_file_name)
stderr_file_list = [dr_file for dr_file in files_in_driver if dr_file.name[:22] in wanted_names]

#Driver stderr files


In [0]:
# Scan every selected stderr file and display the lines containing the search text
no_of_error_df = 0
for stderr_file in stderr_file_list:
    matching_lines = spark.read.text(stderr_file.path).filter(
        col("value").like(f"%{search_context}%")
    )
    # Only files with at least one matching line are counted and shown
    if not matching_lines.isEmpty():
        no_of_error_df += 1
        display(matching_lines)

In [0]:
# Fail the notebook when at least one stderr file contained a matching line
if no_of_error_df:
    raise Exception("Errors were encounterd while running the pipline")