Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Notifier to check the DMesg Logs for OOPS Exceptions #3617

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
adding the dmesg_oops notifier with relevant update to mixin_module
  • Loading branch information
umfranci committed Jan 21, 2025
commit 150679be1538145efaae9d0f5c8c1349298d348c
1 change: 1 addition & 0 deletions lisa/mixin_modules.py
Original file line number Diff line number Diff line change
@@ -11,6 +11,7 @@
import lisa.combinators.git_bisect_combinator # noqa: F401
import lisa.combinators.grid_combinator # noqa: F401
import lisa.notifiers.console # noqa: F401
import lisa.notifiers.dmesg_oops # noqa: F401
import lisa.notifiers.env_stats # noqa: F401
import lisa.notifiers.file # noqa: F401
import lisa.notifiers.html # noqa: F401
141 changes: 141 additions & 0 deletions lisa/notifiers/dmesg_oops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import json
import logging
import os
import re
import smtplib
import subprocess
from dataclasses import dataclass
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from pathlib import Path
from typing import Any, Dict, List, Pattern, Type, Union, cast

from dataclasses_json import dataclass_json

from lisa import messages, notifier, schema
from lisa.messages import (
    MessageBase,
    TestResultMessage,
    TestRunMessage,
    TestRunStatus,
    TestStatus,
)
from lisa.tools import Dmesg
from lisa.util import (
    KernelPanicException,
    check_panic,
    constants,
    get_matched_str,
)

# Signatures of kernel oops / panic reports. A dmesg line is treated as
# suspicious when any one of these patterns is found in it.
oops_regex_patterns: List[Pattern[str]] = [
    # Basic oops header, e.g. "Oops: 000b [#1] SMP". The kernel prints the
    # error code in hexadecimal, so hex digits have to be accepted.
    re.compile(r"Oops: [0-9a-fA-F]+ \[\#.*\]"),
    # NULL pointer dereference, older kernel wording.
    re.compile(
        r"BUG: unable to handle kernel NULL pointer dereference at "
        r"(0x)?[0-9a-fA-F]+"
    ),
    # NULL pointer dereference, wording used by newer (5.x+) kernels.
    re.compile(r"BUG: kernel NULL pointer dereference, address: (0x)?[0-9a-fA-F]+"),
    # Invalid memory access, older kernel wording.
    re.compile(r"BUG: unable to handle kernel paging request at (0x)?[0-9a-fA-F]+"),
    # Invalid memory access, wording used by newer (5.x+) kernels.
    re.compile(r"BUG: unable to handle page fault for address: (0x)?[0-9a-fA-F]+"),
    # Faulting instruction pointer, e.g. "RIP: 0010:func+0x1a/0x40". Symbol
    # names may carry compiler suffixes such as ".isra.0", hence the dot.
    re.compile(r"RIP: [0-9a-fA-F]+:([a-zA-Z0-9_.]+)\+[0-9a-fA-Fx]+/[0-9a-fA-Fx]+"),
    # Kernel call trace header.
    re.compile(r"Call Trace:\s*(.*)"),
    # General protection fault; the error code is hexadecimal as well.
    re.compile(r"general protection fault: [0-9a-fA-F]+ \[#.*\]"),
    # Kernel panic banner.
    re.compile(r"Kernel panic - not syncing: (.*)"),
    # Process details.
    re.compile(r"Process: ([a-zA-Z0-9_]+)\s*\(pid:\s*\d+\)"),
    # Stack dump.
    re.compile(r"Stack:\s*(.*)"),
    # Code dump.
    re.compile(r"Code:\s*(.*)"),
]

@dataclass_json
@dataclass
class DmsgOopsSchema(schema.Notifier):
    """
    Runbook settings accepted by the dmesg oops notifier.
    """

    # Name of the logging level; defaults to the name of logging.DEBUG.
    log_level: str = logging.getLevelName(logging.DEBUG)
    # File the notifier writes its findings to.
    output_file: str = "dmesg_errors.json"


class DmsgOops(notifier.Notifier):
    """
    Notifier that checks dmesg output captured in test logs for kernel
    panics and oops signatures.

    For each processed test result, every section of the test log found
    between the ``sudo dmesg`` command marker and the following
    ``execution time:`` marker is scanned. Findings are collected in
    ``self.dmesg_errors`` under the keys ``"panics"`` and ``"oops"`` (each a
    mapping of test name to a list of strings) and written to the configured
    output file as JSON.
    """

    # Markers that delimit one dmesg dump inside a test log file.
    _DMESG_START_MARKER = "cmd: ['sudo', 'dmesg']"
    _DMESG_END_MARKER = "execution time:"

    def __init__(self, runbook: DmsgOopsSchema) -> None:
        super().__init__(runbook)

    @classmethod
    def type_name(cls) -> str:
        """Return the notifier type name used in runbooks."""
        return "dmsg_oops_notifier"

    @classmethod
    def type_schema(cls) -> Type[schema.TypedSchema]:
        """Return the runbook schema class for this notifier."""
        return DmsgOopsSchema

    def save_results(self) -> None:
        """
        Write the collected findings to the output file as JSON.

        A relative ``output_file`` is resolved against
        ``constants.RUN_LOCAL_LOG_PATH``.
        """
        runbook = cast(DmsgOopsSchema, self.runbook)
        file_path = Path(runbook.output_file)
        if not file_path.is_absolute():
            file_path = constants.RUN_LOCAL_LOG_PATH / file_path
        self._log.info(f"Writing output to file {file_path}")
        # The file carries a .json name, so emit real JSON rather than the
        # Python repr of the dictionary.
        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(self.dmesg_errors, f, indent=2)

    def check_kernel_oops(
        self, dmesg_logs: str, context_lines: int = 4
    ) -> List[str]:
        """
        Find oops signatures in ``dmesg_logs`` and return them with context.

        Each line matching one of ``oops_regex_patterns`` is reported together
        with up to ``context_lines`` lines before and after it. Context
        windows that overlap or touch are merged, so one oops report with
        several matching lines yields a single snippet instead of many
        near-duplicates.

        Returns:
            A list of snippets (lines joined with newlines), in log order.
            Empty when nothing matches.
        """
        context_lines = max(context_lines, 0)
        lines = dmesg_logs.splitlines()
        # Half-open [start, end) line ranges, kept sorted and non-overlapping.
        windows: List[List[int]] = []
        for index, line in enumerate(lines):
            if not any(pattern.search(line) for pattern in oops_regex_patterns):
                continue
            start = max(index - context_lines, 0)
            end = min(index + context_lines + 1, len(lines))
            if windows and start <= windows[-1][1]:
                # Lines are visited in order, so `end` never shrinks here.
                windows[-1][1] = end
            else:
                windows.append([start, end])
        return ["\n".join(lines[start:end]) for start, end in windows]

    def dmesg_error_check(self, test_name: str, dmesg_logs: str) -> None:
        """
        Check one dmesg dump for panics and oopses and record the findings.

        A ``KernelPanicException`` raised by ``check_panic`` is logged and its
        text is stored under ``dmesg_errors["panics"][test_name]``. Oops
        snippets are stored under ``dmesg_errors["oops"][test_name]``; no
        entry is created for a dump without findings.
        """
        try:
            check_panic(dmesg_logs, "Result", self._log)
        except KernelPanicException as e:
            self._log.error(f"Kernel Panic found in the dmesg logs. {e}")
            # Store the text, not the exception object, so that the results
            # stay JSON-serializable.
            self.dmesg_errors["panics"].setdefault(test_name, []).append(str(e))
        oops_list = self.check_kernel_oops(dmesg_logs)
        if oops_list:
            self.dmesg_errors["oops"].setdefault(test_name, []).extend(oops_list)
        self._log.info("DMesg logs check completed")

    def process_serial_logs(
        self,
        test_name: str,
        file_path: Union[str, Path],
        pattern_start: str,
        pattern_end: str,
    ) -> None:
        """
        Scan every section of a log file delimited by the given markers.

        The text between each ``pattern_start`` and the next ``pattern_end``
        is passed to ``dmesg_error_check``. Scanning stops at the first
        start marker that has no matching end marker.

        Raises:
            ValueError: if either marker is empty (the scan could not
                advance through the file).
            OSError: if the file cannot be opened or read.
        """
        if not pattern_start or not pattern_end:
            raise ValueError("pattern_start and pattern_end must not be empty")
        # Undecodable bytes are replaced so that one bad byte does not
        # prevent the whole log from being checked.
        with open(file_path, "r", encoding="utf-8", errors="replace") as file:
            buffer = file.read()
        # Walk the text with an offset instead of re-slicing the buffer.
        position = 0
        while True:
            start_index = buffer.find(pattern_start, position)
            if start_index == -1:
                break
            data_start = start_index + len(pattern_start)
            end_index = buffer.find(pattern_end, data_start)
            if end_index == -1:
                break
            self.dmesg_error_check(test_name, buffer[data_start:end_index])
            position = end_index + len(pattern_end)

    def process_test_result_message(self, message: TestResultMessage) -> None:
        """
        Check the log file of a test result and persist the findings.

        Only messages that carry a log file and whose status is one of
        PASSED, FAILED, SKIPPED or ATTEMPTED are processed. Errors while
        reading or scanning the log are logged and not propagated; the
        results are written out afterwards either way.
        """
        checked_statuses = (
            TestStatus.PASSED,
            TestStatus.FAILED,
            TestStatus.SKIPPED,
            TestStatus.ATTEMPTED,
        )
        if not message.log_file or message.status not in checked_statuses:
            return
        log_path = (constants.RUN_LOCAL_LOG_PATH / message.log_file).absolute()
        try:
            self.process_serial_logs(
                message.name,
                log_path,
                self._DMESG_START_MARKER,
                self._DMESG_END_MARKER,
            )
        except Exception as e:
            # Best effort: a log that cannot be checked is reported here
            # and does not stop the notifier.
            self._log.error(f"Error while Processing Serial Console Logs : {e}")
        self.save_results()

    def _received_message(self, message: messages.MessageBase) -> None:
        """Dispatch test result messages to the log check; ignore others."""
        if isinstance(message, TestResultMessage):
            self.process_test_result_message(message=message)

    def _subscribed_message_type(self) -> List[Type[messages.MessageBase]]:
        """Return the message types this notifier subscribes to."""
        return [TestResultMessage]

    def _initialize(self, *args: Any, **kwargs: Any) -> None:
        """Read the runbook settings and reset the collected findings."""
        runbook = cast(DmsgOopsSchema, self.runbook)
        self._log_level = runbook.log_level
        # Category -> test name -> list of finding texts.
        self.dmesg_errors: Dict[str, Dict[str, List[str]]] = {
            "panics": {},
            "oops": {},
        }
Loading
Oops, something went wrong.