forked from apache/spark
-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SPARK-47574][INFRA] Introduce Structured Logging Framework
### What changes were proposed in this pull request? Introduce Structured Logging Framework as per [SPIP: Structured Logging Framework for Apache Spark](https://docs.google.com/document/d/1rATVGmFLNVLmtxSpWrEceYm7d-ocgu8ofhryVs4g3XU/edit?usp=sharing) . * The default logging output format will be json lines. For example ``` { "ts":"2023-03-12T12:02:46.661-0700", "level":"ERROR", "msg":"Cannot determine whether executor 289 is alive or not", "context":{ "executor_id":"289" }, "exception":{ "class":"org.apache.spark.SparkException", "msg":"Exception thrown in awaitResult", "stackTrace":"..." }, "source":"BlockManagerMasterEndpoint" } ``` * Introduce a new configuration `spark.log.structuredLogging.enabled` to set the default log4j configuration. It is true by default. Users can disable it to get plain text log outputs. * The change will start with the `logError` method. Example changes on the API: from ``` logError(s"Cannot determine whether executor $executorId is alive or not.", e) ``` to ``` logError(log"Cannot determine whether executor ${MDC(EXECUTOR_ID, executorId)} is alive or not.", e) ``` ### Why are the changes needed? To enhance Apache Spark's logging system by implementing structured logging. This transition will change the format of the default log output from plain text to JSON lines, making it more analyzable. ### Does this PR introduce _any_ user-facing change? Yes, the default log output format will be json lines instead of plain text. Users can restore the default plain text output by disabling the configuration `spark.log.structuredLogging.enabled`. If a user uses a customized log4j configuration, there are no changes in the log output. ### How was this patch tested? New unit tests ### Was this patch authored or co-authored using generative AI tooling? Yes, some of the code comments are from GitHub Copilot Closes apache#45729 from gengliangwang/LogInterpolator. 
Authored-by: Gengliang Wang <gengliang@apache.org> Signed-off-by: Gengliang Wang <gengliang@apache.org>
- Loading branch information
1 parent
a8b247e
commit 874d033
Showing
14 changed files
with
441 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
38 changes: 38 additions & 0 deletions
38
common/utils/src/main/resources/org/apache/spark/SparkLayout.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
{
  "ts": {
    "$resolver": "timestamp"
  },
  "level": {
    "$resolver": "level",
    "field": "name"
  },
  "msg": {
    "$resolver": "message",
    "stringified": true
  },
  "context": {
    "$resolver": "mdc"
  },
  "exception": {
    "class": {
      "$resolver": "exception",
      "field": "className"
    },
    "msg": {
      "$resolver": "exception",
      "field": "message",
      "stringified": true
    },
    "stacktrace": {
      "$resolver": "exception",
      "field": "stackTrace",
      "stackTrace": {
        "stringified": true
      }
    }
  },
  "logger": {
    "$resolver": "logger",
    "field": "name"
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
55 changes: 55 additions & 0 deletions
55
common/utils/src/main/resources/org/apache/spark/log4j2-pattern-layout-defaults.properties
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Set everything to be logged to the console
rootLogger.level = info
rootLogger.appenderRef.stdout.ref = console

appender.console.type = Console
appender.console.name = console
appender.console.target = SYSTEM_ERR
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex

# Settings to quiet third party logs that are too verbose
logger.jetty.name = org.sparkproject.jetty
logger.jetty.level = warn
logger.jetty2.name = org.sparkproject.jetty.util.component.AbstractLifeCycle
logger.jetty2.level = error
logger.repl1.name = org.apache.spark.repl.SparkIMain$exprTyper
logger.repl1.level = info
logger.repl2.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter
logger.repl2.level = info

# Set the default spark-shell log level to WARN. When running the spark-shell, the
# log level for this class is used to overwrite the root logger's log level, so that
# the user can have different defaults for the shell and regular Spark apps.
logger.repl.name = org.apache.spark.repl.Main
logger.repl.level = warn

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs
# in SparkSQL with Hive support
logger.metastore.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler
logger.metastore.level = fatal
logger.hive_functionregistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry
logger.hive_functionregistry.level = error

# Parquet related logging
logger.parquet.name = org.apache.parquet.CorruptStatistics
logger.parquet.level = error
logger.parquet2.name = parquet.CorruptStatistics
logger.parquet2.level = error
25 changes: 25 additions & 0 deletions
25
common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.spark.internal

/**
 * Various keys used for mapped diagnostic contexts (MDC) in logging.
 * All structured logging keys should be defined here for standardization.
 */
object LogKey extends Enumeration {
  // Identifier of an executor; rendered as "executor_id" in the structured log's MDC context.
  val EXECUTOR_ID = Value
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

rootLogger.level = info
rootLogger.appenderRef.file.ref = ${sys:test.appender:-File}

appender.file.type = File
appender.file.name = File
appender.file.fileName = target/unit-tests.log
appender.file.layout.type = JsonTemplateLayout
appender.file.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json

# Structured Logging Appender
appender.structured.type = File
appender.structured.name = structured
appender.structured.fileName = target/structured.log
appender.structured.layout.type = JsonTemplateLayout
appender.structured.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json

# Pattern Logging Appender
appender.pattern.type = File
appender.pattern.name = pattern
appender.pattern.fileName = target/pattern.log
appender.pattern.layout.type = PatternLayout
appender.pattern.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex

# Custom loggers
logger.structured.name = org.apache.spark.util.StructuredLoggingSuite
logger.structured.level = info
logger.structured.appenderRefs = structured
logger.structured.appenderRef.structured.ref = structured

logger.pattern.name = org.apache.spark.util.PatternLoggingSuite
logger.pattern.level = info
logger.pattern.appenderRefs = pattern
logger.pattern.appenderRef.pattern.ref = pattern
Oops, something went wrong.